bump version, bugfixes

Kaveen Kumarasinghe 2 years ago
commit 434cbdfc6d

@@ -9,9 +9,9 @@ COPY . .
 RUN mkdir /install /src
 WORKDIR /install
 RUN pip install --target="/install" --upgrade pip setuptools wheel
-RUN pip install setuptools_rust
-RUN pip install torch==1.9.1+cpu torchvision==0.10.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
-RUN pip install git+https://github.com/openai/whisper.git
+RUN pip install --target="/install" --upgrade setuptools_rust
+RUN pip install --target="/install" --upgrade torch==1.9.0+cpu torchvision==0.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+RUN pip install --target="/install" --upgrade git+https://github.com/openai/whisper.git
 COPY requirements.txt /install
 RUN pip install --target="/install" -r requirements.txt
 COPY README.md /src

@@ -105,9 +105,9 @@ This bot supports per-user custom indexes. This means that users can upload file
 `/index add file:<file> or link:<link>` - Use a document or use a link to create/add to your indexes. If you provide a youtube link, the transcript of the video will be used. If you provide a web url, the contents of the webpage will be used, if you provide an image, the image text will be extracted and used!
-`/index query query:<prompt>` - Query your current index for a given prompt. GPT will answer based on your current document/indedx
-`/index load index:<index>` - Load a previously created index to query
+`/index query query:<prompt> nodes:<number> response_mode:<mode>` - Query your current index for a given prompt. GPT will answer based on your current document/index. You can also set it to query over more nodes, further refining the output over each one. A description of the modes can be found <a href="https://gpt-index.readthedocs.io/en/latest/guides/usage_pattern.html#setting-response-mode">here</a>. They do not work for deep composed indexes
+`/index load user_index:<index> or server_index:<index>` - Load a previously created index you own yourself, or an index for the whole server.
 `/index compose` - Combine multiple saved indexes into one, or upgrade existing indexes into Deep Compositions.
@@ -115,7 +115,7 @@ This bot supports per-user custom indexes. This means that users can upload file
 `/index add_discord channel:<discord channel>` - Create an add an index based on a discord channel
-`/index discord_backup` - Use the last 3000 messages of every channel on your discord server as an index
+`/index discord_backup` - Use the last 3000 messages of every channel on your discord server as an index. Needs both an admin and a index role
 ### System and Settings

@@ -532,13 +532,21 @@ class Commands(discord.Cog, name="Commands"):
     )
     @discord.guild_only()
     @discord.option(
-        name="index",
-        description="Which file to load the index from",
-        required=True,
-        autocomplete=File_autocompleter.get_indexes,
+        name="user_index",
+        description="Which user file to load the index from",
+        required=False,
+        autocomplete=File_autocompleter.get_user_indexes,
     )
-    async def load_index(self, ctx: discord.ApplicationContext, index: str):
-        await self.index_cog.load_index_command(ctx, index)
+    @discord.option(
+        name="server_index",
+        description="Which server file to load the index from",
+        required=False,
+        autocomplete=File_autocompleter.get_server_indexes,
+    )
+    async def load_index(
+        self, ctx: discord.ApplicationContext, user_index: str, server_index: str
+    ):
+        await self.index_cog.load_index_command(ctx, user_index, server_index)

     @add_to_group("index")
     @discord.slash_command(
@@ -611,6 +619,7 @@ class Commands(discord.Cog, name="Commands"):
         name="discord_backup",
         description="Save an index made from the whole server",
         guild_ids=ALLOWED_GUILDS,
+        checks=[Check.check_admin_roles(), Check.check_index_roles()],
     )
     @discord.guild_only()
     async def discord_backup(self, ctx: discord.ApplicationContext):
@@ -622,18 +631,31 @@ class Commands(discord.Cog, name="Commands"):
     )
     @discord.guild_only()
     @discord.option(name="query", description="What to query the index", required=True)
+    @discord.option(
+        name="nodes",
+        description="How many nodes should the response be queried from, only non-deep indexes",
+        required=False,
+        default=1,
+        min_value=1,
+        max_value=3,
+        input_type=discord.SlashCommandOptionType.integer,
+    )
     @discord.option(
         name="response_mode",
-        description="Response mode",
+        description="Response mode, doesn't work on deep composed indexes",
         guild_ids=ALLOWED_GUILDS,
         required=False,
         default="default",
         choices=["default", "compact", "tree_summarize"],
     )
     async def query(
-        self, ctx: discord.ApplicationContext, query: str, response_mode: str
+        self,
+        ctx: discord.ApplicationContext,
+        query: str,
+        nodes: int,
+        response_mode: str,
     ):
-        await self.index_cog.query_command(ctx, query, response_mode)
+        await self.index_cog.query_command(ctx, query, nodes, response_mode)

     #
     # DALLE commands

@@ -95,8 +95,26 @@ class IndexService(discord.Cog, name="IndexService"):
         await ctx.defer(ephemeral=True)
         await self.index_handler.backup_discord(ctx, user_api_key=user_api_key)

-    async def load_index_command(self, ctx, index):
+    async def load_index_command(self, ctx, user_index, server_index):
         """Command handler to backup the entire server"""
+
+        if not user_index and not server_index:
+            await ctx.respond("Please provide a user or server index")
+            return
+
+        if user_index and server_index:
+            await ctx.respond(
+                "Please provide only one user index or server index. Only one or the other."
+            )
+            return
+
+        if server_index:
+            index = server_index
+            server = True
+        else:
+            index = user_index
+            server = False
+
         user_api_key = None
         if USER_INPUT_API_KEYS:
             user_api_key = await TextService.get_user_api_key(
@@ -106,9 +124,9 @@ class IndexService(discord.Cog, name="IndexService"):
             return
         await ctx.defer(ephemeral=True)

-        await self.index_handler.load_index(ctx, index, user_api_key)
+        await self.index_handler.load_index(ctx, index, server, user_api_key)

-    async def query_command(self, ctx, query, response_mode):
+    async def query_command(self, ctx, query, nodes, response_mode):
         """Command handler to query your index"""
         user_api_key = None
         if USER_INPUT_API_KEYS:
@@ -119,7 +137,7 @@ class IndexService(discord.Cog, name="IndexService"):
             return
         await ctx.defer()

-        await self.index_handler.query(ctx, query, response_mode, user_api_key)
+        await self.index_handler.query(ctx, query, response_mode, nodes, user_api_key)

     async def compose_command(self, ctx, name):
         """Command handler to compose from your index"""

@@ -150,8 +150,8 @@ class File_autocompleter:
         except Exception:
             return ["No 'openers' folder"]

-    async def get_indexes(ctx: discord.AutocompleteContext):
-        """get all files in the openers folder"""
+    async def get_user_indexes(ctx: discord.AutocompleteContext):
+        """get all files in the indexes folder"""
         try:
             return [
                 file
@@ -165,4 +165,21 @@ class File_autocompleter:
                 :25
             ] # returns the 25 first files from your current input
         except Exception:
-            return ["No 'indexes' folder"]
+            return ["No user indexes found, add an index"]
+
+    async def get_server_indexes(ctx: discord.AutocompleteContext):
+        """get all files in the indexes folder"""
+        try:
+            return [
+                file
+                for file in os.listdir(
+                    EnvService.find_shared_file(
+                        f"indexes/{str(ctx.interaction.guild.id)}/"
+                    )
+                )
+                if file.startswith(ctx.value.lower())
+            ][
+                :25
+            ] # returns the 25 first files from your current input
+        except Exception:
+            return ["No server indexes found, add an index"]

@@ -8,22 +8,25 @@ import aiofiles
 from functools import partial
 from typing import List, Optional
 from pathlib import Path
-from datetime import date, datetime
+from datetime import date

+from langchain import OpenAI
 from gpt_index.readers import YoutubeTranscriptReader
 from gpt_index.readers.schema.base import Document
 from gpt_index import (
     GPTSimpleVectorIndex,
     SimpleDirectoryReader,
     QuestionAnswerPrompt,
     BeautifulSoupWebReader,
-    GPTFaissIndex,
     GPTListIndex,
     QueryMode,
     GPTTreeIndex,
     GoogleDocsReader,
     MockLLMPredictor,
+    LLMPredictor,
     QueryConfig,
+    PromptHelper,
     IndexStructType,
 )
 from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
@@ -35,15 +38,28 @@ from services.environment_service import EnvService, app_root_path
 SHORT_TO_LONG_CACHE = {}


-def get_and_query(user_id, index_storage, query, llm_predictor):
-    # TODO Do prediction here for token usage
+def get_and_query(user_id, index_storage, query, response_mode, nodes, llm_predictor):
     index: [GPTSimpleVectorIndex, ComposableGraph] = index_storage[
         user_id
     ].get_index_or_throw()
+    prompthelper = PromptHelper(4096, 500, 20)
     if isinstance(index, GPTTreeIndex):
-        response = index.query(query, verbose=True, child_branch_factor=2)
+        response = index.query(
+            query,
+            verbose=True,
+            child_branch_factor=2,
+            llm_predictor=llm_predictor,
+            prompt_helper=prompthelper,
+        )
     else:
-        response = index.query(query, verbose=True)
+        response = index.query(
+            query,
+            response_mode=response_mode,
+            verbose=True,
+            llm_predictor=llm_predictor,
+            similarity_top_k=nodes,
+            prompt_helper=prompthelper,
+        )
     return response
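
Note on the reworked get_and_query above: the new nodes option is forwarded as similarity_top_k, and PromptHelper(4096, 500, 20) corresponds to (max_input_size, num_output, max_chunk_overlap) in gpt-index 0.3.x. A minimal standalone sketch of the same call pattern, assuming an index already saved to disk (the file path and helper function below are illustrative, not part of this commit):

    # Illustrative sketch of the query path introduced in this commit (gpt-index 0.3.x style).
    # query_saved_index() and "my_index.json" are hypothetical; only the call pattern mirrors the diff.
    from langchain import OpenAI
    from gpt_index import GPTSimpleVectorIndex, LLMPredictor, PromptHelper


    def query_saved_index(path: str, query: str, nodes: int = 1, response_mode: str = "default"):
        index = GPTSimpleVectorIndex.load_from_disk(path)
        llm_predictor = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
        prompt_helper = PromptHelper(4096, 500, 20)  # max_input_size, num_output, max_chunk_overlap
        return index.query(
            query,
            response_mode=response_mode,  # "default", "compact", or "tree_summarize"
            similarity_top_k=nodes,       # the new "nodes" slash-command option (1-3)
            llm_predictor=llm_predictor,
            prompt_helper=prompt_helper,
            verbose=True,
        )


    # Usage (hypothetical file name):
    # print(query_saved_index("my_index.json", "What does the document say about pricing?", nodes=3))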
@@ -66,7 +82,7 @@ class IndexData:
     def has_indexes(self, user_id):
         try:
             return len(os.listdir(f"{app_root_path()}/indexes/{user_id}")) > 0
-        except:
+        except Exception:
             return False

     def add_index(self, index, user_id, file_name):
@@ -93,9 +109,8 @@ class IndexData:
             for file in os.listdir(f"{app_root_path()}/indexes/{user_id}"):
                 os.remove(f"{app_root_path()}/indexes/{user_id}/{file}")
-        except:
+        except Exception:
             traceback.print_exc()
-            pass


 class Index_handler:
@@ -271,14 +286,23 @@ class Index_handler:
             await ctx.respond("Failed to set index")
             traceback.print_exc()

-    async def load_index(self, ctx: discord.ApplicationContext, index, user_api_key):
+    async def load_index(
+        self, ctx: discord.ApplicationContext, index, server, user_api_key
+    ):
         if not user_api_key:
             os.environ["OPENAI_API_KEY"] = self.openai_key
         else:
             os.environ["OPENAI_API_KEY"] = user_api_key

         try:
-            index_file = EnvService.find_shared_file(f"indexes/{ctx.user.id}/{index}")
+            if server:
+                index_file = EnvService.find_shared_file(
+                    f"indexes/{ctx.guild.id}/{index}"
+                )
+            else:
+                index_file = EnvService.find_shared_file(
+                    f"indexes/{ctx.user.id}/{index}"
+                )
             index = await self.loop.run_in_executor(
                 None, partial(self.index_load_file, index_file)
             )
@@ -306,7 +330,10 @@ class Index_handler:
                 for doc_id in [docmeta for docmeta in _index.docstore.docs.keys()]
                 if isinstance(_index.docstore.get_document(doc_id), Document)
             ]
-            tree_index = GPTTreeIndex(documents=documents)
+            llm_predictor = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
+            tree_index = GPTTreeIndex(documents=documents, llm_predictor=llm_predictor)
+            print("The last token usage was ", llm_predictor.last_token_usage)
+            await self.usage_service.update_usage(llm_predictor.last_token_usage)

             # Now we have a list of tree indexes, we can compose them
             if not name:
@@ -353,10 +380,13 @@ class Index_handler:
             index = await self.loop.run_in_executor(
                 None, partial(self.index_discord, document)
             )
-            Path(app_root_path() / "indexes").mkdir(parents=True, exist_ok=True)
+            Path(app_root_path() / "indexes" / str(ctx.guild.id)).mkdir(
+                parents=True, exist_ok=True
+            )
             index.save_to_disk(
                 app_root_path()
                 / "indexes"
+                / str(ctx.guild.id)
                 / f"{ctx.guild.name.replace(' ', '-')}_{date.today().month}_{date.today().day}.json"
             )
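
The backup change above moves server backups from the flat indexes/ folder into a per-guild subfolder, which is what the new get_server_indexes autocompleter and the server branch of load_index look in. A small sketch of the resulting layout, with an illustrative app root, guild id, and guild name (all made up):

    # Hypothetical example of the per-guild index path built by discord_backup in this commit.
    from datetime import date
    from pathlib import Path

    app_root = Path("/src")                          # assumption: the bot's app root
    guild_id, guild_name = 1234567890, "My Server"   # hypothetical guild

    folder = app_root / "indexes" / str(guild_id)
    folder.mkdir(parents=True, exist_ok=True)
    index_path = folder / f"{guild_name.replace(' ', '-')}_{date.today().month}_{date.today().day}.json"
    # e.g. /src/indexes/1234567890/My-Server_1_23.json, later listed by get_server_indexes
    print(index_path)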
@@ -366,7 +396,12 @@ class Index_handler:
             traceback.print_exc()

     async def query(
-        self, ctx: discord.ApplicationContext, query: str, response_mode, user_api_key
+        self,
+        ctx: discord.ApplicationContext,
+        query: str,
+        response_mode,
+        nodes,
+        user_api_key,
     ):
         if not user_api_key:
             os.environ["OPENAI_API_KEY"] = self.openai_key
@@ -374,11 +409,17 @@ class Index_handler:
             os.environ["OPENAI_API_KEY"] = user_api_key

         try:
-            llm_predictor = MockLLMPredictor(max_tokens=256)
+            llm_predictor = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
             response = await self.loop.run_in_executor(
                 None,
                 partial(
-                    get_and_query, ctx.user.id, self.index_storage, query, llm_predictor
+                    get_and_query,
+                    ctx.user.id,
+                    self.index_storage,
+                    query,
+                    response_mode,
+                    nodes,
+                    llm_predictor,
                 ),
             )
             print("The last token usage was ", llm_predictor.last_token_usage)

@@ -10,7 +10,7 @@ sqlitedict==2.1.0
 backoff==2.2.1
 flask==2.2.2
 beautifulsoup4==4.11.1
-gpt-index==0.3.4
+gpt-index==0.3.5
 PyPDF2==3.0.1
 youtube_transcript_api==0.5.0
 sentencepiece==0.1.97

@@ -18,7 +18,9 @@ class UsageService:
         tokens_used = int(tokens_used)
         price = (tokens_used / 1000) * 0.02
         usage = await self.get_usage()
-        print("The current usage is " + str(usage) + " credits")
+        print(
+            f"Cost -> Old: {str(usage)} | New: {str(usage + float(price))}, used {str(float(price))} credits"
+        )
         # Do the same as above but with aiofiles
         async with aiofiles.open(self.usage_file_path, "w") as f:
             await f.write(str(usage + float(price)))
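
For reference, the cost line a few rows above works out as follows; the token count and running total here are made-up example values (the real code prints the raw floats without rounding):

    # price = (tokens_used / 1000) * 0.02, i.e. $0.02 per 1K tokens (davinci-style pricing)
    tokens_used = 1500                    # hypothetical token count for one query
    price = (tokens_used / 1000) * 0.02
    usage = 1.25                          # hypothetical running total read from the usage file
    print(f"Cost -> Old: {usage} | New: {round(usage + price, 4)}, used {round(price, 4)} credits")
    # Cost -> Old: 1.25 | New: 1.28, used 0.03 credits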
