Merge pull request #142 from Hikari-Haru/index-fixes

Various gpt-index fixes
Hikari Haru 1 year ago committed by GitHub
commit d780235d8d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -9,9 +9,9 @@ COPY . .
RUN mkdir /install /src
WORKDIR /install
RUN pip install --target="/install" --upgrade pip setuptools wheel
RUN pip install setuptools_rust
RUN pip install torch==1.9.1+cpu torchvision==0.10.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
RUN pip install git+https://github.com/openai/whisper.git
RUN pip install --target="/install" --upgrade setuptools_rust
RUN pip install --target="/install" --upgrade torch==1.9.1+cpu torchvision==0.10.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
RUN pip install --target="/install" --upgrade git+https://github.com/openai/whisper.git
COPY requirements.txt /install
RUN pip install --target="/install" -r requirements.txt
COPY README.md /src

@ -105,9 +105,9 @@ This bot supports per-user custom indexes. This means that users can upload file
`/index add file:<file> or link:<link>` - Use a document or a link to create/add to your indexes. If you provide a YouTube link, the transcript of the video will be used. If you provide a web URL, the contents of the webpage will be used; if you provide an image, the image text will be extracted and used!
`/index query query:<prompt>` - Query your current index for a given prompt. GPT will answer based on your current document/indedx
`/index query query:<prompt> nodes:<number> response_mode:<mode>` - Query your current index for a given prompt. GPT will answer based on your current document/index. You can also set it to query over more nodes, further refining the output over each one. A description of the modes can be found <a href="https://gpt-index.readthedocs.io/en/latest/guides/usage_pattern.html#setting-response-mode">here</a>. The `nodes` and `response_mode` options do not work for deep composed indexes.
`/index load index:<index>` - Load a previously created index to query
`/index load user_index:<index> or server_index:<index>` - Load a previously created index you own yourself, or an index for the whole server.
`/index compose` - Combine multiple saved indexes into one, or upgrade existing indexes into Deep Compositions.
@ -115,7 +115,7 @@ This bot supports per-user custom indexes. This means that users can upload file
`/index add_discord channel:<discord channel>` - Create and add an index based on a Discord channel
`/index discord_backup` - Use the last 3000 messages of every channel on your discord server as an index
`/index discord_backup` - Use the last 3000 messages of every channel on your Discord server as an index. Requires both an admin role and an index role.
### System and Settings

@ -532,13 +532,19 @@ class Commands(discord.Cog, name="Commands"):
)
@discord.guild_only()
@discord.option(
name="index",
description="Which file to load the index from",
required=True,
autocomplete=File_autocompleter.get_indexes,
name="user_index",
description="Which user file to load the index from",
required=False,
autocomplete=File_autocompleter.get_user_indexes,
)
@discord.option(
name="server_index",
description="Which server file to load the index from",
required=False,
autocomplete=File_autocompleter.get_server_indexes,
)
async def load_index(self, ctx: discord.ApplicationContext, index: str):
await self.index_cog.load_index_command(ctx, index)
async def load_index(self, ctx: discord.ApplicationContext, user_index: str, server_index: str):
await self.index_cog.load_index_command(ctx, user_index, server_index)
@add_to_group("index")
@discord.slash_command(
@ -611,6 +617,7 @@ class Commands(discord.Cog, name="Commands"):
name="discord_backup",
description="Save an index made from the whole server",
guild_ids=ALLOWED_GUILDS,
checks=[Check.check_admin_roles(), Check.check_index_roles()]
)
@discord.guild_only()
async def discord_backup(self, ctx: discord.ApplicationContext):
@ -622,18 +629,27 @@ class Commands(discord.Cog, name="Commands"):
)
@discord.guild_only()
@discord.option(name="query", description="What to query the index", required=True)
@discord.option(
name="nodes",
description="How many nodes should the response be queried from, only non-deep indexes",
required=False,
default=1,
min_value=1,
max_value=3,
input_type=discord.SlashCommandOptionType.integer,
)
@discord.option(
name="response_mode",
description="Response mode",
description="Response mode, doesn't work on deep composed indexes",
guild_ids=ALLOWED_GUILDS,
required=False,
default="default",
choices=["default", "compact", "tree_summarize"],
)
async def query(
self, ctx: discord.ApplicationContext, query: str, response_mode: str
self, ctx: discord.ApplicationContext, query: str, nodes:int, response_mode: str
):
await self.index_cog.query_command(ctx, query, response_mode)
await self.index_cog.query_command(ctx, query, nodes, response_mode)
#
# DALLE commands

@ -95,8 +95,26 @@ class IndexService(discord.Cog, name="IndexService"):
await ctx.defer(ephemeral=True)
await self.index_handler.backup_discord(ctx, user_api_key=user_api_key)
async def load_index_command(self, ctx, index):
async def load_index_command(self, ctx, user_index, server_index):
"""Command handler to backup the entire server"""
if not user_index and not server_index:
await ctx.respond("Please provide a user or server index")
return
if user_index and server_index:
await ctx.respond(
"Please provide only one user index or server index. Only one or the other."
)
return
if server_index:
index = server_index
server = True
else:
index = user_index
server = False
user_api_key = None
if USER_INPUT_API_KEYS:
user_api_key = await TextService.get_user_api_key(
@ -106,9 +124,9 @@ class IndexService(discord.Cog, name="IndexService"):
return
await ctx.defer(ephemeral=True)
await self.index_handler.load_index(ctx, index, user_api_key)
await self.index_handler.load_index(ctx, index, server, user_api_key)
async def query_command(self, ctx, query, response_mode):
async def query_command(self, ctx, query, nodes, response_mode):
"""Command handler to query your index"""
user_api_key = None
if USER_INPUT_API_KEYS:
@ -119,7 +137,7 @@ class IndexService(discord.Cog, name="IndexService"):
return
await ctx.defer()
await self.index_handler.query(ctx, query, response_mode, user_api_key)
await self.index_handler.query(ctx, query, response_mode, nodes, user_api_key)
async def compose_command(self, ctx, name):
"""Command handler to compose from your index"""

@ -150,8 +150,8 @@ class File_autocompleter:
except Exception:
return ["No 'openers' folder"]
async def get_indexes(ctx: discord.AutocompleteContext):
"""get all files in the openers folder"""
async def get_user_indexes(ctx: discord.AutocompleteContext):
"""get all files in the indexes folder"""
try:
return [
file
@ -165,4 +165,22 @@ class File_autocompleter:
:25
] # returns the 25 first files from your current input
except Exception:
return ["No 'indexes' folder"]
return ["No user indexes found, add an index"]
async def get_server_indexes(ctx: discord.AutocompleteContext):
    """Autocomplete the saved index files of the current server.

    Lists the shared ``indexes/<guild id>/`` folder and keeps the file names
    that start with what the user has typed so far. The comparison is
    case-insensitive.

    Args:
        ctx: Autocomplete context; ``ctx.value`` is the partial input and
            ``ctx.interaction.guild.id`` selects the server's folder.

    Returns:
        At most 25 matching file names, or a single placeholder entry if
        anything fails while listing the folder.
    """
    try:
        typed = ctx.value.lower()
        index_dir = EnvService.find_shared_file(
            f"indexes/{str(ctx.interaction.guild.id)}/"
        )
        # Lowercase the file name too: the typed value is lowercased, so
        # comparing it against the raw name would hide every index whose
        # file name contains uppercase characters (e.g. one derived from a
        # server name) as soon as the user starts typing.
        matches = [
            file for file in os.listdir(index_dir) if file.lower().startswith(typed)
        ]
        return matches[:25]  # only the first 25 matches for the current input
    except Exception:
        # Best-effort autocomplete: surface a hint instead of raising.
        return ["No server indexes found, add an index"]

@ -8,22 +8,25 @@ import aiofiles
from functools import partial
from typing import List, Optional
from pathlib import Path
from datetime import date, datetime
from datetime import date
from langchain import OpenAI
from gpt_index.readers import YoutubeTranscriptReader
from gpt_index.readers.schema.base import Document
from gpt_index import (
GPTSimpleVectorIndex,
SimpleDirectoryReader,
QuestionAnswerPrompt,
BeautifulSoupWebReader,
GPTFaissIndex,
GPTListIndex,
QueryMode,
GPTTreeIndex,
GoogleDocsReader,
MockLLMPredictor,
LLMPredictor,
QueryConfig,
PromptHelper,
IndexStructType,
)
from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
@ -35,15 +38,15 @@ from services.environment_service import EnvService, app_root_path
SHORT_TO_LONG_CACHE = {}
def get_and_query(user_id, index_storage, query, llm_predictor):
# TODO Do prediction here for token usage
def get_and_query(user_id, index_storage, query, response_mode, nodes, llm_predictor):
index: [GPTSimpleVectorIndex, ComposableGraph] = index_storage[
user_id
].get_index_or_throw()
prompthelper = PromptHelper(4096, 500, 20)
if isinstance(index, GPTTreeIndex):
response = index.query(query, verbose=True, child_branch_factor=2)
response = index.query(query, verbose=True, child_branch_factor=2, llm_predictor=llm_predictor, prompt_helper=prompthelper)
else:
response = index.query(query, verbose=True)
response = index.query(query, response_mode=response_mode, verbose=True, llm_predictor=llm_predictor, similarity_top_k=nodes, prompt_helper=prompthelper)
return response
@ -66,7 +69,7 @@ class IndexData:
def has_indexes(self, user_id):
try:
return len(os.listdir(f"{app_root_path()}/indexes/{user_id}")) > 0
except:
except Exception:
return False
def add_index(self, index, user_id, file_name):
@ -93,9 +96,8 @@ class IndexData:
for file in os.listdir(f"{app_root_path()}/indexes/{user_id}"):
os.remove(f"{app_root_path()}/indexes/{user_id}/{file}")
except:
except Exception:
traceback.print_exc()
pass
class Index_handler:
@ -271,14 +273,17 @@ class Index_handler:
await ctx.respond("Failed to set index")
traceback.print_exc()
async def load_index(self, ctx: discord.ApplicationContext, index, user_api_key):
async def load_index(self, ctx: discord.ApplicationContext, index, server, user_api_key):
if not user_api_key:
os.environ["OPENAI_API_KEY"] = self.openai_key
else:
os.environ["OPENAI_API_KEY"] = user_api_key
try:
index_file = EnvService.find_shared_file(f"indexes/{ctx.user.id}/{index}")
if server:
index_file = EnvService.find_shared_file(f"indexes/{ctx.guild.id}/{index}")
else:
index_file = EnvService.find_shared_file(f"indexes/{ctx.user.id}/{index}")
index = await self.loop.run_in_executor(
None, partial(self.index_load_file, index_file)
)
@ -306,7 +311,10 @@ class Index_handler:
for doc_id in [docmeta for docmeta in _index.docstore.docs.keys()]
if isinstance(_index.docstore.get_document(doc_id), Document)
]
tree_index = GPTTreeIndex(documents=documents)
llm_predictor = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
tree_index = GPTTreeIndex(documents=documents, llm_predictor=llm_predictor)
print("The last token usage was ", llm_predictor.last_token_usage)
await self.usage_service.update_usage(llm_predictor.last_token_usage)
# Now we have a list of tree indexes, we can compose them
if not name:
@ -353,10 +361,11 @@ class Index_handler:
index = await self.loop.run_in_executor(
None, partial(self.index_discord, document)
)
Path(app_root_path() / "indexes").mkdir(parents=True, exist_ok=True)
Path(app_root_path() / "indexes" / str(ctx.guild.id)).mkdir(parents=True, exist_ok=True)
index.save_to_disk(
app_root_path()
/ "indexes"
/ str(ctx.guild.id)
/ f"{ctx.guild.name.replace(' ', '-')}_{date.today().month}_{date.today().day}.json"
)
@ -366,7 +375,7 @@ class Index_handler:
traceback.print_exc()
async def query(
self, ctx: discord.ApplicationContext, query: str, response_mode, user_api_key
self, ctx: discord.ApplicationContext, query: str, response_mode, nodes, user_api_key
):
if not user_api_key:
os.environ["OPENAI_API_KEY"] = self.openai_key
@ -374,11 +383,11 @@ class Index_handler:
os.environ["OPENAI_API_KEY"] = user_api_key
try:
llm_predictor = MockLLMPredictor(max_tokens=256)
llm_predictor = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
response = await self.loop.run_in_executor(
None,
partial(
get_and_query, ctx.user.id, self.index_storage, query, llm_predictor
get_and_query, ctx.user.id, self.index_storage, query, response_mode, nodes, llm_predictor
),
)
print("The last token usage was ", llm_predictor.last_token_usage)

@ -10,7 +10,7 @@ sqlitedict==2.1.0
backoff==2.2.1
flask==2.2.2
beautifulsoup4==4.11.1
gpt-index==0.3.4
gpt-index==0.3.5
PyPDF2==3.0.1
youtube_transcript_api==0.5.0
sentencepiece==0.1.97

@ -18,7 +18,7 @@ class UsageService:
tokens_used = int(tokens_used)
price = (tokens_used / 1000) * 0.02
usage = await self.get_usage()
print("The current usage is " + str(usage) + " credits")
print(f"Cost -> Old: {str(usage)} | New: {str(usage + float(price))}, used {str(float(price))} credits")
# Do the same as above but with aiofiles
async with aiofiles.open(self.usage_file_path, "w") as f:
await f.write(str(usage + float(price)))

Loading…
Cancel
Save