From 258a87945c0daaf252c10332441162434d5550d6 Mon Sep 17 00:00:00 2001 From: Kaveen Kumarasinghe Date: Sun, 5 Feb 2023 00:35:49 -0500 Subject: [PATCH] composability --- cogs/commands.py | 11 +++ cogs/index_service_cog.py | 13 ++- gpt3discord.py | 3 +- models/index_model.py | 163 +++++++++++++++++++++++++++++++++----- 4 files changed, 167 insertions(+), 23 deletions(-) diff --git a/cogs/commands.py b/cogs/commands.py index 591a96f..c97aa9a 100644 --- a/cogs/commands.py +++ b/cogs/commands.py @@ -535,6 +535,17 @@ class Commands(discord.Cog, name="Commands"): async def reset(self, ctx:discord.ApplicationContext): await self.index_cog.reset_command(ctx) + @add_to_group("index") + @discord.slash_command( + name="compose", + description="Combine multiple indexes together", + guild_ids=ALLOWED_GUILDS + ) + @discord.option(name="name", description="The name of the new index", required=False, input_type=discord.SlashCommandOptionType.string) + @discord.guild_only() + async def compose(self, ctx:discord.ApplicationContext, name : str): + await self.index_cog.compose_command(ctx,name) + @add_to_group("index") @discord.slash_command( name="add_discord", diff --git a/cogs/index_service_cog.py b/cogs/index_service_cog.py index 4cb0d84..b7fd70a 100644 --- a/cogs/index_service_cog.py +++ b/cogs/index_service_cog.py @@ -14,10 +14,11 @@ class IndexService(discord.Cog, name="IndexService"): def __init__( self, bot, + usage_service, ): super().__init__() self.bot = bot - self.index_handler = Index_handler(bot) + self.index_handler = Index_handler(bot, usage_service) async def set_index_command(self, ctx, file: discord.Attachment = None, link: str = None): """Command handler to set a file as your personal index""" @@ -98,3 +99,13 @@ class IndexService(discord.Cog, name="IndexService"): await ctx.defer() await self.index_handler.query(ctx, query, response_mode, user_api_key) + + async def compose_command(self, ctx, name): + """Command handler to compose from your index""" + user_api_key = None + if USER_INPUT_API_KEYS: + user_api_key = await TextService.get_user_api_key(ctx.user.id, ctx, USER_KEY_DB) + if not user_api_key: + return + + await self.index_handler.compose(ctx, name, user_api_key) diff --git a/gpt3discord.py b/gpt3discord.py index 09efeec..2a8c790 100644 --- a/gpt3discord.py +++ b/gpt3discord.py @@ -172,7 +172,8 @@ async def main(): bot.add_cog( IndexService( - bot + bot, + usage_service, ) ) diff --git a/models/index_model.py b/models/index_model.py index fb45a5f..1b4ae62 100644 --- a/models/index_model.py +++ b/models/index_model.py @@ -13,7 +13,8 @@ from datetime import date, datetime from gpt_index.readers import YoutubeTranscriptReader from gpt_index.readers.schema.base import Document from gpt_index import GPTSimpleVectorIndex, SimpleDirectoryReader, QuestionAnswerPrompt, BeautifulSoupWebReader, \ - GPTFaissIndex, GPTListIndex, QueryMode, GPTTreeIndex, GoogleDocsReader + GPTFaissIndex, GPTListIndex, QueryMode, GPTTreeIndex, GoogleDocsReader, MockLLMPredictor, QueryConfig, \ + IndexStructType from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR from gpt_index.composability import ComposableGraph @@ -21,6 +22,24 @@ from gpt_index.composability import ComposableGraph from services.environment_service import EnvService, app_root_path +def get_and_query(user_id, index_storage, query, llm_predictor): + # TODO Do prediction here for token usage + index: [GPTSimpleVectorIndex, ComposableGraph] = index_storage[user_id].get_index_or_throw() + if isinstance(index, GPTSimpleVectorIndex): + response = index.query(query,verbose=True) + else: + query_configs = [ + QueryConfig( + index_struct_type=IndexStructType.TREE, + query_mode=QueryMode.RECURSIVE, + query_kwargs={ + "child_branch_factor": 4 + } + ) + ] + response = index.query(query, verbose=True, query_configs=[]) + + return response class IndexData: def __init__(self): @@ -41,7 +60,6 @@ class IndexData: # Create a folder called "indexes/{USER_ID}" if it doesn't exist already Path(f"{app_root_path()}/indexes/{user_id}").mkdir(parents=True, exist_ok=True) - print(f"{app_root_path()}/indexes/{user_id}") # Save the index to file under the user id index.save_to_disk(app_root_path() / "indexes" / f"{str(user_id)}"/f"{file_name}_{date.today()}-H{datetime.now().hour}.json") @@ -60,11 +78,12 @@ class IndexData: pass class Index_handler: - def __init__(self, bot): + def __init__(self, bot, usage_service): self.bot = bot self.openai_key = os.getenv("OPENAI_TOKEN") self.index_storage = defaultdict(IndexData) self.loop = asyncio.get_running_loop() + self.usage_service = usage_service self.qaprompt = QuestionAnswerPrompt( "Context information is below. The text '<|endofstatement|>' is used to separate chat entries and make it easier for you to understand the context\n" "---------------------\n" @@ -74,31 +93,35 @@ class Index_handler: "Given the context information and not prior knowledge, " "answer the question: {query_str}\n" ) - - def index_file(self, file_path): + + # TODO We need to do predictions below for token usage. + def index_file(self, file_path) -> GPTSimpleVectorIndex: document = SimpleDirectoryReader(file_path).load_data() index = GPTSimpleVectorIndex(document) return index - def index_gdoc(self, doc_id): + def index_gdoc(self, doc_id) -> GPTSimpleVectorIndex: document = GoogleDocsReader().load_data(doc_id) index = GPTSimpleVectorIndex(document) return index def index_youtube_transcript(self, link): documents = YoutubeTranscriptReader().load_data(ytlinks=[link]) - index = GPTSimpleVectorIndex(documents) + index = GPTSimpleVectorIndex(documents,) return index - def index_load_file(self, file_path): - index = GPTSimpleVectorIndex.load_from_disk(file_path) + def index_load_file(self, file_path) -> [GPTSimpleVectorIndex, ComposableGraph]: + if not "composed" in str(file_path): + index = GPTSimpleVectorIndex.load_from_disk(file_path) + else: + index = ComposableGraph.load_from_disk(file_path) return index - def index_discord(self, document): - index = GPTSimpleVectorIndex(document) + def index_discord(self, document) -> GPTSimpleVectorIndex: + index = GPTSimpleVectorIndex(document,) return index - def index_webpage(self, url): + def index_webpage(self, url) -> GPTSimpleVectorIndex: documents = BeautifulSoupWebReader(website_extractor=DEFAULT_WEBSITE_EXTRACTOR).load_data(urls=[url]) index = GPTSimpleVectorIndex(documents) return index @@ -143,7 +166,6 @@ class Index_handler: file_name = file.filename self.index_storage[ctx.user.id].add_index(index, ctx.user.id, file_name) - await ctx.respond("Index added to your indexes.") except Exception: await ctx.respond("Failed to set index") @@ -204,8 +226,42 @@ class Index_handler: await ctx.respond("Loaded index") except Exception as e: await ctx.respond(e) - - + + async def compose_indexes(self, user_id, indexes, name): + # Load all the indexes first + index_objects = [] + for _index in indexes: + index_file = EnvService.find_shared_file(f"indexes/{user_id}/{_index}") + index = await self.loop.run_in_executor(None, partial(self.index_load_file, index_file)) + index_objects.append(index) + + # For each index object, add its documents to a GPTTreeIndex + tree_indexes = [] + for _index in index_objects: + # Get all the document objects out of _index.docstore.docs + document_ids = [docmeta for docmeta in _index.docstore.docs.keys()] + documents = list([_index.docstore.get_document(doc_id) for doc_id in document_ids if isinstance(_index.docstore.get_document(doc_id), Document)]) + tree_index = GPTTreeIndex(documents=documents) + + summary = tree_index.query( + "What is a summary of this document?", mode="summarize" + ) + tree_index.set_text(str(summary)) + tree_indexes.append(tree_index) + + # Now we have a list of tree indexes, we can compose them + list_index = GPTListIndex(tree_indexes) + graph = ComposableGraph.build_from_index(list_index) + + if not name: + name = f"composed_index_{date.today()}-H{datetime.now().hour}.json" + + # Save the composed index + graph.save_to_disk(f"indexes/{user_id}/{name}.json") + + self.index_storage[user_id].queryable_index = graph + + async def backup_discord(self, ctx: discord.ApplicationContext, user_api_key): if not user_api_key: os.environ["OPENAI_API_KEY"] = self.openai_key @@ -235,12 +291,10 @@ class Index_handler: os.environ["OPENAI_API_KEY"] = user_api_key try: - index: [GPTSimpleVectorIndex, ComposableGraph] = self.index_storage[ctx.user.id].get_index_or_throw() - if isinstance(index, GPTSimpleVectorIndex): - response = await self.loop.run_in_executor(None, partial(index.query, query, verbose=True, text_qa_template=self.qaprompt)) - else: - response = await self.loop.run_in_executor(None, - partial(index.query, query, query_configs=[], verbose=True)) + llm_predictor = MockLLMPredictor(max_tokens=256) + response = await self.loop.run_in_executor(None, partial(get_and_query, ctx.user.id, self.index_storage, query, llm_predictor)) + print("The last token usage was ", llm_predictor.last_token_usage) + await self.usage_service.update_usage(llm_predictor.last_token_usage) await ctx.respond(f"**Query:**\n\n{query.strip()}\n\n**Query response:**\n\n{response.response.strip()}") except Exception: traceback.print_exc() @@ -319,3 +373,70 @@ class Index_handler: Document(channel_content, extra_info={"channel_name": channel_name}) ) return results + + async def compose(self, ctx: discord.ApplicationContext, name, user_api_key): + # Send the ComposeModal + if not user_api_key: + os.environ["OPENAI_API_KEY"] = self.openai_key + else: + os.environ["OPENAI_API_KEY"] = user_api_key + + if not self.index_storage[ctx.user.id].queryable(): + await ctx.respond("You must load at least two indexes before composing") + return + + await ctx.respond("Select the indexes to compose.", view=ComposeModal(self, ctx.user.id, name)) + + +class ComposeModal(discord.ui.View): + def __init__(self, index_cog, user_id, name=None) -> None: + super().__init__() + # Get the argument named "user_key_db" and save it as USER_KEY_DB + self.index_cog = index_cog + self.user_id = user_id + + # Get all the indexes for the user + self.indexes = [ + file + for file in os.listdir(EnvService.find_shared_file(f"indexes/{str(user_id)}/")) + ] + + # A text entry field for the name of the composed index + self.name = name + + # A discord UI select menu with all the indexes + self.index_select = discord.ui.Select( + placeholder="Select an index", + options=[ + discord.SelectOption(label=index, value=index) + for index in self.indexes + ], + max_values=len(self.indexes), + min_values=1, + + ) + # Add the select menu to the modal + self.add_item(self.index_select) + + # Add a button to the modal called "Compose" + self.add_item(discord.ui.Button(label="Compose", style=discord.ButtonStyle.green, custom_id="compose")) + + # The callback for the button + async def interaction_check(self, interaction: discord.Interaction) -> bool: + # Check that the interaction was for custom_id "compose" + if interaction.data["custom_id"] == "compose": + # Check that the user selected at least one index + if len(self.index_select.values) < 2: + await interaction.response.send_message("You must select at least two indexes") + else: + composing_message = await interaction.response.send_message("Composing indexes, this may take a long time...", ephemeral=True, delete_after=120) + # Compose the indexes + await self.index_cog.compose_indexes(self.user_id,self.index_select.values,self.name) + await interaction.followup.send("Composed indexes", ephemeral=True, delete_after=10) + + try: + await composing_message.delete() + except: + pass + else: + await interaction.response.defer() \ No newline at end of file