composability

2 years ago · 258a87945c
parent c93bcfa59a
commit 258a87945c
4 changed files with 167 additions and 23 deletions
--- a/cogs/commands.py
+++ b/cogs/commands.py
@ -535,6 +535,17 @@ class Commands(discord.Cog, name="Commands"):
    async def reset(self, ctx:discord.ApplicationContext):
        await self.index_cog.reset_command(ctx)

+    @add_to_group("index")
+    @discord.slash_command(
+        name="compose",
+        description="Combine multiple indexes together",
+        guild_ids=ALLOWED_GUILDS,
+    )
+    @discord.option(
+        name="name",
+        description="The name of the new index",
+        required=False,
+        input_type=discord.SlashCommandOptionType.string,
+    )
+    @discord.guild_only()
+    async def compose(self, ctx: discord.ApplicationContext, name: str):
+        # Thin slash-command wrapper: the index cog performs the actual compose flow.
+        await self.index_cog.compose_command(ctx, name)
+
    @add_to_group("index")
    @discord.slash_command(
        name="add_discord",
--- a/cogs/index_service_cog.py
+++ b/cogs/index_service_cog.py
@ -14,10 +14,11 @@ class IndexService(discord.Cog, name="IndexService"):
    def __init__(
        self,
        bot,
+        usage_service,
    ):
+        # Keeps a reference to the bot and creates the Index_handler, which
+        # receives both the bot and the usage service.
        super().__init__()
        self.bot = bot
-        self.index_handler = Index_handler(bot)
+        self.index_handler = Index_handler(bot, usage_service)
    
    async def set_index_command(self, ctx, file: discord.Attachment = None, link: str = None):
        """Command handler to set a file as your personal index"""
@ -98,3 +99,13 @@ class IndexService(discord.Cog, name="IndexService"):

        await ctx.defer()
        await self.index_handler.query(ctx, query, response_mode, user_api_key)
+
+    async def compose_command(self, ctx, name):
+        """Command handler that starts composing several indexes into one.
+
+        When user-supplied API keys are enabled, the caller's key is looked up
+        first and the command stops silently if none is returned. The request
+        is then forwarded to the index handler together with the (possibly
+        empty) name for the new index.
+        """
+        if USER_INPUT_API_KEYS:
+            user_api_key = await TextService.get_user_api_key(ctx.user.id, ctx, USER_KEY_DB)
+            if not user_api_key:
+                return
+        else:
+            user_api_key = None
+
+        await self.index_handler.compose(ctx, name, user_api_key)
--- a/gpt3discord.py
+++ b/gpt3discord.py
@ -172,7 +172,8 @@ async def main():

    bot.add_cog(
        IndexService(
-            bot
+            bot,
+            usage_service,
        )
    )

--- a/models/index_model.py
+++ b/models/index_model.py
@ -13,7 +13,8 @@ from datetime import date, datetime
 from gpt_index.readers import YoutubeTranscriptReader
 from gpt_index.readers.schema.base import Document
 from gpt_index import GPTSimpleVectorIndex, SimpleDirectoryReader, QuestionAnswerPrompt, BeautifulSoupWebReader, \
-    GPTFaissIndex, GPTListIndex, QueryMode, GPTTreeIndex, GoogleDocsReader
+    GPTFaissIndex, GPTListIndex, QueryMode, GPTTreeIndex, GoogleDocsReader, MockLLMPredictor, QueryConfig, \
+    IndexStructType
 from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR

 from gpt_index.composability import ComposableGraph
@ -21,6 +22,24 @@ from gpt_index.composability import ComposableGraph
 from services.environment_service import EnvService, app_root_path


+def get_and_query(user_id, index_storage, query, llm_predictor):
+    """Run ``query`` against the index currently stored for ``user_id``.
+
+    Args:
+        user_id: Key into ``index_storage``.
+        index_storage: Mapping whose values provide ``get_index_or_throw()``.
+        query: The query text passed to the index.
+        llm_predictor: Accepted for the planned token-usage prediction but not
+            used yet (see the TODO below).
+
+    Returns:
+        Whatever the index's ``query`` call returns.
+
+    Raises:
+        Anything ``get_index_or_throw()`` or the index query raises.
+    """
+    # TODO Do prediction here for token usage
+    index = index_storage[user_id].get_index_or_throw()
+
+    if isinstance(index, GPTSimpleVectorIndex):
+        # NOTE(review): the previous in-method query passed
+        # text_qa_template=self.qaprompt here; this helper has no access to
+        # it, so the custom prompt is currently not applied - confirm intent.
+        return index.query(query, verbose=True)
+
+    # Any other index is queried like a composed graph. An unused
+    # QueryConfig list used to be built here while an empty list was passed;
+    # the dead construction is gone and the empty list is kept so behaviour
+    # does not change.
+    return index.query(query, verbose=True, query_configs=[])

 class IndexData:
    def __init__(self):
@ -41,7 +60,6 @@ class IndexData:

        # Create a folder called "indexes/{USER_ID}" if it doesn't exist already
        Path(f"{app_root_path()}/indexes/{user_id}").mkdir(parents=True, exist_ok=True)
-        print(f"{app_root_path()}/indexes/{user_id}")
        # Save the index to file under the user id
        index.save_to_disk(app_root_path() / "indexes" / f"{str(user_id)}"/f"{file_name}_{date.today()}-H{datetime.now().hour}.json")

@ -60,11 +78,12 @@ class IndexData:
            pass

 class Index_handler:
-    def __init__(self, bot):
+    def __init__(self, bot, usage_service):
        self.bot = bot
        self.openai_key = os.getenv("OPENAI_TOKEN")
        self.index_storage = defaultdict(IndexData)
        self.loop = asyncio.get_running_loop()
+        self.usage_service = usage_service
        self.qaprompt = QuestionAnswerPrompt(
            "Context information is below. The text '<|endofstatement|>' is used to separate chat entries and make it easier for you to understand the context\n"
            "---------------------\n"
@ -74,31 +93,35 @@ class Index_handler:
            "Given the context information and not prior knowledge, "
            "answer the question: {query_str}\n"
        )
-    
-    def index_file(self, file_path):
+
+    # TODO We need to do predictions below for token usage.
+    def index_file(self, file_path) -> GPTSimpleVectorIndex:
+        # Loads documents from file_path with SimpleDirectoryReader and builds
+        # a GPTSimpleVectorIndex from them.
        document = SimpleDirectoryReader(file_path).load_data()
        index = GPTSimpleVectorIndex(document)
        return index

-    def index_gdoc(self, doc_id):
+    def index_gdoc(self, doc_id) -> GPTSimpleVectorIndex:
+        # Passes doc_id to GoogleDocsReader().load_data and builds a
+        # GPTSimpleVectorIndex from the result.
        document = GoogleDocsReader().load_data(doc_id)
        index = GPTSimpleVectorIndex(document)
        return index

    def index_youtube_transcript(self, link):
+        # Loads documents for the single link with YoutubeTranscriptReader and
+        # builds a GPTSimpleVectorIndex from them.
        documents = YoutubeTranscriptReader().load_data(ytlinks=[link])
-        index = GPTSimpleVectorIndex(documents)
+        index = GPTSimpleVectorIndex(documents,)
        return index

-    def index_load_file(self, file_path):
-        index = GPTSimpleVectorIndex.load_from_disk(file_path)
+    def index_load_file(self, file_path) -> "GPTSimpleVectorIndex | ComposableGraph":
+        """Load a saved index from ``file_path``.
+
+        Files whose *name* contains "composed" are loaded as a
+        ComposableGraph, everything else as a GPTSimpleVectorIndex.
+        """
+        # Only the file name is inspected: testing the whole path would
+        # misroute every index as soon as some parent directory happens to
+        # contain "composed".
+        if "composed" not in Path(str(file_path)).name:
+            index = GPTSimpleVectorIndex.load_from_disk(file_path)
+        else:
+            index = ComposableGraph.load_from_disk(file_path)
        return index

-    def index_discord(self, document):
-        index = GPTSimpleVectorIndex(document)
+    def index_discord(self, document) -> GPTSimpleVectorIndex:
+        # Builds a GPTSimpleVectorIndex directly from the given document(s).
+        index = GPTSimpleVectorIndex(document,)
        return index

-    def index_webpage(self, url):
+    def index_webpage(self, url) -> GPTSimpleVectorIndex:
+        # Loads the single url with BeautifulSoupWebReader (using
+        # DEFAULT_WEBSITE_EXTRACTOR) and builds a GPTSimpleVectorIndex from it.
        documents = BeautifulSoupWebReader(website_extractor=DEFAULT_WEBSITE_EXTRACTOR).load_data(urls=[url])
        index = GPTSimpleVectorIndex(documents)
        return index
@ -143,7 +166,6 @@ class Index_handler:

            file_name = file.filename
            self.index_storage[ctx.user.id].add_index(index, ctx.user.id, file_name)
-
            await ctx.respond("Index added to your indexes.")
        except Exception:
            await ctx.respond("Failed to set index")
@ -204,8 +226,42 @@ class Index_handler:
            await ctx.respond("Loaded index")
        except Exception as e:
            await ctx.respond(e)
-    
-    
+
+    async def compose_indexes(self, user_id, indexes, name):
+        """Combine several of a user's saved indexes into one ComposableGraph.
+
+        Each selected index file is loaded, its Document objects are
+        re-indexed into a GPTTreeIndex whose text is set to a generated
+        summary, and the tree indexes are wrapped in a GPTListIndex from which
+        the graph is built. The graph is saved to disk and stored as the
+        user's queryable index.
+
+        Args:
+            user_id: Owner of the indexes; selects the ``indexes/{user_id}`` folder.
+            indexes: File names inside that folder of the indexes to combine.
+            name: Optional name for the result. It is reduced to a bare file
+                name, a trailing ".json" is dropped, and "composed_" is
+                prefixed when the name does not already contain "composed".
+                When empty, a date/hour based default is used.
+
+        Raises:
+            Anything raised while loading, building or saving the indexes.
+        """
+        # Load all the indexes first
+        index_objects = []
+        for _index in indexes:
+            index_file = EnvService.find_shared_file(f"indexes/{user_id}/{_index}")
+            index = await self.loop.run_in_executor(None, partial(self.index_load_file, index_file))
+            index_objects.append(index)
+
+        # Work out the file stem. The name comes from a Discord user, so path
+        # components are stripped. The "composed" marker is enforced because
+        # index_load_file uses it to pick the ComposableGraph loader, and the
+        # extension is added exactly once below.
+        file_stem = Path(str(name)).name if name else ""
+        if file_stem.endswith(".json"):
+            file_stem = file_stem[: -len(".json")]
+        if not file_stem:
+            file_stem = f"composed_index_{date.today()}-H{datetime.now().hour}"
+        elif "composed" not in file_stem:
+            file_stem = f"composed_{file_stem}"
+
+        # Same root that IndexData uses when it saves single indexes.
+        save_dir = app_root_path() / "indexes" / str(user_id)
+
+        def _build_and_save():
+            # Building, summarising and saving are all blocking calls, so they
+            # run in a worker thread instead of on the event loop.
+            tree_indexes = []
+            for _index in index_objects:
+                # Keep only the Document objects held in the index's docstore
+                candidates = (
+                    _index.docstore.get_document(doc_id)
+                    for doc_id in list(_index.docstore.docs.keys())
+                )
+                documents = [doc for doc in candidates if isinstance(doc, Document)]
+                tree_index = GPTTreeIndex(documents=documents)
+
+                summary = tree_index.query(
+                    "What is a summary of this document?", mode="summarize"
+                )
+                tree_index.set_text(str(summary))
+                tree_indexes.append(tree_index)
+
+            # Now we have a list of tree indexes, we can compose them
+            list_index = GPTListIndex(tree_indexes)
+            graph = ComposableGraph.build_from_index(list_index)
+
+            # Save the composed index
+            save_dir.mkdir(parents=True, exist_ok=True)
+            graph.save_to_disk(str(save_dir / f"{file_stem}.json"))
+            return graph
+
+        graph = await self.loop.run_in_executor(None, _build_and_save)
+
+        self.index_storage[user_id].queryable_index = graph
+
+
    async def backup_discord(self, ctx: discord.ApplicationContext, user_api_key):
        if not user_api_key:
            os.environ["OPENAI_API_KEY"] = self.openai_key
@ -235,12 +291,10 @@ class Index_handler:
            os.environ["OPENAI_API_KEY"] = user_api_key
        
        try:
-            index: [GPTSimpleVectorIndex, ComposableGraph] = self.index_storage[ctx.user.id].get_index_or_throw()
-            if isinstance(index, GPTSimpleVectorIndex):
-                response = await self.loop.run_in_executor(None, partial(index.query, query, verbose=True, text_qa_template=self.qaprompt))
-            else:
-                response = await self.loop.run_in_executor(None,
-                                                           partial(index.query, query, query_configs=[], verbose=True))
+            llm_predictor = MockLLMPredictor(max_tokens=256)
+            response = await self.loop.run_in_executor(None, partial(get_and_query, ctx.user.id, self.index_storage, query, llm_predictor))
+            print("The last token usage was ", llm_predictor.last_token_usage)
+            await self.usage_service.update_usage(llm_predictor.last_token_usage)
            await ctx.respond(f"**Query:**\n\n{query.strip()}\n\n**Query response:**\n\n{response.response.strip()}")
        except Exception:
            traceback.print_exc()
@ -319,3 +373,70 @@ class Index_handler:
                Document(channel_content, extra_info={"channel_name": channel_name})
            )
        return results
+
+    async def compose(self, ctx: discord.ApplicationContext, name, user_api_key):
+        """Start the compose flow by sending the index selection view.
+
+        Sets OPENAI_API_KEY to the caller's key when one is given, otherwise
+        to the bot's own key. If the user's index storage is not queryable a
+        notice is sent instead of the view.
+        """
+        os.environ["OPENAI_API_KEY"] = user_api_key if user_api_key else self.openai_key
+
+        # NOTE(review): the notice below talks about "two indexes", but the
+        # check is only whatever queryable() reports - confirm they agree.
+        if self.index_storage[ctx.user.id].queryable():
+            await ctx.respond("Select the indexes to compose.", view=ComposeModal(self, ctx.user.id, name))
+            return
+
+        await ctx.respond("You must load at least two indexes before composing")
+
+
+class ComposeModal(discord.ui.View):
+    """Select menu plus "Compose" button for combining a user's saved indexes.
+
+    Despite its name this is a ``discord.ui.View``, not a modal dialog.
+    """
+
+    # Discord select menus accept at most 25 options.
+    MAX_SELECT_OPTIONS = 25
+
+    def __init__(self, index_cog, user_id, name=None) -> None:
+        """Build the view.
+
+        Args:
+            index_cog: Object providing ``compose_indexes(user_id, indexes, name)``.
+            user_id: User whose ``indexes/{user_id}/`` folder is listed.
+            name: Optional name for the composed index.
+        """
+        super().__init__()
+        self.index_cog = index_cog
+        self.user_id = user_id
+
+        # Every file in the user's index folder is offered, capped at the
+        # number of options a select menu may carry.
+        self.indexes = os.listdir(
+            EnvService.find_shared_file(f"indexes/{str(user_id)}/")
+        )[: self.MAX_SELECT_OPTIONS]
+
+        # Name for the composed index (may be None)
+        self.name = name
+
+        # A discord UI select menu with all the indexes
+        self.index_select = discord.ui.Select(
+            placeholder="Select an index",
+            options=[
+                discord.SelectOption(label=index, value=index)
+                for index in self.indexes
+            ],
+            max_values=len(self.indexes),
+            min_values=1,
+        )
+        self.add_item(self.index_select)
+
+        # The "Compose" button; it is handled in interaction_check below
+        self.add_item(discord.ui.Button(label="Compose", style=discord.ButtonStyle.green, custom_id="compose"))
+
+    async def interaction_check(self, interaction: discord.Interaction) -> bool:
+        """Handle every interaction with this view.
+
+        The whole flow is implemented here rather than in item callbacks, so
+        the method always returns False once it has responded.
+        """
+        # The view is tied to one user's index folder; nobody else may drive it.
+        if interaction.user.id != self.user_id:
+            await interaction.response.send_message(
+                "Only the user who started this compose can use it", ephemeral=True
+            )
+            return False
+
+        # Anything but the "Compose" button (e.g. a selection change) is only acknowledged
+        if interaction.data["custom_id"] != "compose":
+            await interaction.response.defer()
+            return False
+
+        # Composing needs at least two indexes
+        if len(self.index_select.values) < 2:
+            await interaction.response.send_message("You must select at least two indexes")
+            return False
+
+        composing_message = await interaction.response.send_message("Composing indexes, this may take a long time...", ephemeral=True, delete_after=120)
+        try:
+            await self.index_cog.compose_indexes(self.user_id, self.index_select.values, self.name)
+        except Exception:
+            # Tell the user instead of failing silently; details go to the log
+            traceback.print_exc()
+            await interaction.followup.send("Failed to compose indexes", ephemeral=True, delete_after=10)
+            return False
+
+        await interaction.followup.send("Composed indexes", ephemeral=True, delete_after=10)
+
+        try:
+            await composing_message.delete()
+        except Exception:
+            # Best effort only: the message also removes itself via delete_after
+            pass
+        return False