From e63c6227ddaed264a68b89074cc05c6386c0c152 Mon Sep 17 00:00:00 2001 From: Kaveen Kumarasinghe Date: Tue, 31 Jan 2023 23:19:26 -0500 Subject: [PATCH] initial link loading --- cogs/commands.py | 9 +++++---- cogs/index_service_cog.py | 14 ++++++++++++-- models/index_model.py | 31 +++++++++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/cogs/commands.py b/cogs/commands.py index 93af420..0b00dd0 100644 --- a/cogs/commands.py +++ b/cogs/commands.py @@ -515,14 +515,15 @@ class Commands(discord.Cog, name="Commands"): @add_to_group("index") @discord.slash_command( - name="set_file", + name="set", description="Set an index to query from", guild_ids=ALLOWED_GUILDS ) @discord.guild_only() - @discord.option(name="file", description="A file to create the index from", required=True, input_type=discord.SlashCommandOptionType.attachment) - async def set_file(self, ctx:discord.ApplicationContext, file: discord.Attachment): - await self.index_cog.set_index_command(ctx, file) + @discord.option(name="file", description="A file to create the index from", required=False, input_type=discord.SlashCommandOptionType.attachment) + @discord.option(name="link", description="A link to a file to a webpage ", required=False, input_type=str) + async def set_file(self, ctx:discord.ApplicationContext, file: discord.Attachment, link: str): + await self.index_cog.set_index_command(ctx, file, link) @add_to_group("index") @discord.slash_command( diff --git a/cogs/index_service_cog.py b/cogs/index_service_cog.py index f57cb6a..6841739 100644 --- a/cogs/index_service_cog.py +++ b/cogs/index_service_cog.py @@ -17,8 +17,15 @@ class IndexService(discord.Cog, name="IndexService"): self.bot = bot self.index_handler = Index_handler(bot) - async def set_index_command(self, ctx, file: discord.Attachment): + async def set_index_command(self, ctx, file: discord.Attachment = None, link: str = None): """Command handler to set a file as your personal index""" + if not file and not link: + await ctx.respond("Please provide a file or a link") + return + + if file and link: + await ctx.respond("Please provide only one file or link. Only one or the other.") + return user_api_key = None if USER_INPUT_API_KEYS: @@ -27,7 +34,10 @@ class IndexService(discord.Cog, name="IndexService"): return await ctx.defer(ephemeral=True) - await self.index_handler.set_file_index(ctx, file, user_api_key=user_api_key) + if file: + await self.index_handler.set_file_index(ctx, file, user_api_key=user_api_key) + elif link: + await self.index_handler.set_link_index(ctx, link, user_api_key=user_api_key) async def set_discord_command(self, ctx, channel: discord.TextChannel = None): diff --git a/models/index_model.py b/models/index_model.py index 0ac6682..8c3880e 100644 --- a/models/index_model.py +++ b/models/index_model.py @@ -9,7 +9,9 @@ from pathlib import Path from datetime import date, datetime from gpt_index.readers.schema.base import Document -from gpt_index import GPTSimpleVectorIndex, SimpleDirectoryReader, QuestionAnswerPrompt +from gpt_index import GPTSimpleVectorIndex, SimpleDirectoryReader, QuestionAnswerPrompt, BeautifulSoupWebReader, \ + GPTFaissIndex +from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR from services.environment_service import EnvService, app_root_path @@ -41,6 +43,11 @@ class Index_handler: def index_discord(self, document): index = GPTSimpleVectorIndex(document) return index + + def index_webpage(self, url): + documents = BeautifulSoupWebReader(website_extractor=DEFAULT_WEBSITE_EXTRACTOR).load_data(urls=[url]) + index = GPTSimpleVectorIndex(documents) + return index async def set_file_index(self, ctx: discord.ApplicationContext, file: discord.Attachment, user_api_key): @@ -67,6 +74,26 @@ class Index_handler: await ctx.respond("Failed to set index") traceback.print_exc() + async def set_link_index(self, ctx: discord.ApplicationContext, link: str, user_api_key): + if not user_api_key: + os.environ["OPENAI_API_KEY"] = self.openai_key + else: + os.environ["OPENAI_API_KEY"] = user_api_key + + # TODO Link validation + try: + + index = await self.loop.run_in_executor(None, partial(self.index_webpage, link)) + + self.index_storage[ctx.user.id] = index + + except Exception: + await ctx.respond("Failed to set index") + traceback.print_exc() + + await ctx.respond("Index set") + + async def set_discord_index(self, ctx: discord.ApplicationContext, channel: discord.TextChannel, user_api_key): if not user_api_key: os.environ["OPENAI_API_KEY"] = self.openai_key @@ -205,4 +232,4 @@ class Index_handler: results.append( Document(channel_content, extra_info={"channel_name": channel_name}) ) - return results \ No newline at end of file + return results