composability

Kaveen Kumarasinghe 2 years ago
parent c93bcfa59a
commit 258a87945c

@ -535,6 +535,17 @@ class Commands(discord.Cog, name="Commands"):
async def reset(self, ctx:discord.ApplicationContext):
await self.index_cog.reset_command(ctx)
@add_to_group("index")
@discord.slash_command(
name="compose",
description="Combine multiple indexes together",
guild_ids=ALLOWED_GUILDS
)
@discord.option(name="name", description="The name of the new index", required=False, input_type=discord.SlashCommandOptionType.string)
@discord.guild_only()
async def compose(self, ctx:discord.ApplicationContext, name : str):
await self.index_cog.compose_command(ctx,name)
@add_to_group("index")
@discord.slash_command(
name="add_discord",

@ -14,10 +14,11 @@ class IndexService(discord.Cog, name="IndexService"):
def __init__(
self,
bot,
usage_service,
):
super().__init__()
self.bot = bot
self.index_handler = Index_handler(bot)
self.index_handler = Index_handler(bot, usage_service)
async def set_index_command(self, ctx, file: discord.Attachment = None, link: str = None):
"""Command handler to set a file as your personal index"""
@ -98,3 +99,13 @@ class IndexService(discord.Cog, name="IndexService"):
await ctx.defer()
await self.index_handler.query(ctx, query, response_mode, user_api_key)
async def compose_command(self, ctx, name):
"""Command handler to compose from your index"""
user_api_key = None
if USER_INPUT_API_KEYS:
user_api_key = await TextService.get_user_api_key(ctx.user.id, ctx, USER_KEY_DB)
if not user_api_key:
return
await self.index_handler.compose(ctx, name, user_api_key)

@ -172,7 +172,8 @@ async def main():
bot.add_cog(
IndexService(
bot
bot,
usage_service,
)
)

@ -13,7 +13,8 @@ from datetime import date, datetime
from gpt_index.readers import YoutubeTranscriptReader
from gpt_index.readers.schema.base import Document
from gpt_index import GPTSimpleVectorIndex, SimpleDirectoryReader, QuestionAnswerPrompt, BeautifulSoupWebReader, \
GPTFaissIndex, GPTListIndex, QueryMode, GPTTreeIndex, GoogleDocsReader
GPTFaissIndex, GPTListIndex, QueryMode, GPTTreeIndex, GoogleDocsReader, MockLLMPredictor, QueryConfig, \
IndexStructType
from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
from gpt_index.composability import ComposableGraph
@ -21,6 +22,24 @@ from gpt_index.composability import ComposableGraph
from services.environment_service import EnvService, app_root_path
def get_and_query(user_id, index_storage, query, llm_predictor):
# TODO Do prediction here for token usage
index: [GPTSimpleVectorIndex, ComposableGraph] = index_storage[user_id].get_index_or_throw()
if isinstance(index, GPTSimpleVectorIndex):
response = index.query(query,verbose=True)
else:
query_configs = [
QueryConfig(
index_struct_type=IndexStructType.TREE,
query_mode=QueryMode.RECURSIVE,
query_kwargs={
"child_branch_factor": 4
}
)
]
response = index.query(query, verbose=True, query_configs=[])
return response
class IndexData:
def __init__(self):
@ -41,7 +60,6 @@ class IndexData:
# Create a folder called "indexes/{USER_ID}" if it doesn't exist already
Path(f"{app_root_path()}/indexes/{user_id}").mkdir(parents=True, exist_ok=True)
print(f"{app_root_path()}/indexes/{user_id}")
# Save the index to file under the user id
index.save_to_disk(app_root_path() / "indexes" / f"{str(user_id)}"/f"{file_name}_{date.today()}-H{datetime.now().hour}.json")
@ -60,11 +78,12 @@ class IndexData:
pass
class Index_handler:
def __init__(self, bot):
def __init__(self, bot, usage_service):
self.bot = bot
self.openai_key = os.getenv("OPENAI_TOKEN")
self.index_storage = defaultdict(IndexData)
self.loop = asyncio.get_running_loop()
self.usage_service = usage_service
self.qaprompt = QuestionAnswerPrompt(
"Context information is below. The text '<|endofstatement|>' is used to separate chat entries and make it easier for you to understand the context\n"
"---------------------\n"
@ -74,31 +93,35 @@ class Index_handler:
"Given the context information and not prior knowledge, "
"answer the question: {query_str}\n"
)
def index_file(self, file_path):
# TODO We need to do predictions below for token usage.
def index_file(self, file_path) -> GPTSimpleVectorIndex:
document = SimpleDirectoryReader(file_path).load_data()
index = GPTSimpleVectorIndex(document)
return index
def index_gdoc(self, doc_id):
def index_gdoc(self, doc_id) -> GPTSimpleVectorIndex:
document = GoogleDocsReader().load_data(doc_id)
index = GPTSimpleVectorIndex(document)
return index
def index_youtube_transcript(self, link):
documents = YoutubeTranscriptReader().load_data(ytlinks=[link])
index = GPTSimpleVectorIndex(documents)
index = GPTSimpleVectorIndex(documents,)
return index
def index_load_file(self, file_path):
index = GPTSimpleVectorIndex.load_from_disk(file_path)
def index_load_file(self, file_path) -> [GPTSimpleVectorIndex, ComposableGraph]:
if not "composed" in str(file_path):
index = GPTSimpleVectorIndex.load_from_disk(file_path)
else:
index = ComposableGraph.load_from_disk(file_path)
return index
def index_discord(self, document):
index = GPTSimpleVectorIndex(document)
def index_discord(self, document) -> GPTSimpleVectorIndex:
index = GPTSimpleVectorIndex(document,)
return index
def index_webpage(self, url):
def index_webpage(self, url) -> GPTSimpleVectorIndex:
documents = BeautifulSoupWebReader(website_extractor=DEFAULT_WEBSITE_EXTRACTOR).load_data(urls=[url])
index = GPTSimpleVectorIndex(documents)
return index
@ -143,7 +166,6 @@ class Index_handler:
file_name = file.filename
self.index_storage[ctx.user.id].add_index(index, ctx.user.id, file_name)
await ctx.respond("Index added to your indexes.")
except Exception:
await ctx.respond("Failed to set index")
@ -204,8 +226,42 @@ class Index_handler:
await ctx.respond("Loaded index")
except Exception as e:
await ctx.respond(e)
async def compose_indexes(self, user_id, indexes, name):
# Load all the indexes first
index_objects = []
for _index in indexes:
index_file = EnvService.find_shared_file(f"indexes/{user_id}/{_index}")
index = await self.loop.run_in_executor(None, partial(self.index_load_file, index_file))
index_objects.append(index)
# For each index object, add its documents to a GPTTreeIndex
tree_indexes = []
for _index in index_objects:
# Get all the document objects out of _index.docstore.docs
document_ids = [docmeta for docmeta in _index.docstore.docs.keys()]
documents = list([_index.docstore.get_document(doc_id) for doc_id in document_ids if isinstance(_index.docstore.get_document(doc_id), Document)])
tree_index = GPTTreeIndex(documents=documents)
summary = tree_index.query(
"What is a summary of this document?", mode="summarize"
)
tree_index.set_text(str(summary))
tree_indexes.append(tree_index)
# Now we have a list of tree indexes, we can compose them
list_index = GPTListIndex(tree_indexes)
graph = ComposableGraph.build_from_index(list_index)
if not name:
name = f"composed_index_{date.today()}-H{datetime.now().hour}.json"
# Save the composed index
graph.save_to_disk(f"indexes/{user_id}/{name}.json")
self.index_storage[user_id].queryable_index = graph
async def backup_discord(self, ctx: discord.ApplicationContext, user_api_key):
if not user_api_key:
os.environ["OPENAI_API_KEY"] = self.openai_key
@ -235,12 +291,10 @@ class Index_handler:
os.environ["OPENAI_API_KEY"] = user_api_key
try:
index: [GPTSimpleVectorIndex, ComposableGraph] = self.index_storage[ctx.user.id].get_index_or_throw()
if isinstance(index, GPTSimpleVectorIndex):
response = await self.loop.run_in_executor(None, partial(index.query, query, verbose=True, text_qa_template=self.qaprompt))
else:
response = await self.loop.run_in_executor(None,
partial(index.query, query, query_configs=[], verbose=True))
llm_predictor = MockLLMPredictor(max_tokens=256)
response = await self.loop.run_in_executor(None, partial(get_and_query, ctx.user.id, self.index_storage, query, llm_predictor))
print("The last token usage was ", llm_predictor.last_token_usage)
await self.usage_service.update_usage(llm_predictor.last_token_usage)
await ctx.respond(f"**Query:**\n\n{query.strip()}\n\n**Query response:**\n\n{response.response.strip()}")
except Exception:
traceback.print_exc()
@ -319,3 +373,70 @@ class Index_handler:
Document(channel_content, extra_info={"channel_name": channel_name})
)
return results
async def compose(self, ctx: discord.ApplicationContext, name, user_api_key):
# Send the ComposeModal
if not user_api_key:
os.environ["OPENAI_API_KEY"] = self.openai_key
else:
os.environ["OPENAI_API_KEY"] = user_api_key
if not self.index_storage[ctx.user.id].queryable():
await ctx.respond("You must load at least two indexes before composing")
return
await ctx.respond("Select the indexes to compose.", view=ComposeModal(self, ctx.user.id, name))
class ComposeModal(discord.ui.View):
def __init__(self, index_cog, user_id, name=None) -> None:
super().__init__()
# Get the argument named "user_key_db" and save it as USER_KEY_DB
self.index_cog = index_cog
self.user_id = user_id
# Get all the indexes for the user
self.indexes = [
file
for file in os.listdir(EnvService.find_shared_file(f"indexes/{str(user_id)}/"))
]
# A text entry field for the name of the composed index
self.name = name
# A discord UI select menu with all the indexes
self.index_select = discord.ui.Select(
placeholder="Select an index",
options=[
discord.SelectOption(label=index, value=index)
for index in self.indexes
],
max_values=len(self.indexes),
min_values=1,
)
# Add the select menu to the modal
self.add_item(self.index_select)
# Add a button to the modal called "Compose"
self.add_item(discord.ui.Button(label="Compose", style=discord.ButtonStyle.green, custom_id="compose"))
# The callback for the button
async def interaction_check(self, interaction: discord.Interaction) -> bool:
# Check that the interaction was for custom_id "compose"
if interaction.data["custom_id"] == "compose":
# Check that the user selected at least one index
if len(self.index_select.values) < 2:
await interaction.response.send_message("You must select at least two indexes")
else:
composing_message = await interaction.response.send_message("Composing indexes, this may take a long time...", ephemeral=True, delete_after=120)
# Compose the indexes
await self.index_cog.compose_indexes(self.user_id,self.index_select.values,self.name)
await interaction.followup.send("Composed indexes", ephemeral=True, delete_after=10)
try:
await composing_message.delete()
except:
pass
else:
await interaction.response.defer()
Loading…
Cancel
Save