Merge pull request #172 from Hikari-Haru/compose-fix

Modernize GPT3Discord with gpt-index updates, async support, index renaming, bug fixes, QOL improvements
Kaveen Kumarasinghe 2 years ago committed by GitHub
commit 45c2eea8a5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -523,6 +523,89 @@ class Commands(discord.Cog, name="Commands"):
# #
# Index commands # Index commands
# #
@add_to_group("index")
@discord.slash_command(
    name="rename-user",
    description="Select one of your saved indexes to rename",
    guild_ids=ALLOWED_GUILDS,
)
@discord.guild_only()
@discord.option(
    name="user_index",
    description="Which user index to rename",
    required=False,
    autocomplete=File_autocompleter.get_user_indexes,
)
@discord.option(
    name="new_name",
    description="The new name",
    required=False,
    type=discord.SlashCommandOptionType.string,
)
async def rename_user_index(
    self,
    ctx: discord.ApplicationContext,
    user_index: str,
    new_name: str,
):
    """Slash command: rename one of the invoking user's saved indexes.

    Thin wrapper that defers the interaction and delegates to the index
    cog; validation of the (optional) parameters happens in the handler.
    """
    # Defer first: the rename touches the filesystem and may exceed
    # Discord's 3-second initial-response window.
    await ctx.defer()
    await self.index_cog.rename_user_index_command(ctx, user_index, new_name)
@add_to_group("index")
@discord.slash_command(
    name="rename-server",
    description="Select one of your saved server indexes to rename",
    guild_ids=ALLOWED_GUILDS,
)
@discord.guild_only()
@discord.option(
    name="server_index",
    description="Which server index to rename",
    required=False,
    autocomplete=File_autocompleter.get_server_indexes,
)
@discord.option(
    name="new_name",
    description="The new name",
    required=False,
    type=discord.SlashCommandOptionType.string,
)
async def rename_server_index(
    self,
    ctx: discord.ApplicationContext,
    server_index: str,
    new_name: str,
):
    """Slash command: rename one of the guild's saved server indexes.

    Thin wrapper that defers the interaction and delegates to the index
    cog; validation of the (optional) parameters happens in the handler.
    """
    # Defer first: the rename touches the filesystem and may exceed
    # Discord's 3-second initial-response window.
    await ctx.defer()
    await self.index_cog.rename_server_index_command(ctx, server_index, new_name)
@add_to_group("index")
@discord.slash_command(
    name="rename-search",
    description="Select one of your saved search indexes to rename",
    guild_ids=ALLOWED_GUILDS,
)
@discord.guild_only()
@discord.option(
    name="search_index",
    description="Which search index to rename",
    required=False,
    autocomplete=File_autocompleter.get_user_search_indexes,
)
@discord.option(
    name="new_name",
    description="The new name",
    required=False,
    type=discord.SlashCommandOptionType.string,
)
async def rename_search_index(
    self,
    ctx: discord.ApplicationContext,
    search_index: str,
    new_name: str,
):
    """Slash command: rename one of the invoking user's saved search indexes.

    Thin wrapper that defers the interaction and delegates to the index
    cog; validation of the (optional) parameters happens in the handler.
    """
    # Defer first: the rename touches the filesystem and may exceed
    # Discord's 3-second initial-response window.
    await ctx.defer()
    await self.index_cog.rename_search_index_command(ctx, search_index, new_name)
@add_to_group("index") @add_to_group("index")
@discord.slash_command( @discord.slash_command(
@ -622,10 +705,21 @@ class Commands(discord.Cog, name="Commands"):
required=False, required=False,
input_type=discord.SlashCommandOptionType.channel, input_type=discord.SlashCommandOptionType.channel,
) )
@discord.option(
name="message_limit",
description="The number of messages to index",
required=False,
input_type=discord.SlashCommandOptionType.integer,
)
async def set_discord( async def set_discord(
self, ctx: discord.ApplicationContext, channel: discord.TextChannel self,
ctx: discord.ApplicationContext,
channel: discord.TextChannel,
message_limit: int,
): ):
await self.index_cog.set_discord_command(ctx, channel) await self.index_cog.set_discord_command(
ctx, channel, message_limit=message_limit
)
@add_to_group("index") @add_to_group("index")
@discord.slash_command( @discord.slash_command(
@ -634,9 +728,15 @@ class Commands(discord.Cog, name="Commands"):
guild_ids=ALLOWED_GUILDS, guild_ids=ALLOWED_GUILDS,
checks=[Check.check_admin_roles(), Check.check_index_roles()], checks=[Check.check_admin_roles(), Check.check_index_roles()],
) )
@discord.option(
name="message_limit",
description="The number of messages to index per channel",
required=False,
input_type=discord.SlashCommandOptionType.integer,
)
@discord.guild_only() @discord.guild_only()
async def discord_backup(self, ctx: discord.ApplicationContext): async def discord_backup(self, ctx: discord.ApplicationContext, message_limit: int):
await self.index_cog.discord_backup_command(ctx) await self.index_cog.discord_backup_command(ctx, message_limit=message_limit)
@add_to_group("index") @add_to_group("index")
@discord.slash_command( @discord.slash_command(
@ -650,7 +750,7 @@ class Commands(discord.Cog, name="Commands"):
required=False, required=False,
default=1, default=1,
min_value=1, min_value=1,
max_value=3, max_value=5,
input_type=discord.SlashCommandOptionType.integer, input_type=discord.SlashCommandOptionType.integer,
) )
@discord.option( @discord.option(
@ -661,15 +761,27 @@ class Commands(discord.Cog, name="Commands"):
default="default", default="default",
choices=["default", "compact", "tree_summarize"], choices=["default", "compact", "tree_summarize"],
) )
@discord.option(
name="child_branch_factor",
description="Only for deep indexes, how deep to go, higher is expensive.",
required=False,
default=1,
min_value=1,
max_value=3,
input_type=discord.SlashCommandOptionType.integer,
)
async def query( async def query(
self, self,
ctx: discord.ApplicationContext, ctx: discord.ApplicationContext,
query: str, query: str,
nodes: int, nodes: int,
response_mode: str, response_mode: str,
child_branch_factor: int,
): ):
await ctx.defer() await ctx.defer()
await self.index_cog.query_command(ctx, query, nodes, response_mode) await self.index_cog.query_command(
ctx, query, nodes, response_mode, child_branch_factor
)
# #
# DALLE commands # DALLE commands
@ -859,7 +971,7 @@ class Commands(discord.Cog, name="Commands"):
description="The higher the number, the more accurate the results, but more expensive", description="The higher the number, the more accurate the results, but more expensive",
required=False, required=False,
input_type=discord.SlashCommandOptionType.integer, input_type=discord.SlashCommandOptionType.integer,
max_value=5, max_value=8,
min_value=1, min_value=1,
) )
@discord.option( @discord.option(

@ -1,4 +1,5 @@
import traceback import traceback
from pathlib import Path
import discord import discord
@ -24,6 +25,52 @@ class IndexService(discord.Cog, name="IndexService"):
self.bot = bot self.bot = bot
self.index_handler = Index_handler(bot, usage_service) self.index_handler = Index_handler(bot, usage_service)
async def rename_user_index_command(self, ctx, user_index, new_name):
    """Command handler to rename one of the user's saved indexes.

    Args:
        ctx: The application context of the invoking slash command.
        user_index: Current file name of the index under indexes/<user_id>/.
        new_name: Desired new name for the index.
    """
    # Both slash-command options are declared required=False, so validate
    # them here before building filesystem paths from them.
    if not user_index:
        await ctx.respond("Please provide an index to rename")
        return
    if not new_name:
        await ctx.respond("Please provide a new name for this index")
        return

    if await self.index_handler.rename_index(
        ctx,
        f"indexes/{ctx.user.id}/{user_index}",
        f"indexes/{ctx.user.id}/{new_name}",
    ):
        await ctx.respond(f"Your index has been renamed to `{new_name}`")
    else:
        await ctx.respond("Something went wrong while renaming your index")
async def rename_server_index_command(self, ctx, server_index, new_name):
    """Command handler to rename one of the guild's saved server indexes.

    Args:
        ctx: The application context of the invoking slash command.
        server_index: Current file name of the index under indexes/<guild_id>/.
        new_name: Desired new name for the index.
    """
    # Both slash-command options are declared required=False, so validate
    # them here before building filesystem paths from them.
    if not server_index:
        await ctx.respond("Please provide an index to rename")
        return
    if not new_name:
        await ctx.respond("Please provide a new name for this index")
        return

    if await self.index_handler.rename_index(
        ctx,
        f"indexes/{ctx.guild.id}/{server_index}",
        f"indexes/{ctx.guild.id}/{new_name}",
    ):
        await ctx.respond(f"Your index has been renamed to `{new_name}`")
    else:
        await ctx.respond("Something went wrong while renaming your index")
async def rename_search_index_command(self, ctx, search_index, new_name):
    """Command handler to rename one of the user's saved search indexes.

    Args:
        ctx: The application context of the invoking slash command.
        search_index: Current file name of the index under
            indexes/<user_id>_search/.
        new_name: Desired new name for the index.
    """
    # Both slash-command options are declared required=False, so validate
    # them here before building filesystem paths from them.
    if not search_index:
        await ctx.respond("Please provide an index to rename")
        return
    if not new_name:
        await ctx.respond("Please provide a new name for this index")
        return

    if await self.index_handler.rename_index(
        ctx,
        f"indexes/{ctx.user.id}_search/{search_index}",
        f"indexes/{ctx.user.id}_search/{new_name}",
    ):
        await ctx.respond(f"Your index has been renamed to `{new_name}`")
    else:
        await ctx.respond("Something went wrong while renaming your index")
async def set_index_command( async def set_index_command(
self, ctx, file: discord.Attachment = None, link: str = None self, ctx, file: discord.Attachment = None, link: str = None
): ):
@ -56,7 +103,9 @@ class IndexService(discord.Cog, name="IndexService"):
ctx, link, user_api_key=user_api_key ctx, link, user_api_key=user_api_key
) )
async def set_discord_command(self, ctx, channel: discord.TextChannel = None): async def set_discord_command(
self, ctx, channel: discord.TextChannel = None, message_limit: int = 2500
):
"""Command handler to set a channel as your personal index""" """Command handler to set a channel as your personal index"""
await ctx.defer() await ctx.defer()
@ -69,7 +118,7 @@ class IndexService(discord.Cog, name="IndexService"):
return return
await self.index_handler.set_discord_index( await self.index_handler.set_discord_index(
ctx, channel, user_api_key=user_api_key ctx, channel, user_api_key=user_api_key, message_limit=message_limit
) )
async def reset_command(self, ctx): async def reset_command(self, ctx):
@ -83,7 +132,7 @@ class IndexService(discord.Cog, name="IndexService"):
"Something went wrong while resetting your indexes. Contact the server admin." "Something went wrong while resetting your indexes. Contact the server admin."
) )
async def discord_backup_command(self, ctx): async def discord_backup_command(self, ctx, message_limit: int = 2500):
"""Command handler to backup the entire server""" """Command handler to backup the entire server"""
await ctx.defer() await ctx.defer()
@ -94,7 +143,9 @@ class IndexService(discord.Cog, name="IndexService"):
) )
if not user_api_key: if not user_api_key:
return return
await self.index_handler.backup_discord(ctx, user_api_key=user_api_key) await self.index_handler.backup_discord(
ctx, user_api_key=user_api_key, message_limit=message_limit
)
async def load_index_command(self, ctx, user_index, server_index, search_index): async def load_index_command(self, ctx, user_index, server_index, search_index):
"""Command handler to load indexes""" """Command handler to load indexes"""
@ -137,7 +188,9 @@ class IndexService(discord.Cog, name="IndexService"):
return return
await self.index_handler.load_index(ctx, index, server, search, user_api_key) await self.index_handler.load_index(ctx, index, server, search, user_api_key)
async def query_command(self, ctx, query, nodes, response_mode): async def query_command(
self, ctx, query, nodes, response_mode, child_branch_factor
):
"""Command handler to query your index""" """Command handler to query your index"""
user_api_key = None user_api_key = None
@ -153,7 +206,9 @@ class IndexService(discord.Cog, name="IndexService"):
if await Moderation.simple_moderate_and_respond(query, ctx): if await Moderation.simple_moderate_and_respond(query, ctx):
return return
await self.index_handler.query(ctx, query, response_mode, nodes, user_api_key) await self.index_handler.query(
ctx, query, response_mode, nodes, user_api_key, child_branch_factor
)
async def compose_command(self, ctx, name): async def compose_command(self, ctx, name):
"""Command handler to compose from your index""" """Command handler to compose from your index"""

@ -793,6 +793,13 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
user = ctx.user if is_context else ctx.author user = ctx.user if is_context else ctx.author
prompt = await self.mention_to_username(ctx, prompt.strip()) prompt = await self.mention_to_username(ctx, prompt.strip())
if len(prompt) < self.model.prompt_min_length:
alias = ctx.respond if is_context else ctx.send
await alias(
f"Prompt must be greater than {self.model.prompt_min_length} characters, it is currently: {len(prompt)} characters"
)
return
user_api_key = None user_api_key = None
if USER_INPUT_API_KEYS: if USER_INPUT_API_KEYS:
user_api_key = await TextService.get_user_api_key(user.id, ctx, USER_KEY_DB) user_api_key = await TextService.get_user_api_key(user.id, ctx, USER_KEY_DB)
@ -846,6 +853,13 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
text = await self.mention_to_username(ctx, text.strip()) text = await self.mention_to_username(ctx, text.strip())
instruction = await self.mention_to_username(ctx, instruction.strip()) instruction = await self.mention_to_username(ctx, instruction.strip())
# Validate that all the parameters are in a good state before we send the request
if len(instruction) < self.model.prompt_min_length:
await ctx.respond(
f"Instruction must be at least {self.model.prompt_min_length} characters long"
)
return
user_api_key = None user_api_key = None
if USER_INPUT_API_KEYS: if USER_INPUT_API_KEYS:
user_api_key = await TextService.get_user_api_key(user.id, ctx, USER_KEY_DB) user_api_key = await TextService.get_user_api_key(user.id, ctx, USER_KEY_DB)

@ -12,4 +12,8 @@ Supported filetypes:
Index Compositions: Index Compositions:
Indexes can be combined with other indexes through a composition. To combine indexes, you can run the `/index compose` command, and select the indexes that you want to combine together. You should only combine relevant indexes together, combining irrelevant indexes together will result in poor results (for example, don't upload a math textbook and then upload a large set of poems and combine them together). When creating a composition, you will be given the option to do a "Deep" composition, deep compositions are more detailed and will give you better results, but are incredibly costly and will sometimes take multiple minutes to compose. Indexes can be combined with other indexes through a composition. To combine indexes, you can run the `/index compose` command, and select the indexes that you want to combine together. You should only combine relevant indexes together, combining irrelevant indexes together will result in poor results (for example, don't upload a math textbook and then upload a large set of poems and combine them together). When creating a composition, you will be given the option to do a "Deep" composition, deep compositions are more detailed and will give you better results, but are incredibly costly and will sometimes take multiple minutes to compose.
You can also compose a singular index with itself with "Deep Compose", this will give you a more detailed version of the index, but will be costly and will sometimes take multiple minutes to compose. **Deep compositions are useless for very short documents!** You can also compose a singular index with itself with "Deep Compose", this will give you a more detailed version of the index, but will be costly and will sometimes take multiple minutes to compose. **Deep compositions are useless for very short documents!**
Doing a deep composition will also allow you to use the `child_branch_factor` parameter for `/index query`, increasing this past 1 will take a much longer time to query and will be much more expensive for large documents, so be wary.
**When doing Deep Compositions, it's highly recommended to keep the document size small, or only do deep compositions on single documents.** This is because a deep composition reorganizes the simple index into a tree structure and uses GPT3 to summarize different nodes of the tree, which will lead to high costs. For example, a deep composition of a 300-page lab manual and the contents of my personal website at https://kaveenk.com cost me roughly $2 USD.

@ -31,7 +31,7 @@ from services.environment_service import EnvService
from models.openai_model import Model from models.openai_model import Model
__version__ = "10.6.3" __version__ = "10.7.0"
PID_FILE = Path("bot.pid") PID_FILE = Path("bot.pid")

@ -1,3 +1,4 @@
import functools
import os import os
import random import random
import tempfile import tempfile
@ -18,6 +19,7 @@ from langchain import OpenAI
from gpt_index.readers import YoutubeTranscriptReader from gpt_index.readers import YoutubeTranscriptReader
from gpt_index.readers.schema.base import Document from gpt_index.readers.schema.base import Document
from gpt_index.langchain_helpers.text_splitter import TokenTextSplitter
from gpt_index import ( from gpt_index import (
GPTSimpleVectorIndex, GPTSimpleVectorIndex,
@ -46,7 +48,14 @@ SHORT_TO_LONG_CACHE = {}
def get_and_query( def get_and_query(
user_id, index_storage, query, response_mode, nodes, llm_predictor, embed_model user_id,
index_storage,
query,
response_mode,
nodes,
llm_predictor,
embed_model,
child_branch_factor,
): ):
index: [GPTSimpleVectorIndex, ComposableGraph] = index_storage[ index: [GPTSimpleVectorIndex, ComposableGraph] = index_storage[
user_id user_id
@ -54,9 +63,10 @@ def get_and_query(
if isinstance(index, GPTTreeIndex): if isinstance(index, GPTTreeIndex):
response = index.query( response = index.query(
query, query,
child_branch_factor=2, child_branch_factor=child_branch_factor,
llm_predictor=llm_predictor, llm_predictor=llm_predictor,
embed_model=embed_model, embed_model=embed_model,
use_async=True,
) )
else: else:
response = index.query( response = index.query(
@ -65,6 +75,7 @@ def get_and_query(
llm_predictor=llm_predictor, llm_predictor=llm_predictor,
embed_model=embed_model, embed_model=embed_model,
similarity_top_k=nodes, similarity_top_k=nodes,
use_async=True,
) )
return response return response
@ -116,7 +127,8 @@ class IndexData:
# First, clear all the files inside it # First, clear all the files inside it
for file in os.listdir(f"{app_root_path()}/indexes/{user_id}"): for file in os.listdir(f"{app_root_path()}/indexes/{user_id}"):
os.remove(f"{app_root_path()}/indexes/{user_id}/{file}") os.remove(f"{app_root_path()}/indexes/{user_id}/{file}")
for file in os.listdir(f"{app_root_path()}/indexes/{user_id}_search"):
os.remove(f"{app_root_path()}/indexes/{user_id}_search/{file}")
except Exception: except Exception:
traceback.print_exc() traceback.print_exc()
@ -139,6 +151,22 @@ class Index_handler:
) )
self.EMBED_CUTOFF = 2000 self.EMBED_CUTOFF = 2000
async def rename_index(self, ctx, original_path, rename_path):
    """Rename an index file on disk.

    Args:
        ctx: The invoking application context (unused; kept for parity
            with the other handler methods).
        original_path: Relative path of the existing index file, e.g.
            "indexes/<user_id>/<name>".
        rename_path: Relative destination path; ".json" is appended if
            missing so the file keeps its index extension.

    Returns:
        True if the file was renamed, False if it could not be found or
        the rename failed.
    """
    index_file = EnvService.find_shared_file(original_path)
    if not index_file:
        return False

    # NOTE(review): find_shared_file resolves the index's location, but
    # the rename below operates on the relative original_path — confirm
    # whether the resolved index_file should be renamed instead.
    try:
        # Saved indexes are stored as JSON; preserve the extension.
        if not rename_path.endswith(".json"):
            rename_path = rename_path + ".json"
        Path(original_path).rename(rename_path)
        return True
    except Exception:
        # Non-fatal: log for diagnosis (matches the file's convention)
        # and let the caller report a generic failure to the user.
        traceback.print_exc()
        return False
async def paginate_embed(self, response_text): async def paginate_embed(self, response_text):
"""Given a response text make embed pages and return a list of the pages. Codex makes it a codeblock in the embed""" """Given a response text make embed pages and return a list of the pages. Codex makes it a codeblock in the embed"""
@ -165,22 +193,26 @@ class Index_handler:
return pages return pages
# TODO We need to do predictions below for token usage.
def index_file(self, file_path, embed_model) -> GPTSimpleVectorIndex: def index_file(self, file_path, embed_model) -> GPTSimpleVectorIndex:
document = SimpleDirectoryReader(file_path).load_data() document = SimpleDirectoryReader(file_path).load_data()
index = GPTSimpleVectorIndex(document, embed_model=embed_model) index = GPTSimpleVectorIndex(document, embed_model=embed_model, use_async=True)
return index return index
def index_gdoc(self, doc_id, embed_model) -> GPTSimpleVectorIndex: def index_gdoc(self, doc_id, embed_model) -> GPTSimpleVectorIndex:
document = GoogleDocsReader().load_data(doc_id) document = GoogleDocsReader().load_data(doc_id)
index = GPTSimpleVectorIndex(document, embed_model=embed_model) index = GPTSimpleVectorIndex(document, embed_model=embed_model, use_async=True)
return index return index
def index_youtube_transcript(self, link, embed_model): def index_youtube_transcript(self, link, embed_model):
documents = YoutubeTranscriptReader().load_data(ytlinks=[link]) try:
documents = YoutubeTranscriptReader().load_data(ytlinks=[link])
except Exception as e:
raise ValueError(f"The youtube transcript couldn't be loaded: {e}")
index = GPTSimpleVectorIndex( index = GPTSimpleVectorIndex(
documents, documents,
embed_model=embed_model, embed_model=embed_model,
use_async=True,
) )
return index return index
@ -202,6 +234,7 @@ class Index_handler:
index = GPTSimpleVectorIndex( index = GPTSimpleVectorIndex(
documents, documents,
embed_model=embed_model, embed_model=embed_model,
use_async=True,
) )
return index return index
@ -216,6 +249,7 @@ class Index_handler:
index = GPTSimpleVectorIndex( index = GPTSimpleVectorIndex(
document, document,
embed_model=embed_model, embed_model=embed_model,
use_async=True,
) )
return index return index
@ -252,10 +286,16 @@ class Index_handler:
# Detect if the link is a PDF, if it is, we load it differently # Detect if the link is a PDF, if it is, we load it differently
if response.headers["Content-Type"] == "application/pdf": if response.headers["Content-Type"] == "application/pdf":
documents = await self.index_pdf(url) documents = await self.index_pdf(url)
index = GPTSimpleVectorIndex( index = await self.loop.run_in_executor(
documents, None,
embed_model=embed_model, functools.partial(
GPTSimpleVectorIndex,
documents=documents,
embed_model=embed_model,
use_async=True,
),
) )
return index return index
except: except:
raise ValueError("Could not load webpage") raise ValueError("Could not load webpage")
@ -263,7 +303,16 @@ class Index_handler:
documents = BeautifulSoupWebReader( documents = BeautifulSoupWebReader(
website_extractor=DEFAULT_WEBSITE_EXTRACTOR website_extractor=DEFAULT_WEBSITE_EXTRACTOR
).load_data(urls=[url]) ).load_data(urls=[url])
index = GPTSimpleVectorIndex(documents, embed_model=embed_model) # index = GPTSimpleVectorIndex(documents, embed_model=embed_model, use_async=True)
index = await self.loop.run_in_executor(
None,
functools.partial(
GPTSimpleVectorIndex,
documents=documents,
embed_model=embed_model,
use_async=True,
),
)
return index return index
def reset_indexes(self, user_id): def reset_indexes(self, user_id):
@ -331,7 +380,6 @@ class Index_handler:
else: else:
os.environ["OPENAI_API_KEY"] = user_api_key os.environ["OPENAI_API_KEY"] = user_api_key
# TODO Link validation
try: try:
embedding_model = OpenAIEmbedding() embedding_model = OpenAIEmbedding()
@ -380,6 +428,11 @@ class Index_handler:
self.index_storage[ctx.user.id].add_index(index, ctx.user.id, file_name) self.index_storage[ctx.user.id].add_index(index, ctx.user.id, file_name)
except ValueError as e:
await ctx.respond(str(e))
traceback.print_exc()
return
except Exception: except Exception:
await ctx.respond("Failed to set index") await ctx.respond("Failed to set index")
traceback.print_exc() traceback.print_exc()
@ -392,6 +445,7 @@ class Index_handler:
ctx: discord.ApplicationContext, ctx: discord.ApplicationContext,
channel: discord.TextChannel, channel: discord.TextChannel,
user_api_key, user_api_key,
message_limit: int = 2500,
): ):
if not user_api_key: if not user_api_key:
os.environ["OPENAI_API_KEY"] = self.openai_key os.environ["OPENAI_API_KEY"] = self.openai_key
@ -400,7 +454,7 @@ class Index_handler:
try: try:
document = await self.load_data( document = await self.load_data(
channel_ids=[channel.id], limit=1000, oldest_first=False channel_ids=[channel.id], limit=message_limit, oldest_first=False
) )
embedding_model = OpenAIEmbedding() embedding_model = OpenAIEmbedding()
index = await self.loop.run_in_executor( index = await self.loop.run_in_executor(
@ -445,6 +499,33 @@ class Index_handler:
traceback.print_exc() traceback.print_exc()
await ctx.respond(e) await ctx.respond(e)
async def index_to_docs(
    self, old_index, chunk_size: int = 4000, chunk_overlap: int = 200
) -> List[Document]:
    """Flatten an existing index back into a list of Documents.

    Walks every document in the index's docstore, concatenates its node
    texts, and re-splits the result into token-sized chunks so it can be
    re-indexed (used when composing indexes).

    Args:
        old_index: A GPTSimpleVectorIndex or GPTTreeIndex to flatten.
        chunk_size: Token size of each emitted Document chunk.
        chunk_overlap: Token overlap between consecutive chunks.

    Returns:
        A list of Document objects covering the index's text.
    """
    documents = []
    for doc_id in old_index.docstore.docs.keys():
        text = ""
        # Initialize so a document with no nodes (or an unrecognized
        # index type) does not raise NameError below.
        extra_info = None
        if isinstance(old_index, GPTSimpleVectorIndex):
            nodes = old_index.docstore.get_document(doc_id).get_nodes(
                old_index.docstore.docs[doc_id].id_map
            )
            for node in nodes:
                # NOTE(review): the last node's extra_info is applied to
                # every chunk of this document — confirm that metadata is
                # uniform across nodes.
                extra_info = node.extra_info
                text += f"{node.text} "
        if isinstance(old_index, GPTTreeIndex):
            nodes = old_index.docstore.get_document(doc_id).all_nodes.items()
            for node in nodes:
                extra_info = node[1].extra_info
                text += f"{node[1].text} "
        text_splitter = TokenTextSplitter(
            separator=" ", chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        # Avoid shadowing the accumulator: iterate chunks under a new name.
        for chunk in text_splitter.split_text(text):
            documents.append(Document(chunk, extra_info=extra_info))
    return documents
async def compose_indexes(self, user_id, indexes, name, deep_compose): async def compose_indexes(self, user_id, indexes, name, deep_compose):
# Load all the indexes first # Load all the indexes first
index_objects = [] index_objects = []
@ -459,11 +540,7 @@ class Index_handler:
if deep_compose: if deep_compose:
documents = [] documents = []
for _index in index_objects: for _index in index_objects:
[ documents.extend(await self.index_to_docs(_index, 256, 20))
documents.append(_index.docstore.get_document(doc_id))
for doc_id in [docmeta for docmeta in _index.docstore.docs.keys()]
if isinstance(_index.docstore.get_document(doc_id), Document)
]
llm_predictor = LLMPredictor( llm_predictor = LLMPredictor(
llm=OpenAI(model_name="text-davinci-003", max_tokens=-1) llm=OpenAI(model_name="text-davinci-003", max_tokens=-1)
) )
@ -476,6 +553,7 @@ class Index_handler:
documents=documents, documents=documents,
llm_predictor=llm_predictor, llm_predictor=llm_predictor,
embed_model=embedding_model, embed_model=embedding_model,
use_async=True,
), ),
) )
@ -497,11 +575,7 @@ class Index_handler:
else: else:
documents = [] documents = []
for _index in index_objects: for _index in index_objects:
[ documents.extend(await self.index_to_docs(_index))
documents.append(_index.docstore.get_document(doc_id))
for doc_id in [docmeta for docmeta in _index.docstore.docs.keys()]
if isinstance(_index.docstore.get_document(doc_id), Document)
]
embedding_model = OpenAIEmbedding() embedding_model = OpenAIEmbedding()
@ -511,6 +585,7 @@ class Index_handler:
GPTSimpleVectorIndex, GPTSimpleVectorIndex,
documents=documents, documents=documents,
embed_model=embedding_model, embed_model=embedding_model,
use_async=True,
), ),
) )
@ -525,7 +600,9 @@ class Index_handler:
simple_index.save_to_disk(f"indexes/{user_id}/{name}.json") simple_index.save_to_disk(f"indexes/{user_id}/{name}.json")
self.index_storage[user_id].queryable_index = simple_index self.index_storage[user_id].queryable_index = simple_index
async def backup_discord(self, ctx: discord.ApplicationContext, user_api_key): async def backup_discord(
self, ctx: discord.ApplicationContext, user_api_key, message_limit
):
if not user_api_key: if not user_api_key:
os.environ["OPENAI_API_KEY"] = self.openai_key os.environ["OPENAI_API_KEY"] = self.openai_key
else: else:
@ -536,7 +613,7 @@ class Index_handler:
for c in ctx.guild.text_channels: for c in ctx.guild.text_channels:
channel_ids.append(c.id) channel_ids.append(c.id)
document = await self.load_data( document = await self.load_data(
channel_ids=channel_ids, limit=3000, oldest_first=False channel_ids=channel_ids, limit=message_limit, oldest_first=False
) )
embedding_model = OpenAIEmbedding() embedding_model = OpenAIEmbedding()
index = await self.loop.run_in_executor( index = await self.loop.run_in_executor(
@ -567,6 +644,7 @@ class Index_handler:
response_mode, response_mode,
nodes, nodes,
user_api_key, user_api_key,
child_branch_factor,
): ):
if not user_api_key: if not user_api_key:
os.environ["OPENAI_API_KEY"] = self.openai_key os.environ["OPENAI_API_KEY"] = self.openai_key
@ -588,6 +666,7 @@ class Index_handler:
nodes, nodes,
llm_predictor, llm_predictor,
embedding_model, embedding_model,
child_branch_factor,
), ),
) )
print("The last token usage was ", llm_predictor.last_token_usage) print("The last token usage was ", llm_predictor.last_token_usage)

@ -649,13 +649,6 @@ class Model:
codex=False, codex=False,
custom_api_key=None, custom_api_key=None,
): ):
# Validate that all the parameters are in a good state before we send the request
if len(instruction) < self.prompt_min_length:
raise ValueError(
"Instruction must be greater than 8 characters, it is currently "
+ str(len(instruction))
)
print( print(
f"The text about to be edited is [{text}] with instructions [{instruction}] codex [{codex}]" f"The text about to be edited is [{text}] with instructions [{instruction}] codex [{codex}]"
) )
@ -831,10 +824,6 @@ class Model:
Tuple[dict, bool] Tuple[dict, bool]
): # The response, and a boolean indicating whether or not the context limit was reached. ): # The response, and a boolean indicating whether or not the context limit was reached.
# Validate that all the parameters are in a good state before we send the request # Validate that all the parameters are in a good state before we send the request
if len(prompt) < self.prompt_min_length:
raise ValueError(
f"Prompt must be greater than {self.prompt_min_length} characters, it is currently: {len(prompt)} characters"
)
if not max_tokens_override: if not max_tokens_override:
if model: if model:

@ -37,6 +37,7 @@ dependencies = [
"sentencepiece", "sentencepiece",
"protobuf", "protobuf",
"python-pptx", "python-pptx",
"langchain",
] ]
dynamic = ["version"] dynamic = ["version"]

@ -17,4 +17,5 @@ sentencepiece==0.1.97
protobuf==3.20.2 protobuf==3.20.2
python-pptx==0.6.21 python-pptx==0.6.21
sentence-transformers==2.2.2 sentence-transformers==2.2.2
openai-whisper langchain==0.0.93
openai-whisper

@ -15,4 +15,5 @@ PyPDF2==3.0.1
youtube_transcript_api==0.5.0 youtube_transcript_api==0.5.0
sentencepiece==0.1.97 sentencepiece==0.1.97
protobuf==3.20.2 protobuf==3.20.2
python-pptx==0.6.21 python-pptx==0.6.21
langchain==0.0.93

@ -2,11 +2,11 @@
OPENAI_TOKEN = "<openai_api_token>" OPENAI_TOKEN = "<openai_api_token>"
DISCORD_TOKEN = "<discord_bot_token>" DISCORD_TOKEN = "<discord_bot_token>"
#PINECONE_TOKEN = "<pinecone_token>" # pinecone token if you have it enabled. See readme # PINECONE_TOKEN = "<pinecone_token>" # pinecone token if you have it enabled. See readme
#PINECONE_REGION = "<pinecone_region>" # add your region here if it's not us-west1-gcp # PINECONE_REGION = "<pinecone_region>" # add your region here if it's not us-west1-gcp
#GOOGLE_SEARCH_API_KEY: "<google_api_key>" # GOOGLE_SEARCH_API_KEY: "<google_api_key>"
#GOOGLE_SEARCH_ENGINE_ID: "<google_engine_id>" # GOOGLE_SEARCH_ENGINE_ID: "<google_engine_id>"
#DEEPL_TOKEN: "<deepl_token>" # DEEPL_TOKEN: "<deepl_token>"
DEBUG_GUILD = "974519864045756446" # discord_server_id DEBUG_GUILD = "974519864045756446" # discord_server_id
DEBUG_CHANNEL = "977697652147892304" # discord_chanel_id DEBUG_CHANNEL = "977697652147892304" # discord_chanel_id
@ -29,7 +29,7 @@ SEARCH_ROLES: "Admin,Owner"
CUSTOM_BOT_NAME: "GPT3Discord" CUSTOM_BOT_NAME: "GPT3Discord"
# If True, users must use their own API keys for OpenAI. If False, the bot will use the API key in the .env file. # If True, users must use their own API keys for OpenAI. If False, the bot will use the API key in the .env file.
USER_INPUT_API_KEYS="False" USER_INPUT_API_KEYS = "False"
# Moderations Service alert channel, this is where moderation alerts will be sent as a default if enabled # Moderations Service alert channel, this is where moderation alerts will be sent as a default if enabled
MODERATIONS_ALERT_CHANNEL = "977697652147892304" MODERATIONS_ALERT_CHANNEL = "977697652147892304"
@ -44,4 +44,4 @@ PRE_MODERATE = "False"
FORCE_ENGLISH = "False" FORCE_ENGLISH = "False"
# The welcome message to send it the welcome setting is set to true # The welcome message to send it the welcome setting is set to true
WELCOME_MESSAGE = "Hi There! Welcome to our Discord server. We hope you'll enjoy our server and we look forward to engaging with you!" # This is a fallback message if gpt3 fails to generate a welcome message. WELCOME_MESSAGE = "Hi There! Welcome to our Discord server. We hope you'll enjoy our server and we look forward to engaging with you!" # This is a fallback message if gpt3 fails to generate a welcome message.

Loading…
Cancel
Save