From c977f6c5f5ff43bced98bfefad94c45039928fe0 Mon Sep 17 00:00:00 2001 From: Kaveen Kumarasinghe Date: Sat, 25 Feb 2023 03:22:07 -0500 Subject: [PATCH] Token safeguards, better deep /search --- cogs/search_service_cog.py | 4 +- gpt3discord.py | 2 +- models/index_model.py | 59 ++++++++++++++++---- models/search_model.py | 95 ++++++++++++++++++++++++--------- services/environment_service.py | 16 ++++++ services/usage_service.py | 10 ++++ 6 files changed, 146 insertions(+), 40 deletions(-) diff --git a/cogs/search_service_cog.py b/cogs/search_service_cog.py index 47af11e..d2d1e8c 100644 --- a/cogs/search_service_cog.py +++ b/cogs/search_service_cog.py @@ -119,9 +119,9 @@ class SearchService(discord.Cog, name="SearchService"): response, refined_text = await self.model.search( ctx, query, user_api_key, search_scope, nodes, deep ) - except ValueError: + except ValueError as e: await ctx.respond( - "The Google Search API returned an error. Check the console for more details.", + str(e), ephemeral=True, ) return diff --git a/gpt3discord.py b/gpt3discord.py index 31fae08..2fc21b9 100644 --- a/gpt3discord.py +++ b/gpt3discord.py @@ -31,7 +31,7 @@ from services.environment_service import EnvService from models.openai_model import Model -__version__ = "10.7.1" +__version__ = "10.7.3" PID_FILE = Path("bot.pid") diff --git a/models/index_model.py b/models/index_model.py index 8b034bb..54de309 100644 --- a/models/index_model.py +++ b/models/index_model.py @@ -37,7 +37,7 @@ from gpt_index import ( PromptHelper, IndexStructType, OpenAIEmbedding, - GithubRepositoryReader, + GithubRepositoryReader, MockEmbedding, ) from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR @@ -46,6 +46,7 @@ from gpt_index.composability import ComposableGraph from services.environment_service import EnvService, app_root_path SHORT_TO_LONG_CACHE = {} +MAX_DEEP_COMPOSE_PRICE = EnvService.get_max_deep_compose_price() def get_and_query( @@ -218,7 +219,7 @@ class Index_handler: return index def index_github_repository(self, link, embed_model): - print("indexing github repo") + # Extract the "owner" and the "repo" name from the github link. owner = link.split("/")[3] repo = link.split("/")[4] @@ -274,7 +275,6 @@ class Index_handler: # Get the file path of this tempfile.NamedTemporaryFile # Save this temp file to an actual file that we can put into something else to read it documents = SimpleDirectoryReader(input_files=[f.name]).load_data() - print("Loaded the PDF document data") # Delete the temporary file return documents @@ -310,6 +310,7 @@ class Index_handler: documents = BeautifulSoupWebReader( website_extractor=DEFAULT_WEBSITE_EXTRACTOR ).load_data(urls=[url]) + # index = GPTSimpleVectorIndex(documents, embed_model=embed_model, use_async=True) index = await self.loop.run_in_executor( None, @@ -553,6 +554,25 @@ class Index_handler: ) embedding_model = OpenAIEmbedding() + llm_predictor_mock = MockLLMPredictor(4096) + embedding_model_mock = MockEmbedding(1536) + + # Run the mock call first + await self.loop.run_in_executor( + None, + partial( + GPTTreeIndex, + documents=documents, + llm_predictor=llm_predictor_mock, + embed_model=embedding_model_mock, + ), + ) + total_usage_price = await self.usage_service.get_price(llm_predictor_mock.last_token_usage) + await self.usage_service.get_price(embedding_model_mock.last_token_usage, True) + print("The total composition price is: ", total_usage_price) + if total_usage_price > MAX_DEEP_COMPOSE_PRICE: + raise ValueError("Doing this deep search would be prohibitively expensive. Please try a narrower search scope.") + + tree_index = await self.loop.run_in_executor( None, partial( @@ -574,6 +594,8 @@ class Index_handler: name = ( f"composed_deep_index_{date.today().month}_{date.today().day}.json" ) + else: + name = name+"_deep" # Save the composed index tree_index.save_to_disk(f"indexes/{user_id}/{name}.json") @@ -905,14 +927,29 @@ class ComposeModal(discord.ui.View): delete_after=120, ) # Compose the indexes - await self.index_cog.compose_indexes( - self.user_id, - indexes, - self.name, - False - if not self.deep_select.values or self.deep_select.values[0] == "no" - else True, - ) + try: + await self.index_cog.compose_indexes( + self.user_id, + indexes, + self.name, + False + if not self.deep_select.values or self.deep_select.values[0] == "no" + else True, + ) + except ValueError as e: + await interaction.followup.send( + str(e), ephemeral=True, delete_after=180 + ) + return False + except Exception as e: + await interaction.followup.send( + "An error occurred while composing the indexes: " + str(e), + ephemeral=True, + delete_after=180,) + return False + + + await interaction.followup.send( "Composed indexes", ephemeral=True, delete_after=180 ) diff --git a/models/search_model.py b/models/search_model.py index c08243e..91ab0a3 100644 --- a/models/search_model.py +++ b/models/search_model.py @@ -19,7 +19,7 @@ from gpt_index import ( PromptHelper, LLMPredictor, OpenAIEmbedding, - SimpleDirectoryReader, + SimpleDirectoryReader, GPTTreeIndex, MockLLMPredictor, MockEmbedding, ) from gpt_index.indices.knowledge_graph import GPTKnowledgeGraphIndex from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR @@ -28,6 +28,7 @@ from langchain import OpenAI from services.environment_service import EnvService, app_root_path from services.usage_service import UsageService +MAX_SEARCH_PRICE = EnvService.get_max_search_price() class Search: def __init__(self, gpt_model, usage_service): @@ -130,13 +131,12 @@ class Search: f.write(data) f.close() else: - return "An error occurred while downloading the PDF." + raise ValueError("Could not download PDF") # Get the file path of this tempfile.NamedTemporaryFile # Save this temp file to an actual file that we can put into something else to read it documents = SimpleDirectoryReader(input_files=[f.name]).load_data() for document in documents: document.extra_info = {"URL": url} - print("Loaded the PDF document data") # Delete the temporary file return documents @@ -155,11 +155,7 @@ class Search: [item["link"] for item in data["items"]], ) else: - print( - "The Google Search API returned an error: " - + str(response.status) - ) - return ["An error occurred while searching.", None] + raise ValueError("Error while retrieving links") async def try_edit(self, message, embed): try: @@ -246,19 +242,17 @@ class Search: pdf = False try: async with aiohttp.ClientSession() as session: - async with session.get(link, timeout=2) as response: + async with session.get(link, timeout=1) as response: # Add another entry to links from all_links if the link is not already in it to compensate for the failed request if response.status not in [200, 203, 202, 204]: for link2 in all_links: if link2 not in links: - print("Found a replacement link") links.append(link2) break continue # Follow redirects elif response.status in [301, 302, 303, 307, 308]: try: - print("Adding redirect") links.append(response.url) continue except: @@ -266,7 +260,6 @@ class Search: else: # Detect if the link is a PDF, if it is, we load it differently if response.headers["Content-Type"] == "application/pdf": - print("Found a PDF at the link " + link) pdf = True except: @@ -275,7 +268,6 @@ class Search: # Try to add a link from all_links, this is kind of messy. for link2 in all_links: if link2 not in links: - print("Found a replacement link") links.append(link2) break except: @@ -307,9 +299,18 @@ class Search: ) if not deep: + embed_model_mock = MockEmbedding(embed_dim=1536) + self.loop.run_in_executor( + None, + partial(GPTSimpleVectorIndex, documents, embed_model=embed_model_mock), + ) + total_usage_price = await self.usage_service.get_price(embed_model_mock.last_token_usage, True) + if total_usage_price > 1.00: + raise ValueError("Doing this search would be prohibitively expensive. Please try a narrower search scope.") + index = await self.loop.run_in_executor( None, - partial(GPTSimpleVectorIndex, documents, embed_model=embedding_model), + partial(GPTSimpleVectorIndex, documents, embed_model=embedding_model, use_async=True), ) # save the index to disk if not a redo if not redo: @@ -320,22 +321,54 @@ class Search: else ctx.author.id, query, ) + + await self.usage_service.update_usage( + embedding_model.last_token_usage, embeddings=True + ) else: - print("Doing a deep search") - llm_predictor_deep = LLMPredictor( - llm=OpenAI(model_name="text-davinci-002", temperature=0, max_tokens=-1) + llm_predictor_deep = LLMPredictor(llm=OpenAI(model_name="text-davinci-003")) + # Try a mock call first + llm_predictor_mock = MockLLMPredictor(4096) + embed_model_mock = MockEmbedding(embed_dim=1536) + + await self.loop.run_in_executor( + None, + partial( + GPTTreeIndex, + documents, + embed_model=embed_model_mock, + llm_predictor=llm_predictor_mock, + ), ) + total_usage_price = await self.usage_service.get_price(llm_predictor_mock.last_token_usage) + await self.usage_service.get_price(embed_model_mock.last_token_usage, True) + if total_usage_price > MAX_SEARCH_PRICE: + raise ValueError("Doing this deep search would be prohibitively expensive. Please try a narrower search scope.") + index = await self.loop.run_in_executor( None, partial( - GPTKnowledgeGraphIndex, + GPTTreeIndex, documents, - chunk_size_limit=512, - max_triplets_per_chunk=2, embed_model=embedding_model, llm_predictor=llm_predictor_deep, + use_async=True, ), ) + + # llm_predictor_deep = LLMPredictor( + # llm=OpenAI(model_name="text-davinci-002", temperature=0, max_tokens=-1) + # ) + # index = await self.loop.run_in_executor( + # None, + # partial( + # GPTKnowledgeGraphIndex, + # documents, + # chunk_size_limit=512, + # max_triplets_per_chunk=2, + # embed_model=embedding_model, + # llm_predictor=llm_predictor_deep, + # ), + # ) await self.usage_service.update_usage( embedding_model.last_token_usage, embeddings=True ) @@ -348,10 +381,6 @@ class Search: in_progress_message, self.build_search_indexed_embed(query_refined_text) ) - await self.usage_service.update_usage( - embedding_model.last_token_usage, embeddings=True - ) - # Now we can search the index for a query: embedding_model.last_token_usage = 0 @@ -365,17 +394,31 @@ class Search: llm_predictor=llm_predictor, similarity_top_k=nodes or DEFAULT_SEARCH_NODES, text_qa_template=self.qaprompt, + use_async=True, + response_mode="tree_summarize", ), ) else: + # response = await self.loop.run_in_executor( + # None, + # partial( + # index.query, + # query, + # include_text=True, + # embed_model=embedding_model, + # llm_predictor=llm_predictor_deep, + # use_async=True, + # ), + # ) response = await self.loop.run_in_executor( None, partial( index.query, query, - include_text=True, + child_branch_factor=2, + llm_predictor=llm_predictor, embed_model=embedding_model, - llm_predictor=llm_predictor_deep, + use_async=True, ), ) diff --git a/services/environment_service.py b/services/environment_service.py index 80cac2a..3ea22b3 100644 --- a/services/environment_service.py +++ b/services/environment_service.py @@ -379,3 +379,19 @@ class EnvService: return pinecone_region except Exception: return "us-west1-gcp" + + @staticmethod + def get_max_search_price(): + try: + search_price = float(os.getenv("MAX_SEARCH_PRICE")) + return search_price + except Exception: + return 1.00 + + @staticmethod + def get_max_deep_compose_price(): + try: + deep_compose_price = float(os.getenv("MAX_DEEP_COMPOSE_PRICE")) + return deep_compose_price + except Exception: + return 3.00 diff --git a/services/usage_service.py b/services/usage_service.py index b882c69..c12bcfd 100644 --- a/services/usage_service.py +++ b/services/usage_service.py @@ -14,6 +14,16 @@ class UsageService: f.close() self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2") + async def get_price(self, tokens_used, embeddings=False): + tokens_used = int(tokens_used) + if not embeddings: + price = ( + tokens_used / 1000 + ) * 0.02 # Just use the highest rate instead of model-based... I am overestimating on purpose. + else: + price = (tokens_used / 1000) * 0.0004 + return price + async def update_usage(self, tokens_used, embeddings=False): tokens_used = int(tokens_used) if not embeddings: