From 3ce319b25423e217ae16f39c80a05539cdc6293e Mon Sep 17 00:00:00 2001
From: Kaveen Kumarasinghe
Date: Wed, 1 Mar 2023 21:17:11 -0500
Subject: [PATCH] chatgpt support for search and indexing

---
 cogs/search_service_cog.py |  1 +
 models/index_model.py      | 20 ++++----
 models/search_model.py     | 99 +++++++++++++++++++-------------
 pyproject.toml             |  3 +-
 requirements.txt           |  3 +-
 requirements_base.txt      |  3 +-
 services/usage_service.py  | 13 +++--
 7 files changed, 76 insertions(+), 66 deletions(-)

diff --git a/cogs/search_service_cog.py b/cogs/search_service_cog.py
index 49937d7..9d5d310 100644
--- a/cogs/search_service_cog.py
+++ b/cogs/search_service_cog.py
@@ -128,6 +128,7 @@ class SearchService(discord.Cog, name="SearchService"):
                 ctx, query, user_api_key, search_scope, nodes, deep, response_mode
             )
         except ValueError as e:
+            traceback.print_exc()
             await ctx.respond(
                 embed=EmbedStatics.get_search_failure_embed(str(e)),
                 ephemeral=True,
diff --git a/models/index_model.py b/models/index_model.py
index 6399f2e..d1ce25d 100644
--- a/models/index_model.py
+++ b/models/index_model.py
@@ -17,6 +17,7 @@ from datetime import date

 from discord import InteractionResponse, Interaction
 from discord.ext import pages
+from gpt_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
 from langchain import OpenAI

 from gpt_index.readers import YoutubeTranscriptReader
@@ -50,6 +51,7 @@ from services.environment_service import EnvService, app_root_path

 SHORT_TO_LONG_CACHE = {}
 MAX_DEEP_COMPOSE_PRICE = EnvService.get_max_deep_compose_price()
+llm_predictor = ChatGPTLLMPredictor()


 def get_and_query(
@@ -596,9 +598,7 @@ class Index_handler:
         documents = []
         for _index in index_objects:
             documents.extend(await self.index_to_docs(_index, 256, 20))
-        llm_predictor = LLMPredictor(
-            llm=OpenAI(model_name="text-davinci-003", max_tokens=-1)
-        )
+
         embedding_model = OpenAIEmbedding()

         llm_predictor_mock = MockLLMPredictor(4096)
@@ -615,9 +615,9 @@ class Index_handler:
             ),
         )
         total_usage_price = await self.usage_service.get_price(
-            llm_predictor_mock.last_token_usage
+            llm_predictor_mock.last_token_usage, chatgpt=False,  # TODO Enable again when tree indexes are fixed
         ) + await self.usage_service.get_price(
-            embedding_model_mock.last_token_usage, True
+            embedding_model_mock.last_token_usage, embeddings=True
         )
         print("The total composition price is: ", total_usage_price)
         if total_usage_price > MAX_DEEP_COMPOSE_PRICE:
             raise ValueError(
                 "Doing this deep search would be prohibitively expensive. Please try a narrower search scope."
            )
+        llm_predictor_temp_non_cgpt = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))  # TODO Get rid of this
+
         tree_index = await self.loop.run_in_executor(
             None,
             partial(
                 GPTTreeIndex,
                 documents=documents,
-                llm_predictor=llm_predictor,
+                llm_predictor=llm_predictor_temp_non_cgpt,
                 embed_model=embedding_model,
                 use_async=True,
             ),
         )

-        await self.usage_service.update_usage(llm_predictor.last_token_usage)
+        await self.usage_service.update_usage(llm_predictor_temp_non_cgpt.last_token_usage, chatgpt=False)
         await self.usage_service.update_usage(
             embedding_model.last_token_usage, embeddings=True
         )
@@ -746,7 +748,7 @@ class Index_handler:
         )

         try:
-            llm_predictor = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
+
             embedding_model = OpenAIEmbedding()
             embedding_model.last_token_usage = 0
             response = await self.loop.run_in_executor(
@@ -764,7 +766,7 @@ class Index_handler:
                 ),
             )
             print("The last token usage was ", llm_predictor.last_token_usage)
-            await self.usage_service.update_usage(llm_predictor.last_token_usage)
+            await self.usage_service.update_usage(llm_predictor.last_token_usage, chatgpt=True)
             await self.usage_service.update_usage(
                 embedding_model.last_token_usage, embeddings=True
             )
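
The index_model.py changes above split LLM duties: a module-level ChatGPTLLMPredictor now backs index queries and their billing, while tree-index composition stays on text-davinci-003 until gpt-index supports ChatGPT there. A minimal sketch of that split (not code from the patch itself), using only names that appear above and assuming gpt-index==0.4.17 with an OpenAI key configured:

    # Sketch of the predictor split this patch lands on.
    from gpt_index import LLMPredictor
    from gpt_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
    from langchain import OpenAI

    # ChatGPT-backed predictor: serves index queries, billed at the
    # cheaper gpt-3.5-turbo rate via usage_service (chatgpt=True).
    llm_predictor = ChatGPTLLMPredictor()

    # Davinci fallback: GPTTreeIndex composition does not yet work with
    # the ChatGPT predictor, so it is built and billed with chatgpt=False.
    llm_predictor_temp_non_cgpt = LLMPredictor(
        llm=OpenAI(model_name="text-davinci-003")
    )
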
diff --git a/models/search_model.py b/models/search_model.py
index 5fc707f..5003bc5 100644
--- a/models/search_model.py
+++ b/models/search_model.py
@@ -25,6 +25,7 @@ from gpt_index import (
     MockEmbedding,
 )
 from gpt_index.indices.knowledge_graph import GPTKnowledgeGraphIndex
+from gpt_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
 from gpt_index.prompts.prompt_type import PromptType
 from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
 from langchain import OpenAI
@@ -49,7 +50,7 @@ class Search:
             "\n---------------------\n"
             "Never say '<|endofstatement|>'\n"
             "Given the context information and not prior knowledge, "
-            "answer the question, say that you were unable to answer the question if there is not sufficient context to formulate a decisive answer. The search query was: {query_str}\n"
+            "answer the question, say that you were unable to answer the question if there is not sufficient context to formulate a decisive answer. If the prior knowledge/context was sufficient, simply repeat it. The search query was: {query_str}\n"
         )
         self.openai_key = os.getenv("OPENAI_TOKEN")
         self.EMBED_CUTOFF = 2000
@@ -215,7 +216,7 @@ class Search:
         try:
             llm_predictor_presearch = OpenAI(
                 max_tokens=50,
-                temperature=0.25,
+                temperature=0.4,
                 presence_penalty=0.65,
                 model_name="text-davinci-003",
             )
@@ -314,9 +315,7 @@ class Search:

         embedding_model = OpenAIEmbedding()

-        llm_predictor = LLMPredictor(
-            llm=OpenAI(model_name="text-davinci-003", max_tokens=-1)
-        )
+        llm_predictor = ChatGPTLLMPredictor()

         if not deep:
             embed_model_mock = MockEmbedding(embed_dim=1536)
@@ -325,7 +324,7 @@ class Search:
                 partial(GPTSimpleVectorIndex, documents, embed_model=embed_model_mock),
             )
             total_usage_price = await self.usage_service.get_price(
-                embed_model_mock.last_token_usage, True
+                embed_model_mock.last_token_usage, embeddings=True
             )
             if total_usage_price > 1.00:
                 raise ValueError(
@@ -356,63 +355,60 @@ class Search:
             )
             price += total_usage_price
         else:
-            llm_predictor_deep = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
-            # Try a mock call first
-            llm_predictor_mock = MockLLMPredictor(4096)
-            embed_model_mock = MockEmbedding(embed_dim=1536)
+            llm_predictor_deep = ChatGPTLLMPredictor()

-            await self.loop.run_in_executor(
-                None,
-                partial(
-                    GPTTreeIndex,
-                    documents,
-                    embed_model=embed_model_mock,
-                    llm_predictor=llm_predictor_mock,
-                ),
-            )
-            total_usage_price = await self.usage_service.get_price(
-                llm_predictor_mock.last_token_usage
-            ) + await self.usage_service.get_price(
-                embed_model_mock.last_token_usage, True
-            )
-            if total_usage_price > MAX_SEARCH_PRICE:
-                await self.try_delete(in_progress_message)
-                raise ValueError(
-                    "Doing this deep search would be prohibitively expensive. Please try a narrower search scope. This deep search indexing would have cost ${:.2f}.".format(
-                        total_usage_price
-                    )
-                )
+            # # Try a mock call first
+            # llm_predictor_mock = MockLLMPredictor(4096)
+            # embed_model_mock = MockEmbedding(embed_dim=1536)
+
+            # await self.loop.run_in_executor(
+            #     None,
+            #     partial(
+            #         GPTKnowledgeGraphIndex,
+            #         documents,
+            #         chunk_size_limit=512,
+            #         max_triplets_per_chunk=2,
+            #         embed_model=embed_model_mock,
+            #         llm_predictor=llm_predictor_mock,
+            #     ),
+            # )
+            # total_usage_price = await self.usage_service.get_price(
+            #     llm_predictor_mock.last_token_usage, chatgpt=True,
+            # ) + await self.usage_service.get_price(
+            #     embed_model_mock.last_token_usage, embeddings=True
+            # )
+            # print(f"Total usage price: {total_usage_price}")
+            # if total_usage_price > MAX_SEARCH_PRICE:
+            #     await self.try_delete(in_progress_message)
+            #     raise ValueError(
+            #         "Doing this deep search would be prohibitively expensive. Please try a narrower search scope. This deep search indexing would have cost ${:.2f}.".format(
+            #             total_usage_price
+            #         )
+            #     )
+            # TODO Add back the mock when fixed!
             index = await self.loop.run_in_executor(
                 None,
                 partial(
-                    GPTTreeIndex,
+                    GPTKnowledgeGraphIndex,
                     documents,
+                    chunk_size_limit=512,
+                    max_triplets_per_chunk=2,
                     embed_model=embedding_model,
                     llm_predictor=llm_predictor_deep,
-                    use_async=True,
                 ),
             )

-            # llm_predictor_deep = LLMPredictor(
-            #     llm=OpenAI(model_name="text-davinci-002", temperature=0, max_tokens=-1)
-            # )
-            # index = await self.loop.run_in_executor(
-            #     None,
-            #     partial(
-            #         GPTKnowledgeGraphIndex,
-            #         documents,
-            #         chunk_size_limit=512,
-            #         max_triplets_per_chunk=2,
-            #         embed_model=embedding_model,
-            #         llm_predictor=llm_predictor_deep,
-            #     ),
-            # )
+            total_usage_price = await self.usage_service.get_price(
+                llm_predictor_deep.last_token_usage, chatgpt=True,
+            ) + await self.usage_service.get_price(
+                embedding_model.last_token_usage, embeddings=True)
+
             await self.usage_service.update_usage(
                 embedding_model.last_token_usage, embeddings=True
             )
             await self.usage_service.update_usage(
-                llm_predictor_deep.last_token_usage, embeddings=False
+                llm_predictor_deep.last_token_usage, chatgpt=True,
             )
             price += total_usage_price
@@ -455,14 +451,17 @@ class Search:
             partial(
                 index.query,
                 query,
-                child_branch_factor=2,
+                embedding_mode='hybrid',
                 llm_predictor=llm_predictor,
+                include_text=True,
                 embed_model=embedding_model,
                 use_async=True,
+                similarity_top_k=nodes or DEFAULT_SEARCH_NODES,
+                response_mode=response_mode,
             ),
         )

-        await self.usage_service.update_usage(llm_predictor.last_token_usage)
+        await self.usage_service.update_usage(llm_predictor.last_token_usage, chatgpt=True)
         await self.usage_service.update_usage(
             embedding_model.last_token_usage, embeddings=True
         )
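
The search_model.py changes above also swap GPTTreeIndex (queried with child_branch_factor) for a GPTKnowledgeGraphIndex queried in hybrid mode. Stripped of the run_in_executor plumbing, the query reduces to roughly the sketch below; all names are the locals used in search_model.py above, so this is illustrative only:

    # Sketch of the new deep-search query path, not code from the patch.
    response = index.query(
        query,
        embedding_mode="hybrid",      # blend graph triplet matching with embeddings
        include_text=True,            # return underlying source text, not just triplets
        llm_predictor=llm_predictor,  # ChatGPTLLMPredictor
        embed_model=embedding_model,  # OpenAIEmbedding
        similarity_top_k=nodes or DEFAULT_SEARCH_NODES,
        response_mode=response_mode,
        use_async=True,
    )
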
diff --git a/pyproject.toml b/pyproject.toml
index 32e44d1..b96df77 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ classifiers = [
 ]
 dependencies = [
 "Pillow==9.3.0",
+"openai==0.27.0",
 "py-cord==2.3.2",
 "python-dotenv==0.21.0",
 "requests==2.28.1",
@@ -30,7 +31,7 @@ dependencies = [
 "backoff==2.2.1",
 "flask==2.2.3",
 "beautifulsoup4==4.11.1",
-"gpt-index==0.4.14",
+"gpt-index==0.4.17",
 "PyPDF2==3.0.1",
 "youtube_transcript_api==0.5.0",
 "sentencepiece==0.1.97",
diff --git a/requirements.txt b/requirements.txt
index 6b816c9..cd3c727 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 Pillow==9.3.0
+openai==0.27.0
 py-cord==2.3.2
 python-dotenv==0.21.0
 requests==2.28.1
@@ -10,7 +11,7 @@ sqlitedict==2.1.0
 backoff==2.2.1
 flask==2.2.3
 beautifulsoup4==4.11.1
-gpt-index==0.4.14
+gpt-index==0.4.17
 PyPDF2==3.0.1
 youtube_transcript_api==0.5.0
 sentencepiece==0.1.97
diff --git a/requirements_base.txt b/requirements_base.txt
index c0f641c..76e0fce 100644
--- a/requirements_base.txt
+++ b/requirements_base.txt
@@ -1,4 +1,5 @@
 Pillow==9.3.0
+openai==0.27.0
 py-cord==2.3.2
 python-dotenv==0.21.0
 requests==2.28.1
@@ -10,7 +11,7 @@ sqlitedict==2.1.0
 backoff==2.2.1
 flask==2.2.3
 beautifulsoup4==4.11.1
-gpt-index==0.4.14
+gpt-index==0.4.17
 PyPDF2==3.0.1
 youtube_transcript_api==0.5.0
 sentencepiece==0.1.97
diff --git a/services/usage_service.py b/services/usage_service.py
index c12bcfd..d8ec005 100644
--- a/services/usage_service.py
+++ b/services/usage_service.py
@@ -14,9 +14,12 @@ class UsageService:
             f.close()
         self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

-    async def get_price(self, tokens_used, embeddings=False):
+    async def get_price(self, tokens_used, embeddings=False, chatgpt=False):
         tokens_used = int(tokens_used)
-        if not embeddings:
+        if chatgpt:
+            price = (tokens_used / 1000) * 0.002
+            return price
+        elif not embeddings:
             price = (
                 tokens_used / 1000
             ) * 0.02  # Just use the highest rate instead of model-based... I am overestimating on purpose.
@@ -24,9 +27,11 @@
             price = (tokens_used / 1000) * 0.0004
         return price

-    async def update_usage(self, tokens_used, embeddings=False):
+    async def update_usage(self, tokens_used, embeddings=False, chatgpt=False):
         tokens_used = int(tokens_used)
-        if not embeddings:
+        if chatgpt:
+            price = (tokens_used / 1000) * 0.002
+        elif not embeddings:
             price = (
                 tokens_used / 1000
             ) * 0.02  # Just use the highest rate instead of model-based... I am overestimating on purpose.
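
The rates hardcoded in usage_service.py above are $0.002 per 1K tokens for ChatGPT (gpt-3.5-turbo), a deliberately overestimated $0.02 per 1K for davinci-style completions, and $0.0004 per 1K for embeddings. A standalone sketch of the same arithmetic (not project code), to make the cost difference concrete:

    def price(tokens_used: int, embeddings: bool = False, chatgpt: bool = False) -> float:
        tokens = tokens_used / 1000  # all rates are per 1K tokens
        if chatgpt:
            return tokens * 0.002    # gpt-3.5-turbo (March 2023 pricing)
        if embeddings:
            return tokens * 0.0004   # embedding rate used by the bot
        return tokens * 0.02         # deliberate overestimate at the davinci rate

    # A 10K-token deep search costs about $0.02 on ChatGPT versus about
    # $0.20 under the davinci estimate, a 10x reduction.
    assert round(price(10_000, chatgpt=True), 4) == 0.02
    assert round(price(10_000), 4) == 0.2
    assert round(price(10_000, embeddings=True), 4) == 0.004
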