ChatGPT support for search and indexing

Kaveen Kumarasinghe 1 year ago
parent c9b9d9ce2b
commit 3ce319b254

@@ -128,6 +128,7 @@ class SearchService(discord.Cog, name="SearchService"):
                 ctx, query, user_api_key, search_scope, nodes, deep, response_mode
             )
         except ValueError as e:
+            traceback.print_exc()
             await ctx.respond(
                 embed=EmbedStatics.get_search_failure_embed(str(e)),
                 ephemeral=True,

@@ -17,6 +17,7 @@ from datetime import date
 from discord import InteractionResponse, Interaction
 from discord.ext import pages
+from gpt_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
 from langchain import OpenAI
 from gpt_index.readers import YoutubeTranscriptReader
@@ -50,6 +51,7 @@ from services.environment_service import EnvService, app_root_path
 SHORT_TO_LONG_CACHE = {}
 MAX_DEEP_COMPOSE_PRICE = EnvService.get_max_deep_compose_price()
+llm_predictor = ChatGPTLLMPredictor()
 def get_and_query(
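
Note: ChatGPTLLMPredictor is gpt-index's predictor for the gpt-3.5-turbo ChatCompletion endpoint (hence the openai==0.27.0 pin later in this commit), and a single module-level instance is now shared across handlers. A minimal sketch of how such a predictor plugs into a query, assuming a previously saved GPTSimpleVectorIndex (the index file name is illustrative):

    # Sketch only: gpt-index 0.4.x API; the index file name is hypothetical.
    from gpt_index import GPTSimpleVectorIndex
    from gpt_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor

    llm_predictor = ChatGPTLLMPredictor()  # backed by gpt-3.5-turbo

    index = GPTSimpleVectorIndex.load_from_disk("index.json")
    response = index.query("What is in these documents?", llm_predictor=llm_predictor)
    print(response)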
@@ -596,9 +598,7 @@ class Index_handler:
         documents = []
         for _index in index_objects:
             documents.extend(await self.index_to_docs(_index, 256, 20))
-        llm_predictor = LLMPredictor(
-            llm=OpenAI(model_name="text-davinci-003", max_tokens=-1)
-        )
         embedding_model = OpenAIEmbedding()
         llm_predictor_mock = MockLLMPredictor(4096)
@@ -615,9 +615,9 @@ class Index_handler:
             ),
         )
         total_usage_price = await self.usage_service.get_price(
-            llm_predictor_mock.last_token_usage
+            llm_predictor_mock.last_token_usage, chatgpt=False,  # TODO Enable again when tree indexes are fixed
         ) + await self.usage_service.get_price(
-            embedding_model_mock.last_token_usage, True
+            embedding_model_mock.last_token_usage, embeddings=True
         )
         print("The total composition price is: ", total_usage_price)
         if total_usage_price > MAX_DEEP_COMPOSE_PRICE:
@@ -625,18 +625,20 @@ class Index_handler:
                 "Doing this deep search would be prohibitively expensive. Please try a narrower search scope."
             )
+        llm_predictor_temp_non_cgpt = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))  # TODO Get rid of this
         tree_index = await self.loop.run_in_executor(
             None,
             partial(
                 GPTTreeIndex,
                 documents=documents,
-                llm_predictor=llm_predictor,
+                llm_predictor=llm_predictor_temp_non_cgpt,
                 embed_model=embedding_model,
                 use_async=True,
             ),
         )
-        await self.usage_service.update_usage(llm_predictor.last_token_usage)
+        await self.usage_service.update_usage(llm_predictor_temp_non_cgpt.last_token_usage, chatgpt=False)  # TODO set to false
         await self.usage_service.update_usage(
             embedding_model.last_token_usage, embeddings=True
         )
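
The mock-first pattern above — price a dry run with MockLLMPredictor/MockEmbedding, then abort before spending real tokens — is the cost guard this commit applies throughout. A condensed, self-contained sketch of the pattern; usage_service, documents, and the price cap are stand-ins:

    # Hedged sketch of the dry-run cost guard; names are illustrative.
    from gpt_index import GPTTreeIndex, MockEmbedding, MockLLMPredictor

    async def estimate_compose_price(documents, usage_service, price_cap=1.00):
        llm_mock = MockLLMPredictor(4096)           # tallies prompt/completion tokens
        embed_mock = MockEmbedding(embed_dim=1536)  # tallies embedding tokens
        # Build the index against mocks: no OpenAI calls, only token counts.
        GPTTreeIndex(documents, llm_predictor=llm_mock, embed_model=embed_mock)
        price = await usage_service.get_price(llm_mock.last_token_usage, chatgpt=False)
        price += await usage_service.get_price(embed_mock.last_token_usage, embeddings=True)
        if price > price_cap:
            raise ValueError(f"Composition would cost ${price:.2f}; narrow the scope.")
        return price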
@@ -746,7 +748,7 @@ class Index_handler:
         )
         try:
-            llm_predictor = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
             embedding_model = OpenAIEmbedding()
             embedding_model.last_token_usage = 0
             response = await self.loop.run_in_executor(
@@ -764,7 +766,7 @@ class Index_handler:
                 ),
             )
             print("The last token usage was ", llm_predictor.last_token_usage)
-            await self.usage_service.update_usage(llm_predictor.last_token_usage)
+            await self.usage_service.update_usage(llm_predictor.last_token_usage, chatgpt=True)
             await self.usage_service.update_usage(
                 embedding_model.last_token_usage, embeddings=True
             )

@@ -25,6 +25,7 @@ from gpt_index import (
     MockEmbedding,
 )
 from gpt_index.indices.knowledge_graph import GPTKnowledgeGraphIndex
+from gpt_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
 from gpt_index.prompts.prompt_type import PromptType
 from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
 from langchain import OpenAI
@@ -49,7 +50,7 @@ class Search:
             "\n---------------------\n"
             "Never say '<|endofstatement|>'\n"
             "Given the context information and not prior knowledge, "
-            "answer the question, say that you were unable to answer the question if there is not sufficient context to formulate a decisive answer. The search query was: {query_str}\n"
+            "answer the question, say that you were unable to answer the question if there is not sufficient context to formulate a decisive answer. If the prior knowledge/context was sufficient, simply repeat it. The search query was: {query_str}\n"
         )
         self.openai_key = os.getenv("OPENAI_TOKEN")
         self.EMBED_CUTOFF = 2000
@@ -215,7 +216,7 @@ class Search:
         try:
             llm_predictor_presearch = OpenAI(
                 max_tokens=50,
-                temperature=0.25,
+                temperature=0.4,
                 presence_penalty=0.65,
                 model_name="text-davinci-003",
             )
@@ -314,9 +315,7 @@ class Search:
         embedding_model = OpenAIEmbedding()
-        llm_predictor = LLMPredictor(
-            llm=OpenAI(model_name="text-davinci-003", max_tokens=-1)
-        )
+        llm_predictor = ChatGPTLLMPredictor()
         if not deep:
             embed_model_mock = MockEmbedding(embed_dim=1536)
@@ -325,7 +324,7 @@ class Search:
                 partial(GPTSimpleVectorIndex, documents, embed_model=embed_model_mock),
             )
             total_usage_price = await self.usage_service.get_price(
-                embed_model_mock.last_token_usage, True
+                embed_model_mock.last_token_usage, embeddings=True
             )
             if total_usage_price > 1.00:
                 raise ValueError(
@@ -356,63 +355,60 @@ class Search:
             )
             price += total_usage_price
         else:
-            llm_predictor_deep = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
-            # Try a mock call first
-            llm_predictor_mock = MockLLMPredictor(4096)
-            embed_model_mock = MockEmbedding(embed_dim=1536)
-            await self.loop.run_in_executor(
-                None,
-                partial(
-                    GPTTreeIndex,
-                    documents,
-                    embed_model=embed_model_mock,
-                    llm_predictor=llm_predictor_mock,
-                ),
-            )
-            total_usage_price = await self.usage_service.get_price(
-                llm_predictor_mock.last_token_usage
-            ) + await self.usage_service.get_price(
-                embed_model_mock.last_token_usage, True
-            )
-            if total_usage_price > MAX_SEARCH_PRICE:
-                await self.try_delete(in_progress_message)
-                raise ValueError(
-                    "Doing this deep search would be prohibitively expensive. Please try a narrower search scope. This deep search indexing would have cost ${:.2f}.".format(
-                        total_usage_price
-                    )
-                )
+            llm_predictor_deep = ChatGPTLLMPredictor()
+            # # Try a mock call first
+            # llm_predictor_mock = MockLLMPredictor(4096)
+            # embed_model_mock = MockEmbedding(embed_dim=1536)
+            # await self.loop.run_in_executor(
+            #     None,
+            #     partial(
+            #         GPTKnowledgeGraphIndex,
+            #         documents,
+            #         chunk_size_limit=512,
+            #         max_triplets_per_chunk=2,
+            #         embed_model=embed_model_mock,
+            #         llm_predictor=llm_predictor_mock,
+            #     ),
+            # )
+            # total_usage_price = await self.usage_service.get_price(
+            #     llm_predictor_mock.last_token_usage, chatgpt=True,
+            # ) + await self.usage_service.get_price(
+            #     embed_model_mock.last_token_usage, embeddings=True
+            # )
+            # print(f"Total usage price: {total_usage_price}")
+            # if total_usage_price > MAX_SEARCH_PRICE:
+            #     await self.try_delete(in_progress_message)
+            #     raise ValueError(
+            #         "Doing this deep search would be prohibitively expensive. Please try a narrower search scope. This deep search indexing would have cost ${:.2f}.".format(
+            #             total_usage_price
+            #         )
+            #     )
+            # TODO Add back the mock when fixed!
             index = await self.loop.run_in_executor(
                 None,
                 partial(
-                    GPTTreeIndex,
+                    GPTKnowledgeGraphIndex,
                     documents,
+                    chunk_size_limit=512,
+                    max_triplets_per_chunk=2,
                     embed_model=embedding_model,
                     llm_predictor=llm_predictor_deep,
-                    use_async=True,
                 ),
             )
-            # llm_predictor_deep = LLMPredictor(
-            #     llm=OpenAI(model_name="text-davinci-002", temperature=0, max_tokens=-1)
-            # )
-            # index = await self.loop.run_in_executor(
-            #     None,
-            #     partial(
-            #         GPTKnowledgeGraphIndex,
-            #         documents,
-            #         chunk_size_limit=512,
-            #         max_triplets_per_chunk=2,
-            #         embed_model=embedding_model,
-            #         llm_predictor=llm_predictor_deep,
-            #     ),
-            # )
+            total_usage_price = await self.usage_service.get_price(
+                llm_predictor_deep.last_token_usage, chatgpt=True
+            ) + await self.usage_service.get_price(
+                embedding_model.last_token_usage, embeddings=True
+            )
             await self.usage_service.update_usage(
                 embedding_model.last_token_usage, embeddings=True
             )
             await self.usage_service.update_usage(
-                llm_predictor_deep.last_token_usage, embeddings=False
+                llm_predictor_deep.last_token_usage, chatgpt=True
             )
             price += total_usage_price
@@ -455,14 +451,17 @@ class Search:
                 partial(
                     index.query,
                     query,
-                    child_branch_factor=2,
+                    embedding_mode='hybrid',
                     llm_predictor=llm_predictor,
+                    include_text=True,
                     embed_model=embedding_model,
                     use_async=True,
+                    similarity_top_k=nodes or DEFAULT_SEARCH_NODES,
+                    response_mode=response_mode,
                 ),
             )
-        await self.usage_service.update_usage(llm_predictor.last_token_usage)
+        await self.usage_service.update_usage(llm_predictor.last_token_usage, chatgpt=True)
         await self.usage_service.update_usage(
             embedding_model.last_token_usage, embeddings=True
         )
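
For the deep path, this query now runs against the GPTKnowledgeGraphIndex built above; embedding_mode='hybrid' plus include_text=True blends triplet-keyword lookup with embedding similarity and feeds the underlying text chunks to the model. A hedged, standalone sketch of that call shape (query string and top-k are illustrative):

    # Sketch of a hybrid knowledge-graph query, gpt-index 0.4.x API.
    from gpt_index import GPTKnowledgeGraphIndex
    from gpt_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor

    llm_predictor = ChatGPTLLMPredictor()
    kg_index = GPTKnowledgeGraphIndex(
        documents,                  # documents collected by the search scrape
        chunk_size_limit=512,
        max_triplets_per_chunk=2,   # keep triplet extraction cheap
        llm_predictor=llm_predictor,
    )
    response = kg_index.query(
        "What do these pages say about the topic?",  # illustrative query
        embedding_mode="hybrid",    # triplet keywords + embedding similarity
        include_text=True,          # answer from source chunks, not triplets alone
        llm_predictor=llm_predictor,
        similarity_top_k=3,
    )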

@@ -19,6 +19,7 @@ classifiers = [
 ]
 dependencies = [
     "Pillow==9.3.0",
+    "openai==0.27.0",
     "py-cord==2.3.2",
     "python-dotenv==0.21.0",
     "requests==2.28.1",
@@ -30,7 +31,7 @@ dependencies = [
     "backoff==2.2.1",
     "flask==2.2.3",
     "beautifulsoup4==4.11.1",
-    "gpt-index==0.4.14",
+    "gpt-index==0.4.17",
     "PyPDF2==3.0.1",
     "youtube_transcript_api==0.5.0",
     "sentencepiece==0.1.97",

@@ -1,4 +1,5 @@
 Pillow==9.3.0
+openai==0.27.0
 py-cord==2.3.2
 python-dotenv==0.21.0
 requests==2.28.1
@@ -10,7 +11,7 @@ sqlitedict==2.1.0
 backoff==2.2.1
 flask==2.2.3
 beautifulsoup4==4.11.1
-gpt-index==0.4.14
+gpt-index==0.4.17
 PyPDF2==3.0.1
 youtube_transcript_api==0.5.0
 sentencepiece==0.1.97

@@ -1,4 +1,5 @@
 Pillow==9.3.0
+openai==0.27.0
 py-cord==2.3.2
 python-dotenv==0.21.0
 requests==2.28.1
@@ -10,7 +11,7 @@ sqlitedict==2.1.0
 backoff==2.2.1
 flask==2.2.3
 beautifulsoup4==4.11.1
-gpt-index==0.4.14
+gpt-index==0.4.17
 PyPDF2==3.0.1
 youtube_transcript_api==0.5.0
 sentencepiece==0.1.97

@@ -14,9 +14,12 @@ class UsageService:
         f.close()
         self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
-    async def get_price(self, tokens_used, embeddings=False):
+    async def get_price(self, tokens_used, embeddings=False, chatgpt=False):
         tokens_used = int(tokens_used)
-        if not embeddings:
+        if chatgpt:
+            price = (tokens_used / 1000) * 0.002
+            return price
+        elif not embeddings:
             price = (
                 tokens_used / 1000
             ) * 0.02  # Just use the highest rate instead of model-based... I am overestimating on purpose.
@@ -24,9 +27,11 @@ class UsageService:
             price = (tokens_used / 1000) * 0.0004
             return price
-    async def update_usage(self, tokens_used, embeddings=False):
+    async def update_usage(self, tokens_used, embeddings=False, chatgpt=False):
         tokens_used = int(tokens_used)
-        if not embeddings:
+        if chatgpt:
+            price = (tokens_used / 1000) * 0.002
+        elif not embeddings:
             price = (
                 tokens_used / 1000
             ) * 0.02  # Just use the highest rate instead of model-based... I am overestimating on purpose.
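
With the new chatgpt branch, the rates are $0.002/1K tokens for gpt-3.5-turbo, a deliberately overestimated $0.02/1K for other completions, and $0.0004/1K for embeddings. A quick worked check (the constructor argument is a stand-in for the service's data directory):

    # Worked example of the three pricing branches introduced here.
    import asyncio
    from pathlib import Path

    async def main():
        usage = UsageService(Path("data"))  # data directory is illustrative
        print(await usage.get_price(10_000, chatgpt=True))     # 10 * 0.002  -> 0.02
        print(await usage.get_price(10_000))                   # 10 * 0.02   -> 0.20
        print(await usage.get_price(10_000, embeddings=True))  # 10 * 0.0004 -> 0.004

    asyncio.run(main())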
