ChatGPT support for search and indexing

Kaveen Kumarasinghe 1 year ago
parent c9b9d9ce2b
commit 3ce319b254

@@ -128,6 +128,7 @@ class SearchService(discord.Cog, name="SearchService"):
ctx, query, user_api_key, search_scope, nodes, deep, response_mode
)
except ValueError as e:
traceback.print_exc()
await ctx.respond(
embed=EmbedStatics.get_search_failure_embed(str(e)),
ephemeral=True,
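
The added traceback.print_exc() dumps the full stack trace to the bot's console before the user-facing failure embed goes out, so search errors are no longer silently swallowed. A minimal sketch of the pattern on py-cord 2.3.2; the plain discord.Embed and the do_search helper are stand-ins for this repo's EmbedStatics and search model:

    import traceback

    import discord

    class SearchService(discord.Cog, name="SearchService"):
        @discord.slash_command(name="search", description="Search the web")
        async def search(self, ctx: discord.ApplicationContext, query: str):
            try:
                await self.do_search(ctx, query)  # hypothetical search entry point
            except ValueError as e:
                traceback.print_exc()  # full stack trace to the server logs
                await ctx.respond(
                    embed=discord.Embed(
                        title="Search failed", description=str(e), color=0xFF0000
                    ),
                    ephemeral=True,  # only the invoking user sees the error
                )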

@@ -17,6 +17,7 @@ from datetime import date
from discord import InteractionResponse, Interaction
from discord.ext import pages
from gpt_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
from langchain import OpenAI
from gpt_index.readers import YoutubeTranscriptReader
@@ -50,6 +51,7 @@ from services.environment_service import EnvService, app_root_path
SHORT_TO_LONG_CACHE = {}
MAX_DEEP_COMPOSE_PRICE = EnvService.get_max_deep_compose_price()
llm_predictor = ChatGPTLLMPredictor()
def get_and_query(
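
Constructing ChatGPTLLMPredictor once at module scope makes gpt-3.5-turbo the default predictor for queries, replacing the per-call davinci predictors this commit deletes elsewhere. The swap in isolation, using the gpt-index 0.4.17 import path from the hunk above:

    from gpt_index import LLMPredictor
    from gpt_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
    from langchain import OpenAI

    # Old default: completion-model predictor, billed at ~$0.02 per 1K tokens here
    llm_predictor_davinci = LLMPredictor(
        llm=OpenAI(model_name="text-davinci-003", max_tokens=-1)
    )

    # New default: ChatGPT-backed predictor, billed at $0.002 per 1K tokens
    llm_predictor = ChatGPTLLMPredictor()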
@@ -596,9 +598,7 @@ class Index_handler:
documents = []
for _index in index_objects:
documents.extend(await self.index_to_docs(_index, 256, 20))
llm_predictor = LLMPredictor(
llm=OpenAI(model_name="text-davinci-003", max_tokens=-1)
)
embedding_model = OpenAIEmbedding()
llm_predictor_mock = MockLLMPredictor(4096)
@@ -615,9 +615,9 @@ class Index_handler:
),
)
total_usage_price = await self.usage_service.get_price(
llm_predictor_mock.last_token_usage
llm_predictor_mock.last_token_usage, chatgpt=False, # TODO Enable again when tree indexes are fixed
) + await self.usage_service.get_price(
embedding_model_mock.last_token_usage, True
embedding_model_mock.last_token_usage, embeddings=True
)
print("The total composition price is: ", total_usage_price)
if total_usage_price > MAX_DEEP_COMPOSE_PRICE:
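
The guard above (its raise statement lands in the next hunk) dry-runs the composition against mock models that count tokens without calling the API, then prices the counts. A condensed sketch, keeping chatgpt=False on the LLM share as the diff does until tree indexes support ChatGPT pricing:

    from gpt_index import GPTTreeIndex, MockEmbedding, MockLLMPredictor

    async def estimate_compose_price(documents, usage_service) -> float:
        llm_predictor_mock = MockLLMPredictor(4096)           # counts LLM tokens only
        embedding_model_mock = MockEmbedding(embed_dim=1536)  # counts embed tokens only
        GPTTreeIndex(
            documents=documents,
            llm_predictor=llm_predictor_mock,
            embed_model=embedding_model_mock,
        )
        return await usage_service.get_price(
            llm_predictor_mock.last_token_usage, chatgpt=False
        ) + await usage_service.get_price(
            embedding_model_mock.last_token_usage, embeddings=True
        )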
@@ -625,18 +625,20 @@ class Index_handler:
"Doing this deep search would be prohibitively expensive. Please try a narrower search scope."
)
llm_predictor_temp_non_cgpt = LLMPredictor(llm=OpenAI(model_name="text-davinci-003")) # TODO Get rid of this
tree_index = await self.loop.run_in_executor(
None,
partial(
GPTTreeIndex,
documents=documents,
llm_predictor=llm_predictor,
llm_predictor=llm_predictor_temp_non_cgpt,
embed_model=embedding_model,
use_async=True,
),
)
await self.usage_service.update_usage(llm_predictor.last_token_usage)
await self.usage_service.update_usage(llm_predictor_temp_non_cgpt.last_token_usage, chatgpt=False)  # TODO Re-enable chatgpt pricing when tree indexes are fixed
await self.usage_service.update_usage(
embedding_model.last_token_usage, embeddings=True
)
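
gpt-index 0.4.x builds indexes synchronously, so every construction here is pushed onto the default thread pool with run_in_executor, functools.partial pre-binding the keyword arguments; this keeps the Discord event loop responsive during long indexing jobs. The pattern stripped to its essentials:

    import asyncio
    from functools import partial

    from gpt_index import GPTTreeIndex

    async def build_tree_index(documents, llm_predictor, embed_model):
        loop = asyncio.get_running_loop()
        # run_in_executor only accepts a bare callable, so partial pre-binds kwargs
        return await loop.run_in_executor(
            None,  # default ThreadPoolExecutor
            partial(
                GPTTreeIndex,
                documents=documents,
                llm_predictor=llm_predictor,
                embed_model=embed_model,
                use_async=True,
            ),
        )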
@@ -746,7 +748,7 @@ class Index_handler:
)
try:
llm_predictor = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
embedding_model = OpenAIEmbedding()
embedding_model.last_token_usage = 0
response = await self.loop.run_in_executor(
@@ -764,7 +766,7 @@ class Index_handler:
),
)
print("The last token usage was ", llm_predictor.last_token_usage)
await self.usage_service.update_usage(llm_predictor.last_token_usage)
await self.usage_service.update_usage(llm_predictor.last_token_usage, chatgpt=True)
await self.usage_service.update_usage(
embedding_model.last_token_usage, embeddings=True
)
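
last_token_usage behaves as a running counter on the model objects, which is why the handler zeroes it on the embedding model before querying and bills the delta afterwards. Note the predictor in this hunk is still the davinci LLMPredictor, so charging it at the ChatGPT rate slightly undercounts; the accounting as committed reduces to:

    embedding_model.last_token_usage = 0  # so the counter reflects only this query

    response = index.query(
        query, llm_predictor=llm_predictor, embed_model=embedding_model
    )

    # Bill LLM tokens at the gpt-3.5-turbo rate, embedding tokens at the embed rate
    await usage_service.update_usage(llm_predictor.last_token_usage, chatgpt=True)
    await usage_service.update_usage(embedding_model.last_token_usage, embeddings=True)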

@@ -25,6 +25,7 @@ from gpt_index import (
MockEmbedding,
)
from gpt_index.indices.knowledge_graph import GPTKnowledgeGraphIndex
from gpt_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
from gpt_index.prompts.prompt_type import PromptType
from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
from langchain import OpenAI
@@ -49,7 +50,7 @@ class Search:
"\n---------------------\n"
"Never say '<|endofstatement|>'\n"
"Given the context information and not prior knowledge, "
"answer the question, say that you were unable to answer the question if there is not sufficient context to formulate a decisive answer. The search query was: {query_str}\n"
"answer the question, say that you were unable to answer the question if there is not sufficient context to formulate a decisive answer. If the prior knowledge/context was sufficient, simply repeat it. The search query was: {query_str}\n"
)
self.openai_key = os.getenv("OPENAI_TOKEN")
self.EMBED_CUTOFF = 2000
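
The amended string is a gpt-index question-answer template: retrieved chunks replace {context_str} above the divider and the user's query replaces {query_str}, and the new sentence tells the model to repeat sufficient context rather than refuse. Passing a custom template of this shape into a query would look roughly like the sketch below; QuestionAnswerPrompt and the text_qa_template keyword are the gpt-index 0.4.x conventions and are assumed here:

    from gpt_index import GPTSimpleVectorIndex, QuestionAnswerPrompt

    qa_template = QuestionAnswerPrompt(
        "Context information is below.\n"
        "---------------------\n"
        "{context_str}\n"
        "---------------------\n"
        "Given the context information and not prior knowledge, "
        "answer the question: {query_str}\n"
    )

    # index: a previously built GPTSimpleVectorIndex
    response = index.query("What is the refund policy?", text_qa_template=qa_template)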
@@ -215,7 +216,7 @@ class Search:
try:
llm_predictor_presearch = OpenAI(
max_tokens=50,
temperature=0.25,
temperature=0.4,
presence_penalty=0.65,
model_name="text-davinci-003",
)
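
The pre-search helper is a bare langchain OpenAI completion model rather than a gpt-index predictor, capped at 50 tokens and nudged from temperature 0.25 to 0.4 for more varied phrasings. langchain 0.0.x LLM objects are directly callable, so usage is just:

    from langchain import OpenAI

    llm_predictor_presearch = OpenAI(
        max_tokens=50,           # keep the helper completion short and cheap
        temperature=0.4,         # raised from 0.25 in this commit
        presence_penalty=0.65,   # discourage parroting the original wording
        model_name="text-davinci-003",
    )

    # prompt string in, completion string out
    rewritten_query = llm_predictor_presearch("Rewrite as a web search query: ...")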
@@ -314,9 +315,7 @@ class Search:
embedding_model = OpenAIEmbedding()
llm_predictor = LLMPredictor(
llm=OpenAI(model_name="text-davinci-003", max_tokens=-1)
)
llm_predictor = ChatGPTLLMPredictor()
if not deep:
embed_model_mock = MockEmbedding(embed_dim=1536)
@@ -325,7 +324,7 @@ class Search:
partial(GPTSimpleVectorIndex, documents, embed_model=embed_model_mock),
)
total_usage_price = await self.usage_service.get_price(
embed_model_mock.last_token_usage, True
embed_model_mock.last_token_usage, embeddings=True
)
if total_usage_price > 1.00:
raise ValueError(
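
On the shallow path only embeddings are mocked and the hard cap is $1.00. At the $0.0004-per-1K embedding rate set in UsageService below, that cap corresponds to 2.5 million tokens:

    tokens = 2_500_000
    price = (tokens / 1000) * 0.0004  # == 1.00, the shallow-search ceiling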
@@ -356,63 +355,60 @@ class Search:
)
price += total_usage_price
else:
llm_predictor_deep = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
# Try a mock call first
llm_predictor_mock = MockLLMPredictor(4096)
embed_model_mock = MockEmbedding(embed_dim=1536)
llm_predictor_deep = ChatGPTLLMPredictor()
await self.loop.run_in_executor(
None,
partial(
GPTTreeIndex,
documents,
embed_model=embed_model_mock,
llm_predictor=llm_predictor_mock,
),
)
total_usage_price = await self.usage_service.get_price(
llm_predictor_mock.last_token_usage
) + await self.usage_service.get_price(
embed_model_mock.last_token_usage, True
)
if total_usage_price > MAX_SEARCH_PRICE:
await self.try_delete(in_progress_message)
raise ValueError(
"Doing this deep search would be prohibitively expensive. Please try a narrower search scope. This deep search indexing would have cost ${:.2f}.".format(
total_usage_price
)
)
# # Try a mock call first
# llm_predictor_mock = MockLLMPredictor(4096)
# embed_model_mock = MockEmbedding(embed_dim=1536)
# await self.loop.run_in_executor(
# None,
# partial(
# GPTKnowledgeGraphIndex,
# documents,
# chunk_size_limit=512,
# max_triplets_per_chunk=2,
# embed_model=embed_model_mock,
# llm_predictor=llm_predictor_mock,
# ),
# )
# total_usage_price = await self.usage_service.get_price(
# llm_predictor_mock.last_token_usage, chatgpt=True,
# ) + await self.usage_service.get_price(
# embed_model_mock.last_token_usage, embeddings=True
# )
# print(f"Total usage price: {total_usage_price}")
# if total_usage_price > MAX_SEARCH_PRICE:
# await self.try_delete(in_progress_message)
# raise ValueError(
# "Doing this deep search would be prohibitively expensive. Please try a narrower search scope. This deep search indexing would have cost ${:.2f}.".format(
# total_usage_price
# )
# )
# TODO Add back the mock when fixed!
index = await self.loop.run_in_executor(
None,
partial(
GPTTreeIndex,
GPTKnowledgeGraphIndex,
documents,
chunk_size_limit=512,
max_triplets_per_chunk=2,
embed_model=embedding_model,
llm_predictor=llm_predictor_deep,
use_async=True,
),
)
# llm_predictor_deep = LLMPredictor(
# llm=OpenAI(model_name="text-davinci-002", temperature=0, max_tokens=-1)
# )
# index = await self.loop.run_in_executor(
# None,
# partial(
# GPTKnowledgeGraphIndex,
# documents,
# chunk_size_limit=512,
# max_triplets_per_chunk=2,
# embed_model=embedding_model,
# llm_predictor=llm_predictor_deep,
# ),
# )
total_usage_price = await self.usage_service.get_price(
llm_predictor_deep.last_token_usage, chatgpt=True,
) + await self.usage_service.get_price(
embedding_model.last_token_usage, embeddings=True)
await self.usage_service.update_usage(
embedding_model.last_token_usage, embeddings=True
)
await self.usage_service.update_usage(
llm_predictor_deep.last_token_usage, embeddings=False
llm_predictor_deep.last_token_usage, chatgpt=True,
)
price += total_usage_price
@@ -455,14 +451,17 @@ class Search:
partial(
index.query,
query,
child_branch_factor=2,
embedding_mode='hybrid',
llm_predictor=llm_predictor,
include_text=True,
embed_model=embedding_model,
use_async=True,
similarity_top_k=nodes or DEFAULT_SEARCH_NODES,
response_mode=response_mode,
),
)
await self.usage_service.update_usage(llm_predictor.last_token_usage)
await self.usage_service.update_usage(llm_predictor.last_token_usage, chatgpt=True)
await self.usage_service.update_usage(
embedding_model.last_token_usage, embeddings=True
)
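
This hunk swaps the tree-index knob (child_branch_factor) for knowledge-graph ones: embedding_mode='hybrid' mixes triplet keyword matching with embedding similarity, and include_text=True attaches the source chunk text to each graph hit. The call in minimal form, against an already-built GPTKnowledgeGraphIndex:

    response = index.query(
        query,
        embedding_mode="hybrid",      # keyword + embedding retrieval over the graph
        include_text=True,            # return source text along with graph matches
        llm_predictor=llm_predictor,  # the ChatGPTLLMPredictor from earlier
        embed_model=embedding_model,
        similarity_top_k=nodes or DEFAULT_SEARCH_NODES,
        response_mode=response_mode,
        use_async=True,
    )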

@@ -19,6 +19,7 @@ classifiers = [
]
dependencies = [
"Pillow==9.3.0",
"openai==0.27.0",
"py-cord==2.3.2",
"python-dotenv==0.21.0",
"requests==2.28.1",
@@ -30,7 +31,7 @@ dependencies = [
"backoff==2.2.1",
"flask==2.2.3",
"beautifulsoup4==4.11.1",
"gpt-index==0.4.14",
"gpt-index==0.4.17",
"PyPDF2==3.0.1",
"youtube_transcript_api==0.5.0",
"sentencepiece==0.1.97",

@@ -1,4 +1,5 @@
Pillow==9.3.0
openai==0.27.0
py-cord==2.3.2
python-dotenv==0.21.0
requests==2.28.1
@@ -10,7 +11,7 @@ sqlitedict==2.1.0
backoff==2.2.1
flask==2.2.3
beautifulsoup4==4.11.1
gpt-index==0.4.14
gpt-index==0.4.17
PyPDF2==3.0.1
youtube_transcript_api==0.5.0
sentencepiece==0.1.97

@@ -1,4 +1,5 @@
Pillow==9.3.0
openai==0.27.0
py-cord==2.3.2
python-dotenv==0.21.0
requests==2.28.1
@@ -10,7 +11,7 @@ sqlitedict==2.1.0
backoff==2.2.1
flask==2.2.3
beautifulsoup4==4.11.1
gpt-index==0.4.14
gpt-index==0.4.17
PyPDF2==3.0.1
youtube_transcript_api==0.5.0
sentencepiece==0.1.97

@@ -14,9 +14,12 @@ class UsageService:
f.close()
self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
async def get_price(self, tokens_used, embeddings=False):
async def get_price(self, tokens_used, embeddings=False, chatgpt=False):
tokens_used = int(tokens_used)
if not embeddings:
if chatgpt:
price = (tokens_used / 1000) * 0.002
return price
elif not embeddings:
price = (
tokens_used / 1000
) * 0.02 # Just use the highest rate instead of model-based... I am overestimating on purpose.
@@ -24,9 +27,11 @@ class UsageService:
price = (tokens_used / 1000) * 0.0004
return price
async def update_usage(self, tokens_used, embeddings=False):
async def update_usage(self, tokens_used, embeddings=False, chatgpt=False):
tokens_used = int(tokens_used)
if not embeddings:
if chatgpt:
price = (tokens_used / 1000) * 0.002
elif not embeddings:
price = (
tokens_used / 1000
) * 0.02 # Just use the highest rate instead of model-based... I am overestimating on purpose.
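
The new chatgpt flag short-circuits both get_price and update_usage to gpt-3.5-turbo's $0.002 per 1K tokens, a tenth of the deliberately overestimated $0.02 completion rate. For a 4,096-token call the three tiers work out to:

    tokens_used = 4096
    print((tokens_used / 1000) * 0.002)   # chatgpt:    $0.008192
    print((tokens_used / 1000) * 0.02)    # completion: $0.08192  (overestimate on purpose)
    print((tokens_used / 1000) * 0.0004)  # embeddings: $0.0016384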
