Token safeguards, better deep /search

Kaveen Kumarasinghe 2 years ago
parent 65f0190561
commit c977f6c5f5

@@ -119,9 +119,9 @@ class SearchService(discord.Cog, name="SearchService"):
             response, refined_text = await self.model.search(
                 ctx, query, user_api_key, search_scope, nodes, deep
             )
-        except ValueError:
+        except ValueError as e:
             await ctx.respond(
-                "The Google Search API returned an error. Check the console for more details.",
+                str(e),
                 ephemeral=True,
             )
             return

@@ -31,7 +31,7 @@ from services.environment_service import EnvService
 from models.openai_model import Model

-__version__ = "10.7.1"
+__version__ = "10.7.3"
 PID_FILE = Path("bot.pid")

@@ -37,7 +37,7 @@ from gpt_index import (
     PromptHelper,
     IndexStructType,
     OpenAIEmbedding,
-    GithubRepositoryReader,
+    GithubRepositoryReader, MockEmbedding,
 )
 from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
@@ -46,6 +46,7 @@ from gpt_index.composability import ComposableGraph
 from services.environment_service import EnvService, app_root_path

 SHORT_TO_LONG_CACHE = {}
+MAX_DEEP_COMPOSE_PRICE = EnvService.get_max_deep_compose_price()

 def get_and_query(
@@ -218,7 +219,7 @@ class Index_handler:
         return index

     def index_github_repository(self, link, embed_model):
+        print("indexing github repo")
         # Extract the "owner" and the "repo" name from the github link.
         owner = link.split("/")[3]
         repo = link.split("/")[4]
@@ -274,7 +275,6 @@ class Index_handler:
         # Get the file path of this tempfile.NamedTemporaryFile
         # Save this temp file to an actual file that we can put into something else to read it
         documents = SimpleDirectoryReader(input_files=[f.name]).load_data()
-        print("Loaded the PDF document data")
         # Delete the temporary file
         return documents
@@ -310,6 +310,7 @@ class Index_handler:
         documents = BeautifulSoupWebReader(
             website_extractor=DEFAULT_WEBSITE_EXTRACTOR
         ).load_data(urls=[url])
         # index = GPTSimpleVectorIndex(documents, embed_model=embed_model, use_async=True)
         index = await self.loop.run_in_executor(
             None,
@@ -553,6 +554,25 @@ class Index_handler:
         )
         embedding_model = OpenAIEmbedding()
+        llm_predictor_mock = MockLLMPredictor(4096)
+        embedding_model_mock = MockEmbedding(1536)
+
+        # Run the mock call first
+        await self.loop.run_in_executor(
+            None,
+            partial(
+                GPTTreeIndex,
+                documents=documents,
+                llm_predictor=llm_predictor_mock,
+                embed_model=embedding_model_mock,
+            ),
+        )
+        total_usage_price = await self.usage_service.get_price(llm_predictor_mock.last_token_usage) + await self.usage_service.get_price(embedding_model_mock.last_token_usage, True)
+        print("The total composition price is: ", total_usage_price)
+        if total_usage_price > MAX_DEEP_COMPOSE_PRICE:
+            raise ValueError("Doing this deep search would be prohibitively expensive. Please try a narrower search scope.")
+
         tree_index = await self.loop.run_in_executor(
             None,
             partial(
@@ -574,6 +594,8 @@ class Index_handler:
             name = (
                 f"composed_deep_index_{date.today().month}_{date.today().day}.json"
             )
+        else:
+            name = name + "_deep"

         # Save the composed index
         tree_index.save_to_disk(f"indexes/{user_id}/{name}.json")
@@ -905,6 +927,7 @@ class ComposeModal(discord.ui.View):
                 delete_after=120,
             )
             # Compose the indexes
+            try:
                 await self.index_cog.compose_indexes(
                     self.user_id,
                     indexes,
@@ -913,6 +936,20 @@ class ComposeModal(discord.ui.View):
                     if not self.deep_select.values or self.deep_select.values[0] == "no"
                     else True,
                 )
+            except ValueError as e:
+                await interaction.followup.send(
+                    str(e), ephemeral=True, delete_after=180
+                )
+                return False
+            except Exception as e:
+                await interaction.followup.send(
+                    "An error occurred while composing the indexes: " + str(e),
+                    ephemeral=True,
+                    delete_after=180,
+                )
+                return False
+
             await interaction.followup.send(
                 "Composed indexes", ephemeral=True, delete_after=180
             )

@@ -19,7 +19,7 @@ from gpt_index import (
     PromptHelper,
     LLMPredictor,
     OpenAIEmbedding,
-    SimpleDirectoryReader,
+    SimpleDirectoryReader, GPTTreeIndex, MockLLMPredictor, MockEmbedding,
 )
 from gpt_index.indices.knowledge_graph import GPTKnowledgeGraphIndex
 from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
@@ -28,6 +28,7 @@ from langchain import OpenAI
 from services.environment_service import EnvService, app_root_path
 from services.usage_service import UsageService

+MAX_SEARCH_PRICE = EnvService.get_max_search_price()

 class Search:
     def __init__(self, gpt_model, usage_service):
@@ -130,13 +131,12 @@ class Search:
                     f.write(data)
                     f.close()
                 else:
-                    return "An error occurred while downloading the PDF."
+                    raise ValueError("Could not download PDF")
             # Get the file path of this tempfile.NamedTemporaryFile
             # Save this temp file to an actual file that we can put into something else to read it
             documents = SimpleDirectoryReader(input_files=[f.name]).load_data()
             for document in documents:
                 document.extra_info = {"URL": url}
-            print("Loaded the PDF document data")
             # Delete the temporary file
             return documents
@@ -155,11 +155,7 @@ class Search:
                 [item["link"] for item in data["items"]],
             )
         else:
-            print(
-                "The Google Search API returned an error: "
-                + str(response.status)
-            )
-            return ["An error occurred while searching.", None]
+            raise ValueError("Error while retrieving links")

     async def try_edit(self, message, embed):
         try:
@@ -246,19 +242,17 @@ class Search:
             pdf = False
             try:
                 async with aiohttp.ClientSession() as session:
-                    async with session.get(link, timeout=2) as response:
+                    async with session.get(link, timeout=1) as response:
                         # Add another entry to links from all_links if the link is not already in it to compensate for the failed request
                         if response.status not in [200, 203, 202, 204]:
                             for link2 in all_links:
                                 if link2 not in links:
-                                    print("Found a replacement link")
                                     links.append(link2)
                                     break
                             continue
                         # Follow redirects
                         elif response.status in [301, 302, 303, 307, 308]:
                             try:
-                                print("Adding redirect")
                                 links.append(response.url)
                                 continue
                             except:
@@ -266,7 +260,6 @@ class Search:
                         else:
                             # Detect if the link is a PDF, if it is, we load it differently
                             if response.headers["Content-Type"] == "application/pdf":
-                                print("Found a PDF at the link " + link)
                                 pdf = True
             except:
@@ -275,7 +268,6 @@ class Search:
                 # Try to add a link from all_links, this is kind of messy.
                 for link2 in all_links:
                     if link2 not in links:
-                        print("Found a replacement link")
                         links.append(link2)
                         break
             except:
@@ -307,9 +299,18 @@ class Search:
         )

         if not deep:
+            embed_model_mock = MockEmbedding(embed_dim=1536)
+            await self.loop.run_in_executor(
+                None,
+                partial(GPTSimpleVectorIndex, documents, embed_model=embed_model_mock),
+            )
+            total_usage_price = await self.usage_service.get_price(embed_model_mock.last_token_usage, True)
+            if total_usage_price > 1.00:
+                raise ValueError("Doing this search would be prohibitively expensive. Please try a narrower search scope.")
+
             index = await self.loop.run_in_executor(
                 None,
-                partial(GPTSimpleVectorIndex, documents, embed_model=embedding_model),
+                partial(GPTSimpleVectorIndex, documents, embed_model=embedding_model, use_async=True),
             )
             # save the index to disk if not a redo
             if not redo:
@@ -320,22 +321,54 @@ class Search:
                     else ctx.author.id,
                     query,
                 )
+            await self.usage_service.update_usage(
+                embedding_model.last_token_usage, embeddings=True
+            )
         else:
-            print("Doing a deep search")
-            llm_predictor_deep = LLMPredictor(
-                llm=OpenAI(model_name="text-davinci-002", temperature=0, max_tokens=-1)
-            )
+            llm_predictor_deep = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
+
+            # Try a mock call first
+            llm_predictor_mock = MockLLMPredictor(4096)
+            embed_model_mock = MockEmbedding(embed_dim=1536)
+            await self.loop.run_in_executor(
+                None,
+                partial(
+                    GPTTreeIndex,
+                    documents,
+                    embed_model=embed_model_mock,
+                    llm_predictor=llm_predictor_mock,
+                ),
+            )
+            total_usage_price = await self.usage_service.get_price(llm_predictor_mock.last_token_usage) + await self.usage_service.get_price(embed_model_mock.last_token_usage, True)
+            if total_usage_price > MAX_SEARCH_PRICE:
+                raise ValueError("Doing this deep search would be prohibitively expensive. Please try a narrower search scope.")
+
             index = await self.loop.run_in_executor(
                 None,
                 partial(
-                    GPTKnowledgeGraphIndex,
+                    GPTTreeIndex,
                     documents,
-                    chunk_size_limit=512,
-                    max_triplets_per_chunk=2,
                     embed_model=embedding_model,
                     llm_predictor=llm_predictor_deep,
+                    use_async=True,
                 ),
             )
+            # llm_predictor_deep = LLMPredictor(
+            #     llm=OpenAI(model_name="text-davinci-002", temperature=0, max_tokens=-1)
+            # )
+            # index = await self.loop.run_in_executor(
+            #     None,
+            #     partial(
+            #         GPTKnowledgeGraphIndex,
+            #         documents,
+            #         chunk_size_limit=512,
+            #         max_triplets_per_chunk=2,
+            #         embed_model=embedding_model,
+            #         llm_predictor=llm_predictor_deep,
+            #     ),
+            # )
             await self.usage_service.update_usage(
                 embedding_model.last_token_usage, embeddings=True
             )
@@ -348,10 +381,6 @@ class Search:
             in_progress_message, self.build_search_indexed_embed(query_refined_text)
         )

-        await self.usage_service.update_usage(
-            embedding_model.last_token_usage, embeddings=True
-        )
-
         # Now we can search the index for a query:
         embedding_model.last_token_usage = 0
@@ -365,17 +394,31 @@ class Search:
                     llm_predictor=llm_predictor,
                     similarity_top_k=nodes or DEFAULT_SEARCH_NODES,
                     text_qa_template=self.qaprompt,
+                    use_async=True,
+                    response_mode="tree_summarize",
                 ),
             )
         else:
+            # response = await self.loop.run_in_executor(
+            #     None,
+            #     partial(
+            #         index.query,
+            #         query,
+            #         include_text=True,
+            #         embed_model=embedding_model,
+            #         llm_predictor=llm_predictor_deep,
+            #         use_async=True,
+            #     ),
+            # )
             response = await self.loop.run_in_executor(
                 None,
                 partial(
                     index.query,
                     query,
-                    include_text=True,
+                    child_branch_factor=2,
+                    llm_predictor=llm_predictor,
                     embed_model=embedding_model,
-                    llm_predictor=llm_predictor_deep,
+                    use_async=True,
                 ),
             )
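
Taken together, the deep /search and deep compose changes apply the same token safeguard: build the index once against gpt_index's mock predictors (which only count tokens and never call the OpenAI API), price the counted tokens with UsageService.get_price, and raise before the real, billable build if the estimate exceeds the cap. A condensed sketch of that pattern with the executor plumbing stripped out; the helper name and structure are illustrative, not part of this commit:

    from gpt_index import GPTTreeIndex, LLMPredictor, MockEmbedding, MockLLMPredictor
    from langchain import OpenAI

    async def build_tree_index_with_price_cap(documents, usage_service, max_price):
        # Dry run: mock predictors record token usage without hitting the API.
        llm_mock = MockLLMPredictor(4096)
        embed_mock = MockEmbedding(embed_dim=1536)
        GPTTreeIndex(documents, llm_predictor=llm_mock, embed_model=embed_mock)

        # Price the recorded usage: completion tokens plus embedding tokens.
        price = await usage_service.get_price(llm_mock.last_token_usage)
        price += await usage_service.get_price(embed_mock.last_token_usage, True)

        # Refuse before any real money is spent.
        if price > max_price:
            raise ValueError("This operation would be prohibitively expensive.")

        # Only now run the real, billable build.
        llm_real = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
        return GPTTreeIndex(documents, llm_predictor=llm_real, use_async=True)

In the actual code both builds go through loop.run_in_executor, since the blocking gpt_index constructors would otherwise stall the Discord event loop.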

@@ -379,3 +379,19 @@ class EnvService:
             return pinecone_region
         except Exception:
             return "us-west1-gcp"
+
+    @staticmethod
+    def get_max_search_price():
+        try:
+            search_price = float(os.getenv("MAX_SEARCH_PRICE"))
+            return search_price
+        except Exception:
+            return 1.00
+
+    @staticmethod
+    def get_max_deep_compose_price():
+        try:
+            deep_compose_price = float(os.getenv("MAX_DEEP_COMPOSE_PRICE"))
+            return deep_compose_price
+        except Exception:
+            return 3.00
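
Both getters follow the existing EnvService pattern: read an environment variable and fall back to a conservative default when it is missing or unparsable. Operators can tighten or loosen the caps per deployment, e.g. in the bot's environment file (values illustrative):

    MAX_SEARCH_PRICE=0.50
    MAX_DEEP_COMPOSE_PRICE=2.00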

@@ -14,6 +14,16 @@ class UsageService:
             f.close()
         self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

+    async def get_price(self, tokens_used, embeddings=False):
+        tokens_used = int(tokens_used)
+        if not embeddings:
+            price = (
+                tokens_used / 1000
+            ) * 0.02  # Just use the highest rate instead of model-based... I am overestimating on purpose.
+        else:
+            price = (tokens_used / 1000) * 0.0004
+        return price
+
     async def update_usage(self, tokens_used, embeddings=False):
         tokens_used = int(tokens_used)
         if not embeddings:
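
get_price deliberately overestimates: every completion token is billed at the top $0.02 per 1K rate regardless of model, and embeddings at $0.0004 per 1K. As a worked example, a mock run reporting 150,000 LLM tokens and 200,000 embedding tokens prices out at 150 × $0.02 + 200 × $0.0004 = $3.08, just over the default $3.00 deep-compose cap, so the compose would be rejected.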
