Token safeguards, better deep /search

Kaveen Kumarasinghe 2 years ago
parent 65f0190561
commit c977f6c5f5

@ -119,9 +119,9 @@ class SearchService(discord.Cog, name="SearchService"):
response, refined_text = await self.model.search(
ctx, query, user_api_key, search_scope, nodes, deep
)
except ValueError:
except ValueError as e:
await ctx.respond(
"The Google Search API returned an error. Check the console for more details.",
str(e),
ephemeral=True,
)
return

@ -31,7 +31,7 @@ from services.environment_service import EnvService
from models.openai_model import Model
__version__ = "10.7.1"
__version__ = "10.7.3"
PID_FILE = Path("bot.pid")

@ -37,7 +37,7 @@ from gpt_index import (
PromptHelper,
IndexStructType,
OpenAIEmbedding,
GithubRepositoryReader,
GithubRepositoryReader, MockEmbedding,
)
from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
@ -46,6 +46,7 @@ from gpt_index.composability import ComposableGraph
from services.environment_service import EnvService, app_root_path
SHORT_TO_LONG_CACHE = {}
MAX_DEEP_COMPOSE_PRICE = EnvService.get_max_deep_compose_price()
def get_and_query(
@ -218,7 +219,7 @@ class Index_handler:
return index
def index_github_repository(self, link, embed_model):
print("indexing github repo")
# Extract the "owner" and the "repo" name from the github link.
owner = link.split("/")[3]
repo = link.split("/")[4]
@ -274,7 +275,6 @@ class Index_handler:
# Get the file path of this tempfile.NamedTemporaryFile
# Save this temp file to an actual file that we can put into something else to read it
documents = SimpleDirectoryReader(input_files=[f.name]).load_data()
print("Loaded the PDF document data")
# Delete the temporary file
return documents
@ -310,6 +310,7 @@ class Index_handler:
documents = BeautifulSoupWebReader(
website_extractor=DEFAULT_WEBSITE_EXTRACTOR
).load_data(urls=[url])
# index = GPTSimpleVectorIndex(documents, embed_model=embed_model, use_async=True)
index = await self.loop.run_in_executor(
None,
@ -553,6 +554,25 @@ class Index_handler:
)
embedding_model = OpenAIEmbedding()
llm_predictor_mock = MockLLMPredictor(4096)
embedding_model_mock = MockEmbedding(1536)
# Run the mock call first
await self.loop.run_in_executor(
None,
partial(
GPTTreeIndex,
documents=documents,
llm_predictor=llm_predictor_mock,
embed_model=embedding_model_mock,
),
)
total_usage_price = await self.usage_service.get_price(llm_predictor_mock.last_token_usage) + await self.usage_service.get_price(embedding_model_mock.last_token_usage, True)
print("The total composition price is: ", total_usage_price)
if total_usage_price > MAX_DEEP_COMPOSE_PRICE:
raise ValueError("Doing this deep search would be prohibitively expensive. Please try a narrower search scope.")
tree_index = await self.loop.run_in_executor(
None,
partial(
@ -574,6 +594,8 @@ class Index_handler:
name = (
f"composed_deep_index_{date.today().month}_{date.today().day}.json"
)
else:
name = name+"_deep"
# Save the composed index
tree_index.save_to_disk(f"indexes/{user_id}/{name}.json")
@ -905,14 +927,29 @@ class ComposeModal(discord.ui.View):
delete_after=120,
)
# Compose the indexes
await self.index_cog.compose_indexes(
self.user_id,
indexes,
self.name,
False
if not self.deep_select.values or self.deep_select.values[0] == "no"
else True,
)
try:
await self.index_cog.compose_indexes(
self.user_id,
indexes,
self.name,
False
if not self.deep_select.values or self.deep_select.values[0] == "no"
else True,
)
except ValueError as e:
await interaction.followup.send(
str(e), ephemeral=True, delete_after=180
)
return False
except Exception as e:
await interaction.followup.send(
"An error occurred while composing the indexes: " + str(e),
ephemeral=True,
delete_after=180,)
return False
await interaction.followup.send(
"Composed indexes", ephemeral=True, delete_after=180
)

@ -19,7 +19,7 @@ from gpt_index import (
PromptHelper,
LLMPredictor,
OpenAIEmbedding,
SimpleDirectoryReader,
SimpleDirectoryReader, GPTTreeIndex, MockLLMPredictor, MockEmbedding,
)
from gpt_index.indices.knowledge_graph import GPTKnowledgeGraphIndex
from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
@ -28,6 +28,7 @@ from langchain import OpenAI
from services.environment_service import EnvService, app_root_path
from services.usage_service import UsageService
MAX_SEARCH_PRICE = EnvService.get_max_search_price()
class Search:
def __init__(self, gpt_model, usage_service):
@ -130,13 +131,12 @@ class Search:
f.write(data)
f.close()
else:
return "An error occurred while downloading the PDF."
raise ValueError("Could not download PDF")
# Get the file path of this tempfile.NamedTemporaryFile
# Save this temp file to an actual file that we can put into something else to read it
documents = SimpleDirectoryReader(input_files=[f.name]).load_data()
for document in documents:
document.extra_info = {"URL": url}
print("Loaded the PDF document data")
# Delete the temporary file
return documents
@ -155,11 +155,7 @@ class Search:
[item["link"] for item in data["items"]],
)
else:
print(
"The Google Search API returned an error: "
+ str(response.status)
)
return ["An error occurred while searching.", None]
raise ValueError("Error while retrieving links")
async def try_edit(self, message, embed):
try:
@ -246,19 +242,17 @@ class Search:
pdf = False
try:
async with aiohttp.ClientSession() as session:
async with session.get(link, timeout=2) as response:
async with session.get(link, timeout=1) as response:
# Add another entry to links from all_links if the link is not already in it to compensate for the failed request
if response.status not in [200, 203, 202, 204]:
for link2 in all_links:
if link2 not in links:
print("Found a replacement link")
links.append(link2)
break
continue
# Follow redirects
elif response.status in [301, 302, 303, 307, 308]:
try:
print("Adding redirect")
links.append(response.url)
continue
except:
@ -266,7 +260,6 @@ class Search:
else:
# Detect if the link is a PDF, if it is, we load it differently
if response.headers["Content-Type"] == "application/pdf":
print("Found a PDF at the link " + link)
pdf = True
except:
@ -275,7 +268,6 @@ class Search:
# Try to add a link from all_links, this is kind of messy.
for link2 in all_links:
if link2 not in links:
print("Found a replacement link")
links.append(link2)
break
except:
@ -307,9 +299,18 @@ class Search:
)
if not deep:
embed_model_mock = MockEmbedding(embed_dim=1536)
self.loop.run_in_executor(
None,
partial(GPTSimpleVectorIndex, documents, embed_model=embed_model_mock),
)
total_usage_price = await self.usage_service.get_price(embed_model_mock.last_token_usage, True)
if total_usage_price > 1.00:
raise ValueError("Doing this search would be prohibitively expensive. Please try a narrower search scope.")
index = await self.loop.run_in_executor(
None,
partial(GPTSimpleVectorIndex, documents, embed_model=embedding_model),
partial(GPTSimpleVectorIndex, documents, embed_model=embedding_model, use_async=True),
)
# save the index to disk if not a redo
if not redo:
@ -320,22 +321,54 @@ class Search:
else ctx.author.id,
query,
)
await self.usage_service.update_usage(
embedding_model.last_token_usage, embeddings=True
)
else:
print("Doing a deep search")
llm_predictor_deep = LLMPredictor(
llm=OpenAI(model_name="text-davinci-002", temperature=0, max_tokens=-1)
llm_predictor_deep = LLMPredictor(llm=OpenAI(model_name="text-davinci-003"))
# Try a mock call first
llm_predictor_mock = MockLLMPredictor(4096)
embed_model_mock = MockEmbedding(embed_dim=1536)
await self.loop.run_in_executor(
None,
partial(
GPTTreeIndex,
documents,
embed_model=embed_model_mock,
llm_predictor=llm_predictor_mock,
),
)
total_usage_price = await self.usage_service.get_price(llm_predictor_mock.last_token_usage) + await self.usage_service.get_price(embed_model_mock.last_token_usage, True)
if total_usage_price > MAX_SEARCH_PRICE:
raise ValueError("Doing this deep search would be prohibitively expensive. Please try a narrower search scope.")
index = await self.loop.run_in_executor(
None,
partial(
GPTKnowledgeGraphIndex,
GPTTreeIndex,
documents,
chunk_size_limit=512,
max_triplets_per_chunk=2,
embed_model=embedding_model,
llm_predictor=llm_predictor_deep,
use_async=True,
),
)
# llm_predictor_deep = LLMPredictor(
# llm=OpenAI(model_name="text-davinci-002", temperature=0, max_tokens=-1)
# )
# index = await self.loop.run_in_executor(
# None,
# partial(
# GPTKnowledgeGraphIndex,
# documents,
# chunk_size_limit=512,
# max_triplets_per_chunk=2,
# embed_model=embedding_model,
# llm_predictor=llm_predictor_deep,
# ),
# )
await self.usage_service.update_usage(
embedding_model.last_token_usage, embeddings=True
)
@ -348,10 +381,6 @@ class Search:
in_progress_message, self.build_search_indexed_embed(query_refined_text)
)
await self.usage_service.update_usage(
embedding_model.last_token_usage, embeddings=True
)
# Now we can search the index for a query:
embedding_model.last_token_usage = 0
@ -365,17 +394,31 @@ class Search:
llm_predictor=llm_predictor,
similarity_top_k=nodes or DEFAULT_SEARCH_NODES,
text_qa_template=self.qaprompt,
use_async=True,
response_mode="tree_summarize",
),
)
else:
# response = await self.loop.run_in_executor(
# None,
# partial(
# index.query,
# query,
# include_text=True,
# embed_model=embedding_model,
# llm_predictor=llm_predictor_deep,
# use_async=True,
# ),
# )
response = await self.loop.run_in_executor(
None,
partial(
index.query,
query,
include_text=True,
child_branch_factor=2,
llm_predictor=llm_predictor,
embed_model=embedding_model,
llm_predictor=llm_predictor_deep,
use_async=True,
),
)

@ -379,3 +379,19 @@ class EnvService:
return pinecone_region
except Exception:
return "us-west1-gcp"
@staticmethod
def get_max_search_price():
    """Return the price cap for a search, read from ``MAX_SEARCH_PRICE``.

    Returns:
        float: The parsed value of the ``MAX_SEARCH_PRICE`` environment
        variable, or ``1.00`` when the variable is unset, is not a valid
        number, or parses to NaN.
    """
    default_price = 1.00
    try:
        search_price = float(os.getenv("MAX_SEARCH_PRICE"))
    except (TypeError, ValueError):
        # TypeError: the variable is unset (float(None)).
        # ValueError: the variable is set but is not numeric.
        return default_price
    # float("nan") parses successfully, but NaN compares False against
    # everything, so a "price > limit" guard would never trigger. Treat it
    # as an invalid setting (NaN is the only value not equal to itself).
    if search_price != search_price:
        return default_price
    return search_price
@staticmethod
def get_max_deep_compose_price():
    """Return the price cap for a deep compose, read from ``MAX_DEEP_COMPOSE_PRICE``.

    Returns:
        float: The parsed value of the ``MAX_DEEP_COMPOSE_PRICE`` environment
        variable, or ``3.00`` when the variable is unset, is not a valid
        number, or parses to NaN.
    """
    default_price = 3.00
    try:
        deep_compose_price = float(os.getenv("MAX_DEEP_COMPOSE_PRICE"))
    except (TypeError, ValueError):
        # TypeError: the variable is unset (float(None)).
        # ValueError: the variable is set but is not numeric.
        return default_price
    # float("nan") parses successfully, but NaN compares False against
    # everything, so a "price > limit" guard would never trigger. Treat it
    # as an invalid setting (NaN is the only value not equal to itself).
    if deep_compose_price != deep_compose_price:
        return default_price
    return deep_compose_price

@ -14,6 +14,16 @@ class UsageService:
f.close()
self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
async def get_price(self, tokens_used, embeddings=False):
    """Estimate the price of a number of tokens.

    Args:
        tokens_used: Token count; coerced with ``int()`` before use.
        embeddings: When true, apply the embeddings rate instead of the
            completion rate.

    Returns:
        float: ``tokens / 1000`` multiplied by the per-1000-token rate.
    """
    token_count = int(tokens_used)
    # The non-embeddings rate is deliberately the highest one rather than a
    # per-model rate, so the estimate errs on the high side.
    per_thousand_rate = 0.0004 if embeddings else 0.02
    return (token_count / 1000) * per_thousand_rate
async def update_usage(self, tokens_used, embeddings=False):
tokens_used = int(tokens_used)
if not embeddings:

Loading…
Cancel
Save