bump gpt-index, better async query support

Kaveen Kumarasinghe 1 year ago
parent ec87a90fb8
commit 1a63fb84c0
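
The substance of the change: the index and search cogs previously pushed llama_index's blocking index.query onto a thread pool via loop.run_in_executor and functools.partial; this commit switches them to the library's native aquery coroutine (hence the bump to llama-index 0.4.26), so the calls can simply be awaited. A minimal sketch of the before/after pattern, with illustrative helper names (query_blocking/query_async are not functions from this repo):

    import asyncio
    from functools import partial

    async def query_blocking(index, query, **kwargs):
        # pre-commit pattern: run the synchronous index.query call in a thread pool
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, partial(index.query, query, **kwargs))

    async def query_async(index, query, **kwargs):
        # post-commit pattern: await llama_index's native async query directly
        return await index.aquery(query, **kwargs)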

@@ -32,7 +32,7 @@ from services.environment_service import EnvService
from models.openai_model import Model
__version__ = "10.9.15"
__version__ = "10.9.16"
PID_FILE = Path("bot.pid")

@@ -20,6 +20,7 @@ from discord.ext import pages
from langchain.llms import OpenAIChat
from llama_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
from langchain import OpenAI
from llama_index.optimization.optimizer import SentenceEmbeddingOptimizer
from llama_index.prompts.chat_prompts import CHAT_REFINE_PROMPT
from llama_index.readers import YoutubeTranscriptReader
@@ -55,7 +56,7 @@ RemoteReader = download_loader("RemoteReader")
RemoteDepthReader = download_loader("RemoteDepthReader")
def get_and_query(
async def get_and_query(
user_id,
index_storage,
query,
@@ -69,23 +70,23 @@ def get_and_query(
user_id
].get_index_or_throw()
if isinstance(index, GPTTreeIndex):
response = index.query(
response = await index.aquery(
query,
child_branch_factor=child_branch_factor,
llm_predictor=llm_predictor,
refine_template=CHAT_REFINE_PROMPT,
embed_model=embed_model,
use_async=True,
#optimizer=SentenceEmbeddingOptimizer(threshold_cutoff=0.7)
)
else:
response = index.query(
response = await index.aquery(
query,
response_mode=response_mode,
llm_predictor=llm_predictor,
embed_model=embed_model,
similarity_top_k=nodes,
refine_template=CHAT_REFINE_PROMPT,
use_async=True,
#optimizer=SentenceEmbeddingOptimizer(threshold_cutoff=0.7)
)
return response
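
Note the new SentenceEmbeddingOptimizer import above: the optimizer itself is left commented out in both aquery calls. If it were enabled, the call would presumably look like the sketch below (threshold_cutoff=0.7 is taken from the commented-out line; the optimizer prunes retrieved sentences whose embedding similarity to the query falls below that cutoff before synthesis):

    # hypothetical: get_and_query with the commented-out optimizer switched on
    response = await index.aquery(
        query,
        llm_predictor=llm_predictor,
        embed_model=embed_model,
        use_async=True,
        optimizer=SentenceEmbeddingOptimizer(threshold_cutoff=0.7),
    )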
@@ -921,20 +922,7 @@ class Index_handler:
try:
embedding_model = OpenAIEmbedding()
embedding_model.last_token_usage = 0
response = await self.loop.run_in_executor(
None,
partial(
get_and_query,
ctx.user.id,
self.index_storage,
query,
response_mode,
nodes,
self.llm_predictor,
embedding_model,
child_branch_factor,
),
)
response = await get_and_query(ctx.user.id, self.index_storage, query, response_mode, nodes, self.llm_predictor, embedding_model, child_branch_factor)
print("The last token usage was ", self.llm_predictor.last_token_usage)
await self.usage_service.update_usage(
self.llm_predictor.last_token_usage, chatgpt=True

@@ -455,36 +455,9 @@ class Search:
embedding_model.last_token_usage = 0
if not deep:
response = await self.loop.run_in_executor(
None,
partial(
index.query,
query,
embed_model=embedding_model,
llm_predictor=llm_predictor,
refine_template=CHAT_REFINE_PROMPT,
similarity_top_k=nodes or DEFAULT_SEARCH_NODES,
text_qa_template=self.qaprompt,
use_async=True,
response_mode=response_mode,
),
)
response = await index.aquery(query, embed_model=embedding_model, llm_predictor=llm_predictor, refine_template=CHAT_REFINE_PROMPT, similarity_top_k=nodes or DEFAULT_SEARCH_NODES, text_qa_template=self.qaprompt, response_mode=response_mode)
else:
response = await self.loop.run_in_executor(
None,
partial(
index.query,
query,
embedding_mode="hybrid",
llm_predictor=llm_predictor,
refine_template=CHAT_REFINE_PROMPT,
include_text=True,
embed_model=embedding_model,
use_async=True,
similarity_top_k=nodes or DEFAULT_SEARCH_NODES,
response_mode=response_mode,
),
)
response = await index.aquery(query, embed_model=embedding_model, llm_predictor=llm_predictor, refine_template=CHAT_REFINE_PROMPT, similarity_top_k=nodes or DEFAULT_SEARCH_NODES, text_qa_template=self.qaprompt, response_mode=response_mode)
await self.usage_service.update_usage(
llm_predictor.last_token_usage, chatgpt=True

@@ -32,7 +32,7 @@ dependencies = [
"sqlitedict==2.1.0",
"backoff==2.2.1",
"flask==2.2.3",
"llama-index==0.4.23",
"llama-index==0.4.26",
"PyPDF2==3.0.1",
"youtube_transcript_api==0.5.0",
"sentencepiece==0.1.97",

@@ -12,7 +12,7 @@ pinecone-client==2.1.0
sqlitedict==2.1.0
backoff==2.2.1
flask==2.2.3
llama-index==0.4.23
llama-index==0.4.26
PyPDF2==3.0.1
youtube_transcript_api==0.5.0
sentencepiece==0.1.97

@@ -12,7 +12,7 @@ pinecone-client==2.1.0
sqlitedict==2.1.0
backoff==2.2.1
flask==2.2.3
llama-index==0.4.23
llama-index==0.4.26
PyPDF2==3.0.1
youtube_transcript_api==0.5.0
sentencepiece==0.1.97
