Better refining for the ChatGPT model

Kaveen Kumarasinghe 2 years ago
parent 81c9fba9a0
commit bf8574ddfe

@@ -32,7 +32,7 @@ from services.environment_service import EnvService
from models.openai_model import Model
__version__ = "10.9.14"
__version__ = "10.9.15"
PID_FILE = Path("bot.pid")

@@ -17,8 +17,10 @@ from datetime import date
from discord import InteractionResponse, Interaction
from discord.ext import pages
from langchain.llms import OpenAIChat
from llama_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
from langchain import OpenAI
from llama_index.prompts.chat_prompts import CHAT_REFINE_PROMPT
from llama_index.readers import YoutubeTranscriptReader
from llama_index.readers.schema.base import Document
@@ -29,19 +31,13 @@ from llama_index import (
SimpleDirectoryReader,
QuestionAnswerPrompt,
BeautifulSoupWebReader,
GPTListIndex,
QueryMode,
GPTTreeIndex,
GoogleDocsReader,
MockLLMPredictor,
LLMPredictor,
QueryConfig,
PromptHelper,
IndexStructType,
OpenAIEmbedding,
GithubRepositoryReader,
MockEmbedding,
download_loader,
download_loader, LLMPredictor,
)
from llama_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
@@ -52,7 +48,6 @@ from services.environment_service import EnvService
SHORT_TO_LONG_CACHE = {}
MAX_DEEP_COMPOSE_PRICE = EnvService.get_max_deep_compose_price()
llm_predictor = ChatGPTLLMPredictor()
EpubReader = download_loader("EpubReader")
MarkdownReader = download_loader("MarkdownReader")
RemoteReader = download_loader("RemoteReader")
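Note: the unchanged context lines above show the download_loader pattern this cog depends on — it pulls a reader class from LlamaHub at runtime and returns it for local instantiation. A minimal sketch, assuming llama-index 0.4.23; the file path is a placeholder and the load_data(file=...) signature is assumed from the LlamaHub EpubReader of that era:

```python
from pathlib import Path

from llama_index import download_loader

# download_loader fetches the named reader class from LlamaHub and returns it;
# the class is then instantiated like any bundled reader.
EpubReader = download_loader("EpubReader")

# Assumption: this EpubReader exposes load_data(file=Path(...)) and returns a
# list of llama_index Document objects. "book.epub" is a placeholder path.
documents = EpubReader().load_data(file=Path("book.epub"))
print(len(documents), "documents loaded")
```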
@@ -77,6 +72,7 @@ def get_and_query(
query,
child_branch_factor=child_branch_factor,
llm_predictor=llm_predictor,
refine_template=CHAT_REFINE_PROMPT,
embed_model=embed_model,
use_async=True,
)
@@ -87,6 +83,7 @@ def get_and_query(
llm_predictor=llm_predictor,
embed_model=embed_model,
similarity_top_k=nodes,
refine_template=CHAT_REFINE_PROMPT,
use_async=True,
)
return response
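These two hunks are the core of the commit: both branches of get_and_query now forward refine_template=CHAT_REFINE_PROMPT, so the refine step sends chat-formatted messages that gpt-3.5-turbo handles well instead of the default completion-style refine prompt. A minimal end-to-end sketch against the llama-index 0.4.x API pinned below; the document text, the question, and reliance on an OPENAI_API_KEY environment variable are placeholders/assumptions, not the bot's exact wiring:

```python
from langchain.llms import OpenAIChat
from llama_index import GPTSimpleVectorIndex, LLMPredictor, OpenAIEmbedding
from llama_index.prompts.chat_prompts import CHAT_REFINE_PROMPT
from llama_index.readers.schema.base import Document

# Assumption: OPENAI_API_KEY is set; the bot itself passes its OPENAI_TOKEN explicitly.
llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo"))
embed_model = OpenAIEmbedding()

# Placeholder corpus standing in for the user's indexed files and links.
index = GPTSimpleVectorIndex(
    [Document("Placeholder text for a single indexed document.")],
    embed_model=embed_model,
)

response = index.query(
    "Placeholder question about the indexed text.",
    similarity_top_k=4,                  # the bot passes this as `nodes`
    llm_predictor=llm_predictor,
    embed_model=embed_model,
    refine_template=CHAT_REFINE_PROMPT,  # chat-formatted refine step (this commit)
    use_async=True,
)
print(response)
```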
@@ -168,6 +165,7 @@ class Index_handler:
def __init__(self, bot, usage_service):
self.bot = bot
self.openai_key = os.getenv("OPENAI_TOKEN")
self.llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo", openai_api_key=self.openai_key))
self.index_storage = defaultdict(IndexData)
self.loop = asyncio.get_running_loop()
self.usage_service = usage_service
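Constructor note: Index_handler now builds one LLMPredictor per instance, backed by langchain's OpenAIChat wrapper for gpt-3.5-turbo, replacing the module-level ChatGPTLLMPredictor removed above. A minimal sketch of the same construction, assuming the key lives in the bot's OPENAI_TOKEN environment variable:

```python
import os

from langchain.llms import OpenAIChat
from llama_index import LLMPredictor

# OpenAIChat (langchain 0.0.104) targets the chat-completions endpoint;
# temperature=0 and model_name="gpt-3.5-turbo" mirror the bot's settings.
llm_predictor = LLMPredictor(
    llm=OpenAIChat(
        temperature=0,
        model_name="gpt-3.5-turbo",
        openai_api_key=os.getenv("OPENAI_TOKEN"),  # same env var the cog reads
    )
)
```

With this change the predictor (and its last_token_usage counter) is scoped to the Index_handler instance rather than shared at module level.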
@@ -785,14 +783,14 @@ class Index_handler:
partial(
GPTTreeIndex,
documents=documents,
llm_predictor=llm_predictor,
llm_predictor=self.llm_predictor,
embed_model=embedding_model,
use_async=True,
),
)
await self.usage_service.update_usage(
llm_predictor.last_token_usage, chatgpt=True
self.llm_predictor.last_token_usage, chatgpt=True
)
await self.usage_service.update_usage(
embedding_model.last_token_usage, embeddings=True
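The hunk above also shows the bot's standard pattern for calling blocking llama-index builds from async Discord handlers: run_in_executor plus functools.partial, since run_in_executor forwards only positional arguments. A minimal sketch, assuming documents, llm_predictor, and embedding_model are already built as in the surrounding code:

```python
import asyncio
from functools import partial

from llama_index import GPTTreeIndex

async def build_tree_index(documents, llm_predictor, embedding_model):
    loop = asyncio.get_running_loop()
    # partial() freezes the keyword arguments; the default (None) executor is a
    # thread pool, so the event loop stays responsive while the index is built.
    index = await loop.run_in_executor(
        None,
        partial(
            GPTTreeIndex,
            documents=documents,
            llm_predictor=llm_predictor,
            embed_model=embedding_model,
            use_async=True,
        ),
    )
    return index
```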
@@ -925,14 +923,14 @@ class Index_handler:
query,
response_mode,
nodes,
llm_predictor,
self.llm_predictor,
embedding_model,
child_branch_factor,
),
)
print("The last token usage was ", llm_predictor.last_token_usage)
print("The last token usage was ", self.llm_predictor.last_token_usage)
await self.usage_service.update_usage(
llm_predictor.last_token_usage, chatgpt=True
self.llm_predictor.last_token_usage, chatgpt=True
)
await self.usage_service.update_usage(
embedding_model.last_token_usage, embeddings=True
@@ -941,7 +939,7 @@ class Index_handler:
try:
total_price = round(
await self.usage_service.get_price(
llm_predictor.last_token_usage, chatgpt=True
self.llm_predictor.last_token_usage, chatgpt=True
)
+ await self.usage_service.get_price(
embedding_model.last_token_usage, embeddings=True
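On the usage accounting touched in the last two hunks: after a query, both the predictor and the embedding model expose last_token_usage, and the bot's usage_service converts those counts into a price (chatgpt=True selects the gpt-3.5-turbo rate, embeddings=True the embedding rate). A minimal sketch of that pattern, assuming only the awaitable get_price calls visible above:

```python
async def estimate_query_cost(usage_service, llm_predictor, embedding_model):
    # Assumption: get_price(tokens, chatgpt=True / embeddings=True) returns a
    # dollar amount, exactly as the hunks above use it.
    chat_cost = await usage_service.get_price(
        llm_predictor.last_token_usage, chatgpt=True
    )
    embed_cost = await usage_service.get_price(
        embedding_model.last_token_usage, embeddings=True
    )
    return chat_cost + embed_cost
```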

@@ -11,6 +11,7 @@ from pathlib import Path
import discord
from bs4 import BeautifulSoup
import aiohttp
from langchain.llms import OpenAIChat
from llama_index import (
QuestionAnswerPrompt,
GPTSimpleVectorIndex,
@@ -26,6 +27,7 @@ from llama_index import (
)
from llama_index.indices.knowledge_graph import GPTKnowledgeGraphIndex
from llama_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
from llama_index.prompts.chat_prompts import CHAT_REFINE_PROMPT
from llama_index.prompts.prompt_type import PromptType
from llama_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
from langchain import OpenAI
@@ -329,7 +331,7 @@ class Search:
embedding_model = OpenAIEmbedding()
llm_predictor = ChatGPTLLMPredictor()
llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo"))
if not deep:
embed_model_mock = MockEmbedding(embed_dim=1536)
@@ -369,7 +371,7 @@ class Search:
)
price += total_usage_price
else:
llm_predictor_deep = ChatGPTLLMPredictor()
llm_predictor_deep = LLMPredictor(llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo"))
# Try a mock call first
llm_predictor_mock = MockLLMPredictor(4096)
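The deep-search branch keeps its existing dry-run safeguard: query first with MockLLMPredictor and MockEmbedding so llama-index only counts tokens, and check the estimated price against a cap such as MAX_DEEP_COMPOSE_PRICE before making real API calls. A minimal sketch of that mock pass, with placeholder text and question; last_token_usage on the mock predictor is assumed to behave as on the real one:

```python
from llama_index import GPTSimpleVectorIndex, MockEmbedding, MockLLMPredictor
from llama_index.readers.schema.base import Document

mock_llm = MockLLMPredictor(4096)           # assumed max tokens per LLM call
mock_embed = MockEmbedding(embed_dim=1536)  # text-embedding-ada-002 width

# Build and query entirely against mocks: no OpenAI traffic, only token counting.
index = GPTSimpleVectorIndex(
    [Document("Placeholder text for a scraped search result.")],
    embed_model=mock_embed,
)
index.query("Placeholder question.", llm_predictor=mock_llm, embed_model=mock_embed)

estimated_tokens = mock_llm.last_token_usage
print("Estimated token usage for the real call:", estimated_tokens)
```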
@@ -451,6 +453,7 @@ class Search:
query,
embed_model=embedding_model,
llm_predictor=llm_predictor,
refine_template=CHAT_REFINE_PROMPT,
similarity_top_k=nodes or DEFAULT_SEARCH_NODES,
text_qa_template=self.qaprompt,
use_async=True,
@@ -458,17 +461,6 @@ class Search:
),
)
else:
# response = await self.loop.run_in_executor(
# None,
# partial(
# index.query,
# query,
# include_text=True,
# embed_model=embedding_model,
# llm_predictor=llm_predictor_deep,
# use_async=True,
# ),
# )
response = await self.loop.run_in_executor(
None,
partial(
@@ -476,6 +468,7 @@ class Search:
query,
embedding_mode="hybrid",
llm_predictor=llm_predictor,
refine_template=CHAT_REFINE_PROMPT,
include_text=True,
embed_model=embedding_model,
use_async=True,
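In the deep branch the refine template is now passed to the knowledge-graph query as well; embedding_mode="hybrid" mixes keyword/triplet retrieval with embedding similarity, and include_text=True returns the source chunks behind each triplet. A minimal sketch against the 0.4.x API, assuming a placeholder corpus and an OPENAI_API_KEY in the environment (building the graph itself makes LLM calls to extract triplets):

```python
from langchain.llms import OpenAIChat
from llama_index import LLMPredictor, OpenAIEmbedding
from llama_index.indices.knowledge_graph import GPTKnowledgeGraphIndex
from llama_index.prompts.chat_prompts import CHAT_REFINE_PROMPT
from llama_index.readers.schema.base import Document

llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo"))
embedding_model = OpenAIEmbedding()

# Placeholder corpus; the real code builds this from scraped search results.
kg_index = GPTKnowledgeGraphIndex(
    [Document("Placeholder text describing a few related entities.")],
    llm_predictor=llm_predictor,
)

response = kg_index.query(
    "Placeholder question.",
    embedding_mode="hybrid",             # triplets + embedding similarity
    include_text=True,                   # return the chunks behind each triplet
    llm_predictor=llm_predictor,
    embed_model=embedding_model,
    refine_template=CHAT_REFINE_PROMPT,  # chat-formatted refine step (this commit)
    use_async=True,
)
print(response)
```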

@@ -12,13 +12,13 @@ pinecone-client==2.1.0
sqlitedict==2.1.0
backoff==2.2.1
flask==2.2.3
llama-index==0.4.21
llama-index==0.4.23
PyPDF2==3.0.1
youtube_transcript_api==0.5.0
sentencepiece==0.1.97
protobuf==3.20.2
python-pptx==0.6.21
sentence-transformers==2.2.2
langchain==0.0.93
langchain==0.0.104
openai-whisper
unidecode==1.3.6

@@ -12,11 +12,11 @@ pinecone-client==2.1.0
sqlitedict==2.1.0
backoff==2.2.1
flask==2.2.3
llama-index==0.4.21
llama-index==0.4.23
PyPDF2==3.0.1
youtube_transcript_api==0.5.0
sentencepiece==0.1.97
protobuf==3.20.2
python-pptx==0.6.21
langchain==0.0.93
langchain==0.0.104
unidecode==1.3.6