diff --git a/gpt3discord.py b/gpt3discord.py
index 2a041e7..97c9797 100644
--- a/gpt3discord.py
+++ b/gpt3discord.py
@@ -32,7 +32,7 @@ from services.environment_service import EnvService
 from models.openai_model import Model
 
 
-__version__ = "10.9.14"
+__version__ = "10.9.15"
 
 
 PID_FILE = Path("bot.pid")
diff --git a/models/index_model.py b/models/index_model.py
index 68e0775..c6d38ea 100644
--- a/models/index_model.py
+++ b/models/index_model.py
@@ -17,8 +17,10 @@ from datetime import date
 from discord import InteractionResponse, Interaction
 from discord.ext import pages
 
+from langchain.llms import OpenAIChat
 from llama_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
 from langchain import OpenAI
+from llama_index.prompts.chat_prompts import CHAT_REFINE_PROMPT
 
 from llama_index.readers import YoutubeTranscriptReader
 from llama_index.readers.schema.base import Document
@@ -29,19 +31,13 @@ from llama_index import (
     SimpleDirectoryReader,
     QuestionAnswerPrompt,
     BeautifulSoupWebReader,
-    GPTListIndex,
-    QueryMode,
     GPTTreeIndex,
     GoogleDocsReader,
     MockLLMPredictor,
-    LLMPredictor,
-    QueryConfig,
-    PromptHelper,
-    IndexStructType,
     OpenAIEmbedding,
     GithubRepositoryReader,
     MockEmbedding,
-    download_loader,
+    download_loader, LLMPredictor,
 )
 
 from llama_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
@@ -52,7 +48,6 @@ from services.environment_service import EnvService
 
 SHORT_TO_LONG_CACHE = {}
 MAX_DEEP_COMPOSE_PRICE = EnvService.get_max_deep_compose_price()
-llm_predictor = ChatGPTLLMPredictor()
 EpubReader = download_loader("EpubReader")
 MarkdownReader = download_loader("MarkdownReader")
 RemoteReader = download_loader("RemoteReader")
@@ -77,6 +72,7 @@ def get_and_query(
             query,
             child_branch_factor=child_branch_factor,
             llm_predictor=llm_predictor,
+            refine_template=CHAT_REFINE_PROMPT,
             embed_model=embed_model,
             use_async=True,
         )
@@ -87,6 +83,7 @@ def get_and_query(
             llm_predictor=llm_predictor,
             embed_model=embed_model,
             similarity_top_k=nodes,
+            refine_template=CHAT_REFINE_PROMPT,
             use_async=True,
         )
     return response
@@ -168,6 +165,7 @@ class Index_handler:
     def __init__(self, bot, usage_service):
         self.bot = bot
         self.openai_key = os.getenv("OPENAI_TOKEN")
+        self.llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo", openai_api_key=self.openai_key))
         self.index_storage = defaultdict(IndexData)
         self.loop = asyncio.get_running_loop()
         self.usage_service = usage_service
@@ -785,14 +783,14 @@ class Index_handler:
                 partial(
                     GPTTreeIndex,
                     documents=documents,
-                    llm_predictor=llm_predictor,
+                    llm_predictor=self.llm_predictor,
                     embed_model=embedding_model,
                     use_async=True,
                 ),
             )
 
             await self.usage_service.update_usage(
-                llm_predictor.last_token_usage, chatgpt=True
+                self.llm_predictor.last_token_usage, chatgpt=True
             )
             await self.usage_service.update_usage(
                 embedding_model.last_token_usage, embeddings=True
@@ -925,14 +923,14 @@ class Index_handler:
                 query,
                 response_mode,
                 nodes,
-                llm_predictor,
+                self.llm_predictor,
                 embedding_model,
                 child_branch_factor,
             ),
         )
-        print("The last token usage was ", llm_predictor.last_token_usage)
+        print("The last token usage was ", self.llm_predictor.last_token_usage)
         await self.usage_service.update_usage(
-            llm_predictor.last_token_usage, chatgpt=True
+            self.llm_predictor.last_token_usage, chatgpt=True
         )
         await self.usage_service.update_usage(
             embedding_model.last_token_usage, embeddings=True
@@ -941,7 +939,7 @@ class Index_handler:
         try:
             total_price = round(
                 await self.usage_service.get_price(
-                    llm_predictor.last_token_usage, chatgpt=True
+                    self.llm_predictor.last_token_usage, chatgpt=True
                 )
                 + await self.usage_service.get_price(
                     embedding_model.last_token_usage, embeddings=True
diff --git a/models/search_model.py b/models/search_model.py
index ca656a6..680fc12 100644
--- a/models/search_model.py
+++ b/models/search_model.py
@@ -11,6 +11,7 @@ from pathlib import Path
 import discord
 from bs4 import BeautifulSoup
 import aiohttp
+from langchain.llms import OpenAIChat
 from llama_index import (
     QuestionAnswerPrompt,
     GPTSimpleVectorIndex,
@@ -26,6 +27,7 @@ from llama_index import (
 )
 from llama_index.indices.knowledge_graph import GPTKnowledgeGraphIndex
 from llama_index.langchain_helpers.chatgpt import ChatGPTLLMPredictor
+from llama_index.prompts.chat_prompts import CHAT_REFINE_PROMPT
 from llama_index.prompts.prompt_type import PromptType
 from llama_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
 from langchain import OpenAI
@@ -329,7 +331,7 @@ class Search:
 
         embedding_model = OpenAIEmbedding()
 
-        llm_predictor = ChatGPTLLMPredictor()
+        llm_predictor = LLMPredictor(llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo"))
 
         if not deep:
             embed_model_mock = MockEmbedding(embed_dim=1536)
@@ -369,7 +371,7 @@ class Search:
             )
             price += total_usage_price
         else:
-            llm_predictor_deep = ChatGPTLLMPredictor()
+            llm_predictor_deep = LLMPredictor(llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo"))
 
             # Try a mock call first
             llm_predictor_mock = MockLLMPredictor(4096)
@@ -451,6 +453,7 @@ class Search:
                     query,
                     embed_model=embedding_model,
                     llm_predictor=llm_predictor,
+                    refine_template=CHAT_REFINE_PROMPT,
                     similarity_top_k=nodes or DEFAULT_SEARCH_NODES,
                     text_qa_template=self.qaprompt,
                     use_async=True,
@@ -458,17 +461,6 @@ class Search:
                 ),
             )
         else:
-            # response = await self.loop.run_in_executor(
-            #     None,
-            #     partial(
-            #         index.query,
-            #         query,
-            #         include_text=True,
-            #         embed_model=embedding_model,
-            #         llm_predictor=llm_predictor_deep,
-            #         use_async=True,
-            #     ),
-            # )
             response = await self.loop.run_in_executor(
                 None,
                 partial(
@@ -476,6 +468,7 @@ class Search:
                     index.query,
                     query,
                     embedding_mode="hybrid",
                     llm_predictor=llm_predictor,
+                    refine_template=CHAT_REFINE_PROMPT,
                     include_text=True,
                     embed_model=embedding_model,
                     use_async=True,
diff --git a/requirements.txt b/requirements.txt
index dcdea40..e3ac1d1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,13 +12,13 @@ pinecone-client==2.1.0
 sqlitedict==2.1.0
 backoff==2.2.1
 flask==2.2.3
-llama-index==0.4.21
+llama-index==0.4.23
 PyPDF2==3.0.1
 youtube_transcript_api==0.5.0
 sentencepiece==0.1.97
 protobuf==3.20.2
 python-pptx==0.6.21
 sentence-transformers==2.2.2
-langchain==0.0.93
+langchain==0.0.104
 openai-whisper
 unidecode==1.3.6
diff --git a/requirements_base.txt b/requirements_base.txt
index 41a7b37..686e24f 100644
--- a/requirements_base.txt
+++ b/requirements_base.txt
@@ -12,11 +12,11 @@ pinecone-client==2.1.0
 sqlitedict==2.1.0
 backoff==2.2.1
 flask==2.2.3
-llama-index==0.4.21
+llama-index==0.4.23
 PyPDF2==3.0.1
 youtube_transcript_api==0.5.0
 sentencepiece==0.1.97
 protobuf==3.20.2
 python-pptx==0.6.21
-langchain==0.0.93
+langchain==0.0.104
 unidecode==1.3.6
\ No newline at end of file
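Note on the change above: the diff retires llama-index's `ChatGPTLLMPredictor` helper in favor of the generic `LLMPredictor` wrapping langchain's `OpenAIChat`, and threads `CHAT_REFINE_PROMPT` into every `index.query(...)` call so that gpt-3.5-turbo receives a chat-style refine template rather than the completion-style default. Below is a minimal sketch of that pattern, assuming the versions pinned above (llama-index==0.4.23, langchain==0.0.104); the index path and query string are placeholders, not values from this diff:

```python
# Sketch of the predictor/refine-template pattern adopted in this diff.
# Assumes llama-index==0.4.23 and langchain==0.0.104; "index.json" and the
# query string below are hypothetical placeholders.
from langchain.llms import OpenAIChat
from llama_index import GPTSimpleVectorIndex, LLMPredictor
from llama_index.prompts.chat_prompts import CHAT_REFINE_PROMPT

# Generic LLMPredictor wrapping langchain's chat model, replacing the
# removed ChatGPTLLMPredictor helper.
llm_predictor = LLMPredictor(
    llm=OpenAIChat(temperature=0, model_name="gpt-3.5-turbo")
)

# Load a previously saved index from disk (0.4.x-era API).
index = GPTSimpleVectorIndex.load_from_disk("index.json")

# Pass the chat-tuned refine template so the refine step sends gpt-3.5-turbo
# a chat-formatted prompt instead of the completion-style default.
response = index.query(
    "What is this document about?",
    llm_predictor=llm_predictor,
    refine_template=CHAT_REFINE_PROMPT,
)
print(response)
```

After each query, the predictor's `last_token_usage` attribute holds the token count for that call, which is what the `usage_service.update_usage(...)` and `get_price(...)` calls in the diff read to bill usage at the chatgpt rate.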