From 54291c60329a4af5f533a94abaae2bb6f9fbf743 Mon Sep 17 00:00:00 2001
From: Kaveen Kumarasinghe
Date: Fri, 17 Mar 2023 17:14:39 -0400
Subject: [PATCH] channel conversations, gpt4 pricing, summarize fix

---
 README.md                 |  6 +++--
 cogs/commands.py          |  2 +-
 cogs/text_service_cog.py  | 15 ++++--------
 gpt3discord.py            |  2 +-
 models/openai_model.py    | 50 +++++++++++++++++++++++++--------------
 pyproject.toml            |  4 ++--
 requirements.txt          |  4 ++--
 requirements_base.txt     |  4 ++--
 services/text_service.py  | 12 ++--------
 services/usage_service.py | 13 ++++++----
 10 files changed, 60 insertions(+), 52 deletions(-)

diff --git a/README.md b/README.md
index 9fba5cc..f1548ab 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,9 @@ SUPPORT SERVER FOR BOT SETUP: https://discord.gg/WvAHXDMS7Q (You can try out the
 
 # Recent Notable Updates
 
-- **ChatGPT API Integration** - The ChatGPT API has been released and our bot is now fully integrated with it! Change to model to one of the ChatGPT turbo models with `/system settings`, or include the model as a param in your `/gpt converse`, `/gpt ask`, etc requests! The two currently available ChatGPT models are `gpt-3.5-turbo` and `gpt-3.5-turbo-0301`. This change is very experimental, so we're looking for your feedback and input on what you think of the new model's performance, especially for search and indexing functionality.
+- **Full-channel conversations** - Start a conversation in a full Discord channel, retained across restarts, with permanent memory. Set `use_threads` to `False` in `/gpt converse`!
+
+- **GPT4 API Integration** - GPT-4 is fully supported by our bot! If you're off the waitlist, put your organization ID in the environment file and it will work out of the box, letting you select gpt-4 models in `/system settings` and as per-command model parameters!
 
 - **AI-Assisted Google Search** - Use GPT3 to browse the internet, you can search the internet for a query and GPT3 will look at the top websites for you automatically and formulate an answer to your query! You can also ask follow-up questions, this is kinda like BingGPT, but much better lol!
 
@@ -105,7 +107,7 @@ These commands are grouped, so each group has a prefix but you can easily tab co
 
 `/gpt edit ` Use the bot to edit text using the given instructions for how to do it, currently an alpha openai feature so results might vary. Codex uses a model trained on code. Editing is currently free
 
-`/gpt converse ` - Start a conversation with the bot, like ChatGPT
+`/gpt converse ` - Start a conversation with the bot, like ChatGPT. You can also set the option `use_threads:False` to start the conversation in a full Discord channel!
 
 - `opener:` - Start a conversation with the bot, with a custom opener text (this is useful if you want it to take on a custom personality from the start).
 
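A quick sketch of what the new `use_threads` flag changes at the code level, mirroring the converse flow in cogs/text_service_cog.py further down; this is an illustration in py-cord terms, not the exact implementation:

```python
# Sketch: how use_threads selects where the conversation lives.
async def start_conversation(ctx, use_threads: bool = True):
    await ctx.respond("Conversation started.")
    if use_threads:
        # Threaded conversation: spawn a thread off the invoking channel.
        target = await ctx.channel.create_thread(
            name=ctx.user.name + "'s conversation with GPT",
            auto_archive_duration=60,
        )
    else:
        # Full-channel conversation: the channel itself holds the history,
        # which is what lets it survive bot restarts.
        target = ctx.channel
    return target
```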
diff --git a/cogs/commands.py b/cogs/commands.py
index ba06419..ff14acc 100644
--- a/cogs/commands.py
+++ b/cogs/commands.py
@@ -503,7 +503,7 @@ class Commands(discord.Cog, name="Commands"):
         name="use_threads",
         description="Set this to false to start a channel conversation",
         required=False,
-        default=False,
+        default=True,
     )
     @discord.guild_only()
     async def converse(
diff --git a/cogs/text_service_cog.py b/cogs/text_service_cog.py
index 6dd6ee6..b17e37c 100644
--- a/cogs/text_service_cog.py
+++ b/cogs/text_service_cog.py
@@ -388,8 +388,8 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
             if thread:
                 try:
                     thread = await self.bot.fetch_channel(channel_id)
-                    await thread.edit(locked=True)
                     await thread.edit(name="Closed-GPT")
+                    await thread.edit(archived=True)
                 except Exception:
                     traceback.print_exc()
         except Exception:
@@ -405,8 +405,8 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
             if thread:
                 try:
                     thread = await self.bot.fetch_channel(thread_id)
-                    await thread.edit(locked=True)
                     await thread.edit(name="Closed-GPT")
+                    await thread.edit(archived=True)
                 except Exception:
                     traceback.print_exc()
@@ -606,17 +606,11 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         )
         new_conversation_history.append(
             EmbeddedConversationItem(
-                "\nThis conversation has some context from earlier, which has been summarized as follows: ",
-                0,
-            )
-        )
-        new_conversation_history.append(EmbeddedConversationItem(summarized_text, 0))
-        new_conversation_history.append(
-            EmbeddedConversationItem(
-                "\nContinue the conversation, paying very close attention to things told you, such as their name, and personal details.\n",
+                f"\nThis conversation has some context from earlier, which has been summarized as follows: {summarized_text}\nContinue the conversation, paying very close attention to things told to you, such as their name and personal details.",
                 0,
             )
         )
+
         # Get the last entry from the thread's conversation history
         new_conversation_history.append(
             EmbeddedConversationItem(
@@ -1077,6 +1071,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
                 name=user.name + "'s conversation with GPT",
                 auto_archive_duration=60,
             )
+            await ctx.respond("Conversation started.")
             target = thread
         else:
             target = ctx.channel
diff --git a/gpt3discord.py b/gpt3discord.py
index 641e9db..1a8258c 100644
--- a/gpt3discord.py
+++ b/gpt3discord.py
@@ -33,7 +33,7 @@ from services.environment_service import EnvService
 from models.openai_model import Model
 
 
-__version__ = "11.0.4"
+__version__ = "11.1.0"
 
 
 PID_FILE = Path("bot.pid")
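The collapse of the three summary items into a single EmbeddedConversationItem above pairs with the message-builder change in models/openai_model.py below: a context summary carries no "username:" prefix, so it gets routed to one system message. A rough standalone sketch of that mapping, with the prefix string taken from the patch and everything else simplified:

```python
import re

CONTEXT_PREFIX = "this conversation has some context from earlier"

def to_chat_message(item_text: str) -> dict:
    """Map one conversation item to a Chat Completions message (sketch)."""
    text = item_text.replace("<|endofstatement|>", "")
    if item_text.strip().lower().startswith(CONTEXT_PREFIX):
        # Summarized context becomes a single system message.
        return {"role": "system", "content": text}
    match = re.search(r"(?<=\n)(.*?)(?=:)", item_text)
    if match:
        username = match.group()
        return {"role": "user", "name": username, "content": text.replace(f"{username}:", "")}
    # Anything else without a "username:" prefix also falls back to system.
    return {"role": "system", "content": text}
```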
diff --git a/models/openai_model.py b/models/openai_model.py
index e5ea5ec..24a9f1f 100644
--- a/models/openai_model.py
+++ b/models/openai_model.py
@@ -620,10 +620,20 @@ class Model:
                 f"{details['exception'].args[0]}"
             )
 
-    async def valid_text_request(self, response):
+    async def valid_text_request(self, response, model=None):
         try:
             tokens_used = int(response["usage"]["total_tokens"])
-            await self.usage_service.update_usage(tokens_used)
+            if model and model in Models.GPT4_MODELS:
+                # GPT-4 bills prompt and completion tokens at different rates,
+                # so report them separately to the usage service.
+                await self.usage_service.update_usage(
+                    tokens_used,
+                    prompt_tokens=int(response["usage"]["prompt_tokens"]),
+                    completion_tokens=int(response["usage"]["completion_tokens"]),
+                    gpt4=True,
+                )
+            else:
+                await self.usage_service.update_usage(tokens_used)
         except Exception as e:
             raise ValueError(
                 "The API returned an invalid response: "
@@ -775,13 +785,6 @@ class Model:
 
         return response
 
-    # async def send_language_detect_request_local(self, text):
-    #     detected = await asyncio.get_running_loop().run_in_executor(
-    #         None, self.detector.compute_language_confidence, text, Language.ENGLISH
-    #     )
-    #     if detected < 0.03:
-    #         return False
-    #     return True
 
     @backoff.on_exception(
         backoff.expo,
@@ -887,13 +890,23 @@ class Model:
                     {"role": "assistant", "name": bot_name_clean, "content": text}
                 )
             else:
-                username = re.search(r"(?<=\n)(.*?)(?=:)", message.text).group()
-                username_clean = self.cleanse_username(username)
-                text = message.text.replace(f"{username}:", "")
-                text = text.replace("<|endofstatement|>", "")
-                messages.append(
-                    {"role": "user", "name": username_clean, "content": text}
-                )
+                try:
+                    # Context summaries injected by the bot carry no
+                    # "username:" prefix, so route them to the system role.
+                    if message.text.strip().lower().startswith(
+                        "this conversation has some context from earlier"
+                    ):
+                        raise ValueError("Context summary message")
+                    username = re.search(r"(?<=\n)(.*?)(?=:)", message.text).group()
+                    username_clean = self.cleanse_username(username)
+                    text = message.text.replace(f"{username}:", "")
+                    text = text.replace("<|endofstatement|>", "")
+                    messages.append(
+                        {"role": "user", "name": username_clean, "content": text}
+                    )
+                except Exception:
+                    text = message.text.replace("<|endofstatement|>", "")
+                    messages.append({"role": "system", "content": text})
 
         print(f"Messages -> {messages}")
         async with aiohttp.ClientSession(raise_for_status=False) as session:
@@ -1044,7 +1057,7 @@ class Model:
                 response = await resp.json()
                 # print(f"Payload -> {payload}")
                 # Parse the total tokens used for this request and response pair from the response
-                await self.valid_text_request(response)
+                await self.valid_text_request(response, model=self.model if model is None else model)
                 print(f"Response -> {response}")
 
         return response
@@ -1078,7 +1091,7 @@ class Model:
                 response = await resp.json()
                 # print(f"Payload -> {payload}")
                 # Parse the total tokens used for this request and response pair from the response
-                await self.valid_text_request(response)
+                await self.valid_text_request(response, model=self.model if model is None else model)
                 print(f"Response -> {response}")
 
         return response
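To see the new accounting path in valid_text_request end to end: GPT-4 responses report prompt and completion tokens separately because the two are billed at different rates. The sketch below runs the routing against a mock response; the contents of GPT4_MODELS here are assumed for illustration, not taken from the patch:

```python
GPT4_MODELS = ["gpt-4", "gpt-4-32k"]  # assumed; the bot reads Models.GPT4_MODELS

mock_response = {
    "usage": {"prompt_tokens": 1200, "completion_tokens": 300, "total_tokens": 1500}
}

def route_usage(response, model=None):
    """Decide which update_usage call the response should produce (sketch)."""
    tokens_used = int(response["usage"]["total_tokens"])
    if model and model in GPT4_MODELS:
        # GPT-4 path: keep prompt and completion counts separate.
        return dict(
            tokens_used=tokens_used,
            prompt_tokens=int(response["usage"]["prompt_tokens"]),
            completion_tokens=int(response["usage"]["completion_tokens"]),
            gpt4=True,
        )
    return dict(tokens_used=tokens_used, gpt4=False)

print(route_usage(mock_response, "gpt-4"))
# {'tokens_used': 1500, 'prompt_tokens': 1200, 'completion_tokens': 300, 'gpt4': True}
```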
diff --git a/pyproject.toml b/pyproject.toml
index b19344a..d9ddcdc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,13 +32,13 @@ dependencies = [
 "sqlitedict==2.1.0",
 "backoff==2.2.1",
 "flask==2.2.3",
-"llama-index==0.4.27",
+"llama-index==0.4.29",
 "PyPDF2==3.0.1",
 "youtube_transcript_api==0.5.0",
 "sentencepiece==0.1.97",
 "protobuf==3.20.2",
 "python-pptx==0.6.21",
-"langchain==0.0.105",
+"langchain==0.0.115",
 "unidecode==1.3.6",
 "tqdm==4.64.1",
 "docx2txt==0.8"
diff --git a/requirements.txt b/requirements.txt
index db526f0..05da893 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,14 +12,14 @@ pinecone-client==2.1.0
 sqlitedict==2.1.0
 backoff==2.2.1
 flask==2.2.3
-llama-index==0.4.27
+llama-index==0.4.29
 PyPDF2==3.0.1
 youtube_transcript_api==0.5.0
 sentencepiece==0.1.97
 protobuf==3.20.2
 python-pptx==0.6.21
 sentence-transformers==2.2.2
-langchain==0.0.105
+langchain==0.0.115
 openai-whisper
 unidecode==1.3.6
 tqdm==4.64.1
diff --git a/requirements_base.txt b/requirements_base.txt
index d73968a..fcac1c7 100644
--- a/requirements_base.txt
+++ b/requirements_base.txt
@@ -12,13 +12,13 @@ pinecone-client==2.1.0
 sqlitedict==2.1.0
 backoff==2.2.1
 flask==2.2.3
-llama-index==0.4.27
+llama-index==0.4.29
 PyPDF2==3.0.1
 youtube_transcript_api==0.5.0
 sentencepiece==0.1.97
 protobuf==3.20.2
 python-pptx==0.6.21
-langchain==0.0.105
+langchain==0.0.115
 unidecode==1.3.6
 tqdm==4.64.1
 docx2txt==0.8
\ No newline at end of file
diff --git a/services/text_service.py b/services/text_service.py
index 9046767..364a69d 100644
--- a/services/text_service.py
+++ b/services/text_service.py
@@ -73,14 +73,6 @@ class TextService:
                 else prompt
             ),
             prompt
 
-        # Determine if we're sending a ChatGPT model request. If chatgpt is in the model name or the default model is a ChatGPT model.
-        # chatgpt_conversation = False
-        # chatgpt = False
-        # if (model and "chatgpt" in model.lower()) or (not model and converser_cog.model.model.lower() in Models.CHATGPT_MODELS):
-        #     chatgpt = True
-        #     if ctx.channel.id in converser_cog.conversation_threads:
-        #         chatgpt_conversation = True
-
         stop = f"{ctx.author.display_name if user is None else user.display_name}:"
         from_context = isinstance(ctx, discord.ApplicationContext)
@@ -260,7 +252,7 @@ class TextService:
             tokens = converser_cog.usage_service.count_tokens(new_prompt)
 
             if (
-                tokens > converser_cog.model.summarize_threshold - 150
-            ):  # 150 is a buffer for the second stage
+                tokens > converser_cog.model.summarize_threshold
+            ):
                 await ctx.reply(
                     "I tried to summarize our current conversation so we could keep chatting, "
                     ...
                 )
                 await converser_cog.end_conversation(ctx)
-                converser_cog.remove_awaiting(ctx.author.id, ctx.channel.id)
+                converser_cog.remove_awaiting(ctx.author.id, ctx.channel.id, False, False)
                 return
             else:
                 await ctx.reply("The conversation context limit has been reached.")
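On the summarize fix itself: the rebuilt prompt is now re-tokenized after summarization and compared directly against summarize_threshold, with the hand-tuned 150-token buffer gone. A self-contained sketch of that check, using the same GPT-2 tokenizer the usage service loads; the threshold value here is illustrative:

```python
from transformers import GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

def count_tokens(text: str) -> int:
    # Mirrors UsageService.count_tokens: token count of the rendered prompt.
    return len(tokenizer(text)["input_ids"])

def conversation_over_limit(new_prompt: str, summarize_threshold: int = 3000) -> bool:
    # After summarization, the comparison is direct -- no extra buffer.
    return count_tokens(new_prompt) > summarize_threshold
```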
diff --git a/services/usage_service.py b/services/usage_service.py
index d8ec005..5771a05 100644
--- a/services/usage_service.py
+++ b/services/usage_service.py
@@ -14,27 +14,32 @@ class UsageService:
             f.close()
         self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
 
-    async def get_price(self, tokens_used, embeddings=False, chatgpt=False):
+    async def get_price(self, tokens_used, prompt_tokens=None, completion_tokens=None, embeddings=False, chatgpt=False, gpt4=False):
         tokens_used = int(tokens_used)
         if chatgpt:
             price = (tokens_used / 1000) * 0.002
             return price
+        elif gpt4:
+            price = (prompt_tokens / 1000) * 0.03 + (completion_tokens / 1000) * 0.06
+            return price
         elif not embeddings:
             price = (
                 tokens_used / 1000
-            ) * 0.02  # Just use the highest rate instead of model-based... I am overestimating on purpose.
+            ) * 0.02
         else:
             price = (tokens_used / 1000) * 0.0004
         return price
 
-    async def update_usage(self, tokens_used, embeddings=False, chatgpt=False):
+    async def update_usage(self, tokens_used, prompt_tokens=None, completion_tokens=None, embeddings=False, chatgpt=False, gpt4=False):
         tokens_used = int(tokens_used)
         if chatgpt:
             price = (tokens_used / 1000) * 0.002
+        elif gpt4:
+            price = (prompt_tokens / 1000) * 0.03 + (completion_tokens / 1000) * 0.06
         elif not embeddings:
             price = (
                 tokens_used / 1000
-            ) * 0.02  # Just use the highest rate instead of model-based... I am overestimating on purpose.
+            ) * 0.02
         else:
             price = (tokens_used / 1000) * 0.0004
         usage = await self.get_usage()
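Finally, a worked example of the new gpt4 pricing branch, using the patch's rates of $0.03 per 1K prompt tokens and $0.06 per 1K completion tokens; the token counts are illustrative:

```python
prompt_tokens = 1200
completion_tokens = 300

# (1200 / 1000) * 0.03 + (300 / 1000) * 0.06 = 0.036 + 0.018 = 0.054
price = (prompt_tokens / 1000) * 0.03 + (completion_tokens / 1000) * 0.06
print(f"${price:.3f}")  # $0.054
```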