diff --git a/README.md b/README.md
index d2eee70..48d4008 100644
--- a/README.md
+++ b/README.md
@@ -21,9 +21,9 @@ A big shoutout to `CrypticHeaven-Lab` for hitting our first sponsorship goal!
 
-**PERMANENT MEMORY FOR CONVERSATIONS WORK IS STILL UNDERWAY, APOLOGIES FOR THE DELAY, COMING SOON!**
-
 # Recent Notable Updates
+- **Permanent memory with embeddings and PineconeDB finished!** - An initial alpha version of permanent memory is now done! Using embeddings, you can have indefinitely long, accurate conversations with GPT3 while saving tokens. *Please read the Permanent Memory section for more information!*
+
 
 - **Multi-user, group chats with GPT3** - Multiple users can converse with GPT3 in a chat now, and it will know that there are multiple distinct users chatting with it!
 
@@ -36,13 +36,10 @@ A big shoutout to `CrypticHeaven-Lab` for hitting our first sponsorship goal!
 
 - Custom conversation openers from https://github.com/f/awesome-chatgpt-prompts were integrated into the bot, check out `/gpt converse opener_file`! The bot now has built in support to make GPT3 behave like various personalities, such as a life coach, python interpreter, interviewer, text based adventure game, and much more!
 
-
-- Autocomplete for settings and various commands to make it easier to use the bot!
-
 # Features
 - **Directly prompt GPT3 with `/gpt ask <prompt>`**
 
-- **Have conversations with the bot, just like chatgpt, with `/gpt converse`** - Conversations happen in threads that get automatically cleaned up!
+- **Have long-term, permanent conversations with the bot, just like ChatGPT, with `/gpt converse`** - Conversations happen in threads that get automatically cleaned up!
 
 - **DALL-E Image Generation** - Generate DALL-E AI images right in discord with `/dalle draw <prompt>`! It even supports multiple image qualities, multiple images, creating image variants, retrying, and saving images.
 
@@ -122,6 +119,32 @@ These commands are grouped, so each group has a prefix but you can easily tab co
 
 - This uses the OpenAI Moderations endpoint to check for messages, requests are only sent to the moderations endpoint at a MINIMUM request gap of 0.5 seconds, to ensure you don't get blocked and to ensure reliability.
 
 - The bot uses numerical thresholds to determine whether a message is toxic or not, and I have manually tested and fine tuned these thresholds to a point that I think is good, please open an issue if you have any suggestions for the thresholds!
 
+# Permanent Memory
+Permanent memory has now been implemented in the bot, using the OpenAI Ada embeddings endpoint and Pinecone DB.
+
+PineconeDB is a vector database, and the OpenAI Ada embeddings endpoint turns pieces of text into embeddings. This feature works by embedding both the user prompts and the GPT3 responses, storing them in a pinecone index, and then retrieving the most relevant bits of earlier conversation whenever a new user prompt is given in a conversation.
+
+**You do NOT need to use pinecone. If you do not define a `PINECONE_TOKEN` in your `.env` file, the bot will not use pinecone, and will fall back to conversation summarization as the long-term conversation method instead.**
+
+To enable permanent memory with pinecone, you must define a `PINECONE_TOKEN` in your `.env` file as follows (along with the other variables too):
+```env
+PINECONE_TOKEN="87juwi58-1jk9-9182-9b3c-f84d90e8bshq"
+```
+
+To get a pinecone token, you can sign up for a free pinecone account here: https://app.pinecone.io/ and click the "API Keys" section in the left navbar to find the key (I am not affiliated with Pinecone).
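+
+Once the token is set, here is roughly what the bot does with it under the hood. This is a simplified sketch with placeholder values and illustrative data, not the bot's exact code; see `models/pinecone_service_model.py` and `gpt3discord.py` for the real implementation:
+
+```python
+import os
+import pinecone
+
+# Connect to the index using the token from your .env file
+pinecone.init(api_key=os.getenv("PINECONE_TOKEN"), environment="us-west1-gcp")
+index = pinecone.Index("conversation-embeddings")
+
+# Each user prompt and each GPT3 response is turned into a 1536-dimensional vector by the
+# OpenAI Ada embeddings endpoint, then upserted into the index. The text itself is used as
+# the vector id, and the entry is tagged with the id of its conversation thread.
+text = "'Kaveen': What is my favourite colour? <|endofstatement|>"
+embedding = [0.0] * 1536  # placeholder; the bot gets this vector from the embeddings endpoint
+index.upsert([(text, embedding, {"conversation_id": 1234, "timestamp": 1673000000})])
+
+# When a new prompt arrives, the most relevant earlier lines from the same thread are
+# retrieved and spliced back into the prompt that is sent to GPT3.
+matches = index.query(
+    vector=embedding,
+    top_k=10,
+    include_metadata=True,
+    filter={"conversation_id": 1234},
+)
+```
+
+This is also why the index you create below needs `1536` dimensions: that is the size of the vectors produced by the Ada embeddings model.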
+
+After signing up for a free pinecone account, you need to create an index in pinecone. To do this, go to the pinecone dashboard and click "Create Index" on the top right.
+
+
+
+Then, name the index `conversation-embeddings`, set the dimensions to `1536`, and set the metric to `DotProduct`:
+
+
+
+One important thing to keep in mind: pinecone indexes are currently not automatically cleared by the bot, so you will eventually need to clear the index manually through the pinecone website if things start getting too slow (although it should be a very long time until this happens). Entries in the index are tagged, in their `metadata` field, with the id of the conversation thread they belong to.
+
+Permanent memory using pinecone is still in alpha. I will be working on cleaning up this work, adding auto-clearing, and optimizing for stability and reliability; any help and feedback is appreciated (**add me on Discord Kaveen#0001 for pinecone help**)! If at any time you're having too many issues with pinecone, simply remove the `PINECONE_TOKEN` line from your `.env` file and the bot will revert to using conversation summarization.
+
 # Configuration
 
diff --git a/cogs/gpt_3_commands_and_converser.py b/cogs/gpt_3_commands_and_converser.py
index b1c9ba9..136b58d 100644
--- a/cogs/gpt_3_commands_and_converser.py
+++ b/cogs/gpt_3_commands_and_converser.py
@@ -14,7 +14,7 @@ from models.deletion_service_model import Deletion
 from models.env_service_model import EnvService
 from models.message_model import Message
 from models.moderations_service_model import Moderation
-from models.user_model import RedoUser, Thread
+from models.user_model import RedoUser, Thread, EmbeddedConversationItem
 from models.check_model import Check
 from models.autocomplete_model import Settings_autocompleter, File_autocompleter
 from collections import defaultdict
@@ -38,6 +38,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         DEBUG_GUILD,
         DEBUG_CHANNEL,
         data_path: Path,
+        pinecone_service,
     ):
         super().__init__()
         self.data_path = data_path
@@ -67,6 +68,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         self.moderation_alerts_channel = EnvService.get_moderations_alert_channel()
         self.moderation_enabled_guilds = []
         self.moderation_tasks = {}
+        self.pinecone_service = pinecone_service
 
         try:
             conversation_file_path = data_path / "conversation_starter_pretext.txt"
@@ -519,7 +521,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
             new_conversation_history.append(
                 "\nContinue the conversation, paying very close attention to things told you, such as their name, and personal details.\n"
             )
-            # Get the last entry from the user's conversation history
+            # Get the last entry from the thread's conversation history
             new_conversation_history.append(
                 self.conversation_threads[message.channel.id].history[-1] + "\n"
             )
@@ -569,7 +571,6 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
                 )
                 self.conversation_threads[after.channel.id].count += 1
 
-            print("Doing the encapsulated send")
             await self.encapsulated_send(
                 id=after.channel.id,
                 prompt=edited_content,
@@ -613,7 +614,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         # GPT3 command
         if conversing:
             # Extract all the text after the !g and use it as the prompt.
- prompt = content # dead store but its okay :3 + prompt = content await self.check_conversation_limit(message) @@ -640,6 +641,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"): await self.deletion_queue.put(deletion_message) return + if message.channel.id in self.awaiting_thread_responses: message = await message.reply( "This thread is already waiting for a response from GPT3. Please wait for it to respond before sending another message." @@ -662,24 +664,36 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"): original_message[message.author.id] = message.id - self.conversation_threads[message.channel.id].history.append( - f"\n'{message.author.display_name}': {prompt} <|endofstatement|>\n" - ) + if not self.pinecone_service: + self.conversation_threads[message.channel.id].history.append( + f"\n'{message.author.display_name}': {prompt} <|endofstatement|>\n" + ) # increment the conversation counter for the user self.conversation_threads[message.channel.id].count += 1 # Send the request to the model # If conversing, the prompt to send is the history, otherwise, it's just the prompt + if self.pinecone_service or message.channel.id not in self.conversation_threads: + primary_prompt = prompt + else: + primary_prompt = "".join( + self.conversation_threads[message.channel.id].history + ) await self.encapsulated_send( message.channel.id, - prompt - if message.channel.id not in self.conversation_threads - else "".join(self.conversation_threads[message.channel.id].history), + primary_prompt, message, ) + def cleanse_response(self, response_text): + response_text = response_text.replace("GPTie:\n", "") + response_text = response_text.replace("GPTie:", "") + response_text = response_text.replace("GPTie: ", "") + response_text = response_text.replace("<|endofstatement|>", "") + return response_text + # ctx can be of type AppContext(interaction) or Message async def encapsulated_send( self, @@ -697,14 +711,84 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"): from_context = isinstance(ctx, discord.ApplicationContext) + tokens = self.usage_service.count_tokens(new_prompt) + try: - tokens = self.usage_service.count_tokens(new_prompt) - # Check if the prompt is about to go past the token limit - if ( + # This is the EMBEDDINGS CASE + if self.pinecone_service and not from_g_command: + # The conversation_id is the id of the thread + conversation_id = ctx.channel.id + + # Create an embedding and timestamp for the prompt + new_prompt = prompt.encode("ascii", "ignore").decode() + prompt_less_author = f"{new_prompt} <|endofstatement|>\n" + + user_displayname = ctx.user.name if isinstance(ctx, discord.ApplicationContext) else ctx.author.display_name + + new_prompt = f"\n'{user_displayname}': {new_prompt} <|endofstatement|>\n" + + # print("Creating embedding for ", prompt) + # Print the current timestamp + timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", "")) + + starter_conversation_item = EmbeddedConversationItem( + str(self.conversation_threads[ctx.channel.id].history[0]), 0) + self.conversation_threads[ctx.channel.id].history[0] = starter_conversation_item + + new_prompt_item = EmbeddedConversationItem(new_prompt, timestamp) + + self.conversation_threads[conversation_id].history.append(new_prompt_item) + + # Create and upsert the embedding for the conversation id, prompt, timestamp + embedding = await self.pinecone_service.upsert_conversation_embedding(self.model, conversation_id, + new_prompt, timestamp) + + embedding_prompt_less_author = await 
self.model.send_embedding_request(prompt_less_author) # Use the version of + # the prompt without the author's name for better clarity on retrieval. + + # Now, build the new prompt by getting the X most similar with pinecone + similar_prompts = self.pinecone_service.get_n_similar(conversation_id, embedding_prompt_less_author, + n=self.model.num_conversation_lookback) + + # When we are in embeddings mode, only the pre-text is contained in self.conversation_threads[message.channel.id].history, so we + # can use that as a base to build our new prompt + prompt_with_history = [self.conversation_threads[ctx.channel.id].history[0]] + + # Append the similar prompts to the prompt with history + prompt_with_history += [EmbeddedConversationItem(prompt, timestamp) for prompt, timestamp in + similar_prompts] + + # iterate UP TO the last X prompts in the history + for i in range(1, min(len(self.conversation_threads[ctx.channel.id].history), self.model.num_static_conversation_items)): + prompt_with_history.append(self.conversation_threads[ctx.channel.id].history[-i]) + + # remove duplicates from prompt_with_history + prompt_with_history = list(dict.fromkeys(prompt_with_history)) + + # Sort the prompt_with_history by increasing timestamp + prompt_with_history.sort(key=lambda x: x.timestamp) + + # Ensure that the last prompt in this list is the prompt we just sent (new_prompt_item) + if prompt_with_history[-1] != new_prompt_item: + try: + prompt_with_history.remove(new_prompt_item) + except ValueError: + pass + prompt_with_history.append(new_prompt_item) + + prompt_with_history = "".join([item.text for item in prompt_with_history]) + + new_prompt = prompt_with_history + + tokens = self.usage_service.count_tokens(new_prompt) + + # Summarize case + elif ( id in self.conversation_threads and tokens > self.model.summarize_threshold and not from_g_command + and not self.pinecone_service # This should only happen if we are not doing summarizations. ): # We don't need to worry about the differences between interactions and messages in this block, @@ -741,7 +825,6 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"): return # Send the request to the model - print("About to send model request") response = await self.model.send_request( new_prompt, tokens=tokens, @@ -752,9 +835,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"): ) # Clean the request response - response_text = str(response["choices"][0]["text"]) - response_text = response_text.replace("GPTie: ", "") - response_text = response_text.replace("<|endofstatement|>", "") + response_text = self.cleanse_response(str(response["choices"][0]["text"])) if from_g_command: # Append the prompt to the beginning of the response, in italics, then a new line @@ -769,11 +850,31 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"): ) # If the user is conversing, add the GPT response to their conversation history. - if id in self.conversation_threads and not from_g_command: + if id in self.conversation_threads and not from_g_command and not self.pinecone_service: self.conversation_threads[id].history.append( "\nGPTie: " + str(response_text) + "<|endofstatement|>\n" ) + # Embeddings case! 
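+            # Store the GPT3 response in pinecone as well, so it can be recalled in later turns of this conversation.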
+ elif id in self.conversation_threads and not from_g_command and self.pinecone_service: + conversation_id = id + + # Create an embedding and timestamp for the prompt + response_text = "\nGPTie: " + str(response_text) + "<|endofstatement|>\n" + + response_text = response_text.encode("ascii", "ignore").decode() + + # Print the current timestamp + timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", "")) + self.conversation_threads[conversation_id].history.append(EmbeddedConversationItem(response_text, timestamp)) + + # Create and upsert the embedding for the conversation id, prompt, timestamp + embedding = await self.pinecone_service.upsert_conversation_embedding(self.model, conversation_id, + response_text, timestamp) + + # Cleanse + response_text = self.cleanse_response(response_text) + # If we don't have a response message, we are not doing a redo, send as a new message(s) if not response_message: if len(response_text) > self.TEXT_CUTOFF: @@ -1037,20 +1138,22 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"): self.awaiting_responses.append(user_id_normalized) self.awaiting_thread_responses.append(thread.id) - self.conversation_threads[thread.id].history.append( - f"\n'{ctx.user.name}': {opener} <|endofstatement|>\n" - ) + if not self.pinecone_service: + self.conversation_threads[thread.id].history.append( + f"\n'{ctx.user.name}': {opener} <|endofstatement|>\n" + ) self.conversation_threads[thread.id].count += 1 await self.encapsulated_send( thread.id, opener - if thread.id not in self.conversation_threads + if thread.id not in self.conversation_threads or self.pinecone_service else "".join(self.conversation_threads[thread.id].history), thread_message, ) self.awaiting_responses.remove(user_id_normalized) + self.awaiting_thread_responses.remove(thread.id) self.conversation_thread_owners[user_id_normalized] = thread.id diff --git a/conversation_starter_pretext.txt b/conversation_starter_pretext.txt index fba9fea..f69c3a6 100644 --- a/conversation_starter_pretext.txt +++ b/conversation_starter_pretext.txt @@ -34,4 +34,4 @@ GPTie: [RESPONSE TO MESSAGE 1] <|endofstatement|> GPTie: [RESPONSE TO MESSAGE 2] <|endofstatement|> ... -You're a regular discord user, be friendly, casual, and fun, speak with "lol", "haha", and other slang when it seems fitting, and use emojis in your responses in a way that makes sense, avoid repeating yourself at all costs. +You're a regular discord user, be friendly, casual, and fun, speak with "lol", "haha", and other slang when it seems fitting, and use emojis in your responses in a way that makes sense, avoid repeating yourself at all costs. Never say "GPTie" when responding. diff --git a/gpt3discord.py b/gpt3discord.py index 14871bd..271174f 100644 --- a/gpt3discord.py +++ b/gpt3discord.py @@ -4,10 +4,13 @@ import traceback from pathlib import Path import discord +import pinecone from dotenv import load_dotenv from pycord.multicog import apply_multicog import os +from models.pinecone_service_model import PineconeService + if sys.platform == "win32": separator = "\\" else: @@ -24,7 +27,23 @@ from models.message_model import Message from models.openai_model import Model from models.usage_service_model import UsageService -__version__ = "3.1.2" +__version__ = "4.0" + +""" +The pinecone service is used to store and retrieve conversation embeddings. 
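+The index name is currently fixed to "conversation-embeddings", and each stored embedding is tagged with the id of its conversation thread in the entry metadata.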
+""" +try: + PINECONE_TOKEN = os.getenv("PINECONE_TOKEN") +except: + PINECONE_TOKEN = None + +pinecone_service = None +if PINECONE_TOKEN: + pinecone.init(api_key=PINECONE_TOKEN, environment="us-west1-gcp") + PINECONE_INDEX = "conversation-embeddings" # This will become unfixed later. + pinecone_service = PineconeService(pinecone.Index(PINECONE_INDEX)) + print("Got the pinecone service") + """ Message queueing for the debug service, defer debug messages to be sent later so we don't hit rate limits. @@ -85,6 +104,7 @@ async def main(): debug_guild, debug_channel, data_path, + pinecone_service=pinecone_service, ) ) diff --git a/models/autocomplete_model.py b/models/autocomplete_model.py index 80b9fa6..1d845d9 100644 --- a/models/autocomplete_model.py +++ b/models/autocomplete_model.py @@ -27,12 +27,17 @@ class Settings_autocompleter: ctx: discord.AutocompleteContext, ): # Behaves a bit weird if you go back and edit the parameter without typing in a new command values = { + "max_conversation_length": [str(num) for num in range(1,500,2)], + "num_images": [str(num) for num in range(1,4+1)], "mode": ["temperature", "top_p"], "model": ["text-davinci-003", "text-curie-001"], "low_usage_mode": ["True", "False"], "image_size": ["256x256", "512x512", "1024x1024"], - "summarize_conversastion": ["True", "False"], + "summarize_conversation": ["True", "False"], "welcome_message_enabled": ["True", "False"], + "num_static_conversation_items": [str(num) for num in range(5,20+1)], + "num_conversation_lookback": [str(num) for num in range(5,15+1)], + "summarize_threshold": [str(num) for num in range(800, 3500, 50)] } if ctx.options["parameter"] in values.keys(): return [value for value in values[ctx.options["parameter"]]] diff --git a/models/message_model.py b/models/message_model.py index 66c1219..3221661 100644 --- a/models/message_model.py +++ b/models/message_model.py @@ -20,7 +20,10 @@ class Message: message = await message_queue.get() # Send the message - await message.channel.send(message.content) + try: + await message.channel.send(message.content) + except: + pass # Sleep for a short time before processing the next message # This will prevent the bot from spamming messages too quickly diff --git a/models/openai_model.py b/models/openai_model.py index 52e2133..160f4f1 100644 --- a/models/openai_model.py +++ b/models/openai_model.py @@ -3,6 +3,7 @@ import functools import math import os import tempfile +import traceback import uuid from typing import Tuple, List, Any @@ -23,6 +24,7 @@ class Mode: class Models: DAVINCI = "text-davinci-003" CURIE = "text-curie-001" + EMBEDDINGS = "text-embedding-ada-002" class ImageSize: @@ -54,6 +56,8 @@ class Model: self._summarize_threshold = 2500 self.model_max_tokens = 4024 self._welcome_message_enabled = True + self._num_static_conversation_items = 6 + self._num_conversation_lookback = 10 try: self.IMAGE_SAVE_PATH = os.environ["IMAGE_SAVE_PATH"] @@ -79,6 +83,32 @@ class Model: # Use the @property and @setter decorators for all the self fields to provide value checking + @property + def num_static_conversation_items(self): + return self._num_static_conversation_items + + @num_static_conversation_items.setter + def num_static_conversation_items(self, value): + value = int(value) + if value < 3: + raise ValueError("num_static_conversation_items must be >= 3") + if value > 20: + raise ValueError("num_static_conversation_items must be <= 20, this is to ensure reliability and reduce token wastage!") + self._num_static_conversation_items = value + + @property + def 
num_conversation_lookback(self): + return self._num_conversation_lookback + + @num_conversation_lookback.setter + def num_conversation_lookback(self, value): + value = int(value) + if value < 3: + raise ValueError("num_conversation_lookback must be >= 3") + if value > 15: + raise ValueError("num_conversation_lookback must be <= 15, this is to ensure reliability and reduce token wastage!") + self._num_conversation_lookback = value + @property def welcome_message_enabled(self): return self._welcome_message_enabled @@ -188,9 +218,9 @@ class Model: value = int(value) if value < 1: raise ValueError("Max conversation length must be greater than 1") - if value > 30: + if value > 500: raise ValueError( - "Max conversation length must be less than 30, this will start using credits quick." + "Max conversation length must be less than 500, this will start using credits quick." ) self._max_conversation_length = value @@ -317,6 +347,28 @@ class Model: + str(response["error"]["message"]) ) + async def send_embedding_request(self, text): + async with aiohttp.ClientSession() as session: + payload = { + "model": Models.EMBEDDINGS, + "input": text, + } + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self.openai_key}", + } + async with session.post( + "https://api.openai.com/v1/embeddings", json=payload, headers=headers + ) as resp: + response = await resp.json() + + try: + return response["data"][0]["embedding"] + except Exception as e: + print(response) + traceback.print_exc() + return + async def send_moderations_request(self, text): # Use aiohttp to send the above request: async with aiohttp.ClientSession() as session: @@ -422,8 +474,8 @@ class Model: "https://api.openai.com/v1/completions", json=payload, headers=headers ) as resp: response = await resp.json() - print(f"Payload -> {payload}") - print(f"Response -> {response}") + #print(f"Payload -> {payload}") + #print(f"Response -> {response}") # Parse the total tokens used for this request and response pair from the response await self.valid_text_request(response) diff --git a/models/pinecone_service_model.py b/models/pinecone_service_model.py new file mode 100644 index 0000000..708d63e --- /dev/null +++ b/models/pinecone_service_model.py @@ -0,0 +1,43 @@ +import pinecone + + +class PineconeService: + + def __init__(self, index: pinecone.Index): + self.index = index + + def upsert_basic(self, text, embeddings): + self.index.upsert([(text, embeddings)]) + + def get_all_for_conversation(self, conversation_id: int): + response = self.index.query(top_k=100, filter={"conversation_id": conversation_id}) + return response + + async def upsert_conversation_embedding(self, model, conversation_id: int, text, timestamp): + # If the text is > 512 characters, we need to split it up into multiple entries. 
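+        # Each chunk (up to 500 characters) is embedded separately; the embedding of the first chunk is returned.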
+ first_embedding = None + if len(text) > 500: + # Split the text into 512 character chunks + chunks = [text[i:i + 500] for i in range(0, len(text), 500)] + for chunk in chunks: + print("The split chunk is ", chunk) + + # Create an embedding for the split chunk + embedding = await model.send_embedding_request(chunk) + if not first_embedding: + first_embedding = embedding + self.index.upsert([(chunk, embedding)], metadata={"conversation_id": conversation_id, "timestamp": timestamp}) + return first_embedding + else: + embedding = await model.send_embedding_request(text) + self.index.upsert([(text, embedding, {"conversation_id": conversation_id, + "timestamp": timestamp})]) + return embedding + + def get_n_similar(self, conversation_id: int, embedding, n=10): + response = self.index.query(vector=embedding, top_k=n, include_metadata=True, filter={"conversation_id": conversation_id}) + print(response) + relevant_phrases = [(match['id'],match['metadata']['timestamp']) for match in response['matches']] + # Sort the relevant phrases based on the timestamp + relevant_phrases.sort(key=lambda x: x[1]) + return relevant_phrases \ No newline at end of file diff --git a/models/user_model.py b/models/user_model.py index 990e41f..67cedf5 100644 --- a/models/user_model.py +++ b/models/user_model.py @@ -72,3 +72,37 @@ class Thread: def __str__(self): return self.__repr__() + +class EmbeddedConversationItem: + def __init__(self, text, timestamp): + self.text = text + self.timestamp = int(timestamp) + + def __repr__(self): + return self.text + + def __str__(self): + return self.__repr__() + + def __eq__(self, other): + return self.text == other.text and self.timestamp == other.timestamp + + def __hash__(self): + return hash(self.text) + hash(self.timestamp) + + def __lt__(self, other): + return self.timestamp < other.timestamp + + def __gt__(self, other): + return self.timestamp > other.timestamp + + def __le__(self, other): + return self.timestamp <= other.timestamp + + def __ge__(self, other): + return self.timestamp >= other.timestamp + + def __ne__(self, other): + return not self.__eq__(other) + +
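+# EmbeddedConversationItem is hashable and ordered by timestamp so that the conversation
+# cog can deduplicate retrieved items with dict.fromkeys() and sort the assembled history
+# chronologically before it builds the final prompt.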