diff --git a/cogs/gpt_3_commands_and_converser.py b/cogs/gpt_3_commands_and_converser.py
index b1c9ba9..7fde2b8 100644
--- a/cogs/gpt_3_commands_and_converser.py
+++ b/cogs/gpt_3_commands_and_converser.py
@@ -14,7 +14,7 @@ from models.deletion_service_model import Deletion
 from models.env_service_model import EnvService
 from models.message_model import Message
 from models.moderations_service_model import Moderation
-from models.user_model import RedoUser, Thread
+from models.user_model import RedoUser, Thread, EmbeddedConversationItem
 from models.check_model import Check
 from models.autocomplete_model import Settings_autocompleter, File_autocompleter
 from collections import defaultdict
@@ -38,6 +38,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         DEBUG_GUILD,
         DEBUG_CHANNEL,
         data_path: Path,
+        pinecone_service,
     ):
         super().__init__()
         self.data_path = data_path
@@ -67,6 +68,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         self.moderation_alerts_channel = EnvService.get_moderations_alert_channel()
         self.moderation_enabled_guilds = []
         self.moderation_tasks = {}
+        self.pinecone_service = pinecone_service
 
         try:
             conversation_file_path = data_path / "conversation_starter_pretext.txt"
@@ -519,7 +521,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
                 new_conversation_history.append(
                     "\nContinue the conversation, paying very close attention to things told you, such as their name, and personal details.\n"
                 )
-                # Get the last entry from the user's conversation history
+                # Get the last entry from the thread's conversation history
                 new_conversation_history.append(
                     self.conversation_threads[message.channel.id].history[-1] + "\n"
                 )
@@ -657,6 +659,83 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
 
                 return
 
+            print("BEFORE PINECONE SERVICE CHECK")
+            if self.pinecone_service:
+                # The conversation_id is the id of the thread
+                conversation_id = message.channel.id
+                print("Conversation id is", conversation_id)
+
+                # Create an embedding and timestamp for the prompt
+                prompt = prompt.encode("ascii", "ignore").decode()
+                prompt_less_author = f"{prompt} <|endofstatement|>\n"
+                prompt_with_gpt_instead = f"GPTie: {prompt} <|endofstatement|>\n"
+                prompt = f"\n'{message.author.display_name}': {prompt} <|endofstatement|>\n"
+
+                # print("Creating embedding for ", prompt)
+                # Create a timestamp for this entry
+                timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", ""))
+                print("Timestamp is ", timestamp)
+
+                starter_conversation_item = EmbeddedConversationItem(str(self.conversation_threads[message.channel.id].history[0]), 0)
+                self.conversation_threads[message.channel.id].history[0] = starter_conversation_item
+
+                new_prompt_item = EmbeddedConversationItem(prompt, timestamp)
+
+                self.conversation_threads[conversation_id].history.append(new_prompt_item)
+
+                # Create and upsert the embedding for the conversation id, prompt, timestamp
+                embedding = await self.pinecone_service.upsert_conversation_embedding(self.model, conversation_id, prompt, timestamp)
+
+                embedding_prompt_less_author = await self.model.send_embedding_request(prompt_less_author)
+
+                # Now, build the new prompt by getting the most similar entries (n=5 below) from Pinecone
+                similar_prompts = self.pinecone_service.get_n_similar(conversation_id, embedding_prompt_less_author, n=5)
+
+                # When we are in embeddings mode, only the pre-text is contained in self.conversation_threads[message.channel.id].history, so we
+                # can use that as a base to build our new prompt
+                prompt_with_history = []
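+                # Start with the conversation pre-text (history[0]), then append the retrieved
+                # similar items and the most recent turns below.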
+                prompt_with_history.append(self.conversation_threads[message.channel.id].history[0])
+
+                # Append the similar prompts to the prompt with history
+                prompt_with_history += [EmbeddedConversationItem(prompt, timestamp) for prompt, timestamp in similar_prompts]
+
+                # Also add up to the last two entries from the local history
+                for i in range(1, min(len(self.conversation_threads[message.channel.id].history), 3)):
+                    prompt_with_history.append(self.conversation_threads[message.channel.id].history[-i])
+
+                # remove duplicates from prompt_with_history
+                prompt_with_history = list(dict.fromkeys(prompt_with_history))
+
+                # Sort the prompt_with_history by increasing timestamp
+                prompt_with_history.sort(key=lambda x: x.timestamp)
+
+                # Ensure that the last prompt in this list is the prompt we just sent (new_prompt_item)
+                if prompt_with_history[-1] != new_prompt_item:
+                    try:
+                        prompt_with_history.remove(new_prompt_item)
+                    except ValueError:
+                        pass
+                    prompt_with_history.append(new_prompt_item)
+
+                prompt_with_history = "".join([item.text for item in prompt_with_history])
+
+                print("The prompt with history is", prompt_with_history)
+
+                self.awaiting_responses.append(message.author.id)
+                self.awaiting_thread_responses.append(message.channel.id)
+
+                self.conversation_threads[message.channel.id].count += 1
+
+                original_message[message.author.id] = message.id
+
+                await self.encapsulated_send(
+                    message.channel.id,
+                    prompt_with_history,
+                    message,
+                )
+
+                return
+
             self.awaiting_responses.append(message.author.id)
             self.awaiting_thread_responses.append(message.channel.id)
 
@@ -700,11 +779,13 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
 
         try:
             tokens = self.usage_service.count_tokens(new_prompt)
-            # Check if the prompt is about to go past the token limit
+
+            # Summarization case (this only applies when embeddings are not enabled)
             if (
                 id in self.conversation_threads
                 and tokens > self.model.summarize_threshold
                 and not from_g_command
+                and not self.pinecone_service  # Only summarize when we're not using embeddings.
             ):
 
                 # We don't need to worry about the differences between interactions and messages in this block,
@@ -769,11 +850,34 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
             )
 
             # If the user is conversing, add the GPT response to their conversation history.
-            if id in self.conversation_threads and not from_g_command:
+            # Don't append to the history if we're using embeddings!
+            if id in self.conversation_threads and not from_g_command and not self.pinecone_service:
                 self.conversation_threads[id].history.append(
                     "\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
                 )
 
+            # Embeddings case!
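+            # With Pinecone enabled, the response is stored as an EmbeddedConversationItem and its
+            # embedding is upserted so later turns can retrieve it by similarity.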
+            elif id in self.conversation_threads and not from_g_command and self.pinecone_service:
+                conversation_id = id
+                print("Conversation id is", conversation_id)
+
+                # Create an embedding and timestamp for the response
+                response_text = "\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
+
+                response_text = response_text.encode("ascii", "ignore").decode()
+
+
+                print("Creating embedding for ", response_text)
+                # Create a timestamp for this entry
+                timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", ""))
+                print("Timestamp is ", timestamp)
+                self.conversation_threads[conversation_id].history.append(EmbeddedConversationItem(response_text, timestamp))
+
+                # Create and upsert the embedding for the conversation id, response, timestamp
+                embedding = await self.pinecone_service.upsert_conversation_embedding(self.model, conversation_id,
+                                                                                      response_text, timestamp)
+                print("Embedded the response")
+
             # If we don't have a response message, we are not doing a redo, send as a new message(s)
             if not response_message:
                 if len(response_text) > self.TEXT_CUTOFF:
diff --git a/gpt3discord.py b/gpt3discord.py
index 14871bd..bf785d8 100644
--- a/gpt3discord.py
+++ b/gpt3discord.py
@@ -4,10 +4,13 @@ import traceback
 from pathlib import Path
 
 import discord
+import pinecone
 from dotenv import load_dotenv
 from pycord.multicog import apply_multicog
 import os
 
+from models.pinecone_service_model import PineconeService
+
 if sys.platform == "win32":
     separator = "\\"
 else:
@@ -26,6 +29,22 @@ from models.usage_service_model import UsageService
 
 __version__ = "3.1.2"
 
+"""
+The Pinecone service is used to store and retrieve conversation embeddings.
+"""
+try:
+    PINECONE_TOKEN = os.getenv("PINECONE_TOKEN")
+except Exception:
+    PINECONE_TOKEN = None
+
+pinecone_service = None
+if PINECONE_TOKEN:
+    pinecone.init(api_key=PINECONE_TOKEN, environment="us-west1-gcp")
+    PINECONE_INDEX = "conversation-embeddings"  # This will become configurable later.
+    pinecone_service = PineconeService(pinecone.Index(PINECONE_INDEX))
+    print("Got the pinecone service")
+
+
 """
 Message queueing for the debug service, defer debug messages to be sent later so we don't hit rate limits.
 """
@@ -85,6 +104,7 @@ async def main():
             debug_guild,
             debug_channel,
             data_path,
+            pinecone_service=pinecone_service,
         )
     )
 
diff --git a/models/message_model.py b/models/message_model.py
index 66c1219..3221661 100644
--- a/models/message_model.py
+++ b/models/message_model.py
@@ -20,7 +20,10 @@ class Message:
             message = await message_queue.get()
 
             # Send the message
-            await message.channel.send(message.content)
+            try:
+                await message.channel.send(message.content)
+            except Exception:
+                pass  # ignore send failures so one bad message doesn't stop the queue
 
             # Sleep for a short time before processing the next message
             # This will prevent the bot from spamming messages too quickly
diff --git a/models/openai_model.py b/models/openai_model.py
index 52e2133..75d18ab 100644
--- a/models/openai_model.py
+++ b/models/openai_model.py
@@ -3,6 +3,7 @@ import functools
 import math
 import os
 import tempfile
+import traceback
 import uuid
 
 from typing import Tuple, List, Any
@@ -23,6 +24,7 @@ class Mode:
 class Models:
     DAVINCI = "text-davinci-003"
     CURIE = "text-curie-001"
+    EMBEDDINGS = "text-embedding-ada-002"
 
 
 class ImageSize:
@@ -317,6 +319,27 @@ class Model:
                 + str(response["error"]["message"])
             )
 
+    async def send_embedding_request(self, text):
+        async with aiohttp.ClientSession() as session:
+            payload = {
+                "model": Models.EMBEDDINGS,
+                "input": text,
+            }
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.openai_key}",
+            }
+            async with session.post(
+                "https://api.openai.com/v1/embeddings", json=payload, headers=headers
+            ) as resp:
+                response = await resp.json()
+
+                try:
+                    return response["data"][0]["embedding"]
+                except Exception:
+                    traceback.print_exc()
+                    return
+
     async def send_moderations_request(self, text):
         # Use aiohttp to send the above request:
         async with aiohttp.ClientSession() as session:
@@ -422,8 +445,8 @@ class Model:
                 "https://api.openai.com/v1/completions", json=payload, headers=headers
             ) as resp:
                 response = await resp.json()
-                print(f"Payload -> {payload}")
-                print(f"Response -> {response}")
+                # print(f"Payload -> {payload}")
+                # print(f"Response -> {response}")
 
                 # Parse the total tokens used for this request and response pair from the response
                 await self.valid_text_request(response)
diff --git a/models/pinecone_service_model.py b/models/pinecone_service_model.py
new file mode 100644
index 0000000..708d63e
--- /dev/null
+++ b/models/pinecone_service_model.py
@@ -0,0 +1,43 @@
+import pinecone
+
+
+class PineconeService:
+
+    def __init__(self, index: pinecone.Index):
+        self.index = index
+
+    def upsert_basic(self, text, embeddings):
+        self.index.upsert([(text, embeddings)])
+
+    def get_all_for_conversation(self, conversation_id: int):
+        response = self.index.query(top_k=100, filter={"conversation_id": conversation_id})
+        return response
+
+    async def upsert_conversation_embedding(self, model, conversation_id: int, text, timestamp):
+        # If the text is longer than 500 characters, we split it up into multiple entries.
+        # The text itself doubles as the Pinecone vector id; the first chunk's embedding is returned to the caller.
+        first_embedding = None
+        if len(text) > 500:
+            # Split the text into 500-character chunks
+            chunks = [text[i:i + 500] for i in range(0, len(text), 500)]
+            for chunk in chunks:
+                print("The split chunk is ", chunk)
+
+                # Create an embedding for the split chunk
+                embedding = await model.send_embedding_request(chunk)
+                if not first_embedding:
+                    first_embedding = embedding
+                self.index.upsert([(chunk, embedding, {"conversation_id": conversation_id, "timestamp": timestamp})])
+            return first_embedding
+        else:
+            embedding = await model.send_embedding_request(text)
+            self.index.upsert([(text, embedding, {"conversation_id": conversation_id,
+                                                  "timestamp": timestamp})])
+            return embedding
+
+    def get_n_similar(self, conversation_id: int, embedding, n=10):
+        response = self.index.query(vector=embedding, top_k=n, include_metadata=True, filter={"conversation_id": conversation_id})
+        print(response)
+        relevant_phrases = [(match["id"], match["metadata"]["timestamp"]) for match in response["matches"]]
+        # Sort the relevant phrases based on the timestamp
+        relevant_phrases.sort(key=lambda x: x[1])
+        return relevant_phrases
\ No newline at end of file
diff --git a/models/user_model.py b/models/user_model.py
index 990e41f..67cedf5 100644
--- a/models/user_model.py
+++ b/models/user_model.py
@@ -72,3 +72,37 @@ class Thread:
 
     def __str__(self):
         return self.__repr__()
+
+
+class EmbeddedConversationItem:
+    def __init__(self, text, timestamp):
+        self.text = text
+        self.timestamp = int(timestamp)
+
+    def __repr__(self):
+        return self.text
+
+    def __str__(self):
+        return self.__repr__()
+
+    def __eq__(self, other):
+        return self.text == other.text and self.timestamp == other.timestamp
+
+    def __hash__(self):
+        return hash(self.text) + hash(self.timestamp)
+
+    def __lt__(self, other):
+        return self.timestamp < other.timestamp
+
+    def __gt__(self, other):
+        return self.timestamp > other.timestamp
+
+    def __le__(self, other):
+        return self.timestamp <= other.timestamp
+
+    def __ge__(self, other):
+        return self.timestamp >= other.timestamp
+
+    def __ne__(self, other):
+        return not self.__eq__(other)