Kaveen Kumarasinghe 2 years ago
parent fe54b250f6
commit a8b209c212

@@ -14,7 +14,7 @@ from models.deletion_service_model import Deletion
 from models.env_service_model import EnvService
 from models.message_model import Message
 from models.moderations_service_model import Moderation
-from models.user_model import RedoUser, Thread
+from models.user_model import RedoUser, Thread, EmbeddedConversationItem
 from models.check_model import Check
 from models.autocomplete_model import Settings_autocompleter, File_autocompleter
 from collections import defaultdict
@@ -38,6 +38,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         DEBUG_GUILD,
         DEBUG_CHANNEL,
         data_path: Path,
+        pinecone_service,
     ):
         super().__init__()
         self.data_path = data_path
@ -67,6 +68,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
self.moderation_alerts_channel = EnvService.get_moderations_alert_channel() self.moderation_alerts_channel = EnvService.get_moderations_alert_channel()
self.moderation_enabled_guilds = [] self.moderation_enabled_guilds = []
self.moderation_tasks = {} self.moderation_tasks = {}
self.pinecone_service = pinecone_service
try: try:
conversation_file_path = data_path / "conversation_starter_pretext.txt" conversation_file_path = data_path / "conversation_starter_pretext.txt"
@@ -519,7 +521,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
                 new_conversation_history.append(
                     "\nContinue the conversation, paying very close attention to things <username> told you, such as their name, and personal details.\n"
                 )
-                # Get the last entry from the user's conversation history
+                # Get the last entry from the thread's conversation history
                 new_conversation_history.append(
                     self.conversation_threads[message.channel.id].history[-1] + "\n"
                 )
@@ -657,6 +659,83 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
                 return

+        print("BEFORE PINECONE SERVICE CHECK")
+        if self.pinecone_service:
+            # The conversation_id is the id of the thread
+            conversation_id = message.channel.id
+            print("Conversation id is", conversation_id)
+
+            # Create an embedding and timestamp for the prompt
+            prompt = prompt.encode("ascii", "ignore").decode()
+            prompt_less_author = f"{prompt} <|endofstatement|>\n"
+            prompt_with_gpt_instead = f"GPTie: {prompt} <|endofstatement|>\n"
+            prompt = f"\n'{message.author.display_name}': {prompt} <|endofstatement|>\n"
+            # print("Creating embedding for ", prompt)
+
+            # Build an integer timestamp (epoch time with the decimal point stripped)
+            timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", ""))
+            print("Timestamp is ", timestamp)
+
+            starter_conversation_item = EmbeddedConversationItem(
+                str(self.conversation_threads[message.channel.id].history[0]), 0
+            )
+            self.conversation_threads[message.channel.id].history[0] = starter_conversation_item
+
+            new_prompt_item = EmbeddedConversationItem(prompt, timestamp)
+            self.conversation_threads[conversation_id].history.append(new_prompt_item)
+
+            # Create and upsert the embedding for the conversation id, prompt, timestamp
+            embedding = await self.pinecone_service.upsert_conversation_embedding(
+                self.model, conversation_id, prompt, timestamp
+            )
+            embedding_prompt_less_author = await self.model.send_embedding_request(
+                prompt_less_author
+            )
+
+            # Now, build the new prompt by getting the most similar entries from pinecone (n=5 here)
+            similar_prompts = self.pinecone_service.get_n_similar(
+                conversation_id, embedding_prompt_less_author, n=5
+            )
+
+            # When we are in embeddings mode, only the pre-text is contained in
+            # self.conversation_threads[message.channel.id].history, so we can use
+            # that as a base to build our new prompt
+            prompt_with_history = []
+            prompt_with_history.append(self.conversation_threads[message.channel.id].history[0])
+
+            # Append the similar prompts to the prompt with history
+            prompt_with_history += [
+                EmbeddedConversationItem(prompt, timestamp)
+                for prompt, timestamp in similar_prompts
+            ]
+
+            # Iterate over (at most) the last two prompts in the history
+            for i in range(1, min(len(self.conversation_threads[message.channel.id].history), 3)):
+                prompt_with_history.append(self.conversation_threads[message.channel.id].history[-i])
+
+            # Remove duplicates from prompt_with_history
+            prompt_with_history = list(dict.fromkeys(prompt_with_history))
+
+            # Sort the prompt_with_history by increasing timestamp
+            prompt_with_history.sort(key=lambda x: x.timestamp)
+
+            # Ensure that the last prompt in this list is the prompt we just sent (new_prompt_item)
+            if prompt_with_history[-1] != new_prompt_item:
+                try:
+                    prompt_with_history.remove(new_prompt_item)
+                except ValueError:
+                    pass
+                prompt_with_history.append(new_prompt_item)
+
+            prompt_with_history = "".join([item.text for item in prompt_with_history])
+            print("The prompt with history is", prompt_with_history)
+
+            self.awaiting_responses.append(message.author.id)
+            self.awaiting_thread_responses.append(message.channel.id)
+
+            self.conversation_threads[message.channel.id].count += 1
+
+            original_message[message.author.id] = message.id
+
+            await self.encapsulated_send(
+                message.channel.id,
+                prompt_with_history,
+                message,
+            )
+
+            return
         self.awaiting_responses.append(message.author.id)
         self.awaiting_thread_responses.append(message.channel.id)
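For clarity, the integer "timestamp" used as a sort key in the block above is just the current epoch time with the decimal point stripped out, for example:

    # Hypothetical value returned by datetime.datetime.now().timestamp()
    ts = 1673109600.123456
    key = int(str(ts).replace(".", ""))  # -> 1673109600123456
    # Note that str() may emit a varying number of fractional digits, so keys from
    # different calls are only an approximate chronological ordering.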
@@ -700,11 +779,13 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         try:
             tokens = self.usage_service.count_tokens(new_prompt)

             # Check if the prompt is about to go past the token limit
+            # This is the NO-EMBEDDINGS-SUMMARIZE CASE
             if (
                 id in self.conversation_threads
                 and tokens > self.model.summarize_threshold
                 and not from_g_command
+                and not self.pinecone_service  # Only summarize when we're not running in embeddings mode
             ):

                 # We don't need to worry about the differences between interactions and messages in this block,
@@ -769,11 +850,34 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
                 )

             # If the user is conversing, add the GPT response to their conversation history.
-            if id in self.conversation_threads and not from_g_command:
+            # Don't append to the history if we're using embeddings!
+            if id in self.conversation_threads and not from_g_command and not self.pinecone_service:
                 self.conversation_threads[id].history.append(
                     "\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
                 )

+            # Embeddings case!
+            elif id in self.conversation_threads and not from_g_command and self.pinecone_service:
+                conversation_id = id
+                print("Conversation id is", conversation_id)
+
+                # Create an embedding and timestamp for the response
+                response_text = "\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
+                response_text = response_text.encode("ascii", "ignore").decode()
+                print("Creating embedding for ", response_text)
+
+                # Build an integer timestamp for the response, as we did for the prompt
+                timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", ""))
+                print("Timestamp is ", timestamp)
+
+                self.conversation_threads[conversation_id].history.append(
+                    EmbeddedConversationItem(response_text, timestamp)
+                )
+
+                # Create and upsert the embedding for the conversation id, response text, timestamp
+                embedding = await self.pinecone_service.upsert_conversation_embedding(
+                    self.model, conversation_id, response_text, timestamp
+                )
+                print("Embedded the response")
+
             # If we don't have a response message, we are not doing a redo, send as a new message(s)
             if not response_message:
                 if len(response_text) > self.TEXT_CUTOFF:

@@ -4,10 +4,13 @@ import traceback
 from pathlib import Path

 import discord
+import pinecone
 from dotenv import load_dotenv
 from pycord.multicog import apply_multicog
 import os

+from models.pinecone_service_model import PineconeService
+
 if sys.platform == "win32":
     separator = "\\"
 else:
@@ -26,6 +29,22 @@ from models.usage_service_model import UsageService

 __version__ = "3.1.2"

+"""
+The pinecone service is used to store and retrieve conversation embeddings.
+"""
+try:
+    PINECONE_TOKEN = os.getenv("PINECONE_TOKEN")
+except:
+    PINECONE_TOKEN = None
+
+pinecone_service = None
+if PINECONE_TOKEN:
+    pinecone.init(api_key=PINECONE_TOKEN, environment="us-west1-gcp")
+    PINECONE_INDEX = "conversation-embeddings"  # The index name is hard-coded for now; it will become configurable later.
+    pinecone_service = PineconeService(pinecone.Index(PINECONE_INDEX))
+    print("Got the pinecone service")
+
 """
 Message queueing for the debug service, defer debug messages to be sent later so we don't hit rate limits.
 """
@@ -85,6 +104,7 @@ async def main():
             debug_guild,
             debug_channel,
             data_path,
+            pinecone_service=pinecone_service,
         )
     )

@@ -20,7 +20,10 @@ class Message:
         message = await message_queue.get()

         # Send the message
-        await message.channel.send(message.content)
+        try:
+            await message.channel.send(message.content)
+        except:
+            pass
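        # Presumably the bare except is here so that a failed send (for example a deleted
        # channel or missing permissions) does not kill the message-queue loop; the
        # queued message is simply dropped.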
         # Sleep for a short time before processing the next message
         # This will prevent the bot from spamming messages too quickly

@@ -3,6 +3,7 @@ import functools
 import math
 import os
 import tempfile
+import traceback
 import uuid
 from typing import Tuple, List, Any
@@ -23,6 +24,7 @@ class Mode:
 class Models:
     DAVINCI = "text-davinci-003"
     CURIE = "text-curie-001"
+    EMBEDDINGS = "text-embedding-ada-002"


 class ImageSize:
@@ -317,6 +319,27 @@ class Model:
                 + str(response["error"]["message"])
             )

+    async def send_embedding_request(self, text):
+        async with aiohttp.ClientSession() as session:
+            payload = {
+                "model": Models.EMBEDDINGS,
+                "input": text,
+            }
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.openai_key}",
+            }
+            async with session.post(
+                "https://api.openai.com/v1/embeddings", json=payload, headers=headers
+            ) as resp:
+                response = await resp.json()
+                try:
+                    return response["data"][0]["embedding"]
+                except Exception as e:
+                    traceback.print_exc()
+                    return
+
     async def send_moderations_request(self, text):
         # Use aiohttp to send the above request:
         async with aiohttp.ClientSession() as session:
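A quick usage sketch for the new embeddings call (hypothetical caller; assumes `model` is an initialized Model and that the call happens inside an async function):

    vector = await model.send_embedding_request("GPTie: hello <|endofstatement|>")
    if vector is not None:
        print(len(vector))  # 1536 floats for text-embedding-ada-002
    # On an API error the method prints a traceback and returns None.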
@@ -422,8 +445,8 @@ class Model:
             "https://api.openai.com/v1/completions", json=payload, headers=headers
         ) as resp:
             response = await resp.json()
-            print(f"Payload -> {payload}")
-            print(f"Response -> {response}")
+            # print(f"Payload -> {payload}")
+            # print(f"Response -> {response}")

             # Parse the total tokens used for this request and response pair from the response
             await self.valid_text_request(response)

@@ -0,0 +1,43 @@
+import pinecone
+
+
+class PineconeService:
+    def __init__(self, index: pinecone.Index):
+        self.index = index
+
+    def upsert_basic(self, text, embeddings):
+        self.index.upsert([(text, embeddings)])
+
+    def get_all_for_conversation(self, conversation_id: int):
+        response = self.index.query(top_k=100, filter={"conversation_id": conversation_id})
+        return response
+
+    async def upsert_conversation_embedding(self, model, conversation_id: int, text, timestamp):
+        # If the text is > 500 characters, we need to split it up into multiple entries.
+        first_embedding = None
+        if len(text) > 500:
+            # Split the text into 500-character chunks
+            chunks = [text[i:i + 500] for i in range(0, len(text), 500)]
+            for chunk in chunks:
+                print("The split chunk is ", chunk)
+                # Create an embedding for the split chunk
+                embedding = await model.send_embedding_request(chunk)
+                if not first_embedding:
+                    first_embedding = embedding
+                self.index.upsert(
+                    [(chunk, embedding)],
+                    metadata={"conversation_id": conversation_id, "timestamp": timestamp},
+                )
+            return first_embedding
+        else:
+            embedding = await model.send_embedding_request(text)
+            self.index.upsert(
+                [(text, embedding, {"conversation_id": conversation_id, "timestamp": timestamp})]
+            )
+            return embedding
+
+    def get_n_similar(self, conversation_id: int, embedding, n=10):
+        response = self.index.query(
+            vector=embedding, top_k=n, include_metadata=True, filter={"conversation_id": conversation_id}
+        )
+        print(response)
+        relevant_phrases = [
+            (match["id"], match["metadata"]["timestamp"]) for match in response["matches"]
+        ]
+        # Sort the relevant phrases based on the timestamp
+        relevant_phrases.sort(key=lambda x: x[1])
+        return relevant_phrases
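For orientation, a short usage sketch of the service (hypothetical values, inside an async function; pinecone.init(...) is assumed to have been called already). Vectors are keyed by the text itself, and get_n_similar returns (text, timestamp) pairs sorted oldest-first:

    service = PineconeService(pinecone.Index("conversation-embeddings"))
    await service.upsert_conversation_embedding(model, 1234, "'Bob': hi", timestamp=1)
    query_vector = await model.send_embedding_request("hi")
    print(service.get_n_similar(1234, query_vector, n=5))  # e.g. [("'Bob': hi", 1.0)]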

@@ -72,3 +72,37 @@ class Thread:
     def __str__(self):
         return self.__repr__()
+
+
+class EmbeddedConversationItem:
+    def __init__(self, text, timestamp):
+        self.text = text
+        self.timestamp = int(timestamp)
+
+    def __repr__(self):
+        return self.text
+
+    def __str__(self):
+        return self.__repr__()
+
+    def __eq__(self, other):
+        return self.text == other.text and self.timestamp == other.timestamp
+
+    def __hash__(self):
+        return hash(self.text) + hash(self.timestamp)
+
+    def __lt__(self, other):
+        return self.timestamp < other.timestamp
+
+    def __gt__(self, other):
+        return self.timestamp > other.timestamp
+
+    def __le__(self, other):
+        return self.timestamp <= other.timestamp
+
+    def __ge__(self, other):
+        return self.timestamp >= other.timestamp
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
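A small illustration of why the equality, hashing and ordering hooks matter (hypothetical items): they let the converser cog deduplicate history entries with dict.fromkeys and order them chronologically before joining them into a prompt:

    a = EmbeddedConversationItem("'Bob': hi <|endofstatement|>\n", 2)
    b = EmbeddedConversationItem("GPTie: hello <|endofstatement|>\n", 1)
    items = list(dict.fromkeys([a, b, a]))  # -> [a, b]; the duplicate collapses via __eq__/__hash__
    items.sort(key=lambda x: x.timestamp)   # -> [b, a]; oldest first
    print("".join(str(item) for item in items))  # __str__/__repr__ return the raw text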
