From f439ef9ce9b2760725affde01a17e9d4ea11bd3e Mon Sep 17 00:00:00 2001
From: Kaveen Kumarasinghe
Date: Sun, 12 Mar 2023 00:00:57 -0500
Subject: [PATCH] persist conversations through restarts

---
 cogs/text_service_cog.py            | 46 +++++++++++++++++++++++++++++
 detailed_guides/PERMANENT-MEMORY.md |  4 ++-
 gpt3discord.py                      | 13 +++++++-
 models/openai_model.py              |  2 +-
 services/pickle_service.py          | 45 ++++++++++++++++++++++++++++
 5 files changed, 107 insertions(+), 3 deletions(-)
 create mode 100644 services/pickle_service.py

diff --git a/cogs/text_service_cog.py b/cogs/text_service_cog.py
index 80ed66a..3ca8070 100644
--- a/cogs/text_service_cog.py
+++ b/cogs/text_service_cog.py
@@ -1,4 +1,6 @@
+import asyncio
 import datetime
+import pickle
 import re
 import traceback
 import sys
@@ -21,6 +23,7 @@ from models.user_model import Thread, EmbeddedConversationItem
 from collections import defaultdict
 from sqlitedict import SqliteDict
 
+from services.pickle_service import Pickler
 from services.sharegpt_service import ShareGPTService
 from services.text_service import SetupModal, TextService
 
@@ -80,6 +83,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         DEBUG_CHANNEL,
         data_path: Path,
         pinecone_service,
+        pickle_queue,
     ):
         super().__init__()
         self.GLOBAL_COOLDOWN_TIME = 0.25
@@ -99,6 +103,9 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         self.users_to_interactions = defaultdict(list)
         self.redo_users = {}
 
+        # Pickle queue
+        self.pickle_queue = pickle_queue
+
         # Conversations-specific data
         self.END_PROMPTS = [
             "end",
@@ -113,6 +120,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         self.full_conversation_history = defaultdict(list)
         self.summarize = self.model.summarize_conversations
 
+
         # Pinecone data
         self.pinecone_service = pinecone_service
 
@@ -221,6 +229,35 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         )
         print("The debug channel was acquired")
 
+        print("Attempting to load from pickles")
+        # Try to load self.full_conversation_history, self.conversation_threads, and self.conversation_thread_owners from the `pickles` folder
+        try:
+            with open(EnvService.save_path() / "pickles" / "full_conversation_history.pickle", "rb") as f:
+                self.full_conversation_history = pickle.load(f)
+                print("Loaded full_conversation_history")
+
+            with open(EnvService.save_path() / "pickles" / "conversation_threads.pickle", "rb") as f:
+                self.conversation_threads = pickle.load(f)
+                print("Loaded conversation_threads")
+
+            with open(EnvService.save_path() / "pickles" / "conversation_thread_owners.pickle", "rb") as f:
+                self.conversation_thread_owners = pickle.load(f)
+                print("Loaded conversation_thread_owners")
+
+            # Fall back to fresh state if any of the three came back empty
+            assert self.full_conversation_history != {}
+            assert self.conversation_threads != {}
+            assert self.conversation_thread_owners != defaultdict(list)
+
+        except Exception:
+            print("Failed to load from pickles")
+            self.full_conversation_history = defaultdict(list)
+            self.conversation_threads = {}
+            self.conversation_thread_owners = defaultdict(list)
+            traceback.print_exc()
+
+        print("Syncing commands...")
+
         await self.bot.sync_commands(
             commands=None,
             method="individual",
@@ -232,6 +269,15 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         )
         print("Commands synced")
 
+
+        # Start an inline async loop that runs every 15 seconds to save the conversation history to pickle files
+        print("Starting pickle loop")
+        while True:
+            await asyncio.sleep(15)
+            await self.pickle_queue.put(
+                Pickler(self.full_conversation_history, self.conversation_threads, self.conversation_thread_owners))
+
+
     def check_conversing(self, channel_id, message_content):
         '''given channel id and a message, return true if it's a conversation thread, false if not, or if the message starts with "~"'''
         cond1 = channel_id in self.conversation_threads
diff --git a/detailed_guides/PERMANENT-MEMORY.md b/detailed_guides/PERMANENT-MEMORY.md
index 4e4f56d..6c9617e 100644
--- a/detailed_guides/PERMANENT-MEMORY.md
+++ b/detailed_guides/PERMANENT-MEMORY.md
@@ -20,4 +20,6 @@ Then, name the index `conversation-embeddings`, set the dimensions to `1536`, an
-Permanent memory using pinecone is still in alpha, I will be working on cleaning up this work, adding auto-clearing, and optimizing for stability and reliability, any help and feedback is appreciated (**add me on Discord Kaveen#0001 for pinecone help**)! If at any time you're having too many issues with pinecone, simply remove the `PINECONE_TOKEN` line in your `.env` file and the bot will revert to using conversation summarizations.
\ No newline at end of file
+Permanent memory using pinecone is still in alpha. I will be working on cleaning up this work, adding auto-clearing, and optimizing for stability and reliability; any help and feedback is appreciated (**add me on Discord Kaveen#0001 for pinecone help**)! If at any time you're having too many issues with pinecone, simply remove the `PINECONE_TOKEN` line in your `.env` file and the bot will revert to using conversation summarizations.
+
+Conversations persist even through bot restarts. Bot conversation data is stored locally in a folder called `pickles`. If you find your bot getting slow, delete this folder. A cleaner solution will be implemented in the future.
\ No newline at end of file
diff --git a/gpt3discord.py b/gpt3discord.py
index d6c982b..3b94f52 100644
--- a/gpt3discord.py
+++ b/gpt3discord.py
@@ -22,6 +22,7 @@ from cogs.translation_service_cog import TranslationService
 from cogs.index_service_cog import IndexService
 from models.deepl_model import TranslationModel
 from services.health_service import HealthService
+from services.pickle_service import Pickler
 from services.pinecone_service import PineconeService
 
 from services.deletion_service import Deletion
@@ -32,7 +33,7 @@ from services.environment_service import EnvService
 from models.openai_model import Model
 
 
-__version__ = "10.9.17"
+__version__ = "11.0.0"
 
 
 PID_FILE = Path("bot.pid")
@@ -76,6 +77,15 @@ deletion_queue = asyncio.Queue()
 asyncio.ensure_future(Message.process_message_queue(message_queue, 1.5, 5))
 asyncio.ensure_future(Deletion.process_deletion_queue(deletion_queue, 1, 1))
 
+# Pickling service for conversation persistence
+try:
+    Path(EnvService.save_path() / "pickles").mkdir(exist_ok=True)
+except Exception:
+    traceback.print_exc()
+    print("Could not start pickle service. Conversation history will not be persistent across restarts.")
+pickle_queue = asyncio.Queue()
+asyncio.ensure_future(Pickler.process_pickle_queue(pickle_queue, 5, 1))
+
 
 #
 # Settings for the bot
@@ -131,6 +141,7 @@ async def main():
             debug_channel,
             data_path,
             pinecone_service=pinecone_service,
+            pickle_queue=pickle_queue,
         )
     )
 
diff --git a/models/openai_model.py b/models/openai_model.py
index 8e2157f..aa36597 100644
--- a/models/openai_model.py
+++ b/models/openai_model.py
@@ -823,7 +823,7 @@ class Model:
 
     def cleanse_username(self, text):
         text = text.strip()
-        text = text.replace(":", "_")
+        text = text.replace(":", "")
         text = text.replace(" ", "")
         # Replace any character that's not a letter or number with an underscore
         text = re.sub(r"[^a-zA-Z0-9]", "_", text)
diff --git a/services/pickle_service.py b/services/pickle_service.py
new file mode 100644
index 0000000..4749e09
--- /dev/null
+++ b/services/pickle_service.py
@@ -0,0 +1,45 @@
+import asyncio
+import pickle
+import traceback
+from datetime import datetime
+
+import aiofiles
+import discord
+
+from services.environment_service import EnvService
+
+
+class Pickler:
+    def __init__(self, full_conversation_history, conversation_threads, conversation_thread_owners):
+        self.full_conversation_history = full_conversation_history
+        self.conversation_threads = conversation_threads
+        self.conversation_thread_owners = conversation_thread_owners
+
+    # This function will be called by the bot to process the pickle queue
+    @staticmethod
+    async def process_pickle_queue(
+        pickle_queue, PROCESS_WAIT_TIME, EMPTY_WAIT_TIME
+    ):
+        while True:
+            try:
+                # If the queue is empty, sleep for a short time before checking again
+                if pickle_queue.empty():
+                    await asyncio.sleep(EMPTY_WAIT_TIME)
+                    continue
+
+                # Get the next object to pickle from the queue
+                to_pickle = await pickle_queue.get()
+
+                # Pickle all the objects inside to_pickle using aiofiles
+                async with aiofiles.open(EnvService.save_path() / "pickles" / "full_conversation_history.pickle", "wb") as f:
+                    await f.write(pickle.dumps(to_pickle.full_conversation_history))
+
+                async with aiofiles.open(EnvService.save_path() / "pickles" / "conversation_threads.pickle", "wb") as f:
+                    await f.write(pickle.dumps(to_pickle.conversation_threads))
+
+                async with aiofiles.open(EnvService.save_path() / "pickles" / "conversation_thread_owners.pickle", "wb") as f:
+                    await f.write(pickle.dumps(to_pickle.conversation_thread_owners))
+
+                await asyncio.sleep(PROCESS_WAIT_TIME)
+            except Exception:
+                traceback.print_exc()
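
For anyone reviewing or building on this, below is a minimal, self-contained sketch of the save/load cycle the patch implements: in-memory conversation state is snapshotted onto an asyncio queue, a background consumer writes one pickle file per attribute, and startup loads the files back with a fallback to empty state. It is illustrative only and not part of the patch; the names Snapshot, consume, load_state, and run_demo are made up here, and plain pickle/pathlib plus a temporary directory stand in for aiofiles and EnvService.save_path().

# Sketch only: standalone demo of the queue-based pickle persistence pattern
# used in this patch. Names (Snapshot, consume, load_state, run_demo) are
# hypothetical; the real code uses Pickler.process_pickle_queue, aiofiles, and
# EnvService.save_path() / "pickles".
import asyncio
import pickle
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path
from tempfile import TemporaryDirectory

FIELDS = ("full_conversation_history", "conversation_threads", "conversation_thread_owners")


@dataclass
class Snapshot:
    # Mirrors the three structures the cog persists.
    full_conversation_history: dict = field(default_factory=dict)
    conversation_threads: dict = field(default_factory=dict)
    conversation_thread_owners: dict = field(default_factory=lambda: defaultdict(list))


async def consume(queue: asyncio.Queue, save_dir: Path) -> None:
    # Background consumer: write one pickle file per attribute for each snapshot.
    while True:
        snapshot = await queue.get()
        for name in FIELDS:
            (save_dir / f"{name}.pickle").write_bytes(pickle.dumps(getattr(snapshot, name)))
        queue.task_done()


def load_state(save_dir: Path) -> Snapshot:
    # Startup path: load the pickles, falling back to empty state on any failure.
    try:
        return Snapshot(**{
            name: pickle.loads((save_dir / f"{name}.pickle").read_bytes()) for name in FIELDS
        })
    except Exception:
        return Snapshot()


async def run_demo() -> None:
    with TemporaryDirectory() as tmp:
        save_dir = Path(tmp)
        queue: asyncio.Queue = asyncio.Queue()
        consumer = asyncio.create_task(consume(queue, save_dir))

        # The bot would do this every 15 seconds from its pickle loop.
        await queue.put(Snapshot(conversation_threads={1234: "thread state"}))
        await queue.join()

        consumer.cancel()
        try:
            await consumer
        except asyncio.CancelledError:
            pass

        # Simulated restart: the state comes back from the pickle files.
        print(load_state(save_dir).conversation_threads)  # {1234: 'thread state'}


if __name__ == "__main__":
    asyncio.run(run_demo())

One design note worth considering: writing each pickle to a temporary file and then os.replace()-ing it over the target would make every save atomic, so a crash mid-write could not leave a corrupt file behind.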