From f439ef9ce9b2760725affde01a17e9d4ea11bd3e Mon Sep 17 00:00:00 2001
From: Kaveen Kumarasinghe
Date: Sun, 12 Mar 2023 00:00:57 -0500
Subject: [PATCH] persist conversations through restarts

---
 cogs/text_service_cog.py            | 46 +++++++++++++++++++++++++++++
 detailed_guides/PERMANENT-MEMORY.md |  4 ++-
 gpt3discord.py                      | 13 +++++++-
 models/openai_model.py              |  2 +-
 services/pickle_service.py          | 45 ++++++++++++++++++++++++++++
 5 files changed, 107 insertions(+), 3 deletions(-)
 create mode 100644 services/pickle_service.py

diff --git a/cogs/text_service_cog.py b/cogs/text_service_cog.py
index 80ed66a..3ca8070 100644
--- a/cogs/text_service_cog.py
+++ b/cogs/text_service_cog.py
@@ -1,4 +1,6 @@
+import asyncio
 import datetime
+import pickle
 import re
 import traceback
 import sys
@@ -21,6 +23,7 @@ from models.user_model import Thread, EmbeddedConversationItem
 from collections import defaultdict
 from sqlitedict import SqliteDict
 
+from services.pickle_service import Pickler
 from services.sharegpt_service import ShareGPTService
 from services.text_service import SetupModal, TextService
 
@@ -80,6 +83,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         DEBUG_CHANNEL,
         data_path: Path,
         pinecone_service,
+        pickle_queue,
     ):
         super().__init__()
         self.GLOBAL_COOLDOWN_TIME = 0.25
@@ -99,6 +103,9 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         self.users_to_interactions = defaultdict(list)
         self.redo_users = {}
 
+        # Pickle queue
+        self.pickle_queue = pickle_queue
+
         # Conversations-specific data
         self.END_PROMPTS = [
             "end",
@@ -113,6 +120,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         self.full_conversation_history = defaultdict(list)
         self.summarize = self.model.summarize_conversations
 
+
         # Pinecone data
         self.pinecone_service = pinecone_service
 
@@ -221,6 +229,35 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         )
         print("The debug channel was acquired")
 
+        print("Attempting to load from pickles")
+        # Try to load self.full_conversation_history, self.conversation_threads, and self.conversation_thread_owners from the `pickles` folder
+        try:
+            with open(EnvService.save_path() / "pickles" / "full_conversation_history.pickle", "rb") as f:
+                self.full_conversation_history = pickle.load(f)
+                print("Loaded full_conversation_history")
+
+            with open(EnvService.save_path() / "pickles" / "conversation_threads.pickle", "rb") as f:
+                self.conversation_threads = pickle.load(f)
+                print("Loaded conversation_threads")
+
+            with open(EnvService.save_path() / "pickles" / "conversation_thread_owners.pickle", "rb") as f:
+                self.conversation_thread_owners = pickle.load(f)
+                print("Loaded conversation_thread_owners")
+
+            # Fall back to fresh state if any of the three came back empty
+            assert self.full_conversation_history != {}
+            assert self.conversation_threads != {}
+            assert self.conversation_thread_owners != defaultdict(list)
+
+        except Exception:
+            print("Failed to load from pickles")
+            self.full_conversation_history = defaultdict(list)
+            self.conversation_threads = {}
+            self.conversation_thread_owners = defaultdict(list)
+            traceback.print_exc()
+
+        print("Syncing commands...")
+
         await self.bot.sync_commands(
             commands=None,
             method="individual",
@@ -232,6 +269,15 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         )
         print("Commands synced")
 
+
+        # Start an inline async loop that runs every 15 seconds to save the conversation history to pickle files
+        print("Starting pickle loop")
+        while True:
+            await asyncio.sleep(15)
+            await self.pickle_queue.put(
+                Pickler(self.full_conversation_history, self.conversation_threads, self.conversation_thread_owners))
+
+
     def check_conversing(self, channel_id, message_content):
         '''given channel id and a message, return true if it's a conversation thread, false if not, or if the message starts with "~"'''
         cond1 = channel_id in self.conversation_threads
diff --git a/detailed_guides/PERMANENT-MEMORY.md b/detailed_guides/PERMANENT-MEMORY.md
index 4e4f56d..6c9617e 100644
--- a/detailed_guides/PERMANENT-MEMORY.md
+++ b/detailed_guides/PERMANENT-MEMORY.md
@@ -20,4 +20,6 @@ Then, name the index `conversation-embeddings`, set the dimensions to `1536`, an
-Permanent memory using pinecone is still in alpha, I will be working on cleaning up this work, adding auto-clearing, and optimizing for stability and reliability, any help and feedback is appreciated (**add me on Discord Kaveen#0001 for pinecone help**)! If at any time you're having too many issues with pinecone, simply remove the `PINECONE_TOKEN` line in your `.env` file and the bot will revert to using conversation summarizations.
\ No newline at end of file
+Permanent memory using pinecone is still in alpha. I will be working on cleaning up this work, adding auto-clearing, and optimizing for stability and reliability; any help and feedback is appreciated (**add me on Discord Kaveen#0001 for pinecone help**)! If at any time you're having too many issues with pinecone, simply remove the `PINECONE_TOKEN` line in your `.env` file and the bot will revert to using conversation summarizations.
+
+Conversations persist even through bot restarts. Bot conversation data is stored locally in a folder called `pickles`. If you find your bot getting slow, delete this folder. A cleaner solution will be implemented in the future.
\ No newline at end of file
diff --git a/gpt3discord.py b/gpt3discord.py
index d6c982b..3b94f52 100644
--- a/gpt3discord.py
+++ b/gpt3discord.py
@@ -22,6 +22,7 @@ from cogs.translation_service_cog import TranslationService
 from cogs.index_service_cog import IndexService
 from models.deepl_model import TranslationModel
 from services.health_service import HealthService
+from services.pickle_service import Pickler
 from services.pinecone_service import PineconeService
 
 from services.deletion_service import Deletion
@@ -32,7 +33,7 @@ from services.environment_service import EnvService
 from models.openai_model import Model
 
 
-__version__ = "10.9.17"
+__version__ = "11.0.0"
 
 
 PID_FILE = Path("bot.pid")
@@ -76,6 +77,15 @@ deletion_queue = asyncio.Queue()
 asyncio.ensure_future(Message.process_message_queue(message_queue, 1.5, 5))
 asyncio.ensure_future(Deletion.process_deletion_queue(deletion_queue, 1, 1))
 
+# Pickling service for conversation persistence
+try:
+    Path(EnvService.save_path() / "pickles").mkdir(exist_ok=True)
+except Exception:
+    traceback.print_exc()
+    print("Could not start pickle service. Conversation history will not be persistent across restarts.")
+pickle_queue = asyncio.Queue()
+asyncio.ensure_future(Pickler.process_pickle_queue(pickle_queue, 5, 1))
+
 
 #
 # Settings for the bot
@@ -131,6 +141,7 @@ async def main():
             debug_channel,
             data_path,
             pinecone_service=pinecone_service,
+            pickle_queue=pickle_queue,
         )
     )
 
diff --git a/models/openai_model.py b/models/openai_model.py
index 8e2157f..aa36597 100644
--- a/models/openai_model.py
+++ b/models/openai_model.py
@@ -823,7 +823,7 @@ class Model:
 
     def cleanse_username(self, text):
         text = text.strip()
-        text = text.replace(":", "_")
+        text = text.replace(":", "")
         text = text.replace(" ", "")
         # Replace any character that's not a letter or number with an underscore
         text = re.sub(r"[^a-zA-Z0-9]", "_", text)
diff --git a/services/pickle_service.py b/services/pickle_service.py
new file mode 100644
index 0000000..4749e09
--- /dev/null
+++ b/services/pickle_service.py
@@ -0,0 +1,45 @@
+import asyncio
+import pickle
+import traceback
+from datetime import datetime
+
+import aiofiles
+import discord
+
+from services.environment_service import EnvService
+
+
+class Pickler:
+    def __init__(self, full_conversation_history, conversation_threads, conversation_thread_owners):
+        self.full_conversation_history = full_conversation_history
+        self.conversation_threads = conversation_threads
+        self.conversation_thread_owners = conversation_thread_owners
+
+    # This function will be called by the bot to process the pickle queue
+    @staticmethod
+    async def process_pickle_queue(
+        pickle_queue, PROCESS_WAIT_TIME, EMPTY_WAIT_TIME
+    ):
+        while True:
+            try:
+                # If the queue is empty, sleep for a short time before checking again
+                if pickle_queue.empty():
+                    await asyncio.sleep(EMPTY_WAIT_TIME)
+                    continue
+
+                # Get the next object to pickle from the queue
+                to_pickle = await pickle_queue.get()
+
+                # Pickle all the objects inside to_pickle using aiofiles
+                async with aiofiles.open(EnvService.save_path() / "pickles" / "full_conversation_history.pickle", "wb") as f:
+                    await f.write(pickle.dumps(to_pickle.full_conversation_history))
+
+                async with aiofiles.open(EnvService.save_path() / "pickles" / "conversation_threads.pickle", "wb") as f:
+                    await f.write(pickle.dumps(to_pickle.conversation_threads))
+
+                async with aiofiles.open(EnvService.save_path() / "pickles" / "conversation_thread_owners.pickle", "wb") as f:
+                    await f.write(pickle.dumps(to_pickle.conversation_thread_owners))
+
+                await asyncio.sleep(PROCESS_WAIT_TIME)
+            except Exception:
+                traceback.print_exc()
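
For anyone reviewing or building on this, below is a minimal, self-contained sketch of the save/load cycle the patch implements: in-memory conversation state is snapshotted onto an asyncio queue, a background consumer writes one pickle file per attribute, and startup loads the files back with a fallback to empty state. It is illustrative only and not part of the patch; the names Snapshot, consume, load_state, and run_demo are made up here, and plain pickle/pathlib plus a temporary directory stand in for aiofiles and EnvService.save_path().

# Sketch only: standalone demo of the queue-based pickle persistence pattern
# used in this patch. Names (Snapshot, consume, load_state, run_demo) are
# hypothetical; the real code uses Pickler.process_pickle_queue, aiofiles, and
# EnvService.save_path() / "pickles".
import asyncio
import pickle
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path
from tempfile import TemporaryDirectory

FIELDS = ("full_conversation_history", "conversation_threads", "conversation_thread_owners")


@dataclass
class Snapshot:
    # Mirrors the three structures the cog persists.
    full_conversation_history: dict = field(default_factory=dict)
    conversation_threads: dict = field(default_factory=dict)
    conversation_thread_owners: dict = field(default_factory=lambda: defaultdict(list))


async def consume(queue: asyncio.Queue, save_dir: Path) -> None:
    # Background consumer: write one pickle file per attribute for each snapshot.
    while True:
        snapshot = await queue.get()
        for name in FIELDS:
            (save_dir / f"{name}.pickle").write_bytes(pickle.dumps(getattr(snapshot, name)))
        queue.task_done()


def load_state(save_dir: Path) -> Snapshot:
    # Startup path: load the pickles, falling back to empty state on any failure.
    try:
        return Snapshot(**{
            name: pickle.loads((save_dir / f"{name}.pickle").read_bytes()) for name in FIELDS
        })
    except Exception:
        return Snapshot()


async def run_demo() -> None:
    with TemporaryDirectory() as tmp:
        save_dir = Path(tmp)
        queue: asyncio.Queue = asyncio.Queue()
        consumer = asyncio.create_task(consume(queue, save_dir))

        # The bot would do this every 15 seconds from its pickle loop.
        await queue.put(Snapshot(conversation_threads={1234: "thread state"}))
        await queue.join()

        consumer.cancel()
        try:
            await consumer
        except asyncio.CancelledError:
            pass

        # Simulated restart: the state comes back from the pickle files.
        print(load_state(save_dir).conversation_threads)  # {1234: 'thread state'}


if __name__ == "__main__":
    asyncio.run(run_demo())

One design note worth considering: writing each pickle to a temporary file and then os.replace()-ing it over the target would make every save atomic, so a crash mid-write could not leave a corrupt file behind.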