diff --git a/cogs/gpt_3_commands_and_converser.py b/cogs/gpt_3_commands_and_converser.py
index b1c9ba9..7fde2b8 100644
--- a/cogs/gpt_3_commands_and_converser.py
+++ b/cogs/gpt_3_commands_and_converser.py
@@ -14,7 +14,7 @@ from models.deletion_service_model import Deletion
 from models.env_service_model import EnvService
 from models.message_model import Message
 from models.moderations_service_model import Moderation
-from models.user_model import RedoUser, Thread
+from models.user_model import RedoUser, Thread, EmbeddedConversationItem
 from models.check_model import Check
 from models.autocomplete_model import Settings_autocompleter, File_autocompleter
 from collections import defaultdict
@@ -38,6 +38,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         DEBUG_GUILD,
         DEBUG_CHANNEL,
         data_path: Path,
+        pinecone_service,
     ):
         super().__init__()
         self.data_path = data_path
@@ -67,6 +68,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
         self.moderation_alerts_channel = EnvService.get_moderations_alert_channel()
         self.moderation_enabled_guilds = []
         self.moderation_tasks = {}
+        self.pinecone_service = pinecone_service
 
         try:
             conversation_file_path = data_path / "conversation_starter_pretext.txt"
@@ -519,7 +521,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
                 new_conversation_history.append(
                     "\nContinue the conversation, paying very close attention to things told you, such as their name, and personal details.\n"
                 )
-                # Get the last entry from the user's conversation history
+                # Get the last entry from the thread's conversation history
                 new_conversation_history.append(
                     self.conversation_threads[message.channel.id].history[-1] + "\n"
                 )
@@ -657,6 +659,83 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
 
                 return
 
+            print("BEFORE PINECONE SERVICE CHECK")
+            if self.pinecone_service:
+                # The conversation_id is the id of the thread
+                conversation_id = message.channel.id
+                print("Conversation id is", conversation_id)
+
+                # Create an embedding and timestamp for the prompt
+                prompt = prompt.encode("ascii", "ignore").decode()
+                prompt_less_author = f"{prompt} <|endofstatement|>\n"
+                prompt_with_gpt_instead = f"GPTie: {prompt} <|endofstatement|>\n"
+                prompt = f"\n'{message.author.display_name}': {prompt} <|endofstatement|>\n"
+
+                # print("Creating embedding for ", prompt)
+                # Create a timestamp for this entry
+                timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", ""))
+                print("Timestamp is ", timestamp)
+
+                starter_conversation_item = EmbeddedConversationItem(str(self.conversation_threads[message.channel.id].history[0]), 0)
+                self.conversation_threads[message.channel.id].history[0] = starter_conversation_item
+
+                new_prompt_item = EmbeddedConversationItem(prompt, timestamp)
+
+                self.conversation_threads[conversation_id].history.append(new_prompt_item)
+
+                # Create and upsert the embedding for the conversation id, prompt, timestamp
+                embedding = await self.pinecone_service.upsert_conversation_embedding(self.model, conversation_id, prompt, timestamp)
+
+                embedding_prompt_less_author = await self.model.send_embedding_request(prompt_less_author)
+
+                # Now, build the new prompt by getting the most similar entries (n=5 below) from Pinecone
+                similar_prompts = self.pinecone_service.get_n_similar(conversation_id, embedding_prompt_less_author, n=5)
+
+                # When we are in embeddings mode, only the pre-text is contained in self.conversation_threads[message.channel.id].history, so we
+                # can use that as a base to build our new prompt
+                prompt_with_history = []
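+                # Start with the conversation pre-text (history[0]), then append the retrieved
+                # similar items and the most recent turns below.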
+                prompt_with_history.append(self.conversation_threads[message.channel.id].history[0])
+
+                # Append the similar prompts to the prompt with history
+                prompt_with_history += [EmbeddedConversationItem(prompt, timestamp) for prompt, timestamp in similar_prompts]
+
+                # Also add up to the last two entries from the local history
+                for i in range(1, min(len(self.conversation_threads[message.channel.id].history), 3)):
+                    prompt_with_history.append(self.conversation_threads[message.channel.id].history[-i])
+
+                # remove duplicates from prompt_with_history
+                prompt_with_history = list(dict.fromkeys(prompt_with_history))
+
+                # Sort the prompt_with_history by increasing timestamp
+                prompt_with_history.sort(key=lambda x: x.timestamp)
+
+                # Ensure that the last prompt in this list is the prompt we just sent (new_prompt_item)
+                if prompt_with_history[-1] != new_prompt_item:
+                    try:
+                        prompt_with_history.remove(new_prompt_item)
+                    except ValueError:
+                        pass
+                    prompt_with_history.append(new_prompt_item)
+
+                prompt_with_history = "".join([item.text for item in prompt_with_history])
+
+                print("The prompt with history is", prompt_with_history)
+
+                self.awaiting_responses.append(message.author.id)
+                self.awaiting_thread_responses.append(message.channel.id)
+
+                self.conversation_threads[message.channel.id].count += 1
+
+                original_message[message.author.id] = message.id
+
+                await self.encapsulated_send(
+                    message.channel.id,
+                    prompt_with_history,
+                    message,
+                )
+
+                return
+
             self.awaiting_responses.append(message.author.id)
             self.awaiting_thread_responses.append(message.channel.id)
 
@@ -700,11 +779,13 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
 
         try:
             tokens = self.usage_service.count_tokens(new_prompt)
-            # Check if the prompt is about to go past the token limit
+
+            # Summarization case (this only applies when embeddings are not enabled)
             if (
                 id in self.conversation_threads
                 and tokens > self.model.summarize_threshold
                 and not from_g_command
+                and not self.pinecone_service  # Only summarize when we're not using embeddings.
             ):
 
                 # We don't need to worry about the differences between interactions and messages in this block,
@@ -769,11 +850,34 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
             )
 
             # If the user is conversing, add the GPT response to their conversation history.
-            if id in self.conversation_threads and not from_g_command:
+            # Don't append to the history if we're using embeddings!
+            if id in self.conversation_threads and not from_g_command and not self.pinecone_service:
                 self.conversation_threads[id].history.append(
                     "\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
                 )
 
+            # Embeddings case!
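+            # With Pinecone enabled, the response is stored as an EmbeddedConversationItem and its
+            # embedding is upserted so later turns can retrieve it by similarity.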
+            elif id in self.conversation_threads and not from_g_command and self.pinecone_service:
+                conversation_id = id
+                print("Conversation id is", conversation_id)
+
+                # Create an embedding and timestamp for the response
+                response_text = "\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
+
+                response_text = response_text.encode("ascii", "ignore").decode()
+
+
+                print("Creating embedding for ", response_text)
+                # Create a timestamp for this entry
+                timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", ""))
+                print("Timestamp is ", timestamp)
+                self.conversation_threads[conversation_id].history.append(EmbeddedConversationItem(response_text, timestamp))
+
+                # Create and upsert the embedding for the conversation id, response, timestamp
+                embedding = await self.pinecone_service.upsert_conversation_embedding(self.model, conversation_id,
+                                                                                      response_text, timestamp)
+                print("Embedded the response")
+
             # If we don't have a response message, we are not doing a redo, send as a new message(s)
             if not response_message:
                 if len(response_text) > self.TEXT_CUTOFF:
diff --git a/gpt3discord.py b/gpt3discord.py
index 14871bd..bf785d8 100644
--- a/gpt3discord.py
+++ b/gpt3discord.py
@@ -4,10 +4,13 @@ import traceback
 from pathlib import Path
 
 import discord
+import pinecone
 from dotenv import load_dotenv
 from pycord.multicog import apply_multicog
 import os
 
+from models.pinecone_service_model import PineconeService
+
 if sys.platform == "win32":
     separator = "\\"
 else:
@@ -26,6 +29,22 @@ from models.usage_service_model import UsageService
 
 __version__ = "3.1.2"
 
+"""
+The Pinecone service is used to store and retrieve conversation embeddings.
+"""
+try:
+    PINECONE_TOKEN = os.getenv("PINECONE_TOKEN")
+except Exception:
+    PINECONE_TOKEN = None
+
+pinecone_service = None
+if PINECONE_TOKEN:
+    pinecone.init(api_key=PINECONE_TOKEN, environment="us-west1-gcp")
+    PINECONE_INDEX = "conversation-embeddings"  # This will become configurable later.
+    pinecone_service = PineconeService(pinecone.Index(PINECONE_INDEX))
+    print("Got the pinecone service")
+
+
 """
 Message queueing for the debug service, defer debug messages to be sent later so we don't hit rate limits.
 """
@@ -85,6 +104,7 @@ async def main():
             debug_guild,
             debug_channel,
             data_path,
+            pinecone_service=pinecone_service,
         )
     )
 
diff --git a/models/message_model.py b/models/message_model.py
index 66c1219..3221661 100644
--- a/models/message_model.py
+++ b/models/message_model.py
@@ -20,7 +20,10 @@ class Message:
             message = await message_queue.get()
 
             # Send the message
-            await message.channel.send(message.content)
+            try:
+                await message.channel.send(message.content)
+            except Exception:
+                pass  # ignore send failures so one bad message doesn't stop the queue
 
             # Sleep for a short time before processing the next message
             # This will prevent the bot from spamming messages too quickly
diff --git a/models/openai_model.py b/models/openai_model.py
index 52e2133..75d18ab 100644
--- a/models/openai_model.py
+++ b/models/openai_model.py
@@ -3,6 +3,7 @@ import functools
 import math
 import os
 import tempfile
+import traceback
 import uuid
 
 from typing import Tuple, List, Any
@@ -23,6 +24,7 @@ class Mode:
 class Models:
     DAVINCI = "text-davinci-003"
     CURIE = "text-curie-001"
+    EMBEDDINGS = "text-embedding-ada-002"
 
 
 class ImageSize:
@@ -317,6 +319,27 @@ class Model:
                 + str(response["error"]["message"])
             )
 
+    async def send_embedding_request(self, text):
+        async with aiohttp.ClientSession() as session:
+            payload = {
+                "model": Models.EMBEDDINGS,
+                "input": text,
+            }
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.openai_key}",
+            }
+            async with session.post(
+                "https://api.openai.com/v1/embeddings", json=payload, headers=headers
+            ) as resp:
+                response = await resp.json()
+
+                try:
+                    return response["data"][0]["embedding"]
+                except Exception:
+                    traceback.print_exc()
+                    return
+
     async def send_moderations_request(self, text):
         # Use aiohttp to send the above request:
         async with aiohttp.ClientSession() as session:
@@ -422,8 +445,8 @@ class Model:
                 "https://api.openai.com/v1/completions", json=payload, headers=headers
             ) as resp:
                 response = await resp.json()
-                print(f"Payload -> {payload}")
-                print(f"Response -> {response}")
+                # print(f"Payload -> {payload}")
+                # print(f"Response -> {response}")
 
                 # Parse the total tokens used for this request and response pair from the response
                 await self.valid_text_request(response)
diff --git a/models/pinecone_service_model.py b/models/pinecone_service_model.py
new file mode 100644
index 0000000..708d63e
--- /dev/null
+++ b/models/pinecone_service_model.py
@@ -0,0 +1,43 @@
+import pinecone
+
+
+class PineconeService:
+
+    def __init__(self, index: pinecone.Index):
+        self.index = index
+
+    def upsert_basic(self, text, embeddings):
+        self.index.upsert([(text, embeddings)])
+
+    def get_all_for_conversation(self, conversation_id: int):
+        response = self.index.query(top_k=100, filter={"conversation_id": conversation_id})
+        return response
+
+    async def upsert_conversation_embedding(self, model, conversation_id: int, text, timestamp):
+        # If the text is longer than 500 characters, we split it up into multiple entries.
+        # The text itself doubles as the Pinecone vector id; the first chunk's embedding is returned to the caller.
+        first_embedding = None
+        if len(text) > 500:
+            # Split the text into 500-character chunks
+            chunks = [text[i:i + 500] for i in range(0, len(text), 500)]
+            for chunk in chunks:
+                print("The split chunk is ", chunk)
+
+                # Create an embedding for the split chunk
+                embedding = await model.send_embedding_request(chunk)
+                if not first_embedding:
+                    first_embedding = embedding
+                self.index.upsert([(chunk, embedding, {"conversation_id": conversation_id, "timestamp": timestamp})])
+            return first_embedding
+        else:
+            embedding = await model.send_embedding_request(text)
+            self.index.upsert([(text, embedding, {"conversation_id": conversation_id,
+                                                  "timestamp": timestamp})])
+            return embedding
+
+    def get_n_similar(self, conversation_id: int, embedding, n=10):
+        response = self.index.query(vector=embedding, top_k=n, include_metadata=True, filter={"conversation_id": conversation_id})
+        print(response)
+        relevant_phrases = [(match["id"], match["metadata"]["timestamp"]) for match in response["matches"]]
+        # Sort the relevant phrases based on the timestamp
+        relevant_phrases.sort(key=lambda x: x[1])
+        return relevant_phrases
\ No newline at end of file
diff --git a/models/user_model.py b/models/user_model.py
index 990e41f..67cedf5 100644
--- a/models/user_model.py
+++ b/models/user_model.py
@@ -72,3 +72,37 @@ class Thread:
 
     def __str__(self):
         return self.__repr__()
+
+
+class EmbeddedConversationItem:
+    def __init__(self, text, timestamp):
+        self.text = text
+        self.timestamp = int(timestamp)
+
+    def __repr__(self):
+        return self.text
+
+    def __str__(self):
+        return self.__repr__()
+
+    def __eq__(self, other):
+        return self.text == other.text and self.timestamp == other.timestamp
+
+    def __hash__(self):
+        return hash(self.text) + hash(self.timestamp)
+
+    def __lt__(self, other):
+        return self.timestamp < other.timestamp
+
+    def __gt__(self, other):
+        return self.timestamp > other.timestamp
+
+    def __le__(self, other):
+        return self.timestamp <= other.timestamp
+
+    def __ge__(self, other):
+        return self.timestamp >= other.timestamp
+
+    def __ne__(self, other):
+        return not self.__eq__(other)