Format Python code with psf/black push

github-actions 2 years ago
parent b1d3304dbd
commit 7f1a8e30f8
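
This commit is the mechanical output of the psf/black autoformatter run by the repository's formatting workflow. A minimal sketch of reproducing the same reformatting locally through black's Python API (the sample source string is illustrative, not copied verbatim from the repo):

import black

# A pre-commit style line similar to the ones in this diff: one overlong
# dict literal with no spaces after the commas inside range().
source = (
    "values = {'max_conversation_length': [str(num) for num in range(1,500,2)], "
    "'num_images': [str(num) for num in range(1,4+1)]}\n"
)

# black.format_str applies the default style the psf/black action uses:
# 88-column lines, double quotes, and spaces after commas and operators.
print(black.format_str(source, mode=black.Mode()))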

@@ -674,7 +674,10 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
# Send the request to the model
# If conversing, the prompt to send is the history, otherwise, it's just the prompt
if self.pinecone_service or message.channel.id not in self.conversation_threads:
if (
self.pinecone_service
or message.channel.id not in self.conversation_threads
):
primary_prompt = prompt
else:
primary_prompt = "".join(
@@ -724,44 +727,75 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
new_prompt = prompt.encode("ascii", "ignore").decode()
prompt_less_author = f"{new_prompt} <|endofstatement|>\n"
user_displayname = ctx.user.name if isinstance(ctx, discord.ApplicationContext) else ctx.author.display_name
user_displayname = (
ctx.user.name
if isinstance(ctx, discord.ApplicationContext)
else ctx.author.display_name
)
new_prompt = f"\n'{user_displayname}': {new_prompt} <|endofstatement|>\n"
new_prompt = (
f"\n'{user_displayname}': {new_prompt} <|endofstatement|>\n"
)
# print("Creating embedding for ", prompt)
# Print the current timestamp
timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", ""))
timestamp = int(
str(datetime.datetime.now().timestamp()).replace(".", "")
)
starter_conversation_item = EmbeddedConversationItem(
str(self.conversation_threads[ctx.channel.id].history[0]), 0)
self.conversation_threads[ctx.channel.id].history[0] = starter_conversation_item
str(self.conversation_threads[ctx.channel.id].history[0]), 0
)
self.conversation_threads[ctx.channel.id].history[
0
] = starter_conversation_item
new_prompt_item = EmbeddedConversationItem(new_prompt, timestamp)
self.conversation_threads[conversation_id].history.append(new_prompt_item)
self.conversation_threads[conversation_id].history.append(
new_prompt_item
)
# Create and upsert the embedding for the conversation id, prompt, timestamp
embedding = await self.pinecone_service.upsert_conversation_embedding(self.model, conversation_id,
new_prompt, timestamp)
embedding = await self.pinecone_service.upsert_conversation_embedding(
self.model, conversation_id, new_prompt, timestamp
)
embedding_prompt_less_author = await self.model.send_embedding_request(prompt_less_author) # Use the version of
embedding_prompt_less_author = await self.model.send_embedding_request(
prompt_less_author
) # Use the version of
# the prompt without the author's name for better clarity on retrieval.
# Now, build the new prompt by getting the X most similar with pinecone
similar_prompts = self.pinecone_service.get_n_similar(conversation_id, embedding_prompt_less_author,
n=self.model.num_conversation_lookback)
similar_prompts = self.pinecone_service.get_n_similar(
conversation_id,
embedding_prompt_less_author,
n=self.model.num_conversation_lookback,
)
# When we are in embeddings mode, only the pre-text is contained in self.conversation_threads[message.channel.id].history, so we
# can use that as a base to build our new prompt
prompt_with_history = [self.conversation_threads[ctx.channel.id].history[0]]
prompt_with_history = [
self.conversation_threads[ctx.channel.id].history[0]
]
# Append the similar prompts to the prompt with history
prompt_with_history += [EmbeddedConversationItem(prompt, timestamp) for prompt, timestamp in
similar_prompts]
prompt_with_history += [
EmbeddedConversationItem(prompt, timestamp)
for prompt, timestamp in similar_prompts
]
# iterate UP TO the last X prompts in the history
for i in range(1, min(len(self.conversation_threads[ctx.channel.id].history), self.model.num_static_conversation_items)):
prompt_with_history.append(self.conversation_threads[ctx.channel.id].history[-i])
for i in range(
1,
min(
len(self.conversation_threads[ctx.channel.id].history),
self.model.num_static_conversation_items,
),
):
prompt_with_history.append(
self.conversation_threads[ctx.channel.id].history[-i]
)
# remove duplicates from prompt_with_history
prompt_with_history = list(dict.fromkeys(prompt_with_history))
@@ -777,7 +811,9 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
pass
prompt_with_history.append(new_prompt_item)
prompt_with_history = "".join([item.text for item in prompt_with_history])
prompt_with_history = "".join(
[item.text for item in prompt_with_history]
)
new_prompt = prompt_with_history
@@ -788,7 +824,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
id in self.conversation_threads
and tokens > self.model.summarize_threshold
and not from_g_command
and not self.pinecone_service # This should only happen if we are not doing summarizations.
and not self.pinecone_service # This should only happen if we are not doing summarizations.
):
# We don't need to worry about the differences between interactions and messages in this block,
@@ -850,27 +886,42 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
)
# If the user is conversing, add the GPT response to their conversation history.
if id in self.conversation_threads and not from_g_command and not self.pinecone_service:
if (
id in self.conversation_threads
and not from_g_command
and not self.pinecone_service
):
self.conversation_threads[id].history.append(
"\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
)
# Embeddings case!
elif id in self.conversation_threads and not from_g_command and self.pinecone_service:
elif (
id in self.conversation_threads
and not from_g_command
and self.pinecone_service
):
conversation_id = id
# Create an embedding and timestamp for the prompt
response_text = "\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
response_text = (
"\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
)
response_text = response_text.encode("ascii", "ignore").decode()
# Print the current timestamp
timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", ""))
self.conversation_threads[conversation_id].history.append(EmbeddedConversationItem(response_text, timestamp))
timestamp = int(
str(datetime.datetime.now().timestamp()).replace(".", "")
)
self.conversation_threads[conversation_id].history.append(
EmbeddedConversationItem(response_text, timestamp)
)
# Create and upsert the embedding for the conversation id, prompt, timestamp
embedding = await self.pinecone_service.upsert_conversation_embedding(self.model, conversation_id,
response_text, timestamp)
embedding = await self.pinecone_service.upsert_conversation_embedding(
self.model, conversation_id, response_text, timestamp
)
# Cleanse
response_text = self.cleanse_response(response_text)

@@ -40,7 +40,7 @@ except:
pinecone_service = None
if PINECONE_TOKEN:
pinecone.init(api_key=PINECONE_TOKEN, environment="us-west1-gcp")
PINECONE_INDEX = "conversation-embeddings" # This will become unfixed later.
PINECONE_INDEX = "conversation-embeddings" # This will become unfixed later.
pinecone_service = PineconeService(pinecone.Index(PINECONE_INDEX))
print("Got the pinecone service")

@@ -27,17 +27,17 @@ class Settings_autocompleter:
ctx: discord.AutocompleteContext,
): # Behaves a bit weird if you go back and edit the parameter without typing in a new command
values = {
"max_conversation_length": [str(num) for num in range(1,500,2)],
"num_images": [str(num) for num in range(1,4+1)],
"max_conversation_length": [str(num) for num in range(1, 500, 2)],
"num_images": [str(num) for num in range(1, 4 + 1)],
"mode": ["temperature", "top_p"],
"model": ["text-davinci-003", "text-curie-001"],
"low_usage_mode": ["True", "False"],
"image_size": ["256x256", "512x512", "1024x1024"],
"summarize_conversation": ["True", "False"],
"welcome_message_enabled": ["True", "False"],
"num_static_conversation_items": [str(num) for num in range(5,20+1)],
"num_conversation_lookback": [str(num) for num in range(5,15+1)],
"summarize_threshold": [str(num) for num in range(800, 3500, 50)]
"num_static_conversation_items": [str(num) for num in range(5, 20 + 1)],
"num_conversation_lookback": [str(num) for num in range(5, 15 + 1)],
"summarize_threshold": [str(num) for num in range(800, 3500, 50)],
}
if ctx.options["parameter"] in values.keys():
return [value for value in values[ctx.options["parameter"]]]

@@ -93,7 +93,9 @@ class Model:
if value < 3:
raise ValueError("num_static_conversation_items must be >= 3")
if value > 20:
raise ValueError("num_static_conversation_items must be <= 20, this is to ensure reliability and reduce token wastage!")
raise ValueError(
"num_static_conversation_items must be <= 20, this is to ensure reliability and reduce token wastage!"
)
self._num_static_conversation_items = value
@property
@@ -106,7 +108,9 @@ class Model:
if value < 3:
raise ValueError("num_conversation_lookback must be >= 3")
if value > 15:
raise ValueError("num_conversation_lookback must be <= 15, this is to ensure reliability and reduce token wastage!")
raise ValueError(
"num_conversation_lookback must be <= 15, this is to ensure reliability and reduce token wastage!"
)
self._num_conversation_lookback = value
@property
@@ -358,7 +362,7 @@ class Model:
"Authorization": f"Bearer {self.openai_key}",
}
async with session.post(
"https://api.openai.com/v1/embeddings", json=payload, headers=headers
"https://api.openai.com/v1/embeddings", json=payload, headers=headers
) as resp:
response = await resp.json()
@@ -474,8 +478,8 @@ class Model:
"https://api.openai.com/v1/completions", json=payload, headers=headers
) as resp:
response = await resp.json()
#print(f"Payload -> {payload}")
#print(f"Response -> {response}")
# print(f"Payload -> {payload}")
# print(f"Response -> {response}")
# Parse the total tokens used for this request and response pair from the response
await self.valid_text_request(response)
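
For context on the aiohttp calls being reflowed in these two hunks, a minimal sketch of the embeddings request that send_embedding_request is built around; the payload fields and the text-embedding-ada-002 model name are assumptions, only the endpoint and the Authorization header appear in the diff:

import aiohttp


async def send_embedding_request(text: str, openai_key: str) -> list[float]:
    # Same endpoint as above; the vector comes back under data[0]["embedding"].
    payload = {"model": "text-embedding-ada-002", "input": text}
    headers = {"Authorization": f"Bearer {openai_key}"}
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "https://api.openai.com/v1/embeddings", json=payload, headers=headers
        ) as resp:
            response = await resp.json()
            return response["data"][0]["embedding"]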

@@ -2,7 +2,6 @@ import pinecone
class PineconeService:
def __init__(self, index: pinecone.Index):
self.index = index
@@ -10,15 +9,19 @@ class PineconeService:
self.index.upsert([(text, embeddings)])
def get_all_for_conversation(self, conversation_id: int):
response = self.index.query(top_k=100, filter={"conversation_id": conversation_id})
response = self.index.query(
top_k=100, filter={"conversation_id": conversation_id}
)
return response
async def upsert_conversation_embedding(self, model, conversation_id: int, text, timestamp):
async def upsert_conversation_embedding(
self, model, conversation_id: int, text, timestamp
):
# If the text is > 512 characters, we need to split it up into multiple entries.
first_embedding = None
if len(text) > 500:
# Split the text into 512 character chunks
chunks = [text[i:i + 500] for i in range(0, len(text), 500)]
chunks = [text[i : i + 500] for i in range(0, len(text), 500)]
for chunk in chunks:
print("The split chunk is ", chunk)
@@ -26,18 +29,39 @@ class PineconeService:
embedding = await model.send_embedding_request(chunk)
if not first_embedding:
first_embedding = embedding
self.index.upsert([(chunk, embedding)], metadata={"conversation_id": conversation_id, "timestamp": timestamp})
self.index.upsert(
[(chunk, embedding)],
metadata={
"conversation_id": conversation_id,
"timestamp": timestamp,
},
)
return first_embedding
else:
embedding = await model.send_embedding_request(text)
self.index.upsert([(text, embedding, {"conversation_id": conversation_id,
"timestamp": timestamp})])
self.index.upsert(
[
(
text,
embedding,
{"conversation_id": conversation_id, "timestamp": timestamp},
)
]
)
return embedding
def get_n_similar(self, conversation_id: int, embedding, n=10):
response = self.index.query(vector=embedding, top_k=n, include_metadata=True, filter={"conversation_id": conversation_id})
response = self.index.query(
vector=embedding,
top_k=n,
include_metadata=True,
filter={"conversation_id": conversation_id},
)
print(response)
relevant_phrases = [(match['id'],match['metadata']['timestamp']) for match in response['matches']]
relevant_phrases = [
(match["id"], match["metadata"]["timestamp"])
for match in response["matches"]
]
# Sort the relevant phrases based on the timestamp
relevant_phrases.sort(key=lambda x: x[1])
return relevant_phrases
return relevant_phrases
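
A brief usage sketch of the two reformatted methods together, mirroring how the cog above calls them (the surrounding async context, model, conversation_id, and the other variables are assumed to be in scope):

# Store the user's new line, then retrieve the most relevant prior lines.
await pinecone_service.upsert_conversation_embedding(
    model, conversation_id, new_prompt, timestamp
)
query_vector = await model.send_embedding_request(prompt_less_author)
similar = pinecone_service.get_n_similar(conversation_id, query_vector, n=10)
# similar is a list of (text, timestamp) pairs sorted oldest-first.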

@@ -73,6 +73,7 @@ class Thread:
def __str__(self):
return self.__repr__()
class EmbeddedConversationItem:
def __init__(self, text, timestamp):
self.text = text
@@ -104,5 +105,3 @@ class EmbeddedConversationItem:
def __ne__(self, other):
return not self.__eq__(other)
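
For reference, the dedup used earlier in the diff, list(dict.fromkeys(prompt_with_history)), only works if these items are hashable. A hedged sketch of an EmbeddedConversationItem that satisfies it; the __hash__ and the text-based comparison are assumptions, since only __init__ and __ne__ appear in this diff:

class EmbeddedConversationItem:
    def __init__(self, text, timestamp):
        self.text = text
        self.timestamp = timestamp

    def __repr__(self):
        return self.text

    def __str__(self):
        return self.__repr__()

    # Value-based equality plus a matching __hash__ lets
    # list(dict.fromkeys(history)) drop duplicates while keeping order.
    def __eq__(self, other):
        return isinstance(other, EmbeddedConversationItem) and self.text == other.text

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash(self.text)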
