Add Paraphrase

Kaveen Kumarasinghe 2 years ago
parent 4181b2d1a1
commit 11f563e16a

@ -27,6 +27,7 @@ class Commands(discord.Cog, name="Commands"):
image_service_cog,
moderations_cog,
translations_cog=None,
search_cog=None,
):
super().__init__()
self.bot = bot
@ -39,6 +40,7 @@ class Commands(discord.Cog, name="Commands"):
self.image_service_cog = image_service_cog
self.moderations_cog = moderations_cog
self.translations_cog = translations_cog
self.search_cog = search_cog
# Create slash command groups
dalle = discord.SlashCommandGroup(
@ -576,3 +578,25 @@ class Commands(discord.Cog, name="Commands"):
await ctx.respond(
"Translations are disabled on this server.", ephemeral=True
)
@discord.message_command(
    name="Paraphrase",
    guild_ids=ALLOWED_GUILDS,
    # NOTE(review): gates a text command behind the DALL-E role check —
    # looks copy-pasted from an image command; confirm the intended roles.
    checks=[Check.check_dalle_roles()],
)
async def paraphrase_action(self, ctx, message: discord.Message):
    """Message command: delegate paraphrasing of the selected message to the converser cog."""
    await self.converser_cog.paraphrase_action(ctx, message)
# Search slash commands
@discord.slash_command(
    name="search",
    description="Search google alongside GPT3 for something",
    guild_ids=ALLOWED_GUILDS,
)
@discord.option(name="query", description="The query to search", required=True)
@discord.guild_only()
async def search(self, ctx: discord.ApplicationContext, query: str):
    """/search — currently a stub that replies "Not implemented yet".

    NOTE(review): the real handler (self.search_cog.search_command) is wired
    up but commented out below — presumably awaiting completion of the
    search pipeline; confirm before enabling. Also note there is no
    None-check on self.search_cog, unlike the translation commands.
    """
    await ctx.respond("Not implemented yet")
    #await self.search_cog.search_command(ctx, query)

@ -0,0 +1,37 @@
import traceback
import aiohttp
import discord
from models.deepl_model import TranslationModel
from models.search_model import Search
from services.environment_service import EnvService
ALLOWED_GUILDS = EnvService.get_allowed_guilds()
class SearchService(discord.Cog, name="SearchService"):
    """Cog wiring the /search command to the GPT-backed web-search pipeline.

    (The previous docstring and comments were copy-pasted from the
    translation cog and described translation commands / country codes.)
    """

    def __init__(
        self,
        bot,
        gpt_model,
        pinecone_service,
    ):
        """
        Args:
            bot: the running discord bot instance.
            gpt_model: model used for embeddings and the final answer request.
            pinecone_service: vector index used to store/query page embeddings.
        """
        super().__init__()
        self.bot = bot
        # Search encapsulates link retrieval, crawling, embedding and querying.
        self.model = Search(gpt_model, pinecone_service)

    async def search_command(self, ctx, query):
        """Command handler for /search: run the web search for `query`.

        NOTE(review): the Search result is currently discarded and a plain
        "ok" is sent — presumably a stub until the pipeline's answer is
        surfaced to the user.
        """
        await ctx.defer()
        await self.model.search(query)
        await ctx.respond("ok")

@ -1036,6 +1036,19 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
async def ask_gpt_action(self, ctx, message: discord.Message):
    """Message command: forward the selected message's content to /ask.

    Mentions are resolved to usernames before the prompt is forwarded.
    Fix: the previous body contained BOTH the pre-edit call line (passing
    raw message.content) and the post-edit one (passing the resolved
    prompt) — a merge artifact that was a syntax error. The resolved
    prompt is the intended argument, since it is computed just above.
    """
    prompt = await self.mention_to_username(ctx, message.content)
    await self.ask_command(
        ctx, prompt, None, None, None, None, from_action=prompt
    )
async def paraphrase_action(self, ctx, message: discord.Message):
    """Message command: paraphrase the selected message's content via /ask.

    Fix: removed the unused local `user = ctx.user`.
    """
    prompt = await self.mention_to_username(ctx, message.content)
    # Construct the paraphrase prompt
    prompt = f"Paraphrase the following text. Maintain roughly the same text length after paraphrasing and the same tone of voice: {prompt} \n\nParaphrased:"
    await self.ask_command(
        ctx, prompt, None, None, None, None, from_action=prompt
    )

@ -11,6 +11,7 @@ import discord
import pinecone
from pycord.multicog import apply_multicog
from cogs.search_service_cog import SearchService
from cogs.text_service_cog import GPT3ComCon
from cogs.image_service_cog import DrawDallEService
from cogs.prompt_optimizer_cog import ImgPromptOptimizer
@ -29,7 +30,7 @@ from services.environment_service import EnvService
from models.openai_model import Model
__version__ = "8.4.1"
__version__ = "8.5"
PID_FILE = Path("bot.pid")
PROCESS = None
@ -55,13 +56,23 @@ if PINECONE_TOKEN:
if PINECONE_INDEX not in pinecone.list_indexes():
print("Creating pinecone index. Please wait...")
pinecone.create_index(
"conversation-embeddings",
PINECONE_INDEX,
dimension=1536,
metric="dotproduct",
pod_type="s1",
)
PINECONE_INDEX_SEARCH = "search-embeddings"
if PINECONE_INDEX_SEARCH not in pinecone.list_indexes():
print("Creating pinecone index for seraches. Please wait...")
pinecone.create_index(
PINECONE_INDEX_SEARCH,
dimension=1536,
metric="dotproduct",
pod_type="s1",
)
pinecone_service = PineconeService(pinecone.Index(PINECONE_INDEX))
pinecone_search_service = PineconeService(pinecone.Index(PINECONE_INDEX_SEARCH))
print("Got the pinecone service")
#
@ -157,6 +168,10 @@ async def main():
bot.add_cog(TranslationService(bot, TranslationModel()))
print("The translation service is enabled.")
if EnvService.get_google_search_api_key() and EnvService.get_google_search_engine_id():
bot.add_cog(SearchService(bot, model, pinecone_search_service))
print("The Search service is enabled.")
bot.add_cog(
Commands(
bot,
@ -169,6 +184,7 @@ async def main():
bot.get_cog("ImgPromptOptimizer"),
bot.get_cog("ModerationsService"),
bot.get_cog("TranslationService"),
bot.get_cog("SearchService")
)
)

@ -0,0 +1,77 @@
import asyncio
import random
import re

import aiohttp
from bs4 import BeautifulSoup

from services.environment_service import EnvService
from services.usage_service import UsageService
class Search:
    """Web search pipeline: Google → crawl pages → embed chunks → GPT answer."""

    def __init__(self, gpt_model, pinecone_service):
        self.model = gpt_model
        self.pinecone_service = pinecone_service
        self.google_search_api_key = EnvService.get_google_search_api_key()
        self.google_search_engine_id = EnvService.get_google_search_engine_id()

    async def get_links(self, query):
        """Return up to 5 result links for `query` from the Google Custom Search API.

        Returns an empty list on a non-200 response or when no results come
        back, so callers can iterate the result unconditionally.
        (Previously a non-200 returned an error *string*, which search()
        would then iterate character by character; the query was also
        interpolated raw into the URL, breaking on spaces — `params=`
        URL-encodes it.)
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(
                "https://www.googleapis.com/customsearch/v1",
                params={
                    "key": self.google_search_api_key,
                    "cx": self.google_search_engine_id,
                    "q": query,
                },
            ) as response:
                if response.status != 200:
                    return []
                data = await response.json()
                # "items" is absent when the search has no results.
                return [item["link"] for item in data.get("items", [])[:5]]

    async def search(self, query):
        """Search the web for `query` and return GPT3's answer grounded in the results."""
        links = await self.get_links(query)

        # Crawl each page and keep the visible <p> text. A dead or slow link
        # must not abort the whole search, so per-link failures are skipped.
        texts = []
        for link in links:
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.get(link, timeout=5) as response:
                        if response.status == 200:
                            soup = BeautifulSoup(
                                await response.read(), "html.parser"
                            )
                            texts.append(
                                " ".join(p.text for p in soup.find_all("p"))
                            )
            except (aiohttp.ClientError, asyncio.TimeoutError):
                continue
        print("Finished retrieving text content from the links")

        # The pinecone service keys embeddings by conversation_id; a random id
        # scopes the embeddings of this one search.
        conversation_id = random.randint(0, 100000000)
        for text in texts:
            # 500-character chunks keep each embedding request small.
            chunks = [text[i : i + 500] for i in range(0, len(text), 500)]
            for chunk in chunks:
                # upsert_conversation_embedding receives the model and the raw
                # chunk and computes the embedding itself; the previous extra
                # send_embedding_request call here was discarded, so it has
                # been dropped.
                self.pinecone_service.upsert_conversation_embedding(
                    self.model, conversation_id, chunk, 0
                )
        print("Finished creating embeddings for the text")

        # Embed the query and pull the most similar stored chunks.
        query_embedding = await self.model.send_embedding_request(query)
        results = self.pinecone_service.get_n_similar(
            conversation_id, query_embedding, n=3
        )
        # Keep only the text element of each (text, score) result.
        results = [result[0] for result in results]

        # Ask GPT3 to answer the query using the retrieved snippets.
        GPT_QUERY = f"This is a search query. I want to know the answer to the query: {query}. Here are some results from the web: {[str(result) for result in results]}. \n\n Answer:"
        # Previously the response was discarded; return it so callers can
        # surface the answer.
        response = await self.model.send_request(
            GPT_QUERY, UsageService.count_tokens_static(GPT_QUERY)
        )
        return response

@ -309,3 +309,20 @@ class EnvService:
return deepl_token
except Exception:
return None
@staticmethod
def get_google_search_api_key():
try:
google_search_api_key = os.getenv("GOOGLE_SEARCH_API_KEY")
return google_search_api_key
except Exception:
return None
@staticmethod
def get_google_search_engine_id():
try:
google_search_engine_id = os.getenv("GOOGLE_SEARCH_ENGINE_ID")
return google_search_engine_id
except Exception:
return None

@ -58,3 +58,11 @@ class UsageService:
async with aiofiles.open(self.usage_file_path, "w") as f:
await f.write(str(usage + float(price)))
await f.close()
@staticmethod
def count_tokens_static(text):
    """Return the number of GPT-2 tokens in `text`.

    The tokenizer is loaded once and cached on the class: the previous
    code called GPT2TokenizerFast.from_pretrained("gpt2") on every
    invocation, which hits the disk (and possibly the network) each time.
    """
    tokenizer = getattr(UsageService, "_gpt2_tokenizer", None)
    if tokenizer is None:
        tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
        UsageService._gpt2_tokenizer = tokenizer
    return len(tokenizer(text)["input_ids"])

Loading…
Cancel
Save