channel conversations, gpt4 pricing, summarize fix

Kaveen Kumarasinghe 1 year ago
parent 97af56db2b
commit 54291c6032

@ -45,7 +45,9 @@ SUPPORT SERVER FOR BOT SETUP: https://discord.gg/WvAHXDMS7Q (You can try out the
# Recent Notable Updates
- **ChatGPT API Integration** - The ChatGPT API has been released and our bot is now fully integrated with it! Change the model to one of the ChatGPT turbo models with `/system settings`, or include the model as a param in your `/gpt converse`, `/gpt ask`, etc requests! The two currently available ChatGPT models are `gpt-3.5-turbo` and `gpt-3.5-turbo-0301`. This change is very experimental, so we're looking for your feedback and input on what you think of the new model's performance, especially for search and indexing functionality.
- **Full-channel conversations** - Start a conversation in a full discord channel, retained across restarts, permanent memory. Set `use_threads` to False in `/gpt converse`!
- **GPT4 API Integration** - GPT-4 is fully supported by our bot! If you're off the waitlist, put your organization ID in the environment file and it will work out of the box, allowing you to select gpt-4 models in `/system settings` and elsewhere!
- **AI-Assisted Google Search** - Use GPT3 to browse the internet, you can search the internet for a query and GPT3 will look at the top websites for you automatically and formulate an answer to your query! You can also ask follow-up questions, this is kinda like BingGPT, but much better lol!
<p align="center"/>
@ -105,7 +107,7 @@ These commands are grouped, so each group has a prefix but you can easily tab co
`/gpt edit <instruction> <input> <temp> <top_p> <codex>` Use the bot to edit text using the given instructions for how to do it, currently an alpha openai feature so results might vary. Codex uses a model trained on code. Editing is currently free
`/gpt converse <opener> <opener_file> <private> <minimal>` - Start a conversation with the bot, like ChatGPT
`/gpt converse <opener> <opener_file> <private> <minimal>` - Start a conversation with the bot, like ChatGPT. Also use the option `use_threads:False` to start a conversation in a full discord channel!
- `opener:<opener text>` - Start a conversation with the bot, with a custom opener text (this is useful if you want it to take on a custom personality from the start).

@ -503,7 +503,7 @@ class Commands(discord.Cog, name="Commands"):
name="use_threads",
description="Set this to false to start a channel conversation",
required=False,
default=False,
default=True,
)
@discord.guild_only()
async def converse(

@ -388,8 +388,8 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
if thread:
try:
thread = await self.bot.fetch_channel(channel_id)
await thread.edit(locked=True)
await thread.edit(name="Closed-GPT")
await thread.edit(archived=True)
except Exception:
traceback.print_exc()
except Exception:
@ -405,8 +405,8 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
if thread:
try:
thread = await self.bot.fetch_channel(thread_id)
await thread.edit(locked=True)
await thread.edit(name="Closed-GPT")
await thread.edit(archived=True)
except Exception:
traceback.print_exc()
@ -606,17 +606,11 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
)
new_conversation_history.append(
EmbeddedConversationItem(
"\nThis conversation has some context from earlier, which has been summarized as follows: ",
0,
)
)
new_conversation_history.append(EmbeddedConversationItem(summarized_text, 0))
new_conversation_history.append(
EmbeddedConversationItem(
"\nContinue the conversation, paying very close attention to things <username> told you, such as their name, and personal details.\n",
f"\nThis conversation has some context from earlier, which has been summarized as follows: {summarized_text} \nContinue the conversation, paying very close attention to things <username> told you, such as their name, and personal details.",
0,
)
)
# Get the last entry from the thread's conversation history
new_conversation_history.append(
EmbeddedConversationItem(
@ -1077,6 +1071,7 @@ class GPT3ComCon(discord.Cog, name="GPT3ComCon"):
name=user.name + "'s conversation with GPT",
auto_archive_duration=60,
)
await ctx.respond("Conversation started.")
target = thread
else:
target = ctx.channel

@ -33,7 +33,7 @@ from services.environment_service import EnvService
from models.openai_model import Model
__version__ = "11.0.4"
__version__ = "11.1.0"
PID_FILE = Path("bot.pid")

@ -620,10 +620,16 @@ class Model:
f"{details['exception'].args[0]}"
)
async def valid_text_request(self, response):
async def valid_text_request(self,response, model=None):
try:
tokens_used = int(response["usage"]["total_tokens"])
await self.usage_service.update_usage(tokens_used)
if model and model in Models.GPT4_MODELS:
await self.usage_service.update_usage(tokens_used,
prompt_tokens=int(response["usage"]["prompt_tokens"]),
completion_tokens=int(response["usage"]["completion_tokens"]),
gpt4=True)
else:
await self.usage_service.update_usage(tokens_used)
except Exception as e:
raise ValueError(
"The API returned an invalid response: "
@ -775,13 +781,6 @@ class Model:
return response
# async def send_language_detect_request_local(self, text):
# detected = await asyncio.get_running_loop().run_in_executor(
# None, self.detector.compute_language_confidence, text, Language.ENGLISH
# )
# if detected < 0.03:
# return False
# return True
@backoff.on_exception(
backoff.expo,
@ -887,13 +886,28 @@ class Model:
{"role": "assistant", "name": bot_name_clean, "content": text}
)
else:
username = re.search(r"(?<=\n)(.*?)(?=:)", message.text).group()
username_clean = self.cleanse_username(username)
text = message.text.replace(f"{username}:", "")
text = text.replace("<|endofstatement|>", "")
messages.append(
{"role": "user", "name": username_clean, "content": text}
)
try:
print("In first block The message text is ->" + message.text)
if message.text.strip().lower().startswith("this conversation has some context from earlier"):
print("Hit the exception clause")
raise Exception("This is a context message")
username = re.search(r"(?<=\n)(.*?)(?=:)", message.text).group()
username_clean = self.cleanse_username(username)
text = message.text.replace(f"{username}:", "")
text = text.replace("<|endofstatement|>", "")
messages.append(
{"role": "user", "name": username_clean, "content": text}
)
print("Got to here")
except Exception:
print("In second block The message text is ->" + message.text)
text = message.text.replace("<|endofstatement|>", "")
messages.append(
{"role": "system", "content": text}
)
print(f"Messages -> {messages}")
async with aiohttp.ClientSession(raise_for_status=False) as session:
@ -1044,7 +1058,7 @@ class Model:
response = await resp.json()
# print(f"Payload -> {payload}")
# Parse the total tokens used for this request and response pair from the response
await self.valid_text_request(response)
await self.valid_text_request(response, model=self.model if model is None else model)
print(f"Response -> {response}")
return response
@ -1078,7 +1092,7 @@ class Model:
response = await resp.json()
# print(f"Payload -> {payload}")
# Parse the total tokens used for this request and response pair from the response
await self.valid_text_request(response)
await self.valid_text_request(response, model=self.model if model is None else model)
print(f"Response -> {response}")
return response

@ -32,13 +32,13 @@ dependencies = [
"sqlitedict==2.1.0",
"backoff==2.2.1",
"flask==2.2.3",
"llama-index==0.4.27",
"llama-index==0.4.29",
"PyPDF2==3.0.1",
"youtube_transcript_api==0.5.0",
"sentencepiece==0.1.97",
"protobuf==3.20.2",
"python-pptx==0.6.21",
"langchain==0.0.105",
"langchain==0.0.115",
"unidecode==1.3.6",
"tqdm==4.64.1",
"docx2txt==0.8"

@ -12,14 +12,14 @@ pinecone-client==2.1.0
sqlitedict==2.1.0
backoff==2.2.1
flask==2.2.3
llama-index==0.4.27
llama-index==0.4.29
PyPDF2==3.0.1
youtube_transcript_api==0.5.0
sentencepiece==0.1.97
protobuf==3.20.2
python-pptx==0.6.21
sentence-transformers==2.2.2
langchain==0.0.105
langchain==0.0.115
openai-whisper
unidecode==1.3.6
tqdm==4.64.1

@ -12,13 +12,13 @@ pinecone-client==2.1.0
sqlitedict==2.1.0
backoff==2.2.1
flask==2.2.3
llama-index==0.4.27
llama-index==0.4.29
PyPDF2==3.0.1
youtube_transcript_api==0.5.0
sentencepiece==0.1.97
protobuf==3.20.2
python-pptx==0.6.21
langchain==0.0.105
langchain==0.0.115
unidecode==1.3.6
tqdm==4.64.1
docx2txt==0.8

@ -73,14 +73,6 @@ class TextService:
else prompt
), prompt
# Determine if we're sending a ChatGPT model request. If chatgpt is in the model name or the default model is a ChatGPT model.
# chatgpt_conversation = False
# chatgpt = False
# if (model and "chatgpt" in model.lower()) or (not model and converser_cog.model.model.lower() in Models.CHATGPT_MODELS):
# chatgpt = True
# if ctx.channel.id in converser_cog.conversation_threads:
# chatgpt_conversation = True
stop = f"{ctx.author.display_name if user is None else user.display_name}:"
from_context = isinstance(ctx, discord.ApplicationContext)
@ -260,7 +252,7 @@ class TextService:
tokens = converser_cog.usage_service.count_tokens(new_prompt)
if (
tokens > converser_cog.model.summarize_threshold - 150
tokens > converser_cog.model.summarize_threshold
): # 150 is a buffer for the second stage
await ctx.reply(
"I tried to summarize our current conversation so we could keep chatting, "
@ -269,7 +261,7 @@ class TextService:
)
await converser_cog.end_conversation(ctx)
converser_cog.remove_awaiting(ctx.author.id, ctx.channel.id)
converser_cog.remove_awaiting(ctx.author.id, ctx.channel.id, False, False)
return
else:
await ctx.reply("The conversation context limit has been reached.")

@ -14,27 +14,32 @@ class UsageService:
f.close()
self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
async def get_price(self, tokens_used, embeddings=False, chatgpt=False):
async def get_price(self, tokens_used, prompt_tokens=None, completion_tokens=None, embeddings=False, chatgpt=False, gpt4=False):
tokens_used = int(tokens_used)
if chatgpt:
price = (tokens_used / 1000) * 0.002
return price
elif gpt4:
price = (prompt_tokens / 1000) * 0.03 + (completion_tokens / 1000) * 0.06
return price
elif not embeddings:
price = (
tokens_used / 1000
) * 0.02 # Just use the highest rate instead of model-based... I am overestimating on purpose.
) * 0.02
else:
price = (tokens_used / 1000) * 0.0004
return price
async def update_usage(self, tokens_used, embeddings=False, chatgpt=False):
async def update_usage(self, tokens_used, prompt_tokens=None, completion_tokens=None, embeddings=False, chatgpt=False, gpt4=False):
tokens_used = int(tokens_used)
if chatgpt:
price = (tokens_used / 1000) * 0.002
elif gpt4:
price = (prompt_tokens / 1000) * 0.03 + (completion_tokens / 1000) * 0.06
elif not embeddings:
price = (
tokens_used / 1000
) * 0.02 # Just use the highest rate instead of model-based... I am overestimating on purpose.
) * 0.02
else:
price = (tokens_used / 1000) * 0.0004
usage = await self.get_usage()

Loading…
Cancel
Save