diff --git a/cogs/gpt_3_commands_and_converser.py b/cogs/gpt_3_commands_and_converser.py
index b39a20c..3a1f914 100644
--- a/cogs/gpt_3_commands_and_converser.py
+++ b/cogs/gpt_3_commands_and_converser.py
@@ -1219,7 +1219,7 @@ class RedoButton(discord.ui.Button["RedoView"]):
             )

             await self.converser_cog.encapsulated_send(
-                user_id, prompt, ctx, response_message
+                user_id=user_id, prompt=prompt, ctx=ctx, response_message=response_message
             )
         else:
             await interaction.response.send_message(
diff --git a/cogs/image_prompt_optimizer.py b/cogs/image_prompt_optimizer.py
index c136e71..b972c82 100644
--- a/cogs/image_prompt_optimizer.py
+++ b/cogs/image_prompt_optimizer.py
@@ -215,7 +215,7 @@ class RedoButton(discord.ui.Button["OptimizeView"]):
                 "Redoing your original request...", ephemeral=True, delete_after=20
             )
             await self.converser_cog.encapsulated_send(
-                user_id, prompt, ctx, response_message
+                user_id=user_id, prompt=prompt, ctx=ctx, response_message=response_message
             )
         else:
             await interaction.response.send_message(
diff --git a/models/moderations_service_model.py b/models/moderations_service_model.py
index 4beab4d..d5b5335 100644
--- a/models/moderations_service_model.py
+++ b/models/moderations_service_model.py
@@ -51,15 +51,17 @@ class Moderation:
         return embed

     @staticmethod
-    def determine_moderation_result(response):
+    def determine_moderation_result(text, response):
         HATE_THRESHOLD = 0.005
         HATE_VIOLENCE_THRESHOLD = 0.05
         SELF_HARM_THRESHOLD = 0.05
         SEXUAL_THRESHOLD = 0.75
         SEXUAL_MINORS_THRESHOLD = 0.1
-        VIOLENCE_THRESHOLD = 0.01
+        VIOLENCE_THRESHOLD = 0.08
         VIOLENCE_GRAPHIC_THRESHOLD = 0.1

+        extreme_hatred_qualifiers = ["i fucking hate", "fucking hate", "i fucking despise"]
+
         thresholds = [
             HATE_THRESHOLD,
             HATE_VIOLENCE_THRESHOLD,
@@ -85,6 +87,12 @@ class Moderation:

         # Iterate the category scores using the threshold_iterator and compare the values to thresholds
         for category, threshold in zip(threshold_iterator, thresholds):
+            if category == "hate":
+                if "hate" in text.lower():  # The word "hate" makes the model oversensitive. This is a (bad) workaround.
+                    threshold = 0.1
+                if any(word in text.lower() for word in extreme_hatred_qualifiers):
+                    threshold = 0.6
+
             if category_scores[category] > threshold:
                 return True

@@ -110,7 +118,7 @@ class Moderation:
                 response = await model.send_moderations_request(
                     to_moderate.message.content
                 )
-                moderation_result = Moderation.determine_moderation_result(response)
+                moderation_result = Moderation.determine_moderation_result(to_moderate.message.content, response)

                 if moderation_result:
                     # Take care of the flagged message
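
To make the moderation change concrete, below is a minimal, self-contained sketch of the new hate-threshold override, runnable on its own. The `category_scores` dict here stands in for the `category_scores` field of a real OpenAI moderation response, and `effective_hate_threshold` is a hypothetical helper written only for this illustration; neither name appears in the diff itself.

# Standalone sketch of the hate-threshold override added above.
# Assumption: category_scores mirrors the "category_scores" dict
# returned by OpenAI's moderation endpoint for a single result.

HATE_THRESHOLD = 0.005
extreme_hatred_qualifiers = ["i fucking hate", "fucking hate", "i fucking despise"]

def effective_hate_threshold(text: str) -> float:
    # Start from the (very sensitive) default threshold.
    threshold = HATE_THRESHOLD
    if "hate" in text.lower():
        # The literal word "hate" inflates the model's hate score,
        # so the bar is raised from 0.005 to 0.1.
        threshold = 0.1
    if any(word in text.lower() for word in extreme_hatred_qualifiers):
        # Profane hatred qualifiers raise the bar further, to 0.6.
        threshold = 0.6
    return threshold

# "I hate Mondays" scoring 0.03 on "hate" would have been flagged under
# the old 0.005 threshold, but passes under the new 0.1 bar.
category_scores = {"hate": 0.03}
print(category_scores["hate"] > effective_hate_threshold("I hate Mondays"))  # False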