diff --git a/cogs/image_prompt_optimizer.py b/cogs/image_prompt_optimizer.py
index c136e71..a12db52 100644
--- a/cogs/image_prompt_optimizer.py
+++ b/cogs/image_prompt_optimizer.py
@@ -215,7 +215,10 @@ class RedoButton(discord.ui.Button["OptimizeView"]):
                 "Redoing your original request...", ephemeral=True, delete_after=20
             )
             await self.converser_cog.encapsulated_send(
-                user_id, prompt, ctx, response_message
+                user_id=user_id,
+                prompt=prompt,
+                ctx=ctx,
+                response_message=response_message,
             )
         else:
             await interaction.response.send_message(
diff --git a/models/moderations_service_model.py b/models/moderations_service_model.py
index 4beab4d..2565168 100644
--- a/models/moderations_service_model.py
+++ b/models/moderations_service_model.py
@@ -51,15 +51,21 @@ class Moderation:
         return embed
 
     @staticmethod
-    def determine_moderation_result(response):
+    def determine_moderation_result(text, response):
         HATE_THRESHOLD = 0.005
         HATE_VIOLENCE_THRESHOLD = 0.05
         SELF_HARM_THRESHOLD = 0.05
         SEXUAL_THRESHOLD = 0.75
         SEXUAL_MINORS_THRESHOLD = 0.1
-        VIOLENCE_THRESHOLD = 0.01
+        VIOLENCE_THRESHOLD = 0.08
         VIOLENCE_GRAPHIC_THRESHOLD = 0.1
 
+        extreme_hatred_qualifiers = [
+            "i fucking hate",
+            "fucking hate",
+            "i fucking despise",
+        ]
+
         thresholds = [
             HATE_THRESHOLD,
             HATE_VIOLENCE_THRESHOLD,
@@ -85,6 +91,14 @@ class Moderation:
 
         # Iterate the category scores using the threshold_iterator and compare the values to thresholds
         for category, threshold in zip(threshold_iterator, thresholds):
+            if category == "hate":
+                if (
+                    "hate" in text.lower()
+                ):  # The word "hate" makes the model oversensitive. This is a (bad) workaround.
+                    threshold = 0.1
+                if any(word in text.lower() for word in extreme_hatred_qualifiers):
+                    threshold = 0.6
+
             if category_scores[category] > threshold:
                 return True
 
@@ -110,7 +124,9 @@ class Moderation:
                 response = await model.send_moderations_request(
                     to_moderate.message.content
                 )
-                moderation_result = Moderation.determine_moderation_result(response)
+                moderation_result = Moderation.determine_moderation_result(
+                    to_moderate.message.content, response
+                )
 
                 if moderation_result:
                     # Take care of the flagged message
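
For review convenience, a minimal standalone sketch of the per-message threshold override that `determine_moderation_result` now applies to the "hate" category. The helper name `hate_threshold_for` and the sample inputs are illustrative only, not part of the patch; the base value 0.005 mirrors `HATE_THRESHOLD` above.

```python
# Standalone reproduction of the "hate" threshold override from this patch
# (illustrative sketch; the real code mutates `threshold` inside a loop).

EXTREME_HATRED_QUALIFIERS = [
    "i fucking hate",
    "fucking hate",
    "i fucking despise",
]


def hate_threshold_for(text: str, base: float = 0.005) -> float:
    """Return the effective flagging threshold for the "hate" category.

    The bare word "hate" relaxes the threshold to 0.1 (the workaround for the
    oversensitive model noted in the patch); an extreme-hatred qualifier
    relaxes it further to 0.6. The checks are independent `if`s, and because
    every qualifier also contains "hate", a qualifier match always wins.
    """
    lowered = text.lower()
    threshold = base
    if "hate" in lowered:
        threshold = 0.1
    if any(phrase in lowered for phrase in EXTREME_HATRED_QUALIFIERS):
        threshold = 0.6
    return threshold


if __name__ == "__main__":
    assert hate_threshold_for("you people disgust me") == 0.005
    assert hate_threshold_for("I hate Mondays") == 0.1
    assert hate_threshold_for("i fucking hate Mondays") == 0.6
```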