# ---- cogs/commands.py : method added to class Commands ----

@add_to_group("transcribe")
@discord.slash_command(
    name="link",
    description="Transcribe a file link or youtube link",
    guild_ids=ALLOWED_GUILDS,
)
@discord.guild_only()
@discord.option(
    name="link",
    description="A link to transcribe",
    required=True,
    input_type=discord.SlashCommandOptionType.string,
)
@discord.option(
    name="temperature",
    description="The higher the value, the riskier the model will be",
    required=False,
    input_type=discord.SlashCommandOptionType.number,
    max_value=1,
    min_value=0,
)
async def transcribe_link(
    self, ctx: discord.ApplicationContext, link: str, temperature: float
):
    """Slash command "link" in the "transcribe" group.

    Thin wrapper: all work is delegated to
    ``self.transcribe_cog.transcribe_link_command``.

    Args:
        ctx: Invocation context of the slash command.
        link: The link the user wants transcribed.
        temperature: Optional sampling temperature, constrained to 0..1 by
            the option declaration above.
    """
    await self.transcribe_cog.transcribe_link_command(ctx, link, temperature)


# ---- cogs/transcription_service_cog.py : method added to class TranscribeService ----

async def transcribe_link_command(
    self, ctx: discord.ApplicationContext, link: str, temperature: float
):
    """Download the media behind a YouTube link and post its transcription.

    The media is saved to ``audiotemp/<user id>temp``, sent through
    ``self.model.send_transcription_request`` and the result is shown as a
    single embed, or as a paginator when it is longer than 4080 characters.
    The temporary file is closed and removed once the request is finished.

    Args:
        ctx: Invocation context of the slash command.
        link: URL supplied by the user; only YouTube links are handled.
        temperature: Temperature forwarded to the transcription request.
    """
    await ctx.defer()

    user_api_key = None
    if USER_INPUT_API_KEYS:
        user_api_key = await TextService.get_user_api_key(
            ctx.user.id, ctx, USER_KEY_DB
        )
        if not user_api_key:
            return

    # Guard clause: accept regular and short (youtu.be) YouTube URLs only.
    lowered_link = link.lower()
    if "youtube" not in lowered_link and "youtu.be" not in lowered_link:
        await ctx.respond(
            "Please upload a valid youtube link. Other links are not implemented yet"
        )
        return

    temp_name = "{}temp".format(ctx.user.id)
    temp_path = Path("audiotemp") / temp_name

    def _download() -> str:
        """Fetch the first available stream to ``temp_path``; return its path."""
        # Remove any leftover from a previous run so a stale file is never
        # picked up in place of the requested video.
        temp_path.unlink(missing_ok=True)
        # Building the YouTube object can raise on a malformed URL and
        # resolving ``.streams`` performs blocking network requests, so both
        # happen here, inside the worker thread and inside the caller's try.
        stream = YouTube(link).streams.filter().first()
        if stream is None:
            raise ValueError("No downloadable stream was found for this video")
        return stream.download(output_path="audiotemp", filename=temp_name)

    try:
        file_path = await asyncio.get_running_loop().run_in_executor(
            None, _download
        )
    except Exception as e:
        traceback.print_exc()
        await ctx.respond(
            "Failed to download youtube video. Please try again later. " + str(e)
        )
        return

    # discord.File opens the path immediately; the finally below releases it.
    file = discord.File(file_path)
    try:
        await ctx.respond(embed=EmbedStatics.build_transcribe_progress_embed())

        try:
            # NOTE(review): send_transcription_request awaits ``file.fp.read()``
            # for discord.File inputs; ``fp`` looks like a plain binary file
            # object whose read() is synchronous - confirm that call works.
            response = await self.model.send_transcription_request(
                file, temperature, user_api_key
            )

            if len(response) > 4080:
                # Too long for one embed: split into 2048-character pages.
                embed_pages = [
                    discord.Embed(
                        title="Transcription Page {}".format(page_number),
                        description=response[start : start + 2048],
                    )
                    for page_number, start in enumerate(
                        range(0, len(response), 2048), start=1
                    )
                ]
                paginator = pages.Paginator(
                    pages=embed_pages,
                    timeout=None,
                    author_check=False,
                )
                await paginator.respond(ctx.interaction)
                # Drop the "in progress" embed now that the pages are posted.
                await ctx.interaction.delete_original_response()
                return

            # The interaction was deferred, so ctx.respond() above went out as
            # a followup and its return value is not the Interaction; edit
            # the original response through ctx.interaction instead.
            await ctx.interaction.edit_original_response(
                embed=EmbedStatics.build_transcribe_success_embed(response)
            )
        except Exception as e:
            traceback.print_exc()
            await ctx.interaction.edit_original_response(
                embed=EmbedStatics.build_transcribe_failed_embed(str(e))
            )
    finally:
        # Always release the handle and remove the download, whichever
        # branch above was taken.
        file.close()
        Path(file_path).unlink(missing_ok=True)
+ await ctx.defer() user_api_key = None if USER_INPUT_API_KEYS: @@ -59,7 +129,6 @@ class TranscribeService(discord.Cog, name="TranscribeService"): for chunk in chunks: embed_pages.append(discord.Embed(title="Transcription Page {}".format(len(embed_pages) + 1), description=chunk)) - paginator = pages.Paginator( pages=embed_pages, timeout=None, diff --git a/models/openai_model.py b/models/openai_model.py index 705b71a..ed173fb 100644 --- a/models/openai_model.py +++ b/models/openai_model.py @@ -923,14 +923,16 @@ class Model: max_tries=4, on_backoff=backoff_handler_request, ) - async def send_transcription_request(self, file: discord.Attachment, temperature_override=None, custom_api_key=None, ): + async def send_transcription_request(self, file: [discord.Attachment, discord.File], temperature_override=None, custom_api_key=None, ): async with aiohttp.ClientSession(raise_for_status=True) as session: data = aiohttp.FormData() data.add_field("model", "whisper-1") + print("audio."+file.filename.split(".")[-1]) data.add_field( - "file", await file.read(), filename="audio."+file.filename.split(".")[-1], content_type=file.content_type + "file", await file.read() if isinstance(file, discord.Attachment) else await file.fp.read(), filename="audio."+file.filename.split(".")[-1] if isinstance(file, discord.Attachment) else "audio.mp4", content_type=file.content_type if isinstance(file, discord.Attachment) else "video/mp4" ) + if temperature_override: data.add_field("temperature", temperature_override) diff --git a/pyproject.toml b/pyproject.toml index b96df77..3c2df1d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ classifiers = [ dependencies = [ "Pillow==9.3.0", "openai==0.27.0", +"pytube==12.1.2", "py-cord==2.3.2", "python-dotenv==0.21.0", "requests==2.28.1", diff --git a/requirements.txt b/requirements.txt index cd3c727..03f31b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ Pillow==9.3.0 openai==0.27.0 +pytube==12.1.2 py-cord==2.3.2 
python-dotenv==0.21.0 requests==2.28.1 diff --git a/requirements_base.txt b/requirements_base.txt index 76e0fce..c4b69d2 100644 --- a/requirements_base.txt +++ b/requirements_base.txt @@ -1,5 +1,6 @@ Pillow==9.3.0 openai==0.27.0 +pytube==12.1.2 py-cord==2.3.2 python-dotenv==0.21.0 requests==2.28.1