tentative youtube link support for transcribe

2 years ago · 1333c58c93
parent 2bdd9baced
commit 1333c58c93
6 changed files with 101 additions and 3 deletions
--- a/cogs/commands.py
+++ b/cogs/commands.py
@ -1044,3 +1044,27 @@ class Commands(discord.Cog, name="Commands"):
            self, ctx: discord.ApplicationContext, file: discord.Attachment, temperature: float
    ):
        await self.transcribe_cog.transcribe_file_command(ctx, file, temperature)
+
+    @add_to_group("transcribe")
+    @discord.slash_command(
+        name="link", description="Transcribe a file link or youtube link", guild_ids=ALLOWED_GUILDS
+    )
+    @discord.guild_only()
+    @discord.option(
+        name="link",
+        description="A link to transcribe",
+        required=True,
+        input_type=discord.SlashCommandOptionType.string,
+    )
+    @discord.option(
+        name="temperature",
+        description="The higher the value, the riskier the model will be",
+        required=False,
+        input_type=discord.SlashCommandOptionType.number,
+        max_value=1,
+        min_value=0,
+    )
+    async def transcribe_link(
+            self, ctx: discord.ApplicationContext, link: str, temperature: float
+    ):
+        await self.transcribe_cog.transcribe_link_command(ctx, link, temperature)
--- a/cogs/transcription_service_cog.py
+++ b/cogs/transcription_service_cog.py
@ -1,8 +1,12 @@
+import asyncio
 import traceback
+from functools import partial
+from pathlib import Path

 import aiohttp
 import discord
 from discord.ext import pages
+from pytube import YouTube

 from models.deepl_model import TranslationModel
 from models.embed_statics_model import EmbedStatics
@ -25,9 +29,75 @@ class TranscribeService(discord.Cog, name="TranscribeService"):
        self.bot = bot
        self.model = model
        self.usage_service = usage_service
+        # Make the "audiotemp" folder if it doesn't exist, using pathlib
+        Path("audiotemp").mkdir(parents=True, exist_ok=True)
+    async def transcribe_link_command(self, ctx: discord.ApplicationContext, link:str, temperature: float):
+        # Download the media behind a YouTube link into audiotemp/ and transcribe it; other link types are not implemented yet.
+        await ctx.defer()
+
+        user_api_key = None
+        if USER_INPUT_API_KEYS:
+            user_api_key = await TextService.get_user_api_key(
+                ctx.user.id, ctx, USER_KEY_DB
+            )
+            if not user_api_key:
+                return
+
+        if "youtube" in link:
+            # We need to download the youtube video and save it to a temporary file
+            yt = YouTube(link)
+
+            # Delete audiotemp/{str(ctx.user.id)}temp.mp3 if it already exists
+            if Path("audiotemp/{}temp.mp3".format(str(ctx.user.id))).exists():
+                Path("audiotemp/{}temp.mp3".format(str(ctx.user.id))).unlink()
+            print("before call")
+            try:
+                file_path = await asyncio.get_running_loop().run_in_executor(None, partial(yt.streams.filter().first().download, output_path="audiotemp", filename="{}temp".format(str(ctx.user.id))))
+            except Exception as e:
+                traceback.print_exc()
+                await ctx.respond("Failed to download youtube video. Please try again later. "+str(e))
+                return
+
+            print("after call the file path was" + file_path)
+        else:
+            await ctx.respond("Please upload a valid youtube link. Other links are not implemented yet")
+            return
+
+        # Load the file object from the file_path
+        file = discord.File(file_path)
+
+        response_message = await ctx.respond(embed=EmbedStatics.build_transcribe_progress_embed())
+
+        try:
+
+            response = await self.model.send_transcription_request(file, temperature, user_api_key)
+            print(response)
+
+            if len(response) > 4080:
+                # Chunk the response into 2048 character chunks, each an embed page
+                chunks = [response[i:i+2048] for i in range(0, len(response), 2048)]
+                embed_pages = []
+                for chunk in chunks:
+                    embed_pages.append(discord.Embed(title="Transcription Page {}".format(len(embed_pages) + 1), description=chunk))
+
+                paginator = pages.Paginator(
+                    pages=embed_pages,
+                    timeout=None,
+                    author_check=False,
+                )
+
+                await paginator.respond(ctx.interaction)
+                await response_message.delete_original_response()
+                return
+
+            await response_message.edit_original_response(embed=EmbedStatics.build_transcribe_success_embed(response))
+        except Exception as e:
+            await response_message.edit_original_response(embed=EmbedStatics.build_transcribe_failed_embed(str(e)))
+

    async def transcribe_file_command(self, ctx: discord.ApplicationContext, file: discord.Attachment, temperature: float):
        # Check if this discord file is an instance of mp3, mp4, mpeg, mpga, m4a, wav, or webm.
+        await ctx.defer()

        user_api_key = None
        if USER_INPUT_API_KEYS:
@ -59,7 +129,6 @@ class TranscribeService(discord.Cog, name="TranscribeService"):
                for chunk in chunks:
                    embed_pages.append(discord.Embed(title="Transcription Page {}".format(len(embed_pages) + 1), description=chunk))

-
                paginator = pages.Paginator(
                    pages=embed_pages,
                    timeout=None,
--- a/models/openai_model.py
+++ b/models/openai_model.py
@ -923,14 +923,16 @@ class Model:
        max_tries=4,
        on_backoff=backoff_handler_request,
    )
-    async def send_transcription_request(self, file: discord.Attachment, temperature_override=None, custom_api_key=None, ):
+    async def send_transcription_request(self, file: [discord.Attachment, discord.File], temperature_override=None, custom_api_key=None, ):

        async with aiohttp.ClientSession(raise_for_status=True) as session:
            data = aiohttp.FormData()
            data.add_field("model", "whisper-1")
+            print("audio."+file.filename.split(".")[-1])
            data.add_field(
-                "file", await file.read(), filename="audio."+file.filename.split(".")[-1], content_type=file.content_type
+                "file", await file.read() if isinstance(file, discord.Attachment) else await file.fp.read(), filename="audio."+file.filename.split(".")[-1] if isinstance(file, discord.Attachment) else "audio.mp4", content_type=file.content_type if isinstance(file, discord.Attachment) else "video/mp4"
            )
+
            if temperature_override:
                data.add_field("temperature", temperature_override)

--- a/pyproject.toml
+++ b/pyproject.toml
@ -20,6 +20,7 @@ classifiers = [
 dependencies = [
 "Pillow==9.3.0",
 "openai==0.27.0",
+"pytube==12.1.2",
 "py-cord==2.3.2",
 "python-dotenv==0.21.0",
 "requests==2.28.1",
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,6 @@
 Pillow==9.3.0
 openai==0.27.0
+pytube==12.1.2
 py-cord==2.3.2
 python-dotenv==0.21.0
 requests==2.28.1
--- a/requirements_base.txt
+++ b/requirements_base.txt
@ -1,5 +1,6 @@
 Pillow==9.3.0
 openai==0.27.0
+pytube==12.1.2
 py-cord==2.3.2
 python-dotenv==0.21.0
 requests==2.28.1