tentative youtube link support for transcribe

Kaveen Kumarasinghe 1 year ago
parent 2bdd9baced
commit 1333c58c93

@ -1044,3 +1044,27 @@ class Commands(discord.Cog, name="Commands"):
self, ctx: discord.ApplicationContext, file: discord.Attachment, temperature: float
):
await self.transcribe_cog.transcribe_file_command(ctx, file, temperature)
@add_to_group("transcribe")
@discord.slash_command(
    name="link",
    description="Transcribe a file link or youtube link",
    guild_ids=ALLOWED_GUILDS,
)
@discord.guild_only()
@discord.option(
    name="link",
    description="A link to transcribe",
    required=True,
    input_type=discord.SlashCommandOptionType.string,
)
@discord.option(
    name="temperature",
    description="The higher the value, the riskier the model will be",
    required=False,
    input_type=discord.SlashCommandOptionType.number,
    max_value=1,
    min_value=0,
)
async def transcribe_link(
    self,
    ctx: discord.ApplicationContext,
    link: str,
    temperature: float,
):
    """Slash command "link", attached to the "transcribe" group.

    Thin entry point: all work is delegated to the transcribe cog's
    ``transcribe_link_command``.

    Args:
        ctx: The application context of the invoking interaction.
        link: The link the user wants transcribed.
        temperature: Optional sampling temperature, constrained to 0..1 by
            the option declaration above.
    """
    cog = self.transcribe_cog
    await cog.transcribe_link_command(ctx, link, temperature)

@ -1,8 +1,12 @@
import asyncio
import traceback
from functools import partial
from pathlib import Path
import aiohttp
import discord
from discord.ext import pages
from pytube import YouTube
from models.deepl_model import TranslationModel
from models.embed_statics_model import EmbedStatics
@ -25,9 +29,75 @@ class TranscribeService(discord.Cog, name="TranscribeService"):
self.bot = bot
self.model = model
self.usage_service = usage_service
# Make the "audiotemp" folder if it doesn't exist, using pathlib
Path("audiotemp").mkdir(parents=True, exist_ok=True)
async def transcribe_link_command(self, ctx: discord.ApplicationContext, link: str, temperature: float):
    """Download the video behind a YouTube link and post its transcription.

    The interaction is deferred, the user's API key is resolved when
    per-user keys are enabled, the first available stream of the video is
    downloaded into ``audiotemp/`` and sent to the model for transcription.
    Long transcriptions (over 4080 characters) are delivered as paginated
    embeds of 2048 characters each; shorter ones as a single embed.

    Args:
        ctx: The application context of the invoking interaction.
        link: The link to transcribe. Only YouTube links are handled.
        temperature: Sampling temperature forwarded to the transcription
            request (may be None when the option was omitted).
    """
    await ctx.defer()

    user_api_key = None
    if USER_INPUT_API_KEYS:
        user_api_key = await TextService.get_user_api_key(
            ctx.user.id, ctx, USER_KEY_DB
        )
        if not user_api_key:
            return

    # Only YouTube is implemented so far; "youtu.be" is the share-link form
    # of the same videos.
    if "youtube" not in link and "youtu.be" not in link:
        await ctx.respond("Please upload a valid youtube link. Other links are not implemented yet")
        return

    # One temp file per user. The download below writes exactly this name,
    # so this is also the path that has to be cleaned up.
    temp_name = "{}temp".format(str(ctx.user.id))
    temp_path = Path("audiotemp") / temp_name

    def download_video():
        # Executed in a worker thread: resolving the stream list and
        # downloading are blocking network calls, and YouTube(link) can
        # raise on a malformed link, so all of it lives inside the guarded
        # executor call rather than on the event loop.
        temp_path.unlink(missing_ok=True)  # drop a leftover from an earlier run
        stream = YouTube(link).streams.filter().first()
        if stream is None:
            raise RuntimeError("No downloadable streams were found for this video")
        return stream.download(output_path="audiotemp", filename=temp_name)

    try:
        file_path = await asyncio.get_running_loop().run_in_executor(None, download_video)
    except Exception as e:
        traceback.print_exc()
        await ctx.respond("Failed to download youtube video. Please try again later. " + str(e))
        return

    # Load the file object from the file_path
    # NOTE(review): send_transcription_request awaits file.fp.read() for
    # discord.File inputs; confirm that works with the synchronous file
    # object discord.File opens for a path.
    file = discord.File(file_path)
    try:
        response_message = await ctx.respond(embed=EmbedStatics.build_transcribe_progress_embed())
        try:
            response = await self.model.send_transcription_request(file, temperature, user_api_key)
            print(response)

            if len(response) > 4080:
                # Chunk the response into 2048 character chunks, each an embed page
                embed_pages = [
                    discord.Embed(
                        title="Transcription Page {}".format(page_number),
                        description=response[start:start + 2048],
                    )
                    for page_number, start in enumerate(range(0, len(response), 2048), start=1)
                ]
                paginator = pages.Paginator(
                    pages=embed_pages,
                    timeout=None,
                    author_check=False,
                )
                await paginator.respond(ctx.interaction)
                await response_message.delete_original_response()
            else:
                await response_message.edit_original_response(embed=EmbedStatics.build_transcribe_success_embed(response))
        except Exception as e:
            await response_message.edit_original_response(embed=EmbedStatics.build_transcribe_failed_embed(str(e)))
    finally:
        # Release the open handle and remove the downloaded media so temp
        # files do not accumulate between invocations.
        file.close()
        Path(file_path).unlink(missing_ok=True)
async def transcribe_file_command(self, ctx: discord.ApplicationContext, file: discord.Attachment, temperature: float):
# Check if this discord file is an instance of mp3, mp4, mpeg, mpga, m4a, wav, or webm.
await ctx.defer()
user_api_key = None
if USER_INPUT_API_KEYS:
@ -59,7 +129,6 @@ class TranscribeService(discord.Cog, name="TranscribeService"):
for chunk in chunks:
embed_pages.append(discord.Embed(title="Transcription Page {}".format(len(embed_pages) + 1), description=chunk))
paginator = pages.Paginator(
pages=embed_pages,
timeout=None,

@ -923,14 +923,16 @@ class Model:
max_tries=4,
on_backoff=backoff_handler_request,
)
async def send_transcription_request(self, file: discord.Attachment, temperature_override=None, custom_api_key=None, ):
async def send_transcription_request(self, file: [discord.Attachment, discord.File], temperature_override=None, custom_api_key=None, ):
async with aiohttp.ClientSession(raise_for_status=True) as session:
data = aiohttp.FormData()
data.add_field("model", "whisper-1")
print("audio."+file.filename.split(".")[-1])
data.add_field(
"file", await file.read(), filename="audio."+file.filename.split(".")[-1], content_type=file.content_type
"file", await file.read() if isinstance(file, discord.Attachment) else await file.fp.read(), filename="audio."+file.filename.split(".")[-1] if isinstance(file, discord.Attachment) else "audio.mp4", content_type=file.content_type if isinstance(file, discord.Attachment) else "video/mp4"
)
if temperature_override:
data.add_field("temperature", temperature_override)

@ -20,6 +20,7 @@ classifiers = [
dependencies = [
"Pillow==9.3.0",
"openai==0.27.0",
"pytube==12.1.2",
"py-cord==2.3.2",
"python-dotenv==0.21.0",
"requests==2.28.1",

@ -1,5 +1,6 @@
Pillow==9.3.0
openai==0.27.0
pytube==12.1.2
py-cord==2.3.2
python-dotenv==0.21.0
requests==2.28.1

@ -1,5 +1,6 @@
Pillow==9.3.0
openai==0.27.0
pytube==12.1.2
py-cord==2.3.2
python-dotenv==0.21.0
requests==2.28.1

Loading…
Cancel
Save