Improved whisper provider to not throttle when an unsupported audio language is encountered. #2474

As we have noted before, bad input data should be no reason to throttle a provider.
In this case, if the input language was not supported by whisper, we raised a ValueError that was never caught. This caused an error in the whisper provider, for which it was throttled.
Instead, we are now detecting this case and logging an error message.
However, given that the input language was not one of the 99 currently known to whisper, it's probably a mislabeled audio track. If the user's desired output language is English, then we will tell whisper that the input audio is also English and ask it to transcribe it. Whisper does a very good job of transcribing almost anything to English, so it's worth a try.
This should address the throttling in issue #2474.
pull/2481/head
JayZed 8 months ago committed by GitHub
parent c5a5dc9ddf
commit 5749971d67
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -169,7 +169,7 @@ def whisper_get_language_reverse(alpha3):
lan = whisper_get_language(wl, whisper_languages[wl])
if lan.alpha3 == alpha3:
return wl
raise ValueError
return None
def language_from_alpha3(lang):
name = Language(lang).name
@ -317,7 +317,7 @@ class WhisperAIProvider(Provider):
if out == None:
logger.info(f"Whisper cannot process {subtitle.video.original_path} because of missing/bad audio track")
subtitle.content = None
return
return
logger.debug(f'Audio stream length (in WAV format) is {len(out):,} bytes')
@ -326,11 +326,23 @@ class WhisperAIProvider(Provider):
else:
output_language = "eng"
input_language = whisper_get_language_reverse(subtitle.audio_language)
if input_language is None:
if output_language == "eng":
# guess that audio track is mislabelled English and let whisper try to transcribe it
input_language = "en"
subtitle.task = "transcribe"
logger.info(f"Whisper treating unsupported audio track language: '{subtitle.audio_language}' as English")
else:
logger.info(f"Whisper cannot process {subtitle.video.original_path} because of unsupported audio track language: '{subtitle.audio_language}'")
subtitle.content = None
return
logger.info(f'Starting WhisperAI {subtitle.task} to {language_from_alpha3(output_language)} for {subtitle.video.original_path}')
startTime = time.time()
r = self.session.post(f"{self.endpoint}/asr",
params={'task': subtitle.task, 'language': whisper_get_language_reverse(subtitle.audio_language), 'output': 'srt', 'encode': 'false'},
params={'task': subtitle.task, 'language': input_language, 'output': 'srt', 'encode': 'false'},
files={'audio_file': out},
timeout=(self.response, self.timeout))

Loading…
Cancel
Save