Refactored the translation routine to prevent Google Translate from messing with the subtitle sequence by sending the subtitles line by line (slower but better). #2558

8 months ago · 609349b400
parent 00c7eabd8c
commit 609349b400
1 changed files with 43 additions and 28 deletions
--- a/bazarr/subtitles/tools/translate.py
+++ b/bazarr/subtitles/tools/translate.py
@ -6,12 +6,16 @@ import pysubs2
 from subliminal_patch.core import get_subtitle_path
 from subzero.language import Language
 from deep_translator import GoogleTranslator
+from deep_translator.exceptions import TooManyRequests, RequestError, TranslationNotFound
+from time import sleep
+from concurrent.futures import ThreadPoolExecutor

 from languages.custom_lang import CustomLanguage
 from languages.get_languages import alpha3_from_alpha2, language_from_alpha2, language_from_alpha3
 from radarr.history import history_log_movie
 from sonarr.history import history_log
 from subtitles.processing import ProcessSubtitlesResult
+from app.event_handler import show_progress, hide_progress


 def translate_subtitles_file(video_path, source_srt_file, from_lang, to_lang, forced, hi, media_type, sonarr_series_id,
@ -33,8 +37,6 @@ def translate_subtitles_file(video_path, source_srt_file, from_lang, to_lang, fo

    logging.debug(f'BAZARR is translating in {lang_obj} this subtitles {source_srt_file}')

-    max_characters = 5000
-
    dest_srt_file = get_subtitle_path(video_path,
                                      language=lang_obj if isinstance(lang_obj, Language) else lang_obj.subzero_language(),
                                      extension='.srt',
@ -44,40 +46,53 @@ def translate_subtitles_file(video_path, source_srt_file, from_lang, to_lang, fo
    subs = pysubs2.load(source_srt_file, encoding='utf-8')
    subs.remove_miscellaneous_events()
    lines_list = [x.plaintext for x in subs]
-    joined_lines_str = '\n\n'.join(lines_list)
-
-    logging.debug(f'BAZARR splitting subtitles into {max_characters} characters blocks')
-    lines_block_list = []
-    translated_lines_list = []
-    while len(joined_lines_str):
-        partial_lines_str = joined_lines_str[:max_characters]
+    lines_list_len = len(lines_list)

-        if len(joined_lines_str) > max_characters:
-            new_partial_lines_str = partial_lines_str.rsplit('\n\n', 1)[0]
+    def translate_line(id, line, attempt):
+        """Translate a single subtitle line and record the result in translated_lines.
+
+        On success, or on a handled failure, a dict {'id': id, 'line': text} is
+        appended to translated_lines so the caller can put the text back at its
+        original index. When the translation fails, the untranslated text is
+        recorded instead.
+
+        :param id: index of the line in lines_list
+        :param line: plain text of the subtitle line to translate
+        :param attempt: attempt counter (callers start at 1); a rate-limited
+            request is retried while attempt <= 5
+        """
+        try:
+            translated_text = GoogleTranslator(
+                source='auto',
+                target=language_code_convert_dict.get(lang_obj.alpha2, lang_obj.alpha2)
+            ).translate(text=line)
+        except TooManyRequests:
+            if attempt <= 5:
+                # Back off briefly, then retry. The nested call appends its own
+                # result, so nothing else has to be recorded here. (This used to
+                # be wrapped in super(), which raised TypeError after the retry.)
+                sleep(1)
+                translate_line(id, line, attempt+1)
+            else:
+                logging.debug(f'Too many requests while translating {line}')
+                translated_lines.append({'id': id, 'line': line})
+        except (RequestError, TranslationNotFound):
+            # Keep the original text so the subtitle line is not lost.
+            logging.debug(f'Unable to translate line {line}')
+            translated_lines.append({'id': id, 'line': line})
        else:
-            new_partial_lines_str = partial_lines_str
+            translated_lines.append({'id': id, 'line': translated_text})
+        finally:
+            # Report progress after every attempt, whatever its outcome.
+            show_progress(id=f'translate_progress_{dest_srt_file}',
+                          header=f'Translating subtitles lines to {language_from_alpha3(to_lang)}...',
+                          name='',
+                          value=len(translated_lines),
+                          count=lines_list_len)

-        lines_block_list.append(new_partial_lines_str)
-        joined_lines_str = joined_lines_str.replace(new_partial_lines_str, '')
+    logging.debug(f'BAZARR is sending {lines_list_len} blocks to Google Translate')

-    logging.debug(f'BAZARR is sending {len(lines_block_list)} blocks to Google Translate')
-    for block_str in lines_block_list:
-        try:
-            translated_partial_srt_text = GoogleTranslator(source='auto',
-                                                           target=language_code_convert_dict.get(lang_obj.alpha2,
-                                                                                                 lang_obj.alpha2)
-                                                           ).translate(text=block_str)
-        except Exception:
-            logging.exception(f'BAZARR Unable to translate subtitles {source_srt_file}')
-            return False
-        else:
-            translated_partial_srt_list = translated_partial_srt_text.split('\n\n')
-            translated_lines_list += translated_partial_srt_list
+    pool = ThreadPoolExecutor(max_workers=10)
+
+    translated_lines = []
+
+    for i, line in enumerate(lines_list):
+        pool.submit(translate_line, i, line, 1)
+
+    pool.shutdown(wait=True)
+
+    for i, line in enumerate(translated_lines):
+        lines_list[line['id']] = line['line']
+
+    hide_progress(id=f'translate_progress_{dest_srt_file}')

    logging.debug(f'BAZARR saving translated subtitles to {dest_srt_file}')
    for i, line in enumerate(subs):
        try:
-            line.plaintext = translated_lines_list[i]
+            line.plaintext = lines_list[i]
        except IndexError:
            logging.error(f'BAZARR is unable to translate malformed subtitles: {source_srt_file}')
            return False