From c74f9e9602ed3c426e4e01ffc4acef44a127168b Mon Sep 17 00:00:00 2001 From: morpheus65535 Date: Wed, 10 Aug 2022 22:34:06 -0400 Subject: [PATCH] Tried to improve full disk daily indexing tasks resources usage. #1916 --- bazarr/subtitles/indexer/movies.py | 24 +++++++++++++++++++++--- bazarr/subtitles/indexer/series.py | 24 +++++++++++++++++++++--- bazarr/subtitles/indexer/utils.py | 30 +++++++++++++++++++++--------- 3 files changed, 63 insertions(+), 15 deletions(-) diff --git a/bazarr/subtitles/indexer/movies.py b/bazarr/subtitles/indexer/movies.py index a32d74043..d83458d25 100644 --- a/bazarr/subtitles/indexer/movies.py +++ b/bazarr/subtitles/indexer/movies.py @@ -55,7 +55,7 @@ def store_subtitles_movie(original_path, reversed_path, use_cache=True): if subtitle_hi: lang = lang + ':hi' logging.debug("BAZARR embedded subtitles detected: " + lang) - actual_subtitles.append([lang, None]) + actual_subtitles.append([lang, None, None]) except Exception: logging.debug("BAZARR unable to index this unrecognized language: " + subtitle_language) pass @@ -68,6 +68,22 @@ def store_subtitles_movie(original_path, reversed_path, use_cache=True): try: dest_folder = get_subtitle_destination_folder() or '' core.CUSTOM_PATHS = [dest_folder] if dest_folder else [] + + # get previously indexed subtitles that haven't changed: + item = TableMovies.select(TableMovies.subtitles) \ + .where(TableMovies.path == original_path) \ + .dicts() \ + .get_or_none() + if not item: + previously_indexed_subtitles_to_exclude = [] + else: + previously_indexed_subtitles = ast.literal_eval(item['subtitles']) + previously_indexed_subtitles_to_exclude = [x for x in previously_indexed_subtitles + if len(x) == 3 and + x[1] and + os.path.isfile(path_mappings.path_replace(x[1])) and + os.stat(path_mappings.path_replace(x[1])).st_size == x[2]] + subtitles = search_external_subtitles(reversed_path, languages=get_language_set()) full_dest_folder_path = os.path.dirname(reversed_path) if dest_folder: @@ -75,7 +91,8 @@ def store_subtitles_movie(original_path, reversed_path, use_cache=True): full_dest_folder_path = dest_folder elif settings.general.subfolder == "relative": full_dest_folder_path = os.path.join(os.path.dirname(reversed_path), dest_folder) - subtitles = guess_external_subtitles(full_dest_folder_path, subtitles) + subtitles = guess_external_subtitles(full_dest_folder_path, subtitles, "movie", + previously_indexed_subtitles_to_exclude) except Exception: logging.exception("BAZARR unable to index external subtitles.") pass @@ -107,7 +124,8 @@ def store_subtitles_movie(original_path, reversed_path, use_cache=True): else: language_str = str(language) logging.debug("BAZARR external subtitles detected: " + language_str) - actual_subtitles.append([language_str, path_mappings.path_replace_reverse_movie(subtitle_path)]) + actual_subtitles.append([language_str, path_mappings.path_replace_reverse_movie(subtitle_path), + os.stat(subtitle_path).st_size]) TableMovies.update({TableMovies.subtitles: str(actual_subtitles)})\ .where(TableMovies.path == original_path)\ diff --git a/bazarr/subtitles/indexer/series.py b/bazarr/subtitles/indexer/series.py index a4283c485..4ce7e2228 100644 --- a/bazarr/subtitles/indexer/series.py +++ b/bazarr/subtitles/indexer/series.py @@ -55,7 +55,7 @@ def store_subtitles(original_path, reversed_path, use_cache=True): if subtitle_hi: lang = lang + ":hi" logging.debug("BAZARR embedded subtitles detected: " + lang) - actual_subtitles.append([lang, None]) + actual_subtitles.append([lang, None, None]) except Exception as error: logging.debug("BAZARR unable to index this unrecognized language: %s (%s)", subtitle_language, error) except Exception: @@ -66,6 +66,22 @@ def store_subtitles(original_path, reversed_path, use_cache=True): try: dest_folder = get_subtitle_destination_folder() core.CUSTOM_PATHS = [dest_folder] if dest_folder else [] + + # get previously indexed subtitles that haven't changed: + item = TableEpisodes.select(TableEpisodes.subtitles) \ + .where(TableEpisodes.path == original_path) \ + .dicts() \ + .get_or_none() + if not item: + previously_indexed_subtitles_to_exclude = [] + else: + previously_indexed_subtitles = ast.literal_eval(item['subtitles']) + previously_indexed_subtitles_to_exclude = [x for x in previously_indexed_subtitles + if len(x) == 3 and + x[1] and + os.path.isfile(path_mappings.path_replace(x[1])) and + os.stat(path_mappings.path_replace(x[1])).st_size == x[2]] + subtitles = search_external_subtitles(reversed_path, languages=get_language_set(), only_one=settings.general.getboolean('single_language')) full_dest_folder_path = os.path.dirname(reversed_path) @@ -74,7 +90,8 @@ def store_subtitles(original_path, reversed_path, use_cache=True): full_dest_folder_path = dest_folder elif settings.general.subfolder == "relative": full_dest_folder_path = os.path.join(os.path.dirname(reversed_path), dest_folder) - subtitles = guess_external_subtitles(full_dest_folder_path, subtitles) + subtitles = guess_external_subtitles(full_dest_folder_path, subtitles, "series", + previously_indexed_subtitles_to_exclude) except Exception: logging.exception("BAZARR unable to index external subtitles.") else: @@ -105,7 +122,8 @@ def store_subtitles(original_path, reversed_path, use_cache=True): else: language_str = str(language) logging.debug("BAZARR external subtitles detected: " + language_str) - actual_subtitles.append([language_str, path_mappings.path_replace_reverse(subtitle_path)]) + actual_subtitles.append([language_str, path_mappings.path_replace_reverse(subtitle_path), + os.stat(subtitle_path).st_size]) TableEpisodes.update({TableEpisodes.subtitles: str(actual_subtitles)})\ .where(TableEpisodes.path == original_path)\ diff --git a/bazarr/subtitles/indexer/utils.py b/bazarr/subtitles/indexer/utils.py index 2d40042da..354133e4b 100644 --- a/bazarr/subtitles/indexer/utils.py +++ b/bazarr/subtitles/indexer/utils.py @@ -11,6 +11,7 @@ from charamel import Detector from app.config import settings from constants import hi_regex +from utilities.path_mappings import path_mappings def get_external_subtitles_path(file, subtitle): @@ -40,16 +41,27 @@ def get_external_subtitles_path(file, subtitle): return path -def guess_external_subtitles(dest_folder, subtitles): +def guess_external_subtitles(dest_folder, subtitles, media_type, previously_indexed_subtitles_to_exclude=None): for subtitle, language in subtitles.items(): + subtitle_path = os.path.join(dest_folder, subtitle) + reversed_subtitle_path = path_mappings.path_replace_reverse(subtitle_path) if media_type == "series" \ + else path_mappings.path_replace_reverse_movie(subtitle_path) + + if previously_indexed_subtitles_to_exclude: + if [x for x in previously_indexed_subtitles_to_exclude + if x[1] == reversed_subtitle_path and x[2] == os.stat(subtitle_path).st_size]: + continue + if not language: - subtitle_path = os.path.join(dest_folder, subtitle) if os.path.exists(subtitle_path) and os.path.splitext(subtitle_path)[1] in core.SUBTITLE_EXTENSIONS: logging.debug("BAZARR falling back to file content analysis to detect language.") detected_language = None + # detect forced subtitles + forced = True if os.path.splitext(os.path.splitext(subtitle)[0])[1] == '.forced' else False + # to improve performance, skip detection of files larger that 1M - if os.path.getsize(subtitle_path) > 1*1024*1024: + if os.path.getsize(subtitle_path) > 1 * 1024 * 1024: logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " + subtitle_path) continue @@ -92,21 +104,21 @@ def guess_external_subtitles(dest_folder, subtitles): logging.debug("BAZARR external subtitles detected and guessed this language: " + str( detected_language)) try: - subtitles[subtitle] = Language.rebuild(Language.fromietf(detected_language), forced=False, + subtitles[subtitle] = Language.rebuild(Language.fromietf(detected_language), forced=forced, hi=False) except Exception: pass # If language is still None (undetected), skip it - if not language: - pass + if hasattr(subtitles[subtitle], 'basename') and not subtitles[subtitle].basename: + continue # Skip HI detection if forced - elif language.forced: - pass + if hasattr(language, 'forced') and language.forced: + continue # Detect hearing-impaired external subtitles not identified in filename - elif not subtitles[subtitle].hi: + if hasattr(subtitles[subtitle], 'hi') and not subtitles[subtitle].hi: subtitle_path = os.path.join(dest_folder, subtitle) # check if file exist: