From 33e155531119dc07e1deaf339de0cb4577122a8c Mon Sep 17 00:00:00 2001 From: morpheus65535 Date: Wed, 5 May 2021 23:07:23 -0400 Subject: [PATCH] Improved the ffprobe call caching mechanism by storing result to DB and using it for indexing and subtitles search. --- bazarr/api.py | 4 + bazarr/database.py | 2 + bazarr/embedded_subs_reader.py | 147 +++++++++++++++++++++------------ bazarr/get_subtitle.py | 54 +++++++----- bazarr/list_subtitles.py | 17 ++-- 5 files changed, 140 insertions(+), 84 deletions(-) diff --git a/bazarr/api.py b/bazarr/api.py index d794c20a8..40145f0a0 100644 --- a/bazarr/api.py +++ b/bazarr/api.py @@ -71,6 +71,10 @@ def authenticate(actual_method): def postprocess(item: dict): + # Remove ffprobe_cache + if 'ffprobe_cache' in item: + del (item['ffprobe_cache']) + # Parse tags if 'tags' in item: if item['tags'] is None: diff --git a/bazarr/database.py b/bazarr/database.py index b1bf2f13d..3e0e2c19d 100644 --- a/bazarr/database.py +++ b/bazarr/database.py @@ -109,6 +109,7 @@ def db_upgrade(): ['table_episodes', 'episode_file_id', 'integer'], ['table_episodes', 'audio_language', 'text'], ['table_episodes', 'file_size', 'integer', '0'], + ['table_episodes', 'ffprobe_cache', 'blob'], ['table_movies', 'sortTitle', 'text'], ['table_movies', 'year', 'text'], ['table_movies', 'alternativeTitles', 'text'], @@ -121,6 +122,7 @@ def db_upgrade(): ['table_movies', 'tags', 'text', '[]'], ['table_movies', 'profileId', 'integer'], ['table_movies', 'file_size', 'integer', '0'], + ['table_movies', 'ffprobe_cache', 'blob'], ['table_history', 'video_path', 'text'], ['table_history', 'language', 'text'], ['table_history', 'provider', 'text'], diff --git a/bazarr/embedded_subs_reader.py b/bazarr/embedded_subs_reader.py index 767445b11..d43c234a9 100644 --- a/bazarr/embedded_subs_reader.py +++ b/bazarr/embedded_subs_reader.py @@ -1,68 +1,111 @@ # coding=utf-8 -import enzyme -from enzyme.exceptions import MalformedMKVError import logging import os -import datetime 
def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=None):
    """List embedded subtitle tracks of a media file.

    Fetches the (possibly DB-cached) metadata via parse_video_metadata() and
    flattens it into a list of [language, forced, hearing_impaired, codec]
    entries. ffprobe data is preferred; enzyme data (MKV only) is the fallback.

    :param file: path to the media file on disk.
    :param file_size: size in bytes, used as part of the cache identity.
    :param episode_file_id: Sonarr episode file id (exclusive with movie_file_id).
    :param movie_file_id: Radarr movie file id (exclusive with episode_file_id).
    :return: list of [language(str), forced(bool), hearing_impaired(bool), codec(str or None)].
    """
    data = parse_video_metadata(file, file_size, episode_file_id, movie_file_id)

    subtitles_list = []
    if data['ffprobe']:
        # Track-name hints used to disambiguate language variants that share
        # one ISO 639-2 code in ffprobe's output.
        traditional_chinese = ["cht", "tc", "traditional", "zht", "hant", "big5", u"繁", u"雙語"]
        brazilian_portuguese = ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"]

        for track in data['ffprobe'].get('subtitle', []):
            if 'language' not in track:
                continue
            language = track['language'].alpha3
            # Tolerate a missing or None track name.
            track_name = (track.get('name') or '').lower()
            if language == 'zho' and any(hint in track_name for hint in traditional_chinese):
                language = 'zht'
            elif language == 'por' and any(hint in track_name for hint in brazilian_portuguese):
                language = 'pob'
            subtitles_list.append([language,
                                   track.get('forced', False),
                                   track.get('hearing_impaired', False),
                                   track.get('format')])
    elif data['enzyme']:
        for subtitle_track in data['enzyme'].subtitle_tracks:
            # enzyme exposes no hearing-impaired flag; infer it from an
            # "SDH" marker in the track name.
            hearing_impaired = bool(subtitle_track.name) and 'sdh' in subtitle_track.name.lower()
            subtitles_list.append([subtitle_track.language, subtitle_track.forced, hearing_impaired,
                                   subtitle_track.codec_id])

    return subtitles_list
def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=None):
    """Return embedded metadata for a video file, using the DB cache when fresh.

    The returned dict always has these keys:
      'ffprobe'   - knowit/ffprobe metadata dict ({} when ffprobe unavailable)
      'enzyme'    - enzyme.MKV object for MKV files probed without ffprobe ({} otherwise)
      'file_id'   - episode_file_id or movie_file_id, the cache identity
      'file_size' - size in bytes, used to detect stale cache entries

    A pickled copy of the result is stored in the ffprobe_cache blob column of
    table_episodes / table_movies and reused as long as both the file id and
    the file size still match.

    :param file: path to the media file on disk.
    :param file_size: current size of the file in bytes.
    :param episode_file_id: Sonarr episode file id (exclusive with movie_file_id).
    :param movie_file_id: Radarr movie file id (exclusive with episode_file_id).
    :return: metadata dict as described above.
    """
    # Default payload, returned as-is when the file is unknown to the DB and
    # cached after probing otherwise.
    data = {
        'ffprobe': {},
        'enzyme': {},
        'file_id': episode_file_id if episode_file_id else movie_file_id,
        'file_size': file_size
    }

    # Get the current cache value from the database.
    if episode_file_id:
        cache_key = database.execute('SELECT ffprobe_cache FROM table_episodes WHERE episode_file_id=? AND file_size=?',
                                     (episode_file_id, file_size), only_one=True)
    elif movie_file_id:
        cache_key = database.execute('SELECT ffprobe_cache FROM table_movies WHERE movie_file_id=? AND file_size=?',
                                     (movie_file_id, file_size), only_one=True)
    else:
        cache_key = None

    # No matching row: the file is unknown to us, return the empty payload.
    if not isinstance(cache_key, dict):
        return data
    else:
        try:
            # Unpickle the cached ffprobe result (raises on a NULL or corrupt blob).
            cached_value = pickle.loads(cache_key['ffprobe_cache'])
        except Exception:
            # Empty or corrupt cache entry: fall through and probe the file again.
            pass
        else:
            # Only trust the cache if both the identity and the size still match.
            if cached_value['file_size'] == file_size and cached_value['file_id'] in [episode_file_id, movie_file_id]:
                return cached_value

    # Cache miss: retrieve the metadata from the file itself.
    from utils import get_binary
    ffprobe_path = get_binary("ffprobe")

    # If we have ffprobe available, it is the preferred probe.
    if ffprobe_path:
        api.initialize({'provider': 'ffmpeg', 'ffmpeg': ffprobe_path})
        data['ffprobe'] = api.know(file)
    # If not, we fall back to enzyme, which only understands MKV files.
    elif os.path.splitext(file)[1] == '.mkv':
        with open(file, 'rb') as f:
            try:
                mkv = enzyme.MKV(f)
            except MalformedMKVError:
                logging.error(
                    'BAZARR cannot analyze this MKV with our built-in MKV parser, you should install '
                    'ffmpeg/ffprobe: ' + file)
            else:
                data['enzyme'] = mkv

    # Persist the freshly probed result so the next call is a cache hit,
    # then return it.
    if episode_file_id:
        database.execute('UPDATE table_episodes SET ffprobe_cache=? WHERE episode_file_id=?',
                         (pickle.dumps(data, pickle.HIGHEST_PROTOCOL), episode_file_id))
    elif movie_file_id:
        database.execute('UPDATE table_movies SET ffprobe_cache=? WHERE movie_file_id=?',
                         (pickle.dumps(data, pickle.HIGHEST_PROTOCOL), movie_file_id))
    return data
def refine_from_ffprobe(path, video):
    """Fill missing attributes of *video* from cached ffprobe metadata.

    Looks up the matching DB row (movie or episode) so parse_video_metadata()
    can use its DB-backed ffprobe cache, then copies resolution, video codec,
    fps, audio codec and audio languages onto the subliminal video object —
    only where they are not already set.

    :param path: mapped filesystem path of the media file.
    :param video: subliminal Movie or Episode object to refine.
    :return: the same *video* object, possibly refined.
    """
    if isinstance(video, Movie):
        # BUGFIX: this query previously read from table_shows and omitted
        # file_size, which broke refinement for every movie (wrong table and
        # a guaranteed KeyError on file_id['file_size'] below).
        file_id = database.execute("SELECT movie_file_id, file_size FROM table_movies WHERE path = ?",
                                   (path_mappings.path_replace_movie_reverse(path),), only_one=True)
    else:
        file_id = database.execute("SELECT episode_file_id, file_size FROM table_episodes WHERE path = ?",
                                   (path_mappings.path_replace_reverse(path),), only_one=True)

    # File unknown to the database: nothing to refine from.
    if not isinstance(file_id, dict):
        return video

    if isinstance(video, Movie):
        data = parse_video_metadata(file=path, file_size=file_id['file_size'],
                                    movie_file_id=file_id['movie_file_id'])
    else:
        data = parse_video_metadata(file=path, file_size=file_id['file_size'],
                                    episode_file_id=file_id['episode_file_id'])

    if not data['ffprobe']:
        logging.debug("No FFprobe available in cache for this file: {}".format(path))
        return video

    logging.debug('FFprobe found: %s', data['ffprobe'])

    if 'video' not in data['ffprobe']:
        logging.debug('BAZARR FFprobe was unable to find video tracks in the file!')
    else:
        video_track = data['ffprobe']['video'][0]
        if 'resolution' in video_track and not video.resolution:
            video.resolution = video_track['resolution']
        if 'codec' in video_track and not video.video_codec:
            video.video_codec = video_track['codec']
        if 'frame_rate' in video_track and not video.fps:
            # knowit may report frame_rate as a plain float or as a quantity
            # object carrying a .magnitude — handle both.
            if isinstance(video_track['frame_rate'], float):
                video.fps = video_track['frame_rate']
            else:
                video.fps = video_track['frame_rate'].magnitude

    if 'audio' not in data['ffprobe']:
        logging.debug('BAZARR FFprobe was unable to find audio tracks in the file!')
    else:
        if 'codec' in data['ffprobe']['audio'][0] and not video.audio_codec:
            video.audio_codec = data['ffprobe']['audio'][0]['codec']
        # Collect every audio language, not just the first track's.
        for track in data['ffprobe']['audio']:
            if 'language' in track:
                video.audio_languages.add(track['language'].alpha3)

    return video
subtitle_languages: try: if (settings.general.getboolean("ignore_pgs_subs") and subtitle_codec.lower() == "pgs") or \