Improved the ffprobe call caching mechanism by storing the result in the database and using it for indexing and subtitles search.

pull/1402/head
morpheus65535 4 years ago
parent 887da10d9b
commit 33e1555311

@@ -71,6 +71,10 @@ def authenticate(actual_method):

 def postprocess(item: dict):
+    # Remove ffprobe_cache
+    if 'ffprobe_cache' in item:
+        del (item['ffprobe_cache'])
+
     # Parse tags
     if 'tags' in item:
         if item['tags'] is None:
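In the API layer the new ffprobe_cache column is stripped before rows are returned, since the pickled blob is of no use to the frontend and is not JSON-serializable. A minimal standalone sketch of that behaviour (the sample row is invented; only the delete step mirrors the hunk above):

import pickle


def postprocess(item: dict):
    # Drop the raw cache blob before the row is serialized,
    # mirroring the hunk above; pickled bytes are not JSON-friendly.
    if 'ffprobe_cache' in item:
        del item['ffprobe_cache']


# Hypothetical row shape; only the 'ffprobe_cache' key matters here.
row = {'title': 'Some Episode', 'ffprobe_cache': pickle.dumps({'ffprobe': {}})}
postprocess(row)
print(row)  # -> {'title': 'Some Episode'}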

@@ -109,6 +109,7 @@ def db_upgrade():
         ['table_episodes', 'episode_file_id', 'integer'],
         ['table_episodes', 'audio_language', 'text'],
         ['table_episodes', 'file_size', 'integer', '0'],
+        ['table_episodes', 'ffprobe_cache', 'blob'],
         ['table_movies', 'sortTitle', 'text'],
         ['table_movies', 'year', 'text'],
         ['table_movies', 'alternativeTitles', 'text'],

@@ -121,6 +122,7 @@ def db_upgrade():
         ['table_movies', 'tags', 'text', '[]'],
         ['table_movies', 'profileId', 'integer'],
         ['table_movies', 'file_size', 'integer', '0'],
+        ['table_movies', 'ffprobe_cache', 'blob'],
         ['table_history', 'video_path', 'text'],
         ['table_history', 'language', 'text'],
         ['table_history', 'provider', 'text'],
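The rows added to db_upgrade only declare a column name and type; the SQL the migration helper ends up issuing is not shown in this diff, but for SQLite it presumably amounts to something like the sketch below (table and column names taken from the hunk, everything else illustrative):

import sqlite3

# Throwaway in-memory DB standing in for Bazarr's SQLite database.
conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE table_episodes (episode_file_id INTEGER, file_size INTEGER)')

# Presumed effect of the ['table_episodes', 'ffprobe_cache', 'blob'] row above:
# a nullable BLOB column, so existing rows simply start with an empty cache.
conn.execute('ALTER TABLE table_episodes ADD COLUMN ffprobe_cache blob')

print([column[1] for column in conn.execute('PRAGMA table_info(table_episodes)')])
# -> ['episode_file_id', 'file_size', 'ffprobe_cache']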

@@ -1,68 +1,111 @@
 # coding=utf-8
-import enzyme
-from enzyme.exceptions import MalformedMKVError
 import logging
 import os
-import datetime
+import pickle

 from knowit import api
-from subliminal.cache import region
+import enzyme
+from enzyme.exceptions import MalformedMKVError
+
+from database import database

-FFPROBE_CACHE_EXPIRATION_TIME = datetime.timedelta(weeks=2).total_seconds()
-
-
-class EmbeddedSubsReader:
-    def __init__(self):
-        self.ffprobe = None
-
-    @region.cache_on_arguments(expiration_time=FFPROBE_CACHE_EXPIRATION_TIME)
-    # file_size, episode_file_id and movie_file_id are used for cache identification. DO NOT REMOVE!
-    def list_languages(self, file, file_size, episode_file_id=None, movie_file_id=None):
-        from utils import get_binary
-        self.ffprobe = get_binary("ffprobe")
-
-        subtitles_list = []
-        if self.ffprobe:
-            api.initialize({'provider': 'ffmpeg', 'ffmpeg': self.ffprobe})
-            data = api.know(file)
-
-            traditional_chinese = ["cht", "tc", "traditional", "zht", "hant", "big5", u"繁", u"雙語"]
-            brazilian_portuguese = ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"]
-
-            if 'subtitle' in data:
-                for detected_language in data['subtitle']:
-                    if 'language' in detected_language:
-                        language = detected_language['language'].alpha3
-                        if language == 'zho' and 'name' in detected_language:
-                            if any(ext in (detected_language['name'].lower()) for ext in traditional_chinese):
-                                language = 'zht'
-                        if language == 'por' and 'name' in detected_language:
-                            if any(ext in (detected_language['name'].lower()) for ext in brazilian_portuguese):
-                                language = 'pob'
-                        forced = detected_language['forced'] if 'forced' in detected_language else False
-                        hearing_impaired = detected_language['hearing_impaired'] if 'hearing_impaired' in \
-                            detected_language else False
-                        codec = detected_language['format'] if 'format' in detected_language else None
-                        subtitles_list.append([language, forced, hearing_impaired, codec])
-                    else:
-                        continue
-        else:
-            if os.path.splitext(file)[1] == '.mkv':
-                with open(file, 'rb') as f:
-                    try:
-                        mkv = enzyme.MKV(f)
-                    except MalformedMKVError:
-                        logging.error('BAZARR cannot analyze this MKV with our built-in MKV parser, you should install ffmpeg: ' + file)
-                    else:
-                        for subtitle_track in mkv.subtitle_tracks:
-                            hearing_impaired = False
-                            if subtitle_track.name:
-                                if 'sdh' in subtitle_track.name.lower():
-                                    hearing_impaired = True
-                            subtitles_list.append([subtitle_track.language, subtitle_track.forced, hearing_impaired,
-                                                   subtitle_track.codec_id])
-
-        return subtitles_list
-
-
-embedded_subs_reader = EmbeddedSubsReader()
+def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=None):
+    data = parse_video_metadata(file, file_size, episode_file_id, movie_file_id)
+
+    subtitles_list = []
+    if data['ffprobe']:
+        traditional_chinese = ["cht", "tc", "traditional", "zht", "hant", "big5", u"繁", u"雙語"]
+        brazilian_portuguese = ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"]
+        if 'subtitle' in data['ffprobe']:
+            for detected_language in data['ffprobe']['subtitle']:
+                if 'language' in detected_language:
+                    language = detected_language['language'].alpha3
+                    if language == 'zho' and 'name' in detected_language:
+                        if any(ext in (detected_language['name'].lower()) for ext in traditional_chinese):
+                            language = 'zht'
+                    if language == 'por' and 'name' in detected_language:
+                        if any(ext in (detected_language['name'].lower()) for ext in brazilian_portuguese):
+                            language = 'pob'
+                    forced = detected_language['forced'] if 'forced' in detected_language else False
+                    hearing_impaired = detected_language['hearing_impaired'] if 'hearing_impaired' in \
+                        detected_language else False
+                    codec = detected_language['format'] if 'format' in detected_language else None
+                    subtitles_list.append([language, forced, hearing_impaired, codec])
+                else:
+                    continue
+    elif data['enzyme']:
+        for subtitle_track in data['enzyme'].subtitle_tracks:
+            hearing_impaired = False
+            if subtitle_track.name:
+                if 'sdh' in subtitle_track.name.lower():
+                    hearing_impaired = True
+            subtitles_list.append([subtitle_track.language, subtitle_track.forced, hearing_impaired,
+                                   subtitle_track.codec_id])
+
+    return subtitles_list
+
+
+def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=None):
+    # Define default data keys value
+    data = {
+        'ffprobe': {},
+        'enzyme': {},
+        'file_id': episode_file_id if episode_file_id else movie_file_id,
+        'file_size': file_size
+    }
+
+    # Get the actual cache value from the database
+    if episode_file_id:
+        cache_key = database.execute('SELECT ffprobe_cache FROM table_episodes WHERE episode_file_id=? AND file_size=?',
+                                     (episode_file_id, file_size), only_one=True)
+    elif movie_file_id:
+        cache_key = database.execute('SELECT ffprobe_cache FROM table_movies WHERE movie_file_id=? AND file_size=?',
+                                     (movie_file_id, file_size), only_one=True)
+    else:
+        cache_key = None
+
+    # Check if we have a value for that cache key
+    if not isinstance(cache_key, dict):
+        return data
+    else:
+        try:
+            # Unpickle ffprobe cache
+            cached_value = pickle.loads(cache_key['ffprobe_cache'])
+        except Exception:
+            pass
+        else:
+            # Check if file size and file id match and, if so, return the cached value
+            if cached_value['file_size'] == file_size and cached_value['file_id'] in [episode_file_id, movie_file_id]:
+                return cached_value
+
+    # If not, we retrieve the metadata from the file
+    from utils import get_binary
+    ffprobe_path = get_binary("ffprobe")
+
+    # If we have ffprobe available
+    if ffprobe_path:
+        api.initialize({'provider': 'ffmpeg', 'ffmpeg': ffprobe_path})
+        data['ffprobe'] = api.know(file)
+    # If not, we use enzyme for mkv files
+    else:
+        if os.path.splitext(file)[1] == '.mkv':
+            with open(file, 'rb') as f:
+                try:
+                    mkv = enzyme.MKV(f)
+                except MalformedMKVError:
+                    logging.error(
+                        'BAZARR cannot analyze this MKV with our built-in MKV parser, you should install '
+                        'ffmpeg/ffprobe: ' + file)
+                else:
+                    data['enzyme'] = mkv
+
+    # Write the result to the db and return the newly cached ffprobe dict
+    if episode_file_id:
+        database.execute('UPDATE table_episodes SET ffprobe_cache=? WHERE episode_file_id=?',
+                         (pickle.dumps(data, pickle.HIGHEST_PROTOCOL), episode_file_id))
+    elif movie_file_id:
+        database.execute('UPDATE table_movies SET ffprobe_cache=? WHERE movie_file_id=?',
+                         (pickle.dumps(data, pickle.HIGHEST_PROTOCOL), movie_file_id))
+    return data
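The rewritten module replaces the old EmbeddedSubsReader singleton with two functions: parse_video_metadata handles the DB-backed cache (a pickled dict keyed by file id and file size) and embedded_subs_reader turns that metadata into [language, forced, hearing_impaired, codec] rows. A rough usage sketch with a made-up path, size and id, assuming the Bazarr modules are importable:

# Hypothetical call site; the path, file_size and episode_file_id are placeholders.
from embedded_subs_reader import embedded_subs_reader, parse_video_metadata

# First call: no matching cache row, so ffprobe (or enzyme as a fallback) runs
# and the resulting dict is pickled into table_episodes.ffprobe_cache.
metadata = parse_video_metadata('/tv/Show/S01E01.mkv', file_size=734003200,
                                episode_file_id=101)

# Later calls with the same file_size/episode_file_id are served from the blob
# instead of spawning ffprobe again.
for language, forced, hearing_impaired, codec in embedded_subs_reader(
        '/tv/Show/S01E01.mkv', file_size=734003200, episode_file_id=101):
    print(language, forced, hearing_impaired, codec)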

@@ -33,6 +33,7 @@ from subsyncer import subsync
 from guessit import guessit
 from database import database, dict_mapper, get_exclusion_clause, get_profiles_list, get_audio_profile_languages, \
     get_desired_languages
+from embedded_subs_reader import parse_video_metadata
 from analytics import track_event
 from locale import getpreferredencoding
@@ -1177,41 +1178,52 @@ def refine_from_db(path, video):

 def refine_from_ffprobe(path, video):
-    exe = get_binary('ffprobe')
-    if not exe:
-        logging.debug('BAZARR FFprobe not found!')
-        return
-    else:
-        logging.debug('BAZARR FFprobe used is %s', exe)
-
-    api.initialize({'provider': 'ffmpeg', 'ffmpeg': exe})
-    data = api.know(path)
-
-    logging.debug('FFprobe found: %s', data)
-    if 'video' not in data:
-        logging.debug('BAZARR FFprobe was unable to find video tracks in the file!')
-    else:
-        if 'resolution' in data['video'][0]:
-            if not video.resolution:
-                video.resolution = data['video'][0]['resolution']
-        if 'codec' in data['video'][0]:
-            if not video.video_codec:
-                video.video_codec = data['video'][0]['codec']
-        if 'frame_rate' in data['video'][0]:
-            if not video.fps:
-                if isinstance(data['video'][0]['frame_rate'], float):
-                    video.fps = data['video'][0]['frame_rate']
-                else:
-                    video.fps = data['video'][0]['frame_rate'].magnitude
-
-    if 'audio' not in data:
-        logging.debug('BAZARR FFprobe was unable to find audio tracks in the file!')
-    else:
-        if 'codec' in data['audio'][0]:
-            if not video.audio_codec:
-                video.audio_codec = data['audio'][0]['codec']
-        for track in data['audio']:
-            if 'language' in track:
-                video.audio_languages.add(track['language'].alpha3)
+    if isinstance(video, Movie):
+        file_id = database.execute("SELECT movie_file_id, file_size FROM table_movies WHERE path = ?",
+                                   (path_mappings.path_replace_movie_reverse(path),), only_one=True)
+    else:
+        file_id = database.execute("SELECT episode_file_id, file_size FROM table_episodes WHERE path = ?",
+                                   (path_mappings.path_replace_reverse(path),), only_one=True)
+
+    if not isinstance(file_id, dict):
+        return video
+
+    if isinstance(video, Movie):
+        data = parse_video_metadata(file=path, file_size=file_id['file_size'],
+                                    movie_file_id=file_id['movie_file_id'])
+    else:
+        data = parse_video_metadata(file=path, file_size=file_id['file_size'],
+                                    episode_file_id=file_id['episode_file_id'])
+
+    if not data['ffprobe']:
+        logging.debug("No FFprobe available in cache for this file: {}".format(path))
+        return video
+
+    logging.debug('FFprobe found: %s', data['ffprobe'])
+    if 'video' not in data['ffprobe']:
+        logging.debug('BAZARR FFprobe was unable to find video tracks in the file!')
+    else:
+        if 'resolution' in data['ffprobe']['video'][0]:
+            if not video.resolution:
+                video.resolution = data['ffprobe']['video'][0]['resolution']
+        if 'codec' in data['ffprobe']['video'][0]:
+            if not video.video_codec:
+                video.video_codec = data['ffprobe']['video'][0]['codec']
+        if 'frame_rate' in data['ffprobe']['video'][0]:
+            if not video.fps:
+                if isinstance(data['ffprobe']['video'][0]['frame_rate'], float):
+                    video.fps = data['ffprobe']['video'][0]['frame_rate']
+                else:
+                    video.fps = data['ffprobe']['video'][0]['frame_rate'].magnitude
+
+    if 'audio' not in data['ffprobe']:
+        logging.debug('BAZARR FFprobe was unable to find audio tracks in the file!')
+    else:
+        if 'codec' in data['ffprobe']['audio'][0]:
+            if not video.audio_codec:
+                video.audio_codec = data['ffprobe']['audio'][0]['codec']
+        for track in data['ffprobe']['audio']:
+            if 'language' in track:
+                video.audio_languages.add(track['language'].alpha3)
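refine_from_ffprobe now consumes the cached dict rather than calling ffprobe itself, so every field it touches comes from knowit's output stored under data['ffprobe']. A self-contained sketch of that consumption path, using a hand-written stand-in for the cached structure (real values come from api.know):

# Invented stand-in for the cached knowit result; the key layout matches what
# the refinement code above reads.
data = {
    'ffprobe': {
        'video': [{'resolution': '1080p', 'codec': 'h264', 'frame_rate': 23.976}],
        'audio': [{'codec': 'ac3'}],
    }
}

video_track = data['ffprobe']['video'][0]
audio_track = data['ffprobe']['audio'][0]

resolution = video_track.get('resolution')
video_codec = video_track.get('codec')
# knowit can return frame_rate either as a float or as a quantity with .magnitude.
frame_rate = video_track['frame_rate']
fps = frame_rate if isinstance(frame_rate, float) else frame_rate.magnitude
audio_codec = audio_track.get('codec')

print(resolution, video_codec, fps, audio_codec)  # -> 1080p h264 23.976 ac3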

@@ -5,7 +5,6 @@ import os
 import logging
 import ast
 import re
-import subliminal
 from guess_language import guess_language
 from subliminal_patch import core, search_external_subtitles
 from subzero.language import Language

@@ -34,11 +33,9 @@ def store_subtitles(original_path, reversed_path):
         try:
             item = database.execute('SELECT file_size, episode_file_id FROM table_episodes '
                                     'WHERE path = ?', (original_path,), only_one=True)
-            subtitle_languages = embedded_subs_reader.list_languages(reversed_path,
-                                                                     file_size=item['file_size'],
-                                                                     episode_file_id=item['episode_file_id'])
-            subliminal.region.backend.sync()
+            subtitle_languages = embedded_subs_reader(reversed_path,
+                                                      file_size=item['file_size'],
+                                                      episode_file_id=item['episode_file_id'])
             for subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec in subtitle_languages:
                 try:
                     if (settings.general.getboolean("ignore_pgs_subs") and subtitle_codec.lower() == "pgs") or \

@@ -154,11 +151,9 @@ def store_subtitles_movie(original_path, reversed_path):
         try:
             item = database.execute('SELECT file_size, movie_file_id FROM table_movies '
                                     'WHERE path = ?', (original_path,), only_one=True)
-            subtitle_languages = embedded_subs_reader.list_languages(reversed_path,
-                                                                     file_size=item['file_size'],
-                                                                     movie_file_id=item['movie_file_id'])
-            subliminal.region.backend.sync()
+            subtitle_languages = embedded_subs_reader(reversed_path,
+                                                      file_size=item['file_size'],
+                                                      movie_file_id=item['movie_file_id'])
             for subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec in subtitle_languages:
                 try:
                     if (settings.general.getboolean("ignore_pgs_subs") and subtitle_codec.lower() == "pgs") or \
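Both store_subtitles call sites now iterate over the plain list returned by embedded_subs_reader, and the subliminal.region.backend.sync() calls are gone because nothing is written to the subliminal cache anymore. For reference, a small illustration of the row shape those loops expect (values invented; the PGS check mirrors the hunks above):

# Invented rows in the [language, forced, hearing_impaired, codec] shape
# returned by embedded_subs_reader().
subtitle_languages = [
    ['eng', False, True, 'subrip'],
    ['fra', True, False, 'PGS'],
]

ignore_pgs_subs = True  # stand-in for settings.general.getboolean("ignore_pgs_subs")

for subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec in subtitle_languages:
    if ignore_pgs_subs and subtitle_codec.lower() == "pgs":
        continue
    print(subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec)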
