Improved the ffprobe call caching mechanism by storing the result in the DB and using it for indexing and subtitle searches.

pull/1402/head
morpheus65535 4 years ago
parent 887da10d9b
commit 33e1555311

@ -71,6 +71,10 @@ def authenticate(actual_method):
def postprocess(item: dict): def postprocess(item: dict):
# Remove ffprobe_cache
if 'ffprobe_cache' in item:
del (item['ffprobe_cache'])
# Parse tags # Parse tags
if 'tags' in item: if 'tags' in item:
if item['tags'] is None: if item['tags'] is None:

@ -109,6 +109,7 @@ def db_upgrade():
['table_episodes', 'episode_file_id', 'integer'], ['table_episodes', 'episode_file_id', 'integer'],
['table_episodes', 'audio_language', 'text'], ['table_episodes', 'audio_language', 'text'],
['table_episodes', 'file_size', 'integer', '0'], ['table_episodes', 'file_size', 'integer', '0'],
['table_episodes', 'ffprobe_cache', 'blob'],
['table_movies', 'sortTitle', 'text'], ['table_movies', 'sortTitle', 'text'],
['table_movies', 'year', 'text'], ['table_movies', 'year', 'text'],
['table_movies', 'alternativeTitles', 'text'], ['table_movies', 'alternativeTitles', 'text'],
@ -121,6 +122,7 @@ def db_upgrade():
['table_movies', 'tags', 'text', '[]'], ['table_movies', 'tags', 'text', '[]'],
['table_movies', 'profileId', 'integer'], ['table_movies', 'profileId', 'integer'],
['table_movies', 'file_size', 'integer', '0'], ['table_movies', 'file_size', 'integer', '0'],
['table_movies', 'ffprobe_cache', 'blob'],
['table_history', 'video_path', 'text'], ['table_history', 'video_path', 'text'],
['table_history', 'language', 'text'], ['table_history', 'language', 'text'],
['table_history', 'provider', 'text'], ['table_history', 'provider', 'text'],

@ -1,36 +1,25 @@
# coding=utf-8 # coding=utf-8
import enzyme
from enzyme.exceptions import MalformedMKVError
import logging import logging
import os import os
import datetime import pickle
from knowit import api from knowit import api
from subliminal.cache import region import enzyme
from enzyme.exceptions import MalformedMKVError
FFPROBE_CACHE_EXPIRATION_TIME = datetime.timedelta(weeks=2).total_seconds() from enzyme.exceptions import MalformedMKVError
from database import database
class EmbeddedSubsReader:
def __init__(self):
self.ffprobe = None
@region.cache_on_arguments(expiration_time=FFPROBE_CACHE_EXPIRATION_TIME) def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=None):
# file_size, episode_file_id and movie_file_id are used for cache identification. DO NOT REMOVE! data = parse_video_metadata(file, file_size, episode_file_id, movie_file_id)
def list_languages(self, file, file_size, episode_file_id=None, movie_file_id=None):
from utils import get_binary
self.ffprobe = get_binary("ffprobe")
subtitles_list = [] subtitles_list = []
if self.ffprobe: if data['ffprobe']:
api.initialize({'provider': 'ffmpeg', 'ffmpeg': self.ffprobe})
data = api.know(file)
traditional_chinese = ["cht", "tc", "traditional", "zht", "hant", "big5", u"繁", u"雙語"] traditional_chinese = ["cht", "tc", "traditional", "zht", "hant", "big5", u"繁", u"雙語"]
brazilian_portuguese = ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"] brazilian_portuguese = ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"]
if 'subtitle' in data: if 'subtitle' in data['ffprobe']:
for detected_language in data['subtitle']: for detected_language in data['ffprobe']['subtitle']:
if 'language' in detected_language: if 'language' in detected_language:
language = detected_language['language'].alpha3 language = detected_language['language'].alpha3
if language == 'zho' and 'name' in detected_language: if language == 'zho' and 'name' in detected_language:
@ -46,15 +35,8 @@ class EmbeddedSubsReader:
subtitles_list.append([language, forced, hearing_impaired, codec]) subtitles_list.append([language, forced, hearing_impaired, codec])
else: else:
continue continue
else: elif data['enzyme']:
if os.path.splitext(file)[1] == '.mkv': for subtitle_track in data['enzyme'].subtitle_tracks:
with open(file, 'rb') as f:
try:
mkv = enzyme.MKV(f)
except MalformedMKVError:
logging.error('BAZARR cannot analyze this MKV with our built-in MKV parser, you should install ffmpeg: ' + file)
else:
for subtitle_track in mkv.subtitle_tracks:
hearing_impaired = False hearing_impaired = False
if subtitle_track.name: if subtitle_track.name:
if 'sdh' in subtitle_track.name.lower(): if 'sdh' in subtitle_track.name.lower():
@ -65,4 +47,65 @@ class EmbeddedSubsReader:
return subtitles_list return subtitles_list
embedded_subs_reader = EmbeddedSubsReader() def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=None):
# Define default data keys value
data = {
'ffprobe': {},
'enzyme': {},
'file_id': episode_file_id if episode_file_id else movie_file_id,
'file_size': file_size
}
# Get the actual cache value from the database
if episode_file_id:
cache_key = database.execute('SELECT ffprobe_cache FROM table_episodes WHERE episode_file_id=? AND file_size=?',
(episode_file_id, file_size), only_one=True)
elif movie_file_id:
cache_key = database.execute('SELECT ffprobe_cache FROM table_movies WHERE movie_file_id=? AND file_size=?',
(movie_file_id, file_size), only_one=True)
else:
cache_key = None
# check if we have a value for that cache key
if not isinstance(cache_key, dict):
return data
else:
try:
# Unpickle ffprobe cache
cached_value = pickle.loads(cache_key['ffprobe_cache'])
except:
pass
else:
# Check if file size and file id matches and if so, we return the cached value
if cached_value['file_size'] == file_size and cached_value['file_id'] in [episode_file_id, movie_file_id]:
return cached_value
# if not, we retrieve the metadata from the file
from utils import get_binary
ffprobe_path = get_binary("ffprobe")
# if we have ffprobe available
if ffprobe_path:
api.initialize({'provider': 'ffmpeg', 'ffmpeg': ffprobe_path})
data['ffprobe'] = api.know(file)
# if not, we use enzyme for mkv files
else:
if os.path.splitext(file)[1] == '.mkv':
with open(file, 'rb') as f:
try:
mkv = enzyme.MKV(f)
except MalformedMKVError:
logging.error(
'BAZARR cannot analyze this MKV with our built-in MKV parser, you should install '
'ffmpeg/ffprobe: ' + file)
else:
data['enzyme'] = mkv
# we write to db the result and return the newly cached ffprobe dict
if episode_file_id:
database.execute('UPDATE table_episodes SET ffprobe_cache=? WHERE episode_file_id=?',
(pickle.dumps(data, pickle.HIGHEST_PROTOCOL), episode_file_id))
elif movie_file_id:
database.execute('UPDATE table_movies SET ffprobe_cache=? WHERE movie_file_id=?',
(pickle.dumps(data, pickle.HIGHEST_PROTOCOL), movie_file_id))
return data

@ -33,6 +33,7 @@ from subsyncer import subsync
from guessit import guessit from guessit import guessit
from database import database, dict_mapper, get_exclusion_clause, get_profiles_list, get_audio_profile_languages, \ from database import database, dict_mapper, get_exclusion_clause, get_profiles_list, get_audio_profile_languages, \
get_desired_languages get_desired_languages
from embedded_subs_reader import parse_video_metadata
from analytics import track_event from analytics import track_event
from locale import getpreferredencoding from locale import getpreferredencoding
@ -1177,41 +1178,52 @@ def refine_from_db(path, video):
def refine_from_ffprobe(path, video): def refine_from_ffprobe(path, video):
exe = get_binary('ffprobe') if isinstance(video, Movie):
if not exe: file_id = database.execute("SELECT movie_file_id FROM table_shows WHERE path = ?",
logging.debug('BAZARR FFprobe not found!') (path_mappings.path_replace_movie_reverse(path),), only_one=True)
return else:
file_id = database.execute("SELECT episode_file_id, file_size FROM table_episodes WHERE path = ?",
(path_mappings.path_replace_reverse(path),), only_one=True)
if not isinstance(file_id, dict):
return video
if isinstance(video, Movie):
data = parse_video_metadata(file=path, file_size=file_id['file_size'],
movie_file_id=file_id['movie_file_id'])
else: else:
logging.debug('BAZARR FFprobe used is %s', exe) data = parse_video_metadata(file=path, file_size=file_id['file_size'],
episode_file_id=file_id['episode_file_id'])
api.initialize({'provider': 'ffmpeg', 'ffmpeg': exe}) if not data['ffprobe']:
data = api.know(path) logging.debug("No FFprobe available in cache for this file: {}".format(path))
return video
logging.debug('FFprobe found: %s', data) logging.debug('FFprobe found: %s', data['ffprobe'])
if 'video' not in data: if 'video' not in data['ffprobe']:
logging.debug('BAZARR FFprobe was unable to find video tracks in the file!') logging.debug('BAZARR FFprobe was unable to find video tracks in the file!')
else: else:
if 'resolution' in data['video'][0]: if 'resolution' in data['ffprobe']['video'][0]:
if not video.resolution: if not video.resolution:
video.resolution = data['video'][0]['resolution'] video.resolution = data['ffprobe']['video'][0]['resolution']
if 'codec' in data['video'][0]: if 'codec' in data['ffprobe']['video'][0]:
if not video.video_codec: if not video.video_codec:
video.video_codec = data['video'][0]['codec'] video.video_codec = data['ffprobe']['video'][0]['codec']
if 'frame_rate' in data['video'][0]: if 'frame_rate' in data['ffprobe']['video'][0]:
if not video.fps: if not video.fps:
if isinstance(data['video'][0]['frame_rate'], float): if isinstance(data['ffprobe']['video'][0]['frame_rate'], float):
video.fps = data['video'][0]['frame_rate'] video.fps = data['ffprobe']['video'][0]['frame_rate']
else: else:
video.fps = data['video'][0]['frame_rate'].magnitude video.fps = data['ffprobe']['video'][0]['frame_rate'].magnitude
if 'audio' not in data: if 'audio' not in data['ffprobe']:
logging.debug('BAZARR FFprobe was unable to find audio tracks in the file!') logging.debug('BAZARR FFprobe was unable to find audio tracks in the file!')
else: else:
if 'codec' in data['audio'][0]: if 'codec' in data['ffprobe']['audio'][0]:
if not video.audio_codec: if not video.audio_codec:
video.audio_codec = data['audio'][0]['codec'] video.audio_codec = data['ffprobe']['audio'][0]['codec']
for track in data['audio']: for track in data['ffprobe']['audio']:
if 'language' in track: if 'language' in track:
video.audio_languages.add(track['language'].alpha3) video.audio_languages.add(track['language'].alpha3)

@ -5,7 +5,6 @@ import os
import logging import logging
import ast import ast
import re import re
import subliminal
from guess_language import guess_language from guess_language import guess_language
from subliminal_patch import core, search_external_subtitles from subliminal_patch import core, search_external_subtitles
from subzero.language import Language from subzero.language import Language
@ -34,11 +33,9 @@ def store_subtitles(original_path, reversed_path):
try: try:
item = database.execute('SELECT file_size, episode_file_id FROM table_episodes ' item = database.execute('SELECT file_size, episode_file_id FROM table_episodes '
'WHERE path = ?', (original_path,), only_one=True) 'WHERE path = ?', (original_path,), only_one=True)
subtitle_languages = embedded_subs_reader.list_languages(reversed_path, subtitle_languages = embedded_subs_reader(reversed_path,
file_size=item['file_size'], file_size=item['file_size'],
episode_file_id=item['episode_file_id']) episode_file_id=item['episode_file_id'])
subliminal.region.backend.sync()
for subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec in subtitle_languages: for subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec in subtitle_languages:
try: try:
if (settings.general.getboolean("ignore_pgs_subs") and subtitle_codec.lower() == "pgs") or \ if (settings.general.getboolean("ignore_pgs_subs") and subtitle_codec.lower() == "pgs") or \
@ -154,11 +151,9 @@ def store_subtitles_movie(original_path, reversed_path):
try: try:
item = database.execute('SELECT file_size, movie_file_id FROM table_movies ' item = database.execute('SELECT file_size, movie_file_id FROM table_movies '
'WHERE path = ?', (original_path,), only_one=True) 'WHERE path = ?', (original_path,), only_one=True)
subtitle_languages = embedded_subs_reader.list_languages(reversed_path, subtitle_languages = embedded_subs_reader(reversed_path,
file_size=item['file_size'], file_size=item['file_size'],
movie_file_id=item['movie_file_id']) movie_file_id=item['movie_file_id'])
subliminal.region.backend.sync()
for subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec in subtitle_languages: for subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec in subtitle_languages:
try: try:
if (settings.general.getboolean("ignore_pgs_subs") and subtitle_codec.lower() == "pgs") or \ if (settings.general.getboolean("ignore_pgs_subs") and subtitle_codec.lower() == "pgs") or \

Loading…
Cancel
Save