Improved the ffprobe call caching mechanism by storing the result in the database and reusing it for indexing and subtitle searches.

pull/1402/head
morpheus65535 4 years ago
parent 887da10d9b
commit 33e1555311

@ -71,6 +71,10 @@ def authenticate(actual_method):
def postprocess(item: dict):
# Remove ffprobe_cache
if 'ffprobe_cache' in item:
del (item['ffprobe_cache'])
# Parse tags
if 'tags' in item:
if item['tags'] is None:

@ -109,6 +109,7 @@ def db_upgrade():
['table_episodes', 'episode_file_id', 'integer'],
['table_episodes', 'audio_language', 'text'],
['table_episodes', 'file_size', 'integer', '0'],
['table_episodes', 'ffprobe_cache', 'blob'],
['table_movies', 'sortTitle', 'text'],
['table_movies', 'year', 'text'],
['table_movies', 'alternativeTitles', 'text'],
@ -121,6 +122,7 @@ def db_upgrade():
['table_movies', 'tags', 'text', '[]'],
['table_movies', 'profileId', 'integer'],
['table_movies', 'file_size', 'integer', '0'],
['table_movies', 'ffprobe_cache', 'blob'],
['table_history', 'video_path', 'text'],
['table_history', 'language', 'text'],
['table_history', 'provider', 'text'],

@ -1,68 +1,111 @@
# coding=utf-8
import enzyme
from enzyme.exceptions import MalformedMKVError
import logging
import os
import datetime
import pickle
from knowit import api
from subliminal.cache import region
import enzyme
from enzyme.exceptions import MalformedMKVError
from enzyme.exceptions import MalformedMKVError
from database import database
def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=None):
data = parse_video_metadata(file, file_size, episode_file_id, movie_file_id)
FFPROBE_CACHE_EXPIRATION_TIME = datetime.timedelta(weeks=2).total_seconds()
subtitles_list = []
if data['ffprobe']:
traditional_chinese = ["cht", "tc", "traditional", "zht", "hant", "big5", u"", u"雙語"]
brazilian_portuguese = ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"]
if 'subtitle' in data['ffprobe']:
for detected_language in data['ffprobe']['subtitle']:
if 'language' in detected_language:
language = detected_language['language'].alpha3
if language == 'zho' and 'name' in detected_language:
if any (ext in (detected_language['name'].lower()) for ext in traditional_chinese):
language = 'zht'
if language == 'por' and 'name' in detected_language:
if any (ext in (detected_language['name'].lower()) for ext in brazilian_portuguese):
language = 'pob'
forced = detected_language['forced'] if 'forced' in detected_language else False
hearing_impaired = detected_language['hearing_impaired'] if 'hearing_impaired' in \
detected_language else False
codec = detected_language['format'] if 'format' in detected_language else None
subtitles_list.append([language, forced, hearing_impaired, codec])
else:
continue
elif data['enzyme']:
for subtitle_track in data['enzyme'].subtitle_tracks:
hearing_impaired = False
if subtitle_track.name:
if 'sdh' in subtitle_track.name.lower():
hearing_impaired = True
subtitles_list.append([subtitle_track.language, subtitle_track.forced, hearing_impaired,
subtitle_track.codec_id])
class EmbeddedSubsReader:
def __init__(self):
self.ffprobe = None
return subtitles_list
@region.cache_on_arguments(expiration_time=FFPROBE_CACHE_EXPIRATION_TIME)
# file_size, episode_file_id and movie_file_id are used for cache identification. DO NOT REMOVE!
def list_languages(self, file, file_size, episode_file_id=None, movie_file_id=None):
from utils import get_binary
self.ffprobe = get_binary("ffprobe")
subtitles_list = []
if self.ffprobe:
api.initialize({'provider': 'ffmpeg', 'ffmpeg': self.ffprobe})
data = api.know(file)
def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=None):
# Define default data keys value
data = {
'ffprobe': {},
'enzyme': {},
'file_id': episode_file_id if episode_file_id else movie_file_id,
'file_size': file_size
}
traditional_chinese = ["cht", "tc", "traditional", "zht", "hant", "big5", u"", u"雙語"]
brazilian_portuguese = ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"]
# Get the actual cache value from the database
if episode_file_id:
cache_key = database.execute('SELECT ffprobe_cache FROM table_episodes WHERE episode_file_id=? AND file_size=?',
(episode_file_id, file_size), only_one=True)
elif movie_file_id:
cache_key = database.execute('SELECT ffprobe_cache FROM table_movies WHERE movie_file_id=? AND file_size=?',
(movie_file_id, file_size), only_one=True)
else:
cache_key = None
if 'subtitle' in data:
for detected_language in data['subtitle']:
if 'language' in detected_language:
language = detected_language['language'].alpha3
if language == 'zho' and 'name' in detected_language:
if any (ext in (detected_language['name'].lower()) for ext in traditional_chinese):
language = 'zht'
if language == 'por' and 'name' in detected_language:
if any (ext in (detected_language['name'].lower()) for ext in brazilian_portuguese):
language = 'pob'
forced = detected_language['forced'] if 'forced' in detected_language else False
hearing_impaired = detected_language['hearing_impaired'] if 'hearing_impaired' in \
detected_language else False
codec = detected_language['format'] if 'format' in detected_language else None
subtitles_list.append([language, forced, hearing_impaired, codec])
else:
continue
# check if we have a value for that cache key
if not isinstance(cache_key, dict):
return data
else:
try:
# Unpickle ffprobe cache
cached_value = pickle.loads(cache_key['ffprobe_cache'])
except:
pass
else:
if os.path.splitext(file)[1] == '.mkv':
with open(file, 'rb') as f:
try:
mkv = enzyme.MKV(f)
except MalformedMKVError:
logging.error('BAZARR cannot analyze this MKV with our built-in MKV parser, you should install ffmpeg: ' + file)
else:
for subtitle_track in mkv.subtitle_tracks:
hearing_impaired = False
if subtitle_track.name:
if 'sdh' in subtitle_track.name.lower():
hearing_impaired = True
subtitles_list.append([subtitle_track.language, subtitle_track.forced, hearing_impaired,
subtitle_track.codec_id])
# Check if file size and file id matches and if so, we return the cached value
if cached_value['file_size'] == file_size and cached_value['file_id'] in [episode_file_id, movie_file_id]:
return cached_value
return subtitles_list
# if not, we retrieve the metadata from the file
from utils import get_binary
ffprobe_path = get_binary("ffprobe")
# if we have ffprobe available
if ffprobe_path:
api.initialize({'provider': 'ffmpeg', 'ffmpeg': ffprobe_path})
data['ffprobe'] = api.know(file)
# if not, we use enzyme for mkv files
else:
if os.path.splitext(file)[1] == '.mkv':
with open(file, 'rb') as f:
try:
mkv = enzyme.MKV(f)
except MalformedMKVError:
logging.error(
'BAZARR cannot analyze this MKV with our built-in MKV parser, you should install '
'ffmpeg/ffprobe: ' + file)
else:
data['enzyme'] = mkv
embedded_subs_reader = EmbeddedSubsReader()
# we write to db the result and return the newly cached ffprobe dict
if episode_file_id:
database.execute('UPDATE table_episodes SET ffprobe_cache=? WHERE episode_file_id=?',
(pickle.dumps(data, pickle.HIGHEST_PROTOCOL), episode_file_id))
elif movie_file_id:
database.execute('UPDATE table_movies SET ffprobe_cache=? WHERE movie_file_id=?',
(pickle.dumps(data, pickle.HIGHEST_PROTOCOL), movie_file_id))
return data

@ -33,6 +33,7 @@ from subsyncer import subsync
from guessit import guessit
from database import database, dict_mapper, get_exclusion_clause, get_profiles_list, get_audio_profile_languages, \
get_desired_languages
from embedded_subs_reader import parse_video_metadata
from analytics import track_event
from locale import getpreferredencoding
@ -1177,41 +1178,52 @@ def refine_from_db(path, video):
def refine_from_ffprobe(path, video):
exe = get_binary('ffprobe')
if not exe:
logging.debug('BAZARR FFprobe not found!')
return
if isinstance(video, Movie):
file_id = database.execute("SELECT movie_file_id FROM table_shows WHERE path = ?",
(path_mappings.path_replace_movie_reverse(path),), only_one=True)
else:
file_id = database.execute("SELECT episode_file_id, file_size FROM table_episodes WHERE path = ?",
(path_mappings.path_replace_reverse(path),), only_one=True)
if not isinstance(file_id, dict):
return video
if isinstance(video, Movie):
data = parse_video_metadata(file=path, file_size=file_id['file_size'],
movie_file_id=file_id['movie_file_id'])
else:
logging.debug('BAZARR FFprobe used is %s', exe)
data = parse_video_metadata(file=path, file_size=file_id['file_size'],
episode_file_id=file_id['episode_file_id'])
api.initialize({'provider': 'ffmpeg', 'ffmpeg': exe})
data = api.know(path)
if not data['ffprobe']:
logging.debug("No FFprobe available in cache for this file: {}".format(path))
return video
logging.debug('FFprobe found: %s', data)
logging.debug('FFprobe found: %s', data['ffprobe'])
if 'video' not in data:
if 'video' not in data['ffprobe']:
logging.debug('BAZARR FFprobe was unable to find video tracks in the file!')
else:
if 'resolution' in data['video'][0]:
if 'resolution' in data['ffprobe']['video'][0]:
if not video.resolution:
video.resolution = data['video'][0]['resolution']
if 'codec' in data['video'][0]:
video.resolution = data['ffprobe']['video'][0]['resolution']
if 'codec' in data['ffprobe']['video'][0]:
if not video.video_codec:
video.video_codec = data['video'][0]['codec']
if 'frame_rate' in data['video'][0]:
video.video_codec = data['ffprobe']['video'][0]['codec']
if 'frame_rate' in data['ffprobe']['video'][0]:
if not video.fps:
if isinstance(data['video'][0]['frame_rate'], float):
video.fps = data['video'][0]['frame_rate']
if isinstance(data['ffprobe']['video'][0]['frame_rate'], float):
video.fps = data['ffprobe']['video'][0]['frame_rate']
else:
video.fps = data['video'][0]['frame_rate'].magnitude
video.fps = data['ffprobe']['video'][0]['frame_rate'].magnitude
if 'audio' not in data:
if 'audio' not in data['ffprobe']:
logging.debug('BAZARR FFprobe was unable to find audio tracks in the file!')
else:
if 'codec' in data['audio'][0]:
if 'codec' in data['ffprobe']['audio'][0]:
if not video.audio_codec:
video.audio_codec = data['audio'][0]['codec']
for track in data['audio']:
video.audio_codec = data['ffprobe']['audio'][0]['codec']
for track in data['ffprobe']['audio']:
if 'language' in track:
video.audio_languages.add(track['language'].alpha3)

@ -5,7 +5,6 @@ import os
import logging
import ast
import re
import subliminal
from guess_language import guess_language
from subliminal_patch import core, search_external_subtitles
from subzero.language import Language
@ -34,11 +33,9 @@ def store_subtitles(original_path, reversed_path):
try:
item = database.execute('SELECT file_size, episode_file_id FROM table_episodes '
'WHERE path = ?', (original_path,), only_one=True)
subtitle_languages = embedded_subs_reader.list_languages(reversed_path,
file_size=item['file_size'],
episode_file_id=item['episode_file_id'])
subliminal.region.backend.sync()
subtitle_languages = embedded_subs_reader(reversed_path,
file_size=item['file_size'],
episode_file_id=item['episode_file_id'])
for subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec in subtitle_languages:
try:
if (settings.general.getboolean("ignore_pgs_subs") and subtitle_codec.lower() == "pgs") or \
@ -154,11 +151,9 @@ def store_subtitles_movie(original_path, reversed_path):
try:
item = database.execute('SELECT file_size, movie_file_id FROM table_movies '
'WHERE path = ?', (original_path,), only_one=True)
subtitle_languages = embedded_subs_reader.list_languages(reversed_path,
file_size=item['file_size'],
movie_file_id=item['movie_file_id'])
subliminal.region.backend.sync()
subtitle_languages = embedded_subs_reader(reversed_path,
file_size=item['file_size'],
movie_file_id=item['movie_file_id'])
for subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec in subtitle_languages:
try:
if (settings.general.getboolean("ignore_pgs_subs") and subtitle_codec.lower() == "pgs") or \

Loading…
Cancel
Save