Add ability to use MediaInfo to refine video/audio properties (#479)

* Add ability to use MediaInfo to refine video/audio properties

* Remove pymediainfo from requirements.txt and add library files

* Look for .dll file if .exe was not found in get_binary

* Add pymediainfo to libs

* Switch to local MediaInfo library files

* Exit early if supported attributes are already set

* Log media info warnings to debug
pull/488/head
MoshiMoshi0 5 years ago committed by morpheus65535
parent 810d8095f9
commit b56015e90b

5
.gitignore vendored

@ -7,4 +7,7 @@ cachefile.dbm
.idea/*
bazarr.pid
/venv
/data
/data
# Allow
!*.dll

@ -17,6 +17,7 @@ defaults = {
'single_language': 'False',
'minimum_score': '90',
'use_scenename': 'True',
'use_mediainfo': 'True',
'use_postprocessing': 'False',
'postprocessing_cmd': '',
'use_sonarr': 'False',

@ -28,11 +28,12 @@ from config import settings
from helper import path_replace, path_replace_movie, path_replace_reverse, \
path_replace_reverse_movie, pp_replace, get_target_folder, force_unicode
from list_subtitles import store_subtitles, list_missing_subtitles, store_subtitles_movie, list_missing_subtitles_movies
from utils import history_log, history_log_movie
from utils import history_log, history_log_movie, get_binary
from notifier import send_notifications, send_notifications_movie
from get_providers import get_providers, get_providers_auth, provider_throttle, provider_pool
from get_args import args
from queueconfig import notifications
from pymediainfo import MediaInfo
# configure the cache
@ -40,13 +41,14 @@ from queueconfig import notifications
region.configure('dogpile.cache.memory')
def get_video(path, title, sceneName, use_scenename, providers=None, media_type="movie"):
def get_video(path, title, sceneName, use_scenename, use_mediainfo, providers=None, media_type="movie"):
"""
Construct `Video` instance
:param path: path to video
:param title: series/movie title
:param sceneName: sceneName
:param use_scenename: use sceneName
:param use_mediainfo: use media info to refine the video
:param providers: provider list for selective hashing
:param media_type: movie/series
:return: `Video` instance
@ -69,6 +71,10 @@ def get_video(path, title, sceneName, use_scenename, providers=None, media_type=
video.original_name = original_name
video.original_path = original_path
refine_from_db(original_path, video)
if use_mediainfo:
refine_from_mediainfo(original_path, video)
logging.debug('BAZARR is using those video object properties: %s', vars(video))
return video
@ -144,6 +150,7 @@ def download_subtitle(path, language, hi, forced, providers, providers_auth, sce
language_set.add(lang_obj)
use_scenename = settings.general.getboolean('use_scenename')
use_mediainfo = settings.general.getboolean('use_mediainfo')
minimum_score = settings.general.minimum_score
minimum_score_movie = settings.general.minimum_score_movie
use_postprocessing = settings.general.getboolean('use_postprocessing')
@ -160,7 +167,7 @@ def download_subtitle(path, language, hi, forced, providers, providers_auth, sce
language_hook=None
"""
video = get_video(path, title, sceneName, use_scenename, providers=providers, media_type=media_type)
video = get_video(path, title, sceneName, use_scenename, use_mediainfo, providers=providers, media_type=media_type)
if video:
min_score, max_score, scores = get_scores(video, media_type, min_score_movie_perc=int(minimum_score_movie),
min_score_series_perc=int(minimum_score))
@ -307,12 +314,13 @@ def manual_search(path, language, hi, forced, providers, providers_auth, sceneNa
language_set.add(lang_obj)
use_scenename = settings.general.getboolean('use_scenename')
use_mediainfo = settings.general.getboolean('use_mediainfo')
minimum_score = settings.general.minimum_score
minimum_score_movie = settings.general.minimum_score_movie
use_postprocessing = settings.general.getboolean('use_postprocessing')
postprocessing_cmd = settings.general.postprocessing_cmd
video = get_video(path, title, sceneName, use_scenename, providers=providers, media_type=media_type)
video = get_video(path, title, sceneName, use_scenename, use_mediainfo, providers=providers, media_type=media_type)
if video:
min_score, max_score, scores = get_scores(video, media_type, min_score_movie_perc=int(minimum_score_movie),
min_score_series_perc=int(minimum_score))
@ -376,10 +384,11 @@ def manual_download_subtitle(path, language, hi, forced, subtitle, provider, pro
subtitle = pickle.loads(codecs.decode(subtitle.encode(), "base64"))
use_scenename = settings.general.getboolean('use_scenename')
use_mediainfo = settings.general.getboolean('use_mediainfo')
use_postprocessing = settings.general.getboolean('use_postprocessing')
postprocessing_cmd = settings.general.postprocessing_cmd
single = settings.general.getboolean('single_language')
video = get_video(path, title, sceneName, use_scenename, providers={provider}, media_type=media_type)
video = get_video(path, title, sceneName, use_scenename, use_mediainfo, providers={provider}, media_type=media_type)
if video:
min_score, max_score, scores = get_scores(video, media_type)
try:
@ -825,6 +834,31 @@ def refine_from_db(path, video):
return video
def refine_from_mediainfo(path, video):
    """
    Refine `video` in place using properties read by the MediaInfo library.

    Only the frame rate (`video.fps`) is refined. The function returns without
    touching `video` when the frame rate is already set, when `get_binary`
    cannot locate MediaInfo, when the file cannot be analyzed or when it holds
    no video track. Failures are logged at debug level instead of being raised
    because this refinement is optional.

    :param path: path to the video file handed to MediaInfo
    :param video: `Video` instance to refine
    :return: None
    """
    # Exit early if the only attribute this refiner supports is already set.
    if video.fps:
        return

    exe = get_binary('mediainfo')
    if not exe:
        logging.debug('BAZARR MediaInfo library not found!')
        return

    try:
        media_info = MediaInfo.parse(path, library_file=exe)
    except Exception:
        # Loading the native library, opening the file or parsing the XML
        # output can each fail with a different exception type.
        logging.debug('BAZARR MediaInfo was unable to analyze the file: %s', path, exc_info=True)
        return

    video_track = next((t for t in media_info.tracks if t.track_type == 'Video'), None)
    if not video_track:
        logging.debug('BAZARR MediaInfo was unable to find video tracks in the file!')
        return

    logging.debug('MediaInfo found: %s', video_track.to_data())

    try:
        if video_track.frame_rate:
            video.fps = float(video_track.frame_rate)
        elif video_track.framerate_num and video_track.framerate_den:
            # Fall back to the rational frame rate when no decimal one is given.
            video.fps = round(float(video_track.framerate_num) / float(video_track.framerate_den), 3)
    except (TypeError, ValueError, ZeroDivisionError):
        # Track values are mostly raw strings; a non-numeric rate or a "0"
        # denominator must not abort the caller.
        logging.debug('BAZARR MediaInfo returned an unusable frame rate for: %s', path, exc_info=True)
def upgrade_subtitles():
days_to_upgrade_subs = settings.general.days_to_upgrade_subs
minimum_timestamp = ((datetime.now() - timedelta(days=int(days_to_upgrade_subs))) -

@ -1254,6 +1254,11 @@ def save_settings():
settings_general_scenename = 'False'
else:
settings_general_scenename = 'True'
settings_general_mediainfo = request.forms.get('settings_general_mediainfo')
if settings_general_mediainfo is None:
settings_general_mediainfo = 'False'
else:
settings_general_mediainfo = 'True'
settings_general_embedded = request.forms.get('settings_general_embedded')
if settings_general_embedded is None:
settings_general_embedded = 'False'
@ -1336,6 +1341,7 @@ def save_settings():
settings.general.single_language = text_type(settings_general_single_language)
settings.general.minimum_score = text_type(settings_general_minimum_score)
settings.general.use_scenename = text_type(settings_general_scenename)
settings.general.use_mediainfo = text_type(settings_general_mediainfo)
settings.general.use_postprocessing = text_type(settings_general_use_postprocessing)
settings.general.postprocessing_cmd = text_type(settings_general_postprocessing_cmd)
settings.general.use_sonarr = text_type(settings_general_use_sonarr)

@ -54,6 +54,8 @@ def get_binary(name):
else:
if platform.system() == "Windows": # Windows
exe = os.path.abspath(os.path.join(binaries_dir, "Windows", "i386", name, "%s.exe" % name))
if exe and not os.path.isfile(exe):
exe = os.path.abspath(os.path.join(binaries_dir, "Windows", "i386", name, "%s.dll" % name))
elif platform.system() == "Darwin": # MacOSX
exe = os.path.abspath(os.path.join(binaries_dir, "MacOSX", "i386", name, name))

@ -0,0 +1,3 @@
Patrick Altman <paltman@gmail.com> (author)
cjlucas https://github.com/cjlucas
Louis Sautier <sautier.louis@gmail.com> (maintainer since 2016)

@ -0,0 +1,24 @@
The MIT License
Copyright (c) 2010-2014, Patrick Altman <paltman@gmail.com>
Copyright (c) 2016, Louis Sautier <sautier.louis@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
http://www.opensource.org/licenses/mit-license.php

@ -0,0 +1,27 @@
pymediainfo
-----------
.. image:: https://img.shields.io/pypi/v/pymediainfo.svg
:target: https://pypi.org/project/pymediainfo
.. image:: https://img.shields.io/pypi/pyversions/pymediainfo.svg
:target: https://pypi.org/project/pymediainfo
.. image:: https://repology.org/badge/tiny-repos/python:pymediainfo.svg
:target: https://repology.org/metapackage/python:pymediainfo
.. image:: https://img.shields.io/pypi/implementation/pymediainfo.svg
:target: https://pypi.org/project/pymediainfo
.. image:: https://api.travis-ci.org/sbraz/pymediainfo.svg?branch=master
:target: https://travis-ci.org/sbraz/pymediainfo
.. image:: https://ci.appveyor.com/api/projects/status/g15a2daem1oub57n/branch/master?svg=true
:target: https://ci.appveyor.com/project/sbraz/pymediainfo
This small package is a wrapper around the MediaInfo library.
It works on Linux, Mac OS X and Windows and is tested with Python 2.7, 3.4, 3.5, 3.6, 3.7, PyPy and PyPy3.
See https://pymediainfo.readthedocs.io/ for more information.

@ -0,0 +1,320 @@
# vim: set fileencoding=utf-8 :
import os
import re
import locale
import json
import ctypes
import sys
from pkg_resources import get_distribution, DistributionNotFound
import xml.etree.ElementTree as ET
try:
import pathlib
except ImportError:
pathlib = None
if sys.version_info < (3,):
import urlparse
else:
import urllib.parse as urlparse
try:
__version__ = get_distribution("pymediainfo").version
except DistributionNotFound:
pass
class Track(object):
    """
    An object associated with a media file track.

    Each :class:`Track` attribute corresponds to attributes parsed from MediaInfo's output.
    All attributes are lower case. Attributes that are present several times such as Duration
    yield a second attribute starting with `other_` which is a list of all alternative attribute values.

    When a non-existing attribute is accessed, `None` is returned.

    Example:

    >>> t = mi.tracks[0]
    >>> t
    <Track track_id='None', track_type='General'>
    >>> t.duration
    3000
    >>> t.to_data()["other_duration"]
    ['3 s 0 ms', '3 s 0 ms', '3 s 0 ms',
        '00:00:03.000', '00:00:03.000']
    >>> type(t.non_existing)
    NoneType

    All available attributes can be obtained by calling :func:`to_data`.
    """

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign objects
        # instead of failing on a missing ``__dict__``.
        if not isinstance(other, Track):
            return NotImplemented
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __getattribute__(self, name):
        # Unknown attributes resolve to None rather than raising.
        try:
            return object.__getattribute__(self, name)
        except AttributeError:
            return None

    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, state):
        self.__dict__ = state

    def __init__(self, xml_dom_fragment):
        """
        Build the track from a ``<track>`` XML element.

        :param xml_dom_fragment: element whose ``type`` attribute names the
            track type and whose children become attributes of the track.
        :raises KeyError: if the element has no ``type`` attribute.
        """
        self.track_type = xml_dom_fragment.attrib['type']
        # (primary name, "other_" name) pairs for tags seen more than once.
        repeated = []
        for el in xml_dom_fragment:
            node_name = el.tag.lower().strip().strip('_')
            if node_name == 'id':
                node_name = 'track_id'
            node_value = el.text
            if getattr(self, node_name) is None:
                setattr(self, node_name, node_value)
                continue
            other_node_name = "other_%s" % node_name
            if getattr(self, other_node_name) is None:
                setattr(self, other_node_name, [node_value])
                repeated.append((node_name, other_node_name))
            else:
                getattr(self, other_node_name).append(node_value)

        # Only attributes that were really repeated are candidates for the
        # int conversion. Scanning every key that merely starts with "other_"
        # would treat a raw tag named "other_x" as a list of alternatives.
        for primary, other_node_name in repeated:
            try:
                # Prefer an integer as the main value when it already is one.
                setattr(self, primary, int(getattr(self, primary)))
            except (TypeError, ValueError):
                # Otherwise promote the first alternative that is an integer
                # and keep the previous main value among the alternatives.
                alternatives = getattr(self, other_node_name)
                for value in alternatives:
                    try:
                        converted = int(value)
                    except (TypeError, ValueError):
                        continue
                    alternatives.append(getattr(self, primary))
                    setattr(self, primary, converted)
                    break

    def __repr__(self):
        return("<Track track_id='{}', track_type='{}'>".format(self.track_id, self.track_type))

    def to_data(self):
        """
        Returns a dict representation of the track attributes.

        Example:

        >>> sorted(track.to_data().keys())[:3]
        ['codec', 'codec_extensions_usually_used', 'codec_url']
        >>> t.to_data()["file_size"]
        5988

        :rtype: dict
        """
        return {k: v for k, v in self.__dict__.items() if k != 'xml_dom_fragment'}
class MediaInfo(object):
    """
    An object containing information about a media file.

    :class:`MediaInfo` objects can be created by directly calling code from
    libmediainfo (in this case, the library must be present on the system):

    >>> pymediainfo.MediaInfo.parse("/path/to/file.mp4")

    Alternatively, objects may be created from MediaInfo's XML output.
    Such output can be obtained using the ``XML`` output format on versions older than v17.10
    and the ``OLDXML`` format on newer versions.

    Using such an XML file, we can create a :class:`MediaInfo` object:

    >>> with open("output.xml") as f:
    ...     mi = pymediainfo.MediaInfo(f.read())

    :param str xml: XML output obtained from MediaInfo.
    :param str encoding_errors: option to pass to :func:`str.encode`'s `errors`
        parameter before parsing `xml`.
    :raises xml.etree.ElementTree.ParseError: if passed invalid XML.
    :var tracks: A list of :py:class:`Track` objects which the media file contains.
        For instance:

        >>> mi = pymediainfo.MediaInfo.parse("/path/to/file.mp4")
        >>> for t in mi.tracks:
        ...     print(t)
        <Track track_id='None', track_type='General'>
        <Track track_id='1', track_type='Text'>
    """

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign objects
        # instead of failing on a missing ``tracks`` attribute.
        if not isinstance(other, MediaInfo):
            return NotImplemented
        return self.tracks == other.tracks

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __init__(self, xml, encoding_errors="strict"):
        xml_dom = ET.fromstring(xml.encode("utf-8", encoding_errors))
        # This is the case for libmediainfo < 18.03
        # https://github.com/sbraz/pymediainfo/issues/57
        # https://github.com/MediaArea/MediaInfoLib/commit/575a9a32e6960ea34adb3bc982c64edfa06e95eb
        if xml_dom.tag == "File":
            xpath = "track"
        else:
            xpath = "File/track"
        self.tracks = [Track(xml_track) for xml_track in xml_dom.iterfind(xpath)]

    @staticmethod
    def _get_library(library_file=None):
        """
        Load libmediainfo through ctypes and declare the prototypes in use.

        :param library_file: explicit path to the library; when ``None`` the
            platform's default file names are tried, preferring a copy located
            next to this module.
        :return: the loaded ctypes library object.
        :raises OSError: if none of the candidate files could be loaded.
        """
        os_is_nt = os.name in ("nt", "dos", "os2", "ce")
        if os_is_nt:
            lib_type = ctypes.WinDLL
        else:
            lib_type = ctypes.CDLL
        if library_file is None:
            if os_is_nt:
                library_names = ("MediaInfo.dll",)
            elif sys.platform == "darwin":
                library_names = ("libmediainfo.0.dylib", "libmediainfo.dylib")
            else:
                library_names = ("libmediainfo.so.0",)
            script_dir = os.path.dirname(__file__)
            # Look for the library file in the script folder
            for library in library_names:
                lib_path = os.path.join(script_dir, library)
                if os.path.isfile(lib_path):
                    # If we find it, don't try any other filename
                    library_names = (lib_path,)
                    break
        else:
            library_names = (library_file,)
        for i, library in enumerate(library_names, start=1):
            try:
                lib = lib_type(library)
                # Define arguments and return types
                lib.MediaInfo_New.argtypes = []
                lib.MediaInfo_New.restype = ctypes.c_void_p
                lib.MediaInfo_Option.argtypes = [ctypes.c_void_p, ctypes.c_wchar_p, ctypes.c_wchar_p]
                lib.MediaInfo_Option.restype = ctypes.c_wchar_p
                lib.MediaInfo_Inform.argtypes = [ctypes.c_void_p, ctypes.c_size_t]
                lib.MediaInfo_Inform.restype = ctypes.c_wchar_p
                lib.MediaInfo_Open.argtypes = [ctypes.c_void_p, ctypes.c_wchar_p]
                lib.MediaInfo_Open.restype = ctypes.c_size_t
                lib.MediaInfo_Delete.argtypes = [ctypes.c_void_p]
                lib.MediaInfo_Delete.restype = None
                lib.MediaInfo_Close.argtypes = [ctypes.c_void_p]
                lib.MediaInfo_Close.restype = None
                return lib
            except OSError:
                # If we've tried all possible filenames
                if i == len(library_names):
                    raise

    @classmethod
    def can_parse(cls, library_file=None):
        """
        Checks whether media files can be analyzed using libmediainfo.

        :param library_file: optional path to the library, forwarded to
            :func:`_get_library`.
        :rtype: bool
        """
        try:
            cls._get_library(library_file)
            return True
        except Exception:
            # Any loading problem means "cannot parse"; interpreter-level
            # signals such as KeyboardInterrupt are deliberately not swallowed.
            return False

    @classmethod
    def parse(cls, filename, library_file=None, cover_data=False,
              encoding_errors="strict", parse_speed=0.5, text=False,
              full=True, legacy_stream_display=False):
        """
        Analyze a media file using libmediainfo.

        If libmediainfo is located in a non-standard location, the `library_file` parameter can be used:

        >>> pymediainfo.MediaInfo.parse("tests/data/sample.mkv",
        ...     library_file="/path/to/libmediainfo.dylib")

        :param filename: path to the media file which will be analyzed.
            A URL can also be used if libmediainfo was compiled
            with CURL support.
        :param str library_file: path to the libmediainfo library, this should only be used if the library cannot be auto-detected.
        :param bool cover_data: whether to retrieve cover data as base64.
        :param str encoding_errors: option to pass to :func:`str.encode`'s `errors`
            parameter before parsing MediaInfo's XML output.
        :param float parse_speed: passed to the library as `ParseSpeed`,
            this option takes values between 0 and 1.
            A higher value will yield more precise results in some cases
            but will also increase parsing time.
        :param bool text: if ``True``, MediaInfo's text output will be returned instead
            of a :class:`MediaInfo` object.
        :param bool full: display additional tags, including computer-readable values
            for sizes and durations.
        :param bool legacy_stream_display: display additional information about streams.
        :type filename: str or pathlib.Path
        :rtype: str if `text` is ``True``.
        :rtype: :class:`MediaInfo` otherwise.
        :raises FileNotFoundError: if passed a non-existent file
            (Python >= 3.3), does not work on Windows.
        :raises IOError: if passed a non-existent file (Python < 3.3),
            does not work on Windows.
        :raises RuntimeError: if parsing fails, this should not
            happen unless libmediainfo itself fails.
        """
        lib = cls._get_library(library_file)
        if pathlib is not None and isinstance(filename, pathlib.PurePath):
            filename = str(filename)
            url = False
        else:
            url = urlparse.urlparse(filename)
        # Try to open the file (if it's not a URL)
        # Doesn't work on Windows because paths are URLs
        if not (url and url.scheme):
            # Test whether the file is readable
            with open(filename, "rb"):
                pass
        # Obtain the library version
        lib_version = lib.MediaInfo_Option(None, "Info_Version", "")
        version_match = re.search("^MediaInfoLib - v(\\S+)", lib_version)
        if version_match is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise RuntimeError("Could not determine library version")
        lib_version = tuple(int(_) for _ in version_match.group(1).split("."))
        # The XML option was renamed starting with version 17.10
        if lib_version >= (17, 10):
            xml_option = "OLDXML"
        else:
            xml_option = "XML"
        # Cover_Data is not extracted by default since version 18.03
        # See https://github.com/MediaArea/MediaInfoLib/commit/d8fd88a1c282d1c09388c55ee0b46029e7330690
        if cover_data and lib_version >= (18, 3):
            lib.MediaInfo_Option(None, "Cover_Data", "base64")
        # Create a MediaInfo handle
        handle = lib.MediaInfo_New()
        opened = False
        try:
            lib.MediaInfo_Option(handle, "CharSet", "UTF-8")
            # Fix for https://github.com/sbraz/pymediainfo/issues/22
            # Python 2 does not change LC_CTYPE
            # at startup: https://bugs.python.org/issue6203
            if (sys.version_info < (3,) and os.name == "posix"
                    and locale.getlocale() == (None, None)):
                locale.setlocale(locale.LC_CTYPE, locale.getdefaultlocale())
            lib.MediaInfo_Option(None, "Inform", "" if text else xml_option)
            lib.MediaInfo_Option(None, "Complete", "1" if full else "")
            lib.MediaInfo_Option(None, "ParseSpeed", str(parse_speed))
            lib.MediaInfo_Option(None, "LegacyStreamDisplay", "1" if legacy_stream_display else "")
            if lib.MediaInfo_Open(handle, filename) == 0:
                raise RuntimeError("An eror occured while opening {}"
                                   " with libmediainfo".format(filename))
            opened = True
            output = lib.MediaInfo_Inform(handle, 0)
        finally:
            # Always release the native handle, including when opening fails.
            if opened:
                lib.MediaInfo_Close(handle)
            lib.MediaInfo_Delete(handle)
        if text:
            return output
        return cls(output, encoding_errors)

    def to_data(self):
        """
        Returns a dict representation of the object's :py:class:`Tracks <Track>`.

        :rtype: dict
        """
        return {'tracks': [track.to_data() for track in self.tracks]}

    def to_json(self):
        """
        Returns a JSON representation of the object's :py:class:`Tracks <Track>`.

        :rtype: str
        """
        return json.dumps(self.to_data())

@ -20,6 +20,25 @@
</div>
</div>
<div class="middle aligned row">
<div class="right aligned four wide column">
<label>Use MediaInfo</label>
</div>
<div class="one wide column">
<div id="settings_mediainfo" class="ui toggle checkbox" data-mediainfo={{settings.general.getboolean('use_mediainfo')}}>
<input name="settings_general_mediainfo" type="checkbox">
<label></label>
</div>
</div>
<div class="collapsed column">
<div class="collapsed center aligned column">
<div class="ui basic icon" data-tooltip="Use MediaInfo to extract video and audio stream properties." data-inverted="">
<i class="help circle large icon"></i>
</div>
</div>
</div>
</div>
<div class="middle aligned row">
<div class="right aligned four wide column">
<label>Minimum score for episodes</label>
@ -552,6 +571,12 @@
$("#settings_scenename").checkbox('uncheck');
}
if ($('#settings_mediainfo').data("mediainfo") === "True") {
$("#settings_mediainfo").checkbox('check');
} else {
$("#settings_mediainfo").checkbox('uncheck');
}
if ($('#settings_upgrade_subs').data("upgrade") === "True") {
$("#settings_upgrade_subs").checkbox('check');
} else {

Loading…
Cancel
Save