Add ability to use MediaInfo to refine video/audio properties (#479)
* Add ability to use MediaInfo to refine video/audio properties * Remove pymediainfo from requirements.txt and add library files * Look for .dll file if .exe was not found in get_binary * Add pymediainfo to libs * Switch to local MediaInfo library files * Exit early if supported attributes are already set * Log media info warnings to debug
pull/488/head
parent
810d8095f9
commit
b56015e90b
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,3 @@
|
||||
Patrick Altman <paltman@gmail.com> (author)
|
||||
cjlucas https://github.com/cjlucas
|
||||
Louis Sautier <sautier.louis@gmail.com> (maintainer since 2016)
|
@ -0,0 +1,24 @@
|
||||
The MIT License
|
||||
|
||||
Copyright (c) 2010-2014, Patrick Altman <paltman@gmail.com>
|
||||
Copyright (c) 2016, Louis Sautier <sautier.louis@gmail.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
||||
http://www.opensource.org/licenses/mit-license.php
|
@ -0,0 +1,27 @@
|
||||
pymediainfo
|
||||
-----------
|
||||
|
||||
.. image:: https://img.shields.io/pypi/v/pymediainfo.svg
|
||||
:target: https://pypi.org/project/pymediainfo
|
||||
|
||||
.. image:: https://img.shields.io/pypi/pyversions/pymediainfo.svg
|
||||
:target: https://pypi.org/project/pymediainfo
|
||||
|
||||
.. image:: https://repology.org/badge/tiny-repos/python:pymediainfo.svg
|
||||
:target: https://repology.org/metapackage/python:pymediainfo
|
||||
|
||||
.. image:: https://img.shields.io/pypi/implementation/pymediainfo.svg
|
||||
:target: https://pypi.org/project/pymediainfo
|
||||
|
||||
.. image:: https://api.travis-ci.org/sbraz/pymediainfo.svg?branch=master
|
||||
:target: https://travis-ci.org/sbraz/pymediainfo
|
||||
|
||||
.. image:: https://ci.appveyor.com/api/projects/status/g15a2daem1oub57n/branch/master?svg=true
|
||||
:target: https://ci.appveyor.com/project/sbraz/pymediainfo
|
||||
|
||||
|
||||
This small package is a wrapper around the MediaInfo library.
|
||||
|
||||
It works on Linux, Mac OS X and Windows and is tested with Python 2.7, 3.4, 3.5, 3.6, 3.7, PyPy and PyPy3.
|
||||
|
||||
See https://pymediainfo.readthedocs.io/ for more information.
|
@ -0,0 +1,320 @@
|
||||
# vim: set fileencoding=utf-8 :
|
||||
import os
|
||||
import re
|
||||
import locale
|
||||
import json
|
||||
import ctypes
|
||||
import sys
|
||||
from pkg_resources import get_distribution, DistributionNotFound
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
try:
|
||||
import pathlib
|
||||
except ImportError:
|
||||
pathlib = None
|
||||
|
||||
if sys.version_info < (3,):
|
||||
import urlparse
|
||||
else:
|
||||
import urllib.parse as urlparse
|
||||
|
||||
try:
|
||||
__version__ = get_distribution("pymediainfo").version
|
||||
except DistributionNotFound:
|
||||
pass
|
||||
|
||||
class Track(object):
    """
    An object associated with a media file track.

    Each :class:`Track` attribute corresponds to attributes parsed from MediaInfo's output.
    All attributes are lower case. Attributes that are present several times such as Duration
    yield a second attribute starting with `other_` which is a list of all alternative attribute values.

    When a non-existing attribute is accessed, `None` is returned.

    Example:

    >>> t = mi.tracks[0]
    >>> t
    <Track track_id='None', track_type='General'>
    >>> t.duration
    3000
    >>> t.to_data()["other_duration"]
    ['3 s 0 ms', '3 s 0 ms', '3 s 0 ms',
        '00:00:03.000', '00:00:03.000']
    >>> type(t.non_existing)
    NoneType

    All available attributes can be obtained by calling :func:`to_data`.
    """

    def __eq__(self, other):
        """
        Compare two tracks by their parsed attributes.

        Returns ``NotImplemented`` for objects that are not :class:`Track`
        instances so that e.g. ``track == None`` evaluates to ``False``
        instead of raising :class:`AttributeError`.
        """
        if not isinstance(other, Track):
            return NotImplemented
        return self.__dict__ == other.__dict__

    def __getattribute__(self, name):
        """
        Return the attribute called `name`, or `None` when it does not exist.

        Only :class:`AttributeError` is translated into `None`; any other
        exception propagates to the caller.
        """
        try:
            return object.__getattribute__(self, name)
        except AttributeError:
            return None

    def __getstate__(self):
        """Return the instance dictionary as the pickled state."""
        return self.__dict__

    def __setstate__(self, state):
        """Restore the instance dictionary from a pickled state."""
        self.__dict__ = state

    def __init__(self, xml_dom_fragment):
        """
        Build a track from a ``<track>`` XML element.

        :param xml_dom_fragment: element whose ``type`` attribute gives the
            track type and whose children are the track's attributes.
        :raises KeyError: if the element has no ``type`` attribute.
        """
        self.track_type = xml_dom_fragment.attrib['type']
        for el in xml_dom_fragment:
            node_name = el.tag.lower().strip().strip('_')
            if node_name == 'id':
                node_name = 'track_id'
            node_value = el.text
            other_node_name = "other_%s" % node_name
            # The first occurrence becomes the main attribute, any further
            # occurrence is collected in the matching "other_" list.
            if getattr(self, node_name) is None:
                setattr(self, node_name, node_value)
            elif getattr(self, other_node_name) is None:
                setattr(self, other_node_name, [node_value])
            else:
                getattr(self, other_node_name).append(node_value)

        prefix = 'other_'
        # Iterate over a snapshot of the keys: attributes are reassigned below.
        for other_name in [key for key in self.__dict__ if key.startswith(prefix)]:
            alternatives = getattr(self, other_name)
            if not isinstance(alternatives, list):
                # A tag literally named "other_..." that appeared only once;
                # it is a plain value, not a list of alternatives.
                continue
            # Strip the prefix only: str.replace() would also remove
            # "other_" from the middle of the attribute name.
            primary = other_name[len(prefix):]
            try:
                # Prefer an integer for the main value when it parses as one.
                setattr(self, primary, int(getattr(self, primary)))
            except (TypeError, ValueError):
                # Otherwise promote the first alternative that is an integer
                # and keep the previous main value among the alternatives.
                for value in alternatives:
                    try:
                        converted = int(value)
                    except (TypeError, ValueError):
                        continue
                    alternatives.append(getattr(self, primary))
                    setattr(self, primary, converted)
                    break

    def __repr__(self):
        """Return a short description holding the track id and type."""
        return "<Track track_id='{}', track_type='{}'>".format(self.track_id, self.track_type)

    def to_data(self):
        """
        Returns a dict representation of the track attributes.

        Example:

        >>> sorted(track.to_data().keys())[:3]
        ['codec', 'codec_extensions_usually_used', 'codec_url']
        >>> t.to_data()["file_size"]
        5988


        :rtype: dict
        """
        return {
            key: value
            for key, value in self.__dict__.items()
            if key != 'xml_dom_fragment'
        }
|
||||
|
||||
|
||||
class MediaInfo(object):
    """
    An object containing information about a media file.


    :class:`MediaInfo` objects can be created by directly calling code from
    libmediainfo (in this case, the library must be present on the system):

    >>> pymediainfo.MediaInfo.parse("/path/to/file.mp4")

    Alternatively, objects may be created from MediaInfo's XML output.
    Such output can be obtained using the ``XML`` output format on versions older than v17.10
    and the ``OLDXML`` format on newer versions.

    Using such an XML file, we can create a :class:`MediaInfo` object:

    >>> with open("output.xml") as f:
    ...     mi = pymediainfo.MediaInfo(f.read())

    :param str xml: XML output obtained from MediaInfo.
    :param str encoding_errors: option to pass to :func:`str.encode`'s `errors`
        parameter before parsing `xml`.
    :raises xml.etree.ElementTree.ParseError: if passed invalid XML.
    :var tracks: A list of :py:class:`Track` objects which the media file contains.
        For instance:

        >>> mi = pymediainfo.MediaInfo.parse("/path/to/file.mp4")
        >>> for t in mi.tracks:
        ...     print(t)
        <Track track_id='None', track_type='General'>
        <Track track_id='1', track_type='Text'>
    """

    def __eq__(self, other):
        """
        Compare two objects by their list of tracks.

        Returns ``NotImplemented`` for objects that are not
        :class:`MediaInfo` instances instead of raising
        :class:`AttributeError`.
        """
        if not isinstance(other, MediaInfo):
            return NotImplemented
        return self.tracks == other.tracks

    def __init__(self, xml, encoding_errors="strict"):
        xml_dom = ET.fromstring(xml.encode("utf-8", encoding_errors))
        # This is the case for libmediainfo < 18.03
        # https://github.com/sbraz/pymediainfo/issues/57
        # https://github.com/MediaArea/MediaInfoLib/commit/575a9a32e6960ea34adb3bc982c64edfa06e95eb
        if xml_dom.tag == "File":
            xpath = "track"
        else:
            xpath = "File/track"
        self.tracks = [Track(xml_track) for xml_track in xml_dom.iterfind(xpath)]

    @staticmethod
    def _get_library(library_file=None):
        """
        Load libmediainfo through :mod:`ctypes` and declare its prototypes.

        :param str library_file: explicit path of the library. When `None`,
            the platform's default file names are tried, preferring a copy
            located next to this module.
        :return: the loaded library with argument and return types set.
        :raises OSError: if none of the candidate files could be loaded.
        """
        os_is_nt = os.name in ("nt", "dos", "os2", "ce")
        if os_is_nt:
            lib_type = ctypes.WinDLL
        else:
            lib_type = ctypes.CDLL
        if library_file is None:
            if os_is_nt:
                library_names = ("MediaInfo.dll",)
            elif sys.platform == "darwin":
                library_names = ("libmediainfo.0.dylib", "libmediainfo.dylib")
            else:
                library_names = ("libmediainfo.so.0",)
            script_dir = os.path.dirname(__file__)
            # Look for the library file in the script folder
            for library in library_names:
                lib_path = os.path.join(script_dir, library)
                if os.path.isfile(lib_path):
                    # If we find it, don't try any other filename
                    library_names = (lib_path,)
                    break
        else:
            library_names = (library_file,)
        for i, library in enumerate(library_names, start=1):
            try:
                lib = lib_type(library)
                # Define arguments and return types
                lib.MediaInfo_New.argtypes = []
                lib.MediaInfo_New.restype = ctypes.c_void_p
                lib.MediaInfo_Option.argtypes = [ctypes.c_void_p, ctypes.c_wchar_p, ctypes.c_wchar_p]
                lib.MediaInfo_Option.restype = ctypes.c_wchar_p
                lib.MediaInfo_Inform.argtypes = [ctypes.c_void_p, ctypes.c_size_t]
                lib.MediaInfo_Inform.restype = ctypes.c_wchar_p
                lib.MediaInfo_Open.argtypes = [ctypes.c_void_p, ctypes.c_wchar_p]
                lib.MediaInfo_Open.restype = ctypes.c_size_t
                lib.MediaInfo_Delete.argtypes = [ctypes.c_void_p]
                lib.MediaInfo_Delete.restype = None
                lib.MediaInfo_Close.argtypes = [ctypes.c_void_p]
                lib.MediaInfo_Close.restype = None
                return lib
            except OSError:
                # If we've tried all possible filenames
                if i == len(library_names):
                    raise

    @classmethod
    def can_parse(cls, library_file=None):
        """
        Checks whether media files can be analyzed using libmediainfo.

        :param str library_file: optional path to the libmediainfo library.
        :rtype: bool
        """
        try:
            cls._get_library(library_file)
        except Exception:
            # Any loading failure means parsing is unavailable; interpreter
            # exit signals such as KeyboardInterrupt are not swallowed.
            return False
        return True

    @classmethod
    def parse(cls, filename, library_file=None, cover_data=False,
              encoding_errors="strict", parse_speed=0.5, text=False,
              full=True, legacy_stream_display=False):
        """
        Analyze a media file using libmediainfo.
        If libmediainfo is located in a non-standard location, the `library_file` parameter can be used:

        >>> pymediainfo.MediaInfo.parse("tests/data/sample.mkv",
        ...     library_file="/path/to/libmediainfo.dylib")

        :param filename: path to the media file which will be analyzed.
            A URL can also be used if libmediainfo was compiled
            with CURL support.
        :param str library_file: path to the libmediainfo library, this should only be used if the library cannot be auto-detected.
        :param bool cover_data: whether to retrieve cover data as base64.
        :param str encoding_errors: option to pass to :func:`str.encode`'s `errors`
            parameter before parsing MediaInfo's XML output.
        :param float parse_speed: passed to the library as `ParseSpeed`,
            this option takes values between 0 and 1.
            A higher value will yield more precise results in some cases
            but will also increase parsing time.
        :param bool text: if ``True``, MediaInfo's text output will be returned instead
            of a :class:`MediaInfo` object.
        :param bool full: display additional tags, including computer-readable values
            for sizes and durations.
        :param bool legacy_stream_display: display additional information about streams.
        :type filename: str or pathlib.Path
        :rtype: str if `text` is ``True``.
        :rtype: :class:`MediaInfo` otherwise.
        :raises FileNotFoundError: if passed a non-existent file
            (Python >= 3.3), does not work on Windows.
        :raises IOError: if passed a non-existent file (Python < 3.3),
            does not work on Windows.
        :raises RuntimeError: if parsing fails, this should not
            happen unless libmediainfo itself fails.
        """
        lib = cls._get_library(library_file)
        if pathlib is not None and isinstance(filename, pathlib.PurePath):
            filename = str(filename)
            url = False
        else:
            url = urlparse.urlparse(filename)
        # Try to open the file (if it's not a URL)
        # Doesn't work on Windows because paths are URLs
        if not (url and url.scheme):
            # Test whether the file is readable
            with open(filename, "rb"):
                pass
        # Obtain the library version
        lib_version = lib.MediaInfo_Option(None, "Info_Version", "")
        lib_version = tuple(int(_) for _ in re.search("^MediaInfoLib - v(\\S+)", lib_version).group(1).split("."))
        # The XML option was renamed starting with version 17.10
        if lib_version >= (17, 10):
            xml_option = "OLDXML"
        else:
            xml_option = "XML"
        # Cover_Data is not extracted by default since version 18.03
        # See https://github.com/MediaArea/MediaInfoLib/commit/d8fd88a1c282d1c09388c55ee0b46029e7330690
        if cover_data and lib_version >= (18, 3):
            lib.MediaInfo_Option(None, "Cover_Data", "base64")
        # Create a MediaInfo handle
        handle = lib.MediaInfo_New()
        try:
            lib.MediaInfo_Option(handle, "CharSet", "UTF-8")
            # Fix for https://github.com/sbraz/pymediainfo/issues/22
            # Python 2 does not change LC_CTYPE
            # at startup: https://bugs.python.org/issue6203
            if (sys.version_info < (3,) and os.name == "posix"
                    and locale.getlocale() == (None, None)):
                locale.setlocale(locale.LC_CTYPE, locale.getdefaultlocale())
            lib.MediaInfo_Option(None, "Inform", "" if text else xml_option)
            lib.MediaInfo_Option(None, "Complete", "1" if full else "")
            lib.MediaInfo_Option(None, "ParseSpeed", str(parse_speed))
            lib.MediaInfo_Option(None, "LegacyStreamDisplay", "1" if legacy_stream_display else "")
            if lib.MediaInfo_Open(handle, filename) == 0:
                raise RuntimeError("An eror occured while opening {}"
                                   " with libmediainfo".format(filename))
            output = lib.MediaInfo_Inform(handle, 0)
        finally:
            # Always release the native handle, including when opening the
            # file failed, so that it is not leaked.
            lib.MediaInfo_Close(handle)
            lib.MediaInfo_Delete(handle)
        if text:
            return output
        return cls(output, encoding_errors)

    def to_data(self):
        """
        Returns a dict representation of the object's :py:class:`Tracks <Track>`.

        :rtype: dict
        """
        return {'tracks': [track.to_data() for track in self.tracks]}

    def to_json(self):
        """
        Returns a JSON representation of the object's :py:class:`Tracks <Track>`.

        :rtype: str
        """
        return json.dumps(self.to_data())
|
Loading…
Reference in new issue