Possible fix for #860

pull/871/head
Louis Vézina 5 years ago
parent 1a44dbc31a
commit 0f85f683c2

@ -3,11 +3,9 @@ import enzyme
from enzyme.exceptions import MalformedMKVError
import logging
import os
import subprocess
import locale
from knowit import api
from utils import get_binary
from pyprobe.pyprobe import VideoFileParser
class NotMKVAndNoFFprobe(Exception):
pass
@ -23,11 +21,18 @@ class EmbeddedSubsReader:
subtitles_list = []
if self.ffprobe:
parser = VideoFileParser(ffprobe=self.ffprobe, includeMissing=True, rawMode=False)
data = parser.parseFfprobe(file)
for detected_language in data['subtitles']:
subtitles_list.append([detected_language['language'], detected_language['forced'], detected_language["codec"]])
api.initialize({'provider': 'ffmpeg', 'ffmpeg': self.ffprobe})
data = api.know(file)
if 'subtitle' in data:
for detected_language in data['subtitle']:
language = detected_language['language'].alpha3
forced = detected_language['forced'] if 'forced' in detected_language else None
codec = detected_language['format'] if 'format' in detected_language else None
if language:
subtitles_list.append([language, forced, codec])
else:
continue
else:
if os.path.splitext(file)[1] == '.mkv':
with open(file, 'rb') as f:

@ -34,7 +34,7 @@ from notifier import send_notifications, send_notifications_movie
from get_providers import get_providers, get_providers_auth, provider_throttle, provider_pool
from get_args import args
from queueconfig import notifications
from pyprobe.pyprobe import VideoFileParser
from knowit import api
from database import database, dict_mapper
from analytics import track_event
@ -915,37 +915,30 @@ def refine_from_ffprobe(path, video):
else:
logging.debug('BAZARR FFprobe used is %s', exe)
parser = VideoFileParser(ffprobe=exe, includeMissing=True, rawMode=False)
data = parser.parseFfprobe(path)
api.initialize({'provider': 'ffmpeg', 'ffmpeg': exe})
data = api.know(path)
logging.debug('FFprobe found: %s', data)
if 'videos' not in data:
if 'video' not in data:
logging.debug('BAZARR FFprobe was unable to find video tracks in the file!')
else:
if 'resolution' in data['videos'][0]:
if 'resolution' in data['video'][0]:
if not video.resolution:
if data['videos'][0]['resolution'][0] >= 3200:
video.resolution = "2160p"
elif data['videos'][0]['resolution'][0] >= 1800:
video.resolution = "1080p"
elif data['videos'][0]['resolution'][0] >= 1200:
video.resolution = "720p"
elif data['videos'][0]['resolution'][0] >= 0:
video.resolution = "480p"
if 'codec' in data['videos'][0]:
video.resolution = data['video'][0]['resolution']
if 'codec' in data['video'][0]:
if not video.video_codec:
video.video_codec = data['videos'][0]['codec']
if 'framerate' in data['videos'][0]:
video.video_codec = data['video'][0]['codec']
if 'frame_rate' in data['video'][0]:
if not video.fps:
video.fps = data['videos'][0]['framerate']
video.fps = data['video'][0]['frame_rate']
if 'audios' not in data:
if 'audio' not in data:
logging.debug('BAZARR FFprobe was unable to find audio tracks in the file!')
else:
if 'codec' in data['audios'][0]:
if 'codec' in data['audio'][0]:
if not video.audio_codec:
video.audio_codec = data['audios'][0]['codec'].upper()
video.audio_codec = data['audio'][0]['codec']
def upgrade_subtitles():

@ -95,6 +95,7 @@ def configure_logging(debug=False):
logging.getLogger("subliminal_patch").setLevel(logging.CRITICAL)
logging.getLogger("subzero").setLevel(logging.ERROR)
logging.getLogger("knowit").setLevel(logging.CRITICAL)
logging.getLogger("enzyme").setLevel(logging.CRITICAL)
logging.getLogger("guessit").setLevel(logging.WARNING)
logging.getLogger("rebulk").setLevel(logging.WARNING)

@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
"""Know your media files better."""
from __future__ import unicode_literals
# Package metadata.
__title__ = 'knowit'
__version__ = '0.3.0-dev'
# First two dot-separated fields of __version__ (e.g. '0.3').
__short_version__ = '.'.join(__version__.split('.')[:2])
__author__ = 'Rato AQ2'
__license__ = 'MIT'
__copyright__ = 'Copyright 2016-2017, Rato AQ2'
__url__ = 'https://github.com/ratoaq2/knowit'
#: Video extensions (lower-case, each including the leading dot)
VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx', '.avchd', '.avi', '.bik',
'.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli',
'.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e',
'.m4v', '.mjp', '.mjpeg', '.mjpg', '.mk3d', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx',
'.mp4', '.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm', '.ogv',
'.omf', '.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv',
'.vivo', '.vob', '.vro', '.webm', '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx', '.x264', '.xvid')
try:
from collections import OrderedDict
except ImportError: # pragma: no cover
from ordereddict import OrderedDict
from .api import KnowitException, know

@ -0,0 +1,151 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import json
import logging
import sys
from argparse import ArgumentParser
from six import PY2
import yaml
from . import (
__url__,
__version__,
api,
)
from .provider import ProviderError
from .serializer import (
get_json_encoder,
get_yaml_dumper,
)
from .utils import recurse_paths
# Root logging writes to stdout using a message-only format.
logging.basicConfig(stream=sys.stdout, format='%(message)s')
# 'CONSOLE' is the logger used for the command line output (see `console`).
logging.getLogger('CONSOLE').setLevel(logging.INFO)
# The library logger starts at ERROR; main() changes its level afterwards.
logging.getLogger('knowit').setLevel(logging.ERROR)
console = logging.getLogger('CONSOLE')
logger = logging.getLogger('knowit')
def build_argument_parser():
    """Create the command line parser used by the knowit entry point.

    The parser accepts any number of positional video paths plus options
    grouped under Providers, Output, Configuration and Information.

    :return: the configured argument parser
    :rtype: ArgumentParser
    """
    parser = ArgumentParser()
    parser.add_argument(nargs='*', dest='videopath', help='Path to the video to introspect')

    def add_switch(group, dest, text, *flags):
        # Boolean options all share the same store_true behaviour.
        group.add_argument(*flags, action='store_true', dest=dest, help=text)

    providers = parser.add_argument_group('Providers')
    providers.add_argument('-p', '--provider', dest='provider',
                           help='The provider to be used: mediainfo, ffmpeg or enzyme.')

    output = parser.add_argument_group('Output')
    add_switch(output, 'debug',
               'Print useful information for debugging knowit and for reporting bugs.',
               '--debug')
    add_switch(output, 'report', 'Parse media and report all non-detected values', '--report')
    add_switch(output, 'yaml', 'Display output in yaml format', '-y', '--yaml')
    add_switch(output, 'no_units', 'Display output without units', '-N', '--no-units')
    output.add_argument('-P', '--profile', dest='profile',
                        help='Display values according to specified profile: code, default, human, technical')

    configuration = parser.add_argument_group('Configuration')
    configuration.add_argument('--mediainfo', dest='mediainfo',
                               help='The location to search for MediaInfo binaries')
    configuration.add_argument('--ffmpeg', dest='ffmpeg',
                               help='The location to search for FFmpeg (ffprobe) binaries')

    information = parser.add_argument_group('Information')
    add_switch(information, 'version', 'Display knowit version.', '--version')

    return parser
def knowit(video_path, options, context):
    """Extract the metadata of one video and print it unless reporting.

    Stores ``video_path`` under ``context['path']``, announces the file on
    the console, delegates to ``api.know`` and, when ``options.report`` is
    not set, prints the dumped result.

    :param video_path: path of the video to introspect
    :param options: parsed command line options
    :param context: mutable context passed to ``api.know`` and ``dump``
    :return: whatever ``api.know`` returned
    """
    context['path'] = video_path

    banner = 'Parsing: %s' if options.report else 'For: %s'
    console.info(banner, video_path)

    info = api.know(video_path, context)

    if options.report:
        return info

    console.info('Knowit %s found: ', __version__)
    console.info(dump(info, options, context))
    return info
def dump(info, options, context):
    """Serialize ``info`` to text, as YAML when ``options.yaml`` is set, else JSON.

    :param info: mapping to serialize
    :param options: parsed command line options (only ``yaml`` is read)
    :param context: passed to the encoder/dumper factories
    :return: the serialized text
    """
    if not options.yaml:
        return json.dumps(info, cls=get_json_encoder(context), indent=4, ensure_ascii=False)

    # In YAML output the document is keyed by the file path when one is known.
    document = info if 'path' not in info else {info['path']: info}
    text = yaml.dump(document, Dumper=get_yaml_dumper(context),
                     default_flow_style=False, allow_unicode=True)
    if PY2:
        text = text.decode('utf-8')
    return text
def main(args=None):
    """Run the command line interface.

    Parses ``args`` (falling back to ``sys.argv[1:]`` when empty or None),
    processes every path found by ``recurse_paths`` and, in report mode,
    prints the collected unknown values. Without paths it prints either the
    debug/version information or the help text.

    :param args: command line arguments, without the program name
    """
    parser = build_argument_parser()
    options = parser.parse_args(args or sys.argv[1:])

    if options.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('enzyme').setLevel(logging.INFO)
    else:
        logger.setLevel(logging.WARNING)

    paths = recurse_paths(options.videopath)
    if not paths:
        if options.version:
            console.info(api.debug_info())
        else:
            parser.print_help()
        return

    report = {}
    for index, videopath in enumerate(paths):
        try:
            context = dict(vars(options))
            if options.report:
                # Every file shares the same report mapping.
                context['report'] = report
            else:
                context.pop('report')
            knowit(videopath, options, context)
        except ProviderError:
            logger.exception('Error when processing video')
        except OSError:
            logger.exception('OS error when processing video')
        except UnicodeError:
            logger.exception('Character encoding error when processing video')
        except api.KnowitException as e:
            logger.error(e)

        # Intermediate summary after every 20th file while reporting.
        if options.report and (index + 1) % 20 == 0 and report:
            console.info('Unknown values so far:')
            console.info(dump(report, options, vars(options)))

    if not options.report:
        return

    if report:
        console.info('Knowit %s found unknown values:', __version__)
        console.info(dump(report, options, vars(options)))
        console.info('Please report them at %s', __url__)
    else:
        console.info('Knowit %s knows everything. :-)', __version__)
if __name__ == '__main__':
main(sys.argv[1:])

@ -0,0 +1,132 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import traceback
from . import OrderedDict, __version__
from .config import Config
from .providers import (
EnzymeProvider,
FFmpegProvider,
# MediaInfoProvider,
)
# Provider classes by name; know() and initialize() iterate in this order.
_provider_map = OrderedDict([
# ('mediainfo', MediaInfoProvider),
('ffmpeg', FFmpegProvider),
('enzyme', EnzymeProvider)
])
provider_names = _provider_map.keys()
# Provider instances by name; populated by initialize() while still empty.
available_providers = OrderedDict([])
class KnowitException(Exception):
    """Raised when knowit cannot extract media info because of an internal error."""
def initialize(context=None):
    """Instantiate every registered provider once.

    Does nothing when ``available_providers`` already has entries, so only
    the first call's ``context`` takes effect.

    :param context: optional dict; ``'config'`` is passed to ``Config.build``
        and a key named after a provider is passed to that provider,
        falling back to ``config.general``
    """
    if available_providers:
        return

    settings = context or {}
    config = Config.build(settings.get('config'))
    for name, provider_cls in _provider_map.items():
        provider_setting = settings.get(name) or config.general.get(name)
        available_providers[name] = provider_cls(config, provider_setting)
def know(video_path, context=None):
    """Return a dict containing the video metadata.

    Providers are tried in registration order; the first one that accepts
    the path and returns a truthy description wins. When ``context`` names
    a ``'provider'``, only that provider is considered.

    :param video_path: path (string or path-like object) of the video
    :type video_path: string
    :param context: optional settings; ``'profile'`` defaults to ``'default'``
    :type context: dict
    :return: the metadata, or an empty dict when no provider produced any
    :rtype: dict
    :raises KnowitException: when anything fails during extraction
    """
    try:
        # handle path-like objects
        video_path = video_path.__fspath__()
    except AttributeError:
        pass

    try:
        context = context or {}
        context.setdefault('profile', 'default')
        initialize(context)

        for name, provider in available_providers.items():
            requested = context.get('provider')
            if requested and requested != name:
                continue
            if not provider.accepts(video_path):
                continue
            description = provider.describe(video_path, context)
            if description:
                return description

        return {}
    except Exception:
        raise KnowitException(debug_info(context=context, exc_info=True))
def dependencies(context=None):
    """Return the version information of every registered provider.

    Best effort: any exception raised while initializing or reading a
    provider's ``version`` is swallowed and whatever was collected so far
    is returned.

    :param context: optional context forwarded to ``initialize``
    :return: ordered mapping of provider name to its ``version`` value, or
        to an empty dict when the provider is not available
    """
    found = OrderedDict([])
    try:
        initialize(context)
        for name in _provider_map:
            if name not in available_providers:
                found[name] = {}
                continue
            found[name] = available_providers[name].version
    except Exception:
        pass

    return found
def _centered(value):
value = value[-52:]
return '| {msg:^53} |'.format(msg=value)
def debug_info(context=None, exc_info=False):
    """Build the multi-line text box describing the knowit environment.

    The box lists the knowit version, each provider's version details, the
    truthy entries of ``context`` and, on request, the current traceback.
    Note that ``'debug_data'`` is popped from ``context``; when present and
    truthy it is called and its result appended.

    :param context: optional dict of settings to display
    :param exc_info: when true, append ``traceback.format_exc()``
    :return: the report as a single newline-joined string
    """
    rule = '+-------------------------------------------------------+'
    report = [rule, _centered('KnowIt {0}'.format(__version__)), rule]

    for index, details in enumerate(dependencies(context).values()):
        if index:
            # Blank row between two providers.
            report.append(_centered(''))
        for label, text in details.items():
            report.extend((_centered(label), _centered(text)))

    if context:
        extra = context.pop('debug_data', None)

        report.append(rule)
        report.extend(_centered('{}: {}'.format(k, v)) for k, v in context.items() if v)

        if extra:
            report += [rule, extra()]

    if exc_info:
        report += [rule, traceback.format_exc()]

    report += [
        rule,
        _centered('Please report any bug or feature request at'),
        _centered('https://github.com/ratoaq2/knowit/issues.'),
        rule,
    ]

    return '\n'.join(report)

@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from collections import namedtuple
from logging import NullHandler, getLogger
from pkg_resources import resource_stream
from six import text_type
import yaml
from .serializer import get_yaml_loader
logger = getLogger(__name__)
logger.addHandler(NullHandler())
# Alias names kept from a profile entry; they are also the fields of _Value.
_valid_aliases = ('code', 'default', 'human', 'technical')
# One display string per alias for a single known code.
_Value = namedtuple('_Value', _valid_aliases)
class Config(object):
    """Application configuration built from the bundled and user YAML files."""

    @classmethod
    def build(cls, path=None):
        """Load ``defaults.yml`` (plus ``path`` if given) into a Config.

        The resulting instance exposes ``general`` (an empty dict) and one
        attribute per knowledge class, each mapping a detection value (as
        text) to a ``_Value`` holding its code/default/human/technical names.

        :param path: optional path of an additional YAML configuration file
        :return: the populated configuration
        :rtype: Config
        """
        loader = get_yaml_loader()

        with resource_stream('knowit', 'defaults.yml') as stream:
            documents = [yaml.load(stream, Loader=loader)]

        if path:
            # NOTE(review): how safe it is to load a user supplied file depends on
            # the loader returned by get_yaml_loader - confirm.
            with open(path, 'r') as stream:
                documents.append(yaml.load(stream, Loader=loader))

        # Later documents override earlier ones, one whole class at a time.
        profiles = {}
        knowledge = {}
        for document in documents:
            if 'profiles' in document:
                profiles.update(document['profiles'])
            if 'knowledge' in document:
                knowledge.update(document['knowledge'])

        data = {'general': {}}
        for class_name, codes in knowledge.items():
            bucket = data.setdefault(class_name, {})
            for code, detection_values in codes.items():
                aliases = (profiles.get(class_name) or {}).get(code) or {}
                aliases.setdefault('code', code)
                # Each missing alias falls back to the previous one in the chain.
                for alias, fallback in (('default', 'code'), ('human', 'default'), ('technical', 'human')):
                    aliases.setdefault(alias, aliases[fallback])

                kept = {k: v for k, v in aliases.items() if k in _valid_aliases}
                value = _Value(**kept)
                for detection_value in detection_values:
                    bucket[text_type(detection_value)] = value

        config = Config()
        config.__dict__ = data
        return config

@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from logging import NullHandler, getLogger
from six import text_type
# Module logger; used by Reportable.report to log unknown values.
logger = getLogger(__name__)
logger.addHandler(NullHandler())
class Reportable(object):
    """Base class for named items able to report values they do not know."""

    def __init__(self, name, description=None, reportable=True):
        """Store the name, the optional description and the reportable flag."""
        self.name = name
        self._description = description
        self.reportable = reportable

    @property
    def description(self):
        """Return the explicit description, or the name when there is none."""
        if self._description:
            return self._description
        return self.name

    def report(self, value, context):
        """Record and log an unknown value.

        Falsy values and non reportable items are ignored. When ``context``
        has a ``'report'`` mapping, the first path seen for each value is
        stored under this item's description.

        :param value: the unrecognised value
        :param context: dict that may hold ``'report'`` and ``'path'``
        """
        if not (value and self.reportable):
            return

        text = text_type(value)
        if 'report' in context:
            seen = context['report'].setdefault(self.description, {})
            if text not in seen:
                seen[text] = context['path']
        logger.info('Invalid %s: %r', self.description, text)

@ -0,0 +1,628 @@
knowledge:
VideoCodec:
# https://en.wikipedia.org/wiki/MPEG-1#Part_2:_Video
MPEG1:
- MPEG-1V
- MPEG1VIDEO
# https://en.wikipedia.org/wiki/H.262/MPEG-2_Part_2
MPEG2:
- MPEG2
- MPEG-2V
# https://wiki.multimedia.cx/index.php/Microsoft_MPEG-4
MSMPEG4V1:
- MP41
- MPG4
MSMPEG4V2:
- MP42
- MSMPEG4V2
MSMPEG4V3:
- MP43
- AP41
- COL1
WMV1:
- WMV1
- WMV7
WMV2:
- WMV2
- WMV8
# MPEG-4:
# https://wiki.multimedia.cx/index.php/ISO_MPEG-4
# https://en.wikipedia.org/wiki/MPEG-4_Part_2
MPEG4:
- 3IV2
- BLZ0
- DIGI
- DXGM
- EM4A
- EPHV
- FMP4
- FVFW
- HDX4
- M4CC
- M4S2
- MP4S
- MP4V
- MVXM
- RMP4
- SEDG
- SMP4
- UMP4
- WV1F
- MPEG-4V
- ASP # V_MPEG-4/ISO/ASP
- MPEG4
DIVX:
- DIV1
- DIVX
- DX50
XVID:
- XVID
- XVIX
# VC-1:
# https://wiki.multimedia.cx/index.php/VC-1
# https://en.wikipedia.org/wiki/VC-1
VC1:
- WMV3
- WMV9
- WMVA
- WMVC1
- WMVP
- WVP2
- WMVR
- VC-1
- VC1
# H.263:
# https://wiki.multimedia.cx/index.php/H.263
# https://en.wikipedia.org/wiki/Sorenson_Media#Sorenson_Spark
H263:
- D263
- H263
- L263
- M263
- S263
- T263
- U263
- X263
# https://wiki.multimedia.cx/index.php/H.264
H264:
- AVC
- AVC1
- DAVC
- H264
- X264
- VSSH
# https://wiki.multimedia.cx/index.php/H.265
H265:
- HEVC
- H265
- X265
# https://wiki.multimedia.cx/index.php/On2_VP6 and https://en.wikipedia.org/wiki/VP6
VP6:
- VP60
- VP61
- VP62
# https://wiki.multimedia.cx/index.php/On2_VP7
VP7:
- VP70
- VP71
- VP72
# https://en.wikipedia.org/wiki/VP8
VP8:
- VP8
# https://en.wikipedia.org/wiki/VP9
# https://wiki.multimedia.cx/index.php/VP9
VP9:
- VP9
- VP90
CJPG:
- CJPG
QUICKTIME:
- QUICKTIME
__ignored__:
- MJPEG
- PNG
VideoEncoder:
DIVX:
- DIVX
X264:
- X264
X265:
- X265
XVID:
- XVID
VIMEO:
- VIMEO ENCODER
VideoProfile:
ADVANCED:
- ADVANCED
ADVANCEDSIMPLE:
- ADVANCED SIMPLE
- ADVANCED SIMPLE PROFILE
SIMPLE:
- SIMPLE
BASELINE:
- BASELINE
- CONSTRAINED BASELINE
MAIN:
- MAIN
MAIN10:
- MAIN 10
HIGH:
- HIGH
VideoProfileLevel:
L1:
- L1
- L1.0
L11:
- L1.1
L13:
- L1.3
L2:
- L2
L21:
- L2.1
L22:
- L2.2
L3:
- L3
- L3.0
L31:
- L3.1
L32:
- L3.2
L4:
- L4
- L4.0
L41:
- L4.1
L42:
- L4.2
L5:
- L5
- L5.0
L51:
- L5.1
LOW:
- LOW
MAIN:
- MAIN
HIGH:
- HIGH
H14:
- HIGH 1440
VideoProfileTier:
MAIN:
- MAIN
HIGH:
- HIGH
ScanType:
PROGRESSIVE:
- PROGRESSIVE
INTERLACED:
- INTERLACED
- MBAFF
- TT
- BB
- TB
- BT
BitRateMode:
VBR:
- VBR
CBR:
- CBR
AudioCompression:
LOSSY:
- LOSSY
LOSSLESS:
- LOSSLESS
AudioProfile:
CORE:
- CORE
HRA:
- HRA
- DTS-HD HRA
MA:
- MA
- DTS-HD MA
MAIN:
- MAIN
LC:
- LC
HEAAC:
- HE-AAC
HEAACV2:
- HE-AACV2
# https://www.lifewire.com/dts-96-24-1846848
96/24:
- 96/24
- DTS 96/24
# https://www.lifewire.com/what-is-dts-es-1846890
ESDISCRETE:
- ES DISCRETE
- DTS-ES
ESMATRIX:
- ES MATRIX
LAYER2:
- LAYER 2
LAYER3:
- LAYER 3
PRO:
- PRO
__ignored__:
- DOLBY DIGITAL
- DTS
# References:
# - https://ffmpeg.org/general.html#Audio-Codecs
AudioCodec:
AC3:
- AC3
- BSID9
- BSID10
- 2000
EAC3:
- EAC3
- AC3+
TRUEHD:
- TRUEHD
ATMOS:
- ATMOS
DTS:
- DTS
# DTS-HD used for DTS-HD High Resolution Audio and DTS-HD Master Audio
DTSHD:
- DTS-HD
AAC:
- AAC
FLAC:
- FLAC
PCM:
- PCM
- PCM_S16LE
# https://en.wikipedia.org/wiki/MPEG-1_Audio_Layer_II
MP2:
- MP2
- MPA1L2
- MPEG/L2
# https://en.wikipedia.org/wiki/MP3
MP3:
- MP3
- MPA1L3
- MPA2L3
- MPEG/L3
- 50
- 55
VORBIS:
- VORBIS
OPUS:
- OPUS
# https://wiki.multimedia.cx/index.php?title=Windows_Media_Audio_9
WMA1:
- 160
WMA2:
- 161
- WMAV2
WMAPRO:
- 162
- WMAPRO
# https://answers.microsoft.com/en-us/windows/forum/windows_vista-pictures/how-to-access-codec-voxware-rt29-metasound-75/a6dbea68-ca5c-e011-8dfc-68b599b31bf5
RT29:
- 75
SubtitleFormat:
PGS:
- PGS
- 144
- HDMV_PGS_SUBTITLE
VOBSUB:
- VOBSUB
- E0
- DVD_SUBTITLE
SUBRIP:
- SUBRIP
- UTF8
- SRT
# https://en.wikipedia.org/wiki/SubStation_Alpha
SSA:
- SSA
ASS:
- ASS
# https://en.wikipedia.org/wiki/MPEG-4_Part_17
TX3G:
- TX3G
DVBSUB:
- 6
MOVTEXT:
- MOV_TEXT
profiles:
VideoCodec:
MPEG1:
default: MPEG-1
human: MPEG-1 Video
technical: MPEG-1 Part 2
MPEG2:
default: MPEG-2
human: MPEG-2 Video
technical: MPEG-2 Part 2
aka: H.262
MSMPEG4V1:
default: Microsoft MPEG-4 v1
human: Microsoft MPEG-4 version 1
technical: MPEG-4 Part 2 Microsoft variant version 1
MSMPEG4V2:
default: Microsoft MPEG-4 v2
human: Microsoft MPEG-4 version 2
technical: MPEG-4 Part 2 Microsoft variant version 2
MSMPEG4V3:
default: Microsoft MPEG-4 v3
human: Microsoft MPEG-4 version 3
technical: MPEG-4 Part 2 Microsoft variant version 3
WMV1:
default: WMV 7
human: Windows Media Video 7
technical: Microsoft Windows Media Video v1/v7
WMV2:
default: WMV 8
human: Windows Media Video 8
technical: Microsoft Windows Media Video v2/v8
MPEG4:
default: MPEG-4
human: MPEG-4 Visual
technical: MPEG-4 Part 2
DIVX:
default: DivX
human: MPEG-4 Visual (DivX)
technical: MPEG-4 Part 2 (DivX)
XVID:
default: Xvid
human: MPEG-4 Visual (Xvid)
technical: MPEG-4 Part 2 (Xvid)
VC1:
default: VC-1
human: Windows Media Video 9
technical: Microsoft SMPTE 421M
H263:
default: H.263
H264:
default: H.264
human: Advanced Video Coding (H.264)
technical: MPEG-4 Part 10 - Advanced Video Coding
aka: AVC
H265:
default: H.265
human: High Efficiency Video Coding (H.265)
technical: MPEG-H Part 2 - High Efficiency Video Coding
aka: HEVC
VP6:
human: On2 VP6
technical: On2 TrueMotion VP6
VP7:
human: On2 VP7
technical: On2 TrueMotion VP7
VP8:
technical: Google VP8
VP9:
technical: Google VP9
CJPG:
default: WebCam JPEG
QUICKTIME:
default: QuickTime
VideoEncoder:
DIVX:
default: DivX
X264:
default: x264
X265:
default: x265
XVID:
default: Xvid
VIMEO:
default: Vimeo
VideoProfile:
ADVANCED:
default: Advanced
ADVANCEDSIMPLE:
default: Advanced Simple
SIMPLE:
default: Simple
BASELINE:
default: Baseline
MAIN:
default: Main
MAIN10:
default: Main 10
HIGH:
default: High
VideoProfileLevel:
L1:
default: '1'
technical: Level 1
L11:
default: '1.1'
technical: Level 1.1
L13:
default: '1.3'
technical: Level 1.3
L2:
default: '2'
technical: Level 2
L21:
default: '2.1'
technical: Level 2.1
L22:
default: '2.2'
technical: Level 2.2
L3:
default: '3'
technical: Level 3
L31:
default: '3.1'
technical: Level 3.1
L32:
default: '3.2'
technical: Level 3.2
L4:
default: '4'
technical: Level 4
L41:
default: '4.1'
technical: Level 4.1
L42:
default: '4.2'
technical: Level 4.2
L5:
default: '5'
technical: Level 5
L51:
default: '5.1'
technical: Level 5.1
LOW:
default: Low
MAIN:
default: Main
HIGH:
default: High
H14:
default: High 1440
VideoProfileTier:
MAIN:
default: Main
HIGH:
default: High
ScanType:
PROGRESSIVE:
default: Progressive
human: Progressive scanning
INTERLACED:
default: Interlaced
human: Interlaced video
BitRateMode:
VBR:
default: Variable
human: Variable bitrate
CBR:
default: Constant
human: Constant bitrate
AudioCompression:
LOSSY:
default: Lossy
human: Lossy compression
LOSSLESS:
default: Lossless
human: Lossless compression
AudioProfile:
HRA:
default: High Resolution Audio
MA:
default: Master Audio
MAIN:
default: Main
technical: Main Profile
LC:
default: Low Complexity
HEAAC:
default: High Efficiency
HEAACV2:
default: High Efficiency v2
human: High Efficiency version 2
96/24:
default: 96/24
human: 96 kHz 24 bits
technical: 96 kHz 24 bits Upscaled
ESDISCRETE:
default: Extended Surround
human: Extended Surround Discrete
ESMATRIX:
default: Extended Surround
human: Extended Surround Matrix
LAYER2:
default: Layer 2
LAYER3:
default: Layer 3
PRO:
default: Pro
technical: Professional
AudioCodec:
AC3:
default: AC-3
human: Dolby Digital
EAC3:
default: E-AC-3
human: Dolby Digital Plus
technical: Enhanced AC-3
TRUEHD:
default: TrueHD
human: Dolby TrueHD
ATMOS:
default: Atmos
human: Dolby Atmos
DTS:
DTSHD:
default: DTS-HD
AAC:
human: Advanced Audio Coding
FLAC:
human: Free Lossless Audio Codec
PCM:
human: Pulse-code Modulation
MP2:
human: MPEG Audio Layer 2
technical: MPEG-1/MPEG-2 Audio Layer 2
MP3:
human: MPEG Audio Layer 3
technical: MPEG-1/MPEG-2 Audio Layer 3
VORBIS:
default: Vorbis
OPUS:
default: Opus
WMA1:
default: WMA
human: Windows Media Audio 1
WMA2:
default: WMA 2
human: Windows Media Audio 2
WMAPRO:
default: WMA Pro
human: Windows Media Audio Pro
RT29:
default: RT29 MetaSound
human: Voxware RT29 MetaSound
SubtitleFormat:
PGS:
human: Presentation Graphic Stream
VOBSUB:
default: VobSub
SUBRIP:
default: SubRip
SSA:
human: SubStation Alpha
ASS:
human: Advanced SubStation Alpha
TX3G:
human: MPEG-4 Timed Text
technical: MPEG-4 Part 17
DVBSUB:
default: DVBSub
human: DVB Subtitle
technical: Digital Video Broadcasting Subtitles
MOVTEXT:
default: MOV Text

@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .audio import (
AudioChannels,
AudioCodec,
AudioCompression,
AudioProfile,
BitRateMode,
)
from .basic import Basic
from .duration import Duration
from .language import Language
from .quantity import Quantity
from .subtitle import (
SubtitleFormat,
)
from .video import (
Ratio,
ScanType,
VideoCodec,
VideoEncoder,
VideoProfile,
VideoProfileLevel,
VideoProfileTier,
)
from .yesno import YesNo

@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .bitratemode import BitRateMode
from .channels import AudioChannels
from .codec import AudioCodec
from .compression import AudioCompression
from .profile import AudioProfile

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from ...property import Configurable
class BitRateMode(Configurable):
    """Bit rate mode, resolved through the configuration mapping."""

@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from six import text_type
from ...property import Property
class AudioChannels(Property):
    """Number of audio channels, returned as an integer."""

    # Lower-cased values that are dropped without being reported.
    ignored = {
        'object based',  # Dolby Atmos
    }

    def handle(self, value, context):
        """Convert ``value`` to an int channel count.

        :return: the integer, or None for ignored or unparsable values
            (unparsable ones are reported)
        """
        if isinstance(value, int):
            return value

        text = text_type(value).lower()
        if text in self.ignored:
            return None

        try:
            return int(text)
        except ValueError:
            self.report(value, context)
            return None

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from six import text_type
from ...property import Configurable
class AudioCodec(Configurable):
    """Audio codec, resolved through the configuration mapping."""

    @classmethod
    def _extract_key(cls, value):
        """Return the upper-cased first word of ``value`` without an 'A_' prefix."""
        text = text_type(value).upper()
        if text.startswith('A_'):
            text = text[2:]

        # only the first part of the word. E.g.: 'AAC LC' => 'AAC'
        first_word, _, _ = text.partition(' ')
        return first_word

    @classmethod
    def _extract_fallback_key(cls, value, key):
        """Return the part of ``key`` before its first '/', or None without one."""
        if '/' not in key:
            return None
        return key.split('/')[0]

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from ...property import Configurable
class AudioCompression(Configurable):
    """Audio compression kind, resolved through the configuration mapping."""

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from ...property import Configurable
class AudioProfile(Configurable):
    """Audio profile, resolved through the configuration mapping."""

@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from six import text_type
from ..property import Property
class Basic(Property):
    """Property converting its value to a basic type such as int or float."""

    def __init__(self, name, data_type, allow_fallback=False, **kwargs):
        """Store the target type and whether failed conversions stay silent.

        :param name: property name, forwarded to ``Property``
        :param data_type: callable/type used to convert values
        :param allow_fallback: when true, conversion failures are not reported
        """
        super(Basic, self).__init__(name, **kwargs)
        self.data_type = data_type
        self.allow_fallback = allow_fallback

    def handle(self, value, context):
        """Return ``value`` as ``data_type``, or None when it cannot be converted."""
        if isinstance(value, self.data_type):
            return value

        try:
            converted = self.data_type(text_type(value))
        except ValueError:
            if not self.allow_fallback:
                self.report(value, context)
            return None
        return converted

@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from datetime import timedelta
from six import text_type
from ..property import Property
class Duration(Property):
    """Duration property, normalised to a ``timedelta``."""

    # H:MM:SS with an optional fraction; only the first six fractional
    # digits (milliseconds then microseconds) are captured.
    duration_re = re.compile(r'(?P<hours>\d{1,2}):'
                             r'(?P<minutes>\d{1,2}):'
                             r'(?P<seconds>\d{1,2})(?:\.'
                             r'(?P<millis>\d{3})'
                             r'(?P<micro>\d{3})?\d*)?')

    def handle(self, value, context):
        """Return duration as timedelta.

        Accepts a ``timedelta`` (returned unchanged), a number or numeric
        string (interpreted as milliseconds) or text starting with
        ``H:MM:SS[.fraction]``. Anything else is reported as unknown.

        :param value: the raw duration
        :param context: context forwarded to ``report``
        :return: the duration, or None when ``value`` is not understood
        """
        if isinstance(value, timedelta):
            return value
        elif isinstance(value, int):
            return timedelta(milliseconds=value)

        try:
            return timedelta(milliseconds=int(float(value)))
        except (TypeError, ValueError, OverflowError):
            # Not a finite number ('inf' overflows int()); try the clock format.
            pass

        # match() returns None for unrelated text; that must end up in
        # report() instead of raising AttributeError on .groups().
        match = self.duration_re.match(text_type(value))
        if match:
            h, m, s, ms, mc = match.groups('0')
            return timedelta(hours=int(h), minutes=int(m), seconds=int(s),
                             milliseconds=int(ms), microseconds=int(mc))

        self.report(value, context)

@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import babelfish
from ..property import Property
class Language(Property):
    """Language property, returned as a ``babelfish.Language``."""

    def handle(self, value, context):
        """Resolve ``value`` to a language.

        Three-character values are parsed as alpha3b codes and everything
        else as IETF codes; on failure the value is tried as a language
        name. Unresolved values are reported and the 'und' language is
        returned.
        """
        try:
            if len(value) == 3:
                parse = babelfish.Language.fromalpha3b
            else:
                parse = babelfish.Language.fromietf
            return parse(value)
        except (babelfish.Error, ValueError):
            pass

        try:
            return babelfish.Language.fromname(value)
        except babelfish.Error:
            pass

        self.report(value, context)
        return babelfish.Language('und')

@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from six import text_type
from ..property import Property
class Quantity(Property):
    """Property whose value is a number multiplied by a unit."""

    def __init__(self, name, unit, data_type=int, **kwargs):
        """Store the unit and the numeric type used to convert values."""
        super(Quantity, self).__init__(name, **kwargs)
        self.unit = unit
        self.data_type = data_type

    def handle(self, value, context):
        """Return the converted value, multiplied by the unit unless disabled.

        :return: the bare number when ``context['no_units']`` is truthy,
            ``number * unit`` otherwise, or None (after reporting) when the
            value cannot be converted
        """
        number = value
        if not isinstance(number, self.data_type):
            try:
                number = self.data_type(text_type(number))
            except ValueError:
                self.report(value, context)
                return None

        if context.get('no_units'):
            return number
        return number * self.unit

@ -0,0 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .format import SubtitleFormat

@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from six import text_type
from ...property import Configurable
class SubtitleFormat(Configurable):
    """Subtitle format, resolved through the configuration mapping."""

    @classmethod
    def _extract_key(cls, value):
        """Return the upper-cased text after the last '/', minus an 'S_' prefix."""
        text = text_type(value).upper()
        if text.startswith('S_'):
            text = text[2:]

        return text.rpartition('/')[2]

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .codec import VideoCodec
from .encoder import VideoEncoder
from .profile import VideoProfile
from .profile import VideoProfileLevel
from .profile import VideoProfileTier
from .ratio import Ratio
from .scantype import ScanType

@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from ...property import Configurable
class VideoCodec(Configurable):
    """Video codec, resolved through the configuration mapping."""

    @classmethod
    def _extract_key(cls, value):
        """Return the last word of the upper-cased text after the last '/'.

        A leading 'V_' is removed before the last word is taken.
        """
        tail = value.upper().rpartition('/')[2]
        if tail.startswith('V_'):
            tail = tail[2:]

        return tail.rpartition(' ')[2]

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from ...property import Configurable
class VideoEncoder(Configurable):
    """Video encoder, resolved through the configuration mapping."""

@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from six import text_type
from ...property import Configurable
class VideoProfile(Configurable):
    """Video profile, resolved through the configuration mapping."""

    @classmethod
    def _extract_key(cls, value):
        """Return the upper-cased part of ``value`` before the first '@'."""
        profile, _, _ = value.upper().partition('@')
        return profile
class VideoProfileLevel(Configurable):
    """Video profile level, resolved through the configuration mapping."""

    @classmethod
    def _extract_key(cls, value):
        """Return the second '@'-separated field of ``value``, upper-cased.

        :return: the level text, or False when ``value`` contains no '@'
        """
        fields = text_type(value).upper().split('@')
        # There's no level, so don't warn or report it
        return fields[1] if len(fields) > 1 else False
class VideoProfileTier(Configurable):
    """Video profile tier, resolved through the configuration mapping."""

    @classmethod
    def _extract_key(cls, value):
        """Return the third '@'-separated field of ``value``, upper-cased.

        :return: the tier text, or False when there are fewer than three fields
        """
        fields = value.upper().split('@')
        # There's no tier, so don't warn or report it
        return fields[2] if len(fields) > 2 else False

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from six import text_type
from ...property import Property
class Ratio(Property):
    """Ratio property parsed from 'W:H' or 'W/H' text."""

    def __init__(self, name, unit=None, **kwargs):
        """Store the optional unit the computed ratio is multiplied by."""
        super(Ratio, self).__init__(name, **kwargs)
        self.unit = unit

    ratio_re = re.compile(r'(?P<width>\d+)[:/](?P<height>\d+)')

    def handle(self, value, context):
        """Return ``width / height`` rounded to three decimals.

        '0:1' is treated as the identity ratio 1.0. Values that do not
        look like a ratio, or whose denominator is zero (e.g. '0/0'), are
        reported as unknown instead of raising ``ZeroDivisionError``.

        :param value: the raw ratio
        :param context: context forwarded to ``report``
        :return: the ratio (times ``unit`` when one is set), or None
        """
        match = self.ratio_re.match(text_type(value))
        if match:
            width, height = match.groups()
            if (width, height) == ('0', '1'):  # identity
                return 1.

            denominator = float(height)
            if denominator:
                result = round(float(width) / denominator, 3)
                if self.unit:
                    result *= self.unit

                return result

        self.report(value, context)

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from ...property import Configurable
class ScanType(Configurable):
    """Scan type, resolved through the configuration mapping."""

@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from six import text_type
from ..property import Property
class YesNo(Property):
    """Property mapping yes/no style values to two configured results."""

    # Lower-cased texts that count as "yes"; anything else counts as "no".
    mapping = ('yes', 'true', '1')

    def __init__(self, name, yes=True, no=False, hide_value=None, **kwargs):
        """Store the results for yes and no, and the result to hide.

        :param yes: result returned for a "yes" value
        :param no: result returned for any other value
        :param hide_value: result that is replaced by None
        """
        super(YesNo, self).__init__(name, **kwargs)
        self.yes = yes
        self.no = no
        self.hide_value = hide_value

    def handle(self, value, context):
        """Return the yes/no result for ``value``, or None when it is hidden."""
        if text_type(value).lower() in self.mapping:
            result = self.yes
        else:
            result = self.no

        if result != self.hide_value:
            return result
        return None

@ -0,0 +1,137 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from logging import NullHandler, getLogger
from six import PY3, binary_type, string_types, text_type
from .core import Reportable
logger = getLogger(__name__)
logger.addHandler(NullHandler())
# Translation table passed to ``translate`` in Property.extract_value: code
# points 0-31 are mapped to None, which removes those characters from the text.
_visible_chars_table = dict.fromkeys(range(32))
def _is_unknown(value):
    """Tell whether *value* is text that is either empty or spells 'unknown' in any case."""
    if not isinstance(value, text_type):
        return False
    return not value or value.lower() == 'unknown'
class Property(Reportable):
    """Base property: reads one value from a track and hands it to ``handle``."""

    def __init__(self, name, default=None, private=False, description=None, delimiter=' / ', **kwargs):
        """Init method.

        :param name: key to read from the track; ``'outer.inner'`` reads a nested key
        :param default: value used when the track has no value for ``name``
        :param private: flag stored on the instance for the caller to inspect
        :param description: forwarded to ``Reportable``
        :param delimiter: stored on the instance (see the note in the body)
        :param kwargs: extra keyword arguments forwarded to ``Reportable``
        """
        super(Property, self).__init__(name, description, **kwargs)
        self.default = default
        self.private = private
        # Used to detect duplicated values. e.g.: en / en or High@L4.0 / High@L4.0 or Progressive / Progressive
        # NOTE: _deduplicate itself splits on the literal ' / ', not on this attribute.
        self.delimiter = delimiter

    def extract_value(self, track, context):
        """Extract the property value from a given track.

        :return: the handled value, or ``None`` when there is no usable value
        """
        path = self.name.split('.')
        if len(path) == 2:
            outer, inner = path
            raw = track.get(outer, {}).get(inner)
        else:
            raw = track.get(self.name)

        if raw is None:
            raw = self.default
            if raw is None:
                return None

        if isinstance(raw, string_types):
            if isinstance(raw, binary_type):
                raw = text_type(raw)
            else:
                # Strip control characters, then surrounding whitespace.
                raw = raw.translate(_visible_chars_table).strip()
            if _is_unknown(raw):
                return None
            raw = self._deduplicate(raw)

        handled = self.handle(raw, context)
        if handled is None or _is_unknown(handled):
            return None
        return handled

    @classmethod
    def _deduplicate(cls, value):
        """Collapse 'x / x' into 'x'; any other value is returned untouched."""
        parts = value.split(' / ')
        is_doubled = len(parts) == 2 and parts[0] == parts[1]
        return parts[0] if is_doubled else value

    def handle(self, value, context):
        """Return the value as is; subclasses override this to convert it."""
        return value
class Configurable(Property):
    """Property whose raw values are translated through a mapping taken from the config."""

    def __init__(self, config, *args, **kwargs):
        """Init method.

        :param config: object exposing an attribute named after the concrete class
        :param args: positional arguments forwarded to ``Property``
        :param kwargs: keyword arguments forwarded to ``Property``
        """
        super(Configurable, self).__init__(*args, **kwargs)
        self.mapping = getattr(config, self.__class__.__name__)

    @classmethod
    def _extract_key(cls, value):
        """Return the mapping key for the value: its upper-cased text form."""
        return text_type(value).upper()

    @classmethod
    def _extract_fallback_key(cls, value, key):
        """Return the next key to try; the base implementation has none."""
        return None

    def _lookup(self, key, context):
        """Resolve the key for the context profile.

        :return: ``None`` when the key is not mapped, ``False`` when the mapped
            value is the ``'__ignored__'`` marker, otherwise the mapped value
        """
        entry = self.mapping.get(key)
        if entry is None:
            return None

        profile = context.get('profile') or 'default'
        resolved = getattr(entry, profile)
        if resolved != '__ignored__':
            return resolved
        return False

    def handle(self, value, context):
        """Return the mapped value, trying fallback keys until one resolves.

        An ignored value yields ``None`` silently; a value that never resolves
        is reported before the falsy lookup result is returned.
        """
        key = self._extract_key(value)
        if key is False:
            return None

        result = self._lookup(key, context)
        while True:
            if result is False:
                return None
            if result or not key:
                break
            key = self._extract_fallback_key(value, key)
            result = self._lookup(key, context)

        if not result:
            self.report(value, context)
        return result
class MultiValue(Property):
    """Property whose raw value may hold several delimiter-separated items."""

    def __init__(self, prop=None, delimiter='/', single=False, handler=None, name=None, **kwargs):
        """Init method.

        :param prop: wrapped property; it provides the name and the default item handler
        :param delimiter: separator used to split a raw value into items
        :param single: when true only the first item is handled
        :param handler: optional callable used for each item instead of ``prop.handle``
        :param name: property name used when ``prop`` is not given
        :param kwargs: extra keyword arguments forwarded to ``Property``
        """
        super(MultiValue, self).__init__(prop.name if prop else name, **kwargs)
        self.prop = prop
        self.delimiter = delimiter
        self.single = single
        self.handler = handler

    def handle(self, value, context):
        """Handle every item of the value.

        :return: a list with one result per item (``None`` for unknown items)
            when there are several items and ``single`` is false, otherwise the
            result for the first item
        """
        if not isinstance(value, list):
            values = self._split(value, self.delimiter)
        elif len(value) == 1:
            values = self._split(value[0], self.delimiter)
        else:
            values = value

        call = self.handler or self.prop.handle
        has_many = len(values) > 1
        if has_many and not self.single:
            return [None if _is_unknown(item) else call(item, context) for item in values]
        return call(values[0], context)

    @classmethod
    def _split(cls, value, delimiter='/'):
        """Split the text form of the value on the delimiter and strip every piece."""
        if value is None:
            return None
        return [piece.strip() for piece in text_type(value).split(delimiter)]

@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
from logging import NullHandler, getLogger
from . import OrderedDict
from .properties import Quantity
from .units import units
logger = getLogger(__name__)
logger.addHandler(NullHandler())
# Shared property used by Provider._describe_tracks to convert the on-disk file
# size (os.path.getsize) when the general track carries no 'size' of its own.
size_property = Quantity('size', units.byte, description='media size')
class Provider(object):
    """Base class for all providers."""

    # Exclusive bounds: a video track whose frame rate is outside them is dropped.
    min_fps = 10
    max_fps = 200

    def __init__(self, config, mapping, rules=None):
        """Init method.

        :param config: stored as is on the instance
        :param mapping: per track type, an ordered mapping of name -> property
            (a falsy property is a placeholder to be filled by a rule)
        :param rules: per track type, a mapping of name -> rule; defaults to none
        """
        self.config = config
        self.mapping = mapping
        self.rules = rules or {}

    def accepts(self, target):
        """Whether or not the video is supported by this provider."""
        raise NotImplementedError

    def describe(self, target, context):
        """Read video metadata information."""
        raise NotImplementedError

    def _describe_tracks(self, video_path, general_track, video_tracks, audio_tracks, subtitle_tracks, context):
        """Describe the general track plus every video, audio and subtitle track.

        Missing 'path', 'container' and 'size' entries are derived from
        ``video_path``. A track type is only added when at least one of its
        tracks survives validation.
        """
        logger.debug('Handling general track')
        props = self._describe_track(general_track, 'general', context)

        if 'path' not in props:
            props['path'] = video_path
        if 'container' not in props:
            _, extension = os.path.splitext(video_path)
            props['container'] = extension[1:]  # drop the leading dot
        if 'size' not in props and os.path.isfile(video_path):
            props['size'] = size_property.handle(os.path.getsize(video_path), context)

        typed_tracks = (
            ('video', video_tracks),
            ('audio', audio_tracks),
            ('subtitle', subtitle_tracks),
        )
        for track_type, tracks in typed_tracks:
            results = []
            for track in tracks or []:
                logger.debug('Handling %s track', track_type)
                described = self._describe_track(track, track_type, context)
                validated = self._validate_track(track_type, described)
                if validated:
                    results.append(validated)

            if results:
                props[track_type] = results

        return props

    @classmethod
    def _validate_track(cls, track_type, track):
        """Return the track, or ``None`` for a video track with an out-of-range frame rate."""
        if track_type == 'video' and 'frame_rate' in track:
            frame_rate = track['frame_rate']
            # Use the bare number when the frame rate exposes a ``magnitude``.
            frame_rate = getattr(frame_rate, 'magnitude', frame_rate)
            if cls.min_fps < frame_rate < cls.max_fps:
                return track
            return None
        return track

    def _describe_track(self, track, track_type, context):
        """Describe track to a dict.

        :param track: raw track data handed to every property
        :param track_type: key selecting the properties and rules to apply
        :param context: forwarded to the properties and the rules
        :rtype: dict
        """
        props = OrderedDict()
        pv_props = {}
        for name, prop in self.mapping[track_type].items():
            if not prop:
                # placeholder to be populated by rules. It keeps the order
                props[name] = None
                continue

            value = prop.extract_value(track, context)
            if value is None:
                continue

            # Private values are only visible to the rules, not part of the result.
            destination = pv_props if prop.private else props
            destination[name] = value

        for name, rule in self.rules.get(track_type, {}).items():
            if props.get(name) is not None and not rule.override:
                logger.debug('Skipping rule %s since property is already present: %r', name, props[name])
                continue

            value = rule.execute(props, pv_props, context)
            if value is not None:
                props[name] = value
            elif name in props and not rule.override:
                # The rule produced nothing: remove the placeholder.
                del props[name]

        return props

    @property
    def version(self):
        """Return provider version information."""
        raise NotImplementedError
class ProviderError(Exception):
    """Root of the exceptions raised by providers."""
class MalformedFileError(ProviderError):
    """Provider error raised for a malformed file."""
class UnsupportedFileFormatError(ProviderError):
    """Provider error raised for an unsupported file format."""

@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""Provider package."""
from __future__ import unicode_literals
from .enzyme import EnzymeProvider
from .ffmpeg import FFmpegProvider
#from .mediainfo import MediaInfoProvider

@ -0,0 +1,153 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
import json
import logging
from collections import defaultdict
from logging import NullHandler, getLogger
import enzyme
from .. import OrderedDict
from ..properties import (
AudioCodec,
Basic,
Duration,
Language,
Quantity,
VideoCodec,
YesNo,
)
from ..property import Property
from ..provider import (
MalformedFileError,
Provider,
)
from ..rules import (
AudioChannelsRule,
ClosedCaptionRule,
HearingImpairedRule,
LanguageRule,
ResolutionRule,
)
from ..serializer import get_json_encoder
from ..units import units
from ..utils import todict
logger = getLogger(__name__)
logger.addHandler(NullHandler())
class EnzymeProvider(Provider):
    """Enzyme Provider: describes MKV files from the data parsed by ``enzyme``."""

    def __init__(self, config, *args, **kwargs):
        """Init method.

        :param config: configuration handed to the codec properties
        :param args: accepted and ignored
        :param kwargs: accepted and ignored
        """
        super(EnzymeProvider, self).__init__(config, {
            'general': OrderedDict([
                ('title', Property('title', description='media title')),
                ('duration', Duration('duration', description='media duration')),
            ]),
            'video': OrderedDict([
                ('id', Basic('number', int, description='video track number')),
                ('name', Property('name', description='video track name')),
                ('language', Language('language', description='video language')),
                ('width', Quantity('width', units.pixel)),
                ('height', Quantity('height', units.pixel)),
                ('scan_type', YesNo('interlaced', yes='Interlaced', no='Progressive', default='Progressive',
                                    description='video scan type')),
                ('resolution', None),  # populated with ResolutionRule
                # ('bit_depth', Property('bit_depth', Integer('video bit depth'))),
                ('codec', VideoCodec(config, 'codec_id', description='video codec')),
                ('forced', YesNo('forced', hide_value=False, description='video track forced')),
                ('default', YesNo('default', hide_value=False, description='video track default')),
                ('enabled', YesNo('enabled', hide_value=True, description='video track enabled')),
            ]),
            'audio': OrderedDict([
                ('id', Basic('number', int, description='audio track number')),
                ('name', Property('name', description='audio track name')),
                ('language', Language('language', description='audio language')),
                ('codec', AudioCodec(config, 'codec_id', description='audio codec')),
                ('channels_count', Basic('channels', int, description='audio channels count')),
                ('channels', None),  # populated with AudioChannelsRule
                ('forced', YesNo('forced', hide_value=False, description='audio track forced')),
                ('default', YesNo('default', hide_value=False, description='audio track default')),
                ('enabled', YesNo('enabled', hide_value=True, description='audio track enabled')),
            ]),
            'subtitle': OrderedDict([
                ('id', Basic('number', int, description='subtitle track number')),
                ('name', Property('name', description='subtitle track name')),
                ('language', Language('language', description='subtitle language')),
                ('hearing_impaired', None),  # populated with HearingImpairedRule
                ('closed_caption', None),  # populated with ClosedCaptionRule
                ('forced', YesNo('forced', hide_value=False, description='subtitle track forced')),
                ('default', YesNo('default', hide_value=False, description='subtitle track default')),
                ('enabled', YesNo('enabled', hide_value=True, description='subtitle track enabled')),
            ]),
        }, {
            'video': OrderedDict([
                ('language', LanguageRule('video language')),
                ('resolution', ResolutionRule('video resolution')),
            ]),
            'audio': OrderedDict([
                ('language', LanguageRule('audio language')),
                ('channels', AudioChannelsRule('audio channels')),
            ]),
            'subtitle': OrderedDict([
                ('language', LanguageRule('subtitle language')),
                ('hearing_impaired', HearingImpairedRule('subtitle hearing impaired')),
                ('closed_caption', ClosedCaptionRule('closed caption')),
            ])
        })

    def accepts(self, video_path):
        """Accept only MKV files (the check is on the lower-cased file extension)."""
        return video_path.lower().endswith('.mkv')

    @classmethod
    def extract_info(cls, video_path):
        """Extract info from the video.

        :return: the ``enzyme.MKV`` parse result converted with ``todict``
        """
        with open(video_path, 'rb') as f:
            return todict(enzyme.MKV(f))

    def describe(self, video_path, context):
        """Return video metadata.

        :param video_path: path of the MKV file to read
        :param context: mutable mapping; a ``debug_data`` callable producing the
            raw data as JSON is stored in it
        :return: the described tracks plus a ``provider`` entry, or an empty
            dict when the parsed data has an ``info`` entry set to ``None``
        :raises MalformedFileError: when enzyme rejects the file or nothing
            could be described
        """
        try:
            ff = self.extract_info(video_path)
        except enzyme.MalformedMKVError:  # pragma: no cover
            raise MalformedFileError

        def debug_data():
            """Debug data."""
            return json.dumps(ff, cls=get_json_encoder(context), indent=4, ensure_ascii=False)

        context['debug_data'] = debug_data

        # Only serialize the raw data when the message is actually emitted, and
        # log the serialized text rather than the function object.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('Video %r scanned using enzyme %r has raw data:\n%s',
                         video_path, enzyme.__version__, debug_data())

        data = defaultdict(dict)
        data.update(ff)
        if 'info' in data and data['info'] is None:
            return {}

        result = self._describe_tracks(video_path, data.get('info', {}), data.get('video_tracks'),
                                       data.get('audio_tracks'), data.get('subtitle_tracks'), context)
        if not result:
            raise MalformedFileError

        result['provider'] = {
            'name': 'enzyme',
            'version': self.version
        }
        return result

    @property
    def version(self):
        """Return enzyme version information."""
        return {'enzyme': enzyme.__version__}

@ -0,0 +1,276 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import json
import logging
import re
from logging import NullHandler, getLogger
from subprocess import check_output
from six import ensure_text
from .. import (
OrderedDict,
VIDEO_EXTENSIONS,
)
from ..properties import (
AudioChannels,
AudioCodec,
AudioProfile,
Basic,
Duration,
Language,
Quantity,
Ratio,
ScanType,
SubtitleFormat,
VideoCodec,
VideoProfile,
VideoProfileLevel,
YesNo,
)
from ..property import (
Property,
)
from ..provider import (
MalformedFileError,
Provider,
)
from ..rules import (
AudioChannelsRule,
AudioCodecRule,
ClosedCaptionRule,
HearingImpairedRule,
LanguageRule,
ResolutionRule,
)
from ..serializer import get_json_encoder
from ..units import units
from ..utils import (
define_candidate,
detect_os,
)
logger = getLogger(__name__)
logger.addHandler(NullHandler())
# Warning logged by FFmpegProvider.accepts when no ffprobe executor was found.
# Raw string: the backslashes of the Windows example paths are kept literally.
WARN_MSG = r'''
=========================================================================================
FFmpeg (ffprobe) not found on your system or could not be loaded.
Visit https://ffmpeg.org/download.html to download it.
If you still have problems, please check if the downloaded version matches your system.
To load FFmpeg (ffprobe) from a specific location, please define the location as follow:
knowit --ffmpeg /usr/local/ffmpeg/bin <video_path>
knowit --ffmpeg /usr/local/ffmpeg/bin/ffprobe <video_path>
knowit --ffmpeg "C:\Program Files\FFmpeg" <video_path>
knowit --ffmpeg C:\Software\ffprobe.exe <video_path>
=========================================================================================
'''
class FFmpegExecutor(object):
    """Base executor for FFmpeg (ffprobe); subclasses provide ``_execute``."""

    # Matches e.g. 'version 4.2.2' and captures the dotted number.
    version_re = re.compile(r'\bversion\s+(?P<version>\d+(?:\.\d+)+)\b')
    # Per OS family, the places where a candidate executable is looked for.
    locations = {
        'unix': ('/usr/local/ffmpeg/lib', '/usr/local/ffmpeg/bin', '__PATH__'),
        'windows': ('__PATH__', ),
        'macos': ('__PATH__', ),
    }

    def __init__(self, location, version):
        """Constructor.

        :param location: stored as is; subclasses use it as the executable to run
        :param version: version stored as is (a tuple of ints from ``_get_version``)
        """
        self.location = location
        self.version = version

    def extract_info(self, filename):
        """Extract media info: run the executor and decode its JSON output."""
        return json.loads(self._execute(filename))

    def _execute(self, filename):
        raise NotImplementedError

    @classmethod
    def _get_version(cls, output):
        """Return the version found in the output as a tuple of ints, or ``None``."""
        found = cls.version_re.search(output)
        if found is None:
            return None
        return tuple(int(part) for part in found.group('version').split('.'))

    @classmethod
    def get_executor_instance(cls, suggested_path=None):
        """Return the first executor that can be created, or ``None``."""
        os_family = detect_os()
        logger.debug('Detected os: %s', os_family)
        for exec_cls in (FFmpegCliExecutor, ):
            executor = exec_cls.create(os_family, suggested_path)
            if executor:
                return executor
        return None
class FFmpegCliExecutor(FFmpegExecutor):
    """Executor that uses FFmpeg (ffprobe) cli."""

    # Per OS family, the executable names to look for.
    names = {
        'unix': ('ffprobe', ),
        'windows': ('ffprobe.exe', ),
        'macos': ('ffprobe', ),
    }

    def _execute(self, filename):
        """Run ffprobe on the file and return its JSON output as text."""
        return ensure_text(check_output([self.location, '-v', 'quiet', '-print_format', 'json',
                                         '-show_format', '-show_streams', '-sexagesimal', filename]))

    @classmethod
    def create(cls, os_family=None, suggested_path=None):
        """Create the executor instance.

        Every candidate is run with ``-version``; the first one whose output
        contains a parsable version is used.

        :return: an executor, or ``None`` when no candidate is usable
        """
        # Local import: the module only imports ``check_output`` from subprocess.
        from subprocess import CalledProcessError

        for candidate in define_candidate(cls.locations, cls.names, os_family, suggested_path):
            try:
                output = ensure_text(check_output([candidate, '-version']))
            except (OSError, CalledProcessError):
                # Missing/not executable, or it exited with a non-zero status:
                # either way this candidate is unusable, try the next one.
                continue

            version = cls._get_version(output)
            if version:
                logger.debug('FFmpeg cli detected: %s v%s', candidate, '.'.join(map(str, version)))
                return FFmpegCliExecutor(candidate, version)

        return None
class FFmpegProvider(Provider):
    """FFmpeg provider: describes videos from the JSON output of ffprobe."""

    def __init__(self, config, suggested_path=None):
        """Init method.

        :param config: configuration handed to the configurable properties
        :param suggested_path: optional location hint used to find ffprobe
        """
        super(FFmpegProvider, self).__init__(config, {
            'general': OrderedDict([
                ('title', Property('tags.title', description='media title')),
                ('path', Property('filename', description='media path')),
                ('duration', Duration('duration', description='media duration')),
                ('size', Quantity('size', units.byte, description='media size')),
                ('bit_rate', Quantity('bit_rate', units.bps, description='media bit rate')),
            ]),
            'video': OrderedDict([
                ('id', Basic('index', int, allow_fallback=True, description='video track number')),
                ('name', Property('tags.title', description='video track name')),
                ('language', Language('tags.language', description='video language')),
                ('duration', Duration('duration', description='video duration')),
                ('width', Quantity('width', units.pixel)),
                ('height', Quantity('height', units.pixel)),
                ('scan_type', ScanType(config, 'field_order', default='Progressive', description='video scan type')),
                ('aspect_ratio', Ratio('display_aspect_ratio', description='display aspect ratio')),
                ('pixel_aspect_ratio', Ratio('sample_aspect_ratio', description='pixel aspect ratio')),
                ('resolution', None),  # populated with ResolutionRule
                ('frame_rate', Ratio('r_frame_rate', unit=units.FPS, description='video frame rate')),
                # frame_rate_mode
                ('bit_rate', Quantity('bit_rate', units.bps, description='video bit rate')),
                ('bit_depth', Quantity('bits_per_raw_sample', units.bit, description='video bit depth')),
                ('codec', VideoCodec(config, 'codec_name', description='video codec')),
                ('profile', VideoProfile(config, 'profile', description='video codec profile')),
                ('profile_level', VideoProfileLevel(config, 'level', description='video codec profile level')),
                # ('profile_tier', VideoProfileTier(config, 'codec_profile', description='video codec profile tier')),
                ('forced', YesNo('disposition.forced', hide_value=False, description='video track forced')),
                ('default', YesNo('disposition.default', hide_value=False, description='video track default')),
            ]),
            'audio': OrderedDict([
                ('id', Basic('index', int, allow_fallback=True, description='audio track number')),
                ('name', Property('tags.title', description='audio track name')),
                ('language', Language('tags.language', description='audio language')),
                ('duration', Duration('duration', description='audio duration')),
                ('codec', AudioCodec(config, 'codec_name', description='audio codec')),
                ('_codec', AudioCodec(config, 'profile', description='audio codec', private=True, reportable=False)),
                ('profile', AudioProfile(config, 'profile', description='audio codec profile')),
                ('channels_count', AudioChannels('channels', description='audio channels count')),
                ('channels', None),  # populated with AudioChannelsRule
                ('bit_depth', Quantity('bits_per_raw_sample', units.bit, description='audio bit depth')),
                ('bit_rate', Quantity('bit_rate', units.bps, description='audio bit rate')),
                ('sampling_rate', Quantity('sample_rate', units.Hz, description='audio sampling rate')),
                ('forced', YesNo('disposition.forced', hide_value=False, description='audio track forced')),
                ('default', YesNo('disposition.default', hide_value=False, description='audio track default')),
            ]),
            'subtitle': OrderedDict([
                ('id', Basic('index', int, allow_fallback=True, description='subtitle track number')),
                ('name', Property('tags.title', description='subtitle track name')),
                ('language', Language('tags.language', description='subtitle language')),
                ('hearing_impaired', YesNo('disposition.hearing_impaired',
                                           hide_value=False, description='subtitle hearing impaired')),
                ('closed_caption', None),  # populated with ClosedCaptionRule
                ('format', SubtitleFormat(config, 'codec_name', description='subtitle format')),
                ('forced', YesNo('disposition.forced', hide_value=False, description='subtitle track forced')),
                ('default', YesNo('disposition.default', hide_value=False, description='subtitle track default')),
            ]),
        }, {
            'video': OrderedDict([
                ('language', LanguageRule('video language')),
                ('resolution', ResolutionRule('video resolution')),
            ]),
            'audio': OrderedDict([
                ('language', LanguageRule('audio language')),
                ('channels', AudioChannelsRule('audio channels')),
                ('codec', AudioCodecRule('audio codec', override=True)),
            ]),
            'subtitle': OrderedDict([
                ('language', LanguageRule('subtitle language')),
                ('hearing_impaired', HearingImpairedRule('subtitle hearing impaired')),
                ('closed_caption', ClosedCaptionRule('closed caption'))
            ])
        })
        self.executor = FFmpegExecutor.get_executor_instance(suggested_path)

    def accepts(self, video_path):
        """Accept any video when FFprobe is available.

        :return: a falsy value when no executor was found (the warning is
            logged once, then the executor is set to ``False``); otherwise
            whether the lower-cased path ends with one of ``VIDEO_EXTENSIONS``
        """
        if self.executor is None:
            logger.warning(WARN_MSG)
            self.executor = False

        return self.executor and video_path.lower().endswith(VIDEO_EXTENSIONS)

    def describe(self, video_path, context):
        """Return video metadata.

        :param video_path: path of the video handed to the executor
        :param context: mutable mapping; a ``debug_data`` callable producing the
            raw data as JSON is stored in it
        :return: the described tracks plus a ``provider`` entry
        :raises MalformedFileError: when nothing could be described
        """
        data = self.executor.extract_info(video_path)

        def debug_data():
            """Debug data."""
            return json.dumps(data, cls=get_json_encoder(context), indent=4, ensure_ascii=False)

        context['debug_data'] = debug_data

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('Video %r scanned using ffmpeg %r has raw data:\n%s',
                         video_path, self.executor.location, debug_data())

        general_track = data.get('format') or {}
        if 'tags' in general_track:
            # Tag names are matched in lower case by the 'tags.<name>' properties.
            general_track['tags'] = {k.lower(): v for k, v in general_track['tags'].items()}

        tracks_by_type = {'video': [], 'audio': [], 'subtitle': []}
        # 'streams' may be missing or null: treat that as "no streams".
        for track in data.get('streams') or []:
            bucket = tracks_by_type.get(track.get('codec_type'))
            if bucket is not None:
                bucket.append(track)

        result = self._describe_tracks(video_path, general_track, tracks_by_type['video'],
                                       tracks_by_type['audio'], tracks_by_type['subtitle'], context)
        if not result:
            raise MalformedFileError

        result['provider'] = {
            'name': 'ffmpeg',
            'version': self.version
        }
        return result

    @property
    def version(self):
        """Return ffmpeg version information.

        :return: ``{location: 'v<dotted version>'}``, or an empty dict when
            there is no executor
        """
        if not self.executor:
            return {}

        return {self.executor.location: 'v{}'.format('.'.join(map(str, self.executor.version)))}

@ -0,0 +1,335 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from ctypes import c_void_p, c_wchar_p
from logging import DEBUG, NullHandler, getLogger
from subprocess import CalledProcessError, check_output
from xml.dom import minidom
from xml.etree import ElementTree
from pymediainfo import MediaInfo
from pymediainfo import __version__ as pymediainfo_version
from six import ensure_text
from .. import (
OrderedDict,
VIDEO_EXTENSIONS,
)
from ..properties import (
AudioChannels,
AudioCodec,
AudioCompression,
AudioProfile,
Basic,
BitRateMode,
Duration,
Language,
Quantity,
ScanType,
SubtitleFormat,
VideoCodec,
VideoEncoder,
VideoProfile,
VideoProfileLevel,
VideoProfileTier,
YesNo,
)
from ..property import (
MultiValue,
Property,
)
from ..provider import (
MalformedFileError,
Provider,
)
from ..rules import (
AtmosRule,
AudioChannelsRule,
ClosedCaptionRule,
DtsHdRule,
HearingImpairedRule,
LanguageRule,
ResolutionRule,
)
from ..units import units
from ..utils import (
define_candidate,
detect_os,
)
logger = getLogger(__name__)
logger.addHandler(NullHandler())
# Warning logged by MediaInfoProvider.accepts when no MediaInfo executor was found.
# Raw string: the backslashes of the Windows example paths are kept literally.
WARN_MSG = r'''
=========================================================================================
MediaInfo not found on your system or could not be loaded.
Visit https://mediaarea.net/ to download it.
If you still have problems, please check if the downloaded version matches your system.
To load MediaInfo from a specific location, please define the location as follow:
knowit --mediainfo /usr/local/mediainfo/lib <video_path>
knowit --mediainfo /usr/local/mediainfo/bin <video_path>
knowit --mediainfo "C:\Program Files\MediaInfo" <video_path>
knowit --mediainfo C:\Software\MediaInfo.dll <video_path>
knowit --mediainfo C:\Software\MediaInfo.exe <video_path>
knowit --mediainfo /opt/mediainfo/libmediainfo.so <video_path>
knowit --mediainfo /opt/mediainfo/libmediainfo.dylib <video_path>
=========================================================================================
'''
class MediaInfoExecutor(object):
    """Base MediaInfo executor; subclasses run it through ctypes or the cli."""

    # Matches e.g. 'v20.03' and captures the dotted number.
    version_re = re.compile(r'\bv(?P<version>\d+(?:\.\d+)+)\b')
    # Per OS family, the places where a candidate library/executable is looked for.
    locations = {
        'unix': ('/usr/local/mediainfo/lib', '/usr/local/mediainfo/bin', '__PATH__'),
        'windows': ('__PATH__', ),
        'macos': ('__PATH__', ),
    }

    def __init__(self, location, version):
        """Constructor.

        :param location: stored as is; subclasses use it as the library or executable
        :param version: version stored as is (a tuple of ints from ``_get_version``)
        """
        self.location = location
        self.version = version

    def extract_info(self, filename):
        """Extract media info: return whatever ``_execute`` produces for the file."""
        return self._execute(filename)

    def _execute(self, filename):
        raise NotImplementedError

    @classmethod
    def _get_version(cls, output):
        """Return the version found in the output as a tuple of ints, or ``None``."""
        found = cls.version_re.search(output)
        if found is None:
            return None
        return tuple(int(part) for part in found.group('version').split('.'))

    @classmethod
    def get_executor_instance(cls, suggested_path=None):
        """Return the first executor that can be created (ctypes first, then cli), or ``None``."""
        os_family = detect_os()
        logger.debug('Detected os: %s', os_family)
        for exec_cls in (MediaInfoCTypesExecutor, MediaInfoCliExecutor):
            executor = exec_cls.create(os_family, suggested_path)
            if executor:
                return executor
        return None
class MediaInfoCliExecutor(MediaInfoExecutor):
    """Executor that runs the MediaInfo command line tool."""

    # Per OS family, the executable names to look for.
    names = {
        'unix': ('mediainfo', ),
        'windows': ('MediaInfo.exe', ),
        'macos': ('mediainfo', ),
    }

    def _execute(self, filename):
        """Run mediainfo with full XML output and wrap the text in ``MediaInfo``."""
        if self.version >= (17, 10):
            output_type = 'OLDXML'
        else:
            output_type = 'XML'
        command = [self.location, '--Output=' + output_type, '--Full', filename]
        return MediaInfo(ensure_text(check_output(command)))

    @classmethod
    def create(cls, os_family=None, suggested_path=None):
        """Create the executor instance.

        Every candidate is run with ``--version``; the first one whose output
        contains a parsable version is used.

        :return: an executor, or ``None`` when no candidate is usable
        """
        for candidate in define_candidate(cls.locations, cls.names, os_family, suggested_path):
            try:
                raw_output = check_output([candidate, '--version'])
            except CalledProcessError as e:
                # old mediainfo returns non-zero exit code for mediainfo --version
                raw_output = e.output
            except OSError:
                continue

            version = cls._get_version(ensure_text(raw_output))
            if version:
                logger.debug('MediaInfo cli detected: %s', candidate)
                return MediaInfoCliExecutor(candidate, version)

        return None
class MediaInfoCTypesExecutor(MediaInfoExecutor):
    """Media info ctypes: executor that loads the MediaInfo shared library."""

    # Per OS family, the library file names to look for.
    names = {
        'unix': ('libmediainfo.so.0', ),
        'windows': ('MediaInfo.dll', ),
        'macos': ('libmediainfo.0.dylib', 'libmediainfo.dylib'),
    }

    def _execute(self, filename):
        """Parse the file with the library stored in ``location``."""
        return MediaInfo.parse(filename, library_file=self.location)

    @classmethod
    def create(cls, os_family=None, suggested_path=None):
        """Create the executor instance.

        The first candidate library that can be loaded and that reports a
        parsable version is used.

        :return: an executor, or ``None`` when no candidate is usable
        """
        for candidate in define_candidate(cls.locations, cls.names, os_family, suggested_path):
            if not MediaInfo.can_parse(candidate):
                continue

            lib = MediaInfo._get_library(candidate)
            lib.MediaInfo_Option.argtypes = [c_void_p, c_wchar_p, c_wchar_p]
            lib.MediaInfo_Option.restype = c_wchar_p
            version_text = lib.MediaInfo_Option(None, "Info_Version", "")
            version = MediaInfoExecutor._get_version(version_text or '')
            if not version:
                # No parsable version: formatting it below (and later in the
                # provider's version info) would fail, so skip this candidate.
                continue

            logger.debug('MediaInfo library detected: %s (v%s)', candidate, '.'.join(map(str, version)))
            return MediaInfoCTypesExecutor(candidate, version)

        return None
class MediaInfoProvider(Provider):
"""Media Info provider."""
executor = None
def __init__(self, config, suggested_path):
"""Init method."""
super(MediaInfoProvider, self).__init__(config, {
'general': OrderedDict([
('title', Property('title', description='media title')),
('path', Property('complete_name', description='media path')),
('duration', Duration('duration', description='media duration')),
('size', Quantity('file_size', units.byte, description='media size')),
('bit_rate', Quantity('overall_bit_rate', units.bps, description='media bit rate')),
]),
'video': OrderedDict([
('id', Basic('track_id', int, allow_fallback=True, description='video track number')),
('name', Property('name', description='video track name')),
('language', Language('language', description='video language')),
('duration', Duration('duration', description='video duration')),
('size', Quantity('stream_size', units.byte, description='video stream size')),
('width', Quantity('width', units.pixel)),
('height', Quantity('height', units.pixel)),
('scan_type', ScanType(config, 'scan_type', default='Progressive', description='video scan type')),
('aspect_ratio', Basic('display_aspect_ratio', float, description='display aspect ratio')),
('pixel_aspect_ratio', Basic('pixel_aspect_ratio', float, description='pixel aspect ratio')),
('resolution', None), # populated with ResolutionRule
('frame_rate', Quantity('frame_rate', units.FPS, float, description='video frame rate')),
# frame_rate_mode
('bit_rate', Quantity('bit_rate', units.bps, description='video bit rate')),
('bit_depth', Quantity('bit_depth', units.bit, description='video bit depth')),
('codec', VideoCodec(config, 'codec', description='video codec')),
('profile', VideoProfile(config, 'codec_profile', description='video codec profile')),
('profile_level', VideoProfileLevel(config, 'codec_profile', description='video codec profile level')),
('profile_tier', VideoProfileTier(config, 'codec_profile', description='video codec profile tier')),
('encoder', VideoEncoder(config, 'encoded_library_name', description='video encoder')),
('media_type', Property('internet_media_type', description='video media type')),
('forced', YesNo('forced', hide_value=False, description='video track forced')),
('default', YesNo('default', hide_value=False, description='video track default')),
]),
'audio': OrderedDict([
('id', Basic('track_id', int, allow_fallback=True, description='audio track number')),
('name', Property('title', description='audio track name')),
('language', Language('language', description='audio language')),
('duration', Duration('duration', description='audio duration')),
('size', Quantity('stream_size', units.byte, description='audio stream size')),
('codec', MultiValue(AudioCodec(config, 'codec', description='audio codec'))),
('profile', MultiValue(AudioProfile(config, 'format_profile', description='audio codec profile'),
delimiter=' / ')),
('channels_count', MultiValue(AudioChannels('channel_s', description='audio channels count'))),
('channel_positions', MultiValue(name='other_channel_positions', handler=(lambda x, *args: x),
delimiter=' / ', private=True, description='audio channels position')),
('channels', None), # populated with AudioChannelsRule
('bit_depth', Quantity('bit_depth', units.bit, description='audio bit depth')),
('bit_rate', MultiValue(Quantity('bit_rate', units.bps, description='audio bit rate'))),
('bit_rate_mode', MultiValue(BitRateMode(config, 'bit_rate_mode', description='audio bit rate mode'))),
('sampling_rate', MultiValue(Quantity('sampling_rate', units.Hz, description='audio sampling rate'))),
('compression', MultiValue(AudioCompression(config, 'compression_mode',
description='audio compression'))),
('forced', YesNo('forced', hide_value=False, description='audio track forced')),
('default', YesNo('default', hide_value=False, description='audio track default')),
]),
'subtitle': OrderedDict([
('id', Basic('track_id', int, allow_fallback=True, description='subtitle track number')),
('name', Property('title', description='subtitle track name')),
('language', Language('language', description='subtitle language')),
('hearing_impaired', None), # populated with HearingImpairedRule
('_closed_caption', Property('captionservicename', private=True)),
('closed_caption', None), # populated with ClosedCaptionRule
('format', SubtitleFormat(config, 'codec_id', description='subtitle format')),
('forced', YesNo('forced', hide_value=False, description='subtitle track forced')),
('default', YesNo('default', hide_value=False, description='subtitle track default')),
]),
}, {
'video': OrderedDict([
('language', LanguageRule('video language')),
('resolution', ResolutionRule('video resolution')),
]),
'audio': OrderedDict([
('language', LanguageRule('audio language')),
('channels', AudioChannelsRule('audio channels')),
('_atmosrule', AtmosRule('atmos rule')),
('_dtshdrule', DtsHdRule('dts-hd rule')),
]),
'subtitle': OrderedDict([
('language', LanguageRule('subtitle language')),
('hearing_impaired', HearingImpairedRule('subtitle hearing impaired')),
('closed_caption', ClosedCaptionRule('closed caption')),
])
})
self.executor = MediaInfoExecutor.get_executor_instance(suggested_path)
def accepts(self, video_path):
"""Accept any video when MediaInfo is available."""
if self.executor is None:
logger.warning(WARN_MSG)
self.executor = False
return self.executor and video_path.lower().endswith(VIDEO_EXTENSIONS)
def describe(self, video_path, context):
    """Return the metadata extracted from ``video_path``.

    The executor output is grouped by ``track_type`` and handed to
    ``self._describe_tracks``; a ``provider`` entry is then added to the
    result.

    :param video_path: path of the video to scan
    :param context: mutable mapping; receives a ``debug_data`` callable
    :return: the described tracks plus provider information
    :raises MalformedFileError: when no result could be produced
    """
    media_info = self.executor.extract_info(video_path)

    def debug_data():
        """Return the raw mediainfo XML, pretty printed."""
        raw_xml = ensure_text(ElementTree.tostring(media_info.xml_dom))
        flat_xml = raw_xml.replace('\r', '').replace('\n', '')
        pretty = minidom.parseString(flat_xml).toprettyxml(indent=' ', newl='\n', encoding='utf-8')
        return ensure_text(pretty)

    # Stored as a callable so the XML is only rendered when requested.
    context['debug_data'] = debug_data

    if logger.isEnabledFor(DEBUG):
        logger.debug('Video %r scanned using mediainfo %r has raw data:\n%s',
                     video_path, self.executor.location, debug_data())

    data = media_info.to_data()
    result = {}
    tracks = data.get('tracks')
    if tracks:
        # One bucket per track type this provider knows how to describe;
        # tracks of any other type are ignored.
        buckets = {'General': [], 'Video': [], 'Audio': [], 'Text': []}
        for track in tracks:
            bucket = buckets.get(track.get('track_type'))
            if bucket is not None:
                bucket.append(track)

        general_tracks = buckets['General']
        result = self._describe_tracks(video_path, general_tracks[0] if general_tracks else {},
                                       buckets['Video'], buckets['Audio'], buckets['Text'], context)

    if not result:
        raise MalformedFileError

    result['provider'] = {
        'name': 'mediainfo',
        'version': self.version
    }

    return result
@property
def version(self):
    """Return mediainfo version information.

    :return: ordered mapping with the pymediainfo version first and, when an
        executor is available, its location mapped to ``v<dotted version>``
    """
    entries = [('pymediainfo', pymediainfo_version)]
    executor = self.executor
    if executor:
        location = executor.location
        dotted = '.'.join(str(part) for part in executor.version)
        entries.append((location, 'v{}'.format(dotted)))
    return OrderedDict(entries)

@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .core import Reportable
class Rule(Reportable):
    """Abstract base class for rules; subclasses implement ``execute``."""

    def __init__(self, name, override=False, **kwargs):
        """Initialise the rule.

        :param name: forwarded to ``Reportable.__init__``
        :param override: stored as-is on ``self.override``
        :param kwargs: forwarded to ``Reportable.__init__``
        """
        super(Rule, self).__init__(name, **kwargs)
        self.override = override

    def execute(self, props, pv_props, context):
        """Run the rule; concrete rules must override this method.

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .audio import AtmosRule
from .audio import AudioChannelsRule
from .audio import AudioCodecRule
from .audio import DtsHdRule
from .language import LanguageRule
from .subtitle import ClosedCaptionRule
from .subtitle import HearingImpairedRule
from .video import ResolutionRule

@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .atmos import AtmosRule
from .channels import AudioChannelsRule
from .codec import AudioCodecRule
from .dtshd import DtsHdRule

@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from ...rule import Rule
class AtmosRule(Rule):
    """Atmos rule.

    When one of the detected codecs is Atmos, list-valued
    ``channels_count`` and ``sampling_rate`` properties are collapsed to the
    entry at the Atmos codec's index.
    """

    @classmethod
    def _redefine(cls, props, name, index):
        """Replace a list-valued property by its ``index``-th element.

        Non-list values are left untouched. When the selected element is
        ``None`` the property is removed from ``props`` altogether.
        """
        current = props.get(name)
        if not isinstance(current, list):
            return

        chosen = current[index]
        if chosen is None:
            del props[name]
        else:
            props[name] = chosen

    def execute(self, props, pv_props, context):
        """Execute the rule against properties."""
        codecs = props.get('codec') or []
        # TODO: handle this properly
        lowered = {codec.lower() for codec in codecs if codec}
        if 'atmos' not in lowered:
            return

        # Position of the first codec whose name mentions atmos.
        index = next((position for position, codec in enumerate(codecs)
                      if codec and 'atmos' in codec.lower()), None)
        if index is None:
            return

        for name in ('channels_count', 'sampling_rate'):
            self._redefine(props, name, index)

@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from logging import NullHandler, getLogger
from six import text_type
from ...rule import Rule
logger = getLogger(__name__)
logger.addHandler(NullHandler())
class AudioChannelsRule(Rule):
    """Audio Channel rule.

    Derives a channel layout string such as ``5.1`` from the channel count
    and, when available, the channel positions.
    """

    # Fallback layouts for well-known integer channel counts.
    mapping = {
        1: '1.0',
        2: '2.0',
        6: '5.1',
        8: '7.1',
    }

    def execute(self, props, pv_props, context):
        """Execute the rule against properties.

        :return: the layout as text, or ``None`` when there is no channel
            count or nothing could be derived (the positions are reported in
            the latter case)
        """
        count = props.get('channels_count')
        if count is None:
            return

        channels = None
        if isinstance(count, int):
            channels = self.mapping.get(count)

        positions = pv_props.get('channel_positions') or []
        if not isinstance(positions, list):
            positions = [positions]

        candidate = 0
        for position in positions:
            if not position:
                continue

            # Sum the '/'-separated parts of the position string.
            total = 0
            for part in position.split('/'):
                try:
                    total += float(part)
                except ValueError:
                    logger.debug('Invalid %s: %s', self.description, part)

            # The integer part plus the first decimal digit gives the number
            # of channels this layout stands for (e.g. 5.1 -> 5 + 1).
            whole = int(total)
            if whole + int(round((total - whole) * 10)) == count:
                return text_type(total)

            candidate = max(candidate, total)

        if channels:
            return channels

        if candidate:
            return text_type(candidate)

        self.report(positions, context)

@ -0,0 +1,13 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from ...rule import Rule
class AudioCodecRule(Rule):
    """Audio Codec rule: expose the private ``_codec`` property when present."""

    def execute(self, props, pv_props, context):
        """Return ``pv_props['_codec']`` if that key exists, else ``None``."""
        if '_codec' not in pv_props:
            return None
        return pv_props.get('_codec')

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from ...rule import Rule
class DtsHdRule(Rule):
    """DTS-HD rule.

    For a DTS-HD codec, list-valued properties are collapsed to the entry
    matching the first profile that is not ``CORE``.
    """

    @classmethod
    def _redefine(cls, props, name, index):
        """Replace a list-valued property by its ``index``-th element.

        Non-list values are left untouched. When the selected element is
        ``None`` the property is removed from ``props`` altogether.
        """
        current = props.get(name)
        if not isinstance(current, list):
            return

        chosen = current[index]
        if chosen is None:
            del props[name]
        else:
            props[name] = chosen

    def execute(self, props, pv_props, context):
        """Execute the rule against properties."""
        if props.get('codec') == 'DTS-HD':
            profiles = props.get('profile', [])
            # Position of the first non-empty profile other than CORE.
            index = next((position for position, profile in enumerate(profiles)
                          if profile and profile.upper() != 'CORE'), None)
            if index is None:
                return

            for name in ('profile', 'channels_count', 'bit_rate',
                         'bit_rate_mode', 'sampling_rate', 'compression'):
                self._redefine(props, name, index)

@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from logging import NullHandler, getLogger
import babelfish
from ..rule import Rule
logger = getLogger(__name__)
logger.addHandler(NullHandler())
class LanguageRule(Rule):
    """Language rules: guess a track language from its name."""

    # Captures the first word of the track name.
    name_re = re.compile(r'(?P<name>\w+)\b', re.IGNORECASE)

    def execute(self, props, pv_props, context):
        """Language detection using name.

        Nothing is done when a language is already known or when the track
        has no name. Otherwise the first word of the name is looked up with
        ``babelfish.Language.fromname``; a failed lookup is logged.
        """
        if 'language' in props or 'name' not in props:
            return None

        name = props.get('name', '')
        found = self.name_re.match(name)
        if found:
            try:
                return babelfish.Language.fromname(found.group('name'))
            except babelfish.Error:
                # Fall through to the log line below.
                pass

        logger.info('Invalid %s: %r', self.description, name)

@ -0,0 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .closedcaption import ClosedCaptionRule
from .hearingimpaired import HearingImpairedRule

@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from ...rule import Rule
class ClosedCaptionRule(Rule):
    """Closed caption rule."""

    # Matches markers such as "CC1" as a standalone word.
    cc_re = re.compile(r'(\bcc\d\b)', re.IGNORECASE)

    def execute(self, props, pv_props, context):
        """Return ``True`` when a closed caption marker is found.

        Both the private ``_closed_caption`` value and the track name are
        inspected; ``None`` is returned when neither matches.
        """
        candidates = (pv_props.get('_closed_caption'), props.get('name'))
        if any(text and self.cc_re.search(text) for text in candidates):
            return True

@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from ...rule import Rule
class HearingImpairedRule(Rule):
    """Hearing Impaired rule."""

    # Matches "SDH" as a standalone word.
    hi_re = re.compile(r'(\bsdh\b)', re.IGNORECASE)

    def execute(self, props, pv_props, context):
        """Return ``True`` when the track name contains ``SDH``, else ``None``."""
        name = props.get('name')
        if not name:
            return None
        if self.hi_re.search(name):
            return True

@ -0,0 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .resolution import ResolutionRule

@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from ...rule import Rule
class ResolutionRule(Rule):
    """Resolution rule."""

    standard_resolutions = (
        480,
        720,
        1080,
        2160,
        4320,
    )
    uncommon_resolutions = (
        240,
        288,
        360,
        576,
    )
    # Every known resolution, in ascending order.
    resolutions = sorted(standard_resolutions + uncommon_resolutions)
    square = 4. / 3
    wide = 16. / 9

    def execute(self, props, pv_props, context):
        """Return the resolution for the video.

        The resolution is based on a widescreen TV (16:9):
        1920x800 is considered 1080p since the TV will use 1920x1080 with
        vertical black bars, and 1426x1080 is considered 1080p since the TV
        will use 1920x1080 with horizontal black bars.

        The calculation considers the display aspect ratio and the pixel
        aspect ratio, not only width and height. When there is no perfect
        match, the next higher entry of the following list is selected:
        240, 288, 360, 480, 576, 720, 1080, 2160, 4320

        If no interlaced information is available, the resolution is
        considered progressive. When no resolution can be selected the
        dimensions are reported and ``None`` is returned.
        """
        width = props.get('width')
        height = props.get('height')
        if not width or not height:
            return

        # Values may be quantities carrying a ``magnitude``; plain numbers
        # are used unchanged.
        try:
            width = width.magnitude
            height = height.magnitude
        except AttributeError:
            pass

        dar = props.get('aspect_ratio', float(width) / height)
        par = props.get('pixel_aspect_ratio', 1)
        scan_type = props.get('scan_type', 'p')[0].lower()

        # selected DAR must be between 4:3 and 16:9
        selected_dar = max(min(dar, self.wide), self.square)

        # mod-16
        stretched_width = int(round(width * par / 16)) * 16

        # mod-8
        calculated_height = int(round(stretched_width / selected_dar / 8)) * 8

        # Smallest known resolution that is not below the computed height.
        selected_resolution = next(
            (known for known in self.resolutions if known >= calculated_height), None)

        if selected_resolution:
            return '{0}{1}'.format(selected_resolution, scan_type)

        msg = '{width}x{height} - scan_type: {scan_type}, aspect_ratio: {dar}, pixel_aspect_ratio: {par}'.format(
            width=width, height=height, scan_type=scan_type, dar=dar, par=par)
        self.report(msg, context)

@ -0,0 +1,155 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import json
from collections import OrderedDict
from datetime import timedelta
import babelfish
from six import text_type
import yaml
from .units import units
def format_property(context, o):
    """Convert a property value to its formatted representation.

    Durations, languages and objects exposing ``units`` are formatted with
    the profile found in ``context['profile']``; anything else is converted
    to text.
    """
    if isinstance(o, timedelta):
        formatter = format_duration
    elif isinstance(o, babelfish.language.Language):
        formatter = format_language
    elif hasattr(o, 'units'):
        formatter = format_quantity
    else:
        return text_type(o)

    return formatter(o, context['profile'])
def get_json_encoder(context):
    """Build a JSON encoder class bound to ``context``.

    :param context: mapping passed to ``format_property`` for every value
        the standard encoder cannot serialize
    :return: a ``json.JSONEncoder`` subclass (not an instance)
    """

    class StringEncoder(json.JSONEncoder):
        """JSON encoder that formats unsupported objects as properties."""

        def default(self, o):
            """Format ``o`` through ``format_property``."""
            return format_property(context, o)

    return StringEncoder
def get_yaml_dumper(context):
    """Build a YAML dumper class bound to ``context``.

    The returned ``yaml.SafeDumper`` subclass has representers registered
    for ordered dicts, babelfish languages, timedeltas and unit quantities,
    formatted with ``context['profile']``.
    """

    class CustomDumper(yaml.SafeDumper):
        """Custom YAML Dumper."""

        def default_representer(self, data):
            """Represent ints and floats natively, anything else as a string."""
            if isinstance(data, int):
                return self.represent_int(data)
            if isinstance(data, float):
                return self.represent_float(data)
            return self.represent_str(str(data))

        def ordered_dict_representer(self, data):
            """Represent an OrderedDict as a plain YAML mapping."""
            return self.represent_mapping('tag:yaml.org,2002:map', data.items())

        def default_language_representer(self, data):
            """Represent a language as its formatted string."""
            return self.represent_str(format_language(data, context['profile']))

        def default_quantity_representer(self, data):
            """Represent a quantity through its formatted value."""
            return self.default_representer(format_quantity(data, context['profile']))

        def default_duration_representer(self, data):
            """Represent a duration through its formatted value."""
            return self.default_representer(format_duration(data, context['profile']))

    registrations = (
        (OrderedDict, CustomDumper.ordered_dict_representer),
        (babelfish.Language, CustomDumper.default_language_representer),
        (timedelta, CustomDumper.default_duration_representer),
        (units.Quantity, CustomDumper.default_quantity_representer),
    )
    for python_type, representer in registrations:
        CustomDumper.add_representer(python_type, representer)

    return CustomDumper
def get_yaml_loader(constructors=None):
    """Build a YAML loader class with custom constructors.

    YAML sequences are constructed with ``construct_python_tuple``; every
    ``tag -> constructor`` pair from ``constructors`` is registered as well.

    :param constructors: optional mapping of YAML tag to constructor
    :return: a ``yaml.Loader`` subclass (not an instance)
    """
    extra_constructors = constructors or {}

    # NOTE(review): this derives from the full ``yaml.Loader``; confirm it is
    # only ever used on trusted documents.
    class CustomLoader(yaml.Loader):
        """Custom YAML Loader."""

    CustomLoader.add_constructor('tag:yaml.org,2002:seq', CustomLoader.construct_python_tuple)
    for tag, constructor in extra_constructors.items():
        CustomLoader.add_constructor(tag, constructor)

    return CustomLoader
def format_duration(duration, profile='default'):
    """Format a ``timedelta`` according to ``profile``.

    * ``technical``: ``str(duration)``
    * ``code``: the total number of seconds as a number
    * ``human``: e.g. ``1 hours 02 minutes 03 seconds``
    * anything else: ``H:MM:SS``
    """
    if profile == 'technical':
        return str(duration)

    total = duration.total_seconds()
    if profile == 'code':
        return total

    hours = int(total // 3600)
    remainder = total - (hours * 3600)
    minutes = int(remainder // 60)
    seconds = int(remainder - (minutes * 60))

    if profile != 'human':
        return '{0}:{1:02d}:{2:02d}'.format(hours, minutes, seconds)

    # Human profile: leading units that are zero are dropped.
    if hours > 0:
        return '{0} hours {1:02d} minutes {2:02d} seconds'.format(hours, minutes, seconds)
    if minutes > 0:
        return '{0} minutes {1:02d} seconds'.format(minutes, seconds)
    return '{0} seconds'.format(seconds)
def format_language(language, profile='default'):
    """Format a language: its name for default/human profiles, else ``str()``."""
    wants_name = profile in ('default', 'human')
    return str(language.name) if wants_name else str(language)
def format_quantity(quantity, profile='default'):
    """Human friendly format of a quantity.

    The ``code`` profile returns the bare magnitude. Hertz, bit-based and
    bit-per-second quantities are scaled through ``_format_quantity``
    (binary prefixes and 3 decimals for the ``technical`` profile); any
    other quantity falls back to ``str(quantity)``.
    """
    if profile == 'code':
        return quantity.magnitude

    unit = quantity.units
    if unit != 'bit':
        technical = profile == 'technical'
        options = None
        if unit == 'hertz':
            options = {'unit': 'Hz', 'precision': 3 if technical else 1}
        else:
            root_unit = quantity.to_root_units().units
            if root_unit == 'bit':
                options = {'precision': 3 if technical else 2}
            elif root_unit == 'bit / second':
                options = {'unit': 'bps', 'precision': 3 if technical else 1}

        if options is not None:
            return _format_quantity(quantity.magnitude, binary=technical, **options)

    return str(quantity)
def _format_quantity(num, unit='B', binary=False, precision=2):
fmt_pattern = '{value:3.%sf} {prefix}{affix}{unit}' % precision
factor = 1024. if binary else 1000.
binary_affix = 'i' if binary else ''
for prefix in ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z'):
if abs(num) < factor:
return fmt_pattern.format(value=num, prefix=prefix, affix=binary_affix, unit=unit)
num /= factor
return fmt_pattern.format(value=num, prefix='Y', affix=binary_affix, unit=unit)
# Loader class built once at import time, with no extra constructors.
YAMLLoader = get_yaml_loader()

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
def _build_unit_registry():
try:
from pint import UnitRegistry
registry = UnitRegistry()
registry.define('FPS = 1 * hertz')
except ImportError:
class NoUnitRegistry:
def __init__(self):
pass
def __getattr__(self, item):
return 1
registry = NoUnitRegistry()
return registry
units = _build_unit_registry()

@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import sys
from collections import OrderedDict
from six import PY2, string_types, text_type
from . import VIDEO_EXTENSIONS
def recurse_paths(paths):
    """Return a file system encoded list of videofiles.

    A string argument is split on commas when it contains one, otherwise on
    whitespace. Files are kept as given; directories are walked recursively
    and only files whose extension is in ``VIDEO_EXTENSIONS`` are kept.
    Duplicates are removed while preserving the first-seen order.

    :param paths: paths of files and/or directories
    :type paths: string or list
    :return: de-duplicated list of video paths
    :rtype: list
    """
    enc_paths = []

    if isinstance(paths, (string_types, text_type)):
        paths = [p.strip() for p in paths.split(',')] if ',' in paths else paths.split()

    encoding = sys.getfilesystemencoding()
    for path in paths:
        if os.path.isfile(path):
            enc_paths.append(path.decode(encoding) if PY2 else path)
        if os.path.isdir(path):
            for root, _directories, filenames in os.walk(path):
                for filename in filenames:
                    if os.path.splitext(filename)[1] not in VIDEO_EXTENSIONS:
                        continue

                    if not PY2:
                        # Python 3 paths are already text and ``str`` has no
                        # ``decode`` method, so the joined path is used as is.
                        fullpath = os.path.join(root, filename)
                    elif os.name == 'nt':
                        fullpath = os.path.join(root, filename.decode(encoding))
                    else:
                        fullpath = os.path.join(root, filename).decode(encoding)
                    enc_paths.append(fullpath)

    # Lets remove any dupes since mediainfo is rather slow.
    seen = set()
    seen_add = seen.add
    return [f for f in enc_paths if not (f in seen or seen_add(f))]
def todict(obj, classkey=None):
    """Transform an object to dict.

    Strings are returned unchanged, mappings and iterables are converted
    recursively, objects exposing ``_ast()`` are converted through it, and
    other objects are turned into an ordered dict of their public,
    non-callable attributes whose converted value is not ``None``.

    :param obj: value to convert
    :param classkey: when given, key under which the class name is stored
    """
    if isinstance(obj, string_types):
        return obj

    if isinstance(obj, dict):
        return {key: todict(value, classkey) for key, value in obj.items()}

    if hasattr(obj, '_ast'):
        return todict(obj._ast())

    if hasattr(obj, '__iter__'):
        return [todict(item, classkey) for item in obj]

    if hasattr(obj, '__dict__'):
        converted = [(key, todict(value, classkey))
                     for key, value in obj.__dict__.items()
                     if not callable(value) and not key.startswith('_')]
        data = OrderedDict([(key, value) for key, value in converted if value is not None])
        if classkey is not None and hasattr(obj, '__class__'):
            data[classkey] = obj.__class__.__name__
        return data

    return obj
def detect_os():
    """Detect os family: windows, macos or unix."""
    if os.name in ('nt', 'dos', 'os2', 'ce'):
        return 'windows'
    return 'macos' if sys.platform == 'darwin' else 'unix'
def define_candidate(locations, names, os_family=None, suggested_path=None):
    """Generate candidate executables for the given parameters.

    ``suggested_path`` is tried first, followed by ``locations[os_family]``.
    The special location ``__PATH__`` yields the bare names, a file location
    is yielded as is, and a directory yields every name that exists as a
    file inside it. Empty locations are skipped.

    :param locations: mapping of os family to a tuple of locations
    :param names: mapping of os family to executable names
    :param os_family: os family; detected when not given
    :param suggested_path: optional file or directory to try first
    """
    family = os_family or detect_os()
    search_order = (suggested_path, ) + locations[family]
    for location in search_order:
        if not location:
            continue

        if location == '__PATH__':
            for bare_name in names[family]:
                yield bare_name
            continue

        if os.path.isfile(location):
            yield location
            continue

        if os.path.isdir(location):
            for bare_name in names[family]:
                candidate = os.path.join(location, bare_name)
                if os.path.isfile(candidate):
                    yield candidate

@ -1,3 +0,0 @@
Patrick Altman <paltman@gmail.com> (author)
cjlucas https://github.com/cjlucas
Louis Sautier <sautier.louis@gmail.com> (maintainer since 2016)

@ -1,24 +0,0 @@
The MIT License
Copyright (c) 2010-2014, Patrick Altman <paltman@gmail.com>
Copyright (c) 2016, Louis Sautier <sautier.louis@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
http://www.opensource.org/licenses/mit-license.php

@ -1,27 +0,0 @@
pymediainfo
-----------
.. image:: https://img.shields.io/pypi/v/pymediainfo.svg
:target: https://pypi.org/project/pymediainfo
.. image:: https://img.shields.io/pypi/pyversions/pymediainfo.svg
:target: https://pypi.org/project/pymediainfo
.. image:: https://repology.org/badge/tiny-repos/python:pymediainfo.svg
:target: https://repology.org/metapackage/python:pymediainfo
.. image:: https://img.shields.io/pypi/implementation/pymediainfo.svg
:target: https://pypi.org/project/pymediainfo
.. image:: https://api.travis-ci.org/sbraz/pymediainfo.svg?branch=master
:target: https://travis-ci.org/sbraz/pymediainfo
.. image:: https://ci.appveyor.com/api/projects/status/g15a2daem1oub57n/branch/master?svg=true
:target: https://ci.appveyor.com/project/sbraz/pymediainfo
This small package is a wrapper around the MediaInfo library.
It works on Linux, Mac OS X and Windows and is tested with Python 2.7, 3.4, 3.5, 3.6, 3.7, PyPy and PyPy3.
See https://pymediainfo.readthedocs.io/ for more information.

@ -1,320 +0,0 @@
# vim: set fileencoding=utf-8 :
import os
import re
import locale
import json
import ctypes
import sys
from pkg_resources import get_distribution, DistributionNotFound
import xml.etree.ElementTree as ET
try:
import pathlib
except ImportError:
pathlib = None
if sys.version_info < (3,):
import urlparse
else:
import urllib.parse as urlparse
try:
__version__ = get_distribution("pymediainfo").version
except DistributionNotFound:
pass
class Track(object):
    """
    An object associated with a media file track.

    Each :class:`Track` attribute corresponds to attributes parsed from MediaInfo's output.
    All attributes are lower case. Attributes that are present several times such as Duration
    yield a second attribute starting with `other_` which is a list of all alternative attribute values.

    When a non-existing attribute is accessed, `None` is returned.

    All available attributes can be obtained by calling :func:`to_data`.
    """

    def __eq__(self, other):
        return self.__dict__ == other.__dict__

    def __getattribute__(self, name):
        # Missing attributes read as None instead of raising. Only
        # AttributeError is handled so that interrupts and unrelated errors
        # are no longer silently swallowed.
        try:
            return object.__getattribute__(self, name)
        except AttributeError:
            return None

    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, state):
        self.__dict__ = state

    def __init__(self, xml_dom_fragment):
        """Populate the track from a ``track`` XML element.

        :param xml_dom_fragment: element with a ``type`` attribute and one
            child element per track attribute
        """
        self.track_type = xml_dom_fragment.attrib['type']
        for el in xml_dom_fragment:
            node_name = el.tag.lower().strip().strip('_')
            if node_name == 'id':
                node_name = 'track_id'
            node_value = el.text
            other_node_name = "other_%s" % node_name
            if getattr(self, node_name) is None:
                # First occurrence becomes the primary value.
                setattr(self, node_name, node_value)
            elif getattr(self, other_node_name) is None:
                setattr(self, other_node_name, [node_value, ])
            else:
                getattr(self, other_node_name).append(node_value)

        # Prefer an integer as primary value whenever one is available.
        for o in [d for d in self.__dict__.keys() if d.startswith('other_')]:
            primary = o.replace('other_', '')
            try:
                setattr(self, primary, int(getattr(self, primary)))
            except Exception:
                # The primary value is not an integer: promote the first
                # alternative that is, and keep the old primary value in the
                # alternatives list.
                for v in getattr(self, o):
                    try:
                        current = getattr(self, primary)
                        setattr(self, primary, int(v))
                        getattr(self, o).append(current)
                        break
                    except Exception:
                        pass

    def __repr__(self):
        return("<Track track_id='{}', track_type='{}'>".format(self.track_id, self.track_type))

    def to_data(self):
        """
        Returns a dict representation of the track attributes.

        The ``xml_dom_fragment`` key, if present, is left out.

        :rtype: dict
        """
        return {k: v for k, v in self.__dict__.items() if k != 'xml_dom_fragment'}
class MediaInfo(object):
    """
    An object containing information about a media file.

    :class:`MediaInfo` objects can be created by directly calling code from
    libmediainfo (in this case, the library must be present on the system):

    >>> pymediainfo.MediaInfo.parse("/path/to/file.mp4")

    Alternatively, objects may be created from MediaInfo's XML output.
    Such output can be obtained using the ``XML`` output format on versions older than v17.10
    and the ``OLDXML`` format on newer versions.

    Using such an XML file, we can create a :class:`MediaInfo` object:

    >>> with open("output.xml") as f:
    ...     mi = pymediainfo.MediaInfo(f.read())

    :param str xml: XML output obtained from MediaInfo.
    :param str encoding_errors: option to pass to :func:`str.encode`'s `errors`
        parameter before parsing `xml`.
    :raises xml.etree.ElementTree.ParseError: if passed invalid XML.
    :var tracks: A list of :py:class:`Track` objects which the media file contains.
    """

    def __eq__(self, other):
        return self.tracks == other.tracks

    def __init__(self, xml, encoding_errors="strict"):
        xml_dom = ET.fromstring(xml.encode("utf-8", encoding_errors))
        self.tracks = []
        # This is the case for libmediainfo < 18.03
        # https://github.com/sbraz/pymediainfo/issues/57
        # https://github.com/MediaArea/MediaInfoLib/commit/575a9a32e6960ea34adb3bc982c64edfa06e95eb
        if xml_dom.tag == "File":
            xpath = "track"
        else:
            xpath = "File/track"
        for xml_track in xml_dom.iterfind(xpath):
            self.tracks.append(Track(xml_track))

    @staticmethod
    def _get_library(library_file=None):
        """Load libmediainfo and declare the prototypes of the functions used.

        :param library_file: explicit library path; when ``None`` the
            platform's default names are tried, preferring a copy located
            next to this module.
        :raises OSError: when none of the candidate libraries can be loaded.
        """
        os_is_nt = os.name in ("nt", "dos", "os2", "ce")
        if os_is_nt:
            lib_type = ctypes.WinDLL
        else:
            lib_type = ctypes.CDLL
        if library_file is None:
            if os_is_nt:
                library_names = ("MediaInfo.dll",)
            elif sys.platform == "darwin":
                library_names = ("libmediainfo.0.dylib", "libmediainfo.dylib")
            else:
                library_names = ("libmediainfo.so.0",)
            script_dir = os.path.dirname(__file__)
            # Look for the library file in the script folder
            for library in library_names:
                lib_path = os.path.join(script_dir, library)
                if os.path.isfile(lib_path):
                    # If we find it, don't try any other filename
                    library_names = (lib_path,)
                    break
        else:
            library_names = (library_file,)
        for i, library in enumerate(library_names, start=1):
            try:
                lib = lib_type(library)
                # Define arguments and return types
                lib.MediaInfo_New.argtypes = []
                lib.MediaInfo_New.restype = ctypes.c_void_p
                lib.MediaInfo_Option.argtypes = [ctypes.c_void_p, ctypes.c_wchar_p, ctypes.c_wchar_p]
                lib.MediaInfo_Option.restype = ctypes.c_wchar_p
                lib.MediaInfo_Inform.argtypes = [ctypes.c_void_p, ctypes.c_size_t]
                lib.MediaInfo_Inform.restype = ctypes.c_wchar_p
                lib.MediaInfo_Open.argtypes = [ctypes.c_void_p, ctypes.c_wchar_p]
                lib.MediaInfo_Open.restype = ctypes.c_size_t
                lib.MediaInfo_Delete.argtypes = [ctypes.c_void_p]
                lib.MediaInfo_Delete.restype = None
                lib.MediaInfo_Close.argtypes = [ctypes.c_void_p]
                lib.MediaInfo_Close.restype = None
                return lib
            except OSError:
                # If we've tried all possible filenames
                if i == len(library_names):
                    raise

    @classmethod
    def can_parse(cls, library_file=None):
        """
        Checks whether media files can be analyzed using libmediainfo.

        :rtype: bool
        """
        try:
            cls._get_library(library_file)
            return True
        except Exception:
            # Any loading problem means "cannot parse"; interrupts and
            # interpreter exits are deliberately not caught.
            return False

    @classmethod
    def parse(cls, filename, library_file=None, cover_data=False,
              encoding_errors="strict", parse_speed=0.5, text=False,
              full=True, legacy_stream_display=False):
        """
        Analyze a media file using libmediainfo.
        If libmediainfo is located in a non-standard location, the `library_file` parameter can be used:

        >>> pymediainfo.MediaInfo.parse("tests/data/sample.mkv",
        ...     library_file="/path/to/libmediainfo.dylib")

        :param filename: path to the media file which will be analyzed.
            A URL can also be used if libmediainfo was compiled
            with CURL support.
        :param str library_file: path to the libmediainfo library, this should only be used if the library cannot be auto-detected.
        :param bool cover_data: whether to retrieve cover data as base64.
        :param str encoding_errors: option to pass to :func:`str.encode`'s `errors`
            parameter before parsing MediaInfo's XML output.
        :param float parse_speed: passed to the library as `ParseSpeed`,
            this option takes values between 0 and 1.
            A higher value will yield more precise results in some cases
            but will also increase parsing time.
        :param bool text: if ``True``, MediaInfo's text output will be returned instead
            of a :class:`MediaInfo` object.
        :param bool full: display additional tags, including computer-readable values
            for sizes and durations.
        :param bool legacy_stream_display: display additional information about streams.
        :type filename: str or pathlib.Path
        :rtype: str if `text` is ``True``.
        :rtype: :class:`MediaInfo` otherwise.
        :raises FileNotFoundError: if passed a non-existent file
            (Python 3.3), does not work on Windows.
        :raises IOError: if passed a non-existent file (Python < 3.3),
            does not work on Windows.
        :raises RuntimeError: if parsing fails, this should not
            happen unless libmediainfo itself fails.
        """
        lib = cls._get_library(library_file)
        if pathlib is not None and isinstance(filename, pathlib.PurePath):
            filename = str(filename)
            url = False
        else:
            url = urlparse.urlparse(filename)
        # Try to open the file (if it's not a URL)
        # Doesn't work on Windows because paths are URLs
        if not (url and url.scheme):
            # Test whether the file is readable
            with open(filename, "rb"):
                pass
        # Obtain the library version
        lib_version = lib.MediaInfo_Option(None, "Info_Version", "")
        lib_version = tuple(int(_) for _ in re.search(r"^MediaInfoLib - v(\S+)", lib_version).group(1).split("."))
        # The XML option was renamed starting with version 17.10
        if lib_version >= (17, 10):
            xml_option = "OLDXML"
        else:
            xml_option = "XML"
        # Cover_Data is not extracted by default since version 18.03
        # See https://github.com/MediaArea/MediaInfoLib/commit/d8fd88a1c282d1c09388c55ee0b46029e7330690
        if cover_data and lib_version >= (18, 3):
            lib.MediaInfo_Option(None, "Cover_Data", "base64")
        # Create a MediaInfo handle
        handle = lib.MediaInfo_New()
        try:
            lib.MediaInfo_Option(handle, "CharSet", "UTF-8")
            # Fix for https://github.com/sbraz/pymediainfo/issues/22
            # Python 2 does not change LC_CTYPE
            # at startup: https://bugs.python.org/issue6203
            if (sys.version_info < (3,) and os.name == "posix"
                    and locale.getlocale() == (None, None)):
                locale.setlocale(locale.LC_CTYPE, locale.getdefaultlocale())
            lib.MediaInfo_Option(None, "Inform", "" if text else xml_option)
            lib.MediaInfo_Option(None, "Complete", "1" if full else "")
            lib.MediaInfo_Option(None, "ParseSpeed", str(parse_speed))
            lib.MediaInfo_Option(None, "LegacyStreamDisplay", "1" if legacy_stream_display else "")
            if lib.MediaInfo_Open(handle, filename) == 0:
                raise RuntimeError("An error occurred while opening {}"
                                   " with libmediainfo".format(filename))
            output = lib.MediaInfo_Inform(handle, 0)
        finally:
            # Release the handle on every path, including a failed open,
            # so it is never leaked.
            lib.MediaInfo_Close(handle)
            lib.MediaInfo_Delete(handle)
        if text:
            return output
        return cls(output, encoding_errors)

    def to_data(self):
        """
        Returns a dict representation of the object's :py:class:`Tracks <Track>`.

        :rtype: dict
        """
        return {'tracks': [track.to_data() for track in self.tracks]}

    def to_json(self):
        """
        Returns a JSON representation of the object's :py:class:`Tracks <Track>`.

        :rtype: str
        """
        return json.dumps(self.to_data())

@ -1,2 +0,0 @@
from .pyprobe import VideoFileParser

@ -1,41 +0,0 @@
class BaseParser:
    @classmethod
    def parse(cls, data, rawMode, includeMissing):
        """Core of the parser classes

        Collects all methods prefixed with "value_" and builds a dict of
        their return values. Parser classes will inherit from this class.
        All methods that begin with "value_" in a parser class will be given
        the same `data` argument and are expected to pull their corresponding
        value from the collection.

        These methods return a tuple - their raw value and formatted value.
        The raw value is a string or tuple of string and the formatted value
        be of type string, int, float, or tuple.

        If no data is found in a method, the raw value is expected to be None,
        and for the formatted value, strings will be "null", ints will be 0,
        floats will be 0.0.

        Args:
            data (dict): Raw video data
            rawMode (bool): Returns raw values instead of formatted values
            includeMissing (bool): If value is missing, return "empty" value

        Returns:
            dict<str, dict<str, var>>: Parsed data from class methods, may not have every value.
        """
        prefix = "value_"
        parsers = [getattr(cls, p) for p in dir(cls) if p.startswith(prefix)]
        info = {}
        for parser in parsers:
            parsed_raw, parsed_formatted = parser(data)
            # Identity check: only a genuine None marks a missing value,
            # whatever ``__eq__`` the raw value may define.
            if parsed_raw is None and not includeMissing:
                continue
            # The key is the method name without its "value_" prefix.
            name = parser.__name__[len(prefix):]
            info[name] = parsed_raw if rawMode else parsed_formatted
        return info

@ -1,216 +0,0 @@
from __future__ import absolute_import
from os import path
from .baseparser import BaseParser
class StreamParser(BaseParser):
    """Values shared by every kind of stream."""

    @staticmethod
    def value_codec(data):
        """Return ``(raw, formatted)`` codec name; formatted is "null" when missing."""
        raw = data.get("codec_name", None)
        formatted = raw if raw else "null"
        return raw, formatted

    @staticmethod
    def value_format(data):
        """Return ``(raw, formatted)`` format name; formatted is "null" when missing."""
        raw = data.get("format_name", None)
        formatted = raw if raw else "null"
        return raw, formatted

    @staticmethod
    def value_bit_rate(data):
        """Return ``(raw, formatted)`` bit rate; formatted is an int, 0 when unusable."""
        raw = data.get("bit_rate", None)
        try:
            formatted = int(float(raw))
        except (ValueError, TypeError):
            formatted = 0
        return raw, formatted
class VideoStreamParser(BaseParser):
    """Values extracted from a video stream."""

    @staticmethod
    def value_codec(data):
        """Delegate to ``StreamParser.value_codec``."""
        return StreamParser.value_codec(data)

    @staticmethod
    def value_format(data):
        """Delegate to ``StreamParser.value_format``."""
        return StreamParser.value_format(data)

    @staticmethod
    def value_bit_rate(data):
        """Delegate to ``StreamParser.value_bit_rate``."""
        return StreamParser.value_bit_rate(data)

    @staticmethod
    def value_resolution(data):
        """Return ``(raw, formatted)`` resolution as (width, height) tuples.

        The formatted tuple holds ints and is ``(0, 0)`` when the values are
        missing or unusable.
        """
        width = data.get("width", None)
        height = data.get("height", None)
        if width is None and height is None:
            return None, (0, 0)

        raw = (width, height)
        try:
            formatted = (int(float(width)), int(float(height)))
        except (ValueError, TypeError):
            formatted = (0, 0)
        return raw, formatted

    @staticmethod
    def average_framerate(data):
        """Return frames divided by duration as a float, 0.0 when unavailable."""
        frames = data.get("nb_frames", None)
        duration = data.get("duration", None)
        try:
            return float(frames) / float(duration)
        except (ValueError, TypeError, ZeroDivisionError):
            return 0.0

    @classmethod
    def value_framerate(cls, data):
        """Return ``(raw, formatted)`` frame rate; formatted is a float.

        The "num/den" string is evaluated and rounded to 3 decimals; when it
        cannot be used, the average frame rate is returned instead.
        """
        input_str = data.get("avg_frame_rate", None)
        try:
            numerator, denominator = input_str.split("/")
            return input_str, round(float(numerator) / float(denominator), 3)
        except (ValueError, ZeroDivisionError, AttributeError):
            return input_str, cls.average_framerate(data)

    @staticmethod
    def value_aspect_ratio(data):
        """Return ``(raw, formatted)`` display aspect ratio; "null" when missing."""
        raw = data.get("display_aspect_ratio", None)
        formatted = raw if raw else "null"
        return raw, formatted

    @staticmethod
    def value_pixel_format(data):
        """Return ``(raw, formatted)`` pixel format; "null" when missing."""
        raw = data.get("pix_fmt", None)
        formatted = raw if raw else "null"
        return raw, formatted
class AudioStreamParser(StreamParser):
    """Extractors for ffprobe audio stream entries.

    Adds audio-specific fields on top of those inherited from
    ``StreamParser``. Every ``value_*`` method returns ``(raw, formatted)``.
    """

    @staticmethod
    def _as_int(raw):
        """Convert ``raw`` to an int via float, or 0 when it cannot be parsed."""
        try:
            return int(float(raw))
        except (ValueError, TypeError):
            return 0

    @staticmethod
    def value_sample_rate(data):
        """Return ``sample_rate``; the formatted value is an int (0 if unparsable)."""
        raw = data.get("sample_rate")
        return raw, AudioStreamParser._as_int(raw)

    @staticmethod
    def value_channel_count(data):
        """Return ``channels``; the formatted value is an int (0 if unparsable)."""
        raw = data.get("channels")
        return raw, AudioStreamParser._as_int(raw)

    @staticmethod
    def value_channel_layout(data):
        """Return ``channel_layout``; the formatted value falls back to "null"."""
        layout = data.get("channel_layout")
        return layout, (layout if layout else "null")
class SubtitleStreamParser(BaseParser):
    """Extractors for ffprobe subtitle stream entries.

    Every ``value_*`` method takes one stream dict and returns a
    ``(raw, formatted)`` tuple.
    """

    @staticmethod
    def value_codec(data):
        """Delegate to ``StreamParser.value_codec``."""
        return StreamParser.value_codec(data)

    @staticmethod
    def value_language(data):
        """Return the language tag; the formatted value falls back to "null".

        The tag is looked up under ``language`` first and ``LANGUAGE``
        second. The raw value is ``None`` when the stream has no tags or
        neither key holds a value.
        """
        tags = data.get("tags")
        if not tags:
            return None, "null"
        language = tags.get("language") or tags.get("LANGUAGE")
        return language, (language or "null")

    @staticmethod
    def value_forced(data):
        """Return the forced flag; the formatted value is always a bool.

        The raw value is ``None`` when the stream carries no disposition
        data, otherwise the truthiness of its ``forced`` entry.
        """
        disposition = data.get("disposition")
        if not disposition:
            # The formatted fallback must be False, not the string "null":
            # a non-empty string is truthy and would make callers that test
            # `if forced:` treat an unknown stream as forced.
            return None, False
        forced = bool(disposition.get("forced"))
        return forced, forced
class ChapterParser(BaseParser):
    """Extractors for ffprobe chapter entries.

    Every ``value_*`` method takes one chapter dict and returns a
    ``(raw, formatted)`` tuple.
    """

    @staticmethod
    def value_start(data):
        """Return ``start_time``; the formatted value is a float (0 if unparsable)."""
        info = data.get("start_time")
        try:
            return info, float(info)
        except (ValueError, TypeError):
            return info, 0

    @classmethod
    def value_end(cls, data):
        """Return ``end_time``; the formatted value is a float (0 if unparsable)."""
        info = data.get("end_time")
        try:
            return info, float(info)
        except (ValueError, TypeError):
            return info, 0

    @staticmethod
    def value_title(data):
        """Return the ``title`` tag; the formatted value falls back to "null"."""
        info = data.get("tags", {}).get("title")
        return info, (info or "null")

    @staticmethod
    def fillEmptyTitles(chapters):
        """Replace missing chapter titles with a numbered placeholder.

        A chapter without a usable title gets "Chapter N", where N is its
        1-based position in ``chapters``. The list is modified in place.

        Args:
            chapters (list<dict>): The list of parsed chapters
        """
        for number, chapter in enumerate(chapters, start=1):
            # .get(): the key is absent when parsing dropped missing values.
            title = chapter.get("title")
            # value_title formats a missing title as the truthy string
            # "null", so it has to be treated as empty here; otherwise the
            # placeholder would never be applied to formatted chapters.
            if not title or title == "null":
                chapter["title"] = "Chapter " + str(number)
class RootParser(BaseParser):
    """Extractors for the container-level ``format`` section of ffprobe output.

    Every ``value_*`` method takes the format dict and returns a
    ``(raw, formatted)`` tuple.
    """

    @staticmethod
    def value_duration(data):
        """Return ``duration``; the formatted value is a float (0.0 if unparsable)."""
        raw = data.get("duration")
        try:
            seconds = float(raw)
        except (ValueError, TypeError):
            seconds = 0.0
        return raw, seconds

    @staticmethod
    def value_size(data):
        """Return ``size``; the formatted value is an int (0 if unparsable).

        When ``size`` is absent, the size of the file named by ``filename``
        is used instead, provided that file exists.
        """
        raw = data.get("size")
        if raw is None:
            filename = data.get("filename", "")
            if path.isfile(filename):
                raw = str(path.getsize(filename))
        try:
            formatted = int(float(raw))
        except (ValueError, TypeError):
            formatted = 0
        return raw, formatted

    @classmethod
    def value_bit_rate(cls, data):
        """Return ``bit_rate``; the formatted value is an int (0 if unparsable).

        When ``bit_rate`` is absent, it is estimated from the formatted
        size and duration if both are non-zero.
        """
        raw = data.get("bit_rate")
        if raw is None:
            size = cls.value_size(data)[1]
            duration = cls.value_duration(data)[1]
            if size and duration:
                # Algebraically equal to size * 8 / duration; the original
                # expression is kept so float rounding stays the same.
                raw = size / (duration / 60 * 0.0075) / 1000
        try:
            formatted = int(float(raw))
        except (ValueError, TypeError):
            formatted = 0
        return raw, formatted

@ -1,226 +0,0 @@
from __future__ import absolute_import
from six import PY3
import json
import subprocess
from os import path
from sys import getfilesystemencoding
from . import ffprobeparsers
class VideoFileParser:
    """Run ffprobe on a video file and organize its JSON output.

    Args:
        ffprobe (str): ffprobe executable location or command name
        includeMissing (bool): forwarded to every parser's ``parse`` call
        rawMode (bool): forwarded to every parser's ``parse`` call; when
            false, empty chapter titles are also filled in
    """

    # ffprobe "codec_type" -> (parser class name in ffprobeparsers, result key).
    # Stream types that are not listed here are ignored.
    _STREAM_PARSERS = {
        "video": ("VideoStreamParser", "videos"),
        "audio": ("AudioStreamParser", "audios"),
        "subtitle": ("SubtitleStreamParser", "subtitles"),
    }

    def __init__(
        self,
        ffprobe="ffprobe",
        includeMissing=True,
        rawMode=False,
    ):
        self._ffprobe = ffprobe
        self._includeMissing = includeMissing
        self._rawMode = rawMode

    ########################################
    # Main Method

    def parseFfprobe(self, inputFile):
        """Takes an input file and returns the parsed data using ffprobe.

        Args:
            inputFile (str): Video file path

        Returns:
            dict<str, dict<str, var>>: Parsed video info

        Raises:
            FileNotFoundError: The input video file or input executable was not found
            IOError: Execution failed
        """
        if not path.isfile(inputFile):
            raise FileNotFoundError(inputFile + " not found")
        self._checkExecutable(self._ffprobe)
        fdict = self._executeFfprobe(inputFile)
        return self._parseFfprobe(fdict, inputFile)

    ########################################
    # ffprobe Parsing

    def _executeFfprobe(self, inputFile):
        """Executes ffprobe program on input file to get raw info

        fdict = dict<str, fdict> or dict<str, str>

        Args:
            inputFile (str): Video file path

        Returns:
            fdict: Parsed data

        Raises:
            IOError: ffprobe failed or its output is not valid JSON
        """
        commandArgs = [
            "-v",
            "quiet",
            "-hide_banner",
            "-show_error",
            "-show_format",
            "-show_streams",
            "-show_programs",
            "-show_chapters",
            "-show_private_data",
            "-print_format",
            "json",
        ]
        outputJson = self._executeParser(self._ffprobe, commandArgs, inputFile)
        try:
            return json.loads(outputJson)
        except ValueError:
            # json.JSONDecodeError only exists on Python 3, where it is a
            # subclass of ValueError; Python 2 raises a plain ValueError.
            raise IOError("Could not decode ffprobe output for file " + inputFile)

    def _parseFfprobe(self, fOutput, inputFile):
        """Parse all data from fOutput to organized format

        fdict = dict<str, fdict> or dict<str, str>

        Args:
            fOutput (fdict): Stream data from ffprobe
            inputFile (str): Video file path

        Returns:
            dict<str, dict<str, str>>: Parsed video data
        """
        videoInfo = {"path": path.abspath(inputFile)}
        videoInfo.update(
            ffprobeparsers.RootParser.parse(
                # A missing "format" section is parsed as an empty one.
                fOutput.get("format") or {}, self._rawMode, self._includeMissing
            )
        )
        videoInfo.update(self._parseFfprobeStreams(fOutput))
        videoInfo.update(self._parseFfprobeChapters(fOutput))
        if not self._rawMode:
            ffprobeparsers.ChapterParser.fillEmptyTitles(videoInfo["chapters"])
        return videoInfo

    def _parseFfprobeStreams(self, fOutput):
        """Parses video, audio, and subtitle streams

        fdict = dict<str, fdict> or dict<str, str>

        Args:
            fOutput (fdict): Stream data from ffprobe

        Returns:
            dict<str, dict<str, var>>: Parsed streams - video, audio, and subtitle
        """
        parsedInfo = {"videos": [], "audios": [], "subtitles": []}
        # A missing or null "streams" section yields empty lists.
        for stream in fOutput.get("streams") or []:
            target = self._STREAM_PARSERS.get(stream.get("codec_type"))
            if target is None:
                continue
            parserName, resultKey = target
            streamParser = getattr(ffprobeparsers, parserName)
            parsedInfo[resultKey].append(
                streamParser.parse(stream, self._rawMode, self._includeMissing)
            )
        return parsedInfo

    def _parseFfprobeChapters(self, fOutput):
        """Parses chapters

        fdict = dict<str, fdict> or dict<str, str>

        Args:
            fOutput (fdict): Stream data from ffprobe

        Returns:
            dict<str, dict<str, var>>: Parsed chapters
        """
        parsedInfo = {"chapters": []}
        # A missing or null "chapters" section yields an empty list.
        for chapter in fOutput.get("chapters") or []:
            parsedInfo["chapters"].append(
                ffprobeparsers.ChapterParser.parse(
                    chapter, self._rawMode, self._includeMissing
                )
            )
        return parsedInfo

    ########################################
    # Misc Methods

    @staticmethod
    def _executeParser(parser, commandArgs, inputFile):
        """Executes parser on the input file

        Args:
            parser (str): Executable location or command
            commandArgs (list of strings): Extra command arguments
            inputFile (str): the input file location

        Returns:
            The captured standard output of the command

        Raises:
            IOError: ffprobe execution failed
        """
        if PY3:
            command = [parser] + commandArgs + [inputFile]
            completedProcess = subprocess.run(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=True,
                encoding="utf-8",
            )
            if completedProcess.returncode:
                raise IOError(
                    "Error occurred during execution - " + completedProcess.stderr
                )
            return completedProcess.stdout

        # Python 2: the path is encoded before being passed as an argument.
        command = [parser] + commandArgs + [inputFile.encode(getfilesystemencoding())]
        try:
            return subprocess.check_output(command, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            raise IOError("Error occurred during execution - " + e.output)

    @staticmethod
    def _checkExecutable(executable):
        """Checks if target is executable

        Args:
            executable (str): Executable location, can be file or command

        Raises:
            FileNotFoundError: Executable was not found
        """
        try:
            subprocess.check_output(
                [executable, "--help"],
                stderr=subprocess.STDOUT
            )
        except OSError:
            raise FileNotFoundError(executable + " not found")
        except subprocess.CalledProcessError:
            # A non-zero exit status still means the executable was found
            # and launched, which is all this check is about.
            pass
class FileNotFoundError(Exception):
    """Raised when the input video file or the ffprobe executable is not found.

    On Python 3 this name shadows the builtin ``FileNotFoundError`` inside
    this module; it derives directly from ``Exception``.
    """
class IOError(Exception):
    """Raised when running ffprobe fails or its output cannot be decoded.

    This name shadows the builtin ``IOError`` inside this module; it
    derives directly from ``Exception``.
    """

@ -12,11 +12,11 @@ gevent-websocker=0.10.1
gitpython=2.1.9
guessit=2.1.4
guess_language-spirit=0.5.3
knowit=0.3.0-dev
peewee=3.9.6
py-pretty=1
pycountry=18.2.23
pyga=2.6.1
pyprobe=0.1.2 <-- modified version: do not update!!!
pysrt=1.1.1
pytz=2018.4
rarfile=3.0

Loading…
Cancel
Save