commit
f760d3198b
@ -1,443 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# License: GPL
|
||||
|
||||
from __future__ import annotations
|
||||
from .container import FFprobeVideoContainer
|
||||
from .stream import FFprobeSubtitleStream
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from typing import List, Optional
|
||||
|
||||
from babelfish import Language
|
||||
from babelfish.exceptions import LanguageError
|
||||
import pysubs2
|
||||
|
||||
__version__ = "0.1.4"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Paths to executables
|
||||
FFPROBE_PATH = os.environ.get("FFPROBE_PATH", "ffprobe")
|
||||
FFMPEG_PATH = os.environ.get("FFMPEG_PATH", "ffmpeg")
|
||||
|
||||
FFMPEG_STATS = True
|
||||
FF_LOG_LEVEL = "quiet"
|
||||
|
||||
|
||||
class FeseError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class ExtractionError(FeseError):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidFile(FeseError):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidStream(FeseError):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidSource(FeseError):
|
||||
pass
|
||||
|
||||
|
||||
class ConversionError(FeseError):
|
||||
pass
|
||||
|
||||
|
||||
class LanguageNotFound(FeseError):
|
||||
pass
|
||||
|
||||
|
||||
# Extensions
|
||||
|
||||
SRT = "srt"
|
||||
ASS = "ass"
|
||||
|
||||
|
||||
class FFprobeSubtitleDisposition:
|
||||
def __init__(self, data: dict):
|
||||
self.default = False
|
||||
self.generic = False
|
||||
self.dub = False
|
||||
self.original = False
|
||||
self.comment = False
|
||||
self.lyrics = False
|
||||
self.karaoke = False
|
||||
self.forced = False
|
||||
self.hearing_impaired = False
|
||||
self.visual_impaired = False
|
||||
self.clean_effects = False
|
||||
self.attached_pic = False
|
||||
self.timed_thumbnails = False
|
||||
self._content_type = None
|
||||
|
||||
for key, val in data.items():
|
||||
if hasattr(self, key):
|
||||
setattr(self, key, bool(val))
|
||||
|
||||
def update_from_tags(self, tags):
|
||||
tag_title = tags.get("title")
|
||||
if tag_title is None:
|
||||
logger.debug("Title not found. Marking as generic")
|
||||
self.generic = True
|
||||
return None
|
||||
|
||||
l_tag_title = tag_title.lower()
|
||||
|
||||
for key, val in _content_types.items():
|
||||
if val.search(l_tag_title) is not None:
|
||||
logger.debug("Found %s: %s", key, l_tag_title)
|
||||
self._content_type = key
|
||||
setattr(self, key, True)
|
||||
return None
|
||||
|
||||
logger.debug("Generic disposition title found: %s", l_tag_title)
|
||||
self.generic = True
|
||||
return None
|
||||
|
||||
@property
|
||||
def suffix(self):
|
||||
if self._content_type is not None:
|
||||
return f"-{self._content_type}"
|
||||
|
||||
return ""
|
||||
|
||||
def __str__(self):
|
||||
return self.suffix.lstrip("-").upper() or "GENERIC"
|
||||
|
||||
|
||||
class FFprobeSubtitleStream:
|
||||
"""Base class for FFprobe (FFmpeg) extractable subtitle streams."""
|
||||
|
||||
def __init__(self, stream: dict):
|
||||
"""
|
||||
:raises: LanguageNotFound
|
||||
"""
|
||||
self.index = int(stream.get("index", 0))
|
||||
self.codec_name = stream.get("codec_name", "Unknown")
|
||||
self.extension = _subtitle_extensions.get(self.codec_name, self.codec_name)
|
||||
self.r_frame_rate = stream.get("r_frame_rate")
|
||||
self.avg_frame_rate = stream.get("avg_frame_rate")
|
||||
self.time_base = stream.get("time_base")
|
||||
self.tags = stream.get("tags", {})
|
||||
self.start_time = float(stream.get("start_time", 0))
|
||||
# TODO: separate tags
|
||||
self.number_of_frames = int(self.tags.get("NUMBER_OF_FRAMES", 0))
|
||||
self.number_of_frames_eng = int(
|
||||
self.tags.get("NUMBER_OF_FRAMES-eng", self.number_of_frames)
|
||||
)
|
||||
|
||||
self.duration, self.duration_ts = 0, 0
|
||||
|
||||
# some subtitles streams miss the duration_ts field and only have tags->DURATION field
|
||||
# fixme: we still don't know if "DURATION" is a common tag/key
|
||||
if "DURATION" in self.tags:
|
||||
try:
|
||||
h, m, s = [
|
||||
ts.replace(",", ".").strip()
|
||||
for ts in self.tags["DURATION"].split(":")
|
||||
]
|
||||
self.duration = float(s) + float(m) * 60 + float(h) * 60 * 60
|
||||
self.duration_ts = int(self.duration * 1000)
|
||||
except ValueError as error:
|
||||
logger.warning("Couldn't get duration field: %s. Using 0", error)
|
||||
else:
|
||||
try:
|
||||
self.duration = float(stream.get("duration", "0").replace(",", "."))
|
||||
self.duration_ts = int(stream.get("duration_ts", self.duration * 1000))
|
||||
# some subtitles streams miss a duration completely and has "N/A" as value
|
||||
except ValueError as error:
|
||||
logger.warning("Couldn't get duration field: %s. Using 0", error)
|
||||
|
||||
self.start_pts = int(stream.get("start_pts", 0))
|
||||
|
||||
self.disposition = FFprobeSubtitleDisposition(stream.get("disposition", {}))
|
||||
|
||||
if self.tags:
|
||||
self.disposition.update_from_tags(self.tags)
|
||||
|
||||
self.language: Language = self._language()
|
||||
|
||||
@property
|
||||
def suffix(self):
|
||||
lang = self.language.alpha2
|
||||
if self.language.country is not None:
|
||||
lang = f"{lang}-{self.language.country}"
|
||||
|
||||
return f"{lang}{self.disposition.suffix}.{self.extension}"
|
||||
|
||||
def _language(self) -> Language:
|
||||
og_lang = self.tags.get("language")
|
||||
last_exc = None
|
||||
|
||||
if og_lang is not None:
|
||||
if og_lang in _extra_languages:
|
||||
extra = _extra_languages[og_lang]
|
||||
title = self.tags.get("title", "n/a").lower()
|
||||
if any(possible in title for possible in extra["matches"]):
|
||||
logger.debug("Found extra language %s", extra["language_args"])
|
||||
return Language(*extra["language_args"])
|
||||
|
||||
try:
|
||||
lang = Language.fromalpha3b(og_lang)
|
||||
# Test for suffix
|
||||
assert lang.alpha2
|
||||
|
||||
return lang
|
||||
except LanguageError as error:
|
||||
last_exc = error
|
||||
logger.debug("Error with '%s' language: %s", og_lang, error)
|
||||
|
||||
raise LanguageNotFound(
|
||||
f"Couldn't detect language for stream: {self.tags}"
|
||||
) from last_exc
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<{self.codec_name.upper()}: {self.language}@{self.disposition}>"
|
||||
|
||||
|
||||
class FFprobeVideoContainer:
|
||||
def __init__(self, path: str):
|
||||
self.path = path
|
||||
|
||||
@property
|
||||
def extension(self):
|
||||
return os.path.splitext(self.path)[-1].lstrip(".")
|
||||
|
||||
def get_subtitles(self, timeout: int = 600) -> List[FFprobeSubtitleStream]:
|
||||
"""Factory function to create subtitle instances from FFprobe.
|
||||
|
||||
:param timeout: subprocess timeout in seconds (default: 600)
|
||||
:raises: InvalidSource"""
|
||||
|
||||
ff_command = [
|
||||
FFPROBE_PATH,
|
||||
"-v",
|
||||
FF_LOG_LEVEL,
|
||||
"-print_format",
|
||||
"json",
|
||||
"-show_format",
|
||||
"-show_streams",
|
||||
self.path,
|
||||
]
|
||||
try:
|
||||
result = subprocess.run(
|
||||
ff_command, stdout=subprocess.PIPE, check=True, timeout=timeout
|
||||
)
|
||||
streams = json.loads(result.stdout)["streams"]
|
||||
except _ffprobe_exceptions as error:
|
||||
raise InvalidSource(
|
||||
f"{error} trying to get information from {self.path}"
|
||||
) from error # We want to see the traceback
|
||||
|
||||
subs = []
|
||||
for stream in streams:
|
||||
if stream.get("codec_type", "n/a") != "subtitle":
|
||||
continue
|
||||
try:
|
||||
subs.append(FFprobeSubtitleStream(stream))
|
||||
except LanguageNotFound:
|
||||
pass
|
||||
|
||||
if not subs:
|
||||
logger.debug("Source doesn't have any subtitle valid streams")
|
||||
return []
|
||||
|
||||
logger.debug("Found subtitle streams: %s", subs)
|
||||
return subs
|
||||
|
||||
def extract_subtitles(
|
||||
self,
|
||||
subtitles: List[FFprobeSubtitleStream],
|
||||
custom_dir=None,
|
||||
overwrite=True,
|
||||
timeout=600,
|
||||
):
|
||||
"""Extracts a list of subtitles. Returns a dictionary of the extracted
|
||||
filenames by index.
|
||||
|
||||
:param subtitles: a list of FFprobeSubtitle instances
|
||||
:param custom_dir: a custom directory to save the subtitles. Defaults to
|
||||
same directory as the media file
|
||||
:param overwrite: overwrite files with the same name (default: True)
|
||||
:param timeout: subprocess timeout in seconds (default: 600)
|
||||
:raises: ExtractionError, OSError
|
||||
"""
|
||||
extract_command = [FFMPEG_PATH, "-v", FF_LOG_LEVEL]
|
||||
if FFMPEG_STATS:
|
||||
extract_command.append("-stats")
|
||||
extract_command.extend(["-y", "-i", self.path])
|
||||
|
||||
if custom_dir is not None:
|
||||
# May raise OSError
|
||||
os.makedirs(custom_dir, exist_ok=True)
|
||||
|
||||
items = {}
|
||||
collected_paths = set()
|
||||
|
||||
for subtitle in subtitles:
|
||||
sub_path = f"{os.path.splitext(self.path)[0]}.{subtitle.suffix}"
|
||||
if custom_dir is not None:
|
||||
sub_path = os.path.join(custom_dir, os.path.basename(sub_path))
|
||||
|
||||
if sub_path in collected_paths:
|
||||
sub_path = (
|
||||
f"{sub_path.rstrip(f'.{subtitle.suffix}')}"
|
||||
f"-{len(collected_paths)}.{subtitle.suffix}"
|
||||
)
|
||||
|
||||
if not overwrite and os.path.isfile(sub_path):
|
||||
logger.debug("Ignoring path (OVERWRITE TRUE): %s", sub_path)
|
||||
continue
|
||||
|
||||
extract_command.extend(
|
||||
["-map", f"0:{subtitle.index}", "-c", "copy", sub_path]
|
||||
)
|
||||
logger.debug("Appending subtitle path: %s", sub_path)
|
||||
|
||||
collected_paths.add(sub_path)
|
||||
|
||||
items[subtitle.index] = sub_path
|
||||
|
||||
if not items:
|
||||
logger.debug("No subtitles to extract")
|
||||
return {}
|
||||
|
||||
logger.debug("Extracting subtitle with command %s", " ".join(extract_command))
|
||||
|
||||
try:
|
||||
subprocess.run(extract_command, timeout=timeout, check=True)
|
||||
except (subprocess.SubprocessError, FileNotFoundError) as error:
|
||||
raise ExtractionError(f"Error calling ffmpeg: {error}") from error
|
||||
|
||||
for path in items.values():
|
||||
if not os.path.isfile(path):
|
||||
logger.debug("%s was not extracted", path)
|
||||
|
||||
return items
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<FFprobeVideoContainer {self.extension}: {self.path}>"
|
||||
|
||||
|
||||
def check_integrity(
|
||||
subtitle: FFprobeSubtitleStream, path: str, sec_offset_threshold=900
|
||||
):
|
||||
"""A relative check for the integriy of a file. This can be used to find a failed
|
||||
ffmpeg extraction where the final file might not be complete or might be corrupted.
|
||||
Currently, only ASS and Subrip are supported.
|
||||
|
||||
:param subtitle: FFprobeSubtitle instance
|
||||
:param path: the path of the subtitle file (ass or srt)
|
||||
:param sec_offset_threshold: the maximum seconds offset to determine if the file is complete
|
||||
:raises: InvalidFile
|
||||
"""
|
||||
if subtitle.extension not in (ASS, SRT):
|
||||
raise InvalidFile(f"Extension not supported: {subtitle.extension}")
|
||||
|
||||
try:
|
||||
sub = pysubs2.load(path)
|
||||
except (pysubs2.Pysubs2Error, UnicodeError, OSError, FileNotFoundError) as error:
|
||||
raise InvalidFile(error) from error
|
||||
else:
|
||||
# ignore the duration check if the stream has no duration listed at all
|
||||
if subtitle.duration_ts:
|
||||
off = abs(int(sub[-1].end) - subtitle.duration_ts)
|
||||
if off > abs(sec_offset_threshold) * 1000:
|
||||
raise InvalidFile(
|
||||
f"The last subtitle timestamp ({sub[-1].end/1000} sec) is {off/1000} sec ahead"
|
||||
f" from the subtitle stream total duration ({subtitle.duration} sec)"
|
||||
)
|
||||
logger.debug("Integrity check passed (%d sec offset)", off / 1000)
|
||||
else:
|
||||
logger.warning(
|
||||
"Ignoring duration check, subtitle stream has bad duration values: %s",
|
||||
subtitle,
|
||||
)
|
||||
|
||||
|
||||
def to_srt(
|
||||
source: str, output: Optional[str] = None, remove_source: bool = False
|
||||
) -> str:
|
||||
"""Convert a subtitle to SubRip. Currently, only ASS is supported. SubRip
|
||||
files will be silently ignored.
|
||||
|
||||
raises: ConversionError, OSError"""
|
||||
if source.endswith(".srt"):
|
||||
return source
|
||||
|
||||
split_path = os.path.splitext(source)
|
||||
|
||||
if split_path[-1] not in (".ass"):
|
||||
raise ConversionError(
|
||||
f"No converter found for extension: {split_path[-1]}"
|
||||
) from None
|
||||
|
||||
output = output or f"{split_path[0]}.srt"
|
||||
|
||||
try:
|
||||
parsed = pysubs2.load(source)
|
||||
parsed.save(output)
|
||||
except (pysubs2.Pysubs2Error, UnicodeError) as error:
|
||||
raise ConversionError(f"Exception converting {output}: {error}") from error
|
||||
|
||||
logger.debug("Converted: %s", output)
|
||||
|
||||
if remove_source and source != output:
|
||||
try:
|
||||
os.remove(source)
|
||||
except OSError as error:
|
||||
logger.debug("Can't remove source: %s (%s)", source, error)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
_subtitle_extensions = {
|
||||
"subrip": "srt",
|
||||
"ass": "ass",
|
||||
"hdmv_pgs_subtitle": "sup",
|
||||
"pgs": "sup",
|
||||
}
|
||||
|
||||
|
||||
_content_types = {
|
||||
"hearing_impaired": re.compile(r"sdh|hearing impaired"),
|
||||
"forced": re.compile(r"forced"),
|
||||
"comment": re.compile(r"comment"),
|
||||
"visual_impaired": re.compile(r"signs|visual impair"),
|
||||
"karaoke": re.compile(r"karaoke|songs"),
|
||||
}
|
||||
|
||||
|
||||
_ffprobe_exceptions = (
|
||||
subprocess.SubprocessError,
|
||||
json.JSONDecodeError,
|
||||
FileNotFoundError,
|
||||
KeyError,
|
||||
)
|
||||
|
||||
_extra_languages = {
|
||||
"spa": {
|
||||
"matches": (
|
||||
"es-la",
|
||||
"spa-la",
|
||||
"spl",
|
||||
"mx",
|
||||
"latin",
|
||||
"mexic",
|
||||
"argent",
|
||||
"latam",
|
||||
),
|
||||
"language_args": ("spa", "MX"),
|
||||
},
|
||||
"por": {
|
||||
"matches": ("pt-br", "pob", "pb", "brazilian", "brasil", "brazil"),
|
||||
"language_args": ("por", "BR"),
|
||||
},
|
||||
}
|
||||
__version__ = "0.2"
|
||||
|
@ -0,0 +1,238 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# License: GPL
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from .exceptions import ExtractionError
|
||||
from .exceptions import InvalidSource
|
||||
from .exceptions import LanguageNotFound
|
||||
from .exceptions import UnsupportedCodec
|
||||
from .stream import FFprobeSubtitleStream
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Paths to executables
|
||||
FFPROBE_PATH = os.environ.get("FFPROBE_PATH", "ffprobe")
|
||||
FFMPEG_PATH = os.environ.get("FFMPEG_PATH", "ffmpeg")
|
||||
|
||||
FFMPEG_STATS = True
|
||||
FF_LOG_LEVEL = "quiet"
|
||||
|
||||
|
||||
class FFprobeVideoContainer:
|
||||
def __init__(self, path: str):
|
||||
self.path = path
|
||||
|
||||
@property
|
||||
def extension(self):
|
||||
return os.path.splitext(self.path)[-1].lstrip(".")
|
||||
|
||||
def get_subtitles(self, timeout: int = 600):
|
||||
"""Factory function to create subtitle (stream) instances from FFprobe.
|
||||
|
||||
:param timeout: subprocess timeout in seconds (default: 600)
|
||||
:raises: InvalidSource"""
|
||||
|
||||
ff_command = [
|
||||
FFPROBE_PATH,
|
||||
"-v",
|
||||
FF_LOG_LEVEL,
|
||||
"-print_format",
|
||||
"json",
|
||||
"-show_format",
|
||||
"-show_streams",
|
||||
self.path,
|
||||
]
|
||||
try:
|
||||
result = subprocess.run(
|
||||
ff_command, stdout=subprocess.PIPE, check=True, timeout=timeout
|
||||
)
|
||||
streams = json.loads(result.stdout)["streams"]
|
||||
except _ffprobe_exceptions as error:
|
||||
raise InvalidSource(
|
||||
f"{error} trying to get information from {self.path}"
|
||||
) from error # We want to see the traceback
|
||||
|
||||
subs = []
|
||||
for stream in streams:
|
||||
if stream.get("codec_type", "n/a") != "subtitle":
|
||||
continue
|
||||
try:
|
||||
subs.append(FFprobeSubtitleStream(stream))
|
||||
except (LanguageNotFound, UnsupportedCodec) as error:
|
||||
logger.debug("Ignoring %s: %s", stream.get("codec_name"), error)
|
||||
|
||||
if not subs:
|
||||
logger.debug("Source doesn't have any subtitle valid streams")
|
||||
return []
|
||||
|
||||
logger.debug("Found subtitle streams: %s", subs)
|
||||
return subs
|
||||
|
||||
def extract_subtitles(
|
||||
self,
|
||||
subtitles,
|
||||
custom_dir=None,
|
||||
overwrite=True,
|
||||
timeout=600,
|
||||
convert_format=None,
|
||||
):
|
||||
"""Extracts a list of subtitles converting them. Returns a dictionary of the
|
||||
extracted filenames by index.
|
||||
|
||||
Most bitmap subtitles will raise UnsupportedCodec as they don't support conversion.
|
||||
For such formats use copy instead.
|
||||
|
||||
:param subtitles: a list of FFprobeSubtitle instances
|
||||
:param custom_dir: a custom directory to save the subtitles. Defaults to
|
||||
same directory as the media file
|
||||
:param overwrite: overwrite files with the same name (default: True)
|
||||
:param timeout: subprocess timeout in seconds (default: 600)
|
||||
:param convert_format: format to convert selected subtitles. Defaults to
|
||||
srt
|
||||
:raises: ExtractionError, UnsupportedCodec, OSError
|
||||
"""
|
||||
extract_command = [FFMPEG_PATH, "-v", FF_LOG_LEVEL]
|
||||
if FFMPEG_STATS:
|
||||
extract_command.append("-stats")
|
||||
extract_command.extend(["-y", "-i", self.path])
|
||||
|
||||
if custom_dir is not None:
|
||||
# May raise OSError
|
||||
os.makedirs(custom_dir, exist_ok=True)
|
||||
|
||||
items = {}
|
||||
collected_paths = set()
|
||||
|
||||
for subtitle in subtitles:
|
||||
extension_to_use = convert_format or subtitle.convert_default_format
|
||||
|
||||
sub_path = (
|
||||
f"{os.path.splitext(self.path)[0]}.{subtitle.suffix}.{extension_to_use}"
|
||||
)
|
||||
if custom_dir is not None:
|
||||
sub_path = os.path.join(custom_dir, os.path.basename(sub_path))
|
||||
|
||||
if not overwrite and sub_path in collected_paths:
|
||||
sub_path = f"{os.path.splitext(sub_path)[0]}.{len(collected_paths):02}.{extension_to_use}"
|
||||
|
||||
if not overwrite and os.path.isfile(sub_path):
|
||||
logger.debug("Ignoring path (OVERWRITE TRUE): %s", sub_path)
|
||||
continue
|
||||
|
||||
extract_command.extend(subtitle.convert_args(convert_format, sub_path))
|
||||
|
||||
logger.debug("Appending subtitle path: %s", sub_path)
|
||||
collected_paths.add(sub_path)
|
||||
|
||||
items[subtitle.index] = sub_path
|
||||
|
||||
if not items:
|
||||
logger.debug("No subtitles to extract")
|
||||
return {}
|
||||
|
||||
logger.debug("Extracting subtitle with command %s", " ".join(extract_command))
|
||||
|
||||
try:
|
||||
subprocess.run(extract_command, timeout=timeout, check=True)
|
||||
except (subprocess.SubprocessError, FileNotFoundError) as error:
|
||||
raise ExtractionError(f"Error calling ffmpeg: {error}") from error
|
||||
|
||||
for path in items.values():
|
||||
if not os.path.isfile(path):
|
||||
logger.warning("%s was not extracted", path)
|
||||
|
||||
return items
|
||||
|
||||
def copy_subtitles(
|
||||
self,
|
||||
subtitles,
|
||||
custom_dir=None,
|
||||
overwrite=True,
|
||||
timeout=600,
|
||||
fallback_to_convert=True,
|
||||
):
|
||||
"""Extracts a list of subtitles with ffmpeg's copy method. Returns a dictionary
|
||||
of the extracted filenames by index.
|
||||
|
||||
:param subtitles: a list of FFprobeSubtitle instances
|
||||
:param custom_dir: a custom directory to save the subtitles. Defaults to
|
||||
same directory as the media file
|
||||
:param overwrite: overwrite files with the same name (default: True)
|
||||
:param timeout: subprocess timeout in seconds (default: 600)
|
||||
:param fallback_to_convert: fallback to stream's default convert format if it is
|
||||
incompatible with copy
|
||||
:raises: ExtractionError, UnsupportedCodec, OSError
|
||||
"""
|
||||
extract_command = [FFMPEG_PATH, "-v", FF_LOG_LEVEL]
|
||||
if FFMPEG_STATS:
|
||||
extract_command.append("-stats")
|
||||
extract_command.extend(["-y", "-i", self.path])
|
||||
|
||||
if custom_dir is not None:
|
||||
# May raise OSError
|
||||
os.makedirs(custom_dir, exist_ok=True)
|
||||
|
||||
items = {}
|
||||
collected_paths = set()
|
||||
|
||||
for subtitle in subtitles:
|
||||
sub_path = f"{os.path.splitext(self.path)[0]}.{subtitle.suffix}.{subtitle.extension}"
|
||||
if custom_dir is not None:
|
||||
sub_path = os.path.join(custom_dir, os.path.basename(sub_path))
|
||||
|
||||
if not overwrite and sub_path in collected_paths:
|
||||
sub_path = f"{os.path.splitext(sub_path)[0]}.{len(collected_paths):02}.{subtitle.extension}"
|
||||
|
||||
if not overwrite and os.path.isfile(sub_path):
|
||||
logger.debug("Ignoring path (OVERWRITE TRUE): %s", sub_path)
|
||||
continue
|
||||
|
||||
try:
|
||||
extract_command.extend(subtitle.copy_args(sub_path))
|
||||
except UnsupportedCodec:
|
||||
if fallback_to_convert:
|
||||
logger.warning(
|
||||
"%s incompatible with copy. Using fallback", subtitle
|
||||
)
|
||||
extract_command.extend(subtitle.convert_args(None, sub_path))
|
||||
else:
|
||||
raise
|
||||
|
||||
logger.debug("Appending subtitle path: %s", sub_path)
|
||||
collected_paths.add(sub_path)
|
||||
|
||||
items[subtitle.index] = sub_path
|
||||
|
||||
if not items:
|
||||
logger.debug("No subtitles to extract")
|
||||
return {}
|
||||
|
||||
logger.debug("Extracting subtitle with command %s", " ".join(extract_command))
|
||||
|
||||
try:
|
||||
subprocess.run(extract_command, timeout=timeout, check=True)
|
||||
except (subprocess.SubprocessError, FileNotFoundError) as error:
|
||||
raise ExtractionError(f"Error calling ffmpeg: {error}") from error
|
||||
|
||||
for path in items.values():
|
||||
if not os.path.isfile(path):
|
||||
logger.warning("%s was not extracted", path)
|
||||
|
||||
return items
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<FFprobeVideoContainer {self.extension}: {self.path}>"
|
||||
|
||||
|
||||
_ffprobe_exceptions = (
|
||||
subprocess.SubprocessError,
|
||||
json.JSONDecodeError,
|
||||
FileNotFoundError,
|
||||
KeyError,
|
||||
)
|
@ -0,0 +1,68 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FFprobeSubtitleDisposition:
|
||||
def __init__(self, data: dict):
|
||||
self.default = False
|
||||
self.generic = False
|
||||
self.dub = False
|
||||
self.original = False
|
||||
self.comment = False
|
||||
self.lyrics = False
|
||||
self.karaoke = False
|
||||
self.forced = False
|
||||
self.hearing_impaired = False
|
||||
self.visual_impaired = False
|
||||
self.clean_effects = False
|
||||
self.attached_pic = False
|
||||
self.timed_thumbnails = False
|
||||
self._content_type = None
|
||||
|
||||
for key, val in data.items():
|
||||
if hasattr(self, key):
|
||||
setattr(self, key, bool(val))
|
||||
|
||||
for key in _content_types.keys():
|
||||
if getattr(self, key, None):
|
||||
self._content_type = key
|
||||
|
||||
def update_from_tags(self, tags):
|
||||
tag_title = tags.get("title")
|
||||
if tag_title is None:
|
||||
logger.debug("Title not found. Marking as generic")
|
||||
self.generic = True
|
||||
return None
|
||||
|
||||
l_tag_title = tag_title.lower()
|
||||
|
||||
for key, val in _content_types.items():
|
||||
if val.search(l_tag_title) is not None:
|
||||
logger.debug("Found %s: %s", key, l_tag_title)
|
||||
self._content_type = key
|
||||
setattr(self, key, True)
|
||||
return None
|
||||
|
||||
logger.debug("Generic disposition title found: %s", l_tag_title)
|
||||
self.generic = True
|
||||
return None
|
||||
|
||||
@property
|
||||
def suffix(self):
|
||||
return self._content_type or ""
|
||||
|
||||
def __str__(self):
|
||||
return self.suffix.upper() or "GENERIC"
|
||||
|
||||
|
||||
_content_types = {
|
||||
"hearing_impaired": re.compile(r"sdh|hearing impaired|cc"),
|
||||
"forced": re.compile(r"forced|non[- ]english"),
|
||||
"comment": re.compile(r"comment"),
|
||||
"visual_impaired": re.compile(r"signs|visual impair"),
|
||||
"karaoke": re.compile(r"karaoke|songs"),
|
||||
}
|
@ -0,0 +1,32 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
class FeseError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class ExtractionError(FeseError):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidFile(FeseError):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidStream(FeseError):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidSource(FeseError):
|
||||
pass
|
||||
|
||||
|
||||
class ConversionError(FeseError):
|
||||
pass
|
||||
|
||||
|
||||
class LanguageNotFound(FeseError):
|
||||
pass
|
||||
|
||||
|
||||
class UnsupportedCodec(FeseError):
|
||||
pass
|
@ -0,0 +1,162 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import timedelta
|
||||
import logging
|
||||
|
||||
from .disposition import FFprobeSubtitleDisposition
|
||||
from .exceptions import UnsupportedCodec
|
||||
from .tags import FFprobeGenericSubtitleTags
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FFprobeSubtitleStream:
|
||||
"""Base class for FFprobe (FFmpeg) extractable subtitle streams."""
|
||||
|
||||
def __init__(self, stream: dict):
|
||||
"""
|
||||
:raises: LanguageNotFound, UnsupportedCodec
|
||||
"""
|
||||
self.index = int(stream["index"])
|
||||
self.codec_name = stream["codec_name"]
|
||||
|
||||
try:
|
||||
self._codec = _codecs[self.codec_name]
|
||||
except KeyError:
|
||||
raise UnsupportedCodec(f"{self.codec_name} is not supported")
|
||||
|
||||
self.r_frame_rate = stream.get("r_frame_rate")
|
||||
self.avg_frame_rate = stream.get("avg_frame_rate")
|
||||
self.start_time = timedelta(seconds=float(stream.get("start_time", 0)))
|
||||
self.start_pts = timedelta(milliseconds=int(stream.get("start_pts", 0)))
|
||||
self.duration_ts = timedelta(milliseconds=int(stream.get("duration_ts", 0)))
|
||||
self.duration = timedelta(seconds=float(stream.get("duration", 0)))
|
||||
|
||||
self.tags = FFprobeGenericSubtitleTags.detect_cls_from_data(
|
||||
stream.get("tags", {})
|
||||
)
|
||||
self.disposition = FFprobeSubtitleDisposition(stream.get("disposition", {}))
|
||||
|
||||
if stream.get("tags") is not None:
|
||||
self.disposition.update_from_tags(stream["tags"])
|
||||
|
||||
def convert_args(self, convert_format, outfile):
|
||||
"""
|
||||
convert_format: Union[str, None] = the codec format to convert. if None is set, defaults
|
||||
to 'convert_default_format' codec's key
|
||||
outfile: str = output file
|
||||
|
||||
raises UnsupportedCodec if convert_format doesn't exist or if the codec doesn't
|
||||
support conversion
|
||||
"""
|
||||
convert_format = convert_format or self._codec["convert_default_format"]
|
||||
|
||||
if convert_format is None or not any(
|
||||
convert_format == item["copy_format"] for item in _codecs.values()
|
||||
):
|
||||
raise UnsupportedCodec(f"Unknown convert format: {convert_format}")
|
||||
|
||||
if not self._codec["convert"]:
|
||||
raise UnsupportedCodec(
|
||||
f"{self.codec_name} codec doesn't support conversion"
|
||||
)
|
||||
|
||||
return ["-map", f"0:{self.index}", "-f", convert_format, outfile]
|
||||
|
||||
def copy_args(self, outfile):
|
||||
"raises UnsupportedCodec if the codec doesn't support copy"
|
||||
if not self._codec["copy"] or not self._codec["copy_format"]:
|
||||
raise UnsupportedCodec(f"{self.codec_name} doesn't support copy")
|
||||
|
||||
return [
|
||||
"-map",
|
||||
f"0:{self.index}",
|
||||
"-c:s",
|
||||
"copy",
|
||||
"-f",
|
||||
self._codec["copy_format"],
|
||||
outfile,
|
||||
]
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
# Legacy
|
||||
return self.tags.language
|
||||
|
||||
@property
|
||||
def extension(self):
|
||||
return self._codec["copy_format"] or self._codec["convert_default_format"] or ""
|
||||
|
||||
@property
|
||||
def convert_default_format(self):
|
||||
return self._codec["convert_default_format"]
|
||||
|
||||
@property
|
||||
def type(self):
|
||||
return self._codec["type"]
|
||||
|
||||
@property
|
||||
def suffix(self):
|
||||
return ".".join(
|
||||
item
|
||||
for item in (self.tags.suffix, self.disposition.suffix, self.extension)
|
||||
if item
|
||||
)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<{self.codec_name.upper()}: {self.tags}@{self.disposition}>"
|
||||
|
||||
|
||||
_codecs = {
|
||||
"ass": {
|
||||
"type": "text",
|
||||
"copy": True,
|
||||
"copy_format": "ass",
|
||||
"convert": True,
|
||||
"convert_default_format": "srt",
|
||||
},
|
||||
"subrip": {
|
||||
"type": "text",
|
||||
"copy": True,
|
||||
"copy_format": "srt",
|
||||
"convert": True,
|
||||
"convert_default_format": "srt",
|
||||
},
|
||||
"webvtt": {
|
||||
"type": "text",
|
||||
"copy": True,
|
||||
"copy_format": "webvtt",
|
||||
"convert": True,
|
||||
"convert_default_format": "srt",
|
||||
},
|
||||
"mov_text": {
|
||||
"type": "text",
|
||||
"copy": False,
|
||||
"copy_format": None,
|
||||
"convert": True,
|
||||
"convert_default_format": "srt",
|
||||
},
|
||||
"hdmv_pgs_subtitle": {
|
||||
"type": "bitmap",
|
||||
"copy": True,
|
||||
"copy_format": "sup",
|
||||
"convert": False,
|
||||
"convert_default_format": None,
|
||||
},
|
||||
"dvb_subtitle": {
|
||||
"type": "bitmap",
|
||||
"copy": True,
|
||||
"copy_format": "sup",
|
||||
"convert": False,
|
||||
"convert_default_format": None,
|
||||
},
|
||||
"dvd_subtitle": {
|
||||
"type": "bitmap",
|
||||
"copy": True,
|
||||
"copy_format": "sup",
|
||||
"convert": False,
|
||||
"convert_default_format": None,
|
||||
},
|
||||
}
|
@ -0,0 +1,175 @@
|
||||
from datetime import timedelta
|
||||
import logging
|
||||
|
||||
from babelfish import Language
|
||||
from babelfish.exceptions import LanguageError
|
||||
|
||||
from .exceptions import LanguageNotFound
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FFprobeGenericSubtitleTags:
|
||||
_DETECTABLE_TAGS = None
|
||||
|
||||
def __init__(self, data: dict):
|
||||
self.language = _get_language(data)
|
||||
self._data = data
|
||||
|
||||
@classmethod
|
||||
def detect_cls_from_data(cls, data):
|
||||
for cls_ in (FFprobeMkvSubtitleTags, FFprobeMp4SubtitleTags):
|
||||
if cls_.is_compatible(data):
|
||||
logger.debug("Detected tags class: %s", cls_)
|
||||
return cls_(data)
|
||||
|
||||
logger.debug("Unable to detect tags class. Using generic")
|
||||
return FFprobeGenericSubtitleTags(data)
|
||||
|
||||
@property
|
||||
def suffix(self):
|
||||
lang = self.language.alpha2
|
||||
if self.language.country is not None:
|
||||
lang = f"{lang}-{self.language.country}"
|
||||
|
||||
return str(lang)
|
||||
|
||||
@property
|
||||
def frames(self):
|
||||
return 0
|
||||
|
||||
@classmethod
|
||||
def is_compatible(cls, data):
|
||||
return False
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{type(self).__name__}: {self.suffix}"
|
||||
|
||||
|
||||
class FFprobeMkvSubtitleTags(FFprobeGenericSubtitleTags):
|
||||
_DETECTABLE_TAGS = (
|
||||
"BPS",
|
||||
"BPS-eng",
|
||||
"DURATION",
|
||||
"DURATION-eng",
|
||||
"NUMBER_OF_FRAMES",
|
||||
"NUMBER_OF_FRAMES-eng",
|
||||
"NUMBER_OF_BYTES",
|
||||
"NUMBER_OF_BYTES-eng",
|
||||
)
|
||||
|
||||
def __init__(self, data: dict):
|
||||
super().__init__(data)
|
||||
|
||||
self.title = data.get("title")
|
||||
self.bps = _safe_int(data.get("BPS"))
|
||||
self.bps_eng = _safe_int(data.get("BPS-eng"))
|
||||
self.duration = _safe_td(data.get("DURATION"))
|
||||
self.duration_eng = _safe_td(data.get("DURATION-eng"))
|
||||
self.number_of_frames = _safe_int(data.get("NUMBER_OF_FRAMES"))
|
||||
self.number_of_frames_eng = _safe_int(data.get("NUMBER_OF_FRAMES-eng"))
|
||||
self.number_of_bytes = _safe_int(data.get("NUMBER_OF_BYTES"))
|
||||
self.number_of_bytes_eng = _safe_int(data.get("NUMBER_OF_BYTES-eng"))
|
||||
|
||||
@property
|
||||
def frames(self):
|
||||
return self.number_of_frames or self.number_of_frames_eng or 0
|
||||
|
||||
@classmethod
|
||||
def is_compatible(cls, data):
|
||||
return any(
|
||||
key
|
||||
in (
|
||||
"BPS",
|
||||
"BPS-eng",
|
||||
"DURATION",
|
||||
"DURATION-eng",
|
||||
"NUMBER_OF_FRAMES",
|
||||
"NUMBER_OF_FRAMES-eng",
|
||||
"NUMBER_OF_BYTES",
|
||||
"NUMBER_OF_BYTES-eng",
|
||||
)
|
||||
for key in data.keys()
|
||||
)
|
||||
|
||||
|
||||
class FFprobeMp4SubtitleTags(FFprobeGenericSubtitleTags):
|
||||
_DETECTABLE_TAGS = ("creation_time", "handler_name")
|
||||
|
||||
def __init__(self, data: dict):
|
||||
super().__init__(data)
|
||||
self.creation_time = data.get("creation_time")
|
||||
self.handler_name = data.get("handler_name")
|
||||
|
||||
@classmethod
|
||||
def is_compatible(cls, data):
|
||||
return any(key in ("creation_time", "handler_name") for key in data.keys())
|
||||
|
||||
|
||||
def _get_language(tags) -> Language:
|
||||
og_lang = tags.get("language")
|
||||
last_exc = None
|
||||
|
||||
if og_lang is not None:
|
||||
if og_lang in _extra_languages:
|
||||
extra = _extra_languages[og_lang]
|
||||
title = tags.get("title", "n/a").lower()
|
||||
if any(possible in title for possible in extra["matches"]):
|
||||
logger.debug("Found extra language %s", extra["language_args"])
|
||||
return Language(*extra["language_args"])
|
||||
|
||||
try:
|
||||
lang = Language.fromalpha3b(og_lang)
|
||||
# Test for suffix
|
||||
assert lang.alpha2
|
||||
|
||||
return lang
|
||||
except LanguageError as error:
|
||||
last_exc = error
|
||||
logger.debug("Error with '%s' language: %s", og_lang, error)
|
||||
|
||||
raise LanguageNotFound(f"Couldn't detect language from tags: {tags}") from last_exc
|
||||
|
||||
|
||||
def _safe_td(value, default=None):
|
||||
if value is None:
|
||||
return default
|
||||
|
||||
try:
|
||||
h, m, s = [float(ts.replace(",", ".").strip()) for ts in value.split(":")]
|
||||
return timedelta(hours=h, minutes=m, seconds=s)
|
||||
except ValueError as error:
|
||||
logger.warning("Couldn't get duration field: %s. Returning %s", error, default)
|
||||
return default
|
||||
|
||||
|
||||
def _safe_int(value, default=None):
|
||||
if value is None:
|
||||
return default
|
||||
|
||||
try:
|
||||
return int(value)
|
||||
except ValueError:
|
||||
logger.warning("Couldn't convert to int: %s. Returning %s", value, default)
|
||||
return default
|
||||
|
||||
|
||||
_extra_languages = {
|
||||
"spa": {
|
||||
"matches": (
|
||||
"es-la",
|
||||
"spa-la",
|
||||
"spl",
|
||||
"mx",
|
||||
"latin",
|
||||
"mexic",
|
||||
"argent",
|
||||
"latam",
|
||||
),
|
||||
"language_args": ("spa", "MX"),
|
||||
},
|
||||
"por": {
|
||||
"matches": ("pt-br", "pob", "pb", "brazilian", "brasil", "brazil"),
|
||||
"language_args": ("por", "BR"),
|
||||
},
|
||||
}
|
Loading…
Reference in new issue