You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
428 lines
16 KiB
428 lines
16 KiB
from __future__ import print_function, unicode_literals, division

try:
    # The container ABCs live in ``collections.abc`` since Python 3.3; the
    # aliases in ``collections`` were removed in Python 3.10.
    from collections.abc import MutableSequence
except ImportError:  # Python 2
    from collections import MutableSequence
from collections import OrderedDict
import io
from io import open
from itertools import starmap, chain
import os.path
import logging

from .formats import autodetect_format, get_format_class, get_format_identifier
from .substation import is_valid_field_content
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from .time import make_time, ms_to_str
from .common import PY3
|
|
|
|
|
|
class SSAFile(MutableSequence):
    """
    Subtitle file in SubStation Alpha format.

    This class has a list-like interface which exposes :attr:`SSAFile.events`,
    list of subtitles in the file::

        subs = SSAFile.load("subtitles.srt")

        for line in subs:
            print(line.text)

        subs.insert(0, SSAEvent(start=0, end=make_time(s=2.5), text="New first subtitle"))

        del subs[0]

    """

    DEFAULT_INFO = OrderedDict([
        ("WrapStyle", "0"),
        ("ScaledBorderAndShadow", "yes"),
        ("Collisions", "Normal")])

    def __init__(self):
        """Create an empty subtitle file with the default style and info."""
        self.events = [] #: List of :class:`SSAEvent` instances, ie. individual subtitles.
        self.styles = OrderedDict([("Default", SSAStyle.DEFAULT_STYLE.copy())]) #: Dict of :class:`SSAStyle` instances.
        self.info = self.DEFAULT_INFO.copy() #: Dict with script metadata, ie. ``[Script Info]``.
        self.aegisub_project = OrderedDict() #: Dict with Aegisub project, ie. ``[Aegisub Project Garbage]``.
        self.fps = None #: Framerate used when reading the file, if applicable.
        self.format = None #: Format of source subtitle file, if applicable, eg. ``"srt"``.

    # ------------------------------------------------------------------------
    # I/O methods
    # ------------------------------------------------------------------------

    @classmethod
    def load(cls, path, encoding="utf-8", format_=None, fps=None, **kwargs):
        """
        Load subtitle file from given path.

        Arguments:
            path (str): Path to subtitle file.
            encoding (str): Character encoding of input file.
                Defaults to UTF-8, you may need to change this.
            format_ (str): Optional, forces use of specific parser
                (eg. `"srt"`, `"ass"`). Otherwise, format is detected
                automatically from file contents. This argument should
                be rarely needed.
            fps (float): Framerate for frame-based formats (MicroDVD),
                for other formats this argument is ignored. Framerate might
                be detected from the file, in which case you don't need
                to specify it here (when given, this argument overrides
                autodetection).
            keep_unknown_html_tags (bool): This affects SubRip only (SRT),
                for other formats this argument is ignored.
                By default, HTML tags are converted to equivalent SubStation tags
                (eg. ``<i>`` to ``{\\i1}``) and any remaining tags are removed
                to keep the text clean. Set this parameter to ``True``
                if you want to pass through these tags (eg. ``<sub>``).
                This is useful if your output format is SRT and your player
                supports these tags.
            kwargs: Extra options, passed through to the parser unchanged.

        Returns:
            SSAFile

        Raises:
            IOError
            UnicodeDecodeError
            pysubs2.exceptions.UnknownFPSError
            pysubs2.exceptions.UnknownFormatIdentifierError
            pysubs2.exceptions.FormatAutodetectionError

        Note:
            pysubs2 may autodetect subtitle format and/or framerate. These
            values are set as :attr:`SSAFile.format` and :attr:`SSAFile.fps`
            attributes.

        Example:
            >>> subs1 = pysubs2.load("subrip-subtitles.srt")
            >>> subs2 = pysubs2.load("microdvd-subtitles.sub", fps=23.976)
            >>> subs3 = pysubs2.load("subrip-subtitles-with-fancy-tags.srt", keep_unknown_html_tags=True)

        """
        with open(path, encoding=encoding) as fp:
            return cls.from_file(fp, format_, fps=fps, **kwargs)

    @classmethod
    def from_string(cls, string, format_=None, fps=None, **kwargs):
        """
        Load subtitle file from string.

        See :meth:`SSAFile.load()` for full description.

        Arguments:
            string (str): Subtitle file in a string. Note that the string
                must be Unicode (in Python 2).

        Returns:
            SSAFile

        Example:
            >>> text = '''
            ... 1
            ... 00:00:00,000 --> 00:00:05,000
            ... An example SubRip file.
            ... '''
            >>> subs = SSAFile.from_string(text)

        """
        fp = io.StringIO(string)
        return cls.from_file(fp, format_, fps=fps, **kwargs)

    @classmethod
    def from_file(cls, fp, format_=None, fps=None, **kwargs):
        """
        Read subtitle file from file object.

        See :meth:`SSAFile.load()` for full description.

        Note:
            This is a low-level method. Usually, one of :meth:`SSAFile.load()`
            or :meth:`SSAFile.from_string()` is preferable.

        Arguments:
            fp (file object): A file object, ie. :class:`io.TextIOBase` instance.
                Note that the file must be opened in text mode (as opposed to binary).

        Returns:
            SSAFile

        """
        if format_ is None:
            # Autodetect subtitle format, then read again using correct parser.
            # The file might be a pipe and we need to read it twice,
            # so just buffer everything.
            text = fp.read()
            fragment = text[:10000]
            format_ = autodetect_format(fragment)
            fp = io.StringIO(text)

        impl = get_format_class(format_)
        subs = cls() # an empty subtitle file
        subs.format = format_
        subs.fps = fps
        impl.from_file(subs, fp, format_, fps=fps, **kwargs)
        return subs

    def save(self, path, encoding="utf-8", format_=None, fps=None, **kwargs):
        """
        Save subtitle file to given path.

        The subtitles are serialized in memory before the destination is
        opened, so an unknown format or a writer error does not truncate
        a file that already exists at ``path``.

        Arguments:
            path (str): Path to subtitle file.
            encoding (str): Character encoding of output file.
                Defaults to UTF-8, which should be fine for most purposes.
            format_ (str): Optional, specifies desired subtitle format
                (eg. `"srt"`, `"ass"`). Otherwise, format is detected
                automatically from file extension. Thus, this argument
                is rarely needed.
            fps (float): Framerate for frame-based formats (MicroDVD),
                for other formats this argument is ignored. When omitted,
                :attr:`SSAFile.fps` value is used (ie. the framerate used
                for loading the file, if any). When the :class:`SSAFile`
                wasn't loaded from MicroDVD, or if you wish save it with
                different framerate, use this argument. See also
                :meth:`SSAFile.transform_framerate()` for fixing bad
                frame-based to time-based conversions.
            kwargs: Extra options for the writer.

        Raises:
            IOError
            UnicodeEncodeError
            pysubs2.exceptions.UnknownFPSError
            pysubs2.exceptions.UnknownFormatIdentifierError
            pysubs2.exceptions.UnknownFileExtensionError

        """
        if format_ is None:
            ext = os.path.splitext(path)[1].lower()
            format_ = get_format_identifier(ext)

        # Opening with "w" truncates the destination immediately, so produce
        # the complete output first and only then touch the file.
        text = self.to_string(format_, fps=fps, **kwargs)

        with open(path, "w", encoding=encoding) as fp:
            fp.write(text)

    def to_string(self, format_, fps=None, **kwargs):
        """
        Get subtitle file as a string.

        See :meth:`SSAFile.save()` for full description.

        Returns:
            str

        """
        fp = io.StringIO()
        self.to_file(fp, format_, fps=fps, **kwargs)
        return fp.getvalue()

    def to_file(self, fp, format_, fps=None, **kwargs):
        """
        Write subtitle file to file object.

        See :meth:`SSAFile.save()` for full description.

        Note:
            This is a low-level method. Usually, one of :meth:`SSAFile.save()`
            or :meth:`SSAFile.to_string()` is preferable.

        Arguments:
            fp (file object): A file object, ie. :class:`io.TextIOBase` instance.
                Note that the file must be opened in text mode (as opposed to binary).

        """
        impl = get_format_class(format_)
        impl.to_file(self, fp, format_, fps=fps, **kwargs)

    # ------------------------------------------------------------------------
    # Retiming subtitles
    # ------------------------------------------------------------------------

    def shift(self, h=0, m=0, s=0, ms=0, frames=None, fps=None):
        """
        Shift all subtitles by constant time amount.

        Shift may be time-based (the default) or frame-based. In the latter
        case, specify both frames and fps. h, m, s, ms will be ignored.

        Arguments:
            h, m, s, ms: Integer or float values, may be positive or negative.
            frames (int): When specified, must be an integer number of frames.
                May be positive or negative. fps must be also specified.
            fps (float): When specified, must be a positive number.

        Raises:
            ValueError: Invalid fps or missing number of frames.

        """
        delta = make_time(h=h, m=m, s=s, ms=ms, frames=frames, fps=fps)
        for line in self:
            line.start += delta
            line.end += delta

    def transform_framerate(self, in_fps, out_fps):
        """
        Rescale all timestamps by ratio of in_fps/out_fps.

        Can be used to fix files converted from frame-based to time-based
        with wrongly assumed framerate.

        Arguments:
            in_fps (float)
            out_fps (float)

        Raises:
            ValueError: Non-positive framerate given.

        """
        if in_fps <= 0 or out_fps <= 0:
            raise ValueError("Framerates must be positive, cannot transform %f -> %f" % (in_fps, out_fps))

        ratio = in_fps / out_fps
        for line in self:
            # timestamps are rounded back to integers after scaling
            line.start = int(round(line.start * ratio))
            line.end = int(round(line.end * ratio))

    # ------------------------------------------------------------------------
    # Working with styles
    # ------------------------------------------------------------------------

    def rename_style(self, old_name, new_name):
        """
        Rename a style, including references to it.

        Arguments:
            old_name (str): Style to be renamed.
            new_name (str): New name for the style (must be unused).

        Raises:
            KeyError: No style named old_name.
            ValueError: new_name is not a legal name (cannot use commas)
                or new_name is taken.

        """
        if old_name not in self.styles:
            raise KeyError("Style %r not found" % old_name)
        if new_name in self.styles:
            raise ValueError("There is already a style called %r" % new_name)
        if not is_valid_field_content(new_name):
            raise ValueError("%r is not a valid name" % new_name)

        self.styles[new_name] = self.styles[old_name]
        del self.styles[old_name]

        for line in self:
            # XXX also handle \r override tag
            if line.style == old_name:
                line.style = new_name

    def import_styles(self, subs, overwrite=True):
        """
        Merge in styles from other SSAFile.

        The style objects themselves are stored, not copies of them, so
        an imported style is shared between both files afterwards.

        Arguments:
            subs (SSAFile): Subtitle file imported from.
            overwrite (bool): On name conflict, use style from the other file
                (default: True).

        Raises:
            TypeError: subs is not an SSAFile.

        """
        if not isinstance(subs, SSAFile):
            raise TypeError("Must supply an SSAFile.")

        for name, style in subs.styles.items():
            if name not in self.styles or overwrite:
                self.styles[name] = style

    # ------------------------------------------------------------------------
    # Helper methods
    # ------------------------------------------------------------------------

    def equals(self, other):
        """
        Equality of two SSAFiles.

        Compares :attr:`SSAFile.info`, :attr:`SSAFile.styles` and :attr:`SSAFile.events`.
        Order of entries in OrderedDicts does not matter. "ScriptType" key in info is
        considered an implementation detail and thus ignored.

        Useful mostly in unit tests. Differences are logged at DEBUG level.

        Returns:
            bool

        Raises:
            TypeError: other is not an SSAFile.

        """
        if not isinstance(other, SSAFile):
            raise TypeError("Cannot compare to non-SSAFile object")

        # Script metadata: every key except "ScriptType" must be present on
        # both sides with the same value.
        for key in set(chain(self.info.keys(), other.info.keys())) - {"ScriptType"}:
            sv, ov = self.info.get(key), other.info.get(key)
            if sv is None:
                logging.debug("%r missing in self.info", key)
                return False
            elif ov is None:
                logging.debug("%r missing in other.info", key)
                return False
            elif sv != ov:
                logging.debug("info %r differs (self=%r, other=%r)", key, sv, ov)
                return False

        # Styles: same names on both sides, each pair comparing equal.
        for key in set(chain(self.styles.keys(), other.styles.keys())):
            sv, ov = self.styles.get(key), other.styles.get(key)
            if sv is None:
                logging.debug("%r missing in self.styles", key)
                return False
            elif ov is None:
                logging.debug("%r missing in other.styles", key)
                return False
            elif sv != ov:
                for k in sv.FIELDS:
                    if getattr(sv, k) != getattr(ov, k):
                        logging.debug("difference in field %r", k)
                logging.debug("style %r differs (self=%r, other=%r)", key, sv.as_dict(), ov.as_dict())
                return False

        # Events: same count, compared pairwise in order.
        if len(self) != len(other):
            logging.debug("different # of subtitles (self=%d, other=%d)", len(self), len(other))
            return False

        for i, (se, oe) in enumerate(zip(self.events, other.events)):
            if not se.equals(oe):
                for k in se.FIELDS:
                    if getattr(se, k) != getattr(oe, k):
                        logging.debug("difference in field %r", k)
                logging.debug("event %d differs (self=%r, other=%r)", i, se.as_dict(), oe.as_dict())
                return False

        return True

    def __repr__(self):
        """Summarize event and style counts, plus the last timestamp if any."""
        if self.events:
            max_time = max(ev.end for ev in self)
            s = "<SSAFile with %d events and %d styles, last timestamp %s>" % \
                (len(self), len(self.styles), ms_to_str(max_time))
        else:
            s = "<SSAFile with 0 events and %d styles>" % len(self.styles)

        # with unicode_literals, ``s`` is unicode on Python 2 and is encoded
        # to a byte string there
        if not PY3:
            s = s.encode("utf-8")
        return s

    # ------------------------------------------------------------------------
    # MutableSequence implementation + sort()
    # ------------------------------------------------------------------------

    def sort(self):
        """Sort subtitles time-wise, in-place."""
        self.events.sort()

    def __getitem__(self, item):
        """Return the event (or list of events, for a slice) at ``item``."""
        return self.events[item]

    def __setitem__(self, key, value):
        """
        Replace the event at ``key``.

        ``key`` may also be a slice, in which case ``value`` must be
        an iterable of :class:`SSAEvent` objects.

        Raises:
            TypeError: value is not an SSAEvent (or, for a slice,
                not an iterable of SSAEvent objects).

        """
        if isinstance(key, slice):
            # materialize once so that one-shot iterables can be both
            # validated and assigned
            new_events = list(value)
            if not all(isinstance(ev, SSAEvent) for ev in new_events):
                raise TypeError("SSAFile.events must contain only SSAEvent objects")
            self.events[key] = new_events
        elif isinstance(value, SSAEvent):
            self.events[key] = value
        else:
            raise TypeError("SSAFile.events must contain only SSAEvent objects")

    def __delitem__(self, key):
        """Delete the event (or slice of events) at ``key``."""
        del self.events[key]

    def __len__(self):
        """Return the number of events."""
        return len(self.events)

    def insert(self, index, value):
        """
        Insert an event before ``index``.

        Raises:
            TypeError: value is not an SSAEvent.

        """
        if isinstance(value, SSAEvent):
            self.events.insert(index, value)
        else:
            raise TypeError("SSAFile.events must contain only SSAEvent objects")
|