Updated pysubs2 module to support newer SSA files.

pull/1471/head
morpheus65535 3 years ago
parent 60353c0367
commit 09a8335a03

@ -10,3 +10,6 @@ load = SSAFile.load
#: Alias for :meth:`pysubs2.time.make_time()`. #: Alias for :meth:`pysubs2.time.make_time()`.
make_time = time.make_time make_time = time.make_time
#: Alias for `pysubs2.common.VERSION`.
__version__ = VERSION

@ -1,4 +1,3 @@
from __future__ import unicode_literals, print_function
import argparse import argparse
import codecs import codecs
import os import os
@ -8,38 +7,39 @@ import io
from io import open from io import open
import sys import sys
from textwrap import dedent from textwrap import dedent
from .formats import get_file_extension from .formats import get_file_extension, FORMAT_IDENTIFIERS
from .time import make_time from .time import make_time
from .ssafile import SSAFile from .ssafile import SSAFile
from .common import PY3, VERSION from .common import VERSION
import logging
def positive_float(s: str) -> float:
    """
    Argparse ``type=`` callback that parses ``s`` as a strictly positive float.

    Arguments:
        s: Text taken from the command line.

    Returns:
        The parsed number.

    Raises:
        argparse.ArgumentTypeError: If the number is not greater than zero
            (NaN never compares greater than zero, so it is rejected as well).
        ValueError: If ``s`` is not a float literal at all (raised by ``float``).
    """
    value = float(s)
    if value > 0:
        return value
    raise argparse.ArgumentTypeError("%r is not a positive number" % s)
def character_encoding(s: str) -> str:
    """
    Argparse ``type=`` callback that accepts only encoding names Python knows.

    Arguments:
        s: Encoding name taken from the command line (eg. ``"utf-8"``).

    Returns:
        ``s`` unchanged, once ``codecs.lookup()`` has accepted it.

    Raises:
        argparse.ArgumentTypeError: If no codec is registered under that name.
    """
    try:
        codecs.lookup(s)
    except LookupError as exc:
        # ArgumentTypeError is the exception argparse expects from a type callback;
        # the bare ``argparse.ArgumentError`` class cannot even be instantiated
        # without arguments and would surface as an unrelated TypeError.
        raise argparse.ArgumentTypeError("%r is not a known character encoding" % s) from exc
    return s
def time(s: str):
    """
    Argparse ``type=`` callback that parses a duration such as ``"1h30m"`` or ``"0.3s"``.

    Every ``<number><unit>`` pair found in ``s`` (units: ``h``, ``m``, ``s``, ``ms``)
    is converted to a float and passed to ``make_time()`` as a keyword argument
    named after the unit. If a unit occurs more than once, the last value wins.
    Text that does not form such a pair is ignored.

    Arguments:
        s: Duration text taken from the command line.

    Returns:
        Whatever ``make_time()`` returns for the collected units.

    Raises:
        ValueError: If a matched number is not a valid float literal
            (the pattern also matches an empty number or a lone ``"."``).
    """
    units = {
        unit: float(amount)
        for amount, unit in re.findall(r"(\d*\.?\d*)(ms|m|s|h)", s)
    }
    return make_time(**units)
def change_ext(path: str, ext: str) -> str:
    """
    Return ``path`` with its last extension swapped for ``ext``.

    ``ext`` is appended verbatim, so it has to carry its own leading dot
    (eg. ``".srt"``). A path without an extension simply gets ``ext`` appended.
    """
    stem = op.splitext(path)[0]
    return stem + ext
class Pysubs2CLI(object): class Pysubs2CLI:
def __init__(self): def __init__(self):
parser = self.parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, parser = self.parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
prog="pysubs2", prog="pysubs2",
@ -50,6 +50,7 @@ class Pysubs2CLI(object):
epilog=dedent(""" epilog=dedent("""
usage examples: usage examples:
python -m pysubs2 --to srt *.ass python -m pysubs2 --to srt *.ass
python -m pysubs2 --to srt --clean *.ass
python -m pysubs2 --to microdvd --fps 23.976 *.ass python -m pysubs2 --to microdvd --fps 23.976 *.ass
python -m pysubs2 --shift 0.3s *.srt python -m pysubs2 --shift 0.3s *.srt
python -m pysubs2 --shift 0.3s <my_file.srt >retimed_file.srt python -m pysubs2 --shift 0.3s <my_file.srt >retimed_file.srt
@ -57,21 +58,21 @@ class Pysubs2CLI(object):
python -m pysubs2 --transform-framerate 25 23.976 *.srt""")) python -m pysubs2 --transform-framerate 25 23.976 *.srt"""))
parser.add_argument("files", nargs="*", metavar="FILE", parser.add_argument("files", nargs="*", metavar="FILE",
help="Input subtitle files. Can be in SubStation Alpha (*.ass, *.ssa), SubRip (*.srt) or " help="Input subtitle files. Can be in SubStation Alpha (*.ass, *.ssa), SubRip (*.srt), "
"MicroDVD (*.sub) formats. When no files are specified, pysubs2 will work as a pipe, " "MicroDVD (*.sub) or other supported format. When no files are specified, "
"reading from standard input and writing to standard output.") "pysubs2 will work as a pipe, reading from standard input and writing to standard output.")
parser.add_argument("-v", "--version", action="version", version="pysubs2 %s" % VERSION) parser.add_argument("-v", "--version", action="version", version="pysubs2 %s" % VERSION)
parser.add_argument("-f", "--from", choices=["ass", "ssa", "srt", "microdvd", "json"], dest="input_format", parser.add_argument("-f", "--from", choices=FORMAT_IDENTIFIERS, dest="input_format",
help="By default, subtitle format is detected from the file. This option can be used to " help="By default, subtitle format is detected from the file. This option can be used to "
"skip autodetection and force specific format. Generally, it should never be needed.") "skip autodetection and force specific format. Generally, it should never be needed.")
parser.add_argument("-t", "--to", choices=["ass", "ssa", "srt", "microdvd", "json"], dest="output_format", parser.add_argument("-t", "--to", choices=FORMAT_IDENTIFIERS, dest="output_format",
help="Convert subtitle files to given format. By default, each file is saved in its " help="Convert subtitle files to given format. By default, each file is saved in its "
"original format.") "original format.")
parser.add_argument("--input-enc", metavar="ENCODING", default="iso-8859-1", type=character_encoding, parser.add_argument("--input-enc", metavar="ENCODING", default="utf-8", type=character_encoding,
help="Character encoding for input files. By default, ISO-8859-1 is used for both " help="Character encoding for input files. By default, UTF-8 is used for both "
"input and output, which should generally work (for 8-bit encodings).") "input and output.")
parser.add_argument("--output-enc", metavar="ENCODING", type=character_encoding, parser.add_argument("--output-enc", metavar="ENCODING", type=character_encoding,
help="Character encoding for output files. By default, it is the same as input encoding. " help="Character encoding for output files. By default, it is the same as input encoding. "
"If you wish to convert between encodings, make sure --input-enc is set correctly! " "If you wish to convert between encodings, make sure --input-enc is set correctly! "
@ -85,6 +86,11 @@ class Pysubs2CLI(object):
help="Use this to save all files to given directory. By default, every file is saved to its parent directory, " help="Use this to save all files to given directory. By default, every file is saved to its parent directory, "
"ie. unless it's being saved in different subtitle format (and thus with different file extension), " "ie. unless it's being saved in different subtitle format (and thus with different file extension), "
"it overwrites the original file.") "it overwrites the original file.")
parser.add_argument("--clean", action="store_true",
help="Attempt to remove non-essential subtitles (eg. karaoke, SSA drawing tags), "
"strip styling information when saving to non-SSA formats")
parser.add_argument("--verbose", action="store_true",
help="Print misc logging")
group = parser.add_mutually_exclusive_group() group = parser.add_mutually_exclusive_group()
@ -105,6 +111,9 @@ class Pysubs2CLI(object):
args = self.parser.parse_args(argv) args = self.parser.parse_args(argv)
errors = 0 errors = 0
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
if args.output_dir and not op.exists(args.output_dir): if args.output_dir and not op.exists(args.output_dir):
os.makedirs(args.output_dir) os.makedirs(args.output_dir)
@ -138,19 +147,15 @@ class Pysubs2CLI(object):
outpath = op.join(args.output_dir, filename) outpath = op.join(args.output_dir, filename)
with open(outpath, "w", encoding=args.output_enc) as outfile: with open(outpath, "w", encoding=args.output_enc) as outfile:
subs.to_file(outfile, output_format, args.fps) subs.to_file(outfile, output_format, args.fps, apply_styles=not args.clean)
else: else:
if PY3: infile = io.TextIOWrapper(sys.stdin.buffer, args.input_enc)
infile = io.TextIOWrapper(sys.stdin.buffer, args.input_enc) outfile = io.TextIOWrapper(sys.stdout.buffer, args.output_enc)
outfile = io.TextIOWrapper(sys.stdout.buffer, args.output_enc)
else:
infile = io.TextIOWrapper(sys.stdin, args.input_enc)
outfile = io.TextIOWrapper(sys.stdout, args.output_enc)
subs = SSAFile.from_file(infile, args.input_format, args.fps) subs = SSAFile.from_file(infile, args.input_format, args.fps)
self.process(subs, args) self.process(subs, args)
output_format = args.output_format or subs.format output_format = args.output_format or subs.format
subs.to_file(outfile, output_format, args.fps) subs.to_file(outfile, output_format, args.fps, apply_styles=not args.clean)
return (0 if errors == 0 else 1) return (0 if errors == 0 else 1)
@ -164,6 +169,9 @@ class Pysubs2CLI(object):
in_fps, out_fps = args.transform_framerate in_fps, out_fps = args.transform_framerate
subs.transform_framerate(in_fps, out_fps) subs.transform_framerate(in_fps, out_fps)
if args.clean:
subs.remove_miscellaneous_events()
def __main__(): def __main__():
cli = Pysubs2CLI() cli = Pysubs2CLI()

@ -1,30 +1,32 @@
from collections import namedtuple from dataclasses import dataclass
import sys from typing import Union
_Color = namedtuple("Color", "r g b a")
@dataclass(init=False)
class Color:
    """
    8-bit RGB color with alpha channel.

    Each of the four channels has to be a value contained in ``range(256)``,
    ie. 0 to 255; anything else is rejected by the constructor.
    """
    r: int
    g: int
    b: int
    a: int = 0

    def __init__(self, r: int, g: int, b: int, a: int = 0):
        # All channels are checked before anything is stored on the instance.
        if any(channel not in range(256) for channel in (r, g, b, a)):
            raise ValueError("Color channels must have values 0-255")
        self.r, self.g, self.b, self.a = r, g, b, a
#: Version of the pysubs2 library.
VERSION = "0.2.4"
#: Version of the pysubs2 library.
VERSION = "1.2.0"
PY3 = sys.version_info.major == 3
if PY3: IntOrFloat = Union[int, float]
text_type = str
binary_string_type = bytes
else:
text_type = unicode
binary_string_type = str

@ -1,17 +1,22 @@
class Pysubs2Error(Exception):
    """Common base class; every pysubs2 exception defined below derives from it."""


class UnknownFPSError(Pysubs2Error):
    """No framerate was given and none could be inferred."""


class UnknownFileExtensionError(Pysubs2Error):
    """The file extension does not correspond to any known subtitle format."""


class UnknownFormatIdentifierError(Pysubs2Error):
    """The format identifier (ie. a string like ``"srt"``) is not recognized."""


class FormatAutodetectionError(Pysubs2Error):
    """The subtitle format is ambiguous or could not be determined."""


class ContentNotUsable(Pysubs2Error):
    """The current content cannot be used with the specified format."""

@ -1,4 +1,8 @@
class FormatBase(object): from typing import Optional
import io
class FormatBase:
""" """
Base class for subtitle format implementations. Base class for subtitle format implementations.
@ -14,7 +18,7 @@ class FormatBase(object):
""" """
@classmethod @classmethod
def from_file(cls, subs, fp, format_, **kwargs): def from_file(cls, subs, fp: io.TextIOBase, format_: str, **kwargs):
""" """
Load subtitle file into an empty SSAFile. Load subtitle file into an empty SSAFile.
@ -37,7 +41,7 @@ class FormatBase(object):
raise NotImplementedError("Parsing is not supported for this format") raise NotImplementedError("Parsing is not supported for this format")
@classmethod @classmethod
def to_file(cls, subs, fp, format_, **kwargs): def to_file(cls, subs, fp: io.TextIOBase, format_: str, **kwargs):
""" """
Write SSAFile into a file. Write SSAFile into a file.
@ -62,7 +66,7 @@ class FormatBase(object):
raise NotImplementedError("Writing is not supported for this format") raise NotImplementedError("Writing is not supported for this format")
@classmethod @classmethod
def guess_format(self, text): def guess_format(self, text: str) -> Optional[str]:
""" """
Return format identifier of recognized format, or None. Return format identifier of recognized format, or None.

@ -1,3 +1,5 @@
from typing import Dict, Type
from .formatbase import FormatBase from .formatbase import FormatBase
from .microdvd import MicroDVDFormat from .microdvd import MicroDVDFormat
from .subrip import SubripFormat from .subrip import SubripFormat
@ -5,20 +7,22 @@ from .jsonformat import JSONFormat
from .substation import SubstationFormat from .substation import SubstationFormat
from .mpl2 import MPL2Format from .mpl2 import MPL2Format
from .tmp import TmpFormat from .tmp import TmpFormat
from .webvtt import WebVTTFormat
from .exceptions import * from .exceptions import *
#: Dict mapping file extensions to format identifiers. #: Dict mapping file extensions to format identifiers.
FILE_EXTENSION_TO_FORMAT_IDENTIFIER = { FILE_EXTENSION_TO_FORMAT_IDENTIFIER: Dict[str, str] = {
".srt": "srt", ".srt": "srt",
".ass": "ass", ".ass": "ass",
".ssa": "ssa", ".ssa": "ssa",
".sub": "microdvd", ".sub": "microdvd",
".json": "json", ".json": "json",
".txt": "tmp", ".txt": "tmp",
".vtt": "vtt",
} }
#: Dict mapping format identifiers to implementations (FormatBase subclasses). #: Dict mapping format identifiers to implementations (FormatBase subclasses).
FORMAT_IDENTIFIER_TO_FORMAT_CLASS = { FORMAT_IDENTIFIER_TO_FORMAT_CLASS: Dict[str, Type[FormatBase]] = {
"srt": SubripFormat, "srt": SubripFormat,
"ass": SubstationFormat, "ass": SubstationFormat,
"ssa": SubstationFormat, "ssa": SubstationFormat,
@ -26,23 +30,29 @@ FORMAT_IDENTIFIER_TO_FORMAT_CLASS = {
"json": JSONFormat, "json": JSONFormat,
"mpl2": MPL2Format, "mpl2": MPL2Format,
"tmp": TmpFormat, "tmp": TmpFormat,
"vtt": WebVTTFormat,
} }
def get_format_class(format_): FORMAT_IDENTIFIERS = list(FORMAT_IDENTIFIER_TO_FORMAT_CLASS.keys())
def get_format_class(format_: str) -> Type[FormatBase]:
    """
    Format identifier -> format class (ie. subclass of FormatBase)

    Raises:
        UnknownFormatIdentifierError: If ``format_`` is not a key of
            ``FORMAT_IDENTIFIER_TO_FORMAT_CLASS``.
    """
    try:
        implementation = FORMAT_IDENTIFIER_TO_FORMAT_CLASS[format_]
    except KeyError:
        raise UnknownFormatIdentifierError(format_)
    return implementation
def get_format_identifier(ext: str) -> str:
    """
    File extension -> format identifier

    The lookup is an exact match against the keys of
    ``FILE_EXTENSION_TO_FORMAT_IDENTIFIER`` (which include the leading dot).

    Raises:
        UnknownFileExtensionError: If ``ext`` is not one of those keys.
    """
    try:
        identifier = FILE_EXTENSION_TO_FORMAT_IDENTIFIER[ext]
    except KeyError:
        raise UnknownFileExtensionError(ext)
    return identifier
def get_file_extension(format_):
def get_file_extension(format_: str) -> str:
"""Format identifier -> file extension""" """Format identifier -> file extension"""
if format_ not in FORMAT_IDENTIFIER_TO_FORMAT_CLASS: if format_ not in FORMAT_IDENTIFIER_TO_FORMAT_CLASS:
raise UnknownFormatIdentifierError(format_) raise UnknownFormatIdentifierError(format_)
@ -53,7 +63,8 @@ def get_file_extension(format_):
raise RuntimeError("No file extension for format %r" % format_) raise RuntimeError("No file extension for format %r" % format_)
def autodetect_format(content):
def autodetect_format(content: str) -> str:
"""Return format identifier for given fragment or raise FormatAutodetectionError.""" """Return format identifier for given fragment or raise FormatAutodetectionError."""
formats = set() formats = set()
for impl in FORMAT_IDENTIFIER_TO_FORMAT_CLASS.values(): for impl in FORMAT_IDENTIFIER_TO_FORMAT_CLASS.values():

@ -1,20 +1,35 @@
from __future__ import unicode_literals, print_function import dataclasses
import json import json
from .common import Color, PY3 from .common import Color
from .ssaevent import SSAEvent from .ssaevent import SSAEvent
from .ssastyle import SSAStyle from .ssastyle import SSAStyle
from .formatbase import FormatBase from .formatbase import FormatBase
# We're using Color dataclass
# https://stackoverflow.com/questions/51286748/make-the-python-json-encoder-support-pythons-new-dataclasses
class EnhancedJSONEncoder(json.JSONEncoder):
    """
    JSON encoder that additionally serializes dataclass instances as dicts.

    Anything else that the stock encoder cannot handle is passed on to
    ``json.JSONEncoder.default()``, which raises ``TypeError``.
    """

    def default(self, o):
        """Return a JSON-serializable stand-in for ``o`` or defer to the base class."""
        # is_dataclass() is also true for dataclass *types*, but asdict() accepts
        # instances only; without the isinstance() guard a dataclass class would
        # fail inside asdict() instead of with the usual "not JSON serializable".
        if dataclasses.is_dataclass(o) and not isinstance(o, type):
            return dataclasses.asdict(o)
        return super().default(o)
class JSONFormat(FormatBase): class JSONFormat(FormatBase):
"""
Implementation of JSON subtitle pseudo-format (serialized pysubs2 internal representation)
This is essentially SubStation Alpha as JSON.
"""
@classmethod @classmethod
def guess_format(cls, text): def guess_format(cls, text):
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if text.startswith("{\""): if text.startswith("{\""):
return "json" return "json"
@classmethod @classmethod
def from_file(cls, subs, fp, format_, **kwargs): def from_file(cls, subs, fp, format_, **kwargs):
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
data = json.load(fp) data = json.load(fp)
subs.info.clear() subs.info.clear()
@ -25,7 +40,7 @@ class JSONFormat(FormatBase):
subs.styles[name] = sty = SSAStyle() subs.styles[name] = sty = SSAStyle()
for k, v in fields.items(): for k, v in fields.items():
if "color" in k: if "color" in k:
setattr(sty, k, Color(*v)) setattr(sty, k, Color(**v))
else: else:
setattr(sty, k, v) setattr(sty, k, v)
@ -33,14 +48,11 @@ class JSONFormat(FormatBase):
@classmethod @classmethod
def to_file(cls, subs, fp, format_, **kwargs): def to_file(cls, subs, fp, format_, **kwargs):
"""See :meth:`pysubs2.formats.FormatBase.to_file()`"""
data = { data = {
"info": dict(**subs.info), "info": dict(**subs.info),
"styles": {name: sty.as_dict() for name, sty in subs.styles.items()}, "styles": {name: sty.as_dict() for name, sty in subs.styles.items()},
"events": [ev.as_dict() for ev in subs.events] "events": [ev.as_dict() for ev in subs.events]
} }
if PY3: json.dump(data, fp, cls=EnhancedJSONEncoder)
json.dump(data, fp)
else:
text = json.dumps(data, fp)
fp.write(unicode(text))

@ -1,8 +1,5 @@
from __future__ import unicode_literals, print_function
from functools import partial from functools import partial
import re import re
from .common import text_type
from .exceptions import UnknownFPSError from .exceptions import UnknownFPSError
from .ssaevent import SSAEvent from .ssaevent import SSAEvent
from .ssastyle import SSAStyle from .ssastyle import SSAStyle
@ -15,13 +12,16 @@ MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)")
class MicroDVDFormat(FormatBase): class MicroDVDFormat(FormatBase):
"""MicroDVD subtitle format implementation"""
@classmethod @classmethod
def guess_format(cls, text): def guess_format(cls, text):
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if any(map(MICRODVD_LINE.match, text.splitlines())): if any(map(MICRODVD_LINE.match, text.splitlines())):
return "microdvd" return "microdvd"
@classmethod @classmethod
def from_file(cls, subs, fp, format_, fps=None, **kwargs): def from_file(cls, subs, fp, format_, fps=None, **kwargs):
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
for line in fp: for line in fp:
match = MICRODVD_LINE.match(line) match = MICRODVD_LINE.match(line)
if not match: if not match:
@ -63,7 +63,18 @@ class MicroDVDFormat(FormatBase):
subs.append(ev) subs.append(ev)
@classmethod @classmethod
def to_file(cls, subs, fp, format_, fps=None, write_fps_declaration=True, **kwargs): def to_file(cls, subs, fp, format_, fps=None, write_fps_declaration=True, apply_styles=True, **kwargs):
"""
See :meth:`pysubs2.formats.FormatBase.to_file()`
The only supported styling is marking whole lines italic.
Keyword args:
write_fps_declaration: If True, create a zero-duration first subtitle which will contain
the fps.
apply_styles: If False, do not write any styling.
"""
if fps is None: if fps is None:
fps = subs.fps fps = subs.fps
@ -83,11 +94,14 @@ class MicroDVDFormat(FormatBase):
# insert an artificial first line telling the framerate # insert an artificial first line telling the framerate
if write_fps_declaration: if write_fps_declaration:
subs.insert(0, SSAEvent(start=0, end=0, text=text_type(fps))) subs.insert(0, SSAEvent(start=0, end=0, text=str(fps)))
for line in subs:
if line.is_comment or line.is_drawing:
continue
for line in (ev for ev in subs if not ev.is_comment):
text = "|".join(line.plaintext.splitlines()) text = "|".join(line.plaintext.splitlines())
if is_entirely_italic(line): if apply_styles and is_entirely_italic(line):
text = "{Y:i}" + text text = "{Y:i}" + text
start, end = map(to_frames, (line.start, line.end)) start, end = map(to_frames, (line.start, line.end))

@ -1,6 +1,3 @@
# coding=utf-8
from __future__ import print_function, division, unicode_literals
import re import re
from .time import times_to_ms from .time import times_to_ms
@ -13,13 +10,16 @@ MPL2_FORMAT = re.compile(r"^(?um)\[(-?\d+)\]\[(-?\d+)\](.*)")
class MPL2Format(FormatBase): class MPL2Format(FormatBase):
"""MPL2 subtitle format implementation"""
@classmethod @classmethod
def guess_format(cls, text): def guess_format(cls, text):
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if MPL2_FORMAT.search(text): if MPL2_FORMAT.search(text):
return "mpl2" return "mpl2"
@classmethod @classmethod
def from_file(cls, subs, fp, format_, **kwargs): def from_file(cls, subs, fp, format_, **kwargs):
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
def prepare_text(lines): def prepare_text(lines):
out = [] out = []
for s in lines.split("|"): for s in lines.split("|"):
@ -37,7 +37,12 @@ class MPL2Format(FormatBase):
@classmethod @classmethod
def to_file(cls, subs, fp, format_, **kwargs): def to_file(cls, subs, fp, format_, **kwargs):
"""
See :meth:`pysubs2.formats.FormatBase.to_file()`
No styling is supported at the moment.
"""
# TODO handle italics # TODO handle italics
for line in subs: for line in subs:
if line.is_comment: if line.is_comment:

@ -1,10 +1,14 @@
from __future__ import unicode_literals
import re import re
import warnings
from typing import Optional, Dict, Any, ClassVar
import dataclasses
from .common import IntOrFloat
from .time import ms_to_str, make_time from .time import ms_to_str, make_time
from .common import PY3
class SSAEvent(object): @dataclasses.dataclass(repr=False, eq=False, order=False)
class SSAEvent:
""" """
A SubStation Event, ie. one subtitle. A SubStation Event, ie. one subtitle.
@ -21,36 +25,29 @@ class SSAEvent(object):
>>> ev = SSAEvent(start=make_time(s=1), end=make_time(s=2.5), text="Hello World!") >>> ev = SSAEvent(start=make_time(s=1), end=make_time(s=2.5), text="Hello World!")
""" """
OVERRIDE_SEQUENCE = re.compile(r"{[^}]*}") OVERRIDE_SEQUENCE: ClassVar = re.compile(r"{[^}]*}")
#: All fields in SSAEvent. start: int = 0 #: Subtitle start time (in milliseconds)
FIELDS = frozenset([ end: int = 10000 #: Subtitle end time (in milliseconds)
"start", "end", "text", "marked", "layer", "style", text: str = "" #: Text of subtitle (with SubStation override tags)
"name", "marginl", "marginr", "marginv", "effect", "type" marked: bool = False #: (SSA only)
]) layer: int = 0 #: Layer number, 0 is the lowest layer (ASS only)
style: str = "Default" #: Style name
def __init__(self, **fields): name: str = "" #: Actor name
self.start = 0 #: Subtitle start time (in milliseconds) marginl: int = 0 #: Left margin
self.end = 10000 #: Subtitle end time (in milliseconds) marginr: int = 0 #: Right margin
self.text = "" #: Text of subtitle (with SubStation override tags) marginv: int = 0 #: Vertical margin
self.marked = False #: (SSA only) effect: str = "" #: Line effect
self.layer = 0 #: Layer number, 0 is the lowest layer (ASS only) type: str = "Dialogue" #: Line type (Dialogue/Comment)
self.style = "Default" #: Style name
self.name = "" #: Actor name @property
self.marginl = 0 #: Left margin def FIELDS(self):
self.marginr = 0 #: Right margin """All fields in SSAEvent."""
self.marginv = 0 #: Vertical margin warnings.warn("Deprecated in 1.2.0 - it's a dataclass now", DeprecationWarning)
self.effect = "" #: Line effect return frozenset(field.name for field in dataclasses.fields(self))
self.type = "Dialogue" #: Line type (Dialogue/Comment)
for k, v in fields.items():
if k in self.FIELDS:
setattr(self, k, v)
else:
raise ValueError("SSAEvent has no field named %r" % k)
@property @property
def duration(self): def duration(self) -> IntOrFloat:
""" """
Subtitle duration in milliseconds (read/write property). Subtitle duration in milliseconds (read/write property).
@ -60,14 +57,14 @@ class SSAEvent(object):
return self.end - self.start return self.end - self.start
@duration.setter @duration.setter
def duration(self, ms): def duration(self, ms: int):
if ms >= 0: if ms >= 0:
self.end = self.start + ms self.end = self.start + ms
else: else:
raise ValueError("Subtitle duration cannot be negative") raise ValueError("Subtitle duration cannot be negative")
@property @property
def is_comment(self): def is_comment(self) -> bool:
""" """
When true, the subtitle is a comment, ie. not visible (read/write property). When true, the subtitle is a comment, ie. not visible (read/write property).
@ -77,14 +74,20 @@ class SSAEvent(object):
return self.type == "Comment" return self.type == "Comment"
@is_comment.setter @is_comment.setter
def is_comment(self, value): def is_comment(self, value: bool):
if value: if value:
self.type = "Comment" self.type = "Comment"
else: else:
self.type = "Dialogue" self.type = "Dialogue"
@property @property
def plaintext(self): def is_drawing(self) -> bool:
"""Returns True if line is SSA drawing tag (ie. not text)"""
from .substation import parse_tags
return any(sty.drawing for _, sty in parse_tags(self.text))
@property
def plaintext(self) -> str:
""" """
Subtitle text as multi-line string with no tags (read/write property). Subtitle text as multi-line string with no tags (read/write property).
@ -99,10 +102,11 @@ class SSAEvent(object):
return text return text
@plaintext.setter @plaintext.setter
def plaintext(self, text): def plaintext(self, text: str):
self.text = text.replace("\n", r"\N") self.text = text.replace("\n", r"\N")
def shift(self, h=0, m=0, s=0, ms=0, frames=None, fps=None): def shift(self, h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
frames: Optional[int]=None, fps: Optional[float]=None):
""" """
Shift start and end times. Shift start and end times.
@ -113,41 +117,39 @@ class SSAEvent(object):
self.start += delta self.start += delta
self.end += delta self.end += delta
def copy(self): def copy(self) -> "SSAEvent":
"""Return a copy of the SSAEvent.""" """Return a copy of the SSAEvent."""
return SSAEvent(**self.as_dict()) return SSAEvent(**self.as_dict())
def as_dict(self): def as_dict(self) -> Dict[str, Any]:
return {field: getattr(self, field) for field in self.FIELDS} # dataclasses.asdict() would recursively dictify Color objects, which we don't want
return {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
def equals(self, other): def equals(self, other: "SSAEvent") -> bool:
"""Field-based equality for SSAEvents.""" """Field-based equality for SSAEvents."""
if isinstance(other, SSAEvent): if isinstance(other, SSAEvent):
return self.as_dict() == other.as_dict() return self.as_dict() == other.as_dict()
else: else:
raise TypeError("Cannot compare to non-SSAEvent object") raise TypeError("Cannot compare to non-SSAEvent object")
def __eq__(self, other): def __eq__(self, other: "SSAEvent"):
# XXX document this # XXX document this
return self.start == other.start and self.end == other.end return self.start == other.start and self.end == other.end
def __ne__(self, other): def __ne__(self, other: "SSAEvent"):
return self.start != other.start or self.end != other.end return self.start != other.start or self.end != other.end
def __lt__(self, other): def __lt__(self, other: "SSAEvent"):
return (self.start, self.end) < (other.start, other.end) return (self.start, self.end) < (other.start, other.end)
def __le__(self, other): def __le__(self, other: "SSAEvent"):
return (self.start, self.end) <= (other.start, other.end) return (self.start, self.end) <= (other.start, other.end)
def __gt__(self, other): def __gt__(self, other: "SSAEvent"):
return (self.start, self.end) > (other.start, other.end) return (self.start, self.end) > (other.start, other.end)
def __ge__(self, other): def __ge__(self, other: "SSAEvent"):
return (self.start, self.end) >= (other.start, other.end) return (self.start, self.end) >= (other.start, other.end)
def __repr__(self): def __repr__(self):
s = "<SSAEvent type={self.type} start={start} end={end} text='{self.text}'>".format( return f"<SSAEvent type={self.type} start={ms_to_str(self.start)} end={ms_to_str(self.end)} text={self.text!r}>"
self=self, start=ms_to_str(self.start), end=ms_to_str(self.end))
if not PY3: s = s.encode("utf-8")
return s

@ -1,16 +1,17 @@
from __future__ import print_function, unicode_literals, division from collections import MutableSequence
from collections import MutableSequence, OrderedDict
import io import io
from io import open from io import open
from itertools import starmap, chain from itertools import chain
import os.path import os.path
import logging import logging
from typing import Optional, List, Dict, Iterable, Any
from .common import IntOrFloat
from .formats import autodetect_format, get_format_class, get_format_identifier from .formats import autodetect_format, get_format_class, get_format_identifier
from .substation import is_valid_field_content from .substation import is_valid_field_content
from .ssaevent import SSAEvent from .ssaevent import SSAEvent
from .ssastyle import SSAStyle from .ssastyle import SSAStyle
from .time import make_time, ms_to_str from .time import make_time, ms_to_str
from .common import PY3
class SSAFile(MutableSequence): class SSAFile(MutableSequence):
@ -31,28 +32,37 @@ class SSAFile(MutableSequence):
""" """
DEFAULT_INFO = OrderedDict([ DEFAULT_INFO = {
("WrapStyle", "0"), "WrapStyle": "0",
("ScaledBorderAndShadow", "yes"), "ScaledBorderAndShadow": "yes",
("Collisions", "Normal")]) "Collisions": "Normal"
}
def __init__(self): def __init__(self):
self.events = [] #: List of :class:`SSAEvent` instances, ie. individual subtitles. self.events: List[SSAEvent] = [] #: List of :class:`SSAEvent` instances, ie. individual subtitles.
self.styles = OrderedDict([("Default", SSAStyle.DEFAULT_STYLE.copy())]) #: Dict of :class:`SSAStyle` instances. self.styles: Dict[str, SSAStyle] = {"Default": SSAStyle.DEFAULT_STYLE.copy()} #: Dict of :class:`SSAStyle` instances.
self.info = self.DEFAULT_INFO.copy() #: Dict with script metadata, ie. ``[Script Info]``. self.info: Dict[str, str] = self.DEFAULT_INFO.copy() #: Dict with script metadata, ie. ``[Script Info]``.
self.aegisub_project = OrderedDict() #: Dict with Aegisub project, ie. ``[Aegisub Project Garbage]``. self.aegisub_project: Dict[str, str] = {} #: Dict with Aegisub project, ie. ``[Aegisub Project Garbage]``.
self.fps = None #: Framerate used when reading the file, if applicable. self.fonts_opaque: Dict[str, Any] = {} #: Dict with embedded fonts, ie. ``[Fonts]``.
self.format = None #: Format of source subtitle file, if applicable, eg. ``"srt"``. self.fps: Optional[float] = None #: Framerate used when reading the file, if applicable.
self.format: Optional[str] = None #: Format of source subtitle file, if applicable, eg. ``"srt"``.
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# I/O methods # I/O methods
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
@classmethod @classmethod
def load(cls, path, encoding="utf-8", format_=None, fps=None, **kwargs): def load(cls, path: str, encoding: str="utf-8", format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
""" """
Load subtitle file from given path. Load subtitle file from given path.
This method is implemented in terms of :meth:`SSAFile.from_file()`.
See also:
Specific formats may implement additional loading options,
please refer to documentation of the implementation classes
(eg. :meth:`pysubs2.subrip.SubripFormat.from_file()`)
Arguments: Arguments:
path (str): Path to subtitle file. path (str): Path to subtitle file.
encoding (str): Character encoding of input file. encoding (str): Character encoding of input file.
@ -66,14 +76,7 @@ class SSAFile(MutableSequence):
be detected from the file, in which case you don't need be detected from the file, in which case you don't need
to specify it here (when given, this argument overrides to specify it here (when given, this argument overrides
autodetection). autodetection).
keep_unknown_html_tags (bool): This affects SubRip only (SRT), kwargs: Extra options for the reader.
for other formats this argument is ignored.
By default, HTML tags are converted to equivalent SubStation tags
(eg. ``<i>`` to ``{\\i1}`` and any remaining tags are removed
to keep the text clean. Set this parameter to ``True``
if you want to pass through these tags (eg. ``<sub>``).
This is useful if your output format is SRT and your player
supports these tags.
Returns: Returns:
SSAFile SSAFile
@ -100,7 +103,7 @@ class SSAFile(MutableSequence):
return cls.from_file(fp, format_, fps=fps, **kwargs) return cls.from_file(fp, format_, fps=fps, **kwargs)
@classmethod @classmethod
def from_string(cls, string, format_=None, fps=None, **kwargs): def from_string(cls, string: str, format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
""" """
Load subtitle file from string. Load subtitle file from string.
@ -126,7 +129,7 @@ class SSAFile(MutableSequence):
return cls.from_file(fp, format_, fps=fps, **kwargs) return cls.from_file(fp, format_, fps=fps, **kwargs)
@classmethod @classmethod
def from_file(cls, fp, format_=None, fps=None, **kwargs): def from_file(cls, fp: io.TextIOBase, format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
""" """
Read subtitle file from file object. Read subtitle file from file object.
@ -160,10 +163,17 @@ class SSAFile(MutableSequence):
impl.from_file(subs, fp, format_, fps=fps, **kwargs) impl.from_file(subs, fp, format_, fps=fps, **kwargs)
return subs return subs
def save(self, path, encoding="utf-8", format_=None, fps=None, **kwargs): def save(self, path: str, encoding: str="utf-8", format_: Optional[str]=None, fps: Optional[float]=None, **kwargs):
""" """
Save subtitle file to given path. Save subtitle file to given path.
This method is implemented in terms of :meth:`SSAFile.to_file()`.
See also:
Specific formats may implement additional saving options,
please refer to documentation of the implementation classes
(eg. :meth:`pysubs2.subrip.SubripFormat.to_file()`)
Arguments: Arguments:
path (str): Path to subtitle file. path (str): Path to subtitle file.
encoding (str): Character encoding of output file. encoding (str): Character encoding of output file.
@ -197,7 +207,7 @@ class SSAFile(MutableSequence):
with open(path, "w", encoding=encoding) as fp: with open(path, "w", encoding=encoding) as fp:
self.to_file(fp, format_, fps=fps, **kwargs) self.to_file(fp, format_, fps=fps, **kwargs)
def to_string(self, format_, fps=None, **kwargs): def to_string(self, format_: str, fps: Optional[float]=None, **kwargs) -> str:
""" """
Get subtitle file as a string. Get subtitle file as a string.
@ -211,7 +221,7 @@ class SSAFile(MutableSequence):
self.to_file(fp, format_, fps=fps, **kwargs) self.to_file(fp, format_, fps=fps, **kwargs)
return fp.getvalue() return fp.getvalue()
def to_file(self, fp, format_, fps=None, **kwargs): def to_file(self, fp: io.TextIOBase, format_: str, fps: Optional[float]=None, **kwargs):
""" """
Write subtitle file to file object. Write subtitle file to file object.
@ -233,7 +243,8 @@ class SSAFile(MutableSequence):
# Retiming subtitles # Retiming subtitles
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
def shift(self, h=0, m=0, s=0, ms=0, frames=None, fps=None): def shift(self, h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
frames: Optional[int]=None, fps: Optional[float]=None):
""" """
Shift all subtitles by constant time amount. Shift all subtitles by constant time amount.
@ -255,7 +266,7 @@ class SSAFile(MutableSequence):
line.start += delta line.start += delta
line.end += delta line.end += delta
def transform_framerate(self, in_fps, out_fps): def transform_framerate(self, in_fps: float, out_fps: float):
""" """
Rescale all timestamps by ratio of in_fps/out_fps. Rescale all timestamps by ratio of in_fps/out_fps.
@ -282,7 +293,7 @@ class SSAFile(MutableSequence):
# Working with styles # Working with styles
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
def rename_style(self, old_name, new_name): def rename_style(self, old_name: str, new_name: str):
""" """
Rename a style, including references to it. Rename a style, including references to it.
@ -311,7 +322,7 @@ class SSAFile(MutableSequence):
if line.style == old_name: if line.style == old_name:
line.style = new_name line.style = new_name
def import_styles(self, subs, overwrite=True): def import_styles(self, subs: "SSAFile", overwrite: bool=True):
""" """
Merge in styles from other SSAFile. Merge in styles from other SSAFile.
@ -332,7 +343,39 @@ class SSAFile(MutableSequence):
# Helper methods # Helper methods
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
def remove_miscellaneous_events(self):
    """
    Remove subtitles which appear to be non-essential (the --clean in CLI)

    Currently, this removes events matching any of these criteria:

    - SSA event type Comment
    - SSA drawing tags
    - Less than two characters of text
    - Duplicated text with identical time interval (only the first event is kept)

    The surviving events keep their original relative order and replace
    ``self.events`` in a single assignment.
    """
    # (start, end, plaintext) of every event seen so far.  A set makes the
    # duplicate check O(1) per event instead of scanning a per-interval list.
    seen = set()
    kept = []

    for event in self:
        text = event.plaintext
        key = (event.start, event.end, text)

        # Every event is recorded, even one dropped for another reason below:
        # a later event repeating its text and interval still counts as a duplicate.
        is_duplicate = key in seen
        seen.add(key)

        if event.is_drawing or event.is_comment:
            continue
        if len(text.strip()) < 2:
            continue
        if is_duplicate:
            continue

        kept.append(event)

    self.events = kept
def equals(self, other: "SSAFile"):
""" """
Equality of two SSAFiles. Equality of two SSAFiles.
@ -357,6 +400,18 @@ class SSAFile(MutableSequence):
logging.debug("info %r differs (self=%r, other=%r)", key, sv, ov) logging.debug("info %r differs (self=%r, other=%r)", key, sv, ov)
return False return False
for key in set(chain(self.fonts_opaque.keys(), other.fonts_opaque.keys())):
sv, ov = self.fonts_opaque.get(key), other.fonts_opaque.get(key)
if sv is None:
logging.debug("%r missing in self.fonts_opaque", key)
return False
elif ov is None:
logging.debug("%r missing in other.fonts_opaque", key)
return False
elif sv != ov:
logging.debug("fonts_opaque %r differs (self=%r, other=%r)", key, sv, ov)
return False
for key in set(chain(self.styles.keys(), other.styles.keys())): for key in set(chain(self.styles.keys(), other.styles.keys())):
sv, ov = self.styles.get(key), other.styles.get(key) sv, ov = self.styles.get(key), other.styles.get(key)
if sv is None: if sv is None:
@ -389,12 +444,10 @@ class SSAFile(MutableSequence):
def __repr__(self): def __repr__(self):
if self.events: if self.events:
max_time = max(ev.end for ev in self) max_time = max(ev.end for ev in self)
s = "<SSAFile with %d events and %d styles, last timestamp %s>" % \ s = f"<SSAFile with {len(self)} events and {len(self.styles)} styles, last timestamp {ms_to_str(max_time)}>"
(len(self), len(self.styles), ms_to_str(max_time))
else: else:
s = "<SSAFile with 0 events and %d styles>" % len(self.styles) s = f"<SSAFile with 0 events and {len(self.styles)} styles>"
if not PY3: s = s.encode("utf-8")
return s return s
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
@ -405,22 +458,25 @@ class SSAFile(MutableSequence):
"""Sort subtitles time-wise, in-place.""" """Sort subtitles time-wise, in-place."""
self.events.sort() self.events.sort()
def __getitem__(self, item): def __iter__(self) -> Iterable[SSAEvent]:
return iter(self.events)
def __getitem__(self, item: int):
return self.events[item] return self.events[item]
def __setitem__(self, key, value): def __setitem__(self, key: int, value: SSAEvent):
if isinstance(value, SSAEvent): if isinstance(value, SSAEvent):
self.events[key] = value self.events[key] = value
else: else:
raise TypeError("SSAFile.events must contain only SSAEvent objects") raise TypeError("SSAFile.events must contain only SSAEvent objects")
def __delitem__(self, key): def __delitem__(self, key: int):
del self.events[key] del self.events[key]
def __len__(self): def __len__(self):
return len(self.events) return len(self.events)
def insert(self, index, value): def insert(self, index: int, value: SSAEvent):
if isinstance(value, SSAEvent): if isinstance(value, SSAEvent):
self.events.insert(index, value) self.events.insert(index, value)
else: else:

@ -1,8 +1,11 @@
from __future__ import unicode_literals import warnings
from .common import Color, PY3 from typing import Dict, Any, ClassVar
import dataclasses
from .common import Color
class SSAStyle(object): @dataclasses.dataclass(repr=False)
class SSAStyle:
""" """
A SubStation Style. A SubStation Style.
@ -17,71 +20,57 @@ class SSAStyle(object):
This class defines equality (equality of all fields). This class defines equality (equality of all fields).
""" """
DEFAULT_STYLE = None DEFAULT_STYLE: ClassVar["SSAStyle"] = None
#: All fields in SSAStyle. @property
FIELDS = frozenset([ def FIELDS(self):
"fontname", "fontsize", "primarycolor", "secondarycolor", """All fields in SSAStyle."""
"tertiarycolor", "outlinecolor", "backcolor", warnings.warn("Deprecated in 1.2.0 - it's a dataclass now", DeprecationWarning)
"bold", "italic", "underline", "strikeout", return frozenset(field.name for field in dataclasses.fields(self))
"scalex", "scaley", "spacing", "angle", "borderstyle",
"outline", "shadow", "alignment", fontname: str = "Arial" #: Font name
"marginl", "marginr", "marginv", "alphalevel", "encoding" fontsize: float = 20.0 #: Font size (in pixels)
]) primarycolor: Color = Color(255, 255, 255, 0) #: Primary color (:class:`pysubs2.Color` instance)
secondarycolor: Color = Color(255, 0, 0, 0) #: Secondary color (:class:`pysubs2.Color` instance)
def __init__(self, **fields): tertiarycolor: Color = Color(0, 0, 0, 0) #: Tertiary color (:class:`pysubs2.Color` instance)
self.fontname = "Arial" #: Font name outlinecolor: Color = Color(0, 0, 0, 0) #: Outline color (:class:`pysubs2.Color` instance)
self.fontsize = 20.0 #: Font size (in pixels) backcolor: Color = Color(0, 0, 0, 0) #: Back, ie. shadow color (:class:`pysubs2.Color` instance)
self.primarycolor = Color(255, 255, 255, 0) #: Primary color (:class:`pysubs2.Color` instance) bold: bool = False #: Bold
self.secondarycolor = Color(255, 0, 0, 0) #: Secondary color (:class:`pysubs2.Color` instance) italic: bool = False #: Italic
self.tertiarycolor = Color(0, 0, 0, 0) #: Tertiary color (:class:`pysubs2.Color` instance) underline: bool = False #: Underline (ASS only)
self.outlinecolor = Color(0, 0, 0, 0) #: Outline color (:class:`pysubs2.Color` instance) strikeout: bool = False #: Strikeout (ASS only)
self.backcolor = Color(0, 0, 0, 0) #: Back, ie. shadow color (:class:`pysubs2.Color` instance) scalex: float = 100.0 #: Horizontal scaling (ASS only)
self.bold = False #: Bold scaley: float = 100.0 #: Vertical scaling (ASS only)
self.italic = False #: Italic spacing: float = 0.0 #: Letter spacing (ASS only)
self.underline = False #: Underline (ASS only) angle: float = 0.0 #: Rotation (ASS only)
self.strikeout = False #: Strikeout (ASS only) borderstyle: int = 1 #: Border style
self.drawing = False #: Drawing (ASS only, see http://docs.aegisub.org/3.1/ASS_Tags/#drawing-tags outline: float = 2.0 #: Outline width (in pixels)
self.scalex = 100.0 #: Horizontal scaling (ASS only) shadow: float = 2.0 #: Shadow depth (in pixels)
self.scaley = 100.0 #: Vertical scaling (ASS only) alignment: int = 2 #: Numpad-style alignment, eg. 7 is "top left" (that is, ASS alignment semantics)
self.spacing = 0.0 #: Letter spacing (ASS only) marginl: int = 10 #: Left margin (in pixels)
self.angle = 0.0 #: Rotation (ASS only) marginr: int = 10 #: Right margin (in pixels)
self.borderstyle = 1 #: Border style marginv: int = 10 #: Vertical margin (in pixels)
self.outline = 2.0 #: Outline width (in pixels) alphalevel: int = 0 #: Old, unused SSA-only field
self.shadow = 2.0 #: Shadow depth (in pixels) encoding: int = 1 #: Charset
self.alignment = 2 #: Numpad-style alignment, eg. 7 is "top left" (that is, ASS alignment semantics)
self.marginl = 10 #: Left margin (in pixels) # The following attributes cannot be defined for SSA styles themselves,
self.marginr = 10 #: Right margin (in pixels) # but can be used in override tags and thus are useful to keep here
self.marginv = 10 #: Vertical margin (in pixels) # for the `pysubs2.substation.parse_tags()` interface which returns
self.alphalevel = 0 #: Old, unused SSA-only field # SSAStyles for text fragments.
self.encoding = 1 #: Charset drawing: bool = False #: Indicates that text span is a SSA vector drawing, see `pysubs2.substation.parse_tags()`
for k, v in fields.items(): def copy(self) -> "SSAStyle":
if k in self.FIELDS:
setattr(self, k, v)
else:
raise ValueError("SSAStyle has no field named %r" % k)
def copy(self):
return SSAStyle(**self.as_dict()) return SSAStyle(**self.as_dict())
def as_dict(self): def as_dict(self) -> Dict[str, Any]:
return {field: getattr(self, field) for field in self.FIELDS} # dataclasses.asdict() would recursively dictify Color objects, which we don't want
return {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
def __eq__(self, other):
return self.as_dict() == other.as_dict()
def __ne__(self, other):
return not self == other
def __repr__(self): def __repr__(self):
s = "<SSAStyle " return f"<SSAStyle {self.fontsize!r}px" \
s += "%rpx " % self.fontsize f"{' bold' if self.bold else ''}" \
if self.bold: s += "bold " f"{' italic' if self.italic else ''}" \
if self.italic: s += "italic " f" {self.fontname!r}>"
s += "{!r}>".format(self.fontname)
if not PY3: s = s.encode("utf-8")
return s
SSAStyle.DEFAULT_STYLE = SSAStyle() SSAStyle.DEFAULT_STYLE = SSAStyle()

@ -1,5 +1,3 @@
from __future__ import print_function, unicode_literals
import re import re
from .formatbase import FormatBase from .formatbase import FormatBase
from .ssaevent import SSAEvent from .ssaevent import SSAEvent
@ -21,25 +19,50 @@ def ms_to_timestamp(ms):
class SubripFormat(FormatBase): class SubripFormat(FormatBase):
"""SubRip Text (SRT) subtitle format implementation"""
TIMESTAMP = TIMESTAMP
@staticmethod
def timestamp_to_ms(groups):
return timestamp_to_ms(groups)
@classmethod @classmethod
def guess_format(cls, text): def guess_format(cls, text):
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if "[Script Info]" in text or "[V4+ Styles]" in text: if "[Script Info]" in text or "[V4+ Styles]" in text:
# disambiguation vs. SSA/ASS # disambiguation vs. SSA/ASS
return None return None
if text.lstrip().startswith("WEBVTT"):
# disambiguation vs. WebVTT
return None
for line in text.splitlines(): for line in text.splitlines():
if len(TIMESTAMP.findall(line)) == 2: if len(cls.TIMESTAMP.findall(line)) == 2:
return "srt" return "srt"
@classmethod @classmethod
def from_file(cls, subs, fp, format_, keep_unknown_html_tags=False, **kwargs): def from_file(cls, subs, fp, format_, keep_unknown_html_tags=False, **kwargs):
"""
See :meth:`pysubs2.formats.FormatBase.from_file()`
Supported tags:
- ``<i>``
- ``<u>``
- ``<s>``
Keyword args:
keep_unknown_html_tags: If True, HTML tags other than i/u/s will be kept as-is.
Otherwise, they will be stripped from input.
"""
timestamps = [] # (start, end) timestamps = [] # (start, end)
following_lines = [] # contains lists of lines following each timestamp following_lines = [] # contains lists of lines following each timestamp
for line in fp: for line in fp:
stamps = TIMESTAMP.findall(line) stamps = cls.TIMESTAMP.findall(line)
if len(stamps) == 2: # timestamp line if len(stamps) == 2: # timestamp line
start, end = map(timestamp_to_ms, stamps) start, end = map(cls.timestamp_to_ms, stamps)
timestamps.append((start, end)) timestamps.append((start, end))
following_lines.append([]) following_lines.append([])
else: else:
@ -72,16 +95,26 @@ class SubripFormat(FormatBase):
for (start, end), lines in zip(timestamps, following_lines)] for (start, end), lines in zip(timestamps, following_lines)]
@classmethod @classmethod
def to_file(cls, subs, fp, format_, **kwargs): def to_file(cls, subs, fp, format_, apply_styles=True, **kwargs):
"""
See :meth:`pysubs2.formats.FormatBase.to_file()`
Italic, underline and strikeout styling is supported.
Keyword args:
apply_styles: If False, do not write any styling.
"""
def prepare_text(text, style): def prepare_text(text, style):
body = [] body = []
for fragment, sty in parse_tags(text, style, subs.styles): for fragment, sty in parse_tags(text, style, subs.styles):
fragment = fragment.replace(r"\h", " ") fragment = fragment.replace(r"\h", " ")
fragment = fragment.replace(r"\n", "\n") fragment = fragment.replace(r"\n", "\n")
fragment = fragment.replace(r"\N", "\n") fragment = fragment.replace(r"\N", "\n")
if sty.italic: fragment = "<i>%s</i>" % fragment if apply_styles:
if sty.underline: fragment = "<u>%s</u>" % fragment if sty.italic: fragment = "<i>%s</i>" % fragment
if sty.strikeout: fragment = "<s>%s</s>" % fragment if sty.underline: fragment = "<u>%s</u>" % fragment
if sty.strikeout: fragment = "<s>%s</s>" % fragment
if sty.drawing: raise ContentNotUsable if sty.drawing: raise ContentNotUsable
body.append(fragment) body.append(fragment)
@ -89,7 +122,8 @@ class SubripFormat(FormatBase):
visible_lines = (line for line in subs if not line.is_comment) visible_lines = (line for line in subs if not line.is_comment)
for i, line in enumerate(visible_lines, 1): lineno = 1
for line in visible_lines:
start = ms_to_timestamp(line.start) start = ms_to_timestamp(line.start)
end = ms_to_timestamp(line.end) end = ms_to_timestamp(line.end)
try: try:
@ -97,6 +131,7 @@ class SubripFormat(FormatBase):
except ContentNotUsable: except ContentNotUsable:
continue continue
print("%d" % i, file=fp) # Python 2.7 compat print("%d" % lineno, file=fp) # Python 2.7 compat
print(start, "-->", end, file=fp) print(start, "-->", end, file=fp)
print(text, end="\n\n", file=fp) print(text, end="\n\n", file=fp)
lineno += 1

@ -1,10 +1,10 @@
from __future__ import print_function, division, unicode_literals import logging
import re import re
from numbers import Number from numbers import Number
from .formatbase import FormatBase from .formatbase import FormatBase
from .ssaevent import SSAEvent from .ssaevent import SSAEvent
from .ssastyle import SSAStyle from .ssastyle import SSAStyle
from .common import text_type, Color, PY3, binary_string_type from .common import Color
from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP
SSA_ALIGNMENT = (1, 2, 3, 9, 10, 11, 5, 6, 7) SSA_ALIGNMENT = (1, 2, 3, 9, 10, 11, 5, 6, 7)
@ -15,7 +15,14 @@ def ass_to_ssa_alignment(i):
def ssa_to_ass_alignment(i): def ssa_to_ass_alignment(i):
return SSA_ALIGNMENT.index(i) + 1 return SSA_ALIGNMENT.index(i) + 1
SECTION_HEADING = re.compile(r"^.{,3}\[[^\]]+\]") # allow for UTF-8 BOM, which is 3 bytes SECTION_HEADING = re.compile(
r"^.{,3}" # allow 3 chars at start of line for BOM
r"\[" # open square bracket
r"[^]]*[a-z][^]]*" # inside square brackets, at least one lowercase letter (this guards vs. uuencoded font data)
r"]" # close square bracket
)
FONT_FILE_HEADING = re.compile(r"fontname:\s+(\S+)")
STYLE_FORMAT_LINE = { STYLE_FORMAT_LINE = {
"ass": "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic," "ass": "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic,"
@ -46,7 +53,7 @@ EVENT_FIELDS = {
#: Largest timestamp allowed in SubStation, ie. 9:59:59.99. #: Largest timestamp allowed in SubStation, ie. 9:59:59.99.
MAX_REPRESENTABLE_TIME = make_time(h=10) - 10 MAX_REPRESENTABLE_TIME = make_time(h=10) - 10
def ms_to_timestamp(ms): def ms_to_timestamp(ms: int) -> str:
"""Convert ms to 'H:MM:SS.cc'""" """Convert ms to 'H:MM:SS.cc'"""
# XXX throw on overflow/underflow? # XXX throw on overflow/underflow?
if ms < 0: ms = 0 if ms < 0: ms = 0
@ -54,28 +61,24 @@ def ms_to_timestamp(ms):
h, m, s, ms = ms_to_times(ms) h, m, s, ms = ms_to_times(ms)
return "%01d:%02d:%02d.%02d" % (h, m, s, ms//10) return "%01d:%02d:%02d.%02d" % (h, m, s, ms//10)
def color_to_ass_rgba(c): def color_to_ass_rgba(c: Color) -> str:
return "&H%08X" % ((c.a << 24) | (c.b << 16) | (c.g << 8) | c.r) return "&H%08X" % ((c.a << 24) | (c.b << 16) | (c.g << 8) | c.r)
def color_to_ssa_rgb(c): def color_to_ssa_rgb(c: Color) -> str:
return "%d" % ((c.b << 16) | (c.g << 8) | c.r) return "%d" % ((c.b << 16) | (c.g << 8) | c.r)
def ass_rgba_to_color(s): def rgba_to_color(s: str) -> Color:
x = int(s[2:], base=16) if s[0] == '&':
x = int(s[2:], base=16)
else:
x = int(s)
r = x & 0xff r = x & 0xff
g = (x >> 8) & 0xff g = (x >> 8) & 0xff
b = (x >> 16) & 0xff b = (x >> 16) & 0xff
a = (x >> 24) & 0xff a = (x >> 24) & 0xff
return Color(r, g, b, a) return Color(r, g, b, a)
def ssa_rgb_to_color(s): def is_valid_field_content(s: str) -> bool:
x = int(s)
r = x & 0xff
g = (x >> 8) & 0xff
b = (x >> 16) & 0xff
return Color(r, g, b)
def is_valid_field_content(s):
""" """
Returns True if string s can be stored in a SubStation field. Returns True if string s can be stored in a SubStation field.
@ -140,8 +143,10 @@ def parse_tags(text, style=SSAStyle.DEFAULT_STYLE, styles={}):
NOTICE = "Script generated by pysubs2\nhttps://pypi.python.org/pypi/pysubs2" NOTICE = "Script generated by pysubs2\nhttps://pypi.python.org/pypi/pysubs2"
class SubstationFormat(FormatBase): class SubstationFormat(FormatBase):
"""SubStation Alpha (ASS, SSA) subtitle format implementation"""
@classmethod @classmethod
def guess_format(cls, text): def guess_format(cls, text):
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if "V4+ Styles" in text: if "V4+ Styles" in text:
return "ass" return "ass"
elif "V4 Styles" in text: elif "V4 Styles" in text:
@ -149,6 +154,7 @@ class SubstationFormat(FormatBase):
@classmethod @classmethod
def from_file(cls, subs, fp, format_, **kwargs): def from_file(cls, subs, fp, format_, **kwargs):
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
def string_to_field(f, v): def string_to_field(f, v):
if f in {"start", "end"}: if f in {"start", "end"}:
@ -159,10 +165,7 @@ class SubstationFormat(FormatBase):
else: else:
return timestamp_to_ms(TIMESTAMP.match(v).groups()) return timestamp_to_ms(TIMESTAMP.match(v).groups())
elif "color" in f: elif "color" in f:
if format_ == "ass": return rgba_to_color(v)
return ass_rgba_to_color(v)
else:
return ssa_rgb_to_color(v)
elif f in {"bold", "underline", "italic", "strikeout"}: elif f in {"bold", "underline", "italic", "strikeout"}:
return v == "-1" return v == "-1"
elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}: elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:
@ -183,16 +186,22 @@ class SubstationFormat(FormatBase):
subs.info.clear() subs.info.clear()
subs.aegisub_project.clear() subs.aegisub_project.clear()
subs.styles.clear() subs.styles.clear()
subs.fonts_opaque.clear()
inside_info_section = False inside_info_section = False
inside_aegisub_section = False inside_aegisub_section = False
inside_font_section = False
current_font_name = None
current_font_lines_buffer = []
for line in fp: for lineno, line in enumerate(fp, 1):
line = line.strip() line = line.strip()
if SECTION_HEADING.match(line): if SECTION_HEADING.match(line):
logging.debug("at line %d: section heading %s", lineno, line)
inside_info_section = "Info" in line inside_info_section = "Info" in line
inside_aegisub_section = "Aegisub" in line inside_aegisub_section = "Aegisub" in line
inside_font_section = "Fonts" in line
elif inside_info_section or inside_aegisub_section: elif inside_info_section or inside_aegisub_section:
if line.startswith(";"): continue # skip comments if line.startswith(";"): continue # skip comments
try: try:
@ -203,6 +212,24 @@ class SubstationFormat(FormatBase):
subs.aegisub_project[k] = v.strip() subs.aegisub_project[k] = v.strip()
except ValueError: except ValueError:
pass pass
elif inside_font_section:
m = FONT_FILE_HEADING.match(line)
if current_font_name and (m or not line):
# flush last font on newline or new font name
font_data = current_font_lines_buffer[:]
subs.fonts_opaque[current_font_name] = font_data
logging.debug("at line %d: finished font definition %s", lineno, current_font_name)
current_font_lines_buffer.clear()
current_font_name = None
if m:
# start new font
font_name = m.group(1)
current_font_name = font_name
elif line:
# add non-empty line to current buffer
current_font_lines_buffer.append(line)
elif line.startswith("Style:"): elif line.startswith("Style:"):
_, rest = line.split(":", 1) _, rest = line.split(":", 1)
buf = rest.strip().split(",") buf = rest.strip().split(",")
@ -218,9 +245,18 @@ class SubstationFormat(FormatBase):
ev = SSAEvent(**field_dict) ev = SSAEvent(**field_dict)
subs.events.append(ev) subs.events.append(ev)
# cleanup fonts
if current_font_name:
# flush last font on EOF or new section w/o newline
font_data = current_font_lines_buffer[:]
subs.fonts_opaque[current_font_name] = font_data
logging.debug("at EOF: finished font definition %s", current_font_name)
current_font_lines_buffer.clear()
current_font_name = None
@classmethod @classmethod
def to_file(cls, subs, fp, format_, header_notice=NOTICE, **kwargs): def to_file(cls, subs, fp, format_, header_notice=NOTICE, **kwargs):
"""See :meth:`pysubs2.formats.FormatBase.to_file()`"""
print("[Script Info]", file=fp) print("[Script Info]", file=fp)
for line in header_notice.splitlines(False): for line in header_notice.splitlines(False):
print(";", line, file=fp) print(";", line, file=fp)
@ -240,19 +276,11 @@ class SubstationFormat(FormatBase):
elif f == "marked": elif f == "marked":
return "Marked=%d" % v return "Marked=%d" % v
elif f == "alignment" and format_ == "ssa": elif f == "alignment" and format_ == "ssa":
return text_type(ass_to_ssa_alignment(v)) return str(ass_to_ssa_alignment(v))
elif isinstance(v, bool): elif isinstance(v, bool):
return "-1" if v else "0" return "-1" if v else "0"
elif isinstance(v, (text_type, Number)): elif isinstance(v, (str, Number)):
return text_type(v) return str(v)
elif not PY3 and isinstance(v, binary_string_type):
# A convenience feature, see issue #12 - accept non-unicode strings
# when they are ASCII; this is useful in Python 2, especially for non-text
# fields like style names, where requiring Unicode type seems too stringent
if all(ord(c) < 128 for c in v):
return text_type(v)
else:
raise TypeError("Encountered binary string with non-ASCII codepoint in SubStation field {!r} for line {!r} - please use unicode string instead of str".format(f, line))
elif isinstance(v, Color): elif isinstance(v, Color):
if format_ == "ass": if format_ == "ass":
return color_to_ass_rgba(v) return color_to_ass_rgba(v)
@ -267,6 +295,14 @@ class SubstationFormat(FormatBase):
fields = [field_to_string(f, getattr(sty, f), sty) for f in STYLE_FIELDS[format_]] fields = [field_to_string(f, getattr(sty, f), sty) for f in STYLE_FIELDS[format_]]
print("Style: %s" % name, *fields, sep=",", file=fp) print("Style: %s" % name, *fields, sep=",", file=fp)
if subs.fonts_opaque:
print("\n[Fonts]", file=fp)
for font_name, font_lines in sorted(subs.fonts_opaque.items()):
print("fontname: {}".format(font_name), file=fp)
for line in font_lines:
print(line, file=fp)
print(file=fp)
print("\n[Events]", file=fp) print("\n[Events]", file=fp)
print(EVENT_FORMAT_LINE[format_], file=fp) print(EVENT_FORMAT_LINE[format_], file=fp)
for ev in subs.events: for ev in subs.events:

@ -1,15 +1,19 @@
from __future__ import division
from collections import namedtuple from collections import namedtuple
import re import re
#: Pattern that matches both SubStation and SubRip timestamps. #: Pattern that matches both SubStation and SubRip timestamps.
from typing import Optional, List, Tuple, Sequence
from pysubs2.common import IntOrFloat
TIMESTAMP = re.compile(r"(\d{1,2}):(\d{2}):(\d{2})[.,](\d{2,3})") TIMESTAMP = re.compile(r"(\d{1,2}):(\d{2}):(\d{2})[.,](\d{2,3})")
Times = namedtuple("Times", ["h", "m", "s", "ms"]) Times = namedtuple("Times", ["h", "m", "s", "ms"])
def make_time(h=0, m=0, s=0, ms=0, frames=None, fps=None):
def make_time(h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
frames: Optional[int]=None, fps: Optional[float]=None):
""" """
Convert time to milliseconds. Convert time to milliseconds.
@ -33,7 +37,8 @@ def make_time(h=0, m=0, s=0, ms=0, frames=None, fps=None):
else: else:
raise ValueError("Both fps and frames must be specified") raise ValueError("Both fps and frames must be specified")
def timestamp_to_ms(groups):
def timestamp_to_ms(groups: Sequence[str]):
""" """
Convert groups from :data:`pysubs2.time.TIMESTAMP` match to milliseconds. Convert groups from :data:`pysubs2.time.TIMESTAMP` match to milliseconds.
@ -49,7 +54,8 @@ def timestamp_to_ms(groups):
ms += h * 3600000 ms += h * 3600000
return ms return ms
def tmptimestamp_to_ms(groups):
def tmptimestamp_to_ms(groups: Sequence[str]):
""" """
Convert groups from :data:`pysubs2.time.TMPTIMESTAMP` match to milliseconds. Convert groups from :data:`pysubs2.time.TMPTIMESTAMP` match to milliseconds.
@ -63,7 +69,9 @@ def tmptimestamp_to_ms(groups):
ms += m * 60000 ms += m * 60000
ms += h * 3600000 ms += h * 3600000
return ms return ms
def times_to_ms(h=0, m=0, s=0, ms=0):
def times_to_ms(h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0) -> int:
""" """
Convert hours, minutes, seconds to milliseconds. Convert hours, minutes, seconds to milliseconds.
@ -79,7 +87,8 @@ def times_to_ms(h=0, m=0, s=0, ms=0):
ms += h * 3600000 ms += h * 3600000
return int(round(ms)) return int(round(ms))
def frames_to_ms(frames, fps):
def frames_to_ms(frames: int, fps: float) -> int:
""" """
Convert frame-based duration to milliseconds. Convert frame-based duration to milliseconds.
@ -99,7 +108,8 @@ def frames_to_ms(frames, fps):
return int(round(frames * (1000 / fps))) return int(round(frames * (1000 / fps)))
def ms_to_frames(ms, fps):
def ms_to_frames(ms: IntOrFloat, fps: float) -> int:
""" """
Convert milliseconds to number of frames. Convert milliseconds to number of frames.
@ -119,7 +129,8 @@ def ms_to_frames(ms, fps):
return int(round((ms / 1000) * fps)) return int(round((ms / 1000) * fps))
def ms_to_times(ms):
def ms_to_times(ms: IntOrFloat) -> Tuple[int, int, int, int]:
""" """
Convert milliseconds to normalized tuple (h, m, s, ms). Convert milliseconds to normalized tuple (h, m, s, ms).
@ -138,7 +149,8 @@ def ms_to_times(ms):
s, ms = divmod(ms, 1000) s, ms = divmod(ms, 1000)
return Times(h, m, s, ms) return Times(h, m, s, ms)
def ms_to_str(ms, fractions=False):
def ms_to_str(ms: IntOrFloat, fractions: bool=False) -> str:
""" """
Prettyprint milliseconds to [-]H:MM:SS[.mmm] Prettyprint milliseconds to [-]H:MM:SS[.mmm]
@ -156,6 +168,6 @@ def ms_to_str(ms, fractions=False):
sgn = "-" if ms < 0 else "" sgn = "-" if ms < 0 else ""
h, m, s, ms = ms_to_times(abs(ms)) h, m, s, ms = ms_to_times(abs(ms))
if fractions: if fractions:
return sgn + "{:01d}:{:02d}:{:02d}.{:03d}".format(h, m, s, ms) return f"{sgn}{h:01d}:{m:02d}:{s:02d}.{ms:03d}"
else: else:
return sgn + "{:01d}:{:02d}:{:02d}".format(h, m, s) return f"{sgn}{h:01d}:{m:02d}:{s:02d}"

@ -1,5 +1,3 @@
from __future__ import print_function, unicode_literals
import re import re
from .formatbase import FormatBase from .formatbase import FormatBase
from .ssaevent import SSAEvent from .ssaevent import SSAEvent
@ -15,6 +13,7 @@ TMP_LINE = re.compile(r"(\d{1,2}:\d{2}:\d{2}):(.+)")
#: Largest timestamp allowed in Tmp, ie. 99:59:59. #: Largest timestamp allowed in Tmp, ie. 99:59:59.
MAX_REPRESENTABLE_TIME = make_time(h=100) - 1 MAX_REPRESENTABLE_TIME = make_time(h=100) - 1
def ms_to_timestamp(ms): def ms_to_timestamp(ms):
"""Convert ms to 'HH:MM:SS'""" """Convert ms to 'HH:MM:SS'"""
# XXX throw on overflow/underflow? # XXX throw on overflow/underflow?
@ -25,8 +24,10 @@ def ms_to_timestamp(ms):
class TmpFormat(FormatBase): class TmpFormat(FormatBase):
"""TMP subtitle format implementation"""
@classmethod @classmethod
def guess_format(cls, text): def guess_format(cls, text):
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if "[Script Info]" in text or "[V4+ Styles]" in text: if "[Script Info]" in text or "[V4+ Styles]" in text:
# disambiguation vs. SSA/ASS # disambiguation vs. SSA/ASS
return None return None
@ -37,8 +38,14 @@ class TmpFormat(FormatBase):
@classmethod @classmethod
def from_file(cls, subs, fp, format_, **kwargs): def from_file(cls, subs, fp, format_, **kwargs):
timestamps = [] # (start) """See :meth:`pysubs2.formats.FormatBase.from_file()`"""
lines = [] # contains lists of lines following each timestamp events = []
def prepare_text(text):
text = text.replace("|", r"\N") # convert newlines
text = re.sub(r"< *u *>", "{\\\\u1}", text) # not r" for Python 2.7 compat, triggers unicodeescape
text = re.sub(r"< */? *[a-zA-Z][^>]*>", "", text) # strip other HTML tags
return text
for line in fp: for line in fp:
match = TMP_LINE.match(line) match = TMP_LINE.match(line)
@ -47,42 +54,54 @@ class TmpFormat(FormatBase):
start, text = match.groups() start, text = match.groups()
start = tmptimestamp_to_ms(TMPTIMESTAMP.match(start).groups()) start = tmptimestamp_to_ms(TMPTIMESTAMP.match(start).groups())
#calculate endtime from starttime + 500 miliseconds + 67 miliseconds per each character (15 chars per second)
end = start + 500 + (len(line) * 67)
timestamps.append((start, end))
lines.append(text)
def prepare_text(lines): # Unfortunately, end timestamp is not given; try to estimate something reasonable:
lines = lines.replace("|", r"\N") # convert newlines # start + 500 ms + 67 ms/character (15 chars per second)
lines = re.sub(r"< *u *>", "{\\\\u1}", lines) # not r" for Python 2.7 compat, triggers unicodeescape end_guess = start + 500 + (len(line) * 67)
lines = re.sub(r"< */? *[a-zA-Z][^>]*>", "", lines) # strip other HTML tags
return lines event = SSAEvent(start=start, end=end_guess, text=prepare_text(text))
events.append(event)
subs.events = [SSAEvent(start=start, end=end, text=prepare_text(lines)) # correct any overlapping subtitles created by end_guess
for (start, end), lines in zip(timestamps, lines)] for i in range(len(events) - 1):
events[i].end = min(events[i].end, events[i+1].start)
subs.events = events
@classmethod @classmethod
def to_file(cls, subs, fp, format_, **kwargs): def to_file(cls, subs, fp, format_, apply_styles=True, **kwargs):
"""
See :meth:`pysubs2.formats.FormatBase.to_file()`
Italic, underline and strikeout styling is supported.
Keyword args:
apply_styles: If False, do not write any styling.
"""
def prepare_text(text, style): def prepare_text(text, style):
body = [] body = []
skip = False
for fragment, sty in parse_tags(text, style, subs.styles): for fragment, sty in parse_tags(text, style, subs.styles):
fragment = fragment.replace(r"\h", " ") fragment = fragment.replace(r"\h", " ")
fragment = fragment.replace(r"\n", "\n") fragment = fragment.replace(r"\n", "\n")
fragment = fragment.replace(r"\N", "\n") fragment = fragment.replace(r"\N", "\n")
if sty.italic: fragment = "<i>%s</i>" % fragment if apply_styles:
if sty.underline: fragment = "<u>%s</u>" % fragment if sty.italic: fragment = "<i>%s</i>" % fragment
if sty.strikeout: fragment = "<s>%s</s>" % fragment if sty.underline: fragment = "<u>%s</u>" % fragment
if sty.strikeout: fragment = "<s>%s</s>" % fragment
if sty.drawing: skip = True
body.append(fragment) body.append(fragment)
return re.sub("\n+", "\n", "".join(body).strip()) if skip:
return ""
else:
return re.sub("\n+", "\n", "".join(body).strip())
visible_lines = (line for line in subs if not line.is_comment) visible_lines = (line for line in subs if not line.is_comment)
for i, line in enumerate(visible_lines, 1): for line in visible_lines:
start = ms_to_timestamp(line.start) start = ms_to_timestamp(line.start)
#end = ms_to_timestamp(line.end)
text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE)) text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
#print("%d" % i, file=fp) # Python 2.7 compat
print(start + ":" + text, end="\n", file=fp) print(start + ":" + text, end="\n", file=fp)
#print(text, end="\n\n", file=fp)

@ -0,0 +1,36 @@
import re
from .subrip import SubripFormat
from .time import make_time
class WebVTTFormat(SubripFormat):
    """
    Web Video Text Tracks (WebVTT) subtitle format implementation

    Built on top of :class:`pysubs2.subrip.SubripFormat`; this subclass only
    supplies its own timestamp pattern, timestamp conversion, format
    detection and the ``WEBVTT`` file header.
    """
    # Groups: optional "hours:" prefix (colon included), minutes, seconds,
    # and a 2- or 3-digit fraction after the dot.
    TIMESTAMP = re.compile(r"(\d{0,4}:)?(\d{2}):(\d{2})\.(\d{2,3})")

    @staticmethod
    def timestamp_to_ms(groups):
        """
        Convert the four groups captured by ``TIMESTAMP`` to milliseconds.

        The hours group is optional; when it is missing or empty, zero hours
        are assumed. The fraction digits are passed on as a plain integer
        number of milliseconds (a two-digit fraction is not rescaled).
        """
        hours_part, minutes_part, seconds_part, millis_part = groups
        # The hours group still carries its trailing colon, so drop it first.
        hours = int(hours_part.strip(":")) if hours_part else 0
        minutes, seconds, millis = (
            int(part) for part in (minutes_part, seconds_part, millis_part)
        )
        return make_time(h=hours, m=minutes, s=seconds, ms=millis)

    @classmethod
    def guess_format(cls, text):
        """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
        # Leading whitespace is ignored; anything else yields None.
        looks_like_webvtt = text.lstrip().startswith("WEBVTT")
        return "vtt" if looks_like_webvtt else None

    @classmethod
    def to_file(cls, subs, fp, format_, **kwargs):
        """
        See :meth:`pysubs2.formats.FormatBase.to_file()`

        Writes the ``WEBVTT`` header line followed by an empty line, then
        hands the rest of the output over to ``SubripFormat.to_file``.
        """
        print("WEBVTT\n", file=fp)
        return SubripFormat.to_file(subs=subs, fp=fp, format_=format_, **kwargs)
Loading…
Cancel
Save