Updated pysubs2 to 1.7.2

* chore: Bump pysubs2 to v1.7.1

* chore: bump version to 1.7.2

* remove bin
pull/2521/head
Anderson Shindy Oki 7 months ago committed by GitHub
parent a8c1794061
commit 2b19f390e7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -13,7 +13,7 @@ import pysrt
import pysubs2
from bs4 import UnicodeDammit
from pysubs2 import SSAStyle
from pysubs2.subrip import parse_tags, MAX_REPRESENTABLE_TIME
from pysubs2.formats.subrip import parse_tags, MAX_REPRESENTABLE_TIME
from pysubs2.time import ms_to_times
from subzero.modification import SubtitleModifications
from subzero.language import Language

@ -1,84 +0,0 @@
Metadata-Version: 2.1
Name: pysubs2
Version: 1.6.1
Summary: A library for editing subtitle files
Home-page: https://github.com/tkarabela/pysubs2
Author: Tomas Karabela
Author-email: tkarabela@seznam.cz
License: MIT
Project-URL: Documentation, https://pysubs2.readthedocs.io
Project-URL: Bug Tracker, https://github.com/tkarabela/pysubs2/issues
Keywords: SubStation,SubRip,MicroDVD,MPL2,WebVTT,ass,srt,sub,vtt,subtitles,captions
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Development Status :: 5 - Production/Stable
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Markup
Classifier: Topic :: Multimedia :: Video
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: End Users/Desktop
Classifier: Operating System :: OS Independent
Classifier: License :: OSI Approved :: MIT License
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
License-File: LICENSE.txt
pysubs2
=======
.. image:: https://img.shields.io/github/actions/workflow/status/tkarabela/pysubs2/main.yml?branch=master
:alt: pysubs2 build master branch
:target: https://github.com/tkarabela/pysubs2/actions
.. image:: https://img.shields.io/codecov/c/github/tkarabela/pysubs2
:alt: pysubs2 test code coverage
:target: https://app.codecov.io/github/tkarabela/pysubs2
.. image:: http://www.mypy-lang.org/static/mypy_badge.svg
:alt: MyPy checked
:target: https://github.com/tkarabela/pysubs2/actions
.. image:: https://img.shields.io/pypi/v/pysubs2.svg?style=flat-square
:alt: PyPI - Version
:target: https://pypi.org/project/pysubs2/
.. image:: https://img.shields.io/pypi/status/pysubs2.svg?style=flat-square
:alt: PyPI - Status
:target: https://pypi.org/project/pysubs2/
.. image:: https://img.shields.io/pypi/pyversions/pysubs2.svg?style=flat-square
:alt: PyPI - Python Version
:target: https://pypi.org/project/pysubs2/
.. image:: https://img.shields.io/pypi/l/pysubs2.svg?style=flat-square
:alt: PyPI - License
:target: LICENSE.txt
pysubs2 is a Python library for editing subtitle files.
It's based on *SubStation Alpha*, the native format of
`Aegisub <http://www.aegisub.org/>`_; it also supports *SubRip (SRT)*,
*MicroDVD*, *MPL2*, *TMP* and *WebVTT* formats and *OpenAI Whisper* captions.
There is a small CLI tool for batch conversion and retiming.
.. code:: bash
$ pip install pysubs2
$ pysubs2 --shift 0.3s *.srt
$ pysubs2 --to srt *.ass
.. code:: python
import pysubs2
subs = pysubs2.load("my_subtitles.ass", encoding="utf-8")
subs.shift(s=2.5)
for line in subs:
line.text = "{\\be1}" + line.text
subs.save("my_subtitles_edited.ass")
To learn more, please `see the documentation <http://pysubs2.readthedocs.io>`_.
If you'd like to contribute, see `CONTRIBUTING.md <CONTRIBUTING.md>`_.
pysubs2 is licensed under the MIT license (see `LICENSE.txt <LICENSE.txt>`_).

@ -1,28 +0,0 @@
../../bin/pysubs2,sha256=1ipjtihceH0M9ljChd41YMd3co9-Mn5sngcbA3Kgupk,241
pysubs2-1.6.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
pysubs2-1.6.1.dist-info/LICENSE.txt,sha256=Syr1zDKD8s_Wl4E6er5xVmuOZ1bvaYyLOxcEI7lW4KU,1063
pysubs2-1.6.1.dist-info/METADATA,sha256=f8UZ93EkW3RJVHEXKuIX29IT8Cuz2siGq1IlcLS17tc,3281
pysubs2-1.6.1.dist-info/RECORD,,
pysubs2-1.6.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
pysubs2-1.6.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
pysubs2-1.6.1.dist-info/entry_points.txt,sha256=mznmeaia9Ec19KsaIM8D4tt6V0HnOUlPhSg5nXiAfAE,49
pysubs2-1.6.1.dist-info/top_level.txt,sha256=i_V6JX-AWjMX15-Dzyc0RlrInU7Zun9PYfdB9brM95E,8
pysubs2/__init__.py,sha256=lkW5MzctUNvTu5KPVr84eO3Nh2x5yHp0SWAJ4NnAp-Y,505
pysubs2/__main__.py,sha256=3iAK73rLxjLJqFMYD-jn3PDJvHZ8rIOW_NgsmBkRGhA,134
pysubs2/cli.py,sha256=6zWUOifOCm5ziwn0dd_MsxZfdFcLiaGpnCvbz65WqbA,10607
pysubs2/common.py,sha256=SWYT6_aoL3CBEO-j0vP2EZnTDcUvNom6hW_y8gChGFM,1479
pysubs2/exceptions.py,sha256=pEbAUuDj3GOUbxryLAdZa7qawMWxNhIGyzC3j-VCykM,633
pysubs2/formatbase.py,sha256=WLHV7ppth7giTxqjHN13exyx1tfuwJGpEq0iNCMGYUY,2945
pysubs2/formats.py,sha256=nlGNc9dnC1vHQF-qAZiQEqfBN7Mn0B8OPGCr_ezzPvc,2438
pysubs2/jsonformat.py,sha256=Z7BfaqGkK4vl0Gc9lPF_UUPUi3l4oJlxD-0TOSLDt9w,1862
pysubs2/microdvd.py,sha256=boTIasQUUV8430k5kz8ba2GgCGqr1hGEEbdlE7Lw7dI,4398
pysubs2/mpl2.py,sha256=T1JUIAG3zCJHKoLRlYVLI_qFbvdkgV0w6XG9wlYJ17Q,1802
pysubs2/ssaevent.py,sha256=bz5oGH2yIJQl7Lzi4Dw_KFBfLMPelmomxXAeNJ6tXTc,5883
pysubs2/ssafile.py,sha256=b9adFdbx1MdHEB_ZdRon7GwKT5pKsaJhnnTE7UHcaKM,20356
pysubs2/ssastyle.py,sha256=PFFejFEwaUx_bO1lkoHVSSJcPhupGO3epCCBCAEgclk,3995
pysubs2/subrip.py,sha256=lK8bxf98wLKqordXeY2j8DwknDBN8kyLZXVFbZwj3mw,7371
pysubs2/substation.py,sha256=xUC19ASpj3yDS9ASzFjNn88KnIqkvWqE_vPOVxWCKsM,16170
pysubs2/time.py,sha256=1GPQlajjEio9rJvGnNO8J5OnWChQlB3BgAzRS3gnEzM,4641
pysubs2/tmp.py,sha256=hJ4hpja7AckpGmdbC5KuZ2jyIEXWu2b1_SILqzbR5iQ,3954
pysubs2/webvtt.py,sha256=sNFtGAaWn7gVMG8SVHS0Ndcku6LyiAaiv9gPErl0kys,1531
pysubs2/whisper.py,sha256=5dywTdlXa46ulYbVeWpM_dH9jiwRKlTm6G1GrzIjsx0,1604

@ -1,4 +1,4 @@
Copyright (c) 2014-2023 Tomas Karabela
Copyright (c) 2014-2024 Tomas Karabela
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

@ -0,0 +1,70 @@
Metadata-Version: 2.1
Name: pysubs2
Version: 1.7.2
Summary: A library for editing subtitle files
Home-page: https://github.com/tkarabela/pysubs2
Author: Tomas Karabela
Author-email: tkarabela@seznam.cz
License: MIT
Project-URL: Documentation, https://pysubs2.readthedocs.io
Project-URL: Bug Tracker, https://github.com/tkarabela/pysubs2/issues
Keywords: SubStation,SubRip,MicroDVD,MPL2,WebVTT,ass,srt,sub,vtt,subtitles,captions
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Development Status :: 5 - Production/Stable
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Markup
Classifier: Topic :: Multimedia :: Video
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: End Users/Desktop
Classifier: Operating System :: OS Independent
Classifier: License :: OSI Approved :: MIT License
Classifier: Typing :: Typed
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE.txt
pysubs2
=======
[![pysubs2 build master branch](https://img.shields.io/github/actions/workflow/status/tkarabela/pysubs2/main.yml?branch=master)](https://github.com/tkarabela/pysubs2/actions)
[![pysubs2 test code coverage](https://img.shields.io/codecov/c/github/tkarabela/pysubs2)](https://app.codecov.io/github/tkarabela/pysubs2)
[![Static Badge](https://img.shields.io/badge/MyPy%20%26%20Ruffle-checked-blue?style=flat)](https://github.com/tkarabela/pysubs2/actions)
[![PyPI - Version](https://img.shields.io/pypi/v/pysubs2.svg?style=flat)](https://pypi.org/project/pysubs2/)
[![PyPI - Status](https://img.shields.io/pypi/status/pysubs2.svg?style=flat)](https://pypi.org/project/pysubs2/)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pysubs2.svg?style=flat)](https://pypi.org/project/pysubs2/)
[![PyPI - License](https://img.shields.io/pypi/l/pysubs2.svg?style=flat)](LICENSE.txt)
[![GitHub Repo stars](https://img.shields.io/github/stars/tkarabela/pysubs2?style=flat&label=GitHub%20stars)](https://github.com/tkarabela/pysubs2)
pysubs2 is a Python library for editing subtitle files.
It's based on *SubStation Alpha*, the native format of
[Aegisub](http://www.aegisub.org/); it also supports *SubRip (SRT)*,
*MicroDVD*, *MPL2*, *TMP* and *WebVTT* formats and *OpenAI Whisper* captions.
There is a small CLI tool for batch conversion and retiming.
```bash
pip install pysubs2
pysubs2 --shift 0.3s *.srt
pysubs2 --to srt *.ass
```
```python
import pysubs2
subs = pysubs2.load("my_subtitles.ass", encoding="utf-8")
subs.shift(s=2.5)
for line in subs:
line.text = "{\\be1}" + line.text
subs.save("my_subtitles_edited.ass")
```
To learn more, please [see the documentation](http://pysubs2.readthedocs.io).
If you'd like to contribute, see [CONTRIBUTING.md](CONTRIBUTING.md).
pysubs2 is licensed under the MIT license (see [LICENSE.txt](LICENSE.txt)).

@ -0,0 +1,47 @@
../../bin/pysubs2,sha256=qbyZPJ0JMXc2Ii34KZgXKNvu1v8En-GFSWQFwhstugg,249
pysubs2-1.7.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
pysubs2-1.7.2.dist-info/LICENSE.txt,sha256=WYgskvBrUBBDxuKH9JSV1u22g-UBTwHjeuJLlPp6lWU,1063
pysubs2-1.7.2.dist-info/METADATA,sha256=2Fm6UaI7kElE-smr83cNBIfWRXt0vfso1Kyx5sChjB0,3183
pysubs2-1.7.2.dist-info/RECORD,,
pysubs2-1.7.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
pysubs2-1.7.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
pysubs2-1.7.2.dist-info/entry_points.txt,sha256=mznmeaia9Ec19KsaIM8D4tt6V0HnOUlPhSg5nXiAfAE,49
pysubs2-1.7.2.dist-info/top_level.txt,sha256=i_V6JX-AWjMX15-Dzyc0RlrInU7Zun9PYfdB9brM95E,8
pysubs2/__init__.py,sha256=L71MMMS6zcAiGWdddhnc4COZmKLYX__igP0Tr5q-y3Q,783
pysubs2/__main__.py,sha256=3iAK73rLxjLJqFMYD-jn3PDJvHZ8rIOW_NgsmBkRGhA,134
pysubs2/__pycache__/__init__.cpython-311.pyc,,
pysubs2/__pycache__/__main__.cpython-311.pyc,,
pysubs2/__pycache__/cli.cpython-311.pyc,,
pysubs2/__pycache__/common.cpython-311.pyc,,
pysubs2/__pycache__/exceptions.cpython-311.pyc,,
pysubs2/__pycache__/ssaevent.cpython-311.pyc,,
pysubs2/__pycache__/ssafile.cpython-311.pyc,,
pysubs2/__pycache__/ssastyle.cpython-311.pyc,,
pysubs2/__pycache__/time.cpython-311.pyc,,
pysubs2/cli.py,sha256=lFVZsLChtnnaRov2lNEHlecTJaoNNvTLMQmZe0-4oGE,11916
pysubs2/common.py,sha256=33XyqTSPqUNMedlnVkx-7GWFwvX0Fpce1G_ZHHtwQuI,1503
pysubs2/exceptions.py,sha256=5XMoOvloP4QPPkEPour6YalxomcK-RkJA9eb_ecIbH0,2318
pysubs2/formats/__init__.py,sha256=KZy9IibVmK-WwCHAYYplJELmYEnQu9BBRz5HEvyZ5U4,2515
pysubs2/formats/__pycache__/__init__.cpython-311.pyc,,
pysubs2/formats/__pycache__/base.cpython-311.pyc,,
pysubs2/formats/__pycache__/jsonformat.cpython-311.pyc,,
pysubs2/formats/__pycache__/microdvd.cpython-311.pyc,,
pysubs2/formats/__pycache__/mpl2.cpython-311.pyc,,
pysubs2/formats/__pycache__/subrip.cpython-311.pyc,,
pysubs2/formats/__pycache__/substation.cpython-311.pyc,,
pysubs2/formats/__pycache__/tmp.cpython-311.pyc,,
pysubs2/formats/__pycache__/webvtt.cpython-311.pyc,,
pysubs2/formats/__pycache__/whisper.cpython-311.pyc,,
pysubs2/formats/base.py,sha256=zzt4L3KIC6VrUNfzwjDHSF5HV7swrdo65dop90vfy70,2959
pysubs2/formats/jsonformat.py,sha256=TWk_qOI0-GaV_Jzs2S5JOe7uWu7t1jllO9NYOnZgRcE,2101
pysubs2/formats/microdvd.py,sha256=hfshagBIMMPz3WgqpttYGugtAj_6CheAiTkvTTrn3NE,5970
pysubs2/formats/mpl2.py,sha256=vpyL5j7gxxkGvWvI8JXwFlHaEyCBHYTn_mtgk4zqNFw,1970
pysubs2/formats/subrip.py,sha256=VsEhj6aX_NZdUbecxjKk-5Ri7GcUdGh8dfzZVTsAZSM,7484
pysubs2/formats/substation.py,sha256=UFaZ3sZoCa3bizKvn_5Ugtqy6crO1FqldxS5OI7ozcQ,16795
pysubs2/formats/tmp.py,sha256=FKktjpgv07rf6EZ1Og0rkNpcj59Z6kL6dL-V0QLSKQw,4100
pysubs2/formats/webvtt.py,sha256=Ycur_E5ztjazW2akyUXTsbNUdLs4S-knq93hwompCac,1788
pysubs2/formats/whisper.py,sha256=JHGY8nhiVso_iyVZOP72lLbC7GM-gX_FFc1BdzCGdo0,1607
pysubs2/ssaevent.py,sha256=VR0Pso779Pw5bs-Q-VpIz8Zs-A6p2a2pptQtsFVjRnM,6291
pysubs2/ssafile.py,sha256=smMZIFk5h8n3uUuzgrcSmzdNsVU7oB3kguKQtUAdQGU,23833
pysubs2/ssastyle.py,sha256=hlNowFmuiiwOzWTtQxKUakTwcE7Q5KxlQtl64Q6ScMU,4031
pysubs2/time.py,sha256=Onk1vs9xXKiuwYK0RorZVU29p2FCEARapnfP_yYczTQ,4739

@ -1,5 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.42.0)
Generator: bdist_wheel (0.43.0)
Root-Is-Purelib: true
Tag: py3-none-any

@ -1,10 +1,28 @@
from .ssafile import SSAFile
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from . import time, formats, cli, whisper
from .exceptions import *
from . import time, formats, cli, exceptions
from .formats import whisper
from .exceptions import * # noqa: F403
from .common import Color, Alignment, VERSION
__all__ = [
"SSAFile",
"SSAEvent",
"SSAStyle",
"time",
"formats",
"cli",
"whisper",
"exceptions",
"Color",
"Alignment",
"VERSION",
"load",
"load_from_whisper",
"make_time",
]
#: Alias for :meth:`SSAFile.load()`.
load = SSAFile.load

@ -3,10 +3,11 @@ import codecs
import os
import re
import os.path as op
import io
from io import open
from io import TextIOWrapper
import sys
from textwrap import dedent
from typing import List
from .formats import get_file_extension, FORMAT_IDENTIFIERS
from .time import make_time
from .ssafile import SSAFile
@ -42,7 +43,7 @@ def change_ext(path: str, ext: str) -> str:
class Pysubs2CLI:
def __init__(self):
def __init__(self) -> None:
parser = self.parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
prog="pysubs2",
description=dedent("""
@ -80,6 +81,12 @@ class Pysubs2CLI:
"If you wish to convert between encodings, make sure --input-enc is set correctly! "
"Otherwise, your output files will probably be corrupted. It's a good idea to "
"back up your files or use the -o option.")
parser.add_argument("--enc-error-handling", choices=("strict", "surrogateescape"),
default="surrogateescape",
help="Character encoding error handling for input and output. Defaults to 'surrogateescape' "
"which passes through unrecognized characters to output unchanged. Use 'strict' if "
"you want the command to fail when encountering a character incompatible with selected "
"input/output encoding.")
parser.add_argument("--fps", metavar="FPS", type=positive_float,
help="This argument specifies framerate for MicroDVD files. By default, framerate "
"is detected from the file. Use this when framerate specification is missing "
@ -116,13 +123,14 @@ class Pysubs2CLI:
extra_sub_options.add_argument("--sub-no-write-fps-declaration", action="store_true",
help="(output) omit writing FPS as first zero-length subtitle")
def __call__(self, argv):
def __call__(self, argv: List[str]) -> int:
try:
self.main(argv)
return self.main(argv)
except KeyboardInterrupt:
exit("\nAborted by user.")
print("\nAborted by user.", file=sys.stderr)
return 1
def main(self, argv):
def main(self, argv: List[str]) -> int:
args = self.parser.parse_args(argv)
errors = 0
@ -157,7 +165,7 @@ class Pysubs2CLI:
print("Skipping", path, "(not a file)")
errors += 1
else:
with open(path, encoding=args.input_enc) as infile:
with open(path, encoding=args.input_enc, errors=args.enc_error_handling) as infile:
subs = SSAFile.from_file(infile, args.input_format, args.fps, **extra_input_args)
self.process(subs, args)
@ -165,31 +173,37 @@ class Pysubs2CLI:
if args.output_format is None:
outpath = path
output_format = subs.format
assert output_format is not None, "subs.format must not be None (it was read from file)"
else:
ext = get_file_extension(args.output_format)
outpath = change_ext(path, ext)
output_format = args.output_format
assert output_format is not None, "args.output_format must not be None (see if/else)"
if args.output_dir is not None:
_, filename = op.split(outpath)
outpath = op.join(args.output_dir, filename)
with open(outpath, "w", encoding=args.output_enc) as outfile:
with open(outpath, "w", encoding=args.output_enc, errors=args.enc_error_handling) as outfile:
subs.to_file(outfile, output_format, args.fps, apply_styles=not args.clean,
**extra_output_args)
else:
infile = io.TextIOWrapper(sys.stdin.buffer, args.input_enc)
outfile = io.TextIOWrapper(sys.stdout.buffer, args.output_enc)
elif not sys.stdin.isatty():
infile = TextIOWrapper(sys.stdin.buffer, encoding=args.input_enc, errors=args.enc_error_handling)
outfile = TextIOWrapper(sys.stdout.buffer, encoding=args.output_enc, errors=args.enc_error_handling)
subs = SSAFile.from_file(infile, args.input_format, args.fps)
self.process(subs, args)
output_format = args.output_format or subs.format
assert output_format is not None, "output_format must not be None (it's either given or inferred at read time)"
subs.to_file(outfile, output_format, args.fps, apply_styles=not args.clean)
else:
self.parser.print_help()
errors += 1
return (0 if errors == 0 else 1)
return 0 if errors == 0 else 1
@staticmethod
def process(subs, args):
def process(subs: SSAFile, args: argparse.Namespace) -> None:
if args.shift is not None:
subs.shift(ms=args.shift)
elif args.shift_back is not None:
@ -202,7 +216,7 @@ class Pysubs2CLI:
subs.remove_miscellaneous_events()
def __main__():
def __main__() -> None:
cli = Pysubs2CLI()
rv = cli(sys.argv[1:])
sys.exit(rv)

@ -1,5 +1,5 @@
from dataclasses import dataclass
from typing import Union
from typing import Tuple, Union
from enum import IntEnum
@ -54,11 +54,11 @@ class Alignment(IntEnum):
return SSA_ALIGNMENT[self.value - 1]
SSA_ALIGNMENT = (1, 2, 3, 9, 10, 11, 5, 6, 7)
SSA_ALIGNMENT: Tuple[int, ...] = (1, 2, 3, 9, 10, 11, 5, 6, 7)
#: Version of the pysubs2 library.
VERSION = "1.6.1"
VERSION = "1.7.2"
IntOrFloat = Union[int, float]

@ -1,3 +1,15 @@
from typing import List
__all__ = [
"Pysubs2Error",
"UnknownFPSError",
"UnknownFileExtensionError",
"UnknownFormatIdentifierError",
"FormatAutodetectionError",
]
class Pysubs2Error(Exception):
"""Base class for pysubs2 exceptions."""
@ -7,16 +19,58 @@ class UnknownFPSError(Pysubs2Error):
class UnknownFileExtensionError(Pysubs2Error):
"""File extension does not pertain to any known subtitle format."""
"""
File extension does not pertain to any known subtitle format.
This exception is raised by `SSAFile.save()` when the ``format_`` parameter
is not specified. It will try to guess the desired format from output filename
and raise this exception when it fails.
Attributes:
ext (str): File extension
"""
def __init__(self, ext: str) -> None:
self.ext = ext
msg = f"File extension {ext!r} does not match any supported subtitle format"
super().__init__(msg)
class UnknownFormatIdentifierError(Pysubs2Error):
"""Unknown subtitle format identifier (ie. string like ``"srt"``)."""
"""
Unknown subtitle format identifier (ie. string like ``"srt"``).
This exception is used when interpreting ``format_`` parameter fails,
eg. in `SSAFile.save()`.
Attributes:
format_ (str): Format identifier
"""
def __init__(self, format_: str) -> None:
self.format_ = format_
msg = f"Format identifier {format_!r} does not match any supported subtitle format"
super().__init__(msg)
class FormatAutodetectionError(Pysubs2Error):
"""Subtitle format is ambiguous or unknown."""
"""
Subtitle format is ambiguous or unknown based on analysis of file fragment
This exception is raised by `SSAFile.load()` and related methods
when the ``format_`` parameter is not specified. It will try to guess
the input format based on reading first few kilobytes of the input file
and raise this exception if the format cannot be uniquely determined.
class ContentNotUsable(Pysubs2Error):
"""Current content not usable for specified format"""
Attributes:
content (str): Analyzed subtitle file content
formats (list[str]): Format identifiers for detected formats
"""
def __init__(self, content: str, formats: List[str]) -> None:
self.content = content
self.formats = formats
if not formats:
msg = "No suitable formats"
else:
msg = f"Multiple suitable formats ({formats!r})"
super().__init__(msg)

@ -1,6 +1,6 @@
from typing import Dict, Type
from .formatbase import FormatBase
from .base import FormatBase
from .microdvd import MicroDVDFormat
from .subrip import SubripFormat
from .jsonformat import JSONFormat
@ -8,7 +8,7 @@ from .substation import SubstationFormat
from .mpl2 import MPL2Format
from .tmp import TmpFormat
from .webvtt import WebVTTFormat
from .exceptions import *
from ..exceptions import UnknownFormatIdentifierError, UnknownFileExtensionError, FormatAutodetectionError
#: Dict mapping file extensions to format identifiers.
FILE_EXTENSION_TO_FORMAT_IDENTIFIER: Dict[str, str] = {
@ -75,6 +75,6 @@ def autodetect_format(content: str) -> str:
if len(formats) == 1:
return formats.pop()
elif not formats:
raise FormatAutodetectionError("No suitable formats")
raise FormatAutodetectionError(content=content, formats=[])
else:
raise FormatAutodetectionError(f"Multiple suitable formats ({formats!r})")
raise FormatAutodetectionError(content=content, formats=list(formats))

@ -1,6 +1,5 @@
from typing import Optional
import io
import pysubs2
from typing import Optional, Any, TextIO
from ..ssafile import SSAFile
class FormatBase:
@ -19,7 +18,7 @@ class FormatBase:
"""
@classmethod
def from_file(cls, subs: "pysubs2.SSAFile", fp: io.TextIOBase, format_: str, **kwargs):
def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
"""
Load subtitle file into an empty SSAFile.
@ -42,7 +41,7 @@ class FormatBase:
raise NotImplementedError("Parsing is not supported for this format")
@classmethod
def to_file(cls, subs: "pysubs2.SSAFile", fp: io.TextIOBase, format_: str, **kwargs):
def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
"""
Write SSAFile into a file.

@ -1,15 +1,18 @@
import dataclasses
import json
from .common import Color
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from .formatbase import FormatBase
from typing import Any, Optional, TextIO
from ..common import Color
from ..ssaevent import SSAEvent
from ..ssastyle import SSAStyle
from .base import FormatBase
from ..ssafile import SSAFile
# We're using Color dataclass
# https://stackoverflow.com/questions/51286748/make-the-python-json-encoder-support-pythons-new-dataclasses
class EnhancedJSONEncoder(json.JSONEncoder):
def default(self, o):
def default(self, o: Any) -> Any:
if dataclasses.is_dataclass(o):
return dataclasses.asdict(o)
return super().default(o)
@ -22,13 +25,15 @@ class JSONFormat(FormatBase):
This is essentially SubStation Alpha as JSON.
"""
@classmethod
def guess_format(cls, text):
def guess_format(cls, text: str) -> Optional[str]:
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if text.startswith("{\""):
if text.startswith("{\"") and "\"info\":" in text:
return "json"
else:
return None
@classmethod
def from_file(cls, subs, fp, format_, **kwargs):
def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
data = json.load(fp)
@ -47,7 +52,7 @@ class JSONFormat(FormatBase):
subs.events = [SSAEvent(**fields) for fields in data["events"]]
@classmethod
def to_file(cls, subs, fp, format_, **kwargs):
def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
"""See :meth:`pysubs2.formats.FormatBase.to_file()`"""
data = {
"info": dict(**subs.info),

@ -1,11 +1,15 @@
from functools import partial
import re
from .exceptions import UnknownFPSError
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from .formatbase import FormatBase
from typing import Optional, TextIO, Any, Match
from ..exceptions import UnknownFPSError
from ..ssaevent import SSAEvent
from ..ssastyle import SSAStyle
from .base import FormatBase
from .substation import parse_tags
from .time import ms_to_frames, frames_to_ms
from ..time import ms_to_frames, frames_to_ms
from ..ssafile import SSAFile
#: Matches a MicroDVD line.
MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)")
@ -14,14 +18,33 @@ MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)")
class MicroDVDFormat(FormatBase):
"""MicroDVD subtitle format implementation"""
@classmethod
def guess_format(cls, text):
def guess_format(cls, text: str) -> Optional[str]:
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if any(map(MICRODVD_LINE.match, text.splitlines())):
return "microdvd"
else:
return None
@classmethod
def from_file(cls, subs, fp, format_, fps=None, **kwargs):
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, fps: Optional[float] = None,
strict_fps_inference: bool = True, **kwargs: Any) -> None:
"""
See :meth:`pysubs2.formats.FormatBase.from_file()`
Keyword args:
strict_fps_inference: If True (default), in the case when ``fps`` is not given, it will be read
from the first subtitle text only if the start and end frame of this subtitle is ``{1}{1}``
(matches VLC Player behaviour), otherwise :class:`pysubs2.exceptions.UnknownFPSError` is raised.
When ``strict_fps_inference``
is False, framerate will be read from the first subtitle text in this case regardless of
start and end frame (which may result in bogus result, if the first subtitle is not supposed
to contain framerate). Before introduction of this option, the library behaved as if this
option was False.
.. versionchanged:: 1.7.0
Added the ``strict_fps_inference`` option.
"""
for line in fp:
match = MICRODVD_LINE.match(line)
if not match:
@ -31,10 +54,14 @@ class MicroDVDFormat(FormatBase):
fstart, fend = map(int, (fstart, fend))
if fps is None:
# We don't know the framerate, but it is customary to include
# it as text of the first subtitle. In that case, we skip
# this auxiliary subtitle and proceed with reading.
# We don't know the framerate, but it is customary to include it as text of the first subtitle,
# in the format {1}{1}fps, see pysubs2 issue #71 or VLC player source:
# https://code.videolan.org/videolan/vlc/-/blob/dccda0e133ff0a2e85de727cf19ddbc634f06b67/modules/demux/subtitle.c#L1014
# In that case, we skip this auxiliary subtitle and proceed with reading.
try:
if strict_fps_inference and not (fstart == 1 and fend == 1):
raise ValueError("Frame mismatch, expected {1}{1}")
fps = float(text)
subs.fps = fps
continue
@ -45,10 +72,10 @@ class MicroDVDFormat(FormatBase):
start, end = map(partial(frames_to_ms, fps=fps), (fstart, fend))
def prepare_text(text):
def prepare_text(text: str) -> str:
text = text.replace("|", r"\N")
def style_replacer(match: re.Match) -> str:
def style_replacer(match: Match[str]) -> str:
tags = [c for c in "biu" if c in match.group(0)]
return "{%s}" % "".join(f"\\{c}1" for c in tags)
@ -63,14 +90,15 @@ class MicroDVDFormat(FormatBase):
subs.append(ev)
@classmethod
def to_file(cls, subs, fp, format_, fps=None, write_fps_declaration=True, apply_styles=True, **kwargs):
def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, fps: Optional[float] = None,
write_fps_declaration: bool = True, apply_styles: bool = True, **kwargs: Any) -> None:
"""
See :meth:`pysubs2.formats.FormatBase.to_file()`
The only supported styling is marking whole lines italic.
Keyword args:
write_fps_declaration: If True, create a zero-duration first subtitle which will contain
write_fps_declaration: If True, create a zero-duration first subtitle ``{1}{1}`` which will contain
the fps.
apply_styles: If False, do not write any styling.
@ -94,12 +122,9 @@ class MicroDVDFormat(FormatBase):
# insert an artificial first line telling the framerate
if write_fps_declaration:
subs.insert(0, SSAEvent(start=0, end=0, text=str(fps)))
for line in subs:
if line.is_comment or line.is_drawing:
continue
subs.insert(0, SSAEvent(start=1, end=1, text=str(fps)))
for line in subs.get_text_events():
text = "|".join(line.plaintext.splitlines())
if apply_styles and is_entirely_italic(line):
text = "{Y:i}" + text
@ -107,8 +132,10 @@ class MicroDVDFormat(FormatBase):
start, end = map(to_frames, (line.start, line.end))
# XXX warn on underflow?
if start < 0: start = 0
if end < 0: end = 0
if start < 0:
start = 0
if end < 0:
end = 0
print("{%d}{%d}%s" % (start, end, text), file=fp)

@ -0,0 +1,60 @@
import re
from typing import Optional, Any, TextIO
from ..time import times_to_ms
from .base import FormatBase
from ..ssaevent import SSAEvent
from ..ssafile import SSAFile
# thanks to http://otsaloma.io/gaupol/doc/api/aeidon.files.mpl2_source.html
#: Matches one MPL2 line of the form ``[start][end]text``. Because of ``re.MULTILINE``,
#: the ``^`` anchor applies at the beginning of every line of the searched string.
#: Groups: (1) start and (2) end as optionally negative integers, (3) the rest of the line.
MPL2_FORMAT = re.compile(r"^\[(-?\d+)\]\[(-?\d+)\](.*)", re.MULTILINE)
class MPL2Format(FormatBase):
    """MPL2 subtitle format implementation"""

    @classmethod
    def guess_format(cls, text: str) -> Optional[str]:
        """
        See :meth:`pysubs2.formats.FormatBase.guess_format()`

        Returns ``"mpl2"`` when any line of ``text`` matches ``MPL2_FORMAT``, otherwise ``None``.
        """
        return "mpl2" if MPL2_FORMAT.search(text) else None

    @classmethod
    def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
        """
        See :meth:`pysubs2.formats.FormatBase.from_file()`

        Reads the whole of ``fp`` and appends one event to ``subs`` per line matched by
        ``MPL2_FORMAT``. The bracketed numbers are divided by 10 before being passed to
        ``times_to_ms`` as seconds.
        """
        def prepare_text(lines: str) -> str:
            # "|" separates the visual lines of one subtitle; each piece is trimmed
            # and the pieces are re-joined with the SubStation line break "\N".
            converted = []
            for raw_piece in lines.split("|"):
                piece = raw_piece.strip()
                if piece.startswith("/"):
                    # line beginning with '/' is in italics
                    piece = r"{\i1}%s{\i0}" % piece[1:].strip()
                converted.append(piece)
            return "\\N".join(converted)

        content = fp.read()
        for start, end, body in MPL2_FORMAT.findall(content):
            event = SSAEvent(
                start=times_to_ms(s=float(start) / 10),
                end=times_to_ms(s=float(end) / 10),
                text=prepare_text(body),
            )
            subs.append(event)

    @classmethod
    def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
        """
        See :meth:`pysubs2.formats.FormatBase.to_file()`

        No styling is supported at the moment.

        Each event from ``subs.get_text_events()`` is written as ``[start][end] text``,
        with start/end floor-divided by 100 and newlines in the plain text replaced by ``|``.
        """
        # TODO handle italics
        for event in subs.get_text_events():
            start_field = int(event.start // 100)
            end_field = int(event.end // 100)
            flattened = event.plaintext.replace("\n", "|")
            print(f"[{start_field}][{end_field}] {flattened}", file=fp)

@ -1,14 +1,14 @@
import re
import warnings
from typing import List
from typing import List, Sequence, Optional, TextIO, Any, Tuple
import pysubs2
from .formatbase import FormatBase
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from .base import FormatBase
from ..ssaevent import SSAEvent
from ..ssastyle import SSAStyle
from .substation import parse_tags
from .exceptions import ContentNotUsable
from .time import ms_to_times, make_time, TIMESTAMP, timestamp_to_ms
from ..time import ms_to_times, make_time, TIMESTAMP, timestamp_to_ms
from ..ssafile import SSAFile
#: Largest timestamp allowed in SubRip, ie. 99:59:59,999.
MAX_REPRESENTABLE_TIME = make_time(h=100) - 1
@ -30,11 +30,11 @@ class SubripFormat(FormatBase):
return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
@staticmethod
def timestamp_to_ms(groups):
def timestamp_to_ms(groups: Sequence[str]) -> int:
return timestamp_to_ms(groups)
@classmethod
def guess_format(cls, text):
def guess_format(cls, text: str) -> Optional[str]:
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if "[Script Info]" in text or "[V4+ Styles]" in text:
# disambiguation vs. SSA/ASS
@ -48,8 +48,11 @@ class SubripFormat(FormatBase):
if len(cls.TIMESTAMP.findall(line)) == 2:
return "srt"
return None
@classmethod
def from_file(cls, subs, fp, format_, keep_html_tags=False, keep_unknown_html_tags=False, **kwargs):
def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, keep_html_tags: bool = False,
keep_unknown_html_tags: bool = False, **kwargs: Any) -> None:
"""
See :meth:`pysubs2.formats.FormatBase.from_file()`
@ -70,8 +73,8 @@ class SubripFormat(FormatBase):
If False, these other HTML tags will be stripped from output
(in the previous example, you would get only ``example {\\i1}text{\\i0}``).
"""
timestamps = [] # (start, end)
following_lines = [] # contains lists of lines following each timestamp
timestamps: List[Tuple[int, int]] = [] # (start, end)
following_lines: List[List[str]] = [] # contains lists of lines following each timestamp
for line in fp:
stamps = cls.TIMESTAMP.findall(line)
@ -83,7 +86,7 @@ class SubripFormat(FormatBase):
if timestamps:
following_lines[-1].append(line)
def prepare_text(lines):
def prepare_text(lines: List[str]) -> str:
# Handle the "happy" empty subtitle case, which is timestamp line followed by blank line(s)
# followed by number line and timestamp line of the next subtitle. Fixes issue #11.
if (len(lines) >= 2
@ -108,11 +111,13 @@ class SubripFormat(FormatBase):
s = re.sub(r"\n", r"\\N", s) # convert newlines
return s
subs.events = [SSAEvent(start=start, end=end, text=prepare_text(lines))
for (start, end), lines in zip(timestamps, following_lines)]
for (start, end), lines in zip(timestamps, following_lines):
e = SSAEvent(start=start, end=end, text=prepare_text(lines))
subs.append(e)
@classmethod
def to_file(cls, subs, fp, format_, apply_styles=True, keep_ssa_tags=False, **kwargs):
def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, apply_styles: bool = True,
keep_ssa_tags: bool = False, **kwargs: Any) -> None:
"""
See :meth:`pysubs2.formats.FormatBase.to_file()`
@ -134,7 +139,7 @@ class SubripFormat(FormatBase):
is SRT which doesn't use line styles - this shouldn't be much
of an issue in practice.)
"""
def prepare_text(text: str, style: SSAStyle):
def prepare_text(text: str, style: SSAStyle) -> str:
text = text.replace(r"\h", " ")
text = text.replace(r"\n", "\n")
text = text.replace(r"\N", "\n")
@ -145,24 +150,20 @@ class SubripFormat(FormatBase):
else:
for fragment, sty in parse_tags(text, style, subs.styles):
if apply_styles:
if sty.italic: fragment = f"<i>{fragment}</i>"
if sty.underline: fragment = f"<u>{fragment}</u>"
if sty.strikeout: fragment = f"<s>{fragment}</s>"
if sty.drawing: raise ContentNotUsable
if sty.italic:
fragment = f"<i>{fragment}</i>"
if sty.underline:
fragment = f"<u>{fragment}</u>"
if sty.strikeout:
fragment = f"<s>{fragment}</s>"
body.append(fragment)
return re.sub("\n+", "\n", "".join(body).strip())
visible_lines = cls._get_visible_lines(subs)
lineno = 1
for line in visible_lines:
for lineno, line in enumerate(cls._get_visible_lines(subs), 1):
start = cls.ms_to_timestamp(line.start)
end = cls.ms_to_timestamp(line.end)
try:
text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
except ContentNotUsable:
continue
text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
print(lineno, file=fp)
print(start, "-->", end, file=fp)
@ -170,6 +171,5 @@ class SubripFormat(FormatBase):
lineno += 1
@classmethod
def _get_visible_lines(cls, subs: "SSAFile") -> List["SSAEvent"]:
    """
    Return the events of ``subs`` that should be written to the output.

    Delegates to ``subs.get_text_events()``; this is a separate hook so that
    subclasses can post-process the selection (e.g. re-order it).
    """
    return subs.get_text_events()

@ -1,22 +1,21 @@
import logging
import re
import warnings
from numbers import Number
from typing import Any, Union, Optional, Dict
from typing import Any, Union, Optional, Dict, Tuple, List, TextIO
import pysubs2
from .formatbase import FormatBase
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from .common import Color, Alignment, SSA_ALIGNMENT
from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP, TIMESTAMP_SHORT
from .base import FormatBase
from ..ssaevent import SSAEvent
from ..ssastyle import SSAStyle
from ..common import Color, Alignment, SSA_ALIGNMENT
from ..time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP, TIMESTAMP_SHORT
from ..ssafile import SSAFile
def ass_to_ssa_alignment(i: int) -> int:
    """
    Deprecated: translate an ASS alignment number to its SSA counterpart.

    Emits a ``DeprecationWarning`` on every call and returns the entry of
    ``SSA_ALIGNMENT`` at position ``i - 1`` (ASS alignment numbers are used
    as 1-based indices into that table).
    """
    warnings.warn("ass_to_ssa_alignment function is deprecated, please use the Alignment enum", DeprecationWarning)
    return SSA_ALIGNMENT[i-1]
def ssa_to_ass_alignment(i: int) -> int:
    """
    Deprecated: translate an SSA alignment number to its ASS counterpart.

    Emits a ``DeprecationWarning`` on every call and returns the 1-based
    position of ``i`` inside ``SSA_ALIGNMENT``. A value that is not present
    in the table makes the underlying ``index()`` call raise ``ValueError``.
    """
    warnings.warn("ssa_to_ass_alignment function is deprecated, please use the Alignment enum", DeprecationWarning)
    return SSA_ALIGNMENT.index(i) + 1
@ -86,7 +85,8 @@ def is_valid_field_content(s: str) -> bool:
return "\n" not in s and "," not in s
def parse_tags(text: str, style: SSAStyle = SSAStyle.DEFAULT_STYLE, styles: Optional[Dict[str, SSAStyle]] = None):
def parse_tags(text: str, style: SSAStyle = SSAStyle.DEFAULT_STYLE,
styles: Optional[Dict[str, SSAStyle]] = None) -> List[Tuple[str, SSAStyle]]:
"""
Split text into fragments with computed SSAStyles.
@ -117,14 +117,18 @@ def parse_tags(text: str, style: SSAStyle = SSAStyle.DEFAULT_STYLE, styles: Opti
s = style.copy() # reset to original line style
elif tag.startswith(r"\r"):
name = tag[2:]
if name in styles: # type: ignore[operator]
if name in styles:
# reset to named style
s = styles[name].copy() # type: ignore[index]
s = styles[name].copy()
else:
if "i" in tag: s.italic = "1" in tag
elif "b" in tag: s.bold = "1" in tag
elif "u" in tag: s.underline = "1" in tag
elif "s" in tag: s.strikeout = "1" in tag
if "i" in tag:
s.italic = "1" in tag
elif "b" in tag:
s.bold = "1" in tag
elif "u" in tag:
s.underline = "1" in tag
elif "s" in tag:
s.strikeout = "1" in tag
elif "p" in tag:
try:
scale = int(tag[2:])
@ -146,34 +150,36 @@ class SubstationFormat(FormatBase):
"""SubStation Alpha (ASS, SSA) subtitle format implementation"""
@staticmethod
def ms_to_timestamp(requested_ms: int) -> str:
    """
    Convert ms to 'H:MM:SS.cc'

    Negative input is clamped to 0. Input above ``MAX_REPRESENTABLE_TIME`` is
    clamped to that value and a ``RuntimeWarning`` is issued.

    The value is rounded to the nearest centisecond *before* it is split into
    hours/minutes/seconds, so that a carry produced by the rounding propagates
    into the seconds field instead of yielding a three-digit centisecond part.
    """
    if requested_ms < 0:
        requested_ms = 0
    if requested_ms > MAX_REPRESENTABLE_TIME:
        warnings.warn("Overflow in SubStation timestamp, clamping to MAX_REPRESENTABLE_TIME", RuntimeWarning)
        requested_ms = MAX_REPRESENTABLE_TIME

    # Aegisub does rounding, see https://github.com/Aegisub/Aegisub/blob/6f546951b4f004da16ce19ba638bf3eedefb9f31/libaegisub/include/libaegisub/ass/time.h#L32
    round_ms = ((requested_ms + 5) - (requested_ms + 5) % 10)
    h, m, s, ms = ms_to_times(round_ms)
    cs = ms // 10  # round_ms is a multiple of 10, so this division is exact
    return f"{h:01d}:{m:02d}:{s:02d}.{cs:02d}"
@classmethod
def guess_format(cls, text: str) -> Optional[str]:
    """
    See :meth:`pysubs2.formats.FormatBase.guess_format()`

    Looks for the style section name anywhere in ``text`` (case-insensitive):
    ``V4+ Styles`` identifies ``"ass"``, ``V4 Styles`` identifies ``"ssa"``.
    Returns ``None`` when neither is present.
    """
    # The "+" variant must be tested first; it is the more specific marker.
    if re.search(r"V4\+ Styles", text, re.IGNORECASE):
        return "ass"
    elif re.search(r"V4 Styles", text, re.IGNORECASE):
        return "ssa"
    else:
        return None
@classmethod
def from_file(cls, subs: "pysubs2.SSAFile", fp, format_, **kwargs):
def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
def string_to_field(f: str, v: str):
def string_to_field(f: str, v: str) -> Any:
# Per issue #45, we should handle the case where there is extra whitespace around the values.
# Extra whitespace is removed in non-string fields where it would break the parser otherwise,
# and in font name (where it doesn't really make sense). It is preserved in Dialogue string
@ -199,9 +205,13 @@ class SubstationFormat(FormatBase):
v = v.strip()
return rgba_to_color(v)
elif f in {"bold", "underline", "italic", "strikeout"}:
return v == "-1"
return v != "0"
elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:
return int(v)
try:
return int(v)
except ValueError:
warnings.warn(f"Failed to parse {f}, using default", RuntimeWarning)
return 0
elif f in {"fontsize", "scalex", "scaley", "spacing", "angle", "outline", "shadow"}:
return float(v)
elif f == "marked":
@ -244,7 +254,8 @@ class SubstationFormat(FormatBase):
inside_font_section = "Fonts" in line
inside_graphic_section = "Graphics" in line
elif inside_info_section or inside_aegisub_section:
if line.startswith(";"): continue # skip comments
if line.startswith(";"):
continue # skip comments
try:
k, v = line.split(":", 1)
if inside_info_section:
@ -280,7 +291,7 @@ class SubstationFormat(FormatBase):
elif line.startswith("Style:"):
_, rest = line.split(":", 1)
buf = rest.strip().split(",")
name, raw_fields = buf[0], buf[1:] # splat workaround for Python 2.7
name, *raw_fields = buf
field_dict = {f: string_to_field(f, v) for f, v in zip(STYLE_FIELDS[format_], raw_fields)}
sty = SSAStyle(**field_dict)
subs.styles[name] = sty
@ -307,7 +318,7 @@ class SubstationFormat(FormatBase):
current_attachment_name = None
@classmethod
def to_file(cls, subs: "pysubs2.SSAFile", fp, format_, header_notice=NOTICE, **kwargs):
def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, header_notice: str = NOTICE, **kwargs: Any) -> None:
"""See :meth:`pysubs2.formats.FormatBase.to_file()`"""
print("[Script Info]", file=fp)
for line in header_notice.splitlines(False):
@ -322,7 +333,7 @@ class SubstationFormat(FormatBase):
for k, v in subs.aegisub_project.items():
print(k, v, sep=": ", file=fp)
def field_to_string(f: str, v: Any, line: Union[SSAEvent, SSAStyle]):
def field_to_string(f: str, v: Any, line: Union[SSAEvent, SSAStyle]) -> str:
if f in {"start", "end"}:
return cls.ms_to_timestamp(v)
elif f == "marked":
@ -340,8 +351,12 @@ class SubstationFormat(FormatBase):
return str(alignment.value)
elif isinstance(v, bool):
return "-1" if v else "0"
elif isinstance(v, (str, Number)):
elif isinstance(v, int):
return str(v)
elif isinstance(v, float):
return str(int(v) if v.is_integer() else v)
elif isinstance(v, str):
return v
elif isinstance(v, Color):
if format_ == "ass":
return color_to_ass_rgba(v)

@ -1,11 +1,14 @@
import re
import warnings
from typing import Optional, TextIO, Any
from .formatbase import FormatBase
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from .base import FormatBase
from ..ssaevent import SSAEvent
from ..ssastyle import SSAStyle
from .substation import parse_tags
from .time import ms_to_times, make_time, TIMESTAMP_SHORT, timestamp_to_ms
from ..time import ms_to_times, make_time, TIMESTAMP_SHORT, timestamp_to_ms
from ..ssafile import SSAFile
#: Pattern that matches TMP line
TMP_LINE = re.compile(r"(\d{1,2}:\d{2}:\d{2}):(.+)")
@ -29,7 +32,7 @@ class TmpFormat(FormatBase):
return f"{h:02d}:{m:02d}:{s:02d}"
@classmethod
def guess_format(cls, text):
def guess_format(cls, text: str) -> Optional[str]:
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if "[Script Info]" in text or "[V4+ Styles]" in text:
# disambiguation vs. SSA/ASS
@ -39,14 +42,16 @@ class TmpFormat(FormatBase):
if TMP_LINE.match(line) and len(TMP_LINE.findall(line)) == 1:
return "tmp"
return None
@classmethod
def from_file(cls, subs, fp, format_, **kwargs):
def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
events = []
def prepare_text(text):
def prepare_text(text: str) -> str:
text = text.replace("|", r"\N") # convert newlines
text = re.sub(r"< *u *>", "{\\\\u1}", text) # not r" for Python 2.7 compat, triggers unicodeescape
text = re.sub(r"< *u *>", r"{\\u1}", text)
text = re.sub(r"< */? *[a-zA-Z][^>]*>", "", text) # strip other HTML tags
return text
@ -56,7 +61,9 @@ class TmpFormat(FormatBase):
continue
start, text = match.groups()
start = timestamp_to_ms(TIMESTAMP_SHORT.match(start).groups())
match2 = TIMESTAMP_SHORT.match(start)
assert match2 is not None, "TMP_LINE contains TIMESTAMP_SHORT"
start = timestamp_to_ms(match2.groups())
# Unfortunately, end timestamp is not given; try to estimate something reasonable:
# start + 500 ms + 67 ms/character (15 chars per second)
@ -72,7 +79,7 @@ class TmpFormat(FormatBase):
subs.events = events
@classmethod
def to_file(cls, subs, fp, format_, apply_styles=True, **kwargs):
def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, apply_styles: bool = True, **kwargs: Any) -> None:
"""
See :meth:`pysubs2.formats.FormatBase.to_file()`
@ -82,28 +89,24 @@ class TmpFormat(FormatBase):
apply_styles: If False, do not write any styling.
"""
def prepare_text(text, style):
def prepare_text(text: str, style: SSAStyle) -> str:
body = []
skip = False
for fragment, sty in parse_tags(text, style, subs.styles):
fragment = fragment.replace(r"\h", " ")
fragment = fragment.replace(r"\n", "\n")
fragment = fragment.replace(r"\N", "\n")
if apply_styles:
if sty.italic: fragment = f"<i>{fragment}</i>"
if sty.underline: fragment = f"<u>{fragment}</u>"
if sty.strikeout: fragment = f"<s>{fragment}</s>"
if sty.drawing: skip = True
if sty.italic:
fragment = f"<i>{fragment}</i>"
if sty.underline:
fragment = f"<u>{fragment}</u>"
if sty.strikeout:
fragment = f"<s>{fragment}</s>"
body.append(fragment)
if skip:
return ""
else:
return re.sub("\n+", "\n", "".join(body).strip())
visible_lines = (line for line in subs if not line.is_comment)
return re.sub("\n+", "\n", "".join(body).strip())
for line in visible_lines:
for line in subs.get_text_events():
start = cls.ms_to_timestamp(line.start)
text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))

@ -1,16 +1,17 @@
import re
from typing import List
from typing import List, Sequence, Optional, TextIO, Any
import pysubs2
from ..ssaevent import SSAEvent
from .subrip import SubripFormat
from .time import make_time
from ..time import make_time
from ..ssafile import SSAFile
class WebVTTFormat(SubripFormat):
"""
Web Video Text Tracks (WebVTT) subtitle format implementation
Currently, this shares implementation with :class:`pysubs2.subrip.SubripFormat`.
Currently, this shares implementation with :class:`pysubs2.formats.subrip.SubripFormat`.
"""
TIMESTAMP = re.compile(r"(\d{0,4}:)?(\d{2}):(\d{2})\.(\d{2,3})")
@ -20,7 +21,7 @@ class WebVTTFormat(SubripFormat):
return result.replace(',', '.')
@staticmethod
def timestamp_to_ms(groups):
def timestamp_to_ms(groups: Sequence[str]) -> int:
_h, _m, _s, _ms = groups
if not _h:
h = 0
@ -30,22 +31,24 @@ class WebVTTFormat(SubripFormat):
return make_time(h=h, m=m, s=s, ms=ms)
@classmethod
def guess_format(cls, text: str) -> Optional[str]:
    """
    See :meth:`pysubs2.formats.FormatBase.guess_format()`

    Returns ``"vtt"`` when ``text``, ignoring leading whitespace, starts with
    the ``WEBVTT`` signature; otherwise returns ``None``.
    """
    if text.lstrip().startswith("WEBVTT"):
        return "vtt"
    else:
        return None
@classmethod
def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:  # type: ignore[override]
    """
    See :meth:`pysubs2.formats.SubripFormat.to_file()`, additional SRT options are supported by VTT as well

    Writes the ``WEBVTT`` signature followed by a blank line, then lets the
    parent class write the cues; all keyword options are passed through.
    """
    # print() appends its own newline, so this emits "WEBVTT" plus an empty line.
    print("WEBVTT\n", file=fp)
    return super(WebVTTFormat, cls).to_file(
        subs=subs, fp=fp, format_=format_, **kwargs)
@classmethod
def _get_visible_lines(cls, subs: "SSAFile") -> List["SSAEvent"]:
    """
    Return the events to be written, ordered by start time.

    Takes the parent class's selection and sorts that list in place by each
    event's ``start`` before returning it.
    """
    visible_lines = super()._get_visible_lines(subs)
    visible_lines.sort(key=lambda e: e.start)
    return visible_lines

@ -5,9 +5,9 @@ See https://github.com/openai/whisper
"""
from .ssaevent import SSAEvent
from .ssafile import SSAFile
from .time import make_time
from ..ssaevent import SSAEvent
from ..ssafile import SSAFile
from ..time import make_time
from typing import Union, List, Dict, Any

@ -1,54 +0,0 @@
import re
from .time import times_to_ms
from .formatbase import FormatBase
from .ssaevent import SSAEvent
# thanks to http://otsaloma.io/gaupol/doc/api/aeidon.files.mpl2_source.html
MPL2_FORMAT = re.compile(r"^\[(-?\d+)\]\[(-?\d+)\](.*)", re.MULTILINE)
class MPL2Format(FormatBase):
    """MPL2 subtitle format implementation"""

    @classmethod
    def guess_format(cls, text):
        """
        See :meth:`pysubs2.formats.FormatBase.guess_format()`

        Returns ``"mpl2"`` when ``MPL2_FORMAT`` matches anywhere in ``text``,
        otherwise ``None``.
        """
        if MPL2_FORMAT.search(text):
            return "mpl2"
        return None

    @classmethod
    def from_file(cls, subs, fp, format_, **kwargs):
        """
        See :meth:`pysubs2.formats.FormatBase.from_file()`

        Every ``[start][end]text`` match found in the input becomes one
        ``SSAEvent``. The bracketed numbers are divided by 10 and passed to
        ``times_to_ms`` as seconds. ``subs.events`` is replaced by the result.
        """
        def prepare_text(lines):
            # "|" separates the display lines of one subtitle.
            out = []
            for s in lines.split("|"):
                s = s.strip()
                if s.startswith("/"):
                    # line beginning with '/' is in italics
                    s = r"{\i1}%s{\i0}" % s[1:].strip()
                out.append(s)
            return "\\N".join(out)

        # getvalue() exists only on in-memory buffers such as io.StringIO;
        # ordinary text files opened with open() must be consumed with read().
        getvalue = getattr(fp, "getvalue", None)
        content = getvalue() if callable(getvalue) else fp.read()

        subs.events = [SSAEvent(start=times_to_ms(s=float(start) / 10), end=times_to_ms(s=float(end) / 10),
                                text=prepare_text(text)) for start, end, text in MPL2_FORMAT.findall(content)]

    @classmethod
    def to_file(cls, subs, fp, format_, **kwargs):
        """
        See :meth:`pysubs2.formats.FormatBase.to_file()`

        No styling is supported at the moment.

        Comment events are skipped. Each remaining event is written as
        ``[start][end] text`` with both times floor-divided by 100 and the
        line breaks of the plain text replaced by ``|``.
        """
        # TODO handle italics
        for line in subs:
            if line.is_comment:
                continue

            print("[{start}][{end}] {text}".format(start=int(line.start // 100),
                                                   end=int(line.end // 100),
                                                   text=line.plaintext.replace("\n", "|")),
                  file=fp)

@ -1,6 +1,6 @@
import re
import warnings
from typing import Optional, Dict, Any, ClassVar
from typing import Optional, Dict, Any, ClassVar, FrozenSet
import dataclasses
from .common import IntOrFloat
@ -41,7 +41,7 @@ class SSAEvent:
type: str = "Dialogue" #: Line type (Dialogue/Comment)
@property
def FIELDS(self) -> FrozenSet[str]:
    """
    All fields in SSAEvent.

    Deprecated: emits a ``DeprecationWarning`` on every access. The names are
    collected from ``dataclasses.fields()`` and returned as a frozenset.
    """
    warnings.warn("Deprecated in 1.2.0 - it's a dataclass now", DeprecationWarning)
    return frozenset(field.name for field in dataclasses.fields(self))
@ -57,7 +57,7 @@ class SSAEvent:
return self.end - self.start
@duration.setter
def duration(self, ms: int):
def duration(self, ms: int) -> None:
if ms >= 0:
self.end = self.start + ms
else:
@ -74,7 +74,7 @@ class SSAEvent:
return self.type == "Comment"
@is_comment.setter
def is_comment(self, value: bool):
def is_comment(self, value: bool) -> None:
if value:
self.type = "Comment"
else:
@ -83,9 +83,18 @@ class SSAEvent:
@property
def is_drawing(self) -> bool:
    """
    Returns True if line is SSA drawing tag (ie. not text)

    The event text is run through ``parse_tags`` and the result is True as
    soon as any returned style has its ``drawing`` flag set.
    """
    # NOTE(review): imported inside the property rather than at module level,
    # presumably to avoid a circular import - confirm before moving it.
    from .formats.substation import parse_tags
    return any(sty.drawing for _, sty in parse_tags(self.text))
@property
def is_text(self) -> bool:
    """
    Returns False for SSA drawings and comment lines, True otherwise

    In general, for non-SSA formats these events should be ignored.
    """
    # is_comment is checked first, so is_drawing is not evaluated for comments.
    return not (self.is_comment or self.is_drawing)
@property
def plaintext(self) -> str:
"""
@ -102,11 +111,11 @@ class SSAEvent:
return text
@plaintext.setter
def plaintext(self, text: str) -> None:
    """
    Set the event text from a plain string.

    Every newline character in ``text`` is stored as the two-character
    sequence ``\\N``; the result is assigned to ``self.text``.
    """
    self.text = text.replace("\n", r"\N")
def shift(self, h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
frames: Optional[int]=None, fps: Optional[float]=None):
def shift(self, h: IntOrFloat = 0, m: IntOrFloat = 0, s: IntOrFloat = 0, ms: IntOrFloat = 0,
frames: Optional[int] = None, fps: Optional[float] = None) -> None:
"""
Shift start and end times.
@ -132,36 +141,36 @@ class SSAEvent:
else:
raise TypeError("Cannot compare to non-SSAEvent object")
def __eq__(self, other: object) -> bool:
    """
    Compare two events by timing only.

    Events are equal when both ``start`` and ``end`` match; text and all
    other fields are not taken into account. Returns ``NotImplemented`` when
    ``other`` is not an ``SSAEvent``.
    """
    if not isinstance(other, SSAEvent):
        return NotImplemented
    return self.start == other.start and self.end == other.end
def __ne__(self, other: object) -> bool:
    """
    Inverse of the timing-only equality.

    True when ``start`` or ``end`` differs; other fields are not compared.
    Returns ``NotImplemented`` when ``other`` is not an ``SSAEvent``.
    """
    if not isinstance(other, SSAEvent):
        return NotImplemented
    return self.start != other.start or self.end != other.end
def __lt__(self, other: object) -> bool:
    """
    Order events by ``(start, end)``: strictly-less-than.

    Returns ``NotImplemented`` when ``other`` is not an ``SSAEvent``.
    """
    if not isinstance(other, SSAEvent):
        return NotImplemented
    return (self.start, self.end) < (other.start, other.end)
def __le__(self, other: object) -> bool:
    """
    Order events by ``(start, end)``: less-than-or-equal.

    Returns ``NotImplemented`` when ``other`` is not an ``SSAEvent``.
    """
    if not isinstance(other, SSAEvent):
        return NotImplemented
    return (self.start, self.end) <= (other.start, other.end)
def __gt__(self, other: object) -> bool:
    """
    Order events by ``(start, end)``: strictly-greater-than.

    Returns ``NotImplemented`` when ``other`` is not an ``SSAEvent``.
    """
    if not isinstance(other, SSAEvent):
        return NotImplemented
    return (self.start, self.end) > (other.start, other.end)
def __ge__(self, other: object) -> bool:
    """
    Order events by ``(start, end)``: greater-than-or-equal.

    Returns ``NotImplemented`` when ``other`` is not an ``SSAEvent``.
    """
    if not isinstance(other, SSAEvent):
        return NotImplemented
    return (self.start, self.end) >= (other.start, other.end)
def __repr__(self) -> str:
    """Return a debugging summary with the event type, formatted start/end times and the raw text."""
    return f"<SSAEvent type={self.type} start={ms_to_str(self.start)} end={ms_to_str(self.end)} text={self.text!r}>"

@ -1,20 +1,16 @@
from collections.abc import MutableSequence
import io
from io import open
from itertools import chain
import os.path
import logging
from typing import Optional, List, Dict, Iterable, Any, overload, Iterator
from typing import Optional, List, Dict, Iterable, Any, overload, Iterator, TextIO, Tuple, MutableSequence
from .common import IntOrFloat
from .formats import autodetect_format, get_format_class, get_format_identifier
from .substation import is_valid_field_content
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from .time import make_time, ms_to_str
class SSAFile(MutableSequence):
class SSAFile(MutableSequence[SSAEvent]):
"""
Subtitle file in SubStation Alpha format.
@ -32,7 +28,7 @@ class SSAFile(MutableSequence):
"""
DEFAULT_INFO = {
DEFAULT_INFO: Dict[str, str] = {
"WrapStyle": "0",
"ScaledBorderAndShadow": "yes",
"Collisions": "Normal"
@ -53,7 +49,8 @@ class SSAFile(MutableSequence):
# ------------------------------------------------------------------------
@classmethod
def load(cls, path: str, encoding: str="utf-8", format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
def load(cls, path: str, encoding: str = "utf-8", format_: Optional[str] = None, fps: Optional[float] = None,
errors: Optional[str] = None, **kwargs: Any) -> "SSAFile":
"""
Load subtitle file from given path.
@ -62,12 +59,23 @@ class SSAFile(MutableSequence):
See also:
Specific formats may implement additional loading options,
please refer to documentation of the implementation classes
(eg. :meth:`pysubs2.subrip.SubripFormat.from_file()`)
(eg. :meth:`pysubs2.formats.subrip.SubripFormat.from_file()`)
Arguments:
path (str): Path to subtitle file.
encoding (str): Character encoding of input file.
Defaults to UTF-8, you may need to change this.
errors (Optional[str]): Error handling for character encoding
of input file. Defaults to ``None``; use the value ``"surrogateescape"``
for pass-through of bytes not supported by selected encoding via
`Unicode surrogate pairs <https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates>`_.
See documentation of builtin ``open()`` function for more.
.. versionchanged:: 1.7.0
The ``errors`` parameter was introduced to facilitate
pass-through of subtitle files with unknown text encoding.
Previous versions of the library behaved as if ``errors=None``.
format_ (str): Optional, forces use of specific parser
(eg. `"srt"`, `"ass"`). Otherwise, format is detected
automatically from file contents. This argument should
@ -96,23 +104,32 @@ class SSAFile(MutableSequence):
Example:
>>> subs1 = pysubs2.load("subrip-subtitles.srt")
>>> subs2 = pysubs2.load("microdvd-subtitles.sub", fps=23.976)
>>> subs3 = pysubs2.load("subrip-subtitles-with-fancy-tags.srt", keep_unknown_html_tags=True)
>>> subs2 = pysubs2.load("microdvd-subtitles.sub",fps=23.976)
>>> subs3 = pysubs2.load("subrip-subtitles-with-fancy-tags.srt",keep_unknown_html_tags=True)
"""
with open(path, encoding=encoding) as fp:
with open(path, encoding=encoding, errors=errors) as fp:
return cls.from_file(fp, format_, fps=fps, **kwargs)
@classmethod
def from_string(cls, string: str, format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
def from_string(cls, string: str, format_: Optional[str] = None, fps: Optional[float] = None,
**kwargs: Any) -> "SSAFile":
"""
Load subtitle file from string.
See :meth:`SSAFile.load()` for full description.
Arguments:
string (str): Subtitle file in a string. Note that the string
must be Unicode (in Python 2).
string (str): Subtitle file in a string. Note that the string must be Unicode (``str``, not ``bytes``).
format_ (str): Optional, forces use of specific parser
(eg. `"srt"`, `"ass"`). Otherwise, format is detected
automatically from file contents. This argument should
be rarely needed.
fps (float): Framerate for frame-based formats (MicroDVD),
for other formats this argument is ignored. Framerate might
be detected from the file, in which case you don't need
to specify it here (when given, this argument overrides
autodetection).
Returns:
SSAFile
@ -130,7 +147,8 @@ class SSAFile(MutableSequence):
return cls.from_file(fp, format_, fps=fps, **kwargs)
@classmethod
def from_file(cls, fp: io.TextIOBase, format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
def from_file(cls, fp: TextIO, format_: Optional[str] = None, fps: Optional[float] = None,
**kwargs: Any) -> "SSAFile":
"""
Read subtitle file from file object.
@ -141,8 +159,17 @@ class SSAFile(MutableSequence):
or :meth:`SSAFile.from_string()` is preferable.
Arguments:
fp (file object): A file object, ie. :class:`io.TextIOBase` instance.
fp (file object): A file object, ie. :class:`TextIO` instance.
Note that the file must be opened in text mode (as opposed to binary).
format_ (str): Optional, forces use of specific parser
(eg. `"srt"`, `"ass"`). Otherwise, format is detected
automatically from file contents. This argument should
be rarely needed.
fps (float): Framerate for frame-based formats (MicroDVD),
for other formats this argument is ignored. Framerate might
be detected from the file, in which case you don't need
to specify it here (when given, this argument overrides
autodetection).
Returns:
SSAFile
@ -164,7 +191,8 @@ class SSAFile(MutableSequence):
impl.from_file(subs, fp, format_, fps=fps, **kwargs)
return subs
def save(self, path: str, encoding: str="utf-8", format_: Optional[str]=None, fps: Optional[float]=None, **kwargs):
def save(self, path: str, encoding: str = "utf-8", format_: Optional[str] = None, fps: Optional[float] = None,
errors: Optional[str] = None, **kwargs: Any) -> None:
"""
Save subtitle file to given path.
@ -173,7 +201,7 @@ class SSAFile(MutableSequence):
See also:
Specific formats may implement additional saving options,
please refer to documentation of the implementation classes
(eg. :meth:`pysubs2.subrip.SubripFormat.to_file()`)
(eg. :meth:`pysubs2.formats.subrip.SubripFormat.to_file()`)
Arguments:
path (str): Path to subtitle file.
@ -191,6 +219,17 @@ class SSAFile(MutableSequence):
different framerate, use this argument. See also
:meth:`SSAFile.transform_framerate()` for fixing bad
frame-based to time-based conversions.
errors (Optional[str]): Error handling for character encoding
of output file. Defaults to ``None``; use the value ``"surrogateescape"``
for pass-through of bytes not supported by selected encoding via
`Unicode surrogate pairs <https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates>`_.
See documentation of builtin ``open()`` function for more.
.. versionchanged:: 1.7.0
The ``errors`` parameter was introduced to facilitate
pass-through of subtitle files with unknown text encoding.
Previous versions of the library behaved as if ``errors=None``.
kwargs: Extra options for the writer.
Raises:
@ -205,10 +244,10 @@ class SSAFile(MutableSequence):
ext = os.path.splitext(path)[1].lower()
format_ = get_format_identifier(ext)
with open(path, "w", encoding=encoding) as fp:
with open(path, "w", encoding=encoding, errors=errors) as fp:
self.to_file(fp, format_, fps=fps, **kwargs)
def to_string(self, format_: str, fps: Optional[float]=None, **kwargs) -> str:
def to_string(self, format_: str, fps: Optional[float] = None, **kwargs: Any) -> str:
"""
Get subtitle file as a string.
@ -222,7 +261,7 @@ class SSAFile(MutableSequence):
self.to_file(fp, format_, fps=fps, **kwargs)
return fp.getvalue()
def to_file(self, fp: io.TextIOBase, format_: str, fps: Optional[float]=None, **kwargs):
def to_file(self, fp: TextIO, format_: str, fps: Optional[float] = None, **kwargs: Any) -> None:
"""
Write subtitle file to file object.
@ -233,7 +272,7 @@ class SSAFile(MutableSequence):
or :meth:`SSAFile.to_string()` is preferable.
Arguments:
fp (file object): A file object, ie. :class:`io.TextIOBase` instance.
fp (file object): A file object, ie. :class:`TextIO` instance.
Note that the file must be opened in text mode (as opposed to binary).
"""
@ -244,8 +283,8 @@ class SSAFile(MutableSequence):
# Retiming subtitles
# ------------------------------------------------------------------------
def shift(self, h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
frames: Optional[int]=None, fps: Optional[float]=None):
def shift(self, h: IntOrFloat = 0, m: IntOrFloat = 0, s: IntOrFloat = 0, ms: IntOrFloat = 0,
frames: Optional[int] = None, fps: Optional[float] = None) -> None:
"""
Shift all subtitles by constant time amount.
@ -253,7 +292,10 @@ class SSAFile(MutableSequence):
case, specify both frames and fps. h, m, s, ms will be ignored.
Arguments:
h, m, s, ms: Integer or float values, may be positive or negative.
h: Integer or float values, may be positive or negative (hours).
m: Integer or float values, may be positive or negative (minutes).
s: Integer or float values, may be positive or negative (seconds).
ms: Integer or float values, may be positive or negative (milliseconds).
frames (int): When specified, must be an integer number of frames.
May be positive or negative. fps must be also specified.
fps (float): When specified, must be a positive number.
@ -267,7 +309,7 @@ class SSAFile(MutableSequence):
line.start += delta
line.end += delta
def transform_framerate(self, in_fps: float, out_fps: float):
def transform_framerate(self, in_fps: float, out_fps: float) -> None:
"""
Rescale all timestamps by ratio of in_fps/out_fps.
@ -294,7 +336,7 @@ class SSAFile(MutableSequence):
# Working with styles
# ------------------------------------------------------------------------
def rename_style(self, old_name: str, new_name: str):
def rename_style(self, old_name: str, new_name: str) -> None:
"""
Rename a style, including references to it.
@ -308,6 +350,8 @@ class SSAFile(MutableSequence):
or new_name is taken.
"""
from .formats.substation import is_valid_field_content
if old_name not in self.styles:
raise KeyError(f"Style {old_name!r} not found")
if new_name in self.styles:
@ -323,7 +367,7 @@ class SSAFile(MutableSequence):
if line.style == old_name:
line.style = new_name
def import_styles(self, subs: "SSAFile", overwrite: bool=True):
def import_styles(self, subs: "SSAFile", overwrite: bool = True) -> None:
"""
Merge in styles from other SSAFile.
@ -344,7 +388,7 @@ class SSAFile(MutableSequence):
# Helper methods
# ------------------------------------------------------------------------
def remove_miscellaneous_events(self):
def remove_miscellaneous_events(self) -> None:
"""
Remove subtitles which appear to be non-essential (the --clean in CLI)
@ -357,7 +401,7 @@ class SSAFile(MutableSequence):
new_events = []
duplicate_text_ids = set()
times_to_texts = {}
times_to_texts: Dict[Tuple[int, int], List[str]] = {}
for i, e in enumerate(self):
tmp = times_to_texts.setdefault((e.start, e.end), [])
if tmp.count(e.plaintext) > 0:
@ -376,7 +420,13 @@ class SSAFile(MutableSequence):
self.events = new_events
def equals(self, other: "SSAFile"):
def get_text_events(self) -> List["SSAEvent"]:
    """
    Return list of events excluding SSA comment lines and lines with SSA drawing tags

    A new list is built on every call, containing (in file order) the events
    whose ``is_text`` property is true.
    """
    return [e for e in self if e.is_text]
def equals(self, other: "SSAFile") -> bool:
"""
Equality of two SSAFiles.
@ -435,7 +485,8 @@ class SSAFile(MutableSequence):
return False
elif self_style != other_style:
for k in self_style.FIELDS:
if getattr(self_style, k) != getattr(other_style, k): logging.debug("difference in field %r", k)
if getattr(self_style, k) != getattr(other_style, k):
logging.debug("difference in field %r", k)
logging.debug("style %r differs (self=%r, other=%r)", key, self_style.as_dict(), other_style.as_dict())
return False
@ -446,7 +497,8 @@ class SSAFile(MutableSequence):
for i, (self_event, other_event) in enumerate(zip(self.events, other.events)):
if not self_event.equals(other_event):
for k in self_event.FIELDS:
if getattr(self_event, k) != getattr(other_event, k): logging.debug("difference in field %r", k)
if getattr(self_event, k) != getattr(other_event, k):
logging.debug("difference in field %r", k)
logging.debug("event %d differs (self=%r, other=%r)", i, self_event.as_dict(), other_event.as_dict())
return False
@ -454,7 +506,7 @@ class SSAFile(MutableSequence):
else:
raise TypeError("Cannot compare to non-SSAFile object")
def __repr__(self):
def __repr__(self) -> str:
if self.events:
max_time = max(ev.end for ev in self)
s = f"<SSAFile with {len(self)} events and {len(self.styles)} styles, last timestamp {ms_to_str(max_time)}>"
@ -467,7 +519,7 @@ class SSAFile(MutableSequence):
# MutableSequence implementation + sort()
# ------------------------------------------------------------------------
def sort(self) -> None:
    """Sort subtitles time-wise, in-place.

    Delegates to ``list.sort()`` on ``self.events``, so the order is
    whatever the events' own comparison defines (presumably by time, as
    the summary says; the comparison itself is defined on the event
    class, not here). Returns ``None`` like other in-place mutators.
    """
    self.events.sort()
@ -476,24 +528,24 @@ class SSAFile(MutableSequence):
@overload
def __getitem__(self, item: int) -> SSAEvent:
    pass

@overload
def __getitem__(self, s: slice) -> List[SSAEvent]:
    pass

def __getitem__(self, item: Any) -> Any:
    """Return the event(s) stored at ``item`` in ``self.events``.

    The two ``@overload`` stubs above exist only for type checkers: an
    ``int`` index yields a single event, a ``slice`` yields a list of
    events. At runtime the lookup is forwarded unchanged to the
    underlying list, so its indexing rules and errors apply.
    """
    return self.events[item]
@overload
def __setitem__(self, key: int, value: SSAEvent):
def __setitem__(self, key: int, value: SSAEvent) -> None:
pass
@overload
def __setitem__(self, keys: slice, values: Iterable[SSAEvent]):
def __setitem__(self, keys: slice, values: Iterable[SSAEvent]) -> None:
pass
def __setitem__(self, key, value):
def __setitem__(self, key: Any, value: Any) -> None:
if isinstance(key, int):
if isinstance(value, SSAEvent):
self.events[key] = value
@ -509,21 +561,24 @@ class SSAFile(MutableSequence):
raise TypeError("Bad key type")
@overload
def __delitem__(self, key: int) -> None:
    pass

@overload
def __delitem__(self, s: slice) -> None:
    pass

def __delitem__(self, key: Any) -> None:
    """Delete the event(s) at ``key`` from ``self.events``.

    The ``@overload`` stubs above only describe the accepted key kinds
    (``int`` or ``slice``) for type checkers. The deletion itself is
    forwarded unchanged to the underlying list.
    """
    del self.events[key]
def __len__(self) -> int:
    """Return the number of events currently held in ``self.events``."""
    return len(self.events)
def insert(self, index: int, value: SSAEvent) -> None:
    """Insert ``value`` into ``self.events`` before position ``index``.

    Args:
        index: Position passed straight through to ``list.insert``.
        value: The event to add; must be an ``SSAEvent`` instance.

    Raises:
        TypeError: If ``value`` is not an ``SSAEvent``. The check runs
            before the list is touched, so a rejected value leaves
            ``self.events`` unchanged.
    """
    if not isinstance(value, SSAEvent):
        raise TypeError("SSAFile.events must contain only SSAEvent objects")
    self.events.insert(index, value)
from .formats import autodetect_format, get_format_class, get_format_identifier # noqa: E402

@ -1,5 +1,5 @@
import warnings
from typing import Dict, Any, ClassVar
from typing import Dict, Any, ClassVar, FrozenSet
import dataclasses
from .common import Color, Alignment
@ -23,7 +23,7 @@ class SSAStyle:
DEFAULT_STYLE: ClassVar["SSAStyle"] = None # type: ignore[assignment]
@property
def FIELDS(self) -> FrozenSet[str]:
    """All fields in SSAStyle.

    Deprecated accessor: emits a ``DeprecationWarning`` on every access
    and returns the names of the dataclass fields of ``self`` as a
    frozenset.
    """
    # stacklevel=2 attributes the warning to the code reading ``.FIELDS``
    # instead of to this property body, so users can find the call site.
    warnings.warn("Deprecated in 1.2.0 - it's a dataclass now", DeprecationWarning, stacklevel=2)
    return frozenset(field.name for field in dataclasses.fields(self))
@ -66,7 +66,7 @@ class SSAStyle:
# dataclasses.asdict() would recursively dictify Color objects, which we don't want
return {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
def __repr__(self):
def __repr__(self) -> str:
return f"<SSAStyle {self.fontsize!r}px" \
f"{' bold' if self.bold else ''}" \
f"{' italic' if self.italic else ''}" \

@ -1,7 +1,7 @@
from collections import namedtuple
import re
from typing import Optional, List, Tuple, Sequence
from pysubs2.common import IntOrFloat
from typing import Optional, Sequence, NamedTuple
from .common import IntOrFloat
#: Pattern that matches both SubStation and SubRip timestamps.
TIMESTAMP = re.compile(r"(\d{1,2}):(\d{1,2}):(\d{1,2})[.,](\d{1,3})")
@ -9,11 +9,17 @@ TIMESTAMP = re.compile(r"(\d{1,2}):(\d{1,2}):(\d{1,2})[.,](\d{1,3})")
#: Pattern that matches H:MM:SS or HH:MM:SS timestamps.
TIMESTAMP_SHORT = re.compile(r"(\d{1,2}):(\d{2}):(\d{2})")
class Times(NamedTuple):
    """Named tuple (h, m, s, ms) of ints."""
    # Typed replacement for the former collections.namedtuple("Times", ...):
    # same field names in the same order, so positional construction,
    # unpacking and attribute access keep working.
    h: int
    m: int
    s: int
    ms: int
def make_time(h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
frames: Optional[int]=None, fps: Optional[float]=None):
def make_time(h: IntOrFloat = 0, m: IntOrFloat = 0, s: IntOrFloat = 0, ms: IntOrFloat = 0,
frames: Optional[int] = None, fps: Optional[float] = None) -> int:
"""
Convert time to milliseconds.
@ -38,7 +44,7 @@ def make_time(h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=
raise ValueError("Both fps and frames must be specified")
def timestamp_to_ms(groups: Sequence[str]):
def timestamp_to_ms(groups: Sequence[str]) -> int:
"""
Convert groups from :data:`pysubs2.time.TIMESTAMP` or :data:`pysubs2.time.TIMESTAMP_SHORT`
match to milliseconds.
@ -50,6 +56,11 @@ def timestamp_to_ms(groups: Sequence[str]):
1000
"""
h: int
m: int
s: int
ms: int
frac: int
if len(groups) == 4:
h, m, s, frac = map(int, groups)
ms = frac * 10**(3 - len(groups[-1]))
@ -65,7 +76,7 @@ def timestamp_to_ms(groups: Sequence[str]):
return ms
def times_to_ms(h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0) -> int:
def times_to_ms(h: IntOrFloat = 0, m: IntOrFloat = 0, s: IntOrFloat = 0, ms: IntOrFloat = 0) -> int:
"""
Convert hours, minutes, seconds to milliseconds.
@ -124,7 +135,7 @@ def ms_to_frames(ms: IntOrFloat, fps: float) -> int:
return int(round((ms / 1000) * fps))
def ms_to_times(ms: IntOrFloat) -> Tuple[int, int, int, int]:
def ms_to_times(ms: IntOrFloat) -> Times:
"""
Convert milliseconds to normalized tuple (h, m, s, ms).
@ -144,7 +155,7 @@ def ms_to_times(ms: IntOrFloat) -> Tuple[int, int, int, int]:
return Times(h, m, s, ms)
def ms_to_str(ms: IntOrFloat, fractions: bool=False) -> str:
def ms_to_str(ms: IntOrFloat, fractions: bool = False) -> str:
"""
Prettyprint milliseconds to [-]H:MM:SS[.mmm]

@ -28,7 +28,7 @@ knowit==0.5.2
Mako==1.3.2
pycountry==23.12.11
pyrsistent==0.20.0
pysubs2==1.6.1
pysubs2==1.7.2
python-engineio==4.9.0
python-socketio==5.11.1
pytz==2024.1

Loading…
Cancel
Save