diff --git a/custom_libs/subliminal_patch/subtitle.py b/custom_libs/subliminal_patch/subtitle.py
index 529e99019..d0cd585ed 100644
--- a/custom_libs/subliminal_patch/subtitle.py
+++ b/custom_libs/subliminal_patch/subtitle.py
@@ -13,7 +13,7 @@ import pysrt
import pysubs2
from bs4 import UnicodeDammit
from pysubs2 import SSAStyle
-from pysubs2.subrip import parse_tags, MAX_REPRESENTABLE_TIME
+from pysubs2.formats.subrip import parse_tags, MAX_REPRESENTABLE_TIME
from pysubs2.time import ms_to_times
from subzero.modification import SubtitleModifications
from subzero.language import Language
diff --git a/libs/pysubs2-1.6.1.dist-info/METADATA b/libs/pysubs2-1.6.1.dist-info/METADATA
deleted file mode 100644
index ee4854808..000000000
--- a/libs/pysubs2-1.6.1.dist-info/METADATA
+++ /dev/null
@@ -1,84 +0,0 @@
-Metadata-Version: 2.1
-Name: pysubs2
-Version: 1.6.1
-Summary: A library for editing subtitle files
-Home-page: https://github.com/tkarabela/pysubs2
-Author: Tomas Karabela
-Author-email: tkarabela@seznam.cz
-License: MIT
-Project-URL: Documentation, https://pysubs2.readthedocs.io
-Project-URL: Bug Tracker, https://github.com/tkarabela/pysubs2/issues
-Keywords: SubStation,SubRip,MicroDVD,MPL2,WebVTT,ass,srt,sub,vtt,subtitles,captions
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Classifier: Topic :: Text Processing :: Markup
-Classifier: Topic :: Multimedia :: Video
-Classifier: Intended Audience :: Developers
-Classifier: Intended Audience :: End Users/Desktop
-Classifier: Operating System :: OS Independent
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Typing :: Typed
-Requires-Python: >=3.7
-Description-Content-Type: text/x-rst
-License-File: LICENSE.txt
-
-pysubs2
-=======
-
-
-.. image:: https://img.shields.io/github/actions/workflow/status/tkarabela/pysubs2/main.yml?branch=master
- :alt: pysubs2 build master branch
- :target: https://github.com/tkarabela/pysubs2/actions
-.. image:: https://img.shields.io/codecov/c/github/tkarabela/pysubs2
- :alt: pysubs2 test code coverage
- :target: https://app.codecov.io/github/tkarabela/pysubs2
-.. image:: http://www.mypy-lang.org/static/mypy_badge.svg
- :alt: MyPy checked
- :target: https://github.com/tkarabela/pysubs2/actions
-.. image:: https://img.shields.io/pypi/v/pysubs2.svg?style=flat-square
- :alt: PyPI - Version
- :target: https://pypi.org/project/pysubs2/
-.. image:: https://img.shields.io/pypi/status/pysubs2.svg?style=flat-square
- :alt: PyPI - Status
- :target: https://pypi.org/project/pysubs2/
-.. image:: https://img.shields.io/pypi/pyversions/pysubs2.svg?style=flat-square
- :alt: PyPI - Python Version
- :target: https://pypi.org/project/pysubs2/
-.. image:: https://img.shields.io/pypi/l/pysubs2.svg?style=flat-square
- :alt: PyPI - License
- :target: LICENSE.txt
-
-
-pysubs2 is a Python library for editing subtitle files.
-It’s based on *SubStation Alpha*, the native format of
-`Aegisub `_; it also supports *SubRip (SRT)*,
-*MicroDVD*, *MPL2*, *TMP* and *WebVTT* formats and *OpenAI Whisper* captions.
-
-There is a small CLI tool for batch conversion and retiming.
-
-.. code:: bash
-
- $ pip install pysubs2
- $ pysubs2 --shift 0.3s *.srt
- $ pysubs2 --to srt *.ass
-
-.. code:: python
-
- import pysubs2
- subs = pysubs2.load("my_subtitles.ass", encoding="utf-8")
- subs.shift(s=2.5)
- for line in subs:
- line.text = "{\\be1}" + line.text
- subs.save("my_subtitles_edited.ass")
-
-To learn more, please `see the documentation `_.
-If you'd like to contribute, see `CONTRIBUTING.md `_.
-
-pysubs2 is licensed under the MIT license (see `LICENSE.txt `_).
diff --git a/libs/pysubs2-1.6.1.dist-info/RECORD b/libs/pysubs2-1.6.1.dist-info/RECORD
deleted file mode 100644
index bd71ab000..000000000
--- a/libs/pysubs2-1.6.1.dist-info/RECORD
+++ /dev/null
@@ -1,28 +0,0 @@
-../../bin/pysubs2,sha256=1ipjtihceH0M9ljChd41YMd3co9-Mn5sngcbA3Kgupk,241
-pysubs2-1.6.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
-pysubs2-1.6.1.dist-info/LICENSE.txt,sha256=Syr1zDKD8s_Wl4E6er5xVmuOZ1bvaYyLOxcEI7lW4KU,1063
-pysubs2-1.6.1.dist-info/METADATA,sha256=f8UZ93EkW3RJVHEXKuIX29IT8Cuz2siGq1IlcLS17tc,3281
-pysubs2-1.6.1.dist-info/RECORD,,
-pysubs2-1.6.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pysubs2-1.6.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-pysubs2-1.6.1.dist-info/entry_points.txt,sha256=mznmeaia9Ec19KsaIM8D4tt6V0HnOUlPhSg5nXiAfAE,49
-pysubs2-1.6.1.dist-info/top_level.txt,sha256=i_V6JX-AWjMX15-Dzyc0RlrInU7Zun9PYfdB9brM95E,8
-pysubs2/__init__.py,sha256=lkW5MzctUNvTu5KPVr84eO3Nh2x5yHp0SWAJ4NnAp-Y,505
-pysubs2/__main__.py,sha256=3iAK73rLxjLJqFMYD-jn3PDJvHZ8rIOW_NgsmBkRGhA,134
-pysubs2/cli.py,sha256=6zWUOifOCm5ziwn0dd_MsxZfdFcLiaGpnCvbz65WqbA,10607
-pysubs2/common.py,sha256=SWYT6_aoL3CBEO-j0vP2EZnTDcUvNom6hW_y8gChGFM,1479
-pysubs2/exceptions.py,sha256=pEbAUuDj3GOUbxryLAdZa7qawMWxNhIGyzC3j-VCykM,633
-pysubs2/formatbase.py,sha256=WLHV7ppth7giTxqjHN13exyx1tfuwJGpEq0iNCMGYUY,2945
-pysubs2/formats.py,sha256=nlGNc9dnC1vHQF-qAZiQEqfBN7Mn0B8OPGCr_ezzPvc,2438
-pysubs2/jsonformat.py,sha256=Z7BfaqGkK4vl0Gc9lPF_UUPUi3l4oJlxD-0TOSLDt9w,1862
-pysubs2/microdvd.py,sha256=boTIasQUUV8430k5kz8ba2GgCGqr1hGEEbdlE7Lw7dI,4398
-pysubs2/mpl2.py,sha256=T1JUIAG3zCJHKoLRlYVLI_qFbvdkgV0w6XG9wlYJ17Q,1802
-pysubs2/ssaevent.py,sha256=bz5oGH2yIJQl7Lzi4Dw_KFBfLMPelmomxXAeNJ6tXTc,5883
-pysubs2/ssafile.py,sha256=b9adFdbx1MdHEB_ZdRon7GwKT5pKsaJhnnTE7UHcaKM,20356
-pysubs2/ssastyle.py,sha256=PFFejFEwaUx_bO1lkoHVSSJcPhupGO3epCCBCAEgclk,3995
-pysubs2/subrip.py,sha256=lK8bxf98wLKqordXeY2j8DwknDBN8kyLZXVFbZwj3mw,7371
-pysubs2/substation.py,sha256=xUC19ASpj3yDS9ASzFjNn88KnIqkvWqE_vPOVxWCKsM,16170
-pysubs2/time.py,sha256=1GPQlajjEio9rJvGnNO8J5OnWChQlB3BgAzRS3gnEzM,4641
-pysubs2/tmp.py,sha256=hJ4hpja7AckpGmdbC5KuZ2jyIEXWu2b1_SILqzbR5iQ,3954
-pysubs2/webvtt.py,sha256=sNFtGAaWn7gVMG8SVHS0Ndcku6LyiAaiv9gPErl0kys,1531
-pysubs2/whisper.py,sha256=5dywTdlXa46ulYbVeWpM_dH9jiwRKlTm6G1GrzIjsx0,1604
diff --git a/libs/pysubs2-1.6.1.dist-info/INSTALLER b/libs/pysubs2-1.7.2.dist-info/INSTALLER
similarity index 100%
rename from libs/pysubs2-1.6.1.dist-info/INSTALLER
rename to libs/pysubs2-1.7.2.dist-info/INSTALLER
diff --git a/libs/pysubs2-1.6.1.dist-info/LICENSE.txt b/libs/pysubs2-1.7.2.dist-info/LICENSE.txt
similarity index 96%
rename from libs/pysubs2-1.6.1.dist-info/LICENSE.txt
rename to libs/pysubs2-1.7.2.dist-info/LICENSE.txt
index bad92093e..b29a6c1d5 100644
--- a/libs/pysubs2-1.6.1.dist-info/LICENSE.txt
+++ b/libs/pysubs2-1.7.2.dist-info/LICENSE.txt
@@ -1,4 +1,4 @@
-Copyright (c) 2014-2023 Tomas Karabela
+Copyright (c) 2014-2024 Tomas Karabela
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/libs/pysubs2-1.7.2.dist-info/METADATA b/libs/pysubs2-1.7.2.dist-info/METADATA
new file mode 100644
index 000000000..e1fef61e8
--- /dev/null
+++ b/libs/pysubs2-1.7.2.dist-info/METADATA
@@ -0,0 +1,70 @@
+Metadata-Version: 2.1
+Name: pysubs2
+Version: 1.7.2
+Summary: A library for editing subtitle files
+Home-page: https://github.com/tkarabela/pysubs2
+Author: Tomas Karabela
+Author-email: tkarabela@seznam.cz
+License: MIT
+Project-URL: Documentation, https://pysubs2.readthedocs.io
+Project-URL: Bug Tracker, https://github.com/tkarabela/pysubs2/issues
+Keywords: SubStation,SubRip,MicroDVD,MPL2,WebVTT,ass,srt,sub,vtt,subtitles,captions
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Text Processing :: Markup
+Classifier: Topic :: Multimedia :: Video
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: End Users/Desktop
+Classifier: Operating System :: OS Independent
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Typing :: Typed
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE.txt
+
+pysubs2
+=======
+
+[![pysubs2 build master branch](https://img.shields.io/github/actions/workflow/status/tkarabela/pysubs2/main.yml?branch=master)](https://github.com/tkarabela/pysubs2/actions)
+[![pysubs2 test code coverage](https://img.shields.io/codecov/c/github/tkarabela/pysubs2)](https://app.codecov.io/github/tkarabela/pysubs2)
+[![Static Badge](https://img.shields.io/badge/MyPy%20%26%20Ruffle-checked-blue?style=flat)](https://github.com/tkarabela/pysubs2/actions)
+[![PyPI - Version](https://img.shields.io/pypi/v/pysubs2.svg?style=flat)](https://pypi.org/project/pysubs2/)
+[![PyPI - Status](https://img.shields.io/pypi/status/pysubs2.svg?style=flat)](https://pypi.org/project/pysubs2/)
+[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pysubs2.svg?style=flat)](https://pypi.org/project/pysubs2/)
+[![PyPI - License](https://img.shields.io/pypi/l/pysubs2.svg?style=flat)](LICENSE.txt)
+[![GitHub Repo stars](https://img.shields.io/github/stars/tkarabela/pysubs2?style=flat&label=GitHub%20stars)](https://github.com/tkarabela/pysubs2)
+
+
+pysubs2 is a Python library for editing subtitle files.
+It’s based on *SubStation Alpha*, the native format of
+[Aegisub](http://www.aegisub.org/); it also supports *SubRip (SRT)*,
+*MicroDVD*, *MPL2*, *TMP* and *WebVTT* formats and *OpenAI Whisper* captions.
+
+There is a small CLI tool for batch conversion and retiming.
+
+```bash
+pip install pysubs2
+pysubs2 --shift 0.3s *.srt
+pysubs2 --to srt *.ass
+```
+
+```python
+import pysubs2
+subs = pysubs2.load("my_subtitles.ass", encoding="utf-8")
+subs.shift(s=2.5)
+for line in subs:
+ line.text = "{\\be1}" + line.text
+subs.save("my_subtitles_edited.ass")
+```
+
+To learn more, please [see the documentation](http://pysubs2.readthedocs.io).
+If you'd like to contribute, see [CONTRIBUTING.md](CONTRIBUTING.md).
+
+pysubs2 is licensed under the MIT license (see [LICENSE.txt](LICENSE.txt)).
diff --git a/libs/pysubs2-1.7.2.dist-info/RECORD b/libs/pysubs2-1.7.2.dist-info/RECORD
new file mode 100644
index 000000000..d0730aa1f
--- /dev/null
+++ b/libs/pysubs2-1.7.2.dist-info/RECORD
@@ -0,0 +1,47 @@
+../../bin/pysubs2,sha256=qbyZPJ0JMXc2Ii34KZgXKNvu1v8En-GFSWQFwhstugg,249
+pysubs2-1.7.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+pysubs2-1.7.2.dist-info/LICENSE.txt,sha256=WYgskvBrUBBDxuKH9JSV1u22g-UBTwHjeuJLlPp6lWU,1063
+pysubs2-1.7.2.dist-info/METADATA,sha256=2Fm6UaI7kElE-smr83cNBIfWRXt0vfso1Kyx5sChjB0,3183
+pysubs2-1.7.2.dist-info/RECORD,,
+pysubs2-1.7.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+pysubs2-1.7.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+pysubs2-1.7.2.dist-info/entry_points.txt,sha256=mznmeaia9Ec19KsaIM8D4tt6V0HnOUlPhSg5nXiAfAE,49
+pysubs2-1.7.2.dist-info/top_level.txt,sha256=i_V6JX-AWjMX15-Dzyc0RlrInU7Zun9PYfdB9brM95E,8
+pysubs2/__init__.py,sha256=L71MMMS6zcAiGWdddhnc4COZmKLYX__igP0Tr5q-y3Q,783
+pysubs2/__main__.py,sha256=3iAK73rLxjLJqFMYD-jn3PDJvHZ8rIOW_NgsmBkRGhA,134
+pysubs2/__pycache__/__init__.cpython-311.pyc,,
+pysubs2/__pycache__/__main__.cpython-311.pyc,,
+pysubs2/__pycache__/cli.cpython-311.pyc,,
+pysubs2/__pycache__/common.cpython-311.pyc,,
+pysubs2/__pycache__/exceptions.cpython-311.pyc,,
+pysubs2/__pycache__/ssaevent.cpython-311.pyc,,
+pysubs2/__pycache__/ssafile.cpython-311.pyc,,
+pysubs2/__pycache__/ssastyle.cpython-311.pyc,,
+pysubs2/__pycache__/time.cpython-311.pyc,,
+pysubs2/cli.py,sha256=lFVZsLChtnnaRov2lNEHlecTJaoNNvTLMQmZe0-4oGE,11916
+pysubs2/common.py,sha256=33XyqTSPqUNMedlnVkx-7GWFwvX0Fpce1G_ZHHtwQuI,1503
+pysubs2/exceptions.py,sha256=5XMoOvloP4QPPkEPour6YalxomcK-RkJA9eb_ecIbH0,2318
+pysubs2/formats/__init__.py,sha256=KZy9IibVmK-WwCHAYYplJELmYEnQu9BBRz5HEvyZ5U4,2515
+pysubs2/formats/__pycache__/__init__.cpython-311.pyc,,
+pysubs2/formats/__pycache__/base.cpython-311.pyc,,
+pysubs2/formats/__pycache__/jsonformat.cpython-311.pyc,,
+pysubs2/formats/__pycache__/microdvd.cpython-311.pyc,,
+pysubs2/formats/__pycache__/mpl2.cpython-311.pyc,,
+pysubs2/formats/__pycache__/subrip.cpython-311.pyc,,
+pysubs2/formats/__pycache__/substation.cpython-311.pyc,,
+pysubs2/formats/__pycache__/tmp.cpython-311.pyc,,
+pysubs2/formats/__pycache__/webvtt.cpython-311.pyc,,
+pysubs2/formats/__pycache__/whisper.cpython-311.pyc,,
+pysubs2/formats/base.py,sha256=zzt4L3KIC6VrUNfzwjDHSF5HV7swrdo65dop90vfy70,2959
+pysubs2/formats/jsonformat.py,sha256=TWk_qOI0-GaV_Jzs2S5JOe7uWu7t1jllO9NYOnZgRcE,2101
+pysubs2/formats/microdvd.py,sha256=hfshagBIMMPz3WgqpttYGugtAj_6CheAiTkvTTrn3NE,5970
+pysubs2/formats/mpl2.py,sha256=vpyL5j7gxxkGvWvI8JXwFlHaEyCBHYTn_mtgk4zqNFw,1970
+pysubs2/formats/subrip.py,sha256=VsEhj6aX_NZdUbecxjKk-5Ri7GcUdGh8dfzZVTsAZSM,7484
+pysubs2/formats/substation.py,sha256=UFaZ3sZoCa3bizKvn_5Ugtqy6crO1FqldxS5OI7ozcQ,16795
+pysubs2/formats/tmp.py,sha256=FKktjpgv07rf6EZ1Og0rkNpcj59Z6kL6dL-V0QLSKQw,4100
+pysubs2/formats/webvtt.py,sha256=Ycur_E5ztjazW2akyUXTsbNUdLs4S-knq93hwompCac,1788
+pysubs2/formats/whisper.py,sha256=JHGY8nhiVso_iyVZOP72lLbC7GM-gX_FFc1BdzCGdo0,1607
+pysubs2/ssaevent.py,sha256=VR0Pso779Pw5bs-Q-VpIz8Zs-A6p2a2pptQtsFVjRnM,6291
+pysubs2/ssafile.py,sha256=smMZIFk5h8n3uUuzgrcSmzdNsVU7oB3kguKQtUAdQGU,23833
+pysubs2/ssastyle.py,sha256=hlNowFmuiiwOzWTtQxKUakTwcE7Q5KxlQtl64Q6ScMU,4031
+pysubs2/time.py,sha256=Onk1vs9xXKiuwYK0RorZVU29p2FCEARapnfP_yYczTQ,4739
diff --git a/libs/pysubs2-1.6.1.dist-info/REQUESTED b/libs/pysubs2-1.7.2.dist-info/REQUESTED
similarity index 100%
rename from libs/pysubs2-1.6.1.dist-info/REQUESTED
rename to libs/pysubs2-1.7.2.dist-info/REQUESTED
diff --git a/libs/pysubs2-1.6.1.dist-info/WHEEL b/libs/pysubs2-1.7.2.dist-info/WHEEL
similarity index 65%
rename from libs/pysubs2-1.6.1.dist-info/WHEEL
rename to libs/pysubs2-1.7.2.dist-info/WHEEL
index 98c0d20b7..bab98d675 100644
--- a/libs/pysubs2-1.6.1.dist-info/WHEEL
+++ b/libs/pysubs2-1.7.2.dist-info/WHEEL
@@ -1,5 +1,5 @@
Wheel-Version: 1.0
-Generator: bdist_wheel (0.42.0)
+Generator: bdist_wheel (0.43.0)
Root-Is-Purelib: true
Tag: py3-none-any
diff --git a/libs/pysubs2-1.6.1.dist-info/entry_points.txt b/libs/pysubs2-1.7.2.dist-info/entry_points.txt
similarity index 100%
rename from libs/pysubs2-1.6.1.dist-info/entry_points.txt
rename to libs/pysubs2-1.7.2.dist-info/entry_points.txt
diff --git a/libs/pysubs2-1.6.1.dist-info/top_level.txt b/libs/pysubs2-1.7.2.dist-info/top_level.txt
similarity index 100%
rename from libs/pysubs2-1.6.1.dist-info/top_level.txt
rename to libs/pysubs2-1.7.2.dist-info/top_level.txt
diff --git a/libs/pysubs2/__init__.py b/libs/pysubs2/__init__.py
index 936988346..8ce998674 100644
--- a/libs/pysubs2/__init__.py
+++ b/libs/pysubs2/__init__.py
@@ -1,10 +1,28 @@
from .ssafile import SSAFile
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
-from . import time, formats, cli, whisper
-from .exceptions import *
+from . import time, formats, cli, exceptions
+from .formats import whisper
+from .exceptions import * # noqa: F403
from .common import Color, Alignment, VERSION
+__all__ = [
+ "SSAFile",
+ "SSAEvent",
+ "SSAStyle",
+ "time",
+ "formats",
+ "cli",
+ "whisper",
+ "exceptions",
+ "Color",
+ "Alignment",
+ "VERSION",
+ "load",
+ "load_from_whisper",
+ "make_time",
+]
+
#: Alias for :meth:`SSAFile.load()`.
load = SSAFile.load
diff --git a/libs/pysubs2/cli.py b/libs/pysubs2/cli.py
index d99c63f5c..14dc6766d 100644
--- a/libs/pysubs2/cli.py
+++ b/libs/pysubs2/cli.py
@@ -3,10 +3,11 @@ import codecs
import os
import re
import os.path as op
-import io
-from io import open
+from io import TextIOWrapper
import sys
from textwrap import dedent
+from typing import List
+
from .formats import get_file_extension, FORMAT_IDENTIFIERS
from .time import make_time
from .ssafile import SSAFile
@@ -42,7 +43,7 @@ def change_ext(path: str, ext: str) -> str:
class Pysubs2CLI:
- def __init__(self):
+ def __init__(self) -> None:
parser = self.parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
prog="pysubs2",
description=dedent("""
@@ -80,6 +81,12 @@ class Pysubs2CLI:
"If you wish to convert between encodings, make sure --input-enc is set correctly! "
"Otherwise, your output files will probably be corrupted. It's a good idea to "
"back up your files or use the -o option.")
+ parser.add_argument("--enc-error-handling", choices=("strict", "surrogateescape"),
+ default="surrogateescape",
+ help="Character encoding error handling for input and output. Defaults to 'surrogateescape' "
+ "which passes through unrecognized characters to output unchanged. Use 'strict' if "
+ "you want the command to fail when encountering a character incompatible with selected "
+ "input/output encoding.")
parser.add_argument("--fps", metavar="FPS", type=positive_float,
help="This argument specifies framerate for MicroDVD files. By default, framerate "
"is detected from the file. Use this when framerate specification is missing "
@@ -116,13 +123,14 @@ class Pysubs2CLI:
extra_sub_options.add_argument("--sub-no-write-fps-declaration", action="store_true",
help="(output) omit writing FPS as first zero-length subtitle")
- def __call__(self, argv):
+ def __call__(self, argv: List[str]) -> int:
try:
- self.main(argv)
+ return self.main(argv)
except KeyboardInterrupt:
- exit("\nAborted by user.")
+ print("\nAborted by user.", file=sys.stderr)
+ return 1
- def main(self, argv):
+ def main(self, argv: List[str]) -> int:
args = self.parser.parse_args(argv)
errors = 0
@@ -157,7 +165,7 @@ class Pysubs2CLI:
print("Skipping", path, "(not a file)")
errors += 1
else:
- with open(path, encoding=args.input_enc) as infile:
+ with open(path, encoding=args.input_enc, errors=args.enc_error_handling) as infile:
subs = SSAFile.from_file(infile, args.input_format, args.fps, **extra_input_args)
self.process(subs, args)
@@ -165,31 +173,37 @@ class Pysubs2CLI:
if args.output_format is None:
outpath = path
output_format = subs.format
+ assert output_format is not None, "subs.format must not be None (it was read from file)"
else:
ext = get_file_extension(args.output_format)
outpath = change_ext(path, ext)
output_format = args.output_format
+ assert output_format is not None, "args.output_format must not be None (see if/else)"
if args.output_dir is not None:
_, filename = op.split(outpath)
outpath = op.join(args.output_dir, filename)
- with open(outpath, "w", encoding=args.output_enc) as outfile:
+ with open(outpath, "w", encoding=args.output_enc, errors=args.enc_error_handling) as outfile:
subs.to_file(outfile, output_format, args.fps, apply_styles=not args.clean,
**extra_output_args)
- else:
- infile = io.TextIOWrapper(sys.stdin.buffer, args.input_enc)
- outfile = io.TextIOWrapper(sys.stdout.buffer, args.output_enc)
+ elif not sys.stdin.isatty():
+ infile = TextIOWrapper(sys.stdin.buffer, encoding=args.input_enc, errors=args.enc_error_handling)
+ outfile = TextIOWrapper(sys.stdout.buffer, encoding=args.output_enc, errors=args.enc_error_handling)
subs = SSAFile.from_file(infile, args.input_format, args.fps)
self.process(subs, args)
output_format = args.output_format or subs.format
+ assert output_format is not None, "output_format must not be None (it's either given or inferred at read time)"
subs.to_file(outfile, output_format, args.fps, apply_styles=not args.clean)
+ else:
+ self.parser.print_help()
+ errors += 1
- return (0 if errors == 0 else 1)
+ return 0 if errors == 0 else 1
@staticmethod
- def process(subs, args):
+ def process(subs: SSAFile, args: argparse.Namespace) -> None:
if args.shift is not None:
subs.shift(ms=args.shift)
elif args.shift_back is not None:
@@ -202,7 +216,7 @@ class Pysubs2CLI:
subs.remove_miscellaneous_events()
-def __main__():
+def __main__() -> None:
cli = Pysubs2CLI()
rv = cli(sys.argv[1:])
sys.exit(rv)
diff --git a/libs/pysubs2/common.py b/libs/pysubs2/common.py
index 8595f485c..9ac8f13e8 100644
--- a/libs/pysubs2/common.py
+++ b/libs/pysubs2/common.py
@@ -1,5 +1,5 @@
from dataclasses import dataclass
-from typing import Union
+from typing import Tuple, Union
from enum import IntEnum
@@ -54,11 +54,11 @@ class Alignment(IntEnum):
return SSA_ALIGNMENT[self.value - 1]
-SSA_ALIGNMENT = (1, 2, 3, 9, 10, 11, 5, 6, 7)
+SSA_ALIGNMENT: Tuple[int, ...] = (1, 2, 3, 9, 10, 11, 5, 6, 7)
#: Version of the pysubs2 library.
-VERSION = "1.6.1"
+VERSION = "1.7.2"
IntOrFloat = Union[int, float]
diff --git a/libs/pysubs2/exceptions.py b/libs/pysubs2/exceptions.py
index 9568fa52f..acf8112fb 100644
--- a/libs/pysubs2/exceptions.py
+++ b/libs/pysubs2/exceptions.py
@@ -1,3 +1,15 @@
+from typing import List
+
+
+__all__ = [
+ "Pysubs2Error",
+ "UnknownFPSError",
+ "UnknownFileExtensionError",
+ "UnknownFormatIdentifierError",
+ "FormatAutodetectionError",
+]
+
+
class Pysubs2Error(Exception):
"""Base class for pysubs2 exceptions."""
@@ -7,16 +19,58 @@ class UnknownFPSError(Pysubs2Error):
class UnknownFileExtensionError(Pysubs2Error):
- """File extension does not pertain to any known subtitle format."""
+ """
+ File extension does not pertain to any known subtitle format.
+
+ This exception is raised by `SSAFile.save()` when the ``format_`` parameter
+ is not specified. It will try to guess the desired format from output filename
+ and raise this exception when it fails.
+
+ Attributes:
+ ext (str): File extension
+ """
+
+ def __init__(self, ext: str) -> None:
+ self.ext = ext
+ msg = f"File extension {ext!r} does not match any supported subtitle format"
+ super().__init__(msg)
class UnknownFormatIdentifierError(Pysubs2Error):
- """Unknown subtitle format identifier (ie. string like ``"srt"``)."""
+ """
+ Unknown subtitle format identifier (ie. string like ``"srt"``).
+
+ This exception is used when interpreting ``format_`` parameter fails,
+ eg. in `SSAFile.save()`.
+
+ Attributes:
+ format_ (str): Format identifier
+ """
+
+ def __init__(self, format_: str) -> None:
+ self.format_ = format_
+ msg = f"Format identifier {format_!r} does not match any supported subtitle format"
+ super().__init__(msg)
class FormatAutodetectionError(Pysubs2Error):
- """Subtitle format is ambiguous or unknown."""
+ """
+ Subtitle format is ambiguous or unknown based on analysis of file fragment
+ This exception is raised by `SSAFile.load()` and related methods
+ when the ``format_`` parameter is not specified. It will try to guess
+ the input format based on reading first few kilobytes of the input file
+ and raise this exception if the format cannot be uniquely determined.
-class ContentNotUsable(Pysubs2Error):
- """Current content not usable for specified format"""
+ Attributes:
+ content (str): Analyzed subtitle file content
+ formats (list[str]): Format identifiers for detected formats
+ """
+ def __init__(self, content: str, formats: List[str]) -> None:
+ self.content = content
+ self.formats = formats
+ if not formats:
+ msg = "No suitable formats"
+ else:
+ msg = f"Multiple suitable formats ({formats!r})"
+ super().__init__(msg)
diff --git a/libs/pysubs2/formats.py b/libs/pysubs2/formats/__init__.py
similarity index 88%
rename from libs/pysubs2/formats.py
rename to libs/pysubs2/formats/__init__.py
index 7a8388990..abb01560c 100644
--- a/libs/pysubs2/formats.py
+++ b/libs/pysubs2/formats/__init__.py
@@ -1,6 +1,6 @@
from typing import Dict, Type
-from .formatbase import FormatBase
+from .base import FormatBase
from .microdvd import MicroDVDFormat
from .subrip import SubripFormat
from .jsonformat import JSONFormat
@@ -8,7 +8,7 @@ from .substation import SubstationFormat
from .mpl2 import MPL2Format
from .tmp import TmpFormat
from .webvtt import WebVTTFormat
-from .exceptions import *
+from ..exceptions import UnknownFormatIdentifierError, UnknownFileExtensionError, FormatAutodetectionError
#: Dict mapping file extensions to format identifiers.
FILE_EXTENSION_TO_FORMAT_IDENTIFIER: Dict[str, str] = {
@@ -75,6 +75,6 @@ def autodetect_format(content: str) -> str:
if len(formats) == 1:
return formats.pop()
elif not formats:
- raise FormatAutodetectionError("No suitable formats")
+ raise FormatAutodetectionError(content=content, formats=[])
else:
- raise FormatAutodetectionError(f"Multiple suitable formats ({formats!r})")
+ raise FormatAutodetectionError(content=content, formats=list(formats))
diff --git a/libs/pysubs2/formatbase.py b/libs/pysubs2/formats/base.py
similarity index 91%
rename from libs/pysubs2/formatbase.py
rename to libs/pysubs2/formats/base.py
index 6ea3ea77e..be97845e3 100644
--- a/libs/pysubs2/formatbase.py
+++ b/libs/pysubs2/formats/base.py
@@ -1,6 +1,5 @@
-from typing import Optional
-import io
-import pysubs2
+from typing import Optional, Any, TextIO
+from ..ssafile import SSAFile
class FormatBase:
@@ -19,7 +18,7 @@ class FormatBase:
"""
@classmethod
- def from_file(cls, subs: "pysubs2.SSAFile", fp: io.TextIOBase, format_: str, **kwargs):
+ def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
"""
Load subtitle file into an empty SSAFile.
@@ -42,7 +41,7 @@ class FormatBase:
raise NotImplementedError("Parsing is not supported for this format")
@classmethod
- def to_file(cls, subs: "pysubs2.SSAFile", fp: io.TextIOBase, format_: str, **kwargs):
+ def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
"""
Write SSAFile into a file.
diff --git a/libs/pysubs2/jsonformat.py b/libs/pysubs2/formats/jsonformat.py
similarity index 73%
rename from libs/pysubs2/jsonformat.py
rename to libs/pysubs2/formats/jsonformat.py
index df838ee92..2f9acc5f1 100644
--- a/libs/pysubs2/jsonformat.py
+++ b/libs/pysubs2/formats/jsonformat.py
@@ -1,15 +1,18 @@
import dataclasses
import json
-from .common import Color
-from .ssaevent import SSAEvent
-from .ssastyle import SSAStyle
-from .formatbase import FormatBase
+from typing import Any, Optional, TextIO
+
+from ..common import Color
+from ..ssaevent import SSAEvent
+from ..ssastyle import SSAStyle
+from .base import FormatBase
+from ..ssafile import SSAFile
# We're using Color dataclass
# https://stackoverflow.com/questions/51286748/make-the-python-json-encoder-support-pythons-new-dataclasses
class EnhancedJSONEncoder(json.JSONEncoder):
- def default(self, o):
+ def default(self, o: Any) -> Any:
if dataclasses.is_dataclass(o):
return dataclasses.asdict(o)
return super().default(o)
@@ -22,13 +25,15 @@ class JSONFormat(FormatBase):
This is essentially SubStation Alpha as JSON.
"""
@classmethod
- def guess_format(cls, text):
+ def guess_format(cls, text: str) -> Optional[str]:
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
- if text.startswith("{\""):
+ if text.startswith("{\"") and "\"info\":" in text:
return "json"
+ else:
+ return None
@classmethod
- def from_file(cls, subs, fp, format_, **kwargs):
+ def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
data = json.load(fp)
@@ -47,7 +52,7 @@ class JSONFormat(FormatBase):
subs.events = [SSAEvent(**fields) for fields in data["events"]]
@classmethod
- def to_file(cls, subs, fp, format_, **kwargs):
+ def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
"""See :meth:`pysubs2.formats.FormatBase.to_file()`"""
data = {
"info": dict(**subs.info),
diff --git a/libs/pysubs2/microdvd.py b/libs/pysubs2/formats/microdvd.py
similarity index 57%
rename from libs/pysubs2/microdvd.py
rename to libs/pysubs2/formats/microdvd.py
index f36377750..228dd35bd 100644
--- a/libs/pysubs2/microdvd.py
+++ b/libs/pysubs2/formats/microdvd.py
@@ -1,11 +1,15 @@
from functools import partial
import re
-from .exceptions import UnknownFPSError
-from .ssaevent import SSAEvent
-from .ssastyle import SSAStyle
-from .formatbase import FormatBase
+from typing import Optional, TextIO, Any, Match
+
+from ..exceptions import UnknownFPSError
+from ..ssaevent import SSAEvent
+from ..ssastyle import SSAStyle
+from .base import FormatBase
from .substation import parse_tags
-from .time import ms_to_frames, frames_to_ms
+from ..time import ms_to_frames, frames_to_ms
+from ..ssafile import SSAFile
+
#: Matches a MicroDVD line.
MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)")
@@ -14,14 +18,33 @@ MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)")
class MicroDVDFormat(FormatBase):
"""MicroDVD subtitle format implementation"""
@classmethod
- def guess_format(cls, text):
+ def guess_format(cls, text: str) -> Optional[str]:
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if any(map(MICRODVD_LINE.match, text.splitlines())):
return "microdvd"
+ else:
+ return None
@classmethod
- def from_file(cls, subs, fp, format_, fps=None, **kwargs):
- """See :meth:`pysubs2.formats.FormatBase.from_file()`"""
+ def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, fps: Optional[float] = None,
+ strict_fps_inference: bool = True, **kwargs: Any) -> None:
+ """
+ See :meth:`pysubs2.formats.FormatBase.from_file()`
+
+ Keyword args:
+ strict_fps_inference: If True (default), in the case when ``fps`` is not given, it will be read
+ from the first subtitle text only if the start and end frame of this subtitle is ``{1}{1}``
+ (matches VLC Player behaviour), otherwise :class:`pysubs2.exceptions.UnknownFPSError` is raised.
+
+ When ``strict_fps_inference``
+ is False, framerate will be read from the first subtitle text in this case regardless of
+ start and end frame (which may result in bogus result, if the first subtitle is not supposed
+ to contain framerate). Before introduction of this option, the library behaved as if this
+ option was False.
+
+ .. versionchanged:: 1.7.0
+ Added the ``strict_fps_inference`` option.
+ """
for line in fp:
match = MICRODVD_LINE.match(line)
if not match:
@@ -31,10 +54,14 @@ class MicroDVDFormat(FormatBase):
fstart, fend = map(int, (fstart, fend))
if fps is None:
- # We don't know the framerate, but it is customary to include
- # it as text of the first subtitle. In that case, we skip
- # this auxiliary subtitle and proceed with reading.
+ # We don't know the framerate, but it is customary to include it as text of the first subtitle,
+ # in the format {1}{1}fps, see pysubs2 issue #71 or VLC player source:
+ # https://code.videolan.org/videolan/vlc/-/blob/dccda0e133ff0a2e85de727cf19ddbc634f06b67/modules/demux/subtitle.c#L1014
+ # In that case, we skip this auxiliary subtitle and proceed with reading.
try:
+ if strict_fps_inference and not (fstart == 1 and fend == 1):
+ raise ValueError("Frame mismatch, expected {1}{1}")
+
fps = float(text)
subs.fps = fps
continue
@@ -45,10 +72,10 @@ class MicroDVDFormat(FormatBase):
start, end = map(partial(frames_to_ms, fps=fps), (fstart, fend))
- def prepare_text(text):
+ def prepare_text(text: str) -> str:
text = text.replace("|", r"\N")
- def style_replacer(match: re.Match) -> str:
+ def style_replacer(match: Match[str]) -> str:
tags = [c for c in "biu" if c in match.group(0)]
return "{%s}" % "".join(f"\\{c}1" for c in tags)
@@ -63,14 +90,15 @@ class MicroDVDFormat(FormatBase):
subs.append(ev)
@classmethod
- def to_file(cls, subs, fp, format_, fps=None, write_fps_declaration=True, apply_styles=True, **kwargs):
+ def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, fps: Optional[float] = None,
+ write_fps_declaration: bool = True, apply_styles: bool = True, **kwargs: Any) -> None:
"""
See :meth:`pysubs2.formats.FormatBase.to_file()`
The only supported styling is marking whole lines italic.
Keyword args:
- write_fps_declaration: If True, create a zero-duration first subtitle which will contain
+ write_fps_declaration: If True, create a zero-duration first subtitle ``{1}{1}`` which will contain
the fps.
apply_styles: If False, do not write any styling.
@@ -94,12 +122,9 @@ class MicroDVDFormat(FormatBase):
# insert an artificial first line telling the framerate
if write_fps_declaration:
- subs.insert(0, SSAEvent(start=0, end=0, text=str(fps)))
-
- for line in subs:
- if line.is_comment or line.is_drawing:
- continue
+ subs.insert(0, SSAEvent(start=1, end=1, text=str(fps)))
+ for line in subs.get_text_events():
text = "|".join(line.plaintext.splitlines())
if apply_styles and is_entirely_italic(line):
text = "{Y:i}" + text
@@ -107,8 +132,10 @@ class MicroDVDFormat(FormatBase):
start, end = map(to_frames, (line.start, line.end))
# XXX warn on underflow?
- if start < 0: start = 0
- if end < 0: end = 0
+ if start < 0:
+ start = 0
+ if end < 0:
+ end = 0
print("{%d}{%d}%s" % (start, end, text), file=fp)
diff --git a/libs/pysubs2/formats/mpl2.py b/libs/pysubs2/formats/mpl2.py
new file mode 100644
index 000000000..d67696394
--- /dev/null
+++ b/libs/pysubs2/formats/mpl2.py
@@ -0,0 +1,60 @@
+import re
+from typing import Optional, Any, TextIO
+from ..time import times_to_ms
+from .base import FormatBase
+from ..ssaevent import SSAEvent
+from ..ssafile import SSAFile
+
+
+# thanks to http://otsaloma.io/gaupol/doc/api/aeidon.files.mpl2_source.html
+MPL2_FORMAT = re.compile(r"^\[(-?\d+)\]\[(-?\d+)\](.*)", re.MULTILINE)
+
+
+class MPL2Format(FormatBase):
+ """MPL2 subtitle format implementation"""
+ @classmethod
+ def guess_format(cls, text: str) -> Optional[str]:
+ """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
+ if MPL2_FORMAT.search(text):
+ return "mpl2"
+ else:
+ return None
+
+ @classmethod
+ def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
+ """See :meth:`pysubs2.formats.FormatBase.from_file()`"""
+ def prepare_text(lines: str) -> str:
+ out = []
+ for s in lines.split("|"):
+ s = s.strip()
+
+ if s.startswith("/"):
+ # line beginning with '/' is in italics
+ s = r"{\i1}%s{\i0}" % s[1:].strip()
+
+ out.append(s)
+ return "\\N".join(out)
+
+ text = fp.read()
+ for start, end, text in MPL2_FORMAT.findall(text):
+ e = SSAEvent(
+ start=times_to_ms(s=float(start) / 10),
+ end=times_to_ms(s=float(end) / 10),
+ text=prepare_text(text)
+ )
+ subs.append(e)
+
+ @classmethod
+ def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
+ """
+ See :meth:`pysubs2.formats.FormatBase.to_file()`
+
+ No styling is supported at the moment.
+
+ """
+ # TODO handle italics
+ for line in subs.get_text_events():
+ start = int(line.start // 100)
+ end = int(line.end // 100)
+ text = line.plaintext.replace("\n", "|")
+ print(f"[{start}][{end}] {text}", file=fp)
diff --git a/libs/pysubs2/subrip.py b/libs/pysubs2/formats/subrip.py
similarity index 76%
rename from libs/pysubs2/subrip.py
rename to libs/pysubs2/formats/subrip.py
index 1f599c064..1dceaf0e8 100644
--- a/libs/pysubs2/subrip.py
+++ b/libs/pysubs2/formats/subrip.py
@@ -1,14 +1,14 @@
import re
import warnings
-from typing import List
+from typing import List, Sequence, Optional, TextIO, Any, Tuple
-import pysubs2
-from .formatbase import FormatBase
-from .ssaevent import SSAEvent
-from .ssastyle import SSAStyle
+from .base import FormatBase
+from ..ssaevent import SSAEvent
+from ..ssastyle import SSAStyle
from .substation import parse_tags
-from .exceptions import ContentNotUsable
-from .time import ms_to_times, make_time, TIMESTAMP, timestamp_to_ms
+from ..time import ms_to_times, make_time, TIMESTAMP, timestamp_to_ms
+from ..ssafile import SSAFile
+
#: Largest timestamp allowed in SubRip, ie. 99:59:59,999.
MAX_REPRESENTABLE_TIME = make_time(h=100) - 1
@@ -30,11 +30,11 @@ class SubripFormat(FormatBase):
return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
@staticmethod
- def timestamp_to_ms(groups):
+ def timestamp_to_ms(groups: Sequence[str]) -> int:
return timestamp_to_ms(groups)
@classmethod
- def guess_format(cls, text):
+ def guess_format(cls, text: str) -> Optional[str]:
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if "[Script Info]" in text or "[V4+ Styles]" in text:
# disambiguation vs. SSA/ASS
@@ -48,8 +48,11 @@ class SubripFormat(FormatBase):
if len(cls.TIMESTAMP.findall(line)) == 2:
return "srt"
+ return None
+
@classmethod
- def from_file(cls, subs, fp, format_, keep_html_tags=False, keep_unknown_html_tags=False, **kwargs):
+ def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, keep_html_tags: bool = False,
+ keep_unknown_html_tags: bool = False, **kwargs: Any) -> None:
"""
See :meth:`pysubs2.formats.FormatBase.from_file()`
@@ -70,8 +73,8 @@ class SubripFormat(FormatBase):
If False, these other HTML tags will be stripped from output
(in the previous example, you would get only ``example {\\i1}text{\\i0}``).
"""
- timestamps = [] # (start, end)
- following_lines = [] # contains lists of lines following each timestamp
+ timestamps: List[Tuple[int, int]] = [] # (start, end)
+ following_lines: List[List[str]] = [] # contains lists of lines following each timestamp
for line in fp:
stamps = cls.TIMESTAMP.findall(line)
@@ -83,7 +86,7 @@ class SubripFormat(FormatBase):
if timestamps:
following_lines[-1].append(line)
- def prepare_text(lines):
+ def prepare_text(lines: List[str]) -> str:
# Handle the "happy" empty subtitle case, which is timestamp line followed by blank line(s)
# followed by number line and timestamp line of the next subtitle. Fixes issue #11.
if (len(lines) >= 2
@@ -108,11 +111,13 @@ class SubripFormat(FormatBase):
s = re.sub(r"\n", r"\\N", s) # convert newlines
return s
- subs.events = [SSAEvent(start=start, end=end, text=prepare_text(lines))
- for (start, end), lines in zip(timestamps, following_lines)]
+ for (start, end), lines in zip(timestamps, following_lines):
+ e = SSAEvent(start=start, end=end, text=prepare_text(lines))
+ subs.append(e)
@classmethod
- def to_file(cls, subs, fp, format_, apply_styles=True, keep_ssa_tags=False, **kwargs):
+ def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, apply_styles: bool = True,
+ keep_ssa_tags: bool = False, **kwargs: Any) -> None:
"""
See :meth:`pysubs2.formats.FormatBase.to_file()`
@@ -134,7 +139,7 @@ class SubripFormat(FormatBase):
is SRT which doesn't use line styles - this shouldn't be much
of an issue in practice.)
"""
- def prepare_text(text: str, style: SSAStyle):
+ def prepare_text(text: str, style: SSAStyle) -> str:
text = text.replace(r"\h", " ")
text = text.replace(r"\n", "\n")
text = text.replace(r"\N", "\n")
@@ -145,24 +150,20 @@ class SubripFormat(FormatBase):
else:
for fragment, sty in parse_tags(text, style, subs.styles):
if apply_styles:
- if sty.italic: fragment = f"{fragment}"
- if sty.underline: fragment = f"{fragment}"
- if sty.strikeout: fragment = f"{fragment}"
- if sty.drawing: raise ContentNotUsable
+ if sty.italic:
+ fragment = f"{fragment}"
+ if sty.underline:
+ fragment = f"{fragment}"
+ if sty.strikeout:
+ fragment = f"{fragment}"
body.append(fragment)
return re.sub("\n+", "\n", "".join(body).strip())
- visible_lines = cls._get_visible_lines(subs)
-
- lineno = 1
- for line in visible_lines:
+ for lineno, line in enumerate(cls._get_visible_lines(subs), 1):
start = cls.ms_to_timestamp(line.start)
end = cls.ms_to_timestamp(line.end)
- try:
- text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
- except ContentNotUsable:
- continue
+ text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
print(lineno, file=fp)
print(start, "-->", end, file=fp)
@@ -170,6 +171,5 @@ class SubripFormat(FormatBase):
lineno += 1
@classmethod
- def _get_visible_lines(cls, subs: "pysubs2.SSAFile") -> List["pysubs2.SSAEvent"]:
- visible_lines = [line for line in subs if not line.is_comment]
- return visible_lines
+ def _get_visible_lines(cls, subs: "SSAFile") -> List[SSAEvent]:
+ return subs.get_text_events()
diff --git a/libs/pysubs2/substation.py b/libs/pysubs2/formats/substation.py
similarity index 87%
rename from libs/pysubs2/substation.py
rename to libs/pysubs2/formats/substation.py
index afcd84e36..994277286 100644
--- a/libs/pysubs2/substation.py
+++ b/libs/pysubs2/formats/substation.py
@@ -1,22 +1,21 @@
import logging
import re
import warnings
-from numbers import Number
-from typing import Any, Union, Optional, Dict
+from typing import Any, Union, Optional, Dict, Tuple, List, TextIO
-import pysubs2
-from .formatbase import FormatBase
-from .ssaevent import SSAEvent
-from .ssastyle import SSAStyle
-from .common import Color, Alignment, SSA_ALIGNMENT
-from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP, TIMESTAMP_SHORT
+from .base import FormatBase
+from ..ssaevent import SSAEvent
+from ..ssastyle import SSAStyle
+from ..common import Color, Alignment, SSA_ALIGNMENT
+from ..time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP, TIMESTAMP_SHORT
+from ..ssafile import SSAFile
-def ass_to_ssa_alignment(i):
+def ass_to_ssa_alignment(i: int) -> int:
warnings.warn("ass_to_ssa_alignment function is deprecated, please use the Alignment enum", DeprecationWarning)
return SSA_ALIGNMENT[i-1]
-def ssa_to_ass_alignment(i):
+def ssa_to_ass_alignment(i: int) -> int:
warnings.warn("ssa_to_ass_alignment function is deprecated, please use the Alignment enum", DeprecationWarning)
return SSA_ALIGNMENT.index(i) + 1
@@ -86,7 +85,8 @@ def is_valid_field_content(s: str) -> bool:
return "\n" not in s and "," not in s
-def parse_tags(text: str, style: SSAStyle = SSAStyle.DEFAULT_STYLE, styles: Optional[Dict[str, SSAStyle]] = None):
+def parse_tags(text: str, style: SSAStyle = SSAStyle.DEFAULT_STYLE,
+ styles: Optional[Dict[str, SSAStyle]] = None) -> List[Tuple[str, SSAStyle]]:
"""
Split text into fragments with computed SSAStyles.
@@ -117,14 +117,18 @@ def parse_tags(text: str, style: SSAStyle = SSAStyle.DEFAULT_STYLE, styles: Opti
s = style.copy() # reset to original line style
elif tag.startswith(r"\r"):
name = tag[2:]
- if name in styles: # type: ignore[operator]
+ if name in styles:
# reset to named style
- s = styles[name].copy() # type: ignore[index]
+ s = styles[name].copy()
else:
- if "i" in tag: s.italic = "1" in tag
- elif "b" in tag: s.bold = "1" in tag
- elif "u" in tag: s.underline = "1" in tag
- elif "s" in tag: s.strikeout = "1" in tag
+ if "i" in tag:
+ s.italic = "1" in tag
+ elif "b" in tag:
+ s.bold = "1" in tag
+ elif "u" in tag:
+ s.underline = "1" in tag
+ elif "s" in tag:
+ s.strikeout = "1" in tag
elif "p" in tag:
try:
scale = int(tag[2:])
@@ -146,34 +150,36 @@ class SubstationFormat(FormatBase):
"""SubStation Alpha (ASS, SSA) subtitle format implementation"""
@staticmethod
- def ms_to_timestamp(ms: int) -> str:
+ def ms_to_timestamp(requested_ms: int) -> str:
"""Convert ms to 'H:MM:SS.cc'"""
- if ms < 0:
- ms = 0
- if ms > MAX_REPRESENTABLE_TIME:
+ if requested_ms < 0:
+ requested_ms = 0
+ if requested_ms > MAX_REPRESENTABLE_TIME:
warnings.warn("Overflow in SubStation timestamp, clamping to MAX_REPRESENTABLE_TIME", RuntimeWarning)
- ms = MAX_REPRESENTABLE_TIME
-
- h, m, s, ms = ms_to_times(ms)
+ requested_ms = MAX_REPRESENTABLE_TIME
# Aegisub does rounding, see https://github.com/Aegisub/Aegisub/blob/6f546951b4f004da16ce19ba638bf3eedefb9f31/libaegisub/include/libaegisub/ass/time.h#L32
- cs = ((ms + 5) - (ms + 5) % 10) // 10
+ round_ms = ((requested_ms + 5) - (requested_ms + 5) % 10)
+ h, m, s, ms = ms_to_times(round_ms)
+ cs = ms // 10
return f"{h:01d}:{m:02d}:{s:02d}.{cs:02d}"
@classmethod
- def guess_format(cls, text):
+ def guess_format(cls, text: str) -> Optional[str]:
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if re.search(r"V4\+ Styles", text, re.IGNORECASE):
return "ass"
elif re.search(r"V4 Styles", text, re.IGNORECASE):
return "ssa"
+ else:
+ return None
@classmethod
- def from_file(cls, subs: "pysubs2.SSAFile", fp, format_, **kwargs):
+ def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
- def string_to_field(f: str, v: str):
+ def string_to_field(f: str, v: str) -> Any:
# Per issue #45, we should handle the case where there is extra whitespace around the values.
# Extra whitespace is removed in non-string fields where it would break the parser otherwise,
# and in font name (where it doesn't really make sense). It is preserved in Dialogue string
@@ -199,9 +205,13 @@ class SubstationFormat(FormatBase):
v = v.strip()
return rgba_to_color(v)
elif f in {"bold", "underline", "italic", "strikeout"}:
- return v == "-1"
+ return v != "0"
elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:
- return int(v)
+ try:
+ return int(v)
+ except ValueError:
+ warnings.warn(f"Failed to parse {f}, using default", RuntimeWarning)
+ return 0
elif f in {"fontsize", "scalex", "scaley", "spacing", "angle", "outline", "shadow"}:
return float(v)
elif f == "marked":
@@ -244,7 +254,8 @@ class SubstationFormat(FormatBase):
inside_font_section = "Fonts" in line
inside_graphic_section = "Graphics" in line
elif inside_info_section or inside_aegisub_section:
- if line.startswith(";"): continue # skip comments
+ if line.startswith(";"):
+ continue # skip comments
try:
k, v = line.split(":", 1)
if inside_info_section:
@@ -280,7 +291,7 @@ class SubstationFormat(FormatBase):
elif line.startswith("Style:"):
_, rest = line.split(":", 1)
buf = rest.strip().split(",")
- name, raw_fields = buf[0], buf[1:] # splat workaround for Python 2.7
+ name, *raw_fields = buf
field_dict = {f: string_to_field(f, v) for f, v in zip(STYLE_FIELDS[format_], raw_fields)}
sty = SSAStyle(**field_dict)
subs.styles[name] = sty
@@ -307,7 +318,7 @@ class SubstationFormat(FormatBase):
current_attachment_name = None
@classmethod
- def to_file(cls, subs: "pysubs2.SSAFile", fp, format_, header_notice=NOTICE, **kwargs):
+ def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, header_notice: str = NOTICE, **kwargs: Any) -> None:
"""See :meth:`pysubs2.formats.FormatBase.to_file()`"""
print("[Script Info]", file=fp)
for line in header_notice.splitlines(False):
@@ -322,7 +333,7 @@ class SubstationFormat(FormatBase):
for k, v in subs.aegisub_project.items():
print(k, v, sep=": ", file=fp)
- def field_to_string(f: str, v: Any, line: Union[SSAEvent, SSAStyle]):
+ def field_to_string(f: str, v: Any, line: Union[SSAEvent, SSAStyle]) -> str:
if f in {"start", "end"}:
return cls.ms_to_timestamp(v)
elif f == "marked":
@@ -340,8 +351,12 @@ class SubstationFormat(FormatBase):
return str(alignment.value)
elif isinstance(v, bool):
return "-1" if v else "0"
- elif isinstance(v, (str, Number)):
+ elif isinstance(v, int):
return str(v)
+ elif isinstance(v, float):
+ return str(int(v) if v.is_integer() else v)
+ elif isinstance(v, str):
+ return v
elif isinstance(v, Color):
if format_ == "ass":
return color_to_ass_rgba(v)
diff --git a/libs/pysubs2/tmp.py b/libs/pysubs2/formats/tmp.py
similarity index 69%
rename from libs/pysubs2/tmp.py
rename to libs/pysubs2/formats/tmp.py
index 426a6f191..e9db38a39 100644
--- a/libs/pysubs2/tmp.py
+++ b/libs/pysubs2/formats/tmp.py
@@ -1,11 +1,14 @@
import re
import warnings
+from typing import Optional, TextIO, Any
-from .formatbase import FormatBase
-from .ssaevent import SSAEvent
-from .ssastyle import SSAStyle
+from .base import FormatBase
+from ..ssaevent import SSAEvent
+from ..ssastyle import SSAStyle
from .substation import parse_tags
-from .time import ms_to_times, make_time, TIMESTAMP_SHORT, timestamp_to_ms
+from ..time import ms_to_times, make_time, TIMESTAMP_SHORT, timestamp_to_ms
+from ..ssafile import SSAFile
+
#: Pattern that matches TMP line
TMP_LINE = re.compile(r"(\d{1,2}:\d{2}:\d{2}):(.+)")
@@ -29,7 +32,7 @@ class TmpFormat(FormatBase):
return f"{h:02d}:{m:02d}:{s:02d}"
@classmethod
- def guess_format(cls, text):
+ def guess_format(cls, text: str) -> Optional[str]:
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if "[Script Info]" in text or "[V4+ Styles]" in text:
# disambiguation vs. SSA/ASS
@@ -39,14 +42,16 @@ class TmpFormat(FormatBase):
if TMP_LINE.match(line) and len(TMP_LINE.findall(line)) == 1:
return "tmp"
+ return None
+
@classmethod
- def from_file(cls, subs, fp, format_, **kwargs):
+ def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
"""See :meth:`pysubs2.formats.FormatBase.from_file()`"""
events = []
- def prepare_text(text):
+ def prepare_text(text: str) -> str:
text = text.replace("|", r"\N") # convert newlines
- text = re.sub(r"< *u *>", "{\\\\u1}", text) # not r" for Python 2.7 compat, triggers unicodeescape
+ text = re.sub(r"< *u *>", r"{\\u1}", text)
text = re.sub(r"< */? *[a-zA-Z][^>]*>", "", text) # strip other HTML tags
return text
@@ -56,7 +61,9 @@ class TmpFormat(FormatBase):
continue
start, text = match.groups()
- start = timestamp_to_ms(TIMESTAMP_SHORT.match(start).groups())
+ match2 = TIMESTAMP_SHORT.match(start)
+ assert match2 is not None, "TMP_LINE contains TIMESTAMP_SHORT"
+ start = timestamp_to_ms(match2.groups())
# Unfortunately, end timestamp is not given; try to estimate something reasonable:
# start + 500 ms + 67 ms/character (15 chars per second)
@@ -72,7 +79,7 @@ class TmpFormat(FormatBase):
subs.events = events
@classmethod
- def to_file(cls, subs, fp, format_, apply_styles=True, **kwargs):
+ def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, apply_styles: bool = True, **kwargs: Any) -> None:
"""
See :meth:`pysubs2.formats.FormatBase.to_file()`
@@ -82,28 +89,24 @@ class TmpFormat(FormatBase):
apply_styles: If False, do not write any styling.
"""
- def prepare_text(text, style):
+ def prepare_text(text: str, style: SSAStyle) -> str:
body = []
- skip = False
for fragment, sty in parse_tags(text, style, subs.styles):
fragment = fragment.replace(r"\h", " ")
fragment = fragment.replace(r"\n", "\n")
fragment = fragment.replace(r"\N", "\n")
if apply_styles:
- if sty.italic: fragment = f"{fragment}"
- if sty.underline: fragment = f"{fragment}"
- if sty.strikeout: fragment = f"{fragment}"
- if sty.drawing: skip = True
+ if sty.italic:
+ fragment = f"{fragment}"
+ if sty.underline:
+ fragment = f"{fragment}"
+ if sty.strikeout:
+ fragment = f"{fragment}"
body.append(fragment)
- if skip:
- return ""
- else:
- return re.sub("\n+", "\n", "".join(body).strip())
-
- visible_lines = (line for line in subs if not line.is_comment)
+ return re.sub("\n+", "\n", "".join(body).strip())
- for line in visible_lines:
+ for line in subs.get_text_events():
start = cls.ms_to_timestamp(line.start)
text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
diff --git a/libs/pysubs2/webvtt.py b/libs/pysubs2/formats/webvtt.py
similarity index 58%
rename from libs/pysubs2/webvtt.py
rename to libs/pysubs2/formats/webvtt.py
index 89c245db6..88f088a0a 100644
--- a/libs/pysubs2/webvtt.py
+++ b/libs/pysubs2/formats/webvtt.py
@@ -1,16 +1,17 @@
import re
-from typing import List
+from typing import List, Sequence, Optional, TextIO, Any
-import pysubs2
+from ..ssaevent import SSAEvent
from .subrip import SubripFormat
-from .time import make_time
+from ..time import make_time
+from ..ssafile import SSAFile
class WebVTTFormat(SubripFormat):
"""
Web Video Text Tracks (WebVTT) subtitle format implementation
- Currently, this shares implementation with :class:`pysubs2.subrip.SubripFormat`.
+ Currently, this shares implementation with :class:`pysubs2.formats.subrip.SubripFormat`.
"""
TIMESTAMP = re.compile(r"(\d{0,4}:)?(\d{2}):(\d{2})\.(\d{2,3})")
@@ -20,7 +21,7 @@ class WebVTTFormat(SubripFormat):
return result.replace(',', '.')
@staticmethod
- def timestamp_to_ms(groups):
+ def timestamp_to_ms(groups: Sequence[str]) -> int:
_h, _m, _s, _ms = groups
if not _h:
h = 0
@@ -30,22 +31,24 @@ class WebVTTFormat(SubripFormat):
return make_time(h=h, m=m, s=s, ms=ms)
@classmethod
- def guess_format(cls, text):
+ def guess_format(cls, text: str) -> Optional[str]:
"""See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if text.lstrip().startswith("WEBVTT"):
return "vtt"
+ else:
+ return None
@classmethod
- def to_file(cls, subs, fp, format_, **kwargs):
+ def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None: # type: ignore[override]
"""
- See :meth:`pysubs2.formats.FormatBase.to_file()`
+ See :meth:`pysubs2.formats.subrip.SubripFormat.to_file()`; additional SRT options are supported by VTT as well.
"""
print("WEBVTT\n", file=fp)
return super(WebVTTFormat, cls).to_file(
subs=subs, fp=fp, format_=format_, **kwargs)
@classmethod
- def _get_visible_lines(cls, subs: "pysubs2.SSAFile") -> List["pysubs2.SSAEvent"]:
- visible_lines = [line for line in subs if not line.is_comment]
+ def _get_visible_lines(cls, subs: "SSAFile") -> List[SSAEvent]:
+ visible_lines = super()._get_visible_lines(subs)
visible_lines.sort(key=lambda e: e.start)
return visible_lines
diff --git a/libs/pysubs2/whisper.py b/libs/pysubs2/formats/whisper.py
similarity index 94%
rename from libs/pysubs2/whisper.py
rename to libs/pysubs2/formats/whisper.py
index 5bfe8e726..c5353cd08 100644
--- a/libs/pysubs2/whisper.py
+++ b/libs/pysubs2/formats/whisper.py
@@ -5,9 +5,9 @@ See https://github.com/openai/whisper
"""
-from .ssaevent import SSAEvent
-from .ssafile import SSAFile
-from .time import make_time
+from ..ssaevent import SSAEvent
+from ..ssafile import SSAFile
+from ..time import make_time
from typing import Union, List, Dict, Any
diff --git a/libs/pysubs2/mpl2.py b/libs/pysubs2/mpl2.py
deleted file mode 100644
index 78af420fe..000000000
--- a/libs/pysubs2/mpl2.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import re
-
-from .time import times_to_ms
-from .formatbase import FormatBase
-from .ssaevent import SSAEvent
-
-
-# thanks to http://otsaloma.io/gaupol/doc/api/aeidon.files.mpl2_source.html
-MPL2_FORMAT = re.compile(r"^\[(-?\d+)\]\[(-?\d+)\](.*)", re.MULTILINE)
-
-
-class MPL2Format(FormatBase):
- """MPL2 subtitle format implementation"""
- @classmethod
- def guess_format(cls, text):
- """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
- if MPL2_FORMAT.search(text):
- return "mpl2"
-
- @classmethod
- def from_file(cls, subs, fp, format_, **kwargs):
- """See :meth:`pysubs2.formats.FormatBase.from_file()`"""
- def prepare_text(lines):
- out = []
- for s in lines.split("|"):
- s = s.strip()
-
- if s.startswith("/"):
- # line beginning with '/' is in italics
- s = r"{\i1}%s{\i0}" % s[1:].strip()
-
- out.append(s)
- return "\\N".join(out)
-
- subs.events = [SSAEvent(start=times_to_ms(s=float(start) / 10), end=times_to_ms(s=float(end) / 10),
- text=prepare_text(text)) for start, end, text in MPL2_FORMAT.findall(fp.getvalue())]
-
- @classmethod
- def to_file(cls, subs, fp, format_, **kwargs):
- """
- See :meth:`pysubs2.formats.FormatBase.to_file()`
-
- No styling is supported at the moment.
-
- """
- # TODO handle italics
- for line in subs:
- if line.is_comment:
- continue
-
- print("[{start}][{end}] {text}".format(start=int(line.start // 100),
- end=int(line.end // 100),
- text=line.plaintext.replace("\n", "|")),
- file=fp)
diff --git a/libs/pysubs2/ssaevent.py b/libs/pysubs2/ssaevent.py
index 9906631d0..a33d2e308 100644
--- a/libs/pysubs2/ssaevent.py
+++ b/libs/pysubs2/ssaevent.py
@@ -1,6 +1,6 @@
import re
import warnings
-from typing import Optional, Dict, Any, ClassVar
+from typing import Optional, Dict, Any, ClassVar, FrozenSet
import dataclasses
from .common import IntOrFloat
@@ -41,7 +41,7 @@ class SSAEvent:
type: str = "Dialogue" #: Line type (Dialogue/Comment)
@property
- def FIELDS(self):
+ def FIELDS(self) -> FrozenSet[str]:
"""All fields in SSAEvent."""
warnings.warn("Deprecated in 1.2.0 - it's a dataclass now", DeprecationWarning)
return frozenset(field.name for field in dataclasses.fields(self))
@@ -57,7 +57,7 @@ class SSAEvent:
return self.end - self.start
@duration.setter
- def duration(self, ms: int):
+ def duration(self, ms: int) -> None:
if ms >= 0:
self.end = self.start + ms
else:
@@ -74,7 +74,7 @@ class SSAEvent:
return self.type == "Comment"
@is_comment.setter
- def is_comment(self, value: bool):
+ def is_comment(self, value: bool) -> None:
if value:
self.type = "Comment"
else:
@@ -83,9 +83,18 @@ class SSAEvent:
@property
def is_drawing(self) -> bool:
"""Returns True if line is SSA drawing tag (ie. not text)"""
- from .substation import parse_tags
+ from .formats.substation import parse_tags
return any(sty.drawing for _, sty in parse_tags(self.text))
+ @property
+ def is_text(self) -> bool:
+ """
+ Returns False for SSA drawings and comment lines, True otherwise
+
+ In general, for non-SSA formats these events should be ignored.
+ """
+ return not (self.is_comment or self.is_drawing)
+
@property
def plaintext(self) -> str:
"""
@@ -102,11 +111,11 @@ class SSAEvent:
return text
@plaintext.setter
- def plaintext(self, text: str):
+ def plaintext(self, text: str) -> None:
self.text = text.replace("\n", r"\N")
- def shift(self, h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
- frames: Optional[int]=None, fps: Optional[float]=None):
+ def shift(self, h: IntOrFloat = 0, m: IntOrFloat = 0, s: IntOrFloat = 0, ms: IntOrFloat = 0,
+ frames: Optional[int] = None, fps: Optional[float] = None) -> None:
"""
Shift start and end times.
@@ -132,36 +141,36 @@ class SSAEvent:
else:
raise TypeError("Cannot compare to non-SSAEvent object")
- def __eq__(self, other) -> bool:
+ def __eq__(self, other: object) -> bool:
# XXX document this
if not isinstance(other, SSAEvent):
return NotImplemented
return self.start == other.start and self.end == other.end
- def __ne__(self, other) -> bool:
+ def __ne__(self, other: object) -> bool:
if not isinstance(other, SSAEvent):
return NotImplemented
return self.start != other.start or self.end != other.end
- def __lt__(self, other) -> bool:
+ def __lt__(self, other: object) -> bool:
if not isinstance(other, SSAEvent):
return NotImplemented
return (self.start, self.end) < (other.start, other.end)
- def __le__(self, other) -> bool:
+ def __le__(self, other: object) -> bool:
if not isinstance(other, SSAEvent):
return NotImplemented
return (self.start, self.end) <= (other.start, other.end)
- def __gt__(self, other) -> bool:
+ def __gt__(self, other: object) -> bool:
if not isinstance(other, SSAEvent):
return NotImplemented
return (self.start, self.end) > (other.start, other.end)
- def __ge__(self, other) -> bool:
+ def __ge__(self, other: object) -> bool:
if not isinstance(other, SSAEvent):
return NotImplemented
return (self.start, self.end) >= (other.start, other.end)
- def __repr__(self):
+ def __repr__(self) -> str:
return f""
diff --git a/libs/pysubs2/ssafile.py b/libs/pysubs2/ssafile.py
index 1202a46f5..8c0b9cb14 100644
--- a/libs/pysubs2/ssafile.py
+++ b/libs/pysubs2/ssafile.py
@@ -1,20 +1,16 @@
-from collections.abc import MutableSequence
import io
-from io import open
from itertools import chain
import os.path
import logging
-from typing import Optional, List, Dict, Iterable, Any, overload, Iterator
+from typing import Optional, List, Dict, Iterable, Any, overload, Iterator, TextIO, Tuple, MutableSequence
from .common import IntOrFloat
-from .formats import autodetect_format, get_format_class, get_format_identifier
-from .substation import is_valid_field_content
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from .time import make_time, ms_to_str
-class SSAFile(MutableSequence):
+class SSAFile(MutableSequence[SSAEvent]):
"""
Subtitle file in SubStation Alpha format.
@@ -32,7 +28,7 @@ class SSAFile(MutableSequence):
"""
- DEFAULT_INFO = {
+ DEFAULT_INFO: Dict[str, str] = {
"WrapStyle": "0",
"ScaledBorderAndShadow": "yes",
"Collisions": "Normal"
@@ -53,7 +49,8 @@ class SSAFile(MutableSequence):
# ------------------------------------------------------------------------
@classmethod
- def load(cls, path: str, encoding: str="utf-8", format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
+ def load(cls, path: str, encoding: str = "utf-8", format_: Optional[str] = None, fps: Optional[float] = None,
+ errors: Optional[str] = None, **kwargs: Any) -> "SSAFile":
"""
Load subtitle file from given path.
@@ -62,12 +59,23 @@ class SSAFile(MutableSequence):
See also:
Specific formats may implement additional loading options,
please refer to documentation of the implementation classes
- (eg. :meth:`pysubs2.subrip.SubripFormat.from_file()`)
+ (eg. :meth:`pysubs2.formats.subrip.SubripFormat.from_file()`)
Arguments:
path (str): Path to subtitle file.
encoding (str): Character encoding of input file.
Defaults to UTF-8, you may need to change this.
+ errors (Optional[str]): Error handling for character encoding
+ of input file. Defaults to ``None``; use the value ``"surrogateescape"``
+ for pass-through of bytes not supported by selected encoding via
+ `Unicode surrogate pairs <https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF_(surrogates)>`_.
+ See documentation of builtin ``open()`` function for more.
+
+ .. versionchanged:: 1.7.0
+ The ``errors`` parameter was introduced to facilitate
+ pass-through of subtitle files with unknown text encoding.
+ Previous versions of the library behaved as if ``errors=None``.
+
format_ (str): Optional, forces use of specific parser
(eg. `"srt"`, `"ass"`). Otherwise, format is detected
automatically from file contents. This argument should
@@ -96,23 +104,32 @@ class SSAFile(MutableSequence):
Example:
>>> subs1 = pysubs2.load("subrip-subtitles.srt")
- >>> subs2 = pysubs2.load("microdvd-subtitles.sub", fps=23.976)
- >>> subs3 = pysubs2.load("subrip-subtitles-with-fancy-tags.srt", keep_unknown_html_tags=True)
+ >>> subs2 = pysubs2.load("microdvd-subtitles.sub",fps=23.976)
+ >>> subs3 = pysubs2.load("subrip-subtitles-with-fancy-tags.srt",keep_unknown_html_tags=True)
"""
- with open(path, encoding=encoding) as fp:
+ with open(path, encoding=encoding, errors=errors) as fp:
return cls.from_file(fp, format_, fps=fps, **kwargs)
@classmethod
- def from_string(cls, string: str, format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
+ def from_string(cls, string: str, format_: Optional[str] = None, fps: Optional[float] = None,
+ **kwargs: Any) -> "SSAFile":
"""
Load subtitle file from string.
See :meth:`SSAFile.load()` for full description.
Arguments:
- string (str): Subtitle file in a string. Note that the string
- must be Unicode (in Python 2).
+ string (str): Subtitle file in a string. Note that the string must be Unicode (``str``, not ``bytes``).
+ format_ (str): Optional, forces use of specific parser
+ (eg. `"srt"`, `"ass"`). Otherwise, format is detected
+ automatically from file contents. This argument should
+ be rarely needed.
+ fps (float): Framerate for frame-based formats (MicroDVD),
+ for other formats this argument is ignored. Framerate might
+ be detected from the file, in which case you don't need
+ to specify it here (when given, this argument overrides
+ autodetection).
Returns:
SSAFile
@@ -130,7 +147,8 @@ class SSAFile(MutableSequence):
return cls.from_file(fp, format_, fps=fps, **kwargs)
@classmethod
- def from_file(cls, fp: io.TextIOBase, format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
+ def from_file(cls, fp: TextIO, format_: Optional[str] = None, fps: Optional[float] = None,
+ **kwargs: Any) -> "SSAFile":
"""
Read subtitle file from file object.
@@ -141,8 +159,17 @@ class SSAFile(MutableSequence):
or :meth:`SSAFile.from_string()` is preferable.
Arguments:
- fp (file object): A file object, ie. :class:`io.TextIOBase` instance.
+ fp (file object): A file object, ie. :class:`TextIO` instance.
Note that the file must be opened in text mode (as opposed to binary).
+ format_ (str): Optional, forces use of specific parser
+ (eg. `"srt"`, `"ass"`). Otherwise, format is detected
+ automatically from file contents. This argument should
+ be rarely needed.
+ fps (float): Framerate for frame-based formats (MicroDVD),
+ for other formats this argument is ignored. Framerate might
+ be detected from the file, in which case you don't need
+ to specify it here (when given, this argument overrides
+ autodetection).
Returns:
SSAFile
@@ -164,7 +191,8 @@ class SSAFile(MutableSequence):
impl.from_file(subs, fp, format_, fps=fps, **kwargs)
return subs
- def save(self, path: str, encoding: str="utf-8", format_: Optional[str]=None, fps: Optional[float]=None, **kwargs):
+ def save(self, path: str, encoding: str = "utf-8", format_: Optional[str] = None, fps: Optional[float] = None,
+ errors: Optional[str] = None, **kwargs: Any) -> None:
"""
Save subtitle file to given path.
@@ -173,7 +201,7 @@ class SSAFile(MutableSequence):
See also:
Specific formats may implement additional saving options,
please refer to documentation of the implementation classes
- (eg. :meth:`pysubs2.subrip.SubripFormat.to_file()`)
+ (eg. :meth:`pysubs2.formats.subrip.SubripFormat.to_file()`)
Arguments:
path (str): Path to subtitle file.
@@ -191,6 +219,17 @@ class SSAFile(MutableSequence):
different framerate, use this argument. See also
:meth:`SSAFile.transform_framerate()` for fixing bad
frame-based to time-based conversions.
+ errors (Optional[str]): Error handling for character encoding
+ of output file. Defaults to ``None``; use the value ``"surrogateescape"``
+ for pass-through of bytes not supported by selected encoding via
+ Unicode surrogate pairs.
+ See documentation of builtin ``open()`` function for more.
+
+ .. versionchanged:: 1.7.0
+ The ``errors`` parameter was introduced to facilitate
+ pass-through of subtitle files with unknown text encoding.
+ Previous versions of the library behaved as if ``errors=None``.
+
kwargs: Extra options for the writer.
Raises:
@@ -205,10 +244,10 @@ class SSAFile(MutableSequence):
ext = os.path.splitext(path)[1].lower()
format_ = get_format_identifier(ext)
- with open(path, "w", encoding=encoding) as fp:
+ with open(path, "w", encoding=encoding, errors=errors) as fp:
self.to_file(fp, format_, fps=fps, **kwargs)
- def to_string(self, format_: str, fps: Optional[float]=None, **kwargs) -> str:
+ def to_string(self, format_: str, fps: Optional[float] = None, **kwargs: Any) -> str:
"""
Get subtitle file as a string.
@@ -222,7 +261,7 @@ class SSAFile(MutableSequence):
self.to_file(fp, format_, fps=fps, **kwargs)
return fp.getvalue()
- def to_file(self, fp: io.TextIOBase, format_: str, fps: Optional[float]=None, **kwargs):
+ def to_file(self, fp: TextIO, format_: str, fps: Optional[float] = None, **kwargs: Any) -> None:
"""
Write subtitle file to file object.
@@ -233,7 +272,7 @@ class SSAFile(MutableSequence):
or :meth:`SSAFile.to_string()` is preferable.
Arguments:
- fp (file object): A file object, ie. :class:`io.TextIOBase` instance.
+ fp (file object): A file object, ie. :class:`TextIO` instance.
Note that the file must be opened in text mode (as opposed to binary).
"""
@@ -244,8 +283,8 @@ class SSAFile(MutableSequence):
# Retiming subtitles
# ------------------------------------------------------------------------
- def shift(self, h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
- frames: Optional[int]=None, fps: Optional[float]=None):
+ def shift(self, h: IntOrFloat = 0, m: IntOrFloat = 0, s: IntOrFloat = 0, ms: IntOrFloat = 0,
+ frames: Optional[int] = None, fps: Optional[float] = None) -> None:
"""
Shift all subtitles by constant time amount.
@@ -253,7 +292,10 @@ class SSAFile(MutableSequence):
case, specify both frames and fps. h, m, s, ms will be ignored.
Arguments:
- h, m, s, ms: Integer or float values, may be positive or negative.
+ h: Hours to shift by; integer or float value, may be positive or negative.
+ m: Minutes to shift by; integer or float value, may be positive or negative.
+ s: Seconds to shift by; integer or float value, may be positive or negative.
+ ms: Milliseconds to shift by; integer or float value, may be positive or negative.
frames (int): When specified, must be an integer number of frames.
May be positive or negative. fps must be also specified.
fps (float): When specified, must be a positive number.
@@ -267,7 +309,7 @@ class SSAFile(MutableSequence):
line.start += delta
line.end += delta
- def transform_framerate(self, in_fps: float, out_fps: float):
+ def transform_framerate(self, in_fps: float, out_fps: float) -> None:
"""
Rescale all timestamps by ratio of in_fps/out_fps.
@@ -294,7 +336,7 @@ class SSAFile(MutableSequence):
# Working with styles
# ------------------------------------------------------------------------
- def rename_style(self, old_name: str, new_name: str):
+ def rename_style(self, old_name: str, new_name: str) -> None:
"""
Rename a style, including references to it.
@@ -308,6 +350,8 @@ class SSAFile(MutableSequence):
or new_name is taken.
"""
+ from .formats.substation import is_valid_field_content
+
if old_name not in self.styles:
raise KeyError(f"Style {old_name!r} not found")
if new_name in self.styles:
@@ -323,7 +367,7 @@ class SSAFile(MutableSequence):
if line.style == old_name:
line.style = new_name
- def import_styles(self, subs: "SSAFile", overwrite: bool=True):
+ def import_styles(self, subs: "SSAFile", overwrite: bool = True) -> None:
"""
Merge in styles from other SSAFile.
@@ -344,7 +388,7 @@ class SSAFile(MutableSequence):
# Helper methods
# ------------------------------------------------------------------------
- def remove_miscellaneous_events(self):
+ def remove_miscellaneous_events(self) -> None:
"""
Remove subtitles which appear to be non-essential (the --clean in CLI)
@@ -357,7 +401,7 @@ class SSAFile(MutableSequence):
new_events = []
duplicate_text_ids = set()
- times_to_texts = {}
+ times_to_texts: Dict[Tuple[int, int], List[str]] = {}
for i, e in enumerate(self):
tmp = times_to_texts.setdefault((e.start, e.end), [])
if tmp.count(e.plaintext) > 0:
@@ -376,7 +420,13 @@ class SSAFile(MutableSequence):
self.events = new_events
- def equals(self, other: "SSAFile"):
+ def get_text_events(self) -> List[SSAEvent]:
+ """
+ Return list of events excluding SSA comment lines and lines with SSA drawing tags
+ """
+ return [e for e in self if e.is_text]
+
+ def equals(self, other: "SSAFile") -> bool:
"""
Equality of two SSAFiles.
@@ -435,7 +485,8 @@ class SSAFile(MutableSequence):
return False
elif self_style != other_style:
for k in self_style.FIELDS:
- if getattr(self_style, k) != getattr(other_style, k): logging.debug("difference in field %r", k)
+ if getattr(self_style, k) != getattr(other_style, k):
+ logging.debug("difference in field %r", k)
logging.debug("style %r differs (self=%r, other=%r)", key, self_style.as_dict(), other_style.as_dict())
return False
@@ -446,7 +497,8 @@ class SSAFile(MutableSequence):
for i, (self_event, other_event) in enumerate(zip(self.events, other.events)):
if not self_event.equals(other_event):
for k in self_event.FIELDS:
- if getattr(self_event, k) != getattr(other_event, k): logging.debug("difference in field %r", k)
+ if getattr(self_event, k) != getattr(other_event, k):
+ logging.debug("difference in field %r", k)
logging.debug("event %d differs (self=%r, other=%r)", i, self_event.as_dict(), other_event.as_dict())
return False
@@ -454,7 +506,7 @@ class SSAFile(MutableSequence):
else:
raise TypeError("Cannot compare to non-SSAFile object")
- def __repr__(self):
+ def __repr__(self) -> str:
if self.events:
max_time = max(ev.end for ev in self)
s = f""
@@ -467,7 +519,7 @@ class SSAFile(MutableSequence):
# MutableSequence implementation + sort()
# ------------------------------------------------------------------------
- def sort(self):
+ def sort(self) -> None:
"""Sort subtitles time-wise, in-place."""
self.events.sort()
@@ -476,24 +528,24 @@ class SSAFile(MutableSequence):
@overload
def __getitem__(self, item: int) -> SSAEvent:
- return self.events[item]
+ pass
@overload
def __getitem__(self, s: slice) -> List[SSAEvent]:
- return self.events[s]
+ pass
- def __getitem__(self, item):
+ def __getitem__(self, item: Any) -> Any:
return self.events[item]
@overload
- def __setitem__(self, key: int, value: SSAEvent):
+ def __setitem__(self, key: int, value: SSAEvent) -> None:
pass
@overload
- def __setitem__(self, keys: slice, values: Iterable[SSAEvent]):
+ def __setitem__(self, keys: slice, values: Iterable[SSAEvent]) -> None:
pass
- def __setitem__(self, key, value):
+ def __setitem__(self, key: Any, value: Any) -> None:
if isinstance(key, int):
if isinstance(value, SSAEvent):
self.events[key] = value
@@ -509,21 +561,24 @@ class SSAFile(MutableSequence):
raise TypeError("Bad key type")
@overload
- def __delitem__(self, key: int):
+ def __delitem__(self, key: int) -> None:
pass
@overload
- def __delitem__(self, s: slice):
+ def __delitem__(self, s: slice) -> None:
pass
- def __delitem__(self, key):
+ def __delitem__(self, key: Any) -> None:
del self.events[key]
- def __len__(self):
+ def __len__(self) -> int:
return len(self.events)
- def insert(self, index: int, value: SSAEvent):
+ def insert(self, index: int, value: SSAEvent) -> None:
if isinstance(value, SSAEvent):
self.events.insert(index, value)
else:
raise TypeError("SSAFile.events must contain only SSAEvent objects")
+
+
+from .formats import autodetect_format, get_format_class, get_format_identifier # noqa: E402
diff --git a/libs/pysubs2/ssastyle.py b/libs/pysubs2/ssastyle.py
index f22476166..b0df57106 100644
--- a/libs/pysubs2/ssastyle.py
+++ b/libs/pysubs2/ssastyle.py
@@ -1,5 +1,5 @@
import warnings
-from typing import Dict, Any, ClassVar
+from typing import Dict, Any, ClassVar, FrozenSet
import dataclasses
from .common import Color, Alignment
@@ -23,7 +23,7 @@ class SSAStyle:
DEFAULT_STYLE: ClassVar["SSAStyle"] = None # type: ignore[assignment]
@property
- def FIELDS(self):
+ def FIELDS(self) -> FrozenSet[str]:
"""All fields in SSAStyle."""
warnings.warn("Deprecated in 1.2.0 - it's a dataclass now", DeprecationWarning)
return frozenset(field.name for field in dataclasses.fields(self))
@@ -66,7 +66,7 @@ class SSAStyle:
# dataclasses.asdict() would recursively dictify Color objects, which we don't want
return {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
- def __repr__(self):
+ def __repr__(self) -> str:
return f" int:
"""
Convert time to milliseconds.
@@ -38,7 +44,7 @@ def make_time(h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=
raise ValueError("Both fps and frames must be specified")
-def timestamp_to_ms(groups: Sequence[str]):
+def timestamp_to_ms(groups: Sequence[str]) -> int:
"""
Convert groups from :data:`pysubs2.time.TIMESTAMP` or :data:`pysubs2.time.TIMESTAMP_SHORT`
match to milliseconds.
@@ -50,6 +56,11 @@ def timestamp_to_ms(groups: Sequence[str]):
1000
"""
+ h: int
+ m: int
+ s: int
+ ms: int
+ frac: int
if len(groups) == 4:
h, m, s, frac = map(int, groups)
ms = frac * 10**(3 - len(groups[-1]))
@@ -65,7 +76,7 @@ def timestamp_to_ms(groups: Sequence[str]):
return ms
-def times_to_ms(h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0) -> int:
+def times_to_ms(h: IntOrFloat = 0, m: IntOrFloat = 0, s: IntOrFloat = 0, ms: IntOrFloat = 0) -> int:
"""
Convert hours, minutes, seconds to milliseconds.
@@ -124,7 +135,7 @@ def ms_to_frames(ms: IntOrFloat, fps: float) -> int:
return int(round((ms / 1000) * fps))
-def ms_to_times(ms: IntOrFloat) -> Tuple[int, int, int, int]:
+def ms_to_times(ms: IntOrFloat) -> Times:
"""
Convert milliseconds to normalized tuple (h, m, s, ms).
@@ -144,7 +155,7 @@ def ms_to_times(ms: IntOrFloat) -> Tuple[int, int, int, int]:
return Times(h, m, s, ms)
-def ms_to_str(ms: IntOrFloat, fractions: bool=False) -> str:
+def ms_to_str(ms: IntOrFloat, fractions: bool = False) -> str:
"""
Prettyprint milliseconds to [-]H:MM:SS[.mmm]
diff --git a/libs/version.txt b/libs/version.txt
index 0774103d7..09235fb12 100644
--- a/libs/version.txt
+++ b/libs/version.txt
@@ -28,7 +28,7 @@ knowit==0.5.2
Mako==1.3.2
pycountry==23.12.11
pyrsistent==0.20.0
-pysubs2==1.6.1
+pysubs2==1.7.2
python-engineio==4.9.0
python-socketio==5.11.1
pytz==2024.1