diff --git a/custom_libs/subliminal_patch/subtitle.py b/custom_libs/subliminal_patch/subtitle.py index 529e99019..d0cd585ed 100644 --- a/custom_libs/subliminal_patch/subtitle.py +++ b/custom_libs/subliminal_patch/subtitle.py @@ -13,7 +13,7 @@ import pysrt import pysubs2 from bs4 import UnicodeDammit from pysubs2 import SSAStyle -from pysubs2.subrip import parse_tags, MAX_REPRESENTABLE_TIME +from pysubs2.formats.subrip import parse_tags, MAX_REPRESENTABLE_TIME from pysubs2.time import ms_to_times from subzero.modification import SubtitleModifications from subzero.language import Language diff --git a/libs/pysubs2-1.6.1.dist-info/METADATA b/libs/pysubs2-1.6.1.dist-info/METADATA deleted file mode 100644 index ee4854808..000000000 --- a/libs/pysubs2-1.6.1.dist-info/METADATA +++ /dev/null @@ -1,84 +0,0 @@ -Metadata-Version: 2.1 -Name: pysubs2 -Version: 1.6.1 -Summary: A library for editing subtitle files -Home-page: https://github.com/tkarabela/pysubs2 -Author: Tomas Karabela -Author-email: tkarabela@seznam.cz -License: MIT -Project-URL: Documentation, https://pysubs2.readthedocs.io -Project-URL: Bug Tracker, https://github.com/tkarabela/pysubs2/issues -Keywords: SubStation,SubRip,MicroDVD,MPL2,WebVTT,ass,srt,sub,vtt,subtitles,captions -Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.7 -Classifier: Programming Language :: Python :: 3.8 -Classifier: Programming Language :: Python :: 3.9 -Classifier: Programming Language :: Python :: 3.10 -Classifier: Programming Language :: Python :: 3.11 -Classifier: Development Status :: 5 - Production/Stable -Classifier: Topic :: Software Development :: Libraries :: Python Modules -Classifier: Topic :: Text Processing :: Markup -Classifier: Topic :: Multimedia :: Video -Classifier: Intended Audience :: Developers -Classifier: Intended Audience :: End Users/Desktop -Classifier: Operating System :: OS Independent -Classifier: License :: OSI Approved :: MIT License -Classifier: Typing :: Typed -Requires-Python: >=3.7 -Description-Content-Type: text/x-rst -License-File: LICENSE.txt - -pysubs2 -======= - - -.. image:: https://img.shields.io/github/actions/workflow/status/tkarabela/pysubs2/main.yml?branch=master - :alt: pysubs2 build master branch - :target: https://github.com/tkarabela/pysubs2/actions -.. image:: https://img.shields.io/codecov/c/github/tkarabela/pysubs2 - :alt: pysubs2 test code coverage - :target: https://app.codecov.io/github/tkarabela/pysubs2 -.. image:: http://www.mypy-lang.org/static/mypy_badge.svg - :alt: MyPy checked - :target: https://github.com/tkarabela/pysubs2/actions -.. image:: https://img.shields.io/pypi/v/pysubs2.svg?style=flat-square - :alt: PyPI - Version - :target: https://pypi.org/project/pysubs2/ -.. image:: https://img.shields.io/pypi/status/pysubs2.svg?style=flat-square - :alt: PyPI - Status - :target: https://pypi.org/project/pysubs2/ -.. image:: https://img.shields.io/pypi/pyversions/pysubs2.svg?style=flat-square - :alt: PyPI - Python Version - :target: https://pypi.org/project/pysubs2/ -.. image:: https://img.shields.io/pypi/l/pysubs2.svg?style=flat-square - :alt: PyPI - License - :target: LICENSE.txt - - -pysubs2 is a Python library for editing subtitle files. -It’s based on *SubStation Alpha*, the native format of -`Aegisub `_; it also supports *SubRip (SRT)*, -*MicroDVD*, *MPL2*, *TMP* and *WebVTT* formats and *OpenAI Whisper* captions. - -There is a small CLI tool for batch conversion and retiming. - -.. code:: bash - - $ pip install pysubs2 - $ pysubs2 --shift 0.3s *.srt - $ pysubs2 --to srt *.ass - -.. code:: python - - import pysubs2 - subs = pysubs2.load("my_subtitles.ass", encoding="utf-8") - subs.shift(s=2.5) - for line in subs: - line.text = "{\\be1}" + line.text - subs.save("my_subtitles_edited.ass") - -To learn more, please `see the documentation `_. -If you'd like to contribute, see `CONTRIBUTING.md `_. - -pysubs2 is licensed under the MIT license (see `LICENSE.txt `_). diff --git a/libs/pysubs2-1.6.1.dist-info/RECORD b/libs/pysubs2-1.6.1.dist-info/RECORD deleted file mode 100644 index bd71ab000..000000000 --- a/libs/pysubs2-1.6.1.dist-info/RECORD +++ /dev/null @@ -1,28 +0,0 @@ -../../bin/pysubs2,sha256=1ipjtihceH0M9ljChd41YMd3co9-Mn5sngcbA3Kgupk,241 -pysubs2-1.6.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 -pysubs2-1.6.1.dist-info/LICENSE.txt,sha256=Syr1zDKD8s_Wl4E6er5xVmuOZ1bvaYyLOxcEI7lW4KU,1063 -pysubs2-1.6.1.dist-info/METADATA,sha256=f8UZ93EkW3RJVHEXKuIX29IT8Cuz2siGq1IlcLS17tc,3281 -pysubs2-1.6.1.dist-info/RECORD,, -pysubs2-1.6.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 -pysubs2-1.6.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92 -pysubs2-1.6.1.dist-info/entry_points.txt,sha256=mznmeaia9Ec19KsaIM8D4tt6V0HnOUlPhSg5nXiAfAE,49 -pysubs2-1.6.1.dist-info/top_level.txt,sha256=i_V6JX-AWjMX15-Dzyc0RlrInU7Zun9PYfdB9brM95E,8 -pysubs2/__init__.py,sha256=lkW5MzctUNvTu5KPVr84eO3Nh2x5yHp0SWAJ4NnAp-Y,505 -pysubs2/__main__.py,sha256=3iAK73rLxjLJqFMYD-jn3PDJvHZ8rIOW_NgsmBkRGhA,134 -pysubs2/cli.py,sha256=6zWUOifOCm5ziwn0dd_MsxZfdFcLiaGpnCvbz65WqbA,10607 -pysubs2/common.py,sha256=SWYT6_aoL3CBEO-j0vP2EZnTDcUvNom6hW_y8gChGFM,1479 -pysubs2/exceptions.py,sha256=pEbAUuDj3GOUbxryLAdZa7qawMWxNhIGyzC3j-VCykM,633 -pysubs2/formatbase.py,sha256=WLHV7ppth7giTxqjHN13exyx1tfuwJGpEq0iNCMGYUY,2945 -pysubs2/formats.py,sha256=nlGNc9dnC1vHQF-qAZiQEqfBN7Mn0B8OPGCr_ezzPvc,2438 -pysubs2/jsonformat.py,sha256=Z7BfaqGkK4vl0Gc9lPF_UUPUi3l4oJlxD-0TOSLDt9w,1862 -pysubs2/microdvd.py,sha256=boTIasQUUV8430k5kz8ba2GgCGqr1hGEEbdlE7Lw7dI,4398 -pysubs2/mpl2.py,sha256=T1JUIAG3zCJHKoLRlYVLI_qFbvdkgV0w6XG9wlYJ17Q,1802 -pysubs2/ssaevent.py,sha256=bz5oGH2yIJQl7Lzi4Dw_KFBfLMPelmomxXAeNJ6tXTc,5883 -pysubs2/ssafile.py,sha256=b9adFdbx1MdHEB_ZdRon7GwKT5pKsaJhnnTE7UHcaKM,20356 -pysubs2/ssastyle.py,sha256=PFFejFEwaUx_bO1lkoHVSSJcPhupGO3epCCBCAEgclk,3995 -pysubs2/subrip.py,sha256=lK8bxf98wLKqordXeY2j8DwknDBN8kyLZXVFbZwj3mw,7371 -pysubs2/substation.py,sha256=xUC19ASpj3yDS9ASzFjNn88KnIqkvWqE_vPOVxWCKsM,16170 -pysubs2/time.py,sha256=1GPQlajjEio9rJvGnNO8J5OnWChQlB3BgAzRS3gnEzM,4641 -pysubs2/tmp.py,sha256=hJ4hpja7AckpGmdbC5KuZ2jyIEXWu2b1_SILqzbR5iQ,3954 -pysubs2/webvtt.py,sha256=sNFtGAaWn7gVMG8SVHS0Ndcku6LyiAaiv9gPErl0kys,1531 -pysubs2/whisper.py,sha256=5dywTdlXa46ulYbVeWpM_dH9jiwRKlTm6G1GrzIjsx0,1604 diff --git a/libs/pysubs2-1.6.1.dist-info/INSTALLER b/libs/pysubs2-1.7.2.dist-info/INSTALLER similarity index 100% rename from libs/pysubs2-1.6.1.dist-info/INSTALLER rename to libs/pysubs2-1.7.2.dist-info/INSTALLER diff --git a/libs/pysubs2-1.6.1.dist-info/LICENSE.txt b/libs/pysubs2-1.7.2.dist-info/LICENSE.txt similarity index 96% rename from libs/pysubs2-1.6.1.dist-info/LICENSE.txt rename to libs/pysubs2-1.7.2.dist-info/LICENSE.txt index bad92093e..b29a6c1d5 100644 --- a/libs/pysubs2-1.6.1.dist-info/LICENSE.txt +++ b/libs/pysubs2-1.7.2.dist-info/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2014-2023 Tomas Karabela +Copyright (c) 2014-2024 Tomas Karabela Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/libs/pysubs2-1.7.2.dist-info/METADATA b/libs/pysubs2-1.7.2.dist-info/METADATA new file mode 100644 index 000000000..e1fef61e8 --- /dev/null +++ b/libs/pysubs2-1.7.2.dist-info/METADATA @@ -0,0 +1,70 @@ +Metadata-Version: 2.1 +Name: pysubs2 +Version: 1.7.2 +Summary: A library for editing subtitle files +Home-page: https://github.com/tkarabela/pysubs2 +Author: Tomas Karabela +Author-email: tkarabela@seznam.cz +License: MIT +Project-URL: Documentation, https://pysubs2.readthedocs.io +Project-URL: Bug Tracker, https://github.com/tkarabela/pysubs2/issues +Keywords: SubStation,SubRip,MicroDVD,MPL2,WebVTT,ass,srt,sub,vtt,subtitles,captions +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Development Status :: 5 - Production/Stable +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Text Processing :: Markup +Classifier: Topic :: Multimedia :: Video +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: End Users/Desktop +Classifier: Operating System :: OS Independent +Classifier: License :: OSI Approved :: MIT License +Classifier: Typing :: Typed +Requires-Python: >=3.8 +Description-Content-Type: text/markdown +License-File: LICENSE.txt + +pysubs2 +======= + +[![pysubs2 build master branch](https://img.shields.io/github/actions/workflow/status/tkarabela/pysubs2/main.yml?branch=master)](https://github.com/tkarabela/pysubs2/actions) +[![pysubs2 test code coverage](https://img.shields.io/codecov/c/github/tkarabela/pysubs2)](https://app.codecov.io/github/tkarabela/pysubs2) +[![Static Badge](https://img.shields.io/badge/MyPy%20%26%20Ruffle-checked-blue?style=flat)](https://github.com/tkarabela/pysubs2/actions) +[![PyPI - Version](https://img.shields.io/pypi/v/pysubs2.svg?style=flat)](https://pypi.org/project/pysubs2/) +[![PyPI - Status](https://img.shields.io/pypi/status/pysubs2.svg?style=flat)](https://pypi.org/project/pysubs2/) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pysubs2.svg?style=flat)](https://pypi.org/project/pysubs2/) +[![PyPI - License](https://img.shields.io/pypi/l/pysubs2.svg?style=flat)](LICENSE.txt) +[![GitHub Repo stars](https://img.shields.io/github/stars/tkarabela/pysubs2?style=flat&label=GitHub%20stars)](https://github.com/tkarabela/pysubs2) + + +pysubs2 is a Python library for editing subtitle files. +It’s based on *SubStation Alpha*, the native format of +[Aegisub](http://www.aegisub.org/); it also supports *SubRip (SRT)*, +*MicroDVD*, *MPL2*, *TMP* and *WebVTT* formats and *OpenAI Whisper* captions. + +There is a small CLI tool for batch conversion and retiming. + +```bash +pip install pysubs2 +pysubs2 --shift 0.3s *.srt +pysubs2 --to srt *.ass +``` + +```python +import pysubs2 +subs = pysubs2.load("my_subtitles.ass", encoding="utf-8") +subs.shift(s=2.5) +for line in subs: + line.text = "{\\be1}" + line.text +subs.save("my_subtitles_edited.ass") +``` + +To learn more, please [see the documentation](http://pysubs2.readthedocs.io). +If you'd like to contribute, see [CONTRIBUTING.md](CONTRIBUTING.md). + +pysubs2 is licensed under the MIT license (see [LICENSE.txt](LICENSE.txt)). diff --git a/libs/pysubs2-1.7.2.dist-info/RECORD b/libs/pysubs2-1.7.2.dist-info/RECORD new file mode 100644 index 000000000..d0730aa1f --- /dev/null +++ b/libs/pysubs2-1.7.2.dist-info/RECORD @@ -0,0 +1,47 @@ +../../bin/pysubs2,sha256=qbyZPJ0JMXc2Ii34KZgXKNvu1v8En-GFSWQFwhstugg,249 +pysubs2-1.7.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +pysubs2-1.7.2.dist-info/LICENSE.txt,sha256=WYgskvBrUBBDxuKH9JSV1u22g-UBTwHjeuJLlPp6lWU,1063 +pysubs2-1.7.2.dist-info/METADATA,sha256=2Fm6UaI7kElE-smr83cNBIfWRXt0vfso1Kyx5sChjB0,3183 +pysubs2-1.7.2.dist-info/RECORD,, +pysubs2-1.7.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +pysubs2-1.7.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92 +pysubs2-1.7.2.dist-info/entry_points.txt,sha256=mznmeaia9Ec19KsaIM8D4tt6V0HnOUlPhSg5nXiAfAE,49 +pysubs2-1.7.2.dist-info/top_level.txt,sha256=i_V6JX-AWjMX15-Dzyc0RlrInU7Zun9PYfdB9brM95E,8 +pysubs2/__init__.py,sha256=L71MMMS6zcAiGWdddhnc4COZmKLYX__igP0Tr5q-y3Q,783 +pysubs2/__main__.py,sha256=3iAK73rLxjLJqFMYD-jn3PDJvHZ8rIOW_NgsmBkRGhA,134 +pysubs2/__pycache__/__init__.cpython-311.pyc,, +pysubs2/__pycache__/__main__.cpython-311.pyc,, +pysubs2/__pycache__/cli.cpython-311.pyc,, +pysubs2/__pycache__/common.cpython-311.pyc,, +pysubs2/__pycache__/exceptions.cpython-311.pyc,, +pysubs2/__pycache__/ssaevent.cpython-311.pyc,, +pysubs2/__pycache__/ssafile.cpython-311.pyc,, +pysubs2/__pycache__/ssastyle.cpython-311.pyc,, +pysubs2/__pycache__/time.cpython-311.pyc,, +pysubs2/cli.py,sha256=lFVZsLChtnnaRov2lNEHlecTJaoNNvTLMQmZe0-4oGE,11916 +pysubs2/common.py,sha256=33XyqTSPqUNMedlnVkx-7GWFwvX0Fpce1G_ZHHtwQuI,1503 +pysubs2/exceptions.py,sha256=5XMoOvloP4QPPkEPour6YalxomcK-RkJA9eb_ecIbH0,2318 +pysubs2/formats/__init__.py,sha256=KZy9IibVmK-WwCHAYYplJELmYEnQu9BBRz5HEvyZ5U4,2515 +pysubs2/formats/__pycache__/__init__.cpython-311.pyc,, +pysubs2/formats/__pycache__/base.cpython-311.pyc,, +pysubs2/formats/__pycache__/jsonformat.cpython-311.pyc,, +pysubs2/formats/__pycache__/microdvd.cpython-311.pyc,, +pysubs2/formats/__pycache__/mpl2.cpython-311.pyc,, +pysubs2/formats/__pycache__/subrip.cpython-311.pyc,, +pysubs2/formats/__pycache__/substation.cpython-311.pyc,, +pysubs2/formats/__pycache__/tmp.cpython-311.pyc,, +pysubs2/formats/__pycache__/webvtt.cpython-311.pyc,, +pysubs2/formats/__pycache__/whisper.cpython-311.pyc,, +pysubs2/formats/base.py,sha256=zzt4L3KIC6VrUNfzwjDHSF5HV7swrdo65dop90vfy70,2959 +pysubs2/formats/jsonformat.py,sha256=TWk_qOI0-GaV_Jzs2S5JOe7uWu7t1jllO9NYOnZgRcE,2101 +pysubs2/formats/microdvd.py,sha256=hfshagBIMMPz3WgqpttYGugtAj_6CheAiTkvTTrn3NE,5970 +pysubs2/formats/mpl2.py,sha256=vpyL5j7gxxkGvWvI8JXwFlHaEyCBHYTn_mtgk4zqNFw,1970 +pysubs2/formats/subrip.py,sha256=VsEhj6aX_NZdUbecxjKk-5Ri7GcUdGh8dfzZVTsAZSM,7484 +pysubs2/formats/substation.py,sha256=UFaZ3sZoCa3bizKvn_5Ugtqy6crO1FqldxS5OI7ozcQ,16795 +pysubs2/formats/tmp.py,sha256=FKktjpgv07rf6EZ1Og0rkNpcj59Z6kL6dL-V0QLSKQw,4100 +pysubs2/formats/webvtt.py,sha256=Ycur_E5ztjazW2akyUXTsbNUdLs4S-knq93hwompCac,1788 +pysubs2/formats/whisper.py,sha256=JHGY8nhiVso_iyVZOP72lLbC7GM-gX_FFc1BdzCGdo0,1607 +pysubs2/ssaevent.py,sha256=VR0Pso779Pw5bs-Q-VpIz8Zs-A6p2a2pptQtsFVjRnM,6291 +pysubs2/ssafile.py,sha256=smMZIFk5h8n3uUuzgrcSmzdNsVU7oB3kguKQtUAdQGU,23833 +pysubs2/ssastyle.py,sha256=hlNowFmuiiwOzWTtQxKUakTwcE7Q5KxlQtl64Q6ScMU,4031 +pysubs2/time.py,sha256=Onk1vs9xXKiuwYK0RorZVU29p2FCEARapnfP_yYczTQ,4739 diff --git a/libs/pysubs2-1.6.1.dist-info/REQUESTED b/libs/pysubs2-1.7.2.dist-info/REQUESTED similarity index 100% rename from libs/pysubs2-1.6.1.dist-info/REQUESTED rename to libs/pysubs2-1.7.2.dist-info/REQUESTED diff --git a/libs/pysubs2-1.6.1.dist-info/WHEEL b/libs/pysubs2-1.7.2.dist-info/WHEEL similarity index 65% rename from libs/pysubs2-1.6.1.dist-info/WHEEL rename to libs/pysubs2-1.7.2.dist-info/WHEEL index 98c0d20b7..bab98d675 100644 --- a/libs/pysubs2-1.6.1.dist-info/WHEEL +++ b/libs/pysubs2-1.7.2.dist-info/WHEEL @@ -1,5 +1,5 @@ Wheel-Version: 1.0 -Generator: bdist_wheel (0.42.0) +Generator: bdist_wheel (0.43.0) Root-Is-Purelib: true Tag: py3-none-any diff --git a/libs/pysubs2-1.6.1.dist-info/entry_points.txt b/libs/pysubs2-1.7.2.dist-info/entry_points.txt similarity index 100% rename from libs/pysubs2-1.6.1.dist-info/entry_points.txt rename to libs/pysubs2-1.7.2.dist-info/entry_points.txt diff --git a/libs/pysubs2-1.6.1.dist-info/top_level.txt b/libs/pysubs2-1.7.2.dist-info/top_level.txt similarity index 100% rename from libs/pysubs2-1.6.1.dist-info/top_level.txt rename to libs/pysubs2-1.7.2.dist-info/top_level.txt diff --git a/libs/pysubs2/__init__.py b/libs/pysubs2/__init__.py index 936988346..8ce998674 100644 --- a/libs/pysubs2/__init__.py +++ b/libs/pysubs2/__init__.py @@ -1,10 +1,28 @@ from .ssafile import SSAFile from .ssaevent import SSAEvent from .ssastyle import SSAStyle -from . import time, formats, cli, whisper -from .exceptions import * +from . import time, formats, cli, exceptions +from .formats import whisper +from .exceptions import * # noqa: F403 from .common import Color, Alignment, VERSION +__all__ = [ + "SSAFile", + "SSAEvent", + "SSAStyle", + "time", + "formats", + "cli", + "whisper", + "exceptions", + "Color", + "Alignment", + "VERSION", + "load", + "load_from_whisper", + "make_time", +] + #: Alias for :meth:`SSAFile.load()`. load = SSAFile.load diff --git a/libs/pysubs2/cli.py b/libs/pysubs2/cli.py index d99c63f5c..14dc6766d 100644 --- a/libs/pysubs2/cli.py +++ b/libs/pysubs2/cli.py @@ -3,10 +3,11 @@ import codecs import os import re import os.path as op -import io -from io import open +from io import TextIOWrapper import sys from textwrap import dedent +from typing import List + from .formats import get_file_extension, FORMAT_IDENTIFIERS from .time import make_time from .ssafile import SSAFile @@ -42,7 +43,7 @@ def change_ext(path: str, ext: str) -> str: class Pysubs2CLI: - def __init__(self): + def __init__(self) -> None: parser = self.parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, prog="pysubs2", description=dedent(""" @@ -80,6 +81,12 @@ class Pysubs2CLI: "If you wish to convert between encodings, make sure --input-enc is set correctly! " "Otherwise, your output files will probably be corrupted. It's a good idea to " "back up your files or use the -o option.") + parser.add_argument("--enc-error-handling", choices=("strict", "surrogateescape"), + default="surrogateescape", + help="Character encoding error handling for input and output. Defaults to 'surrogateescape' " + "which passes through unrecognized characters to output unchanged. Use 'strict' if " + "you want the command to fail when encountering a character incompatible with selected " + "input/output encoding.") parser.add_argument("--fps", metavar="FPS", type=positive_float, help="This argument specifies framerate for MicroDVD files. By default, framerate " "is detected from the file. Use this when framerate specification is missing " @@ -116,13 +123,14 @@ class Pysubs2CLI: extra_sub_options.add_argument("--sub-no-write-fps-declaration", action="store_true", help="(output) omit writing FPS as first zero-length subtitle") - def __call__(self, argv): + def __call__(self, argv: List[str]) -> int: try: - self.main(argv) + return self.main(argv) except KeyboardInterrupt: - exit("\nAborted by user.") + print("\nAborted by user.", file=sys.stderr) + return 1 - def main(self, argv): + def main(self, argv: List[str]) -> int: args = self.parser.parse_args(argv) errors = 0 @@ -157,7 +165,7 @@ class Pysubs2CLI: print("Skipping", path, "(not a file)") errors += 1 else: - with open(path, encoding=args.input_enc) as infile: + with open(path, encoding=args.input_enc, errors=args.enc_error_handling) as infile: subs = SSAFile.from_file(infile, args.input_format, args.fps, **extra_input_args) self.process(subs, args) @@ -165,31 +173,37 @@ class Pysubs2CLI: if args.output_format is None: outpath = path output_format = subs.format + assert output_format is not None, "subs.format must not be None (it was read from file)" else: ext = get_file_extension(args.output_format) outpath = change_ext(path, ext) output_format = args.output_format + assert output_format is not None, "args.output_format must not be None (see if/else)" if args.output_dir is not None: _, filename = op.split(outpath) outpath = op.join(args.output_dir, filename) - with open(outpath, "w", encoding=args.output_enc) as outfile: + with open(outpath, "w", encoding=args.output_enc, errors=args.enc_error_handling) as outfile: subs.to_file(outfile, output_format, args.fps, apply_styles=not args.clean, **extra_output_args) - else: - infile = io.TextIOWrapper(sys.stdin.buffer, args.input_enc) - outfile = io.TextIOWrapper(sys.stdout.buffer, args.output_enc) + elif not sys.stdin.isatty(): + infile = TextIOWrapper(sys.stdin.buffer, encoding=args.input_enc, errors=args.enc_error_handling) + outfile = TextIOWrapper(sys.stdout.buffer, encoding=args.output_enc, errors=args.enc_error_handling) subs = SSAFile.from_file(infile, args.input_format, args.fps) self.process(subs, args) output_format = args.output_format or subs.format + assert output_format is not None, "output_format must not be None (it's either given or inferred at read time)" subs.to_file(outfile, output_format, args.fps, apply_styles=not args.clean) + else: + self.parser.print_help() + errors += 1 - return (0 if errors == 0 else 1) + return 0 if errors == 0 else 1 @staticmethod - def process(subs, args): + def process(subs: SSAFile, args: argparse.Namespace) -> None: if args.shift is not None: subs.shift(ms=args.shift) elif args.shift_back is not None: @@ -202,7 +216,7 @@ class Pysubs2CLI: subs.remove_miscellaneous_events() -def __main__(): +def __main__() -> None: cli = Pysubs2CLI() rv = cli(sys.argv[1:]) sys.exit(rv) diff --git a/libs/pysubs2/common.py b/libs/pysubs2/common.py index 8595f485c..9ac8f13e8 100644 --- a/libs/pysubs2/common.py +++ b/libs/pysubs2/common.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Union +from typing import Tuple, Union from enum import IntEnum @@ -54,11 +54,11 @@ class Alignment(IntEnum): return SSA_ALIGNMENT[self.value - 1] -SSA_ALIGNMENT = (1, 2, 3, 9, 10, 11, 5, 6, 7) +SSA_ALIGNMENT: Tuple[int, ...] = (1, 2, 3, 9, 10, 11, 5, 6, 7) #: Version of the pysubs2 library. -VERSION = "1.6.1" +VERSION = "1.7.2" IntOrFloat = Union[int, float] diff --git a/libs/pysubs2/exceptions.py b/libs/pysubs2/exceptions.py index 9568fa52f..acf8112fb 100644 --- a/libs/pysubs2/exceptions.py +++ b/libs/pysubs2/exceptions.py @@ -1,3 +1,15 @@ +from typing import List + + +__all__ = [ + "Pysubs2Error", + "UnknownFPSError", + "UnknownFileExtensionError", + "UnknownFormatIdentifierError", + "FormatAutodetectionError", +] + + class Pysubs2Error(Exception): """Base class for pysubs2 exceptions.""" @@ -7,16 +19,58 @@ class UnknownFPSError(Pysubs2Error): class UnknownFileExtensionError(Pysubs2Error): - """File extension does not pertain to any known subtitle format.""" + """ + File extension does not pertain to any known subtitle format. + + This exception is raised by `SSAFile.save()` when the ``format_`` parameter + is not specified. It will try to guess the desired format from output filename + and raise this exception when it fails. + + Attributes: + ext (str): File extension + """ + + def __init__(self, ext: str) -> None: + self.ext = ext + msg = f"File extension {ext!r} does not match any supported subtitle format" + super().__init__(msg) class UnknownFormatIdentifierError(Pysubs2Error): - """Unknown subtitle format identifier (ie. string like ``"srt"``).""" + """ + Unknown subtitle format identifier (ie. string like ``"srt"``). + + This exception is used when interpreting ``format_`` parameter fails, + eg. in `SSAFile.save()`. + + Attributes: + format_ (str): Format identifier + """ + + def __init__(self, format_: str) -> None: + self.format_ = format_ + msg = f"Format identifier {format_!r} does not match any supported subtitle format" + super().__init__(msg) class FormatAutodetectionError(Pysubs2Error): - """Subtitle format is ambiguous or unknown.""" + """ + Subtitle format is ambiguous or unknown based on analysis of file fragment + This exception is raised by `SSAFile.load()` and related methods + when the ``format_`` parameter is not specified. It will try to guess + the input format based on reading first few kilobytes of the input file + and raise this exception if the format cannot be uniquely determined. -class ContentNotUsable(Pysubs2Error): - """Current content not usable for specified format""" + Attributes: + content (str): Analyzed subtitle file content + formats (list[str]): Format identifiers for detected formats + """ + def __init__(self, content: str, formats: List[str]) -> None: + self.content = content + self.formats = formats + if not formats: + msg = "No suitable formats" + else: + msg = f"Multiple suitable formats ({formats!r})" + super().__init__(msg) diff --git a/libs/pysubs2/formats.py b/libs/pysubs2/formats/__init__.py similarity index 88% rename from libs/pysubs2/formats.py rename to libs/pysubs2/formats/__init__.py index 7a8388990..abb01560c 100644 --- a/libs/pysubs2/formats.py +++ b/libs/pysubs2/formats/__init__.py @@ -1,6 +1,6 @@ from typing import Dict, Type -from .formatbase import FormatBase +from .base import FormatBase from .microdvd import MicroDVDFormat from .subrip import SubripFormat from .jsonformat import JSONFormat @@ -8,7 +8,7 @@ from .substation import SubstationFormat from .mpl2 import MPL2Format from .tmp import TmpFormat from .webvtt import WebVTTFormat -from .exceptions import * +from ..exceptions import UnknownFormatIdentifierError, UnknownFileExtensionError, FormatAutodetectionError #: Dict mapping file extensions to format identifiers. FILE_EXTENSION_TO_FORMAT_IDENTIFIER: Dict[str, str] = { @@ -75,6 +75,6 @@ def autodetect_format(content: str) -> str: if len(formats) == 1: return formats.pop() elif not formats: - raise FormatAutodetectionError("No suitable formats") + raise FormatAutodetectionError(content=content, formats=[]) else: - raise FormatAutodetectionError(f"Multiple suitable formats ({formats!r})") + raise FormatAutodetectionError(content=content, formats=list(formats)) diff --git a/libs/pysubs2/formatbase.py b/libs/pysubs2/formats/base.py similarity index 91% rename from libs/pysubs2/formatbase.py rename to libs/pysubs2/formats/base.py index 6ea3ea77e..be97845e3 100644 --- a/libs/pysubs2/formatbase.py +++ b/libs/pysubs2/formats/base.py @@ -1,6 +1,5 @@ -from typing import Optional -import io -import pysubs2 +from typing import Optional, Any, TextIO +from ..ssafile import SSAFile class FormatBase: @@ -19,7 +18,7 @@ class FormatBase: """ @classmethod - def from_file(cls, subs: "pysubs2.SSAFile", fp: io.TextIOBase, format_: str, **kwargs): + def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None: """ Load subtitle file into an empty SSAFile. @@ -42,7 +41,7 @@ class FormatBase: raise NotImplementedError("Parsing is not supported for this format") @classmethod - def to_file(cls, subs: "pysubs2.SSAFile", fp: io.TextIOBase, format_: str, **kwargs): + def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None: """ Write SSAFile into a file. diff --git a/libs/pysubs2/jsonformat.py b/libs/pysubs2/formats/jsonformat.py similarity index 73% rename from libs/pysubs2/jsonformat.py rename to libs/pysubs2/formats/jsonformat.py index df838ee92..2f9acc5f1 100644 --- a/libs/pysubs2/jsonformat.py +++ b/libs/pysubs2/formats/jsonformat.py @@ -1,15 +1,18 @@ import dataclasses import json -from .common import Color -from .ssaevent import SSAEvent -from .ssastyle import SSAStyle -from .formatbase import FormatBase +from typing import Any, Optional, TextIO + +from ..common import Color +from ..ssaevent import SSAEvent +from ..ssastyle import SSAStyle +from .base import FormatBase +from ..ssafile import SSAFile # We're using Color dataclass # https://stackoverflow.com/questions/51286748/make-the-python-json-encoder-support-pythons-new-dataclasses class EnhancedJSONEncoder(json.JSONEncoder): - def default(self, o): + def default(self, o: Any) -> Any: if dataclasses.is_dataclass(o): return dataclasses.asdict(o) return super().default(o) @@ -22,13 +25,15 @@ class JSONFormat(FormatBase): This is essentially SubStation Alpha as JSON. """ @classmethod - def guess_format(cls, text): + def guess_format(cls, text: str) -> Optional[str]: """See :meth:`pysubs2.formats.FormatBase.guess_format()`""" - if text.startswith("{\""): + if text.startswith("{\"") and "\"info\":" in text: return "json" + else: + return None @classmethod - def from_file(cls, subs, fp, format_, **kwargs): + def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None: """See :meth:`pysubs2.formats.FormatBase.from_file()`""" data = json.load(fp) @@ -47,7 +52,7 @@ class JSONFormat(FormatBase): subs.events = [SSAEvent(**fields) for fields in data["events"]] @classmethod - def to_file(cls, subs, fp, format_, **kwargs): + def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None: """See :meth:`pysubs2.formats.FormatBase.to_file()`""" data = { "info": dict(**subs.info), diff --git a/libs/pysubs2/microdvd.py b/libs/pysubs2/formats/microdvd.py similarity index 57% rename from libs/pysubs2/microdvd.py rename to libs/pysubs2/formats/microdvd.py index f36377750..228dd35bd 100644 --- a/libs/pysubs2/microdvd.py +++ b/libs/pysubs2/formats/microdvd.py @@ -1,11 +1,15 @@ from functools import partial import re -from .exceptions import UnknownFPSError -from .ssaevent import SSAEvent -from .ssastyle import SSAStyle -from .formatbase import FormatBase +from typing import Optional, TextIO, Any, Match + +from ..exceptions import UnknownFPSError +from ..ssaevent import SSAEvent +from ..ssastyle import SSAStyle +from .base import FormatBase from .substation import parse_tags -from .time import ms_to_frames, frames_to_ms +from ..time import ms_to_frames, frames_to_ms +from ..ssafile import SSAFile + #: Matches a MicroDVD line. MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)") @@ -14,14 +18,33 @@ MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)") class MicroDVDFormat(FormatBase): """MicroDVD subtitle format implementation""" @classmethod - def guess_format(cls, text): + def guess_format(cls, text: str) -> Optional[str]: """See :meth:`pysubs2.formats.FormatBase.guess_format()`""" if any(map(MICRODVD_LINE.match, text.splitlines())): return "microdvd" + else: + return None @classmethod - def from_file(cls, subs, fp, format_, fps=None, **kwargs): - """See :meth:`pysubs2.formats.FormatBase.from_file()`""" + def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, fps: Optional[float] = None, + strict_fps_inference: bool = True, **kwargs: Any) -> None: + """ + See :meth:`pysubs2.formats.FormatBase.from_file()` + + Keyword args: + strict_fps_inference: If True (default), in the case when ``fps`` is not given, it will be read + from the first subtitle text only if the start and end frame of this subtitle is ``{1}{1}`` + (matches VLC Player behaviour), otherwise :class:`pysubs2.exceptions.UnknownFPSError` is raised. + + When ``strict_fps_inference`` + is False, framerate will be read from the first subtitle text in this case regardless of + start and end frame (which may result in bogus result, if the first subtitle is not supposed + to contain framerate). Before introduction of this option, the library behaved as if this + option was False. + + .. versionchanged:: 1.7.0 + Added the ``strict_fps_inference`` option. + """ for line in fp: match = MICRODVD_LINE.match(line) if not match: @@ -31,10 +54,14 @@ class MicroDVDFormat(FormatBase): fstart, fend = map(int, (fstart, fend)) if fps is None: - # We don't know the framerate, but it is customary to include - # it as text of the first subtitle. In that case, we skip - # this auxiliary subtitle and proceed with reading. + # We don't know the framerate, but it is customary to include it as text of the first subtitle, + # in the format {1}{1}fps, see pysubs2 issue #71 or VLC player source: + # https://code.videolan.org/videolan/vlc/-/blob/dccda0e133ff0a2e85de727cf19ddbc634f06b67/modules/demux/subtitle.c#L1014 + # In that case, we skip this auxiliary subtitle and proceed with reading. try: + if strict_fps_inference and not (fstart == 1 and fend == 1): + raise ValueError("Frame mismatch, expected {1}{1}") + fps = float(text) subs.fps = fps continue @@ -45,10 +72,10 @@ class MicroDVDFormat(FormatBase): start, end = map(partial(frames_to_ms, fps=fps), (fstart, fend)) - def prepare_text(text): + def prepare_text(text: str) -> str: text = text.replace("|", r"\N") - def style_replacer(match: re.Match) -> str: + def style_replacer(match: Match[str]) -> str: tags = [c for c in "biu" if c in match.group(0)] return "{%s}" % "".join(f"\\{c}1" for c in tags) @@ -63,14 +90,15 @@ class MicroDVDFormat(FormatBase): subs.append(ev) @classmethod - def to_file(cls, subs, fp, format_, fps=None, write_fps_declaration=True, apply_styles=True, **kwargs): + def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, fps: Optional[float] = None, + write_fps_declaration: bool = True, apply_styles: bool = True, **kwargs: Any) -> None: """ See :meth:`pysubs2.formats.FormatBase.to_file()` The only supported styling is marking whole lines italic. Keyword args: - write_fps_declaration: If True, create a zero-duration first subtitle which will contain + write_fps_declaration: If True, create a zero-duration first subtitle ``{1}{1}`` which will contain the fps. apply_styles: If False, do not write any styling. @@ -94,12 +122,9 @@ class MicroDVDFormat(FormatBase): # insert an artificial first line telling the framerate if write_fps_declaration: - subs.insert(0, SSAEvent(start=0, end=0, text=str(fps))) - - for line in subs: - if line.is_comment or line.is_drawing: - continue + subs.insert(0, SSAEvent(start=1, end=1, text=str(fps))) + for line in subs.get_text_events(): text = "|".join(line.plaintext.splitlines()) if apply_styles and is_entirely_italic(line): text = "{Y:i}" + text @@ -107,8 +132,10 @@ class MicroDVDFormat(FormatBase): start, end = map(to_frames, (line.start, line.end)) # XXX warn on underflow? - if start < 0: start = 0 - if end < 0: end = 0 + if start < 0: + start = 0 + if end < 0: + end = 0 print("{%d}{%d}%s" % (start, end, text), file=fp) diff --git a/libs/pysubs2/formats/mpl2.py b/libs/pysubs2/formats/mpl2.py new file mode 100644 index 000000000..d67696394 --- /dev/null +++ b/libs/pysubs2/formats/mpl2.py @@ -0,0 +1,60 @@ +import re +from typing import Optional, Any, TextIO +from ..time import times_to_ms +from .base import FormatBase +from ..ssaevent import SSAEvent +from ..ssafile import SSAFile + + +# thanks to http://otsaloma.io/gaupol/doc/api/aeidon.files.mpl2_source.html +MPL2_FORMAT = re.compile(r"^\[(-?\d+)\]\[(-?\d+)\](.*)", re.MULTILINE) + + +class MPL2Format(FormatBase): + """MPL2 subtitle format implementation""" + @classmethod + def guess_format(cls, text: str) -> Optional[str]: + """See :meth:`pysubs2.formats.FormatBase.guess_format()`""" + if MPL2_FORMAT.search(text): + return "mpl2" + else: + return None + + @classmethod + def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None: + """See :meth:`pysubs2.formats.FormatBase.from_file()`""" + def prepare_text(lines: str) -> str: + out = [] + for s in lines.split("|"): + s = s.strip() + + if s.startswith("/"): + # line beginning with '/' is in italics + s = r"{\i1}%s{\i0}" % s[1:].strip() + + out.append(s) + return "\\N".join(out) + + text = fp.read() + for start, end, text in MPL2_FORMAT.findall(text): + e = SSAEvent( + start=times_to_ms(s=float(start) / 10), + end=times_to_ms(s=float(end) / 10), + text=prepare_text(text) + ) + subs.append(e) + + @classmethod + def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None: + """ + See :meth:`pysubs2.formats.FormatBase.to_file()` + + No styling is supported at the moment. + + """ + # TODO handle italics + for line in subs.get_text_events(): + start = int(line.start // 100) + end = int(line.end // 100) + text = line.plaintext.replace("\n", "|") + print(f"[{start}][{end}] {text}", file=fp) diff --git a/libs/pysubs2/subrip.py b/libs/pysubs2/formats/subrip.py similarity index 76% rename from libs/pysubs2/subrip.py rename to libs/pysubs2/formats/subrip.py index 1f599c064..1dceaf0e8 100644 --- a/libs/pysubs2/subrip.py +++ b/libs/pysubs2/formats/subrip.py @@ -1,14 +1,14 @@ import re import warnings -from typing import List +from typing import List, Sequence, Optional, TextIO, Any, Tuple -import pysubs2 -from .formatbase import FormatBase -from .ssaevent import SSAEvent -from .ssastyle import SSAStyle +from .base import FormatBase +from ..ssaevent import SSAEvent +from ..ssastyle import SSAStyle from .substation import parse_tags -from .exceptions import ContentNotUsable -from .time import ms_to_times, make_time, TIMESTAMP, timestamp_to_ms +from ..time import ms_to_times, make_time, TIMESTAMP, timestamp_to_ms +from ..ssafile import SSAFile + #: Largest timestamp allowed in SubRip, ie. 99:59:59,999. MAX_REPRESENTABLE_TIME = make_time(h=100) - 1 @@ -30,11 +30,11 @@ class SubripFormat(FormatBase): return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}" @staticmethod - def timestamp_to_ms(groups): + def timestamp_to_ms(groups: Sequence[str]) -> int: return timestamp_to_ms(groups) @classmethod - def guess_format(cls, text): + def guess_format(cls, text: str) -> Optional[str]: """See :meth:`pysubs2.formats.FormatBase.guess_format()`""" if "[Script Info]" in text or "[V4+ Styles]" in text: # disambiguation vs. SSA/ASS @@ -48,8 +48,11 @@ class SubripFormat(FormatBase): if len(cls.TIMESTAMP.findall(line)) == 2: return "srt" + return None + @classmethod - def from_file(cls, subs, fp, format_, keep_html_tags=False, keep_unknown_html_tags=False, **kwargs): + def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, keep_html_tags: bool = False, + keep_unknown_html_tags: bool = False, **kwargs: Any) -> None: """ See :meth:`pysubs2.formats.FormatBase.from_file()` @@ -70,8 +73,8 @@ class SubripFormat(FormatBase): If False, these other HTML tags will be stripped from output (in the previous example, you would get only ``example {\\i1}text{\\i0}``). """ - timestamps = [] # (start, end) - following_lines = [] # contains lists of lines following each timestamp + timestamps: List[Tuple[int, int]] = [] # (start, end) + following_lines: List[List[str]] = [] # contains lists of lines following each timestamp for line in fp: stamps = cls.TIMESTAMP.findall(line) @@ -83,7 +86,7 @@ class SubripFormat(FormatBase): if timestamps: following_lines[-1].append(line) - def prepare_text(lines): + def prepare_text(lines: List[str]) -> str: # Handle the "happy" empty subtitle case, which is timestamp line followed by blank line(s) # followed by number line and timestamp line of the next subtitle. Fixes issue #11. if (len(lines) >= 2 @@ -108,11 +111,13 @@ class SubripFormat(FormatBase): s = re.sub(r"\n", r"\\N", s) # convert newlines return s - subs.events = [SSAEvent(start=start, end=end, text=prepare_text(lines)) - for (start, end), lines in zip(timestamps, following_lines)] + for (start, end), lines in zip(timestamps, following_lines): + e = SSAEvent(start=start, end=end, text=prepare_text(lines)) + subs.append(e) @classmethod - def to_file(cls, subs, fp, format_, apply_styles=True, keep_ssa_tags=False, **kwargs): + def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, apply_styles: bool = True, + keep_ssa_tags: bool = False, **kwargs: Any) -> None: """ See :meth:`pysubs2.formats.FormatBase.to_file()` @@ -134,7 +139,7 @@ class SubripFormat(FormatBase): is SRT which doesn't use line styles - this shouldn't be much of an issue in practice.) """ - def prepare_text(text: str, style: SSAStyle): + def prepare_text(text: str, style: SSAStyle) -> str: text = text.replace(r"\h", " ") text = text.replace(r"\n", "\n") text = text.replace(r"\N", "\n") @@ -145,24 +150,20 @@ class SubripFormat(FormatBase): else: for fragment, sty in parse_tags(text, style, subs.styles): if apply_styles: - if sty.italic: fragment = f"{fragment}" - if sty.underline: fragment = f"{fragment}" - if sty.strikeout: fragment = f"{fragment}" - if sty.drawing: raise ContentNotUsable + if sty.italic: + fragment = f"{fragment}" + if sty.underline: + fragment = f"{fragment}" + if sty.strikeout: + fragment = f"{fragment}" body.append(fragment) return re.sub("\n+", "\n", "".join(body).strip()) - visible_lines = cls._get_visible_lines(subs) - - lineno = 1 - for line in visible_lines: + for lineno, line in enumerate(cls._get_visible_lines(subs), 1): start = cls.ms_to_timestamp(line.start) end = cls.ms_to_timestamp(line.end) - try: - text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE)) - except ContentNotUsable: - continue + text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE)) print(lineno, file=fp) print(start, "-->", end, file=fp) @@ -170,6 +171,5 @@ class SubripFormat(FormatBase): lineno += 1 @classmethod - def _get_visible_lines(cls, subs: "pysubs2.SSAFile") -> List["pysubs2.SSAEvent"]: - visible_lines = [line for line in subs if not line.is_comment] - return visible_lines + def _get_visible_lines(cls, subs: "SSAFile") -> List[SSAEvent]: + return subs.get_text_events() diff --git a/libs/pysubs2/substation.py b/libs/pysubs2/formats/substation.py similarity index 87% rename from libs/pysubs2/substation.py rename to libs/pysubs2/formats/substation.py index afcd84e36..994277286 100644 --- a/libs/pysubs2/substation.py +++ b/libs/pysubs2/formats/substation.py @@ -1,22 +1,21 @@ import logging import re import warnings -from numbers import Number -from typing import Any, Union, Optional, Dict +from typing import Any, Union, Optional, Dict, Tuple, List, TextIO -import pysubs2 -from .formatbase import FormatBase -from .ssaevent import SSAEvent -from .ssastyle import SSAStyle -from .common import Color, Alignment, SSA_ALIGNMENT -from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP, TIMESTAMP_SHORT +from .base import FormatBase +from ..ssaevent import SSAEvent +from ..ssastyle import SSAStyle +from ..common import Color, Alignment, SSA_ALIGNMENT +from ..time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP, TIMESTAMP_SHORT +from ..ssafile import SSAFile -def ass_to_ssa_alignment(i): +def ass_to_ssa_alignment(i: int) -> int: warnings.warn("ass_to_ssa_alignment function is deprecated, please use the Alignment enum", DeprecationWarning) return SSA_ALIGNMENT[i-1] -def ssa_to_ass_alignment(i): +def ssa_to_ass_alignment(i: int) -> int: warnings.warn("ssa_to_ass_alignment function is deprecated, please use the Alignment enum", DeprecationWarning) return SSA_ALIGNMENT.index(i) + 1 @@ -86,7 +85,8 @@ def is_valid_field_content(s: str) -> bool: return "\n" not in s and "," not in s -def parse_tags(text: str, style: SSAStyle = SSAStyle.DEFAULT_STYLE, styles: Optional[Dict[str, SSAStyle]] = None): +def parse_tags(text: str, style: SSAStyle = SSAStyle.DEFAULT_STYLE, + styles: Optional[Dict[str, SSAStyle]] = None) -> List[Tuple[str, SSAStyle]]: """ Split text into fragments with computed SSAStyles. @@ -117,14 +117,18 @@ def parse_tags(text: str, style: SSAStyle = SSAStyle.DEFAULT_STYLE, styles: Opti s = style.copy() # reset to original line style elif tag.startswith(r"\r"): name = tag[2:] - if name in styles: # type: ignore[operator] + if name in styles: # reset to named style - s = styles[name].copy() # type: ignore[index] + s = styles[name].copy() else: - if "i" in tag: s.italic = "1" in tag - elif "b" in tag: s.bold = "1" in tag - elif "u" in tag: s.underline = "1" in tag - elif "s" in tag: s.strikeout = "1" in tag + if "i" in tag: + s.italic = "1" in tag + elif "b" in tag: + s.bold = "1" in tag + elif "u" in tag: + s.underline = "1" in tag + elif "s" in tag: + s.strikeout = "1" in tag elif "p" in tag: try: scale = int(tag[2:]) @@ -146,34 +150,36 @@ class SubstationFormat(FormatBase): """SubStation Alpha (ASS, SSA) subtitle format implementation""" @staticmethod - def ms_to_timestamp(ms: int) -> str: + def ms_to_timestamp(requested_ms: int) -> str: """Convert ms to 'H:MM:SS.cc'""" - if ms < 0: - ms = 0 - if ms > MAX_REPRESENTABLE_TIME: + if requested_ms < 0: + requested_ms = 0 + if requested_ms > MAX_REPRESENTABLE_TIME: warnings.warn("Overflow in SubStation timestamp, clamping to MAX_REPRESENTABLE_TIME", RuntimeWarning) - ms = MAX_REPRESENTABLE_TIME - - h, m, s, ms = ms_to_times(ms) + requested_ms = MAX_REPRESENTABLE_TIME # Aegisub does rounding, see https://github.com/Aegisub/Aegisub/blob/6f546951b4f004da16ce19ba638bf3eedefb9f31/libaegisub/include/libaegisub/ass/time.h#L32 - cs = ((ms + 5) - (ms + 5) % 10) // 10 + round_ms = ((requested_ms + 5) - (requested_ms + 5) % 10) + h, m, s, ms = ms_to_times(round_ms) + cs = ms // 10 return f"{h:01d}:{m:02d}:{s:02d}.{cs:02d}" @classmethod - def guess_format(cls, text): + def guess_format(cls, text: str) -> Optional[str]: """See :meth:`pysubs2.formats.FormatBase.guess_format()`""" if re.search(r"V4\+ Styles", text, re.IGNORECASE): return "ass" elif re.search(r"V4 Styles", text, re.IGNORECASE): return "ssa" + else: + return None @classmethod - def from_file(cls, subs: "pysubs2.SSAFile", fp, format_, **kwargs): + def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None: """See :meth:`pysubs2.formats.FormatBase.from_file()`""" - def string_to_field(f: str, v: str): + def string_to_field(f: str, v: str) -> Any: # Per issue #45, we should handle the case where there is extra whitespace around the values. # Extra whitespace is removed in non-string fields where it would break the parser otherwise, # and in font name (where it doesn't really make sense). It is preserved in Dialogue string @@ -199,9 +205,13 @@ class SubstationFormat(FormatBase): v = v.strip() return rgba_to_color(v) elif f in {"bold", "underline", "italic", "strikeout"}: - return v == "-1" + return v != "0" elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}: - return int(v) + try: + return int(v) + except ValueError: + warnings.warn(f"Failed to parse {f}, using default", RuntimeWarning) + return 0 elif f in {"fontsize", "scalex", "scaley", "spacing", "angle", "outline", "shadow"}: return float(v) elif f == "marked": @@ -244,7 +254,8 @@ class SubstationFormat(FormatBase): inside_font_section = "Fonts" in line inside_graphic_section = "Graphics" in line elif inside_info_section or inside_aegisub_section: - if line.startswith(";"): continue # skip comments + if line.startswith(";"): + continue # skip comments try: k, v = line.split(":", 1) if inside_info_section: @@ -280,7 +291,7 @@ class SubstationFormat(FormatBase): elif line.startswith("Style:"): _, rest = line.split(":", 1) buf = rest.strip().split(",") - name, raw_fields = buf[0], buf[1:] # splat workaround for Python 2.7 + name, *raw_fields = buf field_dict = {f: string_to_field(f, v) for f, v in zip(STYLE_FIELDS[format_], raw_fields)} sty = SSAStyle(**field_dict) subs.styles[name] = sty @@ -307,7 +318,7 @@ class SubstationFormat(FormatBase): current_attachment_name = None @classmethod - def to_file(cls, subs: "pysubs2.SSAFile", fp, format_, header_notice=NOTICE, **kwargs): + def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, header_notice: str = NOTICE, **kwargs: Any) -> None: """See :meth:`pysubs2.formats.FormatBase.to_file()`""" print("[Script Info]", file=fp) for line in header_notice.splitlines(False): @@ -322,7 +333,7 @@ class SubstationFormat(FormatBase): for k, v in subs.aegisub_project.items(): print(k, v, sep=": ", file=fp) - def field_to_string(f: str, v: Any, line: Union[SSAEvent, SSAStyle]): + def field_to_string(f: str, v: Any, line: Union[SSAEvent, SSAStyle]) -> str: if f in {"start", "end"}: return cls.ms_to_timestamp(v) elif f == "marked": @@ -340,8 +351,12 @@ class SubstationFormat(FormatBase): return str(alignment.value) elif isinstance(v, bool): return "-1" if v else "0" - elif isinstance(v, (str, Number)): + elif isinstance(v, int): return str(v) + elif isinstance(v, float): + return str(int(v) if v.is_integer() else v) + elif isinstance(v, str): + return v elif isinstance(v, Color): if format_ == "ass": return color_to_ass_rgba(v) diff --git a/libs/pysubs2/tmp.py b/libs/pysubs2/formats/tmp.py similarity index 69% rename from libs/pysubs2/tmp.py rename to libs/pysubs2/formats/tmp.py index 426a6f191..e9db38a39 100644 --- a/libs/pysubs2/tmp.py +++ b/libs/pysubs2/formats/tmp.py @@ -1,11 +1,14 @@ import re import warnings +from typing import Optional, TextIO, Any -from .formatbase import FormatBase -from .ssaevent import SSAEvent -from .ssastyle import SSAStyle +from .base import FormatBase +from ..ssaevent import SSAEvent +from ..ssastyle import SSAStyle from .substation import parse_tags -from .time import ms_to_times, make_time, TIMESTAMP_SHORT, timestamp_to_ms +from ..time import ms_to_times, make_time, TIMESTAMP_SHORT, timestamp_to_ms +from ..ssafile import SSAFile + #: Pattern that matches TMP line TMP_LINE = re.compile(r"(\d{1,2}:\d{2}:\d{2}):(.+)") @@ -29,7 +32,7 @@ class TmpFormat(FormatBase): return f"{h:02d}:{m:02d}:{s:02d}" @classmethod - def guess_format(cls, text): + def guess_format(cls, text: str) -> Optional[str]: """See :meth:`pysubs2.formats.FormatBase.guess_format()`""" if "[Script Info]" in text or "[V4+ Styles]" in text: # disambiguation vs. SSA/ASS @@ -39,14 +42,16 @@ class TmpFormat(FormatBase): if TMP_LINE.match(line) and len(TMP_LINE.findall(line)) == 1: return "tmp" + return None + @classmethod - def from_file(cls, subs, fp, format_, **kwargs): + def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None: """See :meth:`pysubs2.formats.FormatBase.from_file()`""" events = [] - def prepare_text(text): + def prepare_text(text: str) -> str: text = text.replace("|", r"\N") # convert newlines - text = re.sub(r"< *u *>", "{\\\\u1}", text) # not r" for Python 2.7 compat, triggers unicodeescape + text = re.sub(r"< *u *>", r"{\\u1}", text) text = re.sub(r"< */? *[a-zA-Z][^>]*>", "", text) # strip other HTML tags return text @@ -56,7 +61,9 @@ class TmpFormat(FormatBase): continue start, text = match.groups() - start = timestamp_to_ms(TIMESTAMP_SHORT.match(start).groups()) + match2 = TIMESTAMP_SHORT.match(start) + assert match2 is not None, "TMP_LINE contains TIMESTAMP_SHORT" + start = timestamp_to_ms(match2.groups()) # Unfortunately, end timestamp is not given; try to estimate something reasonable: # start + 500 ms + 67 ms/character (15 chars per second) @@ -72,7 +79,7 @@ class TmpFormat(FormatBase): subs.events = events @classmethod - def to_file(cls, subs, fp, format_, apply_styles=True, **kwargs): + def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, apply_styles: bool = True, **kwargs: Any) -> None: """ See :meth:`pysubs2.formats.FormatBase.to_file()` @@ -82,28 +89,24 @@ class TmpFormat(FormatBase): apply_styles: If False, do not write any styling. """ - def prepare_text(text, style): + def prepare_text(text: str, style: SSAStyle) -> str: body = [] - skip = False for fragment, sty in parse_tags(text, style, subs.styles): fragment = fragment.replace(r"\h", " ") fragment = fragment.replace(r"\n", "\n") fragment = fragment.replace(r"\N", "\n") if apply_styles: - if sty.italic: fragment = f"{fragment}" - if sty.underline: fragment = f"{fragment}" - if sty.strikeout: fragment = f"{fragment}" - if sty.drawing: skip = True + if sty.italic: + fragment = f"{fragment}" + if sty.underline: + fragment = f"{fragment}" + if sty.strikeout: + fragment = f"{fragment}" body.append(fragment) - if skip: - return "" - else: - return re.sub("\n+", "\n", "".join(body).strip()) - - visible_lines = (line for line in subs if not line.is_comment) + return re.sub("\n+", "\n", "".join(body).strip()) - for line in visible_lines: + for line in subs.get_text_events(): start = cls.ms_to_timestamp(line.start) text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE)) diff --git a/libs/pysubs2/webvtt.py b/libs/pysubs2/formats/webvtt.py similarity index 58% rename from libs/pysubs2/webvtt.py rename to libs/pysubs2/formats/webvtt.py index 89c245db6..88f088a0a 100644 --- a/libs/pysubs2/webvtt.py +++ b/libs/pysubs2/formats/webvtt.py @@ -1,16 +1,17 @@ import re -from typing import List +from typing import List, Sequence, Optional, TextIO, Any -import pysubs2 +from ..ssaevent import SSAEvent from .subrip import SubripFormat -from .time import make_time +from ..time import make_time +from ..ssafile import SSAFile class WebVTTFormat(SubripFormat): """ Web Video Text Tracks (WebVTT) subtitle format implementation - Currently, this shares implementation with :class:`pysubs2.subrip.SubripFormat`. + Currently, this shares implementation with :class:`pysubs2.formats.subrip.SubripFormat`. """ TIMESTAMP = re.compile(r"(\d{0,4}:)?(\d{2}):(\d{2})\.(\d{2,3})") @@ -20,7 +21,7 @@ class WebVTTFormat(SubripFormat): return result.replace(',', '.') @staticmethod - def timestamp_to_ms(groups): + def timestamp_to_ms(groups: Sequence[str]) -> int: _h, _m, _s, _ms = groups if not _h: h = 0 @@ -30,22 +31,24 @@ class WebVTTFormat(SubripFormat): return make_time(h=h, m=m, s=s, ms=ms) @classmethod - def guess_format(cls, text): + def guess_format(cls, text: str) -> Optional[str]: """See :meth:`pysubs2.formats.FormatBase.guess_format()`""" if text.lstrip().startswith("WEBVTT"): return "vtt" + else: + return None @classmethod - def to_file(cls, subs, fp, format_, **kwargs): + def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None: # type: ignore[override] """ - See :meth:`pysubs2.formats.FormatBase.to_file()` + See :meth:`pysubs2.formats.SubripFormat.to_file()`, additional SRT options are supported by VTT as well """ print("WEBVTT\n", file=fp) return super(WebVTTFormat, cls).to_file( subs=subs, fp=fp, format_=format_, **kwargs) @classmethod - def _get_visible_lines(cls, subs: "pysubs2.SSAFile") -> List["pysubs2.SSAEvent"]: - visible_lines = [line for line in subs if not line.is_comment] + def _get_visible_lines(cls, subs: "SSAFile") -> List[SSAEvent]: + visible_lines = super()._get_visible_lines(subs) visible_lines.sort(key=lambda e: e.start) return visible_lines diff --git a/libs/pysubs2/whisper.py b/libs/pysubs2/formats/whisper.py similarity index 94% rename from libs/pysubs2/whisper.py rename to libs/pysubs2/formats/whisper.py index 5bfe8e726..c5353cd08 100644 --- a/libs/pysubs2/whisper.py +++ b/libs/pysubs2/formats/whisper.py @@ -5,9 +5,9 @@ See https://github.com/openai/whisper """ -from .ssaevent import SSAEvent -from .ssafile import SSAFile -from .time import make_time +from ..ssaevent import SSAEvent +from ..ssafile import SSAFile +from ..time import make_time from typing import Union, List, Dict, Any diff --git a/libs/pysubs2/mpl2.py b/libs/pysubs2/mpl2.py deleted file mode 100644 index 78af420fe..000000000 --- a/libs/pysubs2/mpl2.py +++ /dev/null @@ -1,54 +0,0 @@ -import re - -from .time import times_to_ms -from .formatbase import FormatBase -from .ssaevent import SSAEvent - - -# thanks to http://otsaloma.io/gaupol/doc/api/aeidon.files.mpl2_source.html -MPL2_FORMAT = re.compile(r"^\[(-?\d+)\]\[(-?\d+)\](.*)", re.MULTILINE) - - -class MPL2Format(FormatBase): - """MPL2 subtitle format implementation""" - @classmethod - def guess_format(cls, text): - """See :meth:`pysubs2.formats.FormatBase.guess_format()`""" - if MPL2_FORMAT.search(text): - return "mpl2" - - @classmethod - def from_file(cls, subs, fp, format_, **kwargs): - """See :meth:`pysubs2.formats.FormatBase.from_file()`""" - def prepare_text(lines): - out = [] - for s in lines.split("|"): - s = s.strip() - - if s.startswith("/"): - # line beginning with '/' is in italics - s = r"{\i1}%s{\i0}" % s[1:].strip() - - out.append(s) - return "\\N".join(out) - - subs.events = [SSAEvent(start=times_to_ms(s=float(start) / 10), end=times_to_ms(s=float(end) / 10), - text=prepare_text(text)) for start, end, text in MPL2_FORMAT.findall(fp.getvalue())] - - @classmethod - def to_file(cls, subs, fp, format_, **kwargs): - """ - See :meth:`pysubs2.formats.FormatBase.to_file()` - - No styling is supported at the moment. - - """ - # TODO handle italics - for line in subs: - if line.is_comment: - continue - - print("[{start}][{end}] {text}".format(start=int(line.start // 100), - end=int(line.end // 100), - text=line.plaintext.replace("\n", "|")), - file=fp) diff --git a/libs/pysubs2/ssaevent.py b/libs/pysubs2/ssaevent.py index 9906631d0..a33d2e308 100644 --- a/libs/pysubs2/ssaevent.py +++ b/libs/pysubs2/ssaevent.py @@ -1,6 +1,6 @@ import re import warnings -from typing import Optional, Dict, Any, ClassVar +from typing import Optional, Dict, Any, ClassVar, FrozenSet import dataclasses from .common import IntOrFloat @@ -41,7 +41,7 @@ class SSAEvent: type: str = "Dialogue" #: Line type (Dialogue/Comment) @property - def FIELDS(self): + def FIELDS(self) -> FrozenSet[str]: """All fields in SSAEvent.""" warnings.warn("Deprecated in 1.2.0 - it's a dataclass now", DeprecationWarning) return frozenset(field.name for field in dataclasses.fields(self)) @@ -57,7 +57,7 @@ class SSAEvent: return self.end - self.start @duration.setter - def duration(self, ms: int): + def duration(self, ms: int) -> None: if ms >= 0: self.end = self.start + ms else: @@ -74,7 +74,7 @@ class SSAEvent: return self.type == "Comment" @is_comment.setter - def is_comment(self, value: bool): + def is_comment(self, value: bool) -> None: if value: self.type = "Comment" else: @@ -83,9 +83,18 @@ class SSAEvent: @property def is_drawing(self) -> bool: """Returns True if line is SSA drawing tag (ie. not text)""" - from .substation import parse_tags + from .formats.substation import parse_tags return any(sty.drawing for _, sty in parse_tags(self.text)) + @property + def is_text(self) -> bool: + """ + Returns False for SSA drawings and comment lines, True otherwise + + In general, for non-SSA formats these events should be ignored. + """ + return not (self.is_comment or self.is_drawing) + @property def plaintext(self) -> str: """ @@ -102,11 +111,11 @@ class SSAEvent: return text @plaintext.setter - def plaintext(self, text: str): + def plaintext(self, text: str) -> None: self.text = text.replace("\n", r"\N") - def shift(self, h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0, - frames: Optional[int]=None, fps: Optional[float]=None): + def shift(self, h: IntOrFloat = 0, m: IntOrFloat = 0, s: IntOrFloat = 0, ms: IntOrFloat = 0, + frames: Optional[int] = None, fps: Optional[float] = None) -> None: """ Shift start and end times. @@ -132,36 +141,36 @@ class SSAEvent: else: raise TypeError("Cannot compare to non-SSAEvent object") - def __eq__(self, other) -> bool: + def __eq__(self, other: object) -> bool: # XXX document this if not isinstance(other, SSAEvent): return NotImplemented return self.start == other.start and self.end == other.end - def __ne__(self, other) -> bool: + def __ne__(self, other: object) -> bool: if not isinstance(other, SSAEvent): return NotImplemented return self.start != other.start or self.end != other.end - def __lt__(self, other) -> bool: + def __lt__(self, other: object) -> bool: if not isinstance(other, SSAEvent): return NotImplemented return (self.start, self.end) < (other.start, other.end) - def __le__(self, other) -> bool: + def __le__(self, other: object) -> bool: if not isinstance(other, SSAEvent): return NotImplemented return (self.start, self.end) <= (other.start, other.end) - def __gt__(self, other) -> bool: + def __gt__(self, other: object) -> bool: if not isinstance(other, SSAEvent): return NotImplemented return (self.start, self.end) > (other.start, other.end) - def __ge__(self, other) -> bool: + def __ge__(self, other: object) -> bool: if not isinstance(other, SSAEvent): return NotImplemented return (self.start, self.end) >= (other.start, other.end) - def __repr__(self): + def __repr__(self) -> str: return f"" diff --git a/libs/pysubs2/ssafile.py b/libs/pysubs2/ssafile.py index 1202a46f5..8c0b9cb14 100644 --- a/libs/pysubs2/ssafile.py +++ b/libs/pysubs2/ssafile.py @@ -1,20 +1,16 @@ -from collections.abc import MutableSequence import io -from io import open from itertools import chain import os.path import logging -from typing import Optional, List, Dict, Iterable, Any, overload, Iterator +from typing import Optional, List, Dict, Iterable, Any, overload, Iterator, TextIO, Tuple, MutableSequence from .common import IntOrFloat -from .formats import autodetect_format, get_format_class, get_format_identifier -from .substation import is_valid_field_content from .ssaevent import SSAEvent from .ssastyle import SSAStyle from .time import make_time, ms_to_str -class SSAFile(MutableSequence): +class SSAFile(MutableSequence[SSAEvent]): """ Subtitle file in SubStation Alpha format. @@ -32,7 +28,7 @@ class SSAFile(MutableSequence): """ - DEFAULT_INFO = { + DEFAULT_INFO: Dict[str, str] = { "WrapStyle": "0", "ScaledBorderAndShadow": "yes", "Collisions": "Normal" @@ -53,7 +49,8 @@ class SSAFile(MutableSequence): # ------------------------------------------------------------------------ @classmethod - def load(cls, path: str, encoding: str="utf-8", format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile": + def load(cls, path: str, encoding: str = "utf-8", format_: Optional[str] = None, fps: Optional[float] = None, + errors: Optional[str] = None, **kwargs: Any) -> "SSAFile": """ Load subtitle file from given path. @@ -62,12 +59,23 @@ class SSAFile(MutableSequence): See also: Specific formats may implement additional loading options, please refer to documentation of the implementation classes - (eg. :meth:`pysubs2.subrip.SubripFormat.from_file()`) + (eg. :meth:`pysubs2.formats.subrip.SubripFormat.from_file()`) Arguments: path (str): Path to subtitle file. encoding (str): Character encoding of input file. Defaults to UTF-8, you may need to change this. + errors (Optional[str]): Error handling for character encoding + of input file. Defaults to ``None``; use the value ``"surrogateescape"`` + for pass-through of bytes not supported by selected encoding via + `Unicode surrogate pairs `_. + See documentation of builtin ``open()`` function for more. + + .. versionchanged:: 1.7.0 + The ``errors`` parameter was introduced to facilitate + pass-through of subtitle files with unknown text encoding. + Previous versions of the library behaved as if ``errors=None``. + format_ (str): Optional, forces use of specific parser (eg. `"srt"`, `"ass"`). Otherwise, format is detected automatically from file contents. This argument should @@ -96,23 +104,32 @@ class SSAFile(MutableSequence): Example: >>> subs1 = pysubs2.load("subrip-subtitles.srt") - >>> subs2 = pysubs2.load("microdvd-subtitles.sub", fps=23.976) - >>> subs3 = pysubs2.load("subrip-subtitles-with-fancy-tags.srt", keep_unknown_html_tags=True) + >>> subs2 = pysubs2.load("microdvd-subtitles.sub",fps=23.976) + >>> subs3 = pysubs2.load("subrip-subtitles-with-fancy-tags.srt",keep_unknown_html_tags=True) """ - with open(path, encoding=encoding) as fp: + with open(path, encoding=encoding, errors=errors) as fp: return cls.from_file(fp, format_, fps=fps, **kwargs) @classmethod - def from_string(cls, string: str, format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile": + def from_string(cls, string: str, format_: Optional[str] = None, fps: Optional[float] = None, + **kwargs: Any) -> "SSAFile": """ Load subtitle file from string. See :meth:`SSAFile.load()` for full description. Arguments: - string (str): Subtitle file in a string. Note that the string - must be Unicode (in Python 2). + string (str): Subtitle file in a string. Note that the string must be Unicode (``str``, not ``bytes``). + format_ (str): Optional, forces use of specific parser + (eg. `"srt"`, `"ass"`). Otherwise, format is detected + automatically from file contents. This argument should + be rarely needed. + fps (float): Framerate for frame-based formats (MicroDVD), + for other formats this argument is ignored. Framerate might + be detected from the file, in which case you don't need + to specify it here (when given, this argument overrides + autodetection). Returns: SSAFile @@ -130,7 +147,8 @@ class SSAFile(MutableSequence): return cls.from_file(fp, format_, fps=fps, **kwargs) @classmethod - def from_file(cls, fp: io.TextIOBase, format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile": + def from_file(cls, fp: TextIO, format_: Optional[str] = None, fps: Optional[float] = None, + **kwargs: Any) -> "SSAFile": """ Read subtitle file from file object. @@ -141,8 +159,17 @@ class SSAFile(MutableSequence): or :meth:`SSAFile.from_string()` is preferable. Arguments: - fp (file object): A file object, ie. :class:`io.TextIOBase` instance. + fp (file object): A file object, ie. :class:`TextIO` instance. Note that the file must be opened in text mode (as opposed to binary). + format_ (str): Optional, forces use of specific parser + (eg. `"srt"`, `"ass"`). Otherwise, format is detected + automatically from file contents. This argument should + be rarely needed. + fps (float): Framerate for frame-based formats (MicroDVD), + for other formats this argument is ignored. Framerate might + be detected from the file, in which case you don't need + to specify it here (when given, this argument overrides + autodetection). Returns: SSAFile @@ -164,7 +191,8 @@ class SSAFile(MutableSequence): impl.from_file(subs, fp, format_, fps=fps, **kwargs) return subs - def save(self, path: str, encoding: str="utf-8", format_: Optional[str]=None, fps: Optional[float]=None, **kwargs): + def save(self, path: str, encoding: str = "utf-8", format_: Optional[str] = None, fps: Optional[float] = None, + errors: Optional[str] = None, **kwargs: Any) -> None: """ Save subtitle file to given path. @@ -173,7 +201,7 @@ class SSAFile(MutableSequence): See also: Specific formats may implement additional saving options, please refer to documentation of the implementation classes - (eg. :meth:`pysubs2.subrip.SubripFormat.to_file()`) + (eg. :meth:`pysubs2.formats.subrip.SubripFormat.to_file()`) Arguments: path (str): Path to subtitle file. @@ -191,6 +219,17 @@ class SSAFile(MutableSequence): different framerate, use this argument. See also :meth:`SSAFile.transform_framerate()` for fixing bad frame-based to time-based conversions. + errors (Optional[str]): Error handling for character encoding + of input file. Defaults to ``None``; use the value ``"surrogateescape"`` + for pass-through of bytes not supported by selected encoding via + `Unicode surrogate pairs `_. + See documentation of builtin ``open()`` function for more. + + .. versionchanged:: 1.7.0 + The ``errors`` parameter was introduced to facilitate + pass-through of subtitle files with unknown text encoding. + Previous versions of the library behaved as if ``errors=None``. + kwargs: Extra options for the writer. Raises: @@ -205,10 +244,10 @@ class SSAFile(MutableSequence): ext = os.path.splitext(path)[1].lower() format_ = get_format_identifier(ext) - with open(path, "w", encoding=encoding) as fp: + with open(path, "w", encoding=encoding, errors=errors) as fp: self.to_file(fp, format_, fps=fps, **kwargs) - def to_string(self, format_: str, fps: Optional[float]=None, **kwargs) -> str: + def to_string(self, format_: str, fps: Optional[float] = None, **kwargs: Any) -> str: """ Get subtitle file as a string. @@ -222,7 +261,7 @@ class SSAFile(MutableSequence): self.to_file(fp, format_, fps=fps, **kwargs) return fp.getvalue() - def to_file(self, fp: io.TextIOBase, format_: str, fps: Optional[float]=None, **kwargs): + def to_file(self, fp: TextIO, format_: str, fps: Optional[float] = None, **kwargs: Any) -> None: """ Write subtitle file to file object. @@ -233,7 +272,7 @@ class SSAFile(MutableSequence): or :meth:`SSAFile.to_string()` is preferable. Arguments: - fp (file object): A file object, ie. :class:`io.TextIOBase` instance. + fp (file object): A file object, ie. :class:`TextIO` instance. Note that the file must be opened in text mode (as opposed to binary). """ @@ -244,8 +283,8 @@ class SSAFile(MutableSequence): # Retiming subtitles # ------------------------------------------------------------------------ - def shift(self, h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0, - frames: Optional[int]=None, fps: Optional[float]=None): + def shift(self, h: IntOrFloat = 0, m: IntOrFloat = 0, s: IntOrFloat = 0, ms: IntOrFloat = 0, + frames: Optional[int] = None, fps: Optional[float] = None) -> None: """ Shift all subtitles by constant time amount. @@ -253,7 +292,10 @@ class SSAFile(MutableSequence): case, specify both frames and fps. h, m, s, ms will be ignored. Arguments: - h, m, s, ms: Integer or float values, may be positive or negative. + h: Integer or float values, may be positive or negative (hours). + m: Integer or float values, may be positive or negative (minutes). + s: Integer or float values, may be positive or negative (seconds). + ms: Integer or float values, may be positive or negative (milliseconds). frames (int): When specified, must be an integer number of frames. May be positive or negative. fps must be also specified. fps (float): When specified, must be a positive number. @@ -267,7 +309,7 @@ class SSAFile(MutableSequence): line.start += delta line.end += delta - def transform_framerate(self, in_fps: float, out_fps: float): + def transform_framerate(self, in_fps: float, out_fps: float) -> None: """ Rescale all timestamps by ratio of in_fps/out_fps. @@ -294,7 +336,7 @@ class SSAFile(MutableSequence): # Working with styles # ------------------------------------------------------------------------ - def rename_style(self, old_name: str, new_name: str): + def rename_style(self, old_name: str, new_name: str) -> None: """ Rename a style, including references to it. @@ -308,6 +350,8 @@ class SSAFile(MutableSequence): or new_name is taken. """ + from .formats.substation import is_valid_field_content + if old_name not in self.styles: raise KeyError(f"Style {old_name!r} not found") if new_name in self.styles: @@ -323,7 +367,7 @@ class SSAFile(MutableSequence): if line.style == old_name: line.style = new_name - def import_styles(self, subs: "SSAFile", overwrite: bool=True): + def import_styles(self, subs: "SSAFile", overwrite: bool = True) -> None: """ Merge in styles from other SSAFile. @@ -344,7 +388,7 @@ class SSAFile(MutableSequence): # Helper methods # ------------------------------------------------------------------------ - def remove_miscellaneous_events(self): + def remove_miscellaneous_events(self) -> None: """ Remove subtitles which appear to be non-essential (the --clean in CLI) @@ -357,7 +401,7 @@ class SSAFile(MutableSequence): new_events = [] duplicate_text_ids = set() - times_to_texts = {} + times_to_texts: Dict[Tuple[int, int], List[str]] = {} for i, e in enumerate(self): tmp = times_to_texts.setdefault((e.start, e.end), []) if tmp.count(e.plaintext) > 0: @@ -376,7 +420,13 @@ class SSAFile(MutableSequence): self.events = new_events - def equals(self, other: "SSAFile"): + def get_text_events(self) -> List[SSAEvent]: + """ + Return list of events excluding SSA comment lines and lines with SSA drawing tags + """ + return [e for e in self if e.is_text] + + def equals(self, other: "SSAFile") -> bool: """ Equality of two SSAFiles. @@ -435,7 +485,8 @@ class SSAFile(MutableSequence): return False elif self_style != other_style: for k in self_style.FIELDS: - if getattr(self_style, k) != getattr(other_style, k): logging.debug("difference in field %r", k) + if getattr(self_style, k) != getattr(other_style, k): + logging.debug("difference in field %r", k) logging.debug("style %r differs (self=%r, other=%r)", key, self_style.as_dict(), other_style.as_dict()) return False @@ -446,7 +497,8 @@ class SSAFile(MutableSequence): for i, (self_event, other_event) in enumerate(zip(self.events, other.events)): if not self_event.equals(other_event): for k in self_event.FIELDS: - if getattr(self_event, k) != getattr(other_event, k): logging.debug("difference in field %r", k) + if getattr(self_event, k) != getattr(other_event, k): + logging.debug("difference in field %r", k) logging.debug("event %d differs (self=%r, other=%r)", i, self_event.as_dict(), other_event.as_dict()) return False @@ -454,7 +506,7 @@ class SSAFile(MutableSequence): else: raise TypeError("Cannot compare to non-SSAFile object") - def __repr__(self): + def __repr__(self) -> str: if self.events: max_time = max(ev.end for ev in self) s = f"" @@ -467,7 +519,7 @@ class SSAFile(MutableSequence): # MutableSequence implementation + sort() # ------------------------------------------------------------------------ - def sort(self): + def sort(self) -> None: """Sort subtitles time-wise, in-place.""" self.events.sort() @@ -476,24 +528,24 @@ class SSAFile(MutableSequence): @overload def __getitem__(self, item: int) -> SSAEvent: - return self.events[item] + pass @overload def __getitem__(self, s: slice) -> List[SSAEvent]: - return self.events[s] + pass - def __getitem__(self, item): + def __getitem__(self, item: Any) -> Any: return self.events[item] @overload - def __setitem__(self, key: int, value: SSAEvent): + def __setitem__(self, key: int, value: SSAEvent) -> None: pass @overload - def __setitem__(self, keys: slice, values: Iterable[SSAEvent]): + def __setitem__(self, keys: slice, values: Iterable[SSAEvent]) -> None: pass - def __setitem__(self, key, value): + def __setitem__(self, key: Any, value: Any) -> None: if isinstance(key, int): if isinstance(value, SSAEvent): self.events[key] = value @@ -509,21 +561,24 @@ class SSAFile(MutableSequence): raise TypeError("Bad key type") @overload - def __delitem__(self, key: int): + def __delitem__(self, key: int) -> None: pass @overload - def __delitem__(self, s: slice): + def __delitem__(self, s: slice) -> None: pass - def __delitem__(self, key): + def __delitem__(self, key: Any) -> None: del self.events[key] - def __len__(self): + def __len__(self) -> int: return len(self.events) - def insert(self, index: int, value: SSAEvent): + def insert(self, index: int, value: SSAEvent) -> None: if isinstance(value, SSAEvent): self.events.insert(index, value) else: raise TypeError("SSAFile.events must contain only SSAEvent objects") + + +from .formats import autodetect_format, get_format_class, get_format_identifier # noqa: E402 diff --git a/libs/pysubs2/ssastyle.py b/libs/pysubs2/ssastyle.py index f22476166..b0df57106 100644 --- a/libs/pysubs2/ssastyle.py +++ b/libs/pysubs2/ssastyle.py @@ -1,5 +1,5 @@ import warnings -from typing import Dict, Any, ClassVar +from typing import Dict, Any, ClassVar, FrozenSet import dataclasses from .common import Color, Alignment @@ -23,7 +23,7 @@ class SSAStyle: DEFAULT_STYLE: ClassVar["SSAStyle"] = None # type: ignore[assignment] @property - def FIELDS(self): + def FIELDS(self) -> FrozenSet[str]: """All fields in SSAStyle.""" warnings.warn("Deprecated in 1.2.0 - it's a dataclass now", DeprecationWarning) return frozenset(field.name for field in dataclasses.fields(self)) @@ -66,7 +66,7 @@ class SSAStyle: # dataclasses.asdict() would recursively dictify Color objects, which we don't want return {field.name: getattr(self, field.name) for field in dataclasses.fields(self)} - def __repr__(self): + def __repr__(self) -> str: return f" int: """ Convert time to milliseconds. @@ -38,7 +44,7 @@ def make_time(h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat= raise ValueError("Both fps and frames must be specified") -def timestamp_to_ms(groups: Sequence[str]): +def timestamp_to_ms(groups: Sequence[str]) -> int: """ Convert groups from :data:`pysubs2.time.TIMESTAMP` or :data:`pysubs2.time.TIMESTAMP_SHORT` match to milliseconds. @@ -50,6 +56,11 @@ def timestamp_to_ms(groups: Sequence[str]): 1000 """ + h: int + m: int + s: int + ms: int + frac: int if len(groups) == 4: h, m, s, frac = map(int, groups) ms = frac * 10**(3 - len(groups[-1])) @@ -65,7 +76,7 @@ def timestamp_to_ms(groups: Sequence[str]): return ms -def times_to_ms(h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0) -> int: +def times_to_ms(h: IntOrFloat = 0, m: IntOrFloat = 0, s: IntOrFloat = 0, ms: IntOrFloat = 0) -> int: """ Convert hours, minutes, seconds to milliseconds. @@ -124,7 +135,7 @@ def ms_to_frames(ms: IntOrFloat, fps: float) -> int: return int(round((ms / 1000) * fps)) -def ms_to_times(ms: IntOrFloat) -> Tuple[int, int, int, int]: +def ms_to_times(ms: IntOrFloat) -> Times: """ Convert milliseconds to normalized tuple (h, m, s, ms). @@ -144,7 +155,7 @@ def ms_to_times(ms: IntOrFloat) -> Tuple[int, int, int, int]: return Times(h, m, s, ms) -def ms_to_str(ms: IntOrFloat, fractions: bool=False) -> str: +def ms_to_str(ms: IntOrFloat, fractions: bool = False) -> str: """ Prettyprint milliseconds to [-]H:MM:SS[.mmm] diff --git a/libs/version.txt b/libs/version.txt index 0774103d7..09235fb12 100644 --- a/libs/version.txt +++ b/libs/version.txt @@ -28,7 +28,7 @@ knowit==0.5.2 Mako==1.3.2 pycountry==23.12.11 pyrsistent==0.20.0 -pysubs2==1.6.1 +pysubs2==1.7.2 python-engineio==4.9.0 python-socketio==5.11.1 pytz==2024.1