You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
bazarr/libs/pysubs2/formats/microdvd.py

145 lines
5.8 KiB

6 years ago
from functools import partial
import re
from typing import Optional, TextIO, Any, Match
from ..exceptions import UnknownFPSError
from ..ssaevent import SSAEvent
from ..ssastyle import SSAStyle
from .base import FormatBase
6 years ago
from .substation import parse_tags
from ..time import ms_to_frames, frames_to_ms
from ..ssafile import SSAFile
6 years ago
#: Matches a MicroDVD line of the form ``{start}{end}text``.
#:
#: Group 1 is the start frame and group 2 is the end frame (both are runs of
#: digits); group 3 is the remainder of the line and must contain at least one
#: character. Spaces are tolerated before each brace group and around the
#: frame numbers inside the braces.
MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)")
class MicroDVDFormat(FormatBase):
    """MicroDVD subtitle format implementation"""

    @classmethod
    def guess_format(cls, text: str) -> Optional[str]:
        """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
        # A single line matching the ``{start}{end}text`` shape is enough.
        if any(map(MICRODVD_LINE.match, text.splitlines())):
            return "microdvd"
        return None

    @staticmethod
    def _style_tag_to_ssa(match: Match[str]) -> str:
        """
        Convert one matched MicroDVD ``{Y:...}`` style tag into an SSA override block.

        Each of the letters ``b``, ``i``, ``u`` found in the matched tag yields the
        corresponding ``\\b1``, ``\\i1``, ``\\u1`` override (in that fixed order).
        """
        tags = [c for c in "biu" if c in match.group(0)]
        return "{%s}" % "".join(f"\\{c}1" for c in tags)

    @classmethod
    def _microdvd_text_to_ssa(cls, text: str) -> str:
        """
        Convert the text part of a MicroDVD line into SSA event text.

        ``|`` line separators become ``\\N``; style (``{Y:}``), font name (``{F:}``),
        font size (``{S:}``) and position (``{P:x,y}``) tags are rewritten to SSA
        override tags. Surrounding whitespace is stripped from the result.
        """
        text = text.replace("|", r"\N")
        text = re.sub(r"\{[Yy]:[^}]+\}", cls._style_tag_to_ssa, text)
        text = re.sub(r"\{[Ff]:([^}]+)\}", r"{\\fn\1}", text)
        text = re.sub(r"\{[Ss]:([^}]+)\}", r"{\\fs\1}", text)
        text = re.sub(r"\{P:(\d+),(\d+)\}", r"{\\pos(\1,\2)}", text)
        return text.strip()

    @classmethod
    def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, fps: Optional[float] = None,
                  strict_fps_inference: bool = True, **kwargs: Any) -> None:
        """
        See :meth:`pysubs2.formats.FormatBase.from_file()`

        Lines that do not look like MicroDVD subtitles are skipped. When ``fps`` is
        not given, it is read from the first subtitle line, stored in ``subs.fps``,
        and that line is not added as an event.

        Keyword args:
            strict_fps_inference: If True (default), in the case when ``fps`` is not given, it will be read
                from the first subtitle text only if the start and end frame of this subtitle is ``{1}{1}``
                (matches VLC Player behaviour), otherwise :class:`pysubs2.exceptions.UnknownFPSError` is raised.

                When ``strict_fps_inference``
                is False, framerate will be read from the first subtitle text in this case regardless of
                start and end frame (which may result in bogus result, if the first subtitle is not supposed
                to contain framerate). Before introduction of this option, the library behaved as if this
                option was False.

        Raises:
            UnknownFPSError: If ``fps`` is not given and cannot be read from the
                first subtitle line of the file.

        .. versionchanged:: 1.7.0
           Added the ``strict_fps_inference`` option.

        """
        unknown_fps_message = ("Framerate was not specified and "
                               "cannot be read from "
                               "the MicroDVD file.")

        for line in fp:
            match = MICRODVD_LINE.match(line)
            if not match:
                continue

            fstart, fend, text = match.groups()
            fstart, fend = map(int, (fstart, fend))

            if fps is None:
                # We don't know the framerate, but it is customary to include it as text of the first subtitle,
                # in the format {1}{1}fps, see pysubs2 issue #71 or VLC player source:
                # https://code.videolan.org/videolan/vlc/-/blob/dccda0e133ff0a2e85de727cf19ddbc634f06b67/modules/demux/subtitle.c#L1014
                # In that case, we skip this auxiliary subtitle and proceed with reading.
                if strict_fps_inference and not (fstart == 1 and fend == 1):
                    # Frame mismatch, expected {1}{1}
                    raise UnknownFPSError(unknown_fps_message)
                try:
                    fps = float(text)
                except ValueError as exc:
                    raise UnknownFPSError(unknown_fps_message) from exc
                subs.fps = fps
                continue

            start, end = map(partial(frames_to_ms, fps=fps), (fstart, fend))
            ev = SSAEvent(start=start, end=end, text=cls._microdvd_text_to_ssa(text))
            subs.append(ev)

    @classmethod
    def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, fps: Optional[float] = None,
                write_fps_declaration: bool = True, apply_styles: bool = True, **kwargs: Any) -> None:
        """
        See :meth:`pysubs2.formats.FormatBase.to_file()`

        The only supported styling is marking whole lines italic.

        ``subs`` is only read; it is not modified while writing.

        Keyword args:
            write_fps_declaration: If True, create a zero-duration first subtitle ``{1}{1}`` which will contain
                the fps.
            apply_styles: If False, do not write any styling.

        Raises:
            UnknownFPSError: If ``fps`` is not given and ``subs.fps`` is None.

        """
        if fps is None:
            fps = subs.fps

        if fps is None:
            raise UnknownFPSError("Framerate must be specified when writing MicroDVD.")
        to_frames = partial(ms_to_frames, fps=fps)

        def is_entirely_italic(line: SSAEvent) -> bool:
            """Return True if every non-blank text fragment of the event is italic."""
            style = subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE)
            for fragment, sty in parse_tags(line.text, style, subs.styles):
                # Hard spaces and line breaks count as whitespace for this check.
                fragment = fragment.replace(r"\h", " ")
                fragment = fragment.replace(r"\n", "\n")
                fragment = fragment.replace(r"\N", "\n")
                if not sty.italic and fragment and not fragment.isspace():
                    return False
            return True

        # Write the artificial first line telling the framerate directly, as a
        # literal {1}{1} marker. Routing it through a temporary event would run
        # it through the millisecond-to-frame conversion and the italic styling
        # (neither of which should touch the marker), and would leave the
        # temporary event in ``subs`` if writing failed midway.
        if write_fps_declaration:
            print("{1}{1}" + str(fps), file=fp)

        for line in subs.get_text_events():
            text = "|".join(line.plaintext.splitlines())
            if apply_styles and is_entirely_italic(line):
                text = "{Y:i}" + text

            start, end = map(to_frames, (line.start, line.end))

            # XXX warn on underflow?
            start = max(0, start)
            end = max(0, end)

            print("{%d}{%d}%s" % (start, end, text), file=fp)