@ -1,20 +1,16 @@
from collections . abc import MutableSequence
import io
from io import open
from itertools import chain
import os . path
import logging
from typing import Optional , List , Dict , Iterable , Any , overload , Iterator
from typing import Optional , List , Dict , Iterable , Any , overload , Iterator , TextIO , Tuple , MutableSequence
from . common import IntOrFloat
from . formats import autodetect_format , get_format_class , get_format_identifier
from . substation import is_valid_field_content
from . ssaevent import SSAEvent
from . ssastyle import SSAStyle
from . time import make_time , ms_to_str
class SSAFile ( MutableSequence ):
class SSAFile ( MutableSequence [SSAEvent ] ):
"""
Subtitle file in SubStation Alpha format .
@ -32,7 +28,7 @@ class SSAFile(MutableSequence):
"""
DEFAULT_INFO = {
DEFAULT_INFO : Dict [ str , str ] = {
" WrapStyle " : " 0 " ,
" ScaledBorderAndShadow " : " yes " ,
" Collisions " : " Normal "
@ -53,7 +49,8 @@ class SSAFile(MutableSequence):
# ------------------------------------------------------------------------
@classmethod
def load ( cls , path : str , encoding : str = " utf-8 " , format_ : Optional [ str ] = None , fps : Optional [ float ] = None , * * kwargs ) - > " SSAFile " :
def load ( cls , path : str , encoding : str = " utf-8 " , format_ : Optional [ str ] = None , fps : Optional [ float ] = None ,
errors : Optional [ str ] = None , * * kwargs : Any ) - > " SSAFile " :
"""
Load subtitle file from given path .
@ -62,12 +59,23 @@ class SSAFile(MutableSequence):
See also :
Specific formats may implement additional loading options ,
please refer to documentation of the implementation classes
( eg . : meth : ` pysubs2 . subrip. SubripFormat . from_file ( ) ` )
( eg . : meth : ` pysubs2 . formats. subrip. SubripFormat . from_file ( ) ` )
Arguments :
path ( str ) : Path to subtitle file .
encoding ( str ) : Character encoding of input file .
Defaults to UTF - 8 , you may need to change this .
errors ( Optional [ str ] ) : Error handling for character encoding
of input file . Defaults to ` ` None ` ` ; use the value ` ` " surrogateescape " ` `
for pass - through of bytes not supported by selected encoding via
` Unicode surrogate pairs < https : / / en . wikipedia . org / wiki / Universal_Character_Set_characters #Surrogates>`_.
See documentation of builtin ` ` open ( ) ` ` function for more .
. . versionchanged : : 1.7 .0
The ` ` errors ` ` parameter was introduced to facilitate
pass - through of subtitle files with unknown text encoding .
Previous versions of the library behaved as if ` ` errors = None ` ` .
format_ ( str ) : Optional , forces use of specific parser
( eg . ` " srt " ` , ` " ass " ` ) . Otherwise , format is detected
automatically from file contents . This argument should
@ -96,23 +104,32 @@ class SSAFile(MutableSequence):
Example :
>> > subs1 = pysubs2 . load ( " subrip-subtitles.srt " )
>> > subs2 = pysubs2 . load ( " microdvd-subtitles.sub " , fps = 23.976 )
>> > subs3 = pysubs2 . load ( " subrip-subtitles-with-fancy-tags.srt " , keep_unknown_html_tags = True )
>> > subs2 = pysubs2 . load ( " microdvd-subtitles.sub " , fps = 23.976 )
>> > subs3 = pysubs2 . load ( " subrip-subtitles-with-fancy-tags.srt " , keep_unknown_html_tags = True )
"""
with open ( path , encoding = encoding ) as fp :
with open ( path , encoding = encoding , errors = errors ) as fp :
return cls . from_file ( fp , format_ , fps = fps , * * kwargs )
@classmethod
def from_string ( cls , string : str , format_ : Optional [ str ] = None , fps : Optional [ float ] = None , * * kwargs ) - > " SSAFile " :
def from_string ( cls , string : str , format_ : Optional [ str ] = None , fps : Optional [ float ] = None ,
* * kwargs : Any ) - > " SSAFile " :
"""
Load subtitle file from string .
See : meth : ` SSAFile . load ( ) ` for full description .
Arguments :
string ( str ) : Subtitle file in a string . Note that the string
must be Unicode ( in Python 2 ) .
string ( str ) : Subtitle file in a string . Note that the string must be Unicode ( ` ` str ` ` , not ` ` bytes ` ` ) .
format_ ( str ) : Optional , forces use of specific parser
( eg . ` " srt " ` , ` " ass " ` ) . Otherwise , format is detected
automatically from file contents . This argument should
be rarely needed .
fps ( float ) : Framerate for frame - based formats ( MicroDVD ) ,
for other formats this argument is ignored . Framerate might
be detected from the file , in which case you don ' t need
to specify it here ( when given , this argument overrides
autodetection ) .
Returns :
SSAFile
@ -130,7 +147,8 @@ class SSAFile(MutableSequence):
return cls . from_file ( fp , format_ , fps = fps , * * kwargs )
@classmethod
def from_file ( cls , fp : io . TextIOBase , format_ : Optional [ str ] = None , fps : Optional [ float ] = None , * * kwargs ) - > " SSAFile " :
def from_file ( cls , fp : TextIO , format_ : Optional [ str ] = None , fps : Optional [ float ] = None ,
* * kwargs : Any ) - > " SSAFile " :
"""
Read subtitle file from file object .
@ -141,8 +159,17 @@ class SSAFile(MutableSequence):
or : meth : ` SSAFile . from_string ( ) ` is preferable .
Arguments :
fp ( file object ) : A file object , ie . : class : ` io. TextIOBase ` instance .
fp ( file object ) : A file object , ie . : class : ` TextIO` instance .
Note that the file must be opened in text mode ( as opposed to binary ) .
format_ ( str ) : Optional , forces use of specific parser
( eg . ` " srt " ` , ` " ass " ` ) . Otherwise , format is detected
automatically from file contents . This argument should
be rarely needed .
fps ( float ) : Framerate for frame - based formats ( MicroDVD ) ,
for other formats this argument is ignored . Framerate might
be detected from the file , in which case you don ' t need
to specify it here ( when given , this argument overrides
autodetection ) .
Returns :
SSAFile
@ -164,7 +191,8 @@ class SSAFile(MutableSequence):
impl . from_file ( subs , fp , format_ , fps = fps , * * kwargs )
return subs
def save ( self , path : str , encoding : str = " utf-8 " , format_ : Optional [ str ] = None , fps : Optional [ float ] = None , * * kwargs ) :
def save ( self , path : str , encoding : str = " utf-8 " , format_ : Optional [ str ] = None , fps : Optional [ float ] = None ,
errors : Optional [ str ] = None , * * kwargs : Any ) - > None :
"""
Save subtitle file to given path .
@ -173,7 +201,7 @@ class SSAFile(MutableSequence):
See also :
Specific formats may implement additional saving options ,
please refer to documentation of the implementation classes
( eg . : meth : ` pysubs2 . subrip. SubripFormat . to_file ( ) ` )
( eg . : meth : ` pysubs2 . formats. subrip. SubripFormat . to_file ( ) ` )
Arguments :
path ( str ) : Path to subtitle file .
@ -191,6 +219,17 @@ class SSAFile(MutableSequence):
different framerate , use this argument . See also
: meth : ` SSAFile . transform_framerate ( ) ` for fixing bad
frame - based to time - based conversions .
errors ( Optional [ str ] ) : Error handling for character encoding
of input file . Defaults to ` ` None ` ` ; use the value ` ` " surrogateescape " ` `
for pass - through of bytes not supported by selected encoding via
` Unicode surrogate pairs < https : / / en . wikipedia . org / wiki / Universal_Character_Set_characters #Surrogates>`_.
See documentation of builtin ` ` open ( ) ` ` function for more .
. . versionchanged : : 1.7 .0
The ` ` errors ` ` parameter was introduced to facilitate
pass - through of subtitle files with unknown text encoding .
Previous versions of the library behaved as if ` ` errors = None ` ` .
kwargs : Extra options for the writer .
Raises :
@ -205,10 +244,10 @@ class SSAFile(MutableSequence):
ext = os . path . splitext ( path ) [ 1 ] . lower ( )
format_ = get_format_identifier ( ext )
with open ( path , " w " , encoding = encoding ) as fp :
with open ( path , " w " , encoding = encoding , errors = errors ) as fp :
self . to_file ( fp , format_ , fps = fps , * * kwargs )
def to_string ( self , format_ : str , fps : Optional [ float ] = None , * * kwargs ) - > str :
def to_string ( self , format_ : str , fps : Optional [ float ] = None , * * kwargs : Any ) - > str :
"""
Get subtitle file as a string .
@ -222,7 +261,7 @@ class SSAFile(MutableSequence):
self . to_file ( fp , format_ , fps = fps , * * kwargs )
return fp . getvalue ( )
def to_file ( self , fp : io. TextIOBase , format_ : str , fps : Optional [ float ] = None , * * kwargs ):
def to_file ( self , fp : TextIO, format_ : str , fps : Optional [ float ] = None , * * kwargs : Any ) - > None :
"""
Write subtitle file to file object .
@ -233,7 +272,7 @@ class SSAFile(MutableSequence):
or : meth : ` SSAFile . to_string ( ) ` is preferable .
Arguments :
fp ( file object ) : A file object , ie . : class : ` io. TextIOBase ` instance .
fp ( file object ) : A file object , ie . : class : ` TextIO` instance .
Note that the file must be opened in text mode ( as opposed to binary ) .
"""
@ -244,8 +283,8 @@ class SSAFile(MutableSequence):
# Retiming subtitles
# ------------------------------------------------------------------------
def shift ( self , h : IntOrFloat = 0 , m : IntOrFloat = 0 , s : IntOrFloat = 0 , ms : IntOrFloat = 0 ,
frames : Optional [ int ] = None , fps : Optional [ float ] = None ) :
def shift ( self , h : IntOrFloat = 0 , m : IntOrFloat = 0 , s : IntOrFloat = 0 , ms : IntOrFloat = 0 ,
frames : Optional [ int ] = None , fps : Optional [ float ] = None ) - > None :
"""
Shift all subtitles by constant time amount .
@ -253,7 +292,10 @@ class SSAFile(MutableSequence):
case , specify both frames and fps . h , m , s , ms will be ignored .
Arguments :
h , m , s , ms : Integer or float values , may be positive or negative .
h : Integer or float values , may be positive or negative ( hours ) .
m : Integer or float values , may be positive or negative ( minutes ) .
s : Integer or float values , may be positive or negative ( seconds ) .
ms : Integer or float values , may be positive or negative ( milliseconds ) .
frames ( int ) : When specified , must be an integer number of frames .
May be positive or negative . fps must be also specified .
fps ( float ) : When specified , must be a positive number .
@ -267,7 +309,7 @@ class SSAFile(MutableSequence):
line . start + = delta
line . end + = delta
def transform_framerate ( self , in_fps : float , out_fps : float ) :
def transform_framerate ( self , in_fps : float , out_fps : float ) - > None :
"""
Rescale all timestamps by ratio of in_fps / out_fps .
@ -294,7 +336,7 @@ class SSAFile(MutableSequence):
# Working with styles
# ------------------------------------------------------------------------
def rename_style ( self , old_name : str , new_name : str ) :
def rename_style ( self , old_name : str , new_name : str ) - > None :
"""
Rename a style , including references to it .
@ -308,6 +350,8 @@ class SSAFile(MutableSequence):
or new_name is taken .
"""
from . formats . substation import is_valid_field_content
if old_name not in self . styles :
raise KeyError ( f " Style { old_name !r} not found " )
if new_name in self . styles :
@ -323,7 +367,7 @@ class SSAFile(MutableSequence):
if line . style == old_name :
line . style = new_name
def import_styles ( self , subs : " SSAFile " , overwrite : bool = True ) :
def import_styles ( self , subs : " SSAFile " , overwrite : bool = True ) - > None :
"""
Merge in styles from other SSAFile .
@ -344,7 +388,7 @@ class SSAFile(MutableSequence):
# Helper methods
# ------------------------------------------------------------------------
def remove_miscellaneous_events ( self ) :
def remove_miscellaneous_events ( self ) - > None :
"""
Remove subtitles which appear to be non - essential ( the - - clean in CLI )
@ -357,7 +401,7 @@ class SSAFile(MutableSequence):
new_events = [ ]
duplicate_text_ids = set ( )
times_to_texts = { }
times_to_texts : Dict [ Tuple [ int , int ] , List [ str ] ] = { }
for i , e in enumerate ( self ) :
tmp = times_to_texts . setdefault ( ( e . start , e . end ) , [ ] )
if tmp . count ( e . plaintext ) > 0 :
@ -376,7 +420,13 @@ class SSAFile(MutableSequence):
self . events = new_events
def equals ( self , other : " SSAFile " ) :
def get_text_events ( self ) - > List [ SSAEvent ] :
"""
Return list of events excluding SSA comment lines and lines with SSA drawing tags
"""
return [ e for e in self if e . is_text ]
def equals ( self , other : " SSAFile " ) - > bool :
"""
Equality of two SSAFiles .
@ -435,7 +485,8 @@ class SSAFile(MutableSequence):
return False
elif self_style != other_style :
for k in self_style . FIELDS :
if getattr ( self_style , k ) != getattr ( other_style , k ) : logging . debug ( " difference in field %r " , k )
if getattr ( self_style , k ) != getattr ( other_style , k ) :
logging . debug ( " difference in field %r " , k )
logging . debug ( " style %r differs (self= %r , other= %r ) " , key , self_style . as_dict ( ) , other_style . as_dict ( ) )
return False
@ -446,7 +497,8 @@ class SSAFile(MutableSequence):
for i , ( self_event , other_event ) in enumerate ( zip ( self . events , other . events ) ) :
if not self_event . equals ( other_event ) :
for k in self_event . FIELDS :
if getattr ( self_event , k ) != getattr ( other_event , k ) : logging . debug ( " difference in field %r " , k )
if getattr ( self_event , k ) != getattr ( other_event , k ) :
logging . debug ( " difference in field %r " , k )
logging . debug ( " event %d differs (self= %r , other= %r ) " , i , self_event . as_dict ( ) , other_event . as_dict ( ) )
return False
@ -454,7 +506,7 @@ class SSAFile(MutableSequence):
else :
raise TypeError ( " Cannot compare to non-SSAFile object " )
def __repr__ ( self ) :
def __repr__ ( self ) - > str :
if self . events :
max_time = max ( ev . end for ev in self )
s = f " <SSAFile with { len ( self ) } events and { len ( self . styles ) } styles, last timestamp { ms_to_str ( max_time ) } > "
@ -467,7 +519,7 @@ class SSAFile(MutableSequence):
# MutableSequence implementation + sort()
# ------------------------------------------------------------------------
def sort ( self ) :
def sort ( self ) - > None :
""" Sort subtitles time-wise, in-place. """
self . events . sort ( )
@ -476,24 +528,24 @@ class SSAFile(MutableSequence):
@overload
def __getitem__ ( self , item : int ) - > SSAEvent :
return self . events [ item ]
pass
@overload
def __getitem__ ( self , s : slice ) - > List [ SSAEvent ] :
return self . events [ s ]
pass
def __getitem__ ( self , item ):
def __getitem__ ( self , item : Any ) - > Any :
return self . events [ item ]
@overload
def __setitem__ ( self , key : int , value : SSAEvent ) :
def __setitem__ ( self , key : int , value : SSAEvent ) - > None :
pass
@overload
def __setitem__ ( self , keys : slice , values : Iterable [ SSAEvent ] ) :
def __setitem__ ( self , keys : slice , values : Iterable [ SSAEvent ] ) - > None :
pass
def __setitem__ ( self , key , value ):
def __setitem__ ( self , key : Any , value : Any ) - > None :
if isinstance ( key , int ) :
if isinstance ( value , SSAEvent ) :
self . events [ key ] = value
@ -509,21 +561,24 @@ class SSAFile(MutableSequence):
raise TypeError ( " Bad key type " )
@overload
def __delitem__ ( self , key : int ) :
def __delitem__ ( self , key : int ) - > None :
pass
@overload
def __delitem__ ( self , s : slice ) :
def __delitem__ ( self , s : slice ) - > None :
pass
def __delitem__ ( self , key ):
def __delitem__ ( self , key : Any ) - > None :
del self . events [ key ]
def __len__ ( self ) :
def __len__ ( self ) - > int :
return len ( self . events )
def insert ( self , index : int , value : SSAEvent ) :
def insert ( self , index : int , value : SSAEvent ) - > None :
if isinstance ( value , SSAEvent ) :
self . events . insert ( index , value )
else :
raise TypeError ( " SSAFile.events must contain only SSAEvent objects " )
from . formats import autodetect_format , get_format_class , get_format_identifier # noqa: E402