@ -5,12 +5,14 @@ from datetime import datetime
import logging
import logging
import os
import os
import shutil
import shutil
import subprocess
import sys
import sys
import numpy as np
import numpy as np
from . aligners import FFTAligner , MaxScoreAligner , FailedToFindAlignmentException
from . aligners import FFTAligner , MaxScoreAligner , FailedToFindAlignmentException
from . constants import *
from . constants import *
from . ffmpeg_utils import ffmpeg_bin_path
from . sklearn_shim import Pipeline
from . sklearn_shim import Pipeline
from . speech_transformers import (
from . speech_transformers import (
VideoSpeechTransformer ,
VideoSpeechTransformer ,
@ -19,7 +21,7 @@ from .speech_transformers import (
)
)
from . subtitle_parser import make_subtitle_parser
from . subtitle_parser import make_subtitle_parser
from . subtitle_transformers import SubtitleMerger , SubtitleShifter
from . subtitle_transformers import SubtitleMerger , SubtitleShifter
from . version import __version__
from . version import get_version
logger = logging . getLogger ( __name__ )
logger = logging . getLogger ( __name__ )
@ -30,42 +32,94 @@ def override(args, **kwargs):
return args_dict
return args_dict
def run ( args ) :
def _ref_format ( ref_fname ) :
result = { ' retval ' : 0 ,
return ref_fname [ - 3 : ]
' offset_seconds ' : None ,
' framerate_scale_factor ' : None ,
' sync_was_successful ' : None }
def make_test_case(args, npy_savename, sync_was_successful):
    """Bundle the inputs and outputs of a sync run into an archive.

    A staging directory named ``<reference>.<timestamp>`` is created, filled
    with the log file, the input subtitles, the synced output (only when the
    sync succeeded) and either the reference subtitles or the serialized
    speech, then archived and removed again.

    Args:
        args: parsed options; ``reference``, ``srtin``, ``srtout`` and
            ``serialize_speech`` are read here.
        npy_savename: path of the serialized reference speech.
        sync_was_successful: whether ``args.srtout`` was written and should
            be included.

    Returns:
        0 when the archive was created, 1 when no archive format is
        available.

    Raises:
        ValueError: if ``npy_savename`` is None.
    """
    if npy_savename is None:
        raise ValueError('need non-null npy_savename')
    tar_dir = '{}.{}'.format(
        args.reference,
        datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
    )
    logger.info('creating test archive {}.tar.gz...'.format(tar_dir))
    os.mkdir(tar_dir)
    try:
        # The log and the synced output are moved (not copied), so they end
        # up only inside the archive once the staging directory is removed.
        shutil.move('ffsubsync.log', tar_dir)
        shutil.copy(args.srtin, tar_dir)
        if sync_was_successful:
            shutil.move(args.srtout, tar_dir)
        if _ref_format(args.reference) in SUBTITLE_EXTENSIONS:
            shutil.copy(args.reference, tar_dir)
        elif args.serialize_speech or args.reference == npy_savename:
            # The serialized speech is something the user asked for (or is
            # the reference itself), so leave the original in place.
            shutil.copy(npy_savename, tar_dir)
        else:
            shutil.move(npy_savename, tar_dir)
        supported_formats = {name for name, _ in shutil.get_archive_formats()}
        preferred_formats = ['gztar', 'bztar', 'xztar', 'zip', 'tar']
        for archive_format in preferred_formats:
            if archive_format in supported_formats:
                # Use the format that was actually found to be supported
                # rather than always requesting gztar.
                shutil.make_archive(tar_dir, archive_format, os.curdir, tar_dir)
                break
        else:
            logger.error('failed to create test archive; no formats supported '
                         '(this should not happen)')
            return 1
        logger.info('...done')
    finally:
        # Always clean up the staging directory, even on failure.
        shutil.rmtree(tar_dir)
    return 0
def try_sync(args, reference_pipe, srt_pipes, result):
    """Fit the subtitle pipelines, align them to the reference, write output.

    Args:
        args: parsed options; ``srtin``, ``srtout``, ``reference``,
            ``max_offset_seconds``, ``merge_with_reference`` and
            ``output_encoding`` are read here.
        reference_pipe: pipeline for the reference; only ``transform`` (and,
            when merging, ``named_steps['parse'].subs_``) is used here.
        srt_pipes: candidate subtitle pipelines; each one is fitted on
            ``args.srtin`` before alignment.
        result: dict updated in place. ``'sync_was_successful'`` is always
            set; ``'offset_seconds'`` and ``'framerate_scale_factor'`` are
            set only on success.

    Returns:
        True when the synced subtitles were written, False when no alignment
        could be found.

    Raises:
        Any exception other than ``FailedToFindAlignmentException`` raised
        while fitting, aligning or writing propagates to the caller, with
        ``result['sync_was_successful']`` already set to False.
    """
    # Pessimistic default: only the ``else`` branch below may report success,
    # so an unexpected exception can never be recorded as a successful sync.
    sync_was_successful = False
    try:
        logger.info('extracting speech segments from subtitles file %s...', args.srtin)
        for srt_pipe in srt_pipes:
            srt_pipe.fit(args.srtin)
        logger.info('...done')
        logger.info('computing alignments...')
        offset_samples, best_srt_pipe = MaxScoreAligner(
            FFTAligner, SAMPLE_RATE, args.max_offset_seconds
        ).fit_transform(
            reference_pipe.transform(args.reference),
            srt_pipes,
        )
        logger.info('...done')
        offset_seconds = offset_samples / float(SAMPLE_RATE)
        scale_step = best_srt_pipe.named_steps['scale']
        logger.info('offset seconds: %.3f', offset_seconds)
        logger.info('framerate scale factor: %.3f', scale_step.scale_factor)
        output_steps = [('shift', SubtitleShifter(offset_seconds))]
        if args.merge_with_reference:
            output_steps.append(
                ('merge',
                 SubtitleMerger(reference_pipe.named_steps['parse'].subs_))
            )
        output_pipe = Pipeline(output_steps)
        out_subs = output_pipe.fit_transform(scale_step.subs_)
        if args.output_encoding != 'same':
            out_subs = out_subs.set_encoding(args.output_encoding)
        logger.info('writing output to %s', args.srtout or 'stdout')
        out_subs.write_file(args.srtout)
    except FailedToFindAlignmentException as e:
        logger.error(e)
    else:
        sync_was_successful = True
        result['offset_seconds'] = offset_seconds
        result['framerate_scale_factor'] = scale_step.scale_factor
    finally:
        result['sync_was_successful'] = sync_was_successful
    # Returning after (not inside) ``finally`` keeps in-flight exceptions
    # alive instead of discarding them.
    return sync_was_successful
def make_reference_pipe ( args ) :
ref_format = _ref_format ( args . reference )
if ref_format in SUBTITLE_EXTENSIONS :
if ref_format in SUBTITLE_EXTENSIONS :
if args . vad is not None :
if args . vad is not None :
logger . warning ( ' Vad specified, but reference was not a movie ' )
logger . warning ( ' Vad specified, but reference was not a movie ' )
reference_pipe = make_subtitle_speech_pipeline (
return make_subtitle_speech_pipeline (
fmt = ref_format ,
fmt = ref_format ,
* * override (
* * override (
args ,
args ,
@ -75,7 +129,7 @@ def run(args):
elif ref_format in ( ' npy ' , ' npz ' ) :
elif ref_format in ( ' npy ' , ' npz ' ) :
if args . vad is not None :
if args . vad is not None :
logger . warning ( ' Vad specified, but reference was not a movie ' )
logger . warning ( ' Vad specified, but reference was not a movie ' )
reference_pipe = Pipeline ( [
return Pipeline ( [
( ' deserialize ' , DeserializeSpeechTransformer ( ) )
( ' deserialize ' , DeserializeSpeechTransformer ( ) )
] )
] )
else :
else :
@ -85,7 +139,7 @@ def run(args):
ref_stream = args . reference_stream
ref_stream = args . reference_stream
if ref_stream is not None and not ref_stream . startswith ( ' 0: ' ) :
if ref_stream is not None and not ref_stream . startswith ( ' 0: ' ) :
ref_stream = ' 0: ' + ref_stream
ref_stream = ' 0: ' + ref_stream
reference_pipe = Pipeline ( [
return Pipeline ( [
( ' speech_extract ' , VideoSpeechTransformer ( vad = vad ,
( ' speech_extract ' , VideoSpeechTransformer ( vad = vad ,
sample_rate = SAMPLE_RATE ,
sample_rate = SAMPLE_RATE ,
frame_rate = args . frame_rate ,
frame_rate = args . frame_rate ,
@ -95,101 +149,125 @@ def run(args):
vlc_mode = args . vlc_mode ,
vlc_mode = args . vlc_mode ,
gui_mode = args . gui_mode ) )
gui_mode = args . gui_mode ) )
] )
] )
def make_srt_pipes(args):
    """Build one unfitted subtitle speech pipeline per candidate scale factor.

    With ``args.no_fix_framerate`` set, the only candidate is ``1.``;
    otherwise the candidates are ``1.``, every entry of ``FRAMERATE_RATIOS``
    and the reciprocal of every entry. All pipelines share a single caching
    subtitle parser whose format is the extension of ``args.srtin``.

    Returns:
        A list of pipelines, in candidate order. They are not fitted here.
    """
    if not args.no_fix_framerate:
        known_ratios = np.array(FRAMERATE_RATIOS)
        candidate_ratios = np.concatenate([
            [1.], known_ratios, 1. / np.array(FRAMERATE_RATIOS)
        ])
    else:
        candidate_ratios = [1.]
    # Extension of the input file without the leading dot.
    input_ext = os.path.splitext(args.srtin)[-1][1:]
    parser = make_subtitle_parser(fmt=input_ext, caching=True, **args.__dict__)
    pipelines = []
    for ratio in candidate_ratios:
        pipe_kwargs = override(args, scale_factor=ratio, parser=parser)
        pipelines.append(make_subtitle_speech_pipeline(**pipe_kwargs))
    return pipelines
def extract_subtitles_from_reference(args):
    """Extract a subtitle stream from the reference with ffmpeg, as srt.

    ``args.extract_subs_from_stream`` may be given as ``0:s:<spec>``,
    ``s:<spec>`` or just ``<spec>``; all three are normalized to the
    ``0:s:<spec>`` form passed to ffmpeg's ``-map``. Output goes to
    ``args.srtout``, or to stdout when that is None.

    Returns:
        The ffmpeg return code (0 on success).
    """
    stream = args.extract_subs_from_stream
    if stream.startswith('0:s:'):
        pass  # already fully qualified
    elif stream.startswith('s:'):
        # Only the input-file index is missing; do not add a second ``s:``.
        stream = '0:{}'.format(stream)
    else:
        stream = '0:s:{}'.format(stream)
    # A malformed specifier is not rejected here: ffmpeg exits non-zero for
    # it, which is logged and returned below.
    ffmpeg_args = [ffmpeg_bin_path('ffmpeg', args.gui_mode, ffmpeg_resources_path=args.ffmpeg_path)]
    ffmpeg_args.extend([
        '-y',
        '-nostdin',
        '-loglevel', 'fatal',
        '-i', args.reference,
        '-map', stream,
        '-f', 'srt',
    ])
    # '-' makes ffmpeg write to stdout.
    ffmpeg_args.append('-' if args.srtout is None else args.srtout)
    logger.info('attempting to extract subtitles to {}...'.format('stdout' if args.srtout is None else args.srtout))
    retcode = subprocess.call(ffmpeg_args)
    if retcode == 0:
        logger.info('...done')
    else:
        logger.error('ffmpeg unable to extract subtitles from reference; return code %d', retcode)
    return retcode
sync_was_successful = True
offset_samples , best_srt_pipe = MaxScoreAligner (
FFTAligner , SAMPLE_RATE , max_offset_seconds
def validate_args(args):
    """Reject inconsistent option combinations before any work is done.

    Side effect: when ``args.vlc_mode`` is set, the module logger's level is
    raised to ``logging.CRITICAL``.

    Raises:
        ValueError: with a user-facing message, when
            * a test case is requested outside gui mode without both input
              and output subtitle files;
            * ``overwrite_input`` is combined with subtitle extraction, with
              a missing input file, or with an explicit output file;
            * subtitle extraction is combined with ``make_test_case`` or
              with an input subtitle file.
    """
    if args.vlc_mode:
        logger.setLevel(logging.CRITICAL)
    if args.make_test_case and not args.gui_mode:  # this validation not necessary for gui mode
        if args.srtin is None or args.srtout is None:
            raise ValueError('need to specify input and output srt files for test cases')
    if args.overwrite_input:
        if args.extract_subs_from_stream is not None:
            raise ValueError('input overwriting not allowed for extracting subtitles from reference')
        if args.srtin is None:
            raise ValueError(
                'need to specify input srt if --overwrite-input is specified since we cannot overwrite stdin'
            )
        if args.srtout is not None:
            raise ValueError(
                'overwrite input set but output file specified; refusing to run in case this was not intended'
            )
    if args.extract_subs_from_stream is not None:
        if args.make_test_case:
            raise ValueError('test case is for sync and not subtitle extraction')
        if args.srtin is not None:
            raise ValueError('stream specified for reference subtitle extraction; -i flag for sync input not allowed')
args . reference ,
datetime . now ( ) . strftime ( ' % Y- % m- %d - % H: % M: % S ' )
)
def run(args):
    """Run subtitle extraction or synchronization for the parsed ``args``.

    Steps: validate the options, derive ``args.srtout`` where it is implied
    (``overwrite_input``, gui mode), then either extract subtitles from the
    reference or fit the reference pipeline, optionally serialize its speech,
    synchronize ``args.srtin`` against it and optionally build a test-case
    archive.

    Returns:
        A dict with keys ``'retval'`` (0 on success), ``'offset_seconds'``,
        ``'framerate_scale_factor'`` and ``'sync_was_successful'``; the last
        three stay None unless synchronization was attempted.
    """
    result = {
        'retval': 0,
        'offset_seconds': None,
        'framerate_scale_factor': None,
        'sync_was_successful': None
    }
    try:
        validate_args(args)
    except ValueError as e:
        logger.error(e)
        result['retval'] = 1
        return result
    if args.overwrite_input:
        args.srtout = args.srtin
    if args.gui_mode and args.srtout is None:
        args.srtout = '{}.synced.srt'.format(os.path.splitext(args.srtin)[0])
    ref_format = _ref_format(args.reference)
    if args.merge_with_reference and ref_format not in SUBTITLE_EXTENSIONS:
        logger.error('merging synced output with reference only valid '
                     'when reference composed of subtitles')
        result['retval'] = 1
        return result
    log_handler = None
    if args.make_test_case:
        # Capture this run's log output so it can be bundled into the archive.
        log_handler = logging.FileHandler('ffsubsync.log')
        logger.addHandler(log_handler)
    try:
        if args.extract_subs_from_stream is not None:
            result['retval'] = extract_subtitles_from_reference(args)
            return result
        reference_pipe = make_reference_pipe(args)
        logger.info("extracting speech segments from reference '%s'...", args.reference)
        reference_pipe.fit(args.reference)
        logger.info('...done')
        npy_savename = None
        if args.make_test_case or args.serialize_speech:
            logger.info('serializing speech...')
            npy_savename = os.path.splitext(args.reference)[0] + '.npz'
            np.savez_compressed(npy_savename, speech=reference_pipe.transform(args.reference))
            logger.info('...done')
        if args.srtin is None:
            logger.info('unsynchronized subtitle file not specified; skipping synchronization')
            return result
        srt_pipes = make_srt_pipes(args)
        sync_was_successful = try_sync(args, reference_pipe, srt_pipes, result)
        if args.make_test_case:
            result['retval'] += make_test_case(args, npy_savename, sync_was_successful)
        return result
    finally:
        # Detach and close the per-run file handler so repeated calls do not
        # accumulate handlers or leave the log file open.
        if log_handler is not None:
            logger.removeHandler(log_handler)
            log_handler.close()
@ -206,11 +284,18 @@ def add_main_args_for_cli(parser):
help = ' If specified, serialize reference speech to a numpy array, '
help = ' If specified, serialize reference speech to a numpy array, '
' and create an archive with input/output subtitles '
' and create an archive with input/output subtitles '
' and serialized speech. ' )
' and serialized speech. ' )
parser . add_argument (
' --reference-stream ' , ' --refstream ' , ' --reference-track ' , ' --reftrack ' ,
default = None ,
help = ' Which stream/track in the video file to use as reference, '
' formatted according to ffmpeg conventions. For example, s:0 '
' uses the first subtitle track; a:3 would use the third audio track. '
)
def add_cli_only_args ( parser ) :
def add_cli_only_args ( parser ) :
parser . add_argument ( ' -v ' , ' --version ' , action = ' version ' ,
# parser.add_argument('-v', '--version', action='version' ,
version = ' {package} {version} ' . format ( package = __package__ , version = __version__ ) )
# version='{package} {version}'.format(package=__package__, version=get_version()) )
parser . add_argument ( ' --overwrite-input ' , action = ' store_true ' ,
parser . add_argument ( ' --overwrite-input ' , action = ' store_true ' ,
help = ' If specified, will overwrite the input srt instead of writing the output to a new file. ' )
help = ' If specified, will overwrite the input srt instead of writing the output to a new file. ' )
parser . add_argument ( ' --encoding ' , default = DEFAULT_ENCODING ,
parser . add_argument ( ' --encoding ' , default = DEFAULT_ENCODING ,
@ -243,13 +328,9 @@ def add_cli_only_args(parser):
' mismatch between reference and subtitles. ' )
' mismatch between reference and subtitles. ' )
parser . add_argument ( ' --serialize-speech ' , action = ' store_true ' ,
parser . add_argument ( ' --serialize-speech ' , action = ' store_true ' ,
help = ' If specified, serialize reference speech to a numpy array. ' )
help = ' If specified, serialize reference speech to a numpy array. ' )
parser . add_argument (
parser . add_argument ( ' --extract-subs-from-stream ' , default = None ,
' --reference-stream ' , ' --refstream ' , ' --reference-track ' , ' --reftrack ' ,
help = ' If specified, do not attempt sync; instead, just extract subtitles '
default = None ,
' from the specified stream using the reference. ' )
help = ' Which stream/track in the video file to use as reference, '
' formatted according to ffmpeg conventions. For example, s:0 '
' uses the first subtitle track; a:3 would use the third audio track. '
)
parser . add_argument (
parser . add_argument (
' --ffmpeg-path ' , ' --ffmpegpath ' , default = None ,
' --ffmpeg-path ' , ' --ffmpegpath ' , default = None ,
help = ' Where to look for ffmpeg and ffprobe. Uses the system PATH by default. '
help = ' Where to look for ffmpeg and ffprobe. Uses the system PATH by default. '
@ -268,7 +349,7 @@ def make_parser():
def main():
    """Parse the command line, run the tool and return its ``'retval'``."""
    cli_args = make_parser().parse_args()
    outcome = run(cli_args)
    return outcome['retval']
if __name__ == " __main__ " :
if __name__ == " __main__ " :