@ -5,12 +5,14 @@ from datetime import datetime
import logging
import os
import shutil
import subprocess
import sys
import numpy as np
from . aligners import FFTAligner , MaxScoreAligner , FailedToFindAlignmentException
from . constants import *
from . ffmpeg_utils import ffmpeg_bin_path
from . sklearn_shim import Pipeline
from . speech_transformers import (
VideoSpeechTransformer ,
@ -19,7 +21,7 @@ from .speech_transformers import (
)
from . subtitle_parser import make_subtitle_parser
from . subtitle_transformers import SubtitleMerger , SubtitleShifter
from . version import __version__
from . version import get_version
logger = logging . getLogger ( __name__ )
@ -30,42 +32,94 @@ def override(args, **kwargs):
return args_dict
def run ( args ) :
result = { ' retval ' : 0 ,
' offset_seconds ' : None ,
' framerate_scale_factor ' : None ,
' sync_was_successful ' : None }
if args . vlc_mode :
logger . setLevel ( logging . CRITICAL )
if args . make_test_case and not args . gui_mode : # this validation not necessary for gui mode
if args . srtin is None or args . srtout is None :
logger . error ( ' need to specify input and output srt files for test cases ' )
result [ ' retval ' ] = 1
return result
if args . overwrite_input :
if args . srtin is None :
logger . error ( ' need to specify input srt if --overwrite-input is specified since we cannot overwrite stdin ' )
result [ ' retval ' ] = 1
return result
if args . srtout is not None :
logger . error ( ' overwrite input set but output file specified; refusing to run in case this was not intended ' )
result [ ' retval ' ] = 1
return result
args . srtout = args . srtin
if args . gui_mode and args . srtout is None :
args . srtout = ' {} .synced.srt ' . format ( os . path . splitext ( args . srtin ) [ 0 ] )
ref_format = args . reference [ - 3 : ]
if args . merge_with_reference and ref_format not in SUBTITLE_EXTENSIONS :
logger . error ( ' merging synced output with reference only valid '
' when reference composed of subtitles ' )
return 1
if args . make_test_case :
handler = logging . FileHandler ( ' ffsubsync.log ' )
logger . addHandler ( handler )
def _ref_format ( ref_fname ) :
return ref_fname [ - 3 : ]
def make_test_case(args, npy_savename, sync_was_successful):
    """Bundle the inputs and outputs of a sync run into a test archive.

    A scratch directory named after ``args.reference`` plus a timestamp is
    created, filled with the log file, the input subtitles, the synced output
    (only when ``sync_was_successful``) and either the reference subtitles or
    the serialized speech file, then archived. The scratch directory is always
    removed afterwards, whether or not archiving worked.

    Args:
        args: parsed arguments; ``reference``, ``srtin``, ``srtout`` and
            ``serialize_speech`` are read.
        npy_savename: path of the serialized reference speech.
        sync_was_successful: whether ``args.srtout`` was written and should be
            moved into the archive.

    Returns:
        0 when an archive was created, 1 when none of the preferred archive
        formats is supported.

    Raises:
        ValueError: if ``npy_savename`` is None.
    """
    # NOTE(review): string literals are reproduced exactly as they appear in
    # the reviewed text, including the spaces just inside the quotes; that
    # padding looks like an extraction artifact -- confirm against the real file.
    if npy_savename is None:
        raise ValueError(' need non-null npy_savename ')
    tar_dir = ' {} . {} '.format(
        args.reference,
        datetime.now().strftime(' % Y- % m- %d - % H: % M: % S ')
    )
    logger.info(' creating test archive {} .tar.gz... '.format(tar_dir))
    os.mkdir(tar_dir)
    try:
        shutil.move(' ffsubsync.log ', tar_dir)
        shutil.copy(args.srtin, tar_dir)
        if sync_was_successful:
            shutil.move(args.srtout, tar_dir)
        if _ref_format(args.reference) in SUBTITLE_EXTENSIONS:
            shutil.copy(args.reference, tar_dir)
        elif args.serialize_speech or args.reference == npy_savename:
            # The speech file must survive the run, so copy instead of move.
            shutil.copy(npy_savename, tar_dir)
        else:
            shutil.move(npy_savename, tar_dir)
        supported_formats = {name for name, _ in shutil.get_archive_formats()}
        preferred_formats = [' gztar ', ' bztar ', ' xztar ', ' zip ', ' tar ']
        for archive_format in preferred_formats:
            if archive_format in supported_formats:
                # Use the format that was actually found to be supported; the
                # previous code always asked for the gztar literal, which made
                # the fallback list pointless.
                shutil.make_archive(tar_dir, archive_format, os.curdir, tar_dir)
                break
        else:
            logger.error(' failed to create test archive; no formats supported '
                         ' (this should not happen) ')
            return 1
        logger.info(' ...done ')
    finally:
        shutil.rmtree(tar_dir)
    return 0
def try_sync(args, reference_pipe, srt_pipes, result):
    """Align the subtitles against the reference and write the synced output.

    Every pipeline in ``srt_pipes`` is fitted on ``args.srtin``; the aligner
    then picks the best-scoring pipeline and offset, and the shifted (and
    optionally merged) subtitles are written to ``args.srtout``.

    Args:
        args: parsed arguments; ``srtin``, ``srtout``, ``reference``,
            ``max_offset_seconds``, ``merge_with_reference`` and
            ``output_encoding`` are read.
        reference_pipe: fitted reference pipeline.
        srt_pipes: candidate subtitle pipelines.
        result: dict updated in place with the offset, the framerate scale
            factor (both only on success) and the success flag (always).

    Returns:
        True when the synced subtitles were written, False when no alignment
        could be found.

    Raises:
        Any exception other than ``FailedToFindAlignmentException`` propagates
        to the caller; ``result`` then records the sync as unsuccessful.
    """
    # NOTE(review): string literals are reproduced exactly as they appear in
    # the reviewed text, including the spaces just inside the quotes; that
    # padding looks like an extraction artifact -- confirm against the real file.

    # Start pessimistic: the flag only becomes True once everything, including
    # writing the output, has succeeded. Starting from True would record a
    # successful sync in `result` when an unexpected error aborts the run.
    sync_was_successful = False
    try:
        logger.info(' extracting speech segments from subtitles file %s ... ', args.srtin)
        for srt_pipe in srt_pipes:
            srt_pipe.fit(args.srtin)
        logger.info(' ...done ')
        logger.info(' computing alignments... ')
        offset_samples, best_srt_pipe = MaxScoreAligner(
            FFTAligner, SAMPLE_RATE, args.max_offset_seconds
        ).fit_transform(
            reference_pipe.transform(args.reference),
            srt_pipes,
        )
        logger.info(' ...done ')
        offset_seconds = offset_samples / float(SAMPLE_RATE)
        scale_step = best_srt_pipe.named_steps[' scale ']
        logger.info(' offset seconds: %.3f ', offset_seconds)
        logger.info(' framerate scale factor: %.3f ', scale_step.scale_factor)
        output_steps = [(' shift ', SubtitleShifter(offset_seconds))]
        if args.merge_with_reference:
            output_steps.append(
                (' merge ',
                 SubtitleMerger(reference_pipe.named_steps[' parse '].subs_))
            )
        output_pipe = Pipeline(output_steps)
        out_subs = output_pipe.fit_transform(scale_step.subs_)
        if args.output_encoding != ' same ':
            out_subs = out_subs.set_encoding(args.output_encoding)
        logger.info(' writing output to {} '.format(args.srtout or ' stdout '))
        out_subs.write_file(args.srtout)
    except FailedToFindAlignmentException as e:
        logger.error(e)
    else:
        result[' offset_seconds '] = offset_seconds
        result[' framerate_scale_factor '] = scale_step.scale_factor
        sync_was_successful = True
    finally:
        result[' sync_was_successful '] = sync_was_successful
    # Returning after (not inside) the `finally` keeps unexpected exceptions
    # from being silently discarded.
    return sync_was_successful
def make_reference_pipe ( args ) :
ref_format = _ref_format ( args . reference )
if ref_format in SUBTITLE_EXTENSIONS :
if args . vad is not None :
logger . warning ( ' Vad specified, but reference was not a movie ' )
reference_pipe = make_subtitle_speech_pipeline (
return make_subtitle_speech_pipeline (
fmt = ref_format ,
* * override (
args ,
@ -75,7 +129,7 @@ def run(args):
elif ref_format in ( ' npy ' , ' npz ' ) :
if args . vad is not None :
logger . warning ( ' Vad specified, but reference was not a movie ' )
reference_pipe = Pipeline ( [
return Pipeline ( [
( ' deserialize ' , DeserializeSpeechTransformer ( ) )
] )
else :
@ -85,7 +139,7 @@ def run(args):
ref_stream = args . reference_stream
if ref_stream is not None and not ref_stream . startswith ( ' 0: ' ) :
ref_stream = ' 0: ' + ref_stream
reference_pipe = Pipeline ( [
return Pipeline ( [
( ' speech_extract ' , VideoSpeechTransformer ( vad = vad ,
sample_rate = SAMPLE_RATE ,
frame_rate = args . frame_rate ,
@ -95,12 +149,109 @@ def run(args):
vlc_mode = args . vlc_mode ,
gui_mode = args . gui_mode ) )
] )
def make_srt_pipes(args):
    """Build one subtitle speech pipeline per candidate framerate ratio.

    With ``args.no_fix_framerate`` set, only the identity ratio is used.
    Otherwise the candidates are 1, every entry of ``FRAMERATE_RATIOS`` and
    the reciprocal of every entry. All pipelines share a single parser, chosen
    from the extension of ``args.srtin`` and created with ``caching=True``.

    Returns:
        A list of (unfitted) pipelines, in the order of the candidate ratios.
    """
    if args.no_fix_framerate:
        framerate_ratios = [1.]
    else:
        known_ratios = np.array(FRAMERATE_RATIOS)
        framerate_ratios = np.concatenate([[1.], known_ratios, 1. / known_ratios])

    # Extension of the input file without its leading dot.
    srt_extension = os.path.splitext(args.srtin)[-1][1:]
    parser = make_subtitle_parser(fmt=srt_extension, caching=True, **args.__dict__)

    srt_pipes = []
    for scale_factor in framerate_ratios:
        pipe_kwargs = override(args, scale_factor=scale_factor, parser=parser)
        srt_pipes.append(make_subtitle_speech_pipeline(**pipe_kwargs))
    return srt_pipes
def extract_subtitles_from_reference(args):
    """Extract one subtitle stream from the reference with ffmpeg.

    ``args.extract_subs_from_stream`` is normalised to a fully qualified
    subtitle stream specifier before being passed to ffmpeg's ``-map``. The
    subtitles are written to ``args.srtout``, or to stdout when it is None.

    Args:
        args: parsed arguments; ``extract_subs_from_stream``, ``reference``,
            ``srtout``, ``gui_mode`` and ``ffmpeg_path`` are read.

    Returns:
        The ffmpeg return code (0 on success).
    """
    # NOTE(review): string literals are reproduced exactly as they appear in
    # the reviewed text, including the spaces just inside the quotes; that
    # padding looks like an extraction artifact -- confirm against the real file.
    stream = args.extract_subs_from_stream
    # Check the partially qualified form first. Previously it sat in an `elif`
    # behind the "not fully qualified" test and could never be reached, so a
    # spec that already carried the subtitle selector got it added twice.
    if stream.startswith(' s: '):
        stream = ' 0: {} '.format(stream)
    elif not stream.startswith(' 0:s: '):
        stream = ' 0:s: {} '.format(stream)
    # Every path above yields a fully qualified spec, so the former
    # "invalid stream" check (which was unreachable and did not abort anyway)
    # is not needed; ffmpeg itself reports a bad spec through its return code.
    ffmpeg_args = [ffmpeg_bin_path(' ffmpeg ', args.gui_mode, ffmpeg_resources_path=args.ffmpeg_path)]
    ffmpeg_args.extend([
        ' -y ',
        ' -nostdin ',
        ' -loglevel ', ' fatal ',
        ' -i ', args.reference,
        ' -map ', ' {} '.format(stream),
        ' -f ', ' srt ',
    ])
    if args.srtout is None:
        ffmpeg_args.append(' - ')
    else:
        ffmpeg_args.append(args.srtout)
    logger.info(' attempting to extract subtitles to {} ... '.format(' stdout ' if args.srtout is None else args.srtout))
    retcode = subprocess.call(ffmpeg_args)
    if retcode == 0:
        logger.info(' ...done ')
    else:
        logger.error(' ffmpeg unable to extract subtitles from reference; return code %d ', retcode)
    return retcode
def validate_args(args):
    """Reject argument combinations that cannot be run.

    Side effect: when ``args.vlc_mode`` is set, the module logger's level is
    set to CRITICAL before any check is made.

    Raises:
        ValueError: for the first invalid combination found, checked in this
            order: test case without both input and output files (outside GUI
            mode); overwrite-input combined with subtitle extraction, with no
            input file, or with an explicit output file; subtitle extraction
            combined with a test case or with an input file.
    """
    if args.vlc_mode:
        logger.setLevel(logging.CRITICAL)

    # this validation not necessary for gui mode
    cli_test_case = args.make_test_case and not args.gui_mode
    if cli_test_case and (args.srtin is None or args.srtout is None):
        raise ValueError(' need to specify input and output srt files for test cases ')

    overwriting = args.overwrite_input
    extracting = args.extract_subs_from_stream is not None

    if overwriting and extracting:
        raise ValueError(' input overwriting not allowed for extracting subtitles from referece ')
    if overwriting and args.srtin is None:
        raise ValueError(
            ' need to specify input srt if --overwrite-input is specified since we cannot overwrite stdin '
        )
    if overwriting and args.srtout is not None:
        raise ValueError(
            ' overwrite input set but output file specified; refusing to run in case this was not intended '
        )

    if extracting and args.make_test_case:
        raise ValueError(' test case is for sync and not subtitle extraction ')
    if extracting and args.srtin is not None:
        raise ValueError(' stream specified for reference subtitle extraction; -i flag for sync input not allowed ')
def run ( args ) :
result = {
' retval ' : 0 ,
' offset_seconds ' : None ,
' framerate_scale_factor ' : None ,
' sync_was_successful ' : None
}
try :
validate_args ( args )
except ValueError as e :
logger . error ( e )
result [ ' retval ' ] = 1
return result
if args . overwrite_input :
args . srtout = args . srtin
if args . gui_mode and args . srtout is None :
args . srtout = ' {} .synced.srt ' . format ( os . path . splitext ( args . srtin ) [ 0 ] )
ref_format = _ref_format ( args . reference )
if args . merge_with_reference and ref_format not in SUBTITLE_EXTENSIONS :
logger . error ( ' merging synced output with reference only valid '
' when reference composed of subtitles ' )
result [ ' retval ' ] = 1
return result
if args . make_test_case :
handler = logging . FileHandler ( ' ffsubsync.log ' )
logger . addHandler ( handler )
if args . extract_subs_from_stream is not None :
result [ ' retval ' ] = extract_subtitles_from_reference ( args )
return result
reference_pipe = make_reference_pipe ( args )
logger . info ( " extracting speech segments from reference ' %s ' ... " , args . reference )
reference_pipe . fit ( args . reference )
logger . info ( ' ...done ' )
@ -113,83 +264,10 @@ def run(args):
if args . srtin is None :
logger . info ( ' unsynchronized subtitle file not specified; skipping synchronization ' )
return result
parser = make_subtitle_parser ( fmt = os . path . splitext ( args . srtin ) [ - 1 ] [ 1 : ] , caching = True , * * args . __dict__ )
logger . info ( " extracting speech segments from subtitles ' %s ' ... " , args . srtin )
srt_pipes = [
make_subtitle_speech_pipeline (
* * override ( args , scale_factor = scale_factor , parser = parser )
) . fit ( args . srtin )
for scale_factor in framerate_ratios
]
logger . info ( ' ...done ' )
logger . info ( ' computing alignments... ' )
max_offset_seconds = args . max_offset_seconds
try :
sync_was_successful = True
offset_samples , best_srt_pipe = MaxScoreAligner (
FFTAligner , SAMPLE_RATE , max_offset_seconds
) . fit_transform (
reference_pipe . transform ( args . reference ) ,
srt_pipes ,
)
logger . info ( ' ...done ' )
offset_seconds = offset_samples / float ( SAMPLE_RATE )
scale_step = best_srt_pipe . named_steps [ ' scale ' ]
logger . info ( ' offset seconds: %.3f ' , offset_seconds )
logger . info ( ' framerate scale factor: %.3f ' , scale_step . scale_factor )
output_steps = [ ( ' shift ' , SubtitleShifter ( offset_seconds ) ) ]
if args . merge_with_reference :
output_steps . append (
( ' merge ' ,
SubtitleMerger ( reference_pipe . named_steps [ ' parse ' ] . subs_ ) )
)
output_pipe = Pipeline ( output_steps )
out_subs = output_pipe . fit_transform ( scale_step . subs_ )
if args . output_encoding != ' same ' :
out_subs = out_subs . set_encoding ( args . output_encoding )
logger . info ( ' writing output to {} ' . format ( args . srtout or ' stdout ' ) )
out_subs . write_file ( args . srtout )
except FailedToFindAlignmentException as e :
sync_was_successful = False
logger . error ( e )
else :
result [ ' offset_seconds ' ] = offset_seconds
result [ ' framerate_scale_factor ' ] = scale_step . scale_factor
finally :
result [ ' sync_was_successful ' ] = sync_was_successful
srt_pipes = make_srt_pipes ( args )
sync_was_successful = try_sync ( args , reference_pipe , srt_pipes , result )
if args . make_test_case :
if npy_savename is None :
raise ValueError ( ' need non-null npy_savename ' )
tar_dir = ' {} . {} ' . format (
args . reference ,
datetime . now ( ) . strftime ( ' % Y- % m- %d - % H: % M: % S ' )
)
logger . info ( ' creating test archive {} .tar.gz... ' . format ( tar_dir ) )
os . mkdir ( tar_dir )
try :
shutil . move ( ' ffsubsync.log ' , tar_dir )
shutil . copy ( args . srtin , tar_dir )
if sync_was_successful :
shutil . move ( args . srtout , tar_dir )
if ref_format in SUBTITLE_EXTENSIONS :
shutil . copy ( args . reference , tar_dir )
elif args . serialize_speech or args . reference == npy_savename :
shutil . copy ( npy_savename , tar_dir )
else :
shutil . move ( npy_savename , tar_dir )
supported_formats = set ( list ( zip ( * shutil . get_archive_formats ( ) ) ) [ 0 ] )
preferred_formats = [ ' gztar ' , ' bztar ' , ' xztar ' , ' zip ' , ' tar ' ]
for archive_format in preferred_formats :
if archive_format in supported_formats :
shutil . make_archive ( tar_dir , ' gztar ' , os . curdir , tar_dir )
break
else :
logger . error ( ' failed to create test archive; no formats supported '
' (this should not happen) ' )
result [ ' retval ' ] = 1
logger . info ( ' ...done ' )
finally :
shutil . rmtree ( tar_dir )
result [ ' retval ' ] + = make_test_case ( args , npy_savename , sync_was_successful )
return result
@ -206,11 +284,18 @@ def add_main_args_for_cli(parser):
help = ' If specified, serialize reference speech to a numpy array, '
' and create an archive with input/output subtitles '
' and serialized speech. ' )
parser . add_argument (
' --reference-stream ' , ' --refstream ' , ' --reference-track ' , ' --reftrack ' ,
default = None ,
help = ' Which stream/track in the video file to use as reference, '
' formatted according to ffmpeg conventions. For example, s:0 '
' uses the first subtitle track; a:3 would use the third audio track. '
)
def add_cli_only_args ( parser ) :
parser . add_argument ( ' -v ' , ' --version ' , action = ' version ' ,
version = ' {package} {version} ' . format ( package = __package__ , version = __version__ ) )
# parser.add_argument('-v', '--version', action='version' ,
# version='{package} {version}'.format(package=__package__, version=get_version()) )
parser . add_argument ( ' --overwrite-input ' , action = ' store_true ' ,
help = ' If specified, will overwrite the input srt instead of writing the output to a new file. ' )
parser . add_argument ( ' --encoding ' , default = DEFAULT_ENCODING ,
@ -243,13 +328,9 @@ def add_cli_only_args(parser):
' mismatch between reference and subtitles. ' )
parser . add_argument ( ' --serialize-speech ' , action = ' store_true ' ,
help = ' If specified, serialize reference speech to a numpy array. ' )
parser . add_argument (
' --reference-stream ' , ' --refstream ' , ' --reference-track ' , ' --reftrack ' ,
default = None ,
help = ' Which stream/track in the video file to use as reference, '
' formatted according to ffmpeg conventions. For example, s:0 '
' uses the first subtitle track; a:3 would use the third audio track. '
)
parser . add_argument ( ' --extract-subs-from-stream ' , default = None ,
help = ' If specified, do not attempt sync; instead, just extract subtitles '
' from the specified stream using the reference. ' )
parser . add_argument (
' --ffmpeg-path ' , ' --ffmpegpath ' , default = None ,
help = ' Where to look for ffmpeg and ffprobe. Uses the system PATH by default. '
@ -268,7 +349,7 @@ def make_parser():
def main():
    """Parse the command line, run, and return the process exit status.

    ``run`` returns a result dict; only its return-value entry is handed back,
    so the caller receives an integer status rather than the whole dict.
    """
    parser = make_parser()
    args = parser.parse_args()
    # A preceding `return run(args)` made this lookup unreachable and leaked
    # the full result dict to the caller.
    return run(args)[' retval ']
if __name__ == " __main__ " :