|
|
@ -13,7 +13,6 @@ from subliminal import core
|
|
|
|
from subliminal_patch import search_external_subtitles
|
|
|
|
from subliminal_patch import search_external_subtitles
|
|
|
|
from subzero.language import Language
|
|
|
|
from subzero.language import Language
|
|
|
|
from bs4 import UnicodeDammit
|
|
|
|
from bs4 import UnicodeDammit
|
|
|
|
from itertools import islice
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from get_args import args
|
|
|
|
from get_args import args
|
|
|
|
from database import database
|
|
|
|
from database import database
|
|
|
@ -88,10 +87,9 @@ def store_subtitles(file):
|
|
|
|
if os.path.splitext(subtitle)[1] != ".sub":
|
|
|
|
if os.path.splitext(subtitle)[1] != ".sub":
|
|
|
|
logging.debug("BAZARR falling back to file content analysis to detect language.")
|
|
|
|
logging.debug("BAZARR falling back to file content analysis to detect language.")
|
|
|
|
with open(os.path.join(os.path.dirname(file), subtitle), 'r') as f:
|
|
|
|
with open(os.path.join(os.path.dirname(file), subtitle), 'r') as f:
|
|
|
|
text = list(islice(f, 100))
|
|
|
|
text = f.read()
|
|
|
|
text = ' '.join(text)
|
|
|
|
|
|
|
|
encoding = UnicodeDammit(text)
|
|
|
|
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
|
|
|
|
encoding = UnicodeDammit(text)
|
|
|
|
text = text.decode(encoding.original_encoding)
|
|
|
|
text = text.decode(encoding.original_encoding)
|
|
|
|
detected_language = langdetect.detect(text)
|
|
|
|
detected_language = langdetect.detect(text)
|
|
|
|
except Exception as e:
|
|
|
|
except Exception as e:
|
|
|
@ -186,10 +184,9 @@ def store_subtitles_movie(file):
|
|
|
|
if os.path.splitext(subtitle)[1] != ".sub":
|
|
|
|
if os.path.splitext(subtitle)[1] != ".sub":
|
|
|
|
logging.debug("BAZARR falling back to file content analysis to detect language.")
|
|
|
|
logging.debug("BAZARR falling back to file content analysis to detect language.")
|
|
|
|
with open(os.path.join(os.path.dirname(file), dest_folder, subtitle), 'r') as f:
|
|
|
|
with open(os.path.join(os.path.dirname(file), dest_folder, subtitle), 'r') as f:
|
|
|
|
text = list(islice(f, 100))
|
|
|
|
text = f.read()
|
|
|
|
text = ' '.join(text)
|
|
|
|
|
|
|
|
encoding = UnicodeDammit(text)
|
|
|
|
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
|
|
|
|
encoding = UnicodeDammit(text)
|
|
|
|
text = text.decode(encoding.original_encoding)
|
|
|
|
text = text.decode(encoding.original_encoding)
|
|
|
|
detected_language = langdetect.detect(text)
|
|
|
|
detected_language = langdetect.detect(text)
|
|
|
|
except Exception as e:
|
|
|
|
except Exception as e:
|
|
|
@ -408,7 +405,7 @@ def guess_external_subtitles(dest_folder, subtitles):
|
|
|
|
logging.debug("BAZARR falling back to file content analysis to detect language.")
|
|
|
|
logging.debug("BAZARR falling back to file content analysis to detect language.")
|
|
|
|
detected_language = None
|
|
|
|
detected_language = None
|
|
|
|
with open(subtitle_path, 'r') as f:
|
|
|
|
with open(subtitle_path, 'r') as f:
|
|
|
|
text = ' '.join(list(islice(f, 100)))
|
|
|
|
text = f.read()
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
encoding = UnicodeDammit(text)
|
|
|
|
encoding = UnicodeDammit(text)
|
|
|
|
text = text.decode(encoding.original_encoding)
|
|
|
|
text = text.decode(encoding.original_encoding)
|
|
|
|