Read the whole text file when guessing the actual language of an external subtitle file whose filename doesn't include a language code.

pull/684/head
Louis Vézina 5 years ago
parent e0aac7de4a
commit 8008c5f019

@ -13,7 +13,6 @@ from subliminal import core
from subliminal_patch import search_external_subtitles from subliminal_patch import search_external_subtitles
from subzero.language import Language from subzero.language import Language
from bs4 import UnicodeDammit from bs4 import UnicodeDammit
from itertools import islice
from get_args import args from get_args import args
from database import database from database import database
@ -88,10 +87,9 @@ def store_subtitles(file):
if os.path.splitext(subtitle)[1] != ".sub": if os.path.splitext(subtitle)[1] != ".sub":
logging.debug("BAZARR falling back to file content analysis to detect language.") logging.debug("BAZARR falling back to file content analysis to detect language.")
with open(os.path.join(os.path.dirname(file), subtitle), 'r') as f: with open(os.path.join(os.path.dirname(file), subtitle), 'r') as f:
text = list(islice(f, 100)) text = f.read()
text = ' '.join(text)
encoding = UnicodeDammit(text)
try: try:
encoding = UnicodeDammit(text)
text = text.decode(encoding.original_encoding) text = text.decode(encoding.original_encoding)
detected_language = langdetect.detect(text) detected_language = langdetect.detect(text)
except Exception as e: except Exception as e:
@ -186,10 +184,9 @@ def store_subtitles_movie(file):
if os.path.splitext(subtitle)[1] != ".sub": if os.path.splitext(subtitle)[1] != ".sub":
logging.debug("BAZARR falling back to file content analysis to detect language.") logging.debug("BAZARR falling back to file content analysis to detect language.")
with open(os.path.join(os.path.dirname(file), dest_folder, subtitle), 'r') as f: with open(os.path.join(os.path.dirname(file), dest_folder, subtitle), 'r') as f:
text = list(islice(f, 100)) text = f.read()
text = ' '.join(text)
encoding = UnicodeDammit(text)
try: try:
encoding = UnicodeDammit(text)
text = text.decode(encoding.original_encoding) text = text.decode(encoding.original_encoding)
detected_language = langdetect.detect(text) detected_language = langdetect.detect(text)
except Exception as e: except Exception as e:
@ -408,7 +405,7 @@ def guess_external_subtitles(dest_folder, subtitles):
logging.debug("BAZARR falling back to file content analysis to detect language.") logging.debug("BAZARR falling back to file content analysis to detect language.")
detected_language = None detected_language = None
with open(subtitle_path, 'r') as f: with open(subtitle_path, 'r') as f:
text = ' '.join(list(islice(f, 100))) text = f.read()
try: try:
encoding = UnicodeDammit(text) encoding = UnicodeDammit(text)
text = text.decode(encoding.original_encoding) text = text.decode(encoding.original_encoding)

Loading…
Cancel
Save