From 8008c5f019bfd265f3eaa7c38b97baf87be9c1d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Louis=20V=C3=A9zina?= <5130500+morpheus65535@users.noreply.github.com> Date: Sun, 17 Nov 2019 19:32:41 -0500 Subject: [PATCH] Read the whole text file when guessing the actual language of an external subtitle file whose filename doesn't include a language code. --- bazarr/list_subtitles.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/bazarr/list_subtitles.py b/bazarr/list_subtitles.py index 21bf7925e..c89bdd963 100644 --- a/bazarr/list_subtitles.py +++ b/bazarr/list_subtitles.py @@ -13,7 +13,6 @@ from subliminal import core from subliminal_patch import search_external_subtitles from subzero.language import Language from bs4 import UnicodeDammit -from itertools import islice from get_args import args from database import database @@ -88,10 +87,9 @@ def store_subtitles(file): if os.path.splitext(subtitle)[1] != ".sub": logging.debug("BAZARR falling back to file content analysis to detect language.") with open(os.path.join(os.path.dirname(file), subtitle), 'r') as f: - text = list(islice(f, 100)) - text = ' '.join(text) - encoding = UnicodeDammit(text) + text = f.read() try: + encoding = UnicodeDammit(text) text = text.decode(encoding.original_encoding) detected_language = langdetect.detect(text) except Exception as e: @@ -186,10 +184,9 @@ def store_subtitles_movie(file): if os.path.splitext(subtitle)[1] != ".sub": logging.debug("BAZARR falling back to file content analysis to detect language.") with open(os.path.join(os.path.dirname(file), dest_folder, subtitle), 'r') as f: - text = list(islice(f, 100)) - text = ' '.join(text) - encoding = UnicodeDammit(text) + text = f.read() try: + encoding = UnicodeDammit(text) text = text.decode(encoding.original_encoding) detected_language = langdetect.detect(text) except Exception as e: @@ -408,7 +405,7 @@ def guess_external_subtitles(dest_folder, subtitles): logging.debug("BAZARR falling back to file 
content analysis to detect language.") detected_language = None with open(subtitle_path, 'r') as f: - text = ' '.join(list(islice(f, 100))) + text = f.read() try: encoding = UnicodeDammit(text) text = text.decode(encoding.original_encoding)