From d06cace2d6246745720e50675e03bb5c6ae9f76a Mon Sep 17 00:00:00 2001 From: josdion Date: Tue, 9 Jun 2020 22:20:43 +0300 Subject: [PATCH] Fix #886 Use the whole subtitle file to detect encoding. Detecting the encoding from only a part of the file does not always work. This change costs some performance, but should be more reliable. --- bazarr/list_subtitles.py | 13 ++++--------- libs/subliminal_patch/core.py | 2 +- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/bazarr/list_subtitles.py b/bazarr/list_subtitles.py index 20bddc995..1cf1f0494 100644 --- a/bazarr/list_subtitles.py +++ b/bazarr/list_subtitles.py @@ -364,8 +364,8 @@ def guess_external_subtitles(dest_folder, subtitles): logging.debug("BAZARR falling back to file content analysis to detect language.") detected_language = None - # to improve performance, skip detection of files larger that 5M - if os.path.getsize(subtitle_path) > 5*1024*1024: + # to improve performance, skip detection of files larger that 1M + if os.path.getsize(subtitle_path) > 1*1024*1024: logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " + subtitle_path) continue @@ -374,16 +374,11 @@ def guess_external_subtitles(dest_folder, subtitles): text = f.read() try: - # to improve performance, use only the first 32K to detect encoding - guess = chardet.detect(text[:32768]) + guess = chardet.detect(text) logging.debug('BAZARR detected encoding %r', guess) - if guess["confidence"] < 0.6: - raise UnicodeError - if guess["encoding"] == "ascii": - guess["encoding"] = "utf-8" text = text.decode(guess["encoding"]) detected_language = guess_language(text) - except UnicodeError: + except (UnicodeDecodeError, TypeError): logging.exception("BAZARR subtitles file doesn't seems to be text based. 
Skipping this file: " + subtitle_path) except: diff --git a/libs/subliminal_patch/core.py b/libs/subliminal_patch/core.py index 397effcfe..f854390cf 100644 --- a/libs/subliminal_patch/core.py +++ b/libs/subliminal_patch/core.py @@ -606,7 +606,7 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen continue if p_root.lower() == fn_no_ext_lower: - # skip check for language code is the subtitle file name is the same as the video name + # skip check for language code if the subtitle file name is the same as the video name subtitles[p] = None continue