|
|
|
@ -76,25 +76,26 @@ def guess_external_subtitles(dest_folder, subtitles, media_type, previously_inde
|
|
|
|
|
with open(subtitle_path, 'rb') as f:
|
|
|
|
|
text = f.read()
|
|
|
|
|
|
|
|
|
|
encoding = detect(text)['encoding']
|
|
|
|
|
if not encoding:
|
|
|
|
|
encoding = detect(text)
|
|
|
|
|
if encoding and 'encoding' in encoding:
|
|
|
|
|
encoding = detect(text)['encoding']
|
|
|
|
|
else:
|
|
|
|
|
logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
|
|
|
|
|
"It's probably a binary file: " + subtitle_path)
|
|
|
|
|
continue
|
|
|
|
|
if 'UTF' in encoding:
|
|
|
|
|
text = text.decode('utf-8')
|
|
|
|
|
detected_language = guess_language(text)
|
|
|
|
|
# add simplified and traditional chinese detection
|
|
|
|
|
if detected_language == 'zh':
|
|
|
|
|
traditional_chinese_fuzzy = [u"繁", u"雙語"]
|
|
|
|
|
traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht", ".zh-hant", ".zhhant", ".zh_hant",
|
|
|
|
|
".hant", ".big5", ".traditional"]
|
|
|
|
|
if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy:
|
|
|
|
|
detected_language == 'zt'
|
|
|
|
|
else:
|
|
|
|
|
text = text.decode(encoding)
|
|
|
|
|
text = text.decode(encoding)
|
|
|
|
|
|
|
|
|
|
detected_language = guess_language(text)
|
|
|
|
|
|
|
|
|
|
# add simplified and traditional chinese detection
|
|
|
|
|
if detected_language == 'zh':
|
|
|
|
|
traditional_chinese_fuzzy = [u"繁", u"雙語"]
|
|
|
|
|
traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht", ".zh-hant", ".zhhant", ".zh_hant",
|
|
|
|
|
".hant", ".big5", ".traditional"]
|
|
|
|
|
if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or \
|
|
|
|
|
(str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy:
|
|
|
|
|
detected_language = 'zt'
|
|
|
|
|
|
|
|
|
|
if detected_language:
|
|
|
|
|
logging.debug("BAZARR external subtitles detected and guessed this language: " + str(
|
|
|
|
|
detected_language))
|
|
|
|
@ -127,15 +128,14 @@ def guess_external_subtitles(dest_folder, subtitles, media_type, previously_inde
|
|
|
|
|
with open(subtitle_path, 'rb') as f:
|
|
|
|
|
text = f.read()
|
|
|
|
|
|
|
|
|
|
encoding = detect(text)['encoding']
|
|
|
|
|
if not encoding:
|
|
|
|
|
encoding = detect(text)
|
|
|
|
|
if encoding and 'encoding' in encoding:
|
|
|
|
|
encoding = detect(text)['encoding']
|
|
|
|
|
else:
|
|
|
|
|
logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
|
|
|
|
|
"It's probably a binary file: " + subtitle_path)
|
|
|
|
|
continue
|
|
|
|
|
if 'UTF' in encoding:
|
|
|
|
|
text = text.decode('utf-8')
|
|
|
|
|
else:
|
|
|
|
|
text = text.decode(encoding)
|
|
|
|
|
text = text.decode(encoding)
|
|
|
|
|
|
|
|
|
|
if bool(re.search(hi_regex, text)):
|
|
|
|
|
subtitles[subtitle] = Language.rebuild(subtitles[subtitle], forced=False, hi=True)
|
|
|
|
|