Added custom language class to make it easier to implement non-standard/regional languages

pull/1427/head v0.9.6-beta.19
Vitiko 4 years ago committed by GitHub
parent 0ef9729f9d
commit 4ebcd49546
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,204 @@
# -*- coding: utf-8 -*-
import logging
import os
from subzero.language import Language
logger = logging.getLogger(__name__)
class CustomLanguage:
    """Base class for custom (non-standard / regional) languages.

    Each concrete language is a direct subclass that overrides the class
    attributes below. The lookup helpers (``from_value``, ``register``,
    ``found_external``) iterate ``cls.__subclasses__()``, so only direct
    subclasses are considered and the base class itself is never returned.
    The default attribute values describe Brazilian Portuguese.
    """

    # Custom two- and three-letter codes for this language.
    alpha2 = "pb"
    alpha3 = "pob"
    # Language tag with region.
    language = "pt-BR"
    # Codes of the official language this custom one derives from.
    official_alpha2 = "pt"
    official_alpha3 = "por"
    name = "Brazilian Portuguese"
    # Country code passed to ``Language`` together with ``official_alpha3``.
    iso = "BR"
    # Lower-case fragments searched for in an embedded track name.
    _possible_matches = ("pt-br", "pob", "pb", "brazilian", "brasil", "brazil")
    # Suffixes of the subtitle file name (final extension already removed).
    _extensions = (".pt-br", ".pob", ".pb")
    _extensions_forced = (".pt-br.forced", ".pob.forced", ".pb.forced")

    def subzero_language(self):
        """Return the ``Language`` built from ``official_alpha3`` and ``iso``."""
        return Language(self.official_alpha3, self.iso)

    @classmethod
    def from_value(cls, value, attr="alpha3"):
        """Return an instance of the first subclass whose ``attr`` equals ``value``.

        :param value: value to look for; it is compared as ``str(value)``.
        :param attr: name of the class attribute to compare against.
        :return: a new instance of the matching subclass, or None if no
            subclass matches.
        """
        for sub in cls.__subclasses__():
            if getattr(sub, attr) == str(value):
                return sub()

        return None

    @classmethod
    def register(cls, table):
        """Register the custom language subclasses in the database.

        One row (code3, code2, name) is inserted per subclass; conflicting
        rows are ignored.

        :param table: table model exposing ``code3``, ``code2``, ``name``
            and ``insert``.
        """
        for sub in cls.__subclasses__():
            table.insert(
                {table.code3: sub.alpha3, table.code2: sub.alpha2, table.name: sub.name}
            ).on_conflict(action="IGNORE").execute()

    @classmethod
    def found_external(cls, subtitle, subtitle_path):
        """Return the first code any subclass detects for an external subtitle.

        :param subtitle: subtitle file name.
        :param subtitle_path: path of the subtitle file.
        :return: the detected code (e.g. ``"pb"`` or ``"pb:forced"``), or
            None when no subclass recognizes the file.
        """
        for sub in cls.__subclasses__():
            code = sub.get_alpha_type(subtitle, subtitle_path)
            if code:
                return code

        return None

    @classmethod
    def get_alpha_type(cls, subtitle: str, subtitle_path=None):
        """Detect this language from the suffix of a subtitle file name.

        :param subtitle: subtitle file name; its final extension is dropped
            and the remainder is lower-cased before matching.
        :param subtitle_path: unused by this base implementation; kept so
            the signature matches subclass overrides. ``None`` is accepted,
            in line with the declared default.
        :return: ``alpha2``, ``"<alpha2>:forced"``, or None when nothing matches.
        """
        extension = str(os.path.splitext(subtitle)[0]).lower()
        to_return = None

        if extension.endswith(cls._extensions):
            to_return = cls.alpha2

        # Checked second so a forced suffix takes precedence over a plain one.
        if extension.endswith(cls._extensions_forced):
            to_return = f"{cls.alpha2}:forced"

        if to_return is not None:
            logging.debug("BAZARR external subtitles detected: %s", to_return)

        return to_return

    def ffprobe_found(self, detected_language: dict) -> bool:
        """Tell whether an embedded track name hints at this language.

        :param detected_language: track description; only its ``"name"``
            entry is read.
        :return: True when any of ``_possible_matches`` occurs in the
            lower-cased name, False otherwise (including a missing, empty
            or ``None`` name).
        """
        # ``or ""`` also covers a key that is present but set to None.
        name = (detected_language.get("name") or "").lower()
        if not name:
            return False

        return any(ext in name for ext in self._possible_matches)
class BrazilianPortuguese(CustomLanguage):
    """Brazilian Portuguese; every attribute is inherited unchanged from the base class."""
class ChineseTraditional(CustomLanguage):
    """Traditional Chinese custom language.

    Besides detecting Traditional Chinese ("zt"), ``get_alpha_type`` also
    recognizes the Simplified Chinese naming patterns (the ``_disamb``
    tuples) and reports them as "zh".
    """

    alpha2 = "zt"
    alpha3 = "zht"
    language = "zh-TW"
    official_alpha2 = "zh"
    official_alpha3 = "zho"
    name = "Chinese Traditional"
    iso = "TW"
    # Traditional Chinese file-name suffixes.
    _extensions = (
        ".cht",
        ".tc",
        ".zh-tw",
        ".zht",
        ".zh-hant",
        ".zhhant",
        ".zh_hant",
        ".hant",
        ".big5",
        ".traditional",
    )
    _extensions_forced = (
        ".cht.forced",
        ".tc.forced",
        ".zht.forced",
        "hant.forced",
        ".big5.forced",
        "繁體中文.forced",
        "雙語.forced",
        ".zh-tw.forced",
    )
    # NOTE(review): the empty-string entries below only match an empty
    # comparison value; they look like placeholders for characters that were
    # lost somewhere. Confirm against the intended lists.
    _extensions_fuzzy = ("", "雙語")
    _extensions_disamb_fuzzy = ("", "双语")
    # Simplified Chinese file-name suffixes (used for disambiguation).
    _extensions_disamb = (
        ".chs",
        ".sc",
        ".zhs",
        ".zh-hans",
        ".hans",
        ".zh_hans",
        ".zhhans",
        ".gb",
        ".simplified",
    )
    _extensions_disamb_forced = (
        ".chs.forced",
        ".sc.forced",
        ".zhs.forced",
        "hans.forced",
        ".gb.forced",
        "简体中文.forced",
        "双语.forced",
    )

    @classmethod
    def _has_forced_marker(cls, extension, patterns):
        """Return True when ``extension`` carries one of the forced ``patterns``."""
        # The 12-character window cannot contain a pattern longer than 12
        # characters (".zh-tw.forced" is 13), so a plain suffix match is
        # accepted as well. Every case the window matched still matches.
        return extension.endswith(patterns) or any(
            ext in extension[-12:] for ext in patterns
        )

    @classmethod
    def get_alpha_type(cls, subtitle, subtitle_path=None):
        """Detect Simplified or Traditional Chinese from a subtitle file.

        :param subtitle: subtitle file name; its final extension is dropped
            and the remainder is lower-cased before matching.
        :param subtitle_path: path of the subtitle; converted with ``str()``
            and lower-cased, so ``None`` is tolerated.
        :return: ``"zh"``, ``"zh:forced"``, ``"zt"``, ``"zt:forced"``, or
            None when nothing matches.
        """
        subtitle_path = str(subtitle_path).lower()
        extension = str(os.path.splitext(subtitle)[0]).lower()

        to_return = None

        # Simplified chinese
        # NOTE(review): this branch compares the whole path, while the
        # traditional branch below compares the path without its last five
        # characters. Confirm which one is intended.
        if (
            extension.endswith(cls._extensions_disamb)
            or subtitle_path in cls._extensions_disamb_fuzzy
        ):
            to_return = "zh"

        elif cls._has_forced_marker(extension, cls._extensions_disamb_forced):
            to_return = "zh:forced"

        # Traditional chinese
        elif (
            extension.endswith(cls._extensions)
            or subtitle_path[:-5] in cls._extensions_fuzzy
        ):
            to_return = "zt"

        elif cls._has_forced_marker(extension, cls._extensions_forced):
            to_return = "zt:forced"

        if to_return is not None:
            logging.debug("BAZARR external subtitles detected: %s", to_return)

        return to_return
class LatinAmericanSpanish(CustomLanguage):
    """Latin American Spanish custom language, derived from official Spanish."""

    # "ea" was the only free two-letter code the author could think of.
    alpha2 = "ea"
    alpha3 = "spl"
    language = "es-LA"
    official_alpha2 = "es"
    official_alpha3 = "spa"
    name = "Latin American Spanish"
    # A single country has to stand in for the whole region; Mexico was picked.
    iso = "MX"

    # Lower-case fragments searched for in an embedded track name.
    _possible_matches = (
        "es-la", "spa-la", "spl", "mx",
        "latin", "mexic", "argent", "latam",
    )

    # Suffixes of the subtitle file name (final extension already removed).
    _extensions = (
        ".es-la",
        ".spl",
        ".spa-la",
        ".ea",
        ".es-mx",
        ".lat",
        ".es.ar",
    )

    # Same suffixes followed by the forced marker.
    _extensions_forced = (
        ".es-la.forced",
        ".spl.forced",
        ".spa-la.forced",
        ".ea.forced",
        ".es-mx.forced",
        ".lat.forced",
        ".es.ar.forced",
    )

@ -7,39 +7,29 @@ from knowit import api
import enzyme
from enzyme.exceptions import MalformedMKVError
from enzyme.exceptions import MalformedMKVError
from custom_lang import CustomLanguage
from database import TableEpisodes, TableMovies
_FFPROBE_SPECIAL_LANGS = {
"zho": {
"list": ["cht", "tc", "traditional", "zht", "hant", "big5", u"", u"雙語"],
"alpha3": "zht",
},
"por": {
"list": ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"],
"alpha3": "pob",
},
}
logger = logging.getLogger(__name__)
def _handle_alpha3(detected_language: dict):
alpha3 = detected_language["language"].alpha3
custom = CustomLanguage.from_value(alpha3, "official_alpha3")
name = detected_language.get("name", "").lower()
special_lang = _FFPROBE_SPECIAL_LANGS.get(alpha3)
if special_lang is None or not name:
return alpha3 # The original alpha3
if custom and custom.ffprobe_found(detected_language):
logger.debug("Custom embedded language found: %s", custom.name)
return custom.alpha3
if any(ext in name for ext in special_lang["list"]):
return special_lang["alpha3"] # Guessed alpha from _FFPROBE_OTHER_LANGS
return alpha3
return alpha3 # In any case
def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=None):
data = parse_video_metadata(file, file_size, episode_file_id, movie_file_id)
subtitles_list = []
if data['ffprobe'] and 'subtitle' in data['ffprobe']:
for detected_language in data['ffprobe']['subtitle']:
if data["ffprobe"] and "subtitle" in data["ffprobe"]:
for detected_language in data["ffprobe"]["subtitle"]:
if not "language" in detected_language:
continue
@ -53,15 +43,23 @@ def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=No
forced = detected_language.get("forced", False)
hearing_impaired = detected_language.get("hearing_impaired", False)
codec = detected_language.get("format") # or None
codec = detected_language.get("format") # or None
subtitles_list.append([language, forced, hearing_impaired, codec])
elif data['enzyme']:
for subtitle_track in data['enzyme'].subtitle_tracks:
hearing_impaired = subtitle_track.name and "sdh" in subtitle_track.name.lower()
subtitles_list.append([subtitle_track.language, subtitle_track.forced, hearing_impaired,
subtitle_track.codec_id])
elif data["enzyme"]:
for subtitle_track in data["enzyme"].subtitle_tracks:
hearing_impaired = (
subtitle_track.name and "sdh" in subtitle_track.name.lower()
)
subtitles_list.append(
[
subtitle_track.language,
subtitle_track.forced,
hearing_impaired,
subtitle_track.codec_id,
]
)
return subtitles_list
@ -69,10 +67,10 @@ def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=No
def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=None):
# Define default data keys value
data = {
'ffprobe': {},
'enzyme': {},
'file_id': episode_file_id or movie_file_id,
'file_size': file_size
"ffprobe": {},
"enzyme": {},
"file_id": episode_file_id or movie_file_id,
"file_size": file_size,
}
# Get the actual cache value from database
@ -104,24 +102,26 @@ def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=No
# if not, we retrieve the metadata from the file
from utils import get_binary
ffprobe_path = get_binary("ffprobe")
# if we have ffprobe available
if ffprobe_path:
api.initialize({'provider': 'ffmpeg', 'ffmpeg': ffprobe_path})
data['ffprobe'] = api.know(file)
api.initialize({"provider": "ffmpeg", "ffmpeg": ffprobe_path})
data["ffprobe"] = api.know(file)
# if not, we use enzyme for mkv files
else:
if os.path.splitext(file)[1] == '.mkv':
with open(file, 'rb') as f:
if os.path.splitext(file)[1] == ".mkv":
with open(file, "rb") as f:
try:
mkv = enzyme.MKV(f)
except MalformedMKVError:
logging.error(
'BAZARR cannot analyze this MKV with our built-in MKV parser, you should install '
'ffmpeg/ffprobe: ' + file)
logger.error(
"BAZARR cannot analyze this MKV with our built-in MKV parser, you should install "
"ffmpeg/ffprobe: " + file
)
else:
data['enzyme'] = mkv
data["enzyme"] = mkv
# we write to db the result and return the newly cached ffprobe dict
if episode_file_id:

@ -3,7 +3,8 @@
import pycountry
from subzero.language import Language
from database import database, TableSettingsLanguages
from custom_lang import CustomLanguage
from database import TableSettingsLanguages
def load_language_in_db():
@ -13,22 +14,7 @@ def load_language_in_db():
if hasattr(lang, 'alpha_2')]
# Insert languages in database table
TableSettingsLanguages.insert_many(langs,
fields=[TableSettingsLanguages.code3, TableSettingsLanguages.code2,
TableSettingsLanguages.name]) \
.on_conflict(action='IGNORE') \
.execute()
TableSettingsLanguages.insert({TableSettingsLanguages.code3: 'pob', TableSettingsLanguages.code2: 'pb',
TableSettingsLanguages.name: 'Brazilian Portuguese'}) \
.on_conflict(action='IGNORE') \
.execute()
# insert chinese languages
TableSettingsLanguages.insert({TableSettingsLanguages.code3: 'zht', TableSettingsLanguages.code2: 'zt',
TableSettingsLanguages.name: 'Chinese Traditional'}) \
.on_conflict(action='IGNORE')\
.execute()
CustomLanguage.register(TableSettingsLanguages)
langs = [[lang.bibliographic, lang.alpha_3]
for lang in pycountry.languages
@ -88,15 +74,14 @@ def get_language_set():
.where(TableSettingsLanguages.enabled == 1).dicts()
language_set = set()
for lang in languages:
if lang['code3'] == 'pob':
language_set.add(Language('por', 'BR'))
elif lang['code3'] == 'zht':
language_set.add(Language('zho', 'TW'))
custom = CustomLanguage.from_value(lang["code3"], "alpha3")
if custom is None:
language_set.add(Language(lang["code3"]))
else:
language_set.add(Language(lang['code3']))
language_set.add(custom.subzero_language())
return language_set

@ -34,6 +34,7 @@ from get_providers import get_providers, get_providers_auth, provider_throttle,
from knowit import api
from subsyncer import subsync
from guessit import guessit
from custom_lang import CustomLanguage
from database import dict_mapper, get_exclusion_clause, get_profiles_list, get_audio_profile_languages, \
get_desired_languages, TableShows, TableEpisodes, TableMovies, TableHistory, TableHistoryMovie
from event_handler import event_stream, show_progress, hide_progress
@ -139,24 +140,13 @@ def download_subtitle(path, language, audio_language, hi, forced, providers, pro
# Always use alpha2 in API Request
l = alpha3_from_alpha2(l)
if l == 'pob':
lang_obj = Language('por', 'BR')
if forced == "True":
lang_obj = Language.rebuild(lang_obj, forced=True)
if hi == "force HI":
lang_obj = Language.rebuild(lang_obj, hi=True)
elif l == 'zht':
lang_obj = Language('zho', 'TW')
if forced == "True":
lang_obj = Language.rebuild(lang_obj, forced=True)
if hi == "force HI":
lang_obj = Language.rebuild(lang_obj, hi=True)
else:
lang_obj = Language(l)
if forced == "True":
lang_obj = Language.rebuild(lang_obj, forced=True)
if hi == "force HI":
lang_obj = Language.rebuild(lang_obj, hi=True)
lang_obj = _get_lang_obj(l)
if forced == "True":
lang_obj = Language.rebuild(lang_obj, forced=True)
if hi == "force HI":
lang_obj = Language.rebuild(lang_obj, hi=True)
language_set.add(lang_obj)
minimum_score = settings.general.minimum_score
@ -165,6 +155,7 @@ def download_subtitle(path, language, audio_language, hi, forced, providers, pro
postprocessing_cmd = settings.general.postprocessing_cmd
single = settings.general.getboolean('single_language')
# todo:
"""
AsyncProviderPool:
@ -228,12 +219,8 @@ def download_subtitle(path, language, audio_language, hi, forced, providers, pro
saved_any = True
for subtitle in saved_subtitles:
downloaded_provider = subtitle.provider_name
if subtitle.language == 'pt-BR':
downloaded_language_code3 = 'pob'
elif subtitle.language == 'zh-TW':
downloaded_language_code3 = 'zht'
else:
downloaded_language_code3 = subtitle.language.alpha3
downloaded_language_code3 = _get_download_code3(subtitle)
downloaded_language = language_from_alpha3(downloaded_language_code3)
downloaded_language_code2 = alpha2_from_alpha3(downloaded_language_code3)
audio_language_code2 = alpha2_from_language(audio_language)
@ -346,12 +333,7 @@ def manual_search(path, profileId, providers, providers_auth, sceneName, title,
lang = alpha3_from_alpha2(language)
if lang == 'pob':
lang_obj = Language('por', 'BR')
elif lang == 'zht':
lang_obj = Language('zho', 'TW')
else:
lang_obj = Language(lang)
lang_obj = _get_lang_obj(lang)
if forced == "True":
lang_obj = Language.rebuild(lang_obj, forced=True)
@ -562,12 +544,8 @@ def manual_download_subtitle(path, language, audio_language, hi, forced, subtitl
if saved_subtitles:
for saved_subtitle in saved_subtitles:
downloaded_provider = saved_subtitle.provider_name
if saved_subtitle.language == 'pt-BR':
downloaded_language_code3 = 'pob'
elif saved_subtitle.language == 'zh-TW':
downloaded_language_code3 = 'zht'
else:
downloaded_language_code3 = subtitle.language.alpha3
downloaded_language_code3 = _get_download_code3(subtitle)
downloaded_language = language_from_alpha3(downloaded_language_code3)
downloaded_language_code2 = alpha2_from_alpha3(downloaded_language_code3)
audio_language_code2 = alpha2_from_language(audio_language)
@ -666,13 +644,12 @@ def manual_upload_subtitle(path, language, forced, title, scene_name, media_type
'win') and settings.general.getboolean('chmod_enabled') else None
language = alpha3_from_alpha2(language)
if language == 'pob':
lang_obj = Language('por', 'BR')
elif language == 'zht':
lang_obj = Language('zho', 'TW')
else:
# Resolve custom codes through the CustomLanguage registry: from_value is the
# lookup classmethod defined on CustomLanguage, and the other call sites in
# this change query it with the "alpha3" attribute.
custom = CustomLanguage.from_value(language, "alpha3")
if custom is None:
    lang_obj = Language(language)
else:
    lang_obj = custom.subzero_language()
if forced:
lang_obj = Language.rebuild(lang_obj, forced=True)
@ -1685,3 +1662,18 @@ def sync_subtitles(video_path, srt_path, srt_lang, media_type, percent_score, so
logging.debug("BAZARR subsync skipped because subtitles score isn't below this "
"threshold value: " + subsync_threshold + "%")
return False
def _get_download_code3(subtitle):
    """Return the three-letter code to record for a downloaded subtitle.

    A custom language whose ``language`` attribute matches
    ``subtitle.language`` supplies its own ``alpha3``; otherwise the
    subtitle language's ``alpha3`` is returned.
    """
    match = CustomLanguage.from_value(subtitle.language, "language")
    return subtitle.language.alpha3 if match is None else match.alpha3
def _get_lang_obj(alpha3):
    """Build the ``Language`` object for a three-letter code.

    A code registered by a custom language yields that language's
    ``subzero_language()``; any other code is passed straight to ``Language``.
    """
    custom = CustomLanguage.from_value(alpha3, "alpha3")
    if custom is not None:
        return custom.subzero_language()

    return Language(alpha3)

@ -10,6 +10,7 @@ from subliminal_patch import core, search_external_subtitles
from subzero.language import Language
from gevent import sleep
from custom_lang import CustomLanguage
from database import get_profiles_list, get_profile_cutoff, TableEpisodes, TableShows, TableMovies
from get_languages import alpha2_from_alpha3, language_from_alpha2, get_language_set
from config import settings
@ -64,16 +65,6 @@ def store_subtitles(original_path, reversed_path):
logging.exception(
"BAZARR error when trying to analyze this %s file: %s" % (os.path.splitext(reversed_path)[1], reversed_path))
pass
brazilian_portuguese = [".pt-br", ".pob", "pb"]
brazilian_portuguese_forced = [".pt-br.forced", ".pob.forced", "pb.forced"]
simplified_chinese_fuzzy = [u"", u"双语"]
simplified_chinese = [".chs", ".sc", ".zhs",".zh-hans",".hans",".zh_hans",".zhhans",".gb",".simplified"]
simplified_chinese_forced = [".chs.forced", ".sc.forced", ".zhs.forced", "hans.forced", ".gb.forced", u"简体中文.forced", u"双语.forced"]
traditional_chinese_fuzzy = [u"", u"雙語"]
traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht",".zh-hant",".zhhant",".zh_hant",".hant", ".big5", ".traditional"]
traditional_chinese_forced = [".cht.forced", ".tc.forced", ".zht.forced", "hant.forced", ".big5.forced", u"繁體中文.forced", u"雙語.forced", "zh-tw.forced"]
try:
dest_folder = get_subtitle_destination_folder()
core.CUSTOM_PATHS = [dest_folder] if dest_folder else []
@ -86,38 +77,19 @@ def store_subtitles(original_path, reversed_path):
elif settings.general.subfolder == "relative":
full_dest_folder_path = os.path.join(os.path.dirname(reversed_path), dest_folder)
subtitles = guess_external_subtitles(full_dest_folder_path, subtitles)
except Exception as e:
except Exception:
logging.exception("BAZARR unable to index external subtitles.")
pass
else:
for subtitle, language in subtitles.items():
subtitle_path = get_external_subtitles_path(reversed_path, subtitle)
if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese)):
logging.debug("BAZARR external subtitles detected: " + "pb")
actual_subtitles.append(
[str("pb"), path_mappings.path_replace_reverse(subtitle_path)])
elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese_forced)):
logging.debug("BAZARR external subtitles detected: " + "pb:forced")
actual_subtitles.append(
[str("pb:forced"), path_mappings.path_replace_reverse(subtitle_path)])
elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(simplified_chinese)) or (str(subtitle_path).lower())[:-5] in simplified_chinese_fuzzy:
logging.debug("BAZARR external subtitles detected: " + "zh")
actual_subtitles.append(
[str("zh"), path_mappings.path_replace_reverse(subtitle_path)])
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in simplified_chinese_forced):
logging.debug("BAZARR external subtitles detected: " + "zh:forced")
actual_subtitles.append(
[str("zh:forced"), path_mappings.path_replace_reverse(subtitle_path)])
elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy:
logging.debug("BAZARR external subtitles detected: " + "zt")
actual_subtitles.append(
[str("zt"), path_mappings.path_replace_reverse(subtitle_path)])
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in traditional_chinese_forced):
logging.debug("BAZARR external subtitles detected: " + "zt:forced")
actual_subtitles.append(
[str("zt:forced"), path_mappings.path_replace_reverse(subtitle_path)])
elif not language:
if not language:
continue
subtitle_path = get_external_subtitles_path(reversed_path, subtitle)
custom = CustomLanguage.found_external(subtitle, subtitle_path)
if custom is not None:
actual_subtitles.append([custom, path_mappings.path_replace_reverse(subtitle_path)])
elif str(language) != 'und':
if language.forced:
language_str = str(language)
@ -184,19 +156,11 @@ def store_subtitles_movie(original_path, reversed_path):
except:
logging.debug("BAZARR unable to index this unrecognized language: " + subtitle_language)
pass
except Exception as e:
except Exception:
logging.exception(
"BAZARR error when trying to analyze this %s file: %s" % (os.path.splitext(reversed_path)[1], reversed_path))
pass
brazilian_portuguese = [".pt-br", ".pob", "pb"]
brazilian_portuguese_forced = [".pt-br.forced", ".pob.forced", "pb.forced"]
simplified_chinese_fuzzy = [u"", u"双语"]
simplified_chinese = [".chs", ".sc", ".zhs",".zh-hans",".hans",".zh_hans",".zhhans",".gb",".simplified"]
simplified_chinese_forced = [".chs.forced", ".sc.forced", ".zhs.forced", "hans.forced", ".gb.forced", u"简体中文.forced", u"双语.forced"]
traditional_chinese_fuzzy = [u"", u"雙語"]
traditional_chinese = [".cht", ".tc", ".zh-tw", ".zht",".zh-hant",".zhhant",".zh_hant",".hant", ".big5", ".traditional"]
traditional_chinese_forced = [".cht.forced", ".tc.forced", ".zht.forced", "hant.forced", ".big5.forced", u"繁體中文.forced", u"雙語.forced", "zh-tw.forced"]
try:
dest_folder = get_subtitle_destination_folder() or ''
core.CUSTOM_PATHS = [dest_folder] if dest_folder else []
@ -213,27 +177,15 @@ def store_subtitles_movie(original_path, reversed_path):
pass
else:
for subtitle, language in subtitles.items():
subtitle_path = get_external_subtitles_path(reversed_path, subtitle)
if str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese)):
logging.debug("BAZARR external subtitles detected: " + "pb")
actual_subtitles.append([str("pb"), path_mappings.path_replace_reverse_movie(subtitle_path)])
elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(brazilian_portuguese_forced)):
logging.debug("BAZARR external subtitles detected: " + "pb:forced")
actual_subtitles.append([str("pb:forced"), path_mappings.path_replace_reverse_movie(subtitle_path)])
elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(simplified_chinese)) or (str(subtitle_path).lower())[:-5] in simplified_chinese_fuzzy:
logging.debug("BAZARR external subtitles detected: " + "zh")
actual_subtitles.append([str("zh"), path_mappings.path_replace_reverse_movie(subtitle_path)])
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in simplified_chinese_forced):
logging.debug("BAZARR external subtitles detected: " + "zh:forced")
actual_subtitles.append([str("zh:forced"), path_mappings.path_replace_reverse_movie(subtitle_path)])
elif str(os.path.splitext(subtitle)[0]).lower().endswith(tuple(traditional_chinese)) or (str(subtitle_path).lower())[:-5] in traditional_chinese_fuzzy:
logging.debug("BAZARR external subtitles detected: " + "zt")
actual_subtitles.append([str("zt"), path_mappings.path_replace_reverse_movie(subtitle_path)])
elif any(ext in (str(os.path.splitext(subtitle)[0]).lower())[-12:] for ext in traditional_chinese_forced):
logging.debug("BAZARR external subtitles detected: " + "zt:forced")
actual_subtitles.append([str("zt:forced"), path_mappings.path_replace_reverse_movie(subtitle_path)])
elif not language:
if not language:
continue
subtitle_path = get_external_subtitles_path(reversed_path, subtitle)
custom = CustomLanguage.found_external(subtitle, subtitle_path)
if custom is not None:
actual_subtitles.append([custom, path_mappings.path_replace_reverse_movie(subtitle_path)])
elif str(language.basename) != 'und':
if language.forced:
language_str = str(language)

@ -13,6 +13,7 @@ import stat
from whichcraft import which
from get_args import args
from config import settings, url_sonarr, url_radarr
from custom_lang import CustomLanguage
from database import TableHistory, TableHistoryMovie, TableBlacklist, TableBlacklistMovie, TableShowsRootfolder, \
TableMoviesRootfolder
from event_handler import event_stream
@ -375,12 +376,11 @@ def delete_subtitles(media_type, language, forced, hi, media_path, subtitles_pat
def subtitles_apply_mods(language, subtitle_path, mods):
language = alpha3_from_alpha2(language)
if language == 'pob':
lang_obj = Language('por', 'BR')
elif language == 'zht':
lang_obj = Language('zho', 'TW')
else:
custom = CustomLanguage.from_value(language, "alpha3")
if custom is None:
lang_obj = Language(language)
else:
lang_obj = custom.subzero_language()
sub = Subtitle(lang_obj, mods=mods)
with open(subtitle_path, 'rb') as f:

@ -23,7 +23,7 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
password: "",
},
},
{ key: "argenteam", description: "Spanish Subtitles Provider" },
{ key: "argenteam", description: "LATAM Spanish Subtitles Provider" },
{
key: "assrt",
description: "Chinese Subtitles Provider",
@ -130,7 +130,7 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
name: "Sous-Titres.eu",
description: "Mostly French Subtitles Provider",
},
{ key: "subdivx", description: "Spanish Subtitles Provider" },
{ key: "subdivx", description: "LATAM Spanish / Spanish Subtitles Provider" },
{
key: "subssabbz",
name: "Subs.sab.bz",
@ -171,7 +171,7 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
name: "Subtitulamos.tv",
description: "Spanish Subtitles Provider",
},
{ key: "sucha", description: "Spanish Subtitles Provider" },
{ key: "sucha", description: "LATAM Spanish Subtitles Provider" },
{ key: "supersubtitles" },
{
key: "titlovi",
@ -184,7 +184,7 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
{
key: "tusubtitulo",
name: "Tusubtitulo.com",
description: "Spanish / English Subtitles Provider for TV Shows",
description: "LATAM Spanish / Spanish / English Subtitles Provider for TV Shows",
},
{ key: "tvsubtitles", name: "TVSubtitles" },
{ key: "wizdom", description: "Wizdom.xyz Subtitles Provider." },

@ -643,8 +643,6 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
#add simplified/traditional chinese detection
simplified_chinese = ["chs", "sc", "zhs", "hans","zh-hans", "gb", "", "简中", "简体", "简体中文", "中英双语", "中日双语","中法双语","简体&英文"]
traditional_chinese = ["cht", "tc", "zht", "hant","zh-hant", "big5", "", "繁中", "繁体", "繁體","繁体中文", "繁體中文", "正體中文", "中英雙語", "中日雙語","中法雙語","繁体&英文"]
FULL_LANGUAGE_LIST.extend(simplified_chinese)
FULL_LANGUAGE_LIST.extend(traditional_chinese)
p_root = p_root.replace('zh-TW', 'zht')
# remove possible language code for matching
@ -676,7 +674,7 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
language.forced = forced
language.hi = hi
elif any(ext in str(language_code) for ext in traditional_chinese):
language = Language.fromietf('zh')
language = Language.fromietf('zh')
language.forced = forced
language.hi = hi
else:

@ -21,10 +21,12 @@ class PatchedOpenSubtitlesConverter(OpenSubtitlesConverter):
self.to_opensubtitles.update({
('srp', None, "Latn"): 'scc',
('srp', None, "Cyrl"): 'scc',
('chi', None, 'Hant'): 'zht'
('chi', None, 'Hant'): 'zht',
('spa', 'MX'): 'spl',
})
self.from_opensubtitles.update({
'zht': ('zho', None, 'Hant')
'zht': ('zho', None, 'Hant'),
'spl': ('spa', 'MX'),
})
def convert(self, alpha3, country=None, script=None):

@ -46,7 +46,8 @@ class ArgenteamSubtitle(Subtitle):
class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
provider_name = "argenteam"
languages = {Language.fromalpha2(l) for l in ["es"]}
# Safe to assume every subtitle from Argenteam is Latam Spanish
languages = {Language("spa", "MX")}
video_types = (Episode, Movie)
subtitle_class = ArgenteamSubtitle
hearing_impaired_verifiable = False
@ -59,9 +60,9 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
def initialize(self):
self.session = Session()
self.session.headers = {
"User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
}
self.session.headers.update(
{"User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")}
)
def terminate(self):
self.session.close()
@ -75,48 +76,38 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
is_episode = True
query = f"{title} S{kwargs['season']:02}E{kwargs['episode']:02}"
logger.info(f"Searching ID (episode: {is_episode}) for {query}")
logger.debug(f"Searching ID (episode: {is_episode}) for {query}")
r = self.session.get(API_URL + "search", params={"q": query}, timeout=10)
r.raise_for_status()
results = r.json()
match_ids = []
if results["total"] >= 1:
for result in results["results"]:
if (result["type"] == "episode" and not is_episode) or (
result["type"] == "movie" and is_episode
):
for result in results["results"]:
if result["type"] == "movie" and is_episode:
continue
imdb = f"tt{result.get('imdb', 'n/a')}"
if not is_episode and imdb == kwargs.get("imdb_id"):
logger.debug("Movie matched by IMDB ID, taking shortcut")
match_ids = [result["id"]]
break
# advanced title check in case of multiple movie results
title_year = kwargs.get("year") and kwargs.get("title")
if results["total"] > 1 and not is_episode and title_year:
sanitized = sanitize(result["title"])
titles = [f"{sanitize(name)} {kwargs['year']}" for name in titles]
if sanitized not in titles:
continue
# shortcut in case of matching imdb id (don't match NoneType)
if not is_episode and f"tt{result.get('imdb', 'n/a')}" == kwargs.get(
"imdb_id"
):
logger.debug(f"Movie matched by IMDB ID, taking shortcut")
match_ids = [result["id"]]
break
# advanced title check in case of multiple movie results
if results["total"] > 1:
if not is_episode and kwargs.get("year"):
if result["title"] and not (
sanitize(result["title"])
in (
"%s %s" % (sanitize(name), kwargs.get("year"))
for name in titles
)
):
continue
match_ids.append(result["id"])
else:
logger.error(f"No episode ID found for {query}")
match_ids.append(result["id"])
if match_ids:
logger.debug(
f"Found matching IDs: {', '.join(str(id) for id in match_ids)}"
)
ids = ", ".join(str(id) for id in match_ids)
logger.debug("Found matching IDs: %s", ids)
else:
logger.debug("Nothing found from %s query", query)
return match_ids

@ -24,7 +24,7 @@ from subliminal_patch.providers import Provider
from guessit import guessit
CLEAN_TITLE_RES = [
_CLEAN_TITLE_RES = [
(r"subt[ií]tulos de", ""),
(r"´|`", "'"),
(r" {2,}", " "),
@ -82,7 +82,7 @@ class SubdivxSubtitle(Subtitle):
class SubdivxSubtitlesProvider(Provider):
provider_name = "subdivx"
hash_verifiable = False
languages = {Language.fromalpha2(lang) for lang in ["es"]}
languages = {Language("spa", "MX")} | {Language.fromalpha2("es")}
subtitle_class = SubdivxSubtitle
server_url = "https://www.subdivx.com/"
@ -176,22 +176,28 @@ class SubdivxSubtitlesProvider(Provider):
for subtitle in range(0, len(title_soups)):
title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]
# title
title = self._clean_title(title_soup.find("a").text)
# filter by year
if video.year and str(video.year) not in title:
continue
page_link = title_soup.find("a")["href"]
# Data
datos = body_soup.find("div", {"id": "buscador_detalle_sub_datos"}).text
# Ignore multi-disc and non-srt subtitles
if not any(item in datos for item in ("Cds:</b> 1", "SubRip")):
continue
spain = "/pais/7.gif" in datos
language = Language.fromalpha2("es") if spain else Language("spa", "MX")
# description
description = body_soup.find("div", {"id": "buscador_detalle_sub"}).text
description = description.replace(",", " ").lower()
sub_details = body_soup.find("div", {"id": "buscador_detalle_sub"}).text
description = sub_details.replace(",", " ").lower()
# uploader
uploader = body_soup.find("a", {"class": "link1"}).text
page_link = title_soup.find("a")["href"]
subtitle = self.subtitle_class(
language, video, page_link, title, description, uploader
@ -228,7 +234,7 @@ class SubdivxSubtitlesProvider(Provider):
Normalize apostrophes and spaces to avoid matching problems
(e.g. Subtitulos de Carlito´s Way -> Carlito's Way)
"""
for og, new in CLEAN_TITLE_RES:
for og, new in _CLEAN_TITLE_RES:
title = re.sub(og, new, title, flags=re.IGNORECASE)
return title

@ -17,8 +17,8 @@ from subzero.language import Language
logger = logging.getLogger(__name__)
SERVER_URL = "http://sapidb.caretas.club/"
PAGE_URL = "https://sucha.caretas.club/"
SERVER_URL = "http://sapidb.caretas.club"
PAGE_URL = "https://sucha.caretas.club"
UNDESIRED_FILES = ("[eng]", ".en.", ".eng.", ".fr.", ".pt.")
@ -53,41 +53,40 @@ class SuchaSubtitle(Subtitle):
return self.download_id
def get_matches(self, video):
type_ = "episode" if isinstance(video, Episode) else "movie"
self.found_matches |= guess_matches(
video,
guessit(
self.filename,
{"type": "episode" if isinstance(video, Episode) else "movie"},
),
guessit(self.filename, {"type": type_}),
)
self.found_matches |= guess_matches(
video,
guessit(
self.guessed_release_info,
{"type": "episode" if isinstance(video, Episode) else "movie"},
),
guessit(self.guessed_release_info, {"type": type_}),
)
return self.found_matches
class SuchaProvider(Provider):
"""Sucha Provider"""
languages = {Language.fromalpha2(l) for l in ["es"]}
# This is temporary. Castilian spanish subtitles may exist, but are rare
# and currently impossible to guess from the API.
languages = {Language("spa", "MX")}
language_list = list(languages)
video_types = (Episode, Movie)
def initialize(self):
self.session = Session()
self.session.headers = {
"User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
}
self.session.headers.update(
{"User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")}
)
def terminate(self):
self.session.close()
def query(self, languages, video):
movie_year = video.year if video.year else "0"
movie_year = video.year or "0"
is_episode = isinstance(video, Episode)
type_str = "episode" if is_episode else "movie"
language = self.language_list[0]
if is_episode:
@ -96,43 +95,37 @@ class SuchaProvider(Provider):
q = {"query": video.title, "year": movie_year}
logger.debug(f"Searching subtitles: {q}")
result = self.session.get(
SERVER_URL + ("episode" if is_episode else "movie"), params=q, timeout=10
)
result = self.session.get(f"{SERVER_URL}/{type_str}", params=q, timeout=10)
result.raise_for_status()
result_ = result.json()
results = result.json()
subtitles = []
for i in result_:
for item in results:
matches = set()
try:
if (
video.title.lower() in i["title"].lower()
or video.title.lower() in i["alt_title"].lower()
):
matches.add("title")
except TypeError:
title = item.get("title", "").lower()
alt_title = item.get("alt_title", title).lower()
if not title:
logger.debug("No subtitles found")
return []
if is_episode:
if (
q["query"].lower() in i["title"].lower()
or q["query"].lower() in i["alt_title"].lower()
):
matches_ = ("title", "series", "season", "episode", "year")
[matches.add(match) for match in matches_]
if any(video.title.lower() in item for item in (title, alt_title)):
matches.add("title")
if str(i["year"]) == video.year:
if str(item["year"]) == video.year:
matches.add("year")
if is_episode and any(
    q["query"].lower() in item for item in (title, alt_title)
):
    # set.update() iterates every argument it receives, so bare strings
    # would add their individual characters; pass the names as one tuple.
    matches.update(("title", "series", "season", "episode", "year"))
subtitles.append(
SuchaSubtitle(
language,
i["release"],
i["filename"],
str(i["id"]),
"episode" if is_episode else "movie",
item["release"],
item["filename"],
str(item["id"]),
type_str,
matches,
)
)
@ -141,10 +134,6 @@ class SuchaProvider(Provider):
def list_subtitles(self, video, languages):
return self.query(languages, video)
def _check_response(self, response):
if response.status_code != 200:
raise ServiceUnavailable(f"Bad status code: {response.status_code}")
def _get_archive(self, content):
archive_stream = io.BytesIO(content)
@ -177,12 +166,11 @@ class SuchaProvider(Provider):
def download_subtitle(self, subtitle):
logger.info("Downloading subtitle %r", subtitle)
response = self.session.get(
SERVER_URL + "download",
f"{SERVER_URL}/download",
params={"id": subtitle.download_id, "type": subtitle.download_type},
timeout=10,
)
response.raise_for_status()
self._check_response(response)
archive = self._get_archive(response.content)
subtitle_file = self.get_file(archive)
subtitle.content = fix_line_ending(subtitle_file)

@ -57,7 +57,9 @@ class TuSubtituloSubtitle(Subtitle):
class TuSubtituloProvider(Provider):
"""TuSubtitulo.com Provider"""
languages = {Language.fromietf(lang) for lang in ["en", "es"]}
languages = {Language.fromietf(lang) for lang in ["en", "es"]} | {
Language("spa", "MX")
}
logger.debug(languages)
video_types = (Episode,)
@ -123,11 +125,13 @@ class TuSubtituloProvider(Provider):
try:
content = tables[tr + inc].find_all("td")
language = content[4].text
if "eng" in language.lower():
language = "en"
elif "esp" in language.lower():
language = "es"
language = content[4].text.lower()
if "eng" in language:
language = Language.fromietf("en")
elif "lat" in language:
language = Language("spa", "MX")
elif "esp" in language:
language = Language.fromietf("es")
else:
language = None
@ -236,7 +240,7 @@ class TuSubtituloProvider(Provider):
matches.update(["title", "series", "season", "episode", "year"])
subtitles.append(
TuSubtituloSubtitle(
Language.fromietf(sub["language"]),
sub["language"],
sub,
matches,
)

@ -32,10 +32,15 @@ repl_map = {
"tib": "bo",
}
CUSTOM_LIST = ["chs", "sc", "zhs", "hans", "gb", u"", u"双语",
"cht", "tc", "zht", "hant", "big5", u"", u"雙語",
"spl", "ea", "pob", "pb"]
ALPHA2_LIST = list(set(filter(lambda x: x, map(lambda x: x.alpha2, LANGUAGE_MATRIX)))) + list(repl_map.values())
ALPHA3b_LIST = list(set(filter(lambda x: x, map(lambda x: x.alpha3, LANGUAGE_MATRIX)))) + \
list(set(filter(lambda x: len(x) == 3, list(repl_map.keys()))))
FULL_LANGUAGE_LIST = ALPHA2_LIST + ALPHA3b_LIST
FULL_LANGUAGE_LIST.extend(CUSTOM_LIST)
def language_from_stream(l):

Loading…
Cancel
Save