# -*- coding: utf-8 -*-
from __future__ import absolute_import

from babelfish import LanguageReverseConverter
from subliminal.exceptions import ConfigurationError


class zimukuConverter(LanguageReverseConverter):
    """Translate between language triples and the labels/codes used by zimuku.

    The forward direction (``convert``) turns an ``(alpha3, country, script)``
    triple into a zimuku code; the reverse direction (``reverse``) turns a
    zimuku label or code back into a tuple that starts with the alpha3 code.

    NOTE: the class name is intentionally left lower-case. The converter is
    registered by its dotted path
    (``subliminal_patch.converters.zimuku:zimukuConverter``), so renaming it
    would break that registration.
    """

    def __init__(self):
        simplified = ('zho', 'CN', None)
        traditional = ('zho', 'TW', None)
        english = ('eng',)

        # zimuku label or code -> language tuple.
        self.from_zimuku = {
            u'简体': simplified,
            u'繁体': traditional,
            u'簡體': simplified,
            u'繁體': traditional,
            u'英文': english,
            u'chs': simplified,
            u'cht': traditional,
            u'chn': simplified,
            u'twn': traditional,
            # ``convert`` emits u'eng' for English, so it must be readable
            # back; without this entry reverse(convert('eng')) would raise.
            u'eng': english,
        }

        # language triple -> zimuku code. Chinese without a country falls
        # back to the simplified code.
        self.to_zimuku = {
            ('zho', 'CN', None): u'chs',
            ('zho', 'TW', None): u'cht',
            ('eng', None, None): u'eng',
            ('zho', None, None): u'chs',
        }

        # Every label/code that ``reverse`` accepts.
        self.codes = set(self.from_zimuku)

    def convert(self, alpha3, country=None, script=None):
        """Return the zimuku code for the given language triple.

        :param alpha3: ISO 639-3 language code, e.g. ``'zho'``.
        :param country: optional country code, e.g. ``'CN'``.
        :param script: optional script code.
        :raises ConfigurationError: if the triple has no zimuku mapping.
        """
        triple = (alpha3, country, script)
        if triple not in self.to_zimuku:
            raise ConfigurationError('Unsupported language for zimuku: %s, %s, %s' % (alpha3, country, script))

        return self.to_zimuku[triple]

    def reverse(self, zimuku):
        """Return the language tuple for a zimuku label or code.

        :param zimuku: a key of ``from_zimuku`` (e.g. ``u'简体'`` or ``u'cht'``).
        :raises ConfigurationError: if the label/code is unknown.
        """
        if zimuku not in self.from_zimuku:
            raise ConfigurationError('Unsupported language code for zimuku: %s' % zimuku)

        return self.from_zimuku[zimuku]
"正體中文", "中英雙語", "中日雙語","中法雙語"] + simplified_chinese = ["chs", "sc", "zhs", "hans","zh-hans", "gb", "简", "简中", "简体", "简体中文", "中英双语", "中日双语","中法双语","简体&英文"] + traditional_chinese = ["cht", "tc", "zht", "hant","zh-hant", "big5", "繁", "繁中", "繁体", "繁體","繁体中文", "繁體中文", "正體中文", "中英雙語", "中日雙語","中法雙語","繁体&英文"] FULL_LANGUAGE_LIST.extend(simplified_chinese) FULL_LANGUAGE_LIST.extend(traditional_chinese) p_root = p_root.replace('zh-TW', 'zht') diff --git a/libs/subliminal_patch/providers/zimuku.py b/libs/subliminal_patch/providers/zimuku.py index ac05ef720..8ab897fde 100644 --- a/libs/subliminal_patch/providers/zimuku.py +++ b/libs/subliminal_patch/providers/zimuku.py @@ -13,23 +13,30 @@ except ImportError: from urllib.parse import urljoin import rarfile +from babelfish import language_converters from subzero.language import Language from guessit import guessit from requests import Session from six import text_type from subliminal import __short_version__ -from subliminal.providers import ParserBeautifulSoup, Provider +from subliminal.providers import ParserBeautifulSoup +from subliminal_patch.providers import Provider from subliminal.subtitle import ( SUBTITLE_EXTENSIONS, + fix_line_ending + ) +from subliminal_patch.subtitle import ( Subtitle, - fix_line_ending, - guess_matches, + guess_matches ) from subliminal.video import Episode, Movie logger = logging.getLogger(__name__) +language_converters.register('zimuku = subliminal_patch.converters.zimuku:zimukuConverter') + +supported_languages = list(language_converters['zimuku'].to_zimuku.keys()) class ZimukuSubtitle(Subtitle): """Zimuku Subtitle.""" @@ -77,7 +84,8 @@ class ZimukuSubtitle(Subtitle): class ZimukuProvider(Provider): """Zimuku Provider.""" - languages = {Language(l) for l in ["zho", "eng"]} + languages = {Language(*l) for l in supported_languages} + logger.info(str(supported_languages)) server_url = "http://www.zimuku.la" search_url = "/search?q={}" @@ -114,11 +122,18 @@ class ZimukuProvider(Provider): language = 
Language("eng") for img in sub.find("td", class_="tac lang").find_all("img"): if ( - "hongkong" in img.attrs["src"] - or "china" in img.attrs["src"] + "china" in img.attrs["src"] + and "hongkong" in img.attrs["src"] + ): + language = Language("zho").add(Language('zho', 'TW', None)) + logger.debug("language:"+str(language)) + elif ( + "china" in img.attrs["src"] or "jollyroger" in img.attrs["src"] ): language = Language("zho") + elif "hongkong" in img.attrs["src"]: + language = Language('zho', 'TW', None) break sub_page_link = urljoin(self.server_url, a.attrs["href"]) backup_session = copy.deepcopy(self.session) @@ -189,7 +204,7 @@ class ZimukuProvider(Provider): new_subs = self._parse_episode_page(episode_link, subs_year) subtitles += new_subs - # NOTE: shooter result pages are ignored due to the existence of assrt provider + # NOTE: shooter result pages are ignored due to the existence of zimuku provider return subtitles @@ -305,15 +320,15 @@ def _get_subtitle_from_archive(archive): if not subname.lower().endswith(SUBTITLE_EXTENSIONS): continue - # prefer ass/ssa subtitles with double languages or simplified chinese - score = ("ass" in subname or "ssa" in subname) * 1 + # prefer ass/ssa/srt subtitles with double languages or simplified/traditional chinese + score = ("ass" in subname or "ssa" in subname or "srt" in subname) * 1 if "简体" in subname or "chs" in subname or ".gb." in subname: score += 2 if "繁体" in subname or "cht" in subname or ".big5." 
in subname: - pass - if "chs.eng" in subname or "chs&eng" in subname: score += 2 - if "中英" in subname or "简英" in subname or "双语" in subname or "简体&英文" in subname: + if "chs.eng" in subname or "chs&eng" in subname or "cht.eng" in subname or "cht&eng" in subname: + score += 2 + if "中英" in subname or "简英" in subname or "繁英" in subname or "双语" in subname or "简体&英文" in subname or "繁体&英文" in subname: score += 4 logger.debug("subtitle {}, score: {}".format(subname, score)) if score > max_score: @@ -374,4 +389,4 @@ def num_to_cn(number): else: part1 = "十" if number[0] == "1" else trans_map[number[0]] + "十" part2 = trans_map[number[1]] if number[1] != "0" else "" - return part1 + part2 + return part1 + part2 \ No newline at end of file