diff --git a/libs/subliminal_patch/converters/titlovi.py b/libs/subliminal_patch/converters/titlovi.py
index 940507d4f..761cf79a6 100644
--- a/libs/subliminal_patch/converters/titlovi.py
+++ b/libs/subliminal_patch/converters/titlovi.py
@@ -27,16 +27,6 @@ class TitloviConverter(LanguageReverseConverter):
         }
         self.codes = set(self.from_titlovi.keys())
 
-        # temporary fix, should be removed as soon as API is used
-        self.lang_from_countrycode = {'ba': ('bos',),
-                                      'en': ('eng',),
-                                      'hr': ('hrv',),
-                                      'mk': ('mkd',),
-                                      'rs': ('srp',),
-                                      'rsc': ('srp', None, 'Cyrl'),
-                                      'si': ('slv',)
-                                      }
-
     def convert(self, alpha3, country=None, script=None):
         if (alpha3, country, script) in self.to_titlovi:
             return self.to_titlovi[(alpha3, country, script)]
@@ -49,9 +39,5 @@ class TitloviConverter(LanguageReverseConverter):
         if titlovi in self.from_titlovi:
             return self.from_titlovi[titlovi]
 
-        # temporary fix, should be removed as soon as API is used
-        if titlovi in self.lang_from_countrycode:
-            return self.lang_from_countrycode[titlovi]
-
        raise ConfigurationError('Unsupported language number for titlovi: %s' % titlovi)
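
Note: with the temporary country-code fallback gone, every code handed to `Language.fromtitlovi()` has to be present in the converter's own `from_titlovi` table. A minimal sketch of that two-way mapping pattern, using made-up codes rather than the converter's real table:

```python
# Illustrative only: a tiny stand-in for the TitloviConverter mapping pattern.
# The real tables live in libs/subliminal_patch/converters/titlovi.py.
class TinyConverter(object):
    def __init__(self):
        # (alpha3, country, script) -> provider code, plus the reverse lookup
        self.to_provider = {('eng', None, None): 'english',
                            ('srp', None, 'Cyrl'): 'cyrillic'}
        self.from_provider = {v: k for k, v in self.to_provider.items()}

    def convert(self, alpha3, country=None, script=None):
        return self.to_provider[(alpha3, country, script)]

    def reverse(self, code):
        # no country-code fallback any more: unknown codes simply raise KeyError
        return self.from_provider[code]
```
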
diff --git a/libs/subliminal_patch/core.py b/libs/subliminal_patch/core.py
index bb9b3752b..9e96754dd 100644
--- a/libs/subliminal_patch/core.py
+++ b/libs/subliminal_patch/core.py
@@ -30,7 +30,7 @@ from subliminal.core import guessit, ProviderPool, io, is_windows_special_path, \
     ThreadPoolExecutor, check_video
 from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError
 
-from subzero.language import Language
+from subzero.language import Language, ENDSWITH_LANGUAGECODE_RE
 from scandir import scandir, scandir_generic as _scandir_generic
 
 logger = logging.getLogger(__name__)
@@ -571,12 +571,14 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski
     return video
 
 
-def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False):
+def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False, match_strictness="strict"):
     dirpath, filename = os.path.split(path)
     dirpath = dirpath or '.'
-    fileroot, fileext = os.path.splitext(filename)
+    fn_no_ext, fileext = os.path.splitext(filename)
+    fn_no_ext_lower = fn_no_ext.lower()
     subtitles = {}
     _scandir = _scandir_generic if scandir_generic else scandir
+
     for entry in _scandir(dirpath):
         if (not entry.name or entry.name in ('\x0c', '$', ',', '\x7f')) and not scandir_generic:
             logger.debug('Could not determine the name of the file, retrying with scandir_generic')
@@ -587,9 +589,11 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
         p = entry.name
 
         # keep only valid subtitle filenames
-        if not p.lower().startswith(fileroot.lower()) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
+        if not p.lower().endswith(SUBTITLE_EXTENSIONS):
             continue
 
+        # not p.lower().startswith(fileroot.lower()) or not
+
         p_root, p_ext = os.path.splitext(p)
         if not INCLUDE_EXOTIC_SUBS and p_ext not in (".srt", ".ass", ".ssa", ".vtt"):
             continue
@@ -608,7 +612,19 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
             forced = "forced" in adv_tag
 
         # extract the potential language code
-        language_code = p_root[len(fileroot):].replace('_', '-')[1:]
+        language_code = p_root.rsplit(".", 1)[1].replace('_', '-')
+
+        # remove possible language code for matching
+        p_root_bare = ENDSWITH_LANGUAGECODE_RE.sub("", p_root)
+
+        p_root_lower = p_root_bare.lower()
+
+        filename_matches = p_root_lower == fn_no_ext_lower
+        filename_contains = p_root_lower in fn_no_ext_lower
+
+        if not filename_matches:
+            if match_strictness == "strict" or (match_strictness == "loose" and not filename_contains):
+                continue
 
         # default language is undefined
         language = Language('und')
@@ -632,7 +648,7 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
     return subtitles
 
 
-def search_external_subtitles(path, languages=None, only_one=False):
+def search_external_subtitles(path, languages=None, only_one=False, match_strictness="strict"):
     """
     wrap original search_external_subtitles function to search multiple paths for one given video
     # todo: cleanup and merge with _search_external_subtitles
@@ -653,10 +669,11 @@ def search_external_subtitles(path, languages=None, only_one=False):
         if os.path.isdir(os.path.dirname(abspath)):
             try:
                 subtitles.update(_search_external_subtitles(abspath, languages=languages,
-                                                            only_one=only_one))
+                                                            only_one=only_one, match_strictness=match_strictness))
             except OSError:
                 subtitles.update(_search_external_subtitles(abspath, languages=languages,
-                                                            only_one=only_one, scandir_generic=True))
+                                                            only_one=only_one, match_strictness=match_strictness,
+                                                            scandir_generic=True))
     logger.debug("external subs: found %s", subtitles)
 
     return subtitles
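
Note: the new `match_strictness` keyword decides which external subtitle files get paired with a video. A standalone sketch of the comparison introduced above, assuming the same trailing-language-code regex this patch adds to `subzero.language` (file names below are invented):

```python
import os
import re

# same pattern this patch adds as subzero.language.ENDSWITH_LANGUAGECODE_RE
ENDSWITH_LANGUAGECODE_RE = re.compile(r"\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")


def subtitle_matches(video_filename, sub_filename, match_strictness="strict"):
    """Mirror of the filename check in _search_external_subtitles."""
    video_root = os.path.splitext(video_filename)[0].lower()
    sub_root = os.path.splitext(sub_filename)[0]
    # drop a trailing ".en" / ".sr-Latn" style tag before comparing
    sub_root = ENDSWITH_LANGUAGECODE_RE.sub("", sub_root).lower()

    if sub_root == video_root:
        return True                       # exact match is always accepted
    if match_strictness == "strict":
        return False                      # anything else is skipped
    if match_strictness == "loose":
        return sub_root in video_root     # e.g. a shortened release name
    return True                           # any other value accepts everything


# subtitle_matches("Movie.2019.1080p.mkv", "Movie.2019.1080p.en.srt")     -> True
# subtitle_matches("Movie.2019.1080p.mkv", "Movie.2019.en.srt")           -> False
# subtitle_matches("Movie.2019.1080p.mkv", "Movie.2019.en.srt", "loose")  -> True
```
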
diff --git a/libs/subliminal_patch/providers/titlovi.py b/libs/subliminal_patch/providers/titlovi.py
index c0a1ffa11..9be0a92f6 100644
--- a/libs/subliminal_patch/providers/titlovi.py
+++ b/libs/subliminal_patch/providers/titlovi.py
@@ -2,42 +2,35 @@
 import io
 import logging
-import math
 import re
-import time
+from datetime import datetime
+import dateutil.parser
 
 import rarfile
-from bs4 import BeautifulSoup
 from zipfile import ZipFile, is_zipfile
 from rarfile import RarFile, is_rarfile
 from babelfish import language_converters, Script
-from requests import RequestException
+from requests import RequestException, codes as request_codes
 from guessit import guessit
 from subliminal_patch.http import RetryingCFSession
 from subliminal_patch.providers import Provider
 from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
 from subliminal_patch.subtitle import Subtitle
 from subliminal_patch.utils import sanitize, fix_inconsistent_naming as _fix_inconsistent_naming
-from subliminal.exceptions import ProviderError
+from subliminal.exceptions import ProviderError, AuthenticationError, ConfigurationError
 from subliminal.score import get_equivalent_release_groups
 from subliminal.utils import sanitize_release_group
 from subliminal.subtitle import guess_matches
 from subliminal.video import Episode, Movie
 from subliminal.subtitle import fix_line_ending
-from subliminal_patch.pitcher import pitchers, load_verification, store_verification
-from subzero.language import Language
-from random import randint
-from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
+from subzero.language import Language
+from dogpile.cache.api import NO_VALUE
+from subliminal.cache import region
 
 # parsing regex definitions
 title_re = re.compile(r'(?P<title>(?:.+(?= [Aa][Kk][Aa] ))|.+)(?:(?:.+)(?P<altitle>(?<= [Aa][Kk][Aa] ).+))?')
-lang_re = re.compile(r'(?<=flags/)(?P<lang>.{2})(?:.)(?P<script>c?)(?:.+)')
-season_re = re.compile(r'Sezona (?P<season>\d+)')
-episode_re = re.compile(r'Epizoda (?P<episode>\d+)')
-year_re = re.compile(r'(?P<year>\d+)')
-fps_re = re.compile(r'fps: (?P<fps>.+)')
@@ -51,6 +44,7 @@ def fix_inconsistent_naming(title):
     return _fix_inconsistent_naming(title, {"DC's Legends of Tomorrow": "Legends of Tomorrow",
                                             "Marvel's Jessica Jones": "Jessica Jones"})
 
+
 logger = logging.getLogger(__name__)
 
 # Configure :mod:`rarfile` to use the same path separator as :mod:`zipfile`
@@ -62,9 +56,9 @@ language_converters.register('titlovi = subliminal_patch.converters.titlovi:Titl
 
 class TitloviSubtitle(Subtitle):
     provider_name = 'titlovi'
 
-    def __init__(self, language, page_link, download_link, sid, releases, title, alt_title=None, season=None,
-                 episode=None, year=None, fps=None, asked_for_release_group=None, asked_for_episode=None):
-        super(TitloviSubtitle, self).__init__(language, page_link=page_link)
+    def __init__(self, language, download_link, sid, releases, title, alt_title=None, season=None,
+                 episode=None, year=None, rating=None, download_count=None, asked_for_release_group=None, asked_for_episode=None):
+        super(TitloviSubtitle, self).__init__(language)
         self.sid = sid
         self.releases = self.release_info = releases
         self.title = title
@@ -73,11 +67,21 @@ class TitloviSubtitle(Subtitle):
         self.episode = episode
         self.year = year
         self.download_link = download_link
-        self.fps = fps
+        self.rating = rating
+        self.download_count = download_count
         self.matches = None
         self.asked_for_release_group = asked_for_release_group
         self.asked_for_episode = asked_for_episode
 
+    def __repr__(self):
+        if self.season and self.episode:
+            return '<%s "%s (%r)" s%.2de%.2d [%s:%s] ID:%r R:%.2f D:%r>' % (
+                self.__class__.__name__, self.title, self.year, self.season, self.episode, self.language, self._guessed_encoding, self.sid,
+                self.rating, self.download_count)
+        else:
+            return '<%s "%s (%r)" [%s:%s] ID:%r R:%.2f D:%r>' % (
+                self.__class__.__name__, self.title, self.year, self.language, self._guessed_encoding, self.sid, self.rating, self.download_count)
+
     @property
     def id(self):
         return self.sid
@@ -134,20 +138,62 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
     subtitle_class = TitloviSubtitle
     languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')}
-    server_url = 'https://titlovi.com'
-    search_url = server_url + '/titlovi/?'
-    download_url = server_url + '/download/?type=1&mediaid='
+    api_url = 'https://kodi.titlovi.com/api/subtitles'
+    api_gettoken_url = api_url + '/gettoken'
+    api_search_url = api_url + '/search'
+
+    def __init__(self, username=None, password=None):
+        if not all((username, password)):
+            raise ConfigurationError('Username and password must be specified')
+
+        self.username = username
+        self.password = password
+
+        self.session = None
+
+        self.user_id = None
+        self.login_token = None
+        self.token_exp = None
 
     def initialize(self):
         self.session = RetryingCFSession()
         #load_verification("titlovi", self.session)
+        token = region.get("titlovi_token")
+        if token is not NO_VALUE:
+            self.user_id, self.login_token, self.token_exp = token
+            if datetime.now() > self.token_exp:
+                logger.debug('Token expired')
+                self.log_in()
+            else:
+                logger.debug('Use cached token')
+        else:
+            logger.debug('Token not found in cache')
+            self.log_in()
+
+    def log_in(self):
+        login_params = dict(username=self.username, password=self.password, json=True)
+        try:
+            response = self.session.post(self.api_gettoken_url, params=login_params)
+            if response.status_code == request_codes.ok:
+                resp_json = response.json()
+                self.login_token = resp_json.get('Token')
+                self.user_id = resp_json.get('UserId')
+                self.token_exp = dateutil.parser.parse(resp_json.get('ExpirationDate'))
+
+                region.set("titlovi_token", [self.user_id, self.login_token, self.token_exp])
+                logger.debug('New token obtained')
+
+            elif response.status_code == request_codes.unauthorized:
+                raise AuthenticationError('Login failed')
+
+        except RequestException as e:
+            logger.error(e)
 
     def terminate(self):
         self.session.close()
 
-    def query(self, languages, title, season=None, episode=None, year=None, video=None):
-        items_per_page = 10
-        current_page = 1
+    def query(self, languages, title, season=None, episode=None, year=None, imdb_id=None, video=None):
+        search_params = dict()
 
         used_languages = languages
         lang_strings = [str(lang) for lang in used_languages]
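
Note: the provider now logs in once, caches the token triple in subliminal's dogpile `region`, and only re-authenticates after `ExpirationDate` has passed. A trimmed sketch of the same pattern outside the provider class; a plain dict stands in for the cache region here, error handling is reduced to `raise_for_status()`, and the endpoint plus the `Token` / `UserId` / `ExpirationDate` field names are the ones used in the hunk above:

```python
from datetime import datetime

import dateutil.parser
import requests

API_GETTOKEN_URL = "https://kodi.titlovi.com/api/subtitles/gettoken"


def get_token(cache, username, password):
    """Return (user_id, token), reusing the cached pair while it is still valid."""
    cached = cache.get("titlovi_token")
    if cached is not None:
        user_id, token, expires_at = cached
        if datetime.now() < expires_at:
            return user_id, token                 # cached token still valid

    params = dict(username=username, password=password, json=True)
    response = requests.post(API_GETTOKEN_URL, params=params, timeout=10)
    response.raise_for_status()                   # a 401 here means bad credentials
    data = response.json()

    user_id = data.get("UserId")
    token = data.get("Token")
    expires_at = dateutil.parser.parse(data.get("ExpirationDate"))
    cache["titlovi_token"] = (user_id, token, expires_at)
    return user_id, token
```
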
@@ -162,135 +208,73 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
         langs = '|'.join(map(str, [l.titlovi for l in used_languages]))
 
         # set query params
-        params = {'prijevod': title, 'jezik': langs}
+        search_params['query'] = title
+        search_params['lang'] = langs
 
         is_episode = False
         if season and episode:
             is_episode = True
-            params['s'] = season
-            params['e'] = episode
-        if year:
-            params['g'] = year
+            search_params['season'] = season
+            search_params['episode'] = episode
+        #if year:
+        #    search_params['year'] = year
+        if imdb_id:
+            search_params['imdbID'] = imdb_id
 
         # loop through paginated results
-        logger.info('Searching subtitles %r', params)
+        logger.info('Searching subtitles %r', search_params)
         subtitles = []
+        query_results = []
 
-        while True:
-            # query the server
-            try:
-                r = self.session.get(self.search_url, params=params, timeout=10)
-                r.raise_for_status()
-            except RequestException as e:
-                logger.exception('RequestException %s', e)
-                break
+        try:
+            search_params['token'] = self.login_token
+            search_params['userid'] = self.user_id
+            search_params['json'] = True
+
+            response = self.session.get(self.api_search_url, params=search_params)
+            resp_json = response.json()
+            if resp_json['SubtitleResults']:
+                query_results.extend(resp_json['SubtitleResults'])
+
+
+        except Exception as e:
+            logger.error(e)
+
+        for sub in query_results:
+
+            # title and alternate title
+            match = title_re.search(sub.get('Title'))
+            if match:
+                _title = match.group('title')
+                alt_title = match.group('altitle')
             else:
-                try:
-                    soup = BeautifulSoup(r.content, 'lxml')
-
-                    # number of results
-                    result_count = int(soup.select_one('.results_count b').string)
-                except:
-                    result_count = None
-
-                # exit if no results
-                if not result_count:
-                    if not subtitles:
-                        logger.debug('No subtitles found')
-                    else:
-                        logger.debug("No more subtitles found")
-                    break
-
-                # number of pages with results
-                pages = int(math.ceil(result_count / float(items_per_page)))
-
-                # get current page
-                if 'pg' in params:
-                    current_page = int(params['pg'])
-
-                try:
-                    sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
-                    for sub in sublist:
-                        # subtitle id
-                        sid = sub.find(attrs={'data-id': True}).attrs['data-id']
-                        # get download link
-                        download_link = self.download_url + sid
-                        # title and alternate title
-                        match = title_re.search(sub.a.string)
-                        if match:
-                            _title = match.group('title')
-                            alt_title = match.group('altitle')
-                        else:
-                            continue
-
-                        # page link
-                        page_link = self.server_url + sub.a.attrs['href']
-                        # subtitle language
-                        _lang = sub.select_one('.lang')
-                        match = lang_re.search(_lang.attrs.get('src', _lang.attrs.get('data-cfsrc', '')))
-                        if match:
-                            try:
-                                # decode language
-                                lang = Language.fromtitlovi(match.group('lang')+match.group('script'))
-                            except ValueError:
-                                continue
-
-                        # relase year or series start year
-                        match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
-                        if match:
-                            r_year = int(match.group('year'))
-                        # fps
-                        match = fps_re.search(sub.select_one('.fps').string)
-                        if match:
-                            fps = match.group('fps')
-                        # releases
-                        releases = str(sub.select_one('.fps').parent.contents[0].string)
-
-                        # handle movies and series separately
-                        if is_episode:
-                            # season and episode info
-                            sxe = sub.select_one('.s0xe0y').string
-                            r_season = None
-                            r_episode = None
-                            if sxe:
-                                match = season_re.search(sxe)
-                                if match:
-                                    r_season = int(match.group('season'))
-                                match = episode_re.search(sxe)
-                                if match:
-                                    r_episode = int(match.group('episode'))
-
-                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
-                                                           alt_title=alt_title, season=r_season, episode=r_episode,
-                                                           year=r_year, fps=fps,
-                                                           asked_for_release_group=video.release_group,
-                                                           asked_for_episode=episode)
-                        else:
-                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
-                                                           alt_title=alt_title, year=r_year, fps=fps,
-                                                           asked_for_release_group=video.release_group)
-                        logger.debug('Found subtitle %r', subtitle)
-
-                        # prime our matches so we can use the values later
-                        subtitle.get_matches(video)
-
-                        # add found subtitles
-                        subtitles.append(subtitle)
-
-                finally:
-                    soup.decompose()
-
-                # stop on last page
-                if current_page >= pages:
-                    break
-
-                # increment current page
-                params['pg'] = current_page + 1
-                logger.debug('Getting page %d', params['pg'])
+                continue
+
+            # handle movies and series separately
+            if is_episode:
+                subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')), sub.get('Link'), sub.get('Id'), sub.get('Release'), _title,
+                                               alt_title=alt_title, season=sub.get('Season'), episode=sub.get('Episode'),
+                                               year=sub.get('Year'), rating=sub.get('Rating'),
+                                               download_count=sub.get('DownloadCount'),
+                                               asked_for_release_group=video.release_group,
+                                               asked_for_episode=episode)
+            else:
+                subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')), sub.get('Link'), sub.get('Id'), sub.get('Release'), _title,
+                                               alt_title=alt_title, year=sub.get('Year'), rating=sub.get('Rating'),
+                                               download_count=sub.get('DownloadCount'),
+                                               asked_for_release_group=video.release_group)
+            logger.debug('Found subtitle %r', subtitle)
+
+            # prime our matches so we can use the values later
+            subtitle.get_matches(video)
+
+            # add found subtitles
+            subtitles.append(subtitle)
 
         return subtitles
 
     def list_subtitles(self, video, languages):
         season = episode = None
+
         if isinstance(video, Episode):
             title = video.series
             season = video.season
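
Note: pagination and HTML scraping are gone; one GET to the `/search` endpoint with the cached token returns everything under `SubtitleResults`. A compact sketch of that round trip, assuming the parameter and field names shown in the hunk above and leaving out the provider plumbing:

```python
import requests

API_SEARCH_URL = "https://kodi.titlovi.com/api/subtitles/search"


def search_subtitles(user_id, token, query, langs, season=None, episode=None, imdb_id=None):
    """Query the search endpoint and return the raw SubtitleResults list."""
    params = {
        "token": token,
        "userid": user_id,
        "query": query,      # sanitized title
        "lang": langs,       # pipe-joined converter codes, as built above
        "json": True,
    }
    if season and episode:
        params.update(season=season, episode=episode)
    if imdb_id:
        params["imdbID"] = imdb_id

    response = requests.get(API_SEARCH_URL, params=params, timeout=10)
    response.raise_for_status()
    return response.json().get("SubtitleResults") or []

# Each result is a dict with keys such as 'Title', 'Lang', 'Link', 'Id', 'Release',
# 'Season', 'Episode', 'Year', 'Rating' and 'DownloadCount', which is what the
# TitloviSubtitle constructor above consumes.
```
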
@@ -300,6 +284,7 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
         return [s for s in self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode,
                                       year=video.year,
+                                      imdb_id=video.imdb_id,
                                       video=video)]
 
     def download_subtitle(self, subtitle):
@@ -337,10 +322,12 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
         sub_to_extract = None
 
         for sub_name in subs_in_archive:
-            if not ('.cyr' in sub_name or '.cir' in sub_name):
+            _sub_name = sub_name.lower()
+
+            if not ('.cyr' in _sub_name or '.cir' in _sub_name or 'cyr)' in _sub_name):
                 sr_lat_subs.append(sub_name)
 
-            if ('.cyr' in sub_name or '.cir' in sub_name) and not '.lat' in sub_name:
+            if ('.cyr' in _sub_name or '.cir' in _sub_name) and not '.lat' in _sub_name:
                 sr_cyr_subs.append(sub_name)
 
         if subtitle.language == 'sr':
diff --git a/libs/subzero/language.py b/libs/subzero/language.py
index 0a3a5e775..a13bab160 100644
--- a/libs/subzero/language.py
+++ b/libs/subzero/language.py
@@ -1,5 +1,6 @@
 # coding=utf-8
 import types
+import re
 
 from babelfish.exceptions import LanguageError
 from babelfish import Language as Language_, basestr
@@ -134,3 +135,16 @@ class Language(Language_):
             return Language(*Language_.fromietf(s).__getstate__())
 
         return Language(*Language_.fromalpha3b(s).__getstate__())
+
+
+IETF_MATCH = ".+\.([^-.]+)(?:-[A-Za-z]+)?$"
+ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
+
+
+def match_ietf_language(s, ietf=False):
+    language_match = re.match(".+\.([^\.]+)$" if not ietf
+                              else IETF_MATCH, s)
+    if language_match and len(language_match.groups()) == 1:
+        language = language_match.groups()[0]
+        return language
+    return s
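
Note: `ENDSWITH_LANGUAGECODE_RE` is what lets the external-subtitle scan compare base names with the trailing language tag (and an optional IETF-style suffix) stripped off. A few worked examples with invented file names; the pattern is the one added above, just written as a raw string:

```python
import re

ENDSWITH_LANGUAGECODE_RE = re.compile(r"\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")

samples = [
    "Some.Movie.2019.1080p.en",       # -> "Some.Movie.2019.1080p"
    "Some.Movie.2019.1080p.sr-Latn",  # -> "Some.Movie.2019.1080p"
    "Some.Movie.2019.1080p",          # -> unchanged: "1080p" is too long to be a code
]
for name in samples:
    print(ENDSWITH_LANGUAGECODE_RE.sub("", name))
```
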
diff --git a/libs/subzero/lib/dict.py b/libs/subzero/lib/dict.py
index 3f327dcf4..929a9a642 100644
--- a/libs/subzero/lib/dict.py
+++ b/libs/subzero/lib/dict.py
@@ -107,6 +107,12 @@ class Dicked(object):
         for key, value in entries.iteritems():
             self.__dict__[key] = (Dicked(**value) if isinstance(value, dict) else value)
 
+    def has(self, key):
+        return self._entries is not None and key in self._entries
+
+    def get(self, key, default=None):
+        return self._entries.get(key, default) if self._entries else default
+
     def __repr__(self):
         return str(self)
diff --git a/libs/subzero/video.py b/libs/subzero/video.py
index 13db33ddf..160e1afec 100644
--- a/libs/subzero/video.py
+++ b/libs/subzero/video.py
@@ -17,7 +17,8 @@ def has_external_subtitle(part_id, stored_subs, language):
 
 def set_existing_languages(video, video_info, external_subtitles=False, embedded_subtitles=False, known_embedded=None,
-                           stored_subs=None, languages=None, only_one=False, known_metadata_subs=None):
+                           stored_subs=None, languages=None, only_one=False, known_metadata_subs=None,
+                           match_strictness="strict"):
     logger.debug(u"Determining existing subtitles for %s", video.name)
 
     external_langs_found = set()
@@ -27,7 +28,8 @@ def set_existing_languages(video, video_info, external_subtitles=False, embedded
         external_langs_found = known_metadata_subs
 
     external_langs_found.update(set(search_external_subtitles(video.name, languages=languages,
-                                                              only_one=only_one).values()))
+                                                              only_one=only_one,
+                                                              match_strictness=match_strictness).values()))
 
     # found external subtitles should be considered?
     if external_subtitles:
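
Note: end to end, the new keyword simply threads from `set_existing_languages()` down to the filename comparison in `subliminal_patch.core`. A hedged usage sketch; the path is a placeholder and the returned mapping is only illustrative:

```python
from subliminal_patch.core import search_external_subtitles

# "strict": the subtitle's base name (minus a trailing language tag) must equal
#           the video's base name
# "loose":  the subtitle's base name may merely be contained in the video's base name
# any other value: every subtitle file next to the video is accepted
found = search_external_subtitles(
    "/movies/Some.Movie.2019/Some.Movie.2019.1080p.mkv",  # placeholder path
    match_strictness="loose",
)
print(found)  # e.g. {'Some.Movie.2019.en.srt': <Language [en]>}
```
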