From 8799938b4e29d5f496f6dd49cd6e376edd7bb8b5 Mon Sep 17 00:00:00 2001 From: panni Date: Sun, 20 Oct 2019 06:07:10 +0200 Subject: [PATCH] core: update to subliminal_patch:head; addic7ed: show ids fetching --- libs/subliminal_patch/core.py | 2 +- libs/subliminal_patch/providers/addic7ed.py | 96 ++++++++++++++------- 2 files changed, 64 insertions(+), 34 deletions(-) diff --git a/libs/subliminal_patch/core.py b/libs/subliminal_patch/core.py index 46d701dc8..5057eb9b1 100644 --- a/libs/subliminal_patch/core.py +++ b/libs/subliminal_patch/core.py @@ -62,7 +62,7 @@ class SZProviderPool(ProviderPool): def __init__(self, providers=None, provider_configs=None, blacklist=None, throttle_callback=None, pre_download_hook=None, post_download_hook=None, language_hook=None): #: Name of providers to use - self.providers = providers or provider_registry.names() + self.providers = providers #: Provider configuration self.provider_configs = provider_configs or {} diff --git a/libs/subliminal_patch/providers/addic7ed.py b/libs/subliminal_patch/providers/addic7ed.py index 1e04821b0..86e1a810a 100644 --- a/libs/subliminal_patch/providers/addic7ed.py +++ b/libs/subliminal_patch/providers/addic7ed.py @@ -6,9 +6,11 @@ import subliminal import time from random import randint + +from dogpile.cache.api import NO_VALUE from requests import Session from subliminal.cache import region -from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError +from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError, ConfigurationError from subliminal.providers.addic7ed import Addic7edProvider as _Addic7edProvider, \ Addic7edSubtitle as _Addic7edSubtitle, ParserBeautifulSoup from subliminal.subtitle import fix_line_ending @@ -68,11 +70,15 @@ class Addic7edProvider(_Addic7edProvider): server_url = 'https://www.addic7ed.com/' sanitize_characters = {'-', ':', '(', ')', '.', '/'} + last_show_ids_fetch_key = "addic7ed_last_id_fetch" def __init__(self, username=None, password=None, use_random_agents=False): super(Addic7edProvider, self).__init__(username=username, password=password) self.USE_ADDICTED_RANDOM_AGENTS = use_random_agents + if not all((username, password)): + raise ConfigurationError('Username and password must be specified') + def initialize(self): self.session = Session() self.session.headers['User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__ @@ -103,7 +109,8 @@ class Addic7edProvider(_Addic7edProvider): 'remember': 'true'} tries = 0 - while tries < 3: + while tries <= 3: + tries += 1 r = self.session.get(self.server_url + 'login.php', timeout=10, headers={"Referer": self.server_url}) if "g-recaptcha" in r.content or "grecaptcha" in r.content: logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, but should only ' @@ -125,7 +132,10 @@ class Addic7edProvider(_Addic7edProvider): result = pitcher.throw() if not result: - raise Exception("Addic7ed: Couldn't solve captcha!") + if tries >= 3: + raise Exception("Addic7ed: Couldn't solve captcha!") + logger.info("Addic7ed: Couldn't solve captcha! Retrying") + continue data[g] = result @@ -135,12 +145,15 @@ class Addic7edProvider(_Addic7edProvider): if "relax, slow down" in r.content: raise TooManyRequests(self.username) - if "Try again" in r.content or "Wrong password" in r.content: + if "Wrong password" in r.content or "doesn't exist" in r.content: raise AuthenticationError(self.username) if r.status_code != 302: - logger.error("Addic7ed: Something went wrong when logging in") - raise AuthenticationError(self.username) + if tries >= 3: + logger.error("Addic7ed: Something went wrong when logging in") + raise AuthenticationError(self.username) + logger.info("Addic7ed: Something went wrong when logging in; retrying") + continue break store_verification("addic7ed", self.session) @@ -151,7 +164,7 @@ class Addic7edProvider(_Addic7edProvider): def terminate(self): self.session.close() - def get_show_id(self, series, year=None, country_code=None): + def get_show_id(self, series, year=None, country_code=None, ignore_cache=False): """Get the best matching show id for `series`, `year` and `country_code`. First search in the result of :meth:`_get_show_ids` and fallback on a search with :meth:`_search_show_id`. @@ -163,32 +176,46 @@ class Addic7edProvider(_Addic7edProvider): :type country_code: str :return: the show id, if found. :rtype: int - """ - series_sanitized = sanitize(series).lower() - show_ids = self._get_show_ids() show_id = None - - # attempt with country - if not show_id and country_code: - logger.debug('Getting show id with country') - show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower())) - - # attempt with year - if not show_id and year: - logger.debug('Getting show id with year') - show_id = show_ids.get('%s %d' % (series_sanitized, year)) - - # attempt clean - if not show_id: - logger.debug('Getting show id') - show_id = show_ids.get(series_sanitized) - - # search as last resort - # broken right now - # if not show_id: - # logger.warning('Series %s not found in show ids', series) - # show_id = self._search_show_id(series) + show_ids = {sanitize(series).lower(), sanitize(series.replace(".", "")).lower()} + logger.debug("Trying show ids: %s", show_ids) + for series_sanitized in show_ids: + if not ignore_cache: + show_ids = self._get_show_ids() + else: + show_ids = self._get_show_ids.refresh(self) + + # attempt with country + if not show_id and country_code: + logger.debug('Getting show id with country') + show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower())) + + # attempt with year + if not show_id and year: + logger.debug('Getting show id with year') + show_id = show_ids.get('%s %d' % (series_sanitized, year)) + + # attempt clean + if not show_id: + logger.debug('Getting show id') + show_id = show_ids.get(series_sanitized) + + if not show_id: + now = datetime.datetime.now() + last_fetch = region.get(self.last_show_ids_fetch_key) + + # re-fetch show ids once per day if any show ID not found + if not ignore_cache and last_fetch != NO_VALUE and last_fetch + datetime.timedelta(days=1) < now: + logger.info("Show id not found; re-fetching show ids") + return self.get_show_id(series, year=year, country_code=country_code, ignore_cache=True) + logger.debug("Not refreshing show ids, as the last fetch has been too recent") + + # search as last resort + # broken right now + # if not show_id: + # logger.warning('Series %s not found in show ids', series) + # show_id = self._search_show_id(series) return show_id @@ -202,6 +229,8 @@ class Addic7edProvider(_Addic7edProvider): """ # get the show page logger.info('Getting show ids') + region.set(self.last_show_ids_fetch_key, datetime.datetime.now()) + r = self.session.get(self.server_url + 'shows.php', timeout=10) r.raise_for_status() @@ -210,14 +239,15 @@ class Addic7edProvider(_Addic7edProvider): # Assuming the site's markup is bad, and stripping it down to only contain what's needed. show_cells = re.findall(show_cells_re, r.content) if show_cells: - soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser']) + soup = ParserBeautifulSoup(b''.join(show_cells).decode('utf-8', 'ignore'), ['lxml', 'html.parser']) else: # If RegEx fails, fall back to original r.content and use 'html.parser' soup = ParserBeautifulSoup(r.content, ['html.parser']) # populate the show ids show_ids = {} - for show in soup.select('td > h3 > a[href^="/show/"]'): + shows = soup.select('td > h3 > a[href^="/show/"]') + for show in shows: show_clean = sanitize(show.text, default_characters=self.sanitize_characters) try: show_id = int(show['href'][6:])