core: update to subliminal_patch:head; addic7ed: show ids fetching

pull/684/head
panni 5 years ago
parent e452394841
commit 8799938b4e

@@ -62,7 +62,7 @@ class SZProviderPool(ProviderPool):
 def __init__(self, providers=None, provider_configs=None, blacklist=None, throttle_callback=None,
 pre_download_hook=None, post_download_hook=None, language_hook=None):
 #: Name of providers to use
-self.providers = providers or provider_registry.names()
+self.providers = providers
 #: Provider configuration
 self.provider_configs = provider_configs or {}

@@ -6,9 +6,11 @@ import subliminal
 import time
 from random import randint
+from dogpile.cache.api import NO_VALUE
 from requests import Session
 from subliminal.cache import region
-from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError
+from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError, ConfigurationError
 from subliminal.providers.addic7ed import Addic7edProvider as _Addic7edProvider, \
 Addic7edSubtitle as _Addic7edSubtitle, ParserBeautifulSoup
 from subliminal.subtitle import fix_line_ending
@@ -68,11 +70,15 @@ class Addic7edProvider(_Addic7edProvider):
 server_url = 'https://www.addic7ed.com/'
 sanitize_characters = {'-', ':', '(', ')', '.', '/'}
+last_show_ids_fetch_key = "addic7ed_last_id_fetch"
 def __init__(self, username=None, password=None, use_random_agents=False):
 super(Addic7edProvider, self).__init__(username=username, password=password)
 self.USE_ADDICTED_RANDOM_AGENTS = use_random_agents
+if not all((username, password)):
+raise ConfigurationError('Username and password must be specified')
 def initialize(self):
 self.session = Session()
 self.session.headers['User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__
@@ -103,7 +109,8 @@ class Addic7edProvider(_Addic7edProvider):
 'remember': 'true'}
 tries = 0
-while tries < 3:
+while tries <= 3:
+tries += 1
 r = self.session.get(self.server_url + 'login.php', timeout=10, headers={"Referer": self.server_url})
 if "g-recaptcha" in r.content or "grecaptcha" in r.content:
 logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, but should only '
@@ -125,7 +132,10 @@ class Addic7edProvider(_Addic7edProvider):
 result = pitcher.throw()
 if not result:
-raise Exception("Addic7ed: Couldn't solve captcha!")
+if tries >= 3:
+raise Exception("Addic7ed: Couldn't solve captcha!")
+logger.info("Addic7ed: Couldn't solve captcha! Retrying")
+continue
 data[g] = result
@@ -135,12 +145,15 @@ class Addic7edProvider(_Addic7edProvider):
 if "relax, slow down" in r.content:
 raise TooManyRequests(self.username)
-if "Try again" in r.content or "Wrong password" in r.content:
+if "Wrong password" in r.content or "doesn't exist" in r.content:
 raise AuthenticationError(self.username)
 if r.status_code != 302:
-logger.error("Addic7ed: Something went wrong when logging in")
-raise AuthenticationError(self.username)
+if tries >= 3:
+logger.error("Addic7ed: Something went wrong when logging in")
+raise AuthenticationError(self.username)
+logger.info("Addic7ed: Something went wrong when logging in; retrying")
+continue
 break
 store_verification("addic7ed", self.session)
@@ -151,7 +164,7 @@ class Addic7edProvider(_Addic7edProvider):
 def terminate(self):
 self.session.close()
-def get_show_id(self, series, year=None, country_code=None):
+def get_show_id(self, series, year=None, country_code=None, ignore_cache=False):
 """Get the best matching show id for `series`, `year` and `country_code`.
 First search in the result of :meth:`_get_show_ids` and fallback on a search with :meth:`_search_show_id`.
@@ -163,32 +176,46 @@ class Addic7edProvider(_Addic7edProvider):
 :type country_code: str
 :return: the show id, if found.
 :rtype: int
 """
-series_sanitized = sanitize(series).lower()
-show_ids = self._get_show_ids()
 show_id = None
-# attempt with country
-if not show_id and country_code:
-logger.debug('Getting show id with country')
-show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower()))
-# attempt with year
-if not show_id and year:
-logger.debug('Getting show id with year')
-show_id = show_ids.get('%s %d' % (series_sanitized, year))
-# attempt clean
-if not show_id:
-logger.debug('Getting show id')
-show_id = show_ids.get(series_sanitized)
-# search as last resort
-# broken right now
-# if not show_id:
-# logger.warning('Series %s not found in show ids', series)
-# show_id = self._search_show_id(series)
+show_ids = {sanitize(series).lower(), sanitize(series.replace(".", "")).lower()}
+logger.debug("Trying show ids: %s", show_ids)
+for series_sanitized in show_ids:
+if not ignore_cache:
+show_ids = self._get_show_ids()
+else:
+show_ids = self._get_show_ids.refresh(self)
+# attempt with country
+if not show_id and country_code:
+logger.debug('Getting show id with country')
+show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower()))
+# attempt with year
+if not show_id and year:
+logger.debug('Getting show id with year')
+show_id = show_ids.get('%s %d' % (series_sanitized, year))
+# attempt clean
+if not show_id:
+logger.debug('Getting show id')
+show_id = show_ids.get(series_sanitized)
+if not show_id:
+now = datetime.datetime.now()
+last_fetch = region.get(self.last_show_ids_fetch_key)
+# re-fetch show ids once per day if any show ID not found
+if not ignore_cache and last_fetch != NO_VALUE and last_fetch + datetime.timedelta(days=1) < now:
+logger.info("Show id not found; re-fetching show ids")
+return self.get_show_id(series, year=year, country_code=country_code, ignore_cache=True)
+logger.debug("Not refreshing show ids, as the last fetch has been too recent")
+# search as last resort
+# broken right now
+# if not show_id:
+# logger.warning('Series %s not found in show ids', series)
+# show_id = self._search_show_id(series)
 return show_id
@@ -202,6 +229,8 @@ class Addic7edProvider(_Addic7edProvider):
 """
 # get the show page
 logger.info('Getting show ids')
+region.set(self.last_show_ids_fetch_key, datetime.datetime.now())
 r = self.session.get(self.server_url + 'shows.php', timeout=10)
 r.raise_for_status()
@@ -210,14 +239,15 @@ class Addic7edProvider(_Addic7edProvider):
 # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
 show_cells = re.findall(show_cells_re, r.content)
 if show_cells:
-soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
+soup = ParserBeautifulSoup(b''.join(show_cells).decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
 else:
 # If RegEx fails, fall back to original r.content and use 'html.parser'
 soup = ParserBeautifulSoup(r.content, ['html.parser'])
 # populate the show ids
 show_ids = {}
-for show in soup.select('td > h3 > a[href^="/show/"]'):
+shows = soup.select('td > h3 > a[href^="/show/"]')
+for show in shows:
 show_clean = sanitize(show.text, default_characters=self.sanitize_characters)
 try:
 show_id = int(show['href'][6:])

Loading…
Cancel
Save