diff --git a/libs/subliminal_patch/providers/subscene.py b/libs/subliminal_patch/providers/subscene.py
index 2dc38b691..b940eb787 100644
--- a/libs/subliminal_patch/providers/subscene.py
+++ b/libs/subliminal_patch/providers/subscene.py
@@ -210,7 +210,7 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
             for series in [video.series] + video.alternative_series:
                 term = u"%s - %s Season" % (series, p.number_to_words("%sth" % video.season).capitalize())
                 logger.debug('Searching for alternative results: %s', term)
-                film = search(term, session=self.session, release=False)
+                film = search(term, session=self.session, release=False, throttle=self.search_throttle)
                 if film and film.subtitles:
                     logger.debug('Alternative results found: %s', len(film.subtitles))
                     subtitles += self.parse_results(video, film)
@@ -222,7 +222,7 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
                     term = u"%s S%02i" % (series, video.season)
                     logger.debug('Searching for packs: %s', term)
                     time.sleep(self.search_throttle)
-                    film = search(term, session=self.session)
+                    film = search(term, session=self.session, throttle=self.search_throttle)
                     if film and film.subtitles:
                         logger.debug('Pack results found: %s', len(film.subtitles))
                         subtitles += self.parse_results(video, film)
@@ -236,7 +236,8 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
             more_than_one = len([video.title] + video.alternative_titles) > 1
             for title in [video.title] + video.alternative_titles:
                 logger.debug('Searching for movie results: %s', title)
-                film = search(title, year=video.year, session=self.session, limit_to=None, release=False)
+                film = search(title, year=video.year, session=self.session, limit_to=None, release=False,
+                              throttle=self.search_throttle)
                 if film and film.subtitles:
                     subtitles += self.parse_results(video, film)
                 if more_than_one:
diff --git a/libs/subscene_api/subscene.py b/libs/subscene_api/subscene.py
index 5b53a8c95..e0ba77138 100644
--- a/libs/subscene_api/subscene.py
+++ b/libs/subscene_api/subscene.py
@@ -28,6 +28,8 @@ import re
 import enum
 import sys
+import requests
+import time
 
 is_PY2 = sys.version_info[0] < 3
 if is_PY2:
@@ -55,7 +57,9 @@ def soup_for(url, session=None, user_agent=DEFAULT_USER_AGENT):
         r = Request(url, data=None, headers=dict(HEADERS, **{"User-Agent": user_agent}))
         html = urlopen(r).read().decode("utf-8")
     else:
-        html = session.get(url).text
+        ret = session.get(url)
+        ret.raise_for_status()
+        html = ret.text
     return BeautifulSoup(html, "html.parser")
 
 
@@ -243,17 +247,34 @@ def get_first_film(soup, section, year=None, session=None):
     return Film.from_url(url, session=session)
 
 
-def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact):
-    soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, "release" if release else "search", term), session=session)
+def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact, throttle=0):
+    # note to subscene: if you actually start to randomize the endpoint, we'll have to query your server even more
+    endpoints = ["searching", "search", "srch", "find"]
+    if release:
+        endpoints = ["release"]
 
-    if "Subtitle search by" in str(soup):
-        rows = soup.find("table").tbody.find_all("tr")
-        subtitles = Subtitle.from_rows(rows)
-        return Film(term, subtitles=subtitles)
+    soup = None
+    for endpoint in endpoints:
+        try:
+            soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, endpoint, term),
+                            session=session)
+        except requests.HTTPError as e:
+            if e.response.status_code == 404:
+                time.sleep(throttle)
+                # fixme: detect endpoint from html
+                continue
+            raise
+        break
 
-    for junk, search_type in SearchTypes.__members__.items():
-        if section_exists(soup, search_type):
-            return get_first_film(soup, search_type, year=year, session=session)
+    if soup:
+        if "Subtitle search by" in str(soup):
+            rows = soup.find("table").tbody.find_all("tr")
+            subtitles = Subtitle.from_rows(rows)
+            return Film(term, subtitles=subtitles)
 
-        if limit_to == search_type:
-            return
+        for junk, search_type in SearchTypes.__members__.items():
+            if section_exists(soup, search_type):
+                return get_first_film(soup, search_type, year=year, session=session)
+
+            if limit_to == search_type:
+                return