From abcf03e3892072b4ca27a15e1ae2f5f2e8ee35b9 Mon Sep 17 00:00:00 2001 From: Bazarr Date: Thu, 7 May 2020 12:16:25 +0100 Subject: [PATCH 1/5] fixed retries to get download link to avoid throttling --- .../providers/legendasdivx.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/libs/subliminal_patch/providers/legendasdivx.py b/libs/subliminal_patch/providers/legendasdivx.py index e9f505bdf..4fff72552 100644 --- a/libs/subliminal_patch/providers/legendasdivx.py +++ b/libs/subliminal_patch/providers/legendasdivx.py @@ -170,7 +170,7 @@ class LegendasdivxProvider(Provider): logger.error("Couldn't retrieve session ID, check your credentials") raise AuthenticationError("Please check your credentials.") except Exception as e: - if 'bloqueado' in res.text.lower(): # blocked IP address + if (res and 'bloqueado' in res.text.lower()): # blocked IP address logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) @@ -193,9 +193,9 @@ class LegendasdivxProvider(Provider): for _subbox in _allsubs: hits = 0 for th in _subbox.findAll("th", {"class": "color2"}): - if th.string == 'Hits:': - hits = int(th.parent.find("td").string) - if th.string == 'Idioma:': + if th.text == 'Hits:': + hits = int(th.parent.find("td").text) + if th.text == 'Idioma:': lang = th.parent.find("td").find("img").get('src') if 'brazil' in lang.lower(): lang = Language.fromopensubtitles('pob') @@ -209,13 +209,12 @@ class LegendasdivxProvider(Provider): download = _subbox.find("a", {"class": "sub_download"}) # sometimes BSoup can't find 'a' tag and returns None. - i = 0 - while not (download): # must get it... trying again... 
- download = _subbox.find("a", {"class": "sub_download"}) - i=+1 - logger.debug("Try number {0} try!".format(str(i))) - dl = download.get('href') - logger.debug("Found subtitle on: %s" % self.download_link.format(link=dl)) + try: + dl = download.get('href') + logger.debug("Found subtitle link on: {0}").format(self.download_link.format(link=dl)) + except: + logger.debug("Couldn't find download link. Trying next...") + continue # get subtitle uploader sub_header = _subbox.find("div", {"class" :"sub_header"}) @@ -268,7 +267,7 @@ class LegendasdivxProvider(Provider): self.session.headers.update(self.headers.items()) res = self.session.get(_searchurl.format(query=querytext)) - if "A legenda não foi encontrada" in res.text: + if (res and "A legenda não foi encontrada" in res.text): logger.warning('%s not found', querytext) return [] @@ -281,12 +280,13 @@ class LegendasdivxProvider(Provider): #get number of pages bases on results found page_header = bsoup.find("div", {"class": "pager_bar"}) - results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1) + results_found = re.search(r'\((.*?) 
encontradas\)', page_header.text).group(1) if page_header else 0 + logger.debug("Found %s subtitles" % str(results_found)) num_pages = (int(results_found) // 10) + 1 num_pages = min(MAX_PAGES, num_pages) if num_pages > 1: - for num_page in range(2, num_pages+2): + for num_page in range(2, num_pages+1): _search_next = self.searchurl.format(query=querytext) + "&page={0}".format(str(num_page)) logger.debug("Moving to next page: %s" % _search_next) res = self.session.get(_search_next) @@ -305,6 +305,8 @@ class LegendasdivxProvider(Provider): if res: if res.status_code in ['500', '503']: raise ServiceUnavailable("Legendasdivx.pt :: 503 - Service Unavailable") + elif res.status_code == '403': + raise ParseResponseError("Legendasdivx.pt :: 403 - Forbidden") elif 'limite' in res.text.lower(): # daily downloads limit reached raise DownloadLimitReached("Legendasdivx.pt :: Download limit reached") elif 'bloqueado' in res.text.lower(): # blocked IP address From 8da77a72eb0e22ab187a5df4dee4ac3681a1637c Mon Sep 17 00:00:00 2001 From: Bazarr Date: Thu, 7 May 2020 12:53:26 +0100 Subject: [PATCH 2/5] simplify --- libs/subliminal_patch/providers/legendasdivx.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/libs/subliminal_patch/providers/legendasdivx.py b/libs/subliminal_patch/providers/legendasdivx.py index 4fff72552..77e1cd22a 100644 --- a/libs/subliminal_patch/providers/legendasdivx.py +++ b/libs/subliminal_patch/providers/legendasdivx.py @@ -8,6 +8,7 @@ import rarfile import zipfile from requests import Session +from requests.exceptions import HTTPError from guessit import guessit from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded from subliminal_patch.providers import Provider @@ -303,11 +304,7 @@ class LegendasdivxProvider(Provider): res = self.session.get(subtitle.page_link) res.raise_for_status() if res: - if res.status_code in ['500', '503']: - raise 
ServiceUnavailable("Legendasdivx.pt :: 503 - Service Unavailable") - elif res.status_code == '403': - raise ParseResponseError("Legendasdivx.pt :: 403 - Forbidden") - elif 'limite' in res.text.lower(): # daily downloads limit reached + if 'limite' in res.text.lower(): # daily downloads limit reached raise DownloadLimitReached("Legendasdivx.pt :: Download limit reached") elif 'bloqueado' in res.text.lower(): # blocked IP address raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) From 1cdf07884f6652e974caa4f9eebd231568306ff9 Mon Sep 17 00:00:00 2001 From: Bazarr Date: Thu, 7 May 2020 13:07:34 +0100 Subject: [PATCH 3/5] simplified raise exceptions (raise_for_status() already raises exceptions) --- libs/subliminal_patch/providers/legendasdivx.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/libs/subliminal_patch/providers/legendasdivx.py b/libs/subliminal_patch/providers/legendasdivx.py index 77e1cd22a..fda404622 100644 --- a/libs/subliminal_patch/providers/legendasdivx.py +++ b/libs/subliminal_patch/providers/legendasdivx.py @@ -163,6 +163,11 @@ class LegendasdivxProvider(Provider): res = self.session.post(self.loginpage, data) res.raise_for_status() + if (res and 'bloqueado' in res.text.lower()): # blocked IP address + logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") + raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) + + #make sure we're logged in try: logger.debug('Logged in successfully: PHPSESSID: %s' % self.session.cookies.get_dict()['PHPSESSID']) @@ -171,9 +176,6 @@ class LegendasdivxProvider(Provider): logger.error("Couldn't retrieve session ID, check your credentials") raise AuthenticationError("Please check your credentials.") except Exception as e: - if (res and 'bloqueado' in res.text.lower()): # blocked IP address - logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") - raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) logger.error("LegendasDivx.pt :: 
Uncaught error: %r" % repr(e)) raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) @@ -305,8 +307,10 @@ class LegendasdivxProvider(Provider): res.raise_for_status() if res: if 'limite' in res.text.lower(): # daily downloads limit reached - raise DownloadLimitReached("Legendasdivx.pt :: Download limit reached") + logger.error("LegendasDivx.pt :: Daily download limit reached!") + raise DownloadLimitReached("Legendasdivx.pt :: Daily download limit reached!") elif 'bloqueado' in res.text.lower(): # blocked IP address + logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) archive = self._get_archive(res.content) From 83d81a6946155ab3ad73afc9ecffec24fe9ca517 Mon Sep 17 00:00:00 2001 From: Bazarr Date: Thu, 7 May 2020 13:09:50 +0100 Subject: [PATCH 4/5] no need to import HTTPerror exception. Not treated --- libs/subliminal_patch/providers/legendasdivx.py | 1 - 1 file changed, 1 deletion(-) diff --git a/libs/subliminal_patch/providers/legendasdivx.py b/libs/subliminal_patch/providers/legendasdivx.py index fda404622..54e755a0c 100644 --- a/libs/subliminal_patch/providers/legendasdivx.py +++ b/libs/subliminal_patch/providers/legendasdivx.py @@ -8,7 +8,6 @@ import rarfile import zipfile from requests import Session -from requests.exceptions import HTTPError from guessit import guessit from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded from subliminal_patch.providers import Provider From 075f053f17033fbcc20acc5edbfa94bec32535d5 Mon Sep 17 00:00:00 2001 From: Bazarr Date: Thu, 7 May 2020 13:47:00 +0100 Subject: [PATCH 5/5] treat exceptions when HTTPError is returned --- bazarr/get_providers.py | 2 +- .../providers/legendasdivx.py | 33 +++++++++++++------ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/bazarr/get_providers.py b/bazarr/get_providers.py index 577518c6e..b630d6a62 100644 --- 
a/bazarr/get_providers.py +++ b/bazarr/get_providers.py @@ -37,7 +37,7 @@ PROVIDER_THROTTLE_MAP = { DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours") }, "legendasdivx": { - TooManyRequests: (datetime.timedelta(hours=2), "2 hours"), + TooManyRequests: (datetime.timedelta(hours=3), "3 hours"), DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"), ParseResponseError: (datetime.timedelta(hours=1), "1 hours"), } diff --git a/libs/subliminal_patch/providers/legendasdivx.py b/libs/subliminal_patch/providers/legendasdivx.py index 54e755a0c..0be22757d 100644 --- a/libs/subliminal_patch/providers/legendasdivx.py +++ b/libs/subliminal_patch/providers/legendasdivx.py @@ -8,8 +8,10 @@ import rarfile import zipfile from requests import Session +from requests.exceptions import HTTPError from guessit import guessit from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded +from subliminal_patch.exceptions import TooManyRequests from subliminal_patch.providers import Provider from subliminal.providers import ParserBeautifulSoup from subliminal_patch.subtitle import Subtitle @@ -160,20 +162,23 @@ class LegendasdivxProvider(Provider): data['password'] = self.password res = self.session.post(self.loginpage, data) - res.raise_for_status() if (res and 'bloqueado' in res.text.lower()): # blocked IP address logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") - raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) + raise TooManyRequests("Legendasdivx.pt :: Your IP is blocked on this server.") #make sure we're logged in try: + res.raise_for_status() logger.debug('Logged in successfully: PHPSESSID: %s' % self.session.cookies.get_dict()['PHPSESSID']) - self.logged_in = True + self.logged_in = True except KeyError: logger.error("Couldn't retrieve session ID, check your credentials") raise AuthenticationError("Please check your credentials.") + except HTTPError as e: + 
logger.error("Legendasdivx.pt :: HTTP Error %s" % e) + raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e) except Exception as e: logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) @@ -303,14 +308,22 @@ class LegendasdivxProvider(Provider): def download_subtitle(self, subtitle): res = self.session.get(subtitle.page_link) - res.raise_for_status() + if res: - if 'limite' in res.text.lower(): # daily downloads limit reached - logger.error("LegendasDivx.pt :: Daily download limit reached!") - raise DownloadLimitReached("Legendasdivx.pt :: Daily download limit reached!") - elif 'bloqueado' in res.text.lower(): # blocked IP address - logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") - raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) + try: + res.raise_for_status() + if 'limite' in res.text.lower(): # daily downloads limit reached + logger.error("LegendasDivx.pt :: Daily download limit reached!") + raise DownloadLimitReached("Legendasdivx.pt :: Daily download limit reached!") + elif 'bloqueado' in res.text.lower(): # blocked IP address + logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") + raise TooManyRequests("LegendasDivx.pt :: Your IP is blocked on this server.") + except HTTPError as e: + logger.error("Legendasdivx.pt :: HTTP Error %s" % e) + raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e) + except Exception as e: + logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) + raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) archive = self._get_archive(res.content) # extract the subtitle