Merge pull request #969 from redglory/development

Fixed the retry logic for fetching the download link to avoid throttling
pull/986/head
morpheus65535 4 years ago committed by GitHub
commit 1a3943415a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -37,7 +37,7 @@ PROVIDER_THROTTLE_MAP = {
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours") DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours")
}, },
"legendasdivx": { "legendasdivx": {
TooManyRequests: (datetime.timedelta(hours=2), "2 hours"), TooManyRequests: (datetime.timedelta(hours=3), "3 hours"),
DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"), DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"),
ParseResponseError: (datetime.timedelta(hours=1), "1 hours"), ParseResponseError: (datetime.timedelta(hours=1), "1 hours"),
} }

@ -8,8 +8,10 @@ import rarfile
import zipfile import zipfile
from requests import Session from requests import Session
from requests.exceptions import HTTPError
from guessit import guessit from guessit import guessit
from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded
from subliminal_patch.exceptions import TooManyRequests
from subliminal_patch.providers import Provider from subliminal_patch.providers import Provider
from subliminal.providers import ParserBeautifulSoup from subliminal.providers import ParserBeautifulSoup
from subliminal_patch.subtitle import Subtitle from subliminal_patch.subtitle import Subtitle
@ -160,19 +162,24 @@ class LegendasdivxProvider(Provider):
data['password'] = self.password data['password'] = self.password
res = self.session.post(self.loginpage, data) res = self.session.post(self.loginpage, data)
res.raise_for_status()
if (res and 'bloqueado' in res.text.lower()): # blocked IP address
logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
raise TooManyRequests("Legendasdivx.pt :: Your IP is blocked on this server.")
#make sure we're logged in
try: try:
res.raise_for_status()
logger.debug('Logged in successfully: PHPSESSID: %s' % logger.debug('Logged in successfully: PHPSESSID: %s' %
self.session.cookies.get_dict()['PHPSESSID']) self.session.cookies.get_dict()['PHPSESSID'])
self.logged_in = True self.logged_in = True
except KeyError: except KeyError:
logger.error("Couldn't retrieve session ID, check your credentials") logger.error("Couldn't retrieve session ID, check your credentials")
raise AuthenticationError("Please check your credentials.") raise AuthenticationError("Please check your credentials.")
except HTTPError as e:
logger.error("Legendasdivx.pt :: HTTP Error %s" % e)
raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
except Exception as e: except Exception as e:
if 'bloqueado' in res.text.lower(): # blocked IP address
logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)
logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e))
raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e))
@ -193,9 +200,9 @@ class LegendasdivxProvider(Provider):
for _subbox in _allsubs: for _subbox in _allsubs:
hits = 0 hits = 0
for th in _subbox.findAll("th", {"class": "color2"}): for th in _subbox.findAll("th", {"class": "color2"}):
if th.string == 'Hits:': if th.text == 'Hits:':
hits = int(th.parent.find("td").string) hits = int(th.parent.find("td").text)
if th.string == 'Idioma:': if th.text == 'Idioma:':
lang = th.parent.find("td").find("img").get('src') lang = th.parent.find("td").find("img").get('src')
if 'brazil' in lang.lower(): if 'brazil' in lang.lower():
lang = Language.fromopensubtitles('pob') lang = Language.fromopensubtitles('pob')
@ -209,13 +216,12 @@ class LegendasdivxProvider(Provider):
download = _subbox.find("a", {"class": "sub_download"}) download = _subbox.find("a", {"class": "sub_download"})
# sometimes BSoup can't find 'a' tag and returns None. # sometimes BSoup can't find 'a' tag and returns None.
i = 0 try:
while not (download): # must get it... trying again...
download = _subbox.find("a", {"class": "sub_download"})
i=+1
logger.debug("Try number {0} try!".format(str(i)))
dl = download.get('href') dl = download.get('href')
logger.debug("Found subtitle on: %s" % self.download_link.format(link=dl)) logger.debug("Found subtitle link on: {0}").format(self.download_link.format(link=dl))
except:
logger.debug("Couldn't find download link. Trying next...")
continue
# get subtitle uploader # get subtitle uploader
sub_header = _subbox.find("div", {"class" :"sub_header"}) sub_header = _subbox.find("div", {"class" :"sub_header"})
@ -268,7 +274,7 @@ class LegendasdivxProvider(Provider):
self.session.headers.update(self.headers.items()) self.session.headers.update(self.headers.items())
res = self.session.get(_searchurl.format(query=querytext)) res = self.session.get(_searchurl.format(query=querytext))
if "A legenda não foi encontrada" in res.text: if (res and "A legenda não foi encontrada" in res.text):
logger.warning('%s not found', querytext) logger.warning('%s not found', querytext)
return [] return []
@ -281,12 +287,13 @@ class LegendasdivxProvider(Provider):
#get number of pages bases on results found #get number of pages bases on results found
page_header = bsoup.find("div", {"class": "pager_bar"}) page_header = bsoup.find("div", {"class": "pager_bar"})
results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1) results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1) if page_header else 0
logger.debug("Found %s subtitles" % str(results_found))
num_pages = (int(results_found) // 10) + 1 num_pages = (int(results_found) // 10) + 1
num_pages = min(MAX_PAGES, num_pages) num_pages = min(MAX_PAGES, num_pages)
if num_pages > 1: if num_pages > 1:
for num_page in range(2, num_pages+2): for num_page in range(2, num_pages+1):
_search_next = self.searchurl.format(query=querytext) + "&page={0}".format(str(num_page)) _search_next = self.searchurl.format(query=querytext) + "&page={0}".format(str(num_page))
logger.debug("Moving to next page: %s" % _search_next) logger.debug("Moving to next page: %s" % _search_next)
res = self.session.get(_search_next) res = self.session.get(_search_next)
@ -301,14 +308,22 @@ class LegendasdivxProvider(Provider):
def download_subtitle(self, subtitle): def download_subtitle(self, subtitle):
res = self.session.get(subtitle.page_link) res = self.session.get(subtitle.page_link)
res.raise_for_status()
if res: if res:
if res.status_code in ['500', '503']: try:
raise ServiceUnavailable("Legendasdivx.pt :: 503 - Service Unavailable") res.raise_for_status()
elif 'limite' in res.text.lower(): # daily downloads limit reached if 'limite' in res.text.lower(): # daily downloads limit reached
raise DownloadLimitReached("Legendasdivx.pt :: Download limit reached") logger.error("LegendasDivx.pt :: Daily download limit reached!")
raise DownloadLimitReached("Legendasdivx.pt :: Daily download limit reached!")
elif 'bloqueado' in res.text.lower(): # blocked IP address elif 'bloqueado' in res.text.lower(): # blocked IP address
raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
raise TooManyRequests("LegendasDivx.pt :: Your IP is blocked on this server.")
except HTTPError as e:
logger.error("Legendasdivx.pt :: HTTP Error %s" % e)
raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
except Exception as e:
logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e))
raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e))
archive = self._get_archive(res.content) archive = self._get_archive(res.content)
# extract the subtitle # extract the subtitle

Loading…
Cancel
Save