diff --git a/bazarr/config.py b/bazarr/config.py index f29ed5bbe..d20b82b03 100644 --- a/bazarr/config.py +++ b/bazarr/config.py @@ -108,7 +108,8 @@ defaults = { }, 'legendasdivx': { 'username': '', - 'password': '' + 'password': '', + 'skip_wrong_fps': 'False' }, 'legendastv': { 'username': '', diff --git a/bazarr/get_providers.py b/bazarr/get_providers.py index e80d46ce9..4fad9c5ce 100644 --- a/bazarr/get_providers.py +++ b/bazarr/get_providers.py @@ -9,10 +9,22 @@ import time from get_args import args from config import settings from websocket_handler import event_stream -from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError +from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError, IPAddressBlocked from subliminal.exceptions import DownloadLimitExceeded, ServiceUnavailable from subliminal import region as subliminal_cache_region +def time_until_end_of_day(dt=None): + # type: (datetime.datetime) -> datetime.timedelta + """ + Get timedelta until end of day on the datetime passed, or current time. + """ + if dt is None: + dt = datetime.datetime.now() + tomorrow = dt + datetime.timedelta(days=1) + return datetime.datetime.combine(tomorrow, datetime.time.min) - dt + +hours_until_end_of_day = time_until_end_of_day().seconds // 3600 + 1 + VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable, APIThrottled, ParseResponseError) VALID_COUNT_EXCEPTIONS = ('TooManyRequests', 'ServiceUnavailable', 'APIThrottled') @@ -33,14 +45,16 @@ PROVIDER_THROTTLE_MAP = { "addic7ed": { DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"), TooManyRequests: (datetime.timedelta(minutes=5), "5 minutes"), + IPAddressBlocked: (datetime.timedelta(hours=1), "1 hours"), + }, "titulky": { DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours") }, "legendasdivx": { - TooManyRequests: (datetime.timedelta(hours=2), "2 hours"), + TooManyRequests: (datetime.timedelta(hours=3), "3 hours"), DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"), - ParseResponseError: (datetime.timedelta(hours=1), "1 hours"), + IPAddressBlocked: (datetime.timedelta(hours=hours_until_end_of_day), "{} hours".format(str(hours_until_end_of_day))), } } @@ -122,6 +136,7 @@ def get_providers_auth(): }, 'legendasdivx': {'username': settings.legendasdivx.username, 'password': settings.legendasdivx.password, + 'skip_wrong_fps': settings.legendasdivx.getboolean('skip_wrong_fps'), }, 'legendastv': {'username': settings.legendastv.username, 'password': settings.legendastv.password, diff --git a/libs/subliminal_patch/exceptions.py b/libs/subliminal_patch/exceptions.py index 82f33ade8..6cd73e769 100644 --- a/libs/subliminal_patch/exceptions.py +++ b/libs/subliminal_patch/exceptions.py @@ -7,11 +7,13 @@ class TooManyRequests(ProviderError): """Exception raised by providers when too many requests are made.""" pass - class APIThrottled(ProviderError): pass - class ParseResponseError(ProviderError): """Exception raised by providers when they are not able to parse the response.""" pass + +class IPAddressBlocked(ProviderError): + """Exception raised when providers block requests from IP Address.""" + pass \ No newline at end of file diff --git a/libs/subliminal_patch/providers/addic7ed.py b/libs/subliminal_patch/providers/addic7ed.py index 880a4729c..af6813b4c 100644 --- a/libs/subliminal_patch/providers/addic7ed.py +++ b/libs/subliminal_patch/providers/addic7ed.py @@ -9,13 +9,14 @@ from random import randint from dogpile.cache.api import NO_VALUE from requests import Session +from requests.exceptions import ConnectionError from subliminal.cache import region from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError, ConfigurationError from subliminal.providers.addic7ed import Addic7edProvider as _Addic7edProvider, \ Addic7edSubtitle as _Addic7edSubtitle, ParserBeautifulSoup from subliminal.subtitle import fix_line_ending from subliminal_patch.utils import sanitize -from subliminal_patch.exceptions import TooManyRequests +from subliminal_patch.exceptions import TooManyRequests, IPAddressBlocked from subliminal_patch.pitcher import pitchers, load_verification, store_verification from subzero.language import Language @@ -91,15 +92,19 @@ class Addic7edProvider(_Addic7edProvider): # login if self.username and self.password: def check_verification(cache_region): - rr = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10, - headers={"Referer": self.server_url}) - if rr.status_code == 302: - logger.info('Addic7ed: Login expired') - cache_region.delete("addic7ed_data") - else: - logger.info('Addic7ed: Re-using old login') - self.logged_in = True - return True + try: + rr = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10, + headers={"Referer": self.server_url}) + if rr.status_code == 302: + logger.info('Addic7ed: Login expired') + cache_region.delete("addic7ed_data") + else: + logger.info('Addic7ed: Re-using old login') + self.logged_in = True + return True + except ConnectionError as e: + logger.debug("Addic7ed: There was a problem reaching the server: %s." % e) + raise IPAddressBlocked("Addic7ed: Your IP is temporarily blocked.") if load_verification("addic7ed", self.session, callback=check_verification): return diff --git a/libs/subliminal_patch/providers/legendasdivx.py b/libs/subliminal_patch/providers/legendasdivx.py index e9f505bdf..02ad95cce 100644 --- a/libs/subliminal_patch/providers/legendasdivx.py +++ b/libs/subliminal_patch/providers/legendasdivx.py @@ -2,22 +2,27 @@ from __future__ import absolute_import import logging import io -import re import os -import rarfile +import re import zipfile +from time import sleep +from requests.exceptions import HTTPError +import rarfile -from requests import Session from guessit import guessit +from subliminal.cache import region from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded -from subliminal_patch.providers import Provider from subliminal.providers import ParserBeautifulSoup -from subliminal_patch.subtitle import Subtitle -from subliminal.video import Episode, Movie from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending, guess_matches -from subzero.language import Language -from subliminal_patch.score import get_scores from subliminal.utils import sanitize, sanitize_release_group +from subliminal.video import Episode, Movie +from subliminal_patch.exceptions import TooManyRequests, IPAddressBlocked +from subliminal_patch.http import RetryingCFSession +from subliminal_patch.providers import Provider +from subliminal_patch.score import get_scores, framerate_equal +from subliminal_patch.subtitle import Subtitle +from subzero.language import Language +from dogpile.cache.api import NO_VALUE logger = logging.getLogger(__name__) @@ -25,7 +30,7 @@ class LegendasdivxSubtitle(Subtitle): """Legendasdivx Subtitle.""" provider_name = 'legendasdivx' - def __init__(self, language, video, data): + def __init__(self, language, video, data, skip_wrong_fps=True): super(LegendasdivxSubtitle, self).__init__(language) self.language = language self.page_link = data['link'] @@ -33,8 +38,11 @@ class LegendasdivxSubtitle(Subtitle): self.exact_match = data['exact_match'] self.description = data['description'] self.video = video + self.sub_frame_rate = data['frame_rate'] self.video_filename = data['video_filename'] self.uploader = data['uploader'] + self.wrong_fps = False + self.skip_wrong_fps = skip_wrong_fps @property def id(self): @@ -47,6 +55,23 @@ class LegendasdivxSubtitle(Subtitle): def get_matches(self, video): matches = set() + # if skip_wrong_fps = True no point to continue if they don't match + subtitle_fps = None + try: + subtitle_fps = float(self.sub_frame_rate) + except ValueError: + pass + + # check fps match and skip based on configuration + if video.fps and subtitle_fps and not framerate_equal(video.fps, subtitle_fps): + self.wrong_fps = True + + if self.skip_wrong_fps: + logger.debug("Legendasdivx :: Skipping subtitle due to FPS mismatch (expected: %s, got: %s)", video.fps, self.sub_frame_rate) + # not a single match :) + return set() + logger.debug("Legendasdivx :: Frame rate mismatch (expected: %s, got: %s, but continuing...)", video.fps, self.sub_frame_rate) + description = sanitize(self.description) if sanitize(self.video_filename) in description: @@ -105,8 +130,6 @@ class LegendasdivxSubtitle(Subtitle): matches.update(['video_codec']) break - # running guessit on a huge description may break guessit - # matches |= guess_matches(video, guessit(self.description)) return matches class LegendasdivxProvider(Provider): @@ -118,71 +141,82 @@ class LegendasdivxProvider(Provider): 'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"), 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Origin': 'https://www.legendasdivx.pt', - 'Referer': 'https://www.legendasdivx.pt', - 'Pragma': 'no-cache', - 'Cache-Control': 'no-cache' + 'Referer': 'https://www.legendasdivx.pt' } loginpage = site + '/forum/ucp.php?mode=login' - logoutpage = site + '/sair.php' searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}' download_link = site + '/modules.php{link}' - def __init__(self, username, password): + def __init__(self, username, password, skip_wrong_fps=True): # make sure login credentials are configured. if any((username, password)) and not all((username, password)): - raise ConfigurationError('Username and password must be specified') + raise ConfigurationError('Legendasdivx.pt :: Username and password must be specified') self.username = username self.password = password - self.logged_in = False + self.skip_wrong_fps = skip_wrong_fps def initialize(self): - self.session = Session() - self.session.headers.update(self.headers) - self.login() + logger.info("Legendasdivx.pt :: Creating session for requests") + self.session = RetryingCFSession() + # re-use PHP Session if present + prev_cookies = region.get("legendasdivx_cookies2") + if prev_cookies != NO_VALUE: + logger.debug("Legendasdivx.pt :: Re-using previous legendasdivx cookies: %s", prev_cookies) + self.session.cookies.update(prev_cookies) + # Login if session has expired + else: + logger.debug("Legendasdivx.pt :: Session cookies not found!") + self.session.headers.update(self.headers) + self.login() def terminate(self): - self.logout() + # session close self.session.close() def login(self): - logger.info('Logging in') - - res = self.session.get(self.loginpage) - bsoup = ParserBeautifulSoup(res.content, ['lxml']) - - _allinputs = bsoup.findAll('input') - data = {} - # necessary to set 'sid' for POST request - for field in _allinputs: - data[field.get('name')] = field.get('value') - - data['username'] = self.username - data['password'] = self.password - - res = self.session.post(self.loginpage, data) - res.raise_for_status() - + logger.info('Legendasdivx.pt :: Logging in') try: - logger.debug('Logged in successfully: PHPSESSID: %s' % - self.session.cookies.get_dict()['PHPSESSID']) - self.logged_in = True + res = self.session.get(self.loginpage) + res.raise_for_status() + bsoup = ParserBeautifulSoup(res.content, ['lxml']) + + _allinputs = bsoup.findAll('input') + data = {} + # necessary to set 'sid' for POST request + for field in _allinputs: + data[field.get('name')] = field.get('value') + + data['username'] = self.username + data['password'] = self.password + + res = self.session.post(self.loginpage, data) + res.raise_for_status() + #make sure we're logged in + logger.debug('Legendasdivx.pt :: Logged in successfully: PHPSESSID: %s', self.session.cookies.get_dict()['PHPSESSID']) + cj = self.session.cookies.copy() + store_cks = ("PHPSESSID", "phpbb3_2z8zs_sid", "phpbb3_2z8zs_k", "phpbb3_2z8zs_u", "lang") + for cn in iter(self.session.cookies.keys()): + if cn not in store_cks: + del cj[cn] + #store session cookies on cache + logger.debug("Legendasdivx.pt :: Storing legendasdivx session cookies: %r", cj) + region.set("legendasdivx_cookies2", cj) + except KeyError: - logger.error("Couldn't retrieve session ID, check your credentials") - raise AuthenticationError("Please check your credentials.") - except Exception as e: - if 'bloqueado' in res.text.lower(): # blocked IP address + logger.error("Legendasdivx.pt :: Couldn't get session ID, check your credentials") + raise AuthenticationError("Legendasdivx.pt :: Couldn't get session ID, check your credentials") + except HTTPError as e: + if "bloqueado" in res.text.lower(): # ip blocked on server logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") - raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) - logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) - raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) - - def logout(self): - if self.logged_in: - logger.info('Legendasdivx:: Logging out') - r = self.session.get(self.logoutpage, timeout=10) - r.raise_for_status() - logger.debug('Legendasdivx :: Logged out') - self.logged_in = False + raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.") + if 'limite' in res.text.lower(): # daily downloads limit reached + logger.error("LegendasDivx.pt :: Daily download limit reached!") + raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!") + logger.error("Legendasdivx.pt :: HTTP Error %s", e) + raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s", e) + except Exception as e: + logger.error("LegendasDivx.pt :: Uncaught error: %r", e) + raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e) def _process_page(self, video, bsoup, video_filename): @@ -192,47 +226,50 @@ class LegendasdivxProvider(Provider): for _subbox in _allsubs: hits = 0 - for th in _subbox.findAll("th", {"class": "color2"}): - if th.string == 'Hits:': - hits = int(th.parent.find("td").string) - if th.string == 'Idioma:': - lang = th.parent.find("td").find("img").get('src') + for th in _subbox.findAll("th"): + if th.text == 'Hits:': + hits = int(th.find_next("td").text) + if th.text == 'Idioma:': + lang = th.find_next("td").find("img").get('src') if 'brazil' in lang.lower(): lang = Language.fromopensubtitles('pob') elif 'portugal' in lang.lower(): lang = Language.fromopensubtitles('por') else: continue + if th.text == "Frame Rate:": + frame_rate = th.find_next("td").text.strip() + # get description for matches description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text() #get subtitle link download = _subbox.find("a", {"class": "sub_download"}) - - # sometimes BSoup can't find 'a' tag and returns None. - i = 0 - while not (download): # must get it... trying again... - download = _subbox.find("a", {"class": "sub_download"}) - i=+1 - logger.debug("Try number {0} try!".format(str(i))) - dl = download.get('href') - logger.debug("Found subtitle on: %s" % self.download_link.format(link=dl)) + + # sometimes BSoup can't find 'a' tag and returns None. + try: + download_link = self.download_link.format(link=download.get('href')) + logger.debug("Legendasdivx.pt :: Found subtitle link on: %s ", download_link) + except: + logger.debug("Legendasdivx.pt :: Couldn't find download link. Trying next...") + continue # get subtitle uploader - sub_header = _subbox.find("div", {"class" :"sub_header"}) + sub_header = _subbox.find("div", {"class" :"sub_header"}) uploader = sub_header.find("a").text if sub_header else 'anonymous' exact_match = False if video.name.lower() in description.lower(): exact_match = True - data = {'link': self.site + '/modules.php' + download.get('href'), + data = {'link': download_link, 'exact_match': exact_match, 'hits': hits, 'uploader': uploader, + 'frame_rate': frame_rate, 'video_filename': video_filename, 'description': description } subtitles.append( - LegendasdivxSubtitle(lang, video, data) + LegendasdivxSubtitle(lang, video, data, skip_wrong_fps=self.skip_wrong_fps) ) return subtitles @@ -264,31 +301,58 @@ class LegendasdivxProvider(Provider): querytext = querytext + lang_filter if lang_filter else querytext - self.headers['Referer'] = self.site + '/index.php' - self.session.headers.update(self.headers.items()) - res = self.session.get(_searchurl.format(query=querytext)) - - if "A legenda não foi encontrada" in res.text: - logger.warning('%s not found', querytext) - return [] + try: + # sleep for a 1 second before another request + sleep(1) + self.headers['Referer'] = self.site + '/index.php' + self.session.headers.update(self.headers) + res = self.session.get(_searchurl.format(query=querytext), allow_redirects=False) + res.raise_for_status() + if (res.status_code == 200 and "A legenda não foi encontrada" in res.text): + logger.warning('Legendasdivx.pt :: %s not found', querytext) + return [] + if res.status_code == 302: # got redirected to login page. + # Seems that our session cookies are no longer valid... clean them from cache + region.delete("legendasdivx_cookies2") + logger.debug("Legendasdivx.pt :: Logging in again. Cookies have expired!") + self.login() # login and try again + res = self.session.get(_searchurl.format(query=querytext)) + res.raise_for_status() + except HTTPError as e: + if "bloqueado" in res.text.lower(): # ip blocked on server + logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") + raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.") + if 'limite' in res.text.lower(): # daily downloads limit reached + logger.error("LegendasDivx.pt :: Daily download limit reached!") + raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!") + logger.error("Legendasdivx.pt :: HTTP Error %s", e) + raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s", e) + except Exception as e: + logger.error("LegendasDivx.pt :: Uncaught error: %r", e) + raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e) bsoup = ParserBeautifulSoup(res.content, ['html.parser']) - subtitles = self._process_page(video, bsoup, video_filename) # search for more than 10 results (legendasdivx uses pagination) # don't throttle - maximum results = 6 * 10 MAX_PAGES = 6 - - #get number of pages bases on results found + + # get number of pages bases on results found page_header = bsoup.find("div", {"class": "pager_bar"}) - results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1) + results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1) if page_header else 0 + logger.debug("Legendasdivx.pt :: Found %s subtitles", str(results_found)) num_pages = (int(results_found) // 10) + 1 num_pages = min(MAX_PAGES, num_pages) + # process first page + subtitles = self._process_page(video, bsoup, video_filename) + + # more pages? if num_pages > 1: - for num_page in range(2, num_pages+2): + for num_page in range(2, num_pages+1): + sleep(1) # another 1 sec before requesting... _search_next = self.searchurl.format(query=querytext) + "&page={0}".format(str(num_page)) - logger.debug("Moving to next page: %s" % _search_next) + logger.debug("Legendasdivx.pt :: Moving on to next page: %s", _search_next) res = self.session.get(_search_next) next_page = ParserBeautifulSoup(res.content, ['html.parser']) subs = self._process_page(video, next_page, video_filename) @@ -301,25 +365,29 @@ class LegendasdivxProvider(Provider): def download_subtitle(self, subtitle): res = self.session.get(subtitle.page_link) - res.raise_for_status() - if res: - if res.status_code in ['500', '503']: - raise ServiceUnavailable("Legendasdivx.pt :: 503 - Service Unavailable") - elif 'limite' in res.text.lower(): # daily downloads limit reached - raise DownloadLimitReached("Legendasdivx.pt :: Download limit reached") - elif 'bloqueado' in res.text.lower(): # blocked IP address - raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) - - archive = self._get_archive(res.content) - # extract the subtitle - subtitle_content = self._get_subtitle_from_archive(archive, subtitle) - subtitle.content = fix_line_ending(subtitle_content) - subtitle.normalize() - - return subtitle - - logger.error("Legendasdivx.pt :: there was a problem retrieving subtitle (status %s)" % res.status_code) - return + + try: + res.raise_for_status() + except HTTPError as e: + if "bloqueado" in res.text.lower(): # ip blocked on server + logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") + raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.") + if 'limite' in res.text.lower(): # daily downloads limit reached + logger.error("LegendasDivx.pt :: Daily download limit reached!") + raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!") + logger.error("Legendasdivx.pt :: HTTP Error %s", e) + raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s", e) + except Exception as e: + logger.error("LegendasDivx.pt :: Uncaught error: %r", e) + raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e) + + archive = self._get_archive(res.content) + # extract the subtitle + subtitle_content = self._get_subtitle_from_archive(archive, subtitle) + subtitle.content = fix_line_ending(subtitle_content) + subtitle.normalize() + + return subtitle def _get_archive(self, content): # open the archive @@ -353,26 +421,26 @@ class LegendasdivxProvider(Provider): if not name.lower().endswith(_subtitle_extensions): continue - _guess = guessit (name) + _guess = guessit(name) if isinstance(subtitle.video, Episode): - logger.debug ("guessing %s" % name) - logger.debug("subtitle S{}E{} video S{}E{}".format(_guess['season'],_guess['episode'],subtitle.video.season,subtitle.video.episode)) + logger.debug("guessing %s", name) + logger.debug("subtitle S%sE%s video S%sE%s", _guess['season'], _guess['episode'], subtitle.video.season, subtitle.video.episode) if subtitle.video.episode != _guess['episode'] or subtitle.video.season != _guess['season']: logger.debug('subtitle does not match video, skipping') continue matches = set() - matches |= guess_matches (subtitle.video, _guess) - logger.debug('srt matches: %s' % matches) - _score = sum ((_scores.get (match, 0) for match in matches)) + matches |= guess_matches(subtitle.video, _guess) + logger.debug('srt matches: %s', matches) + _score = sum((_scores.get(match, 0) for match in matches)) if _score > _max_score: _max_name = name _max_score = _score - logger.debug("new max: {} {}".format(name, _score)) + logger.debug("new max: %s %s", name, _score) if _max_score > 0: - logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score)) + logger.debug("returning from archive: %s scored %s", _max_name, _max_score) return archive.read(_max_name) - raise ValueError("No subtitle found on compressed file. Max score was 0") \ No newline at end of file + raise ValueError("No subtitle found on compressed file. Max score was 0")