Merge branch 'development' into hermes

# Conflicts:
#	bazarr/get_providers.py
#	bazarr/main.py
#	views/providers.tpl
5 years ago · 3394363edc
parent 0b27b0854d 79a6de7d18
commit 3394363edc
5 changed files with 222 additions and 131 deletions
--- a/bazarr/config.py
+++ b/bazarr/config.py
@@ -108,7 +108,8 @@ defaults = {
    },
    'legendasdivx': {
        'username': '',
-        'password': ''
+        'password': '',
+        'skip_wrong_fps': 'False'
    },
    'legendastv': {
        'username': '',
--- a/bazarr/get_providers.py
+++ b/bazarr/get_providers.py
@@ -9,10 +9,22 @@ import time
 from get_args import args
 from config import settings
 from websocket_handler import event_stream
-from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError
+from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError, IPAddressBlocked
 from subliminal.exceptions import DownloadLimitExceeded, ServiceUnavailable
 from subliminal import region as subliminal_cache_region

+def time_until_end_of_day(dt=None):
+    # type: (datetime.datetime) -> datetime.timedelta
+    """
+    Get timedelta until end of day on the datetime passed, or current time.
+    """
+    if dt is None:
+        dt = datetime.datetime.now()
+    tomorrow = dt + datetime.timedelta(days=1)
+    return datetime.datetime.combine(tomorrow, datetime.time.min) - dt
+
+hours_until_end_of_day = time_until_end_of_day().seconds // 3600 + 1
+
 VALID_THROTTLE_EXCEPTIONS = (TooManyRequests, DownloadLimitExceeded, ServiceUnavailable, APIThrottled,
                             ParseResponseError)
 VALID_COUNT_EXCEPTIONS = ('TooManyRequests', 'ServiceUnavailable', 'APIThrottled')
@@ -33,14 +45,16 @@ PROVIDER_THROTTLE_MAP = {
    "addic7ed": {
        DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours"),
        TooManyRequests: (datetime.timedelta(minutes=5), "5 minutes"),
+        IPAddressBlocked: (datetime.timedelta(hours=1), "1 hours"),
+
    },
    "titulky": {
        DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours")
    },
    "legendasdivx": {
-        TooManyRequests: (datetime.timedelta(hours=2), "2 hours"),
+        TooManyRequests: (datetime.timedelta(hours=3), "3 hours"),
        DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"),
-        ParseResponseError: (datetime.timedelta(hours=1), "1 hours"),
+        IPAddressBlocked: (datetime.timedelta(hours=hours_until_end_of_day), "{} hours".format(str(hours_until_end_of_day))),
    }
 }

@@ -122,6 +136,7 @@ def get_providers_auth():
                     },
        'legendasdivx': {'username': settings.legendasdivx.username,
                       'password': settings.legendasdivx.password,
+                       'skip_wrong_fps': settings.legendasdivx.getboolean('skip_wrong_fps'),
                       },
        'legendastv': {'username': settings.legendastv.username,
                       'password': settings.legendastv.password,
--- a/libs/subliminal_patch/exceptions.py
+++ b/libs/subliminal_patch/exceptions.py
@@ -7,11 +7,13 @@ class TooManyRequests(ProviderError):
    """Exception raised by providers when too many requests are made."""
    pass

-
 class APIThrottled(ProviderError):
    pass

-
 class ParseResponseError(ProviderError):
    """Exception raised by providers when they are not able to parse the response."""
    pass
+
+class IPAddressBlocked(ProviderError):
+	"""Exception raised when providers block requests from IP Address."""
+	pass
--- a/libs/subliminal_patch/providers/addic7ed.py
+++ b/libs/subliminal_patch/providers/addic7ed.py
@@ -9,13 +9,14 @@ from random import randint

 from dogpile.cache.api import NO_VALUE
 from requests import Session
+from requests.exceptions import ConnectionError
 from subliminal.cache import region
 from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError, ConfigurationError
 from subliminal.providers.addic7ed import Addic7edProvider as _Addic7edProvider, \
    Addic7edSubtitle as _Addic7edSubtitle, ParserBeautifulSoup
 from subliminal.subtitle import fix_line_ending
 from subliminal_patch.utils import sanitize
-from subliminal_patch.exceptions import TooManyRequests
+from subliminal_patch.exceptions import TooManyRequests, IPAddressBlocked
 from subliminal_patch.pitcher import pitchers, load_verification, store_verification
 from subzero.language import Language

@@ -91,6 +92,7 @@ class Addic7edProvider(_Addic7edProvider):
        # login
        if self.username and self.password:
            def check_verification(cache_region):
+                try:
                    rr = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10,
                                          headers={"Referer": self.server_url})
                    if rr.status_code == 302:
@@ -100,6 +102,9 @@ class Addic7edProvider(_Addic7edProvider):
                        logger.info('Addic7ed: Re-using old login')
                        self.logged_in = True
                        return True
+                except ConnectionError as e:
+                    logger.debug("Addic7ed: There was a problem reaching the server: %s." % e)
+                    raise IPAddressBlocked("Addic7ed: Your IP is temporarily blocked.")

            if load_verification("addic7ed", self.session, callback=check_verification):
                return
--- a/libs/subliminal_patch/providers/legendasdivx.py
+++ b/libs/subliminal_patch/providers/legendasdivx.py
@@ -2,22 +2,27 @@
 from __future__ import absolute_import
 import logging
 import io
-import re
 import os
-import rarfile
+import re
 import zipfile
+from time import sleep
+from requests.exceptions import HTTPError
+import rarfile

-from requests import Session
 from guessit import guessit
+from subliminal.cache import region
 from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded
-from subliminal_patch.providers import Provider
 from subliminal.providers import ParserBeautifulSoup
-from subliminal_patch.subtitle import Subtitle
-from subliminal.video import Episode, Movie
 from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending, guess_matches
-from subzero.language import Language
-from subliminal_patch.score import get_scores
 from subliminal.utils import sanitize, sanitize_release_group
+from subliminal.video import Episode, Movie
+from subliminal_patch.exceptions import TooManyRequests, IPAddressBlocked
+from subliminal_patch.http import RetryingCFSession
+from subliminal_patch.providers import Provider
+from subliminal_patch.score import get_scores, framerate_equal
+from subliminal_patch.subtitle import Subtitle
+from subzero.language import Language
+from dogpile.cache.api import NO_VALUE

 logger = logging.getLogger(__name__)

@@ -25,7 +30,7 @@ class LegendasdivxSubtitle(Subtitle):
    """Legendasdivx Subtitle."""
    provider_name = 'legendasdivx'

-    def __init__(self, language, video, data):
+    def __init__(self, language, video, data, skip_wrong_fps=True):
        super(LegendasdivxSubtitle, self).__init__(language)
        self.language = language
        self.page_link = data['link']
@@ -33,8 +38,11 @@ class LegendasdivxSubtitle(Subtitle):
        self.exact_match = data['exact_match']
        self.description = data['description']
        self.video = video
+        self.sub_frame_rate = data['frame_rate']
        self.video_filename = data['video_filename']
        self.uploader = data['uploader']
+        self.wrong_fps = False
+        self.skip_wrong_fps = skip_wrong_fps

    @property
    def id(self):
@@ -47,6 +55,23 @@ class LegendasdivxSubtitle(Subtitle):
    def get_matches(self, video):
        matches = set()

+        # if skip_wrong_fps = True no point to continue if they don't match
+        subtitle_fps = None
+        try:
+            subtitle_fps = float(self.sub_frame_rate)
+        except ValueError:
+            pass
+
+        # check fps match and skip based on configuration
+        if video.fps and subtitle_fps and not framerate_equal(video.fps, subtitle_fps):
+            self.wrong_fps = True
+
+            if self.skip_wrong_fps:
+                logger.debug("Legendasdivx :: Skipping subtitle due to FPS mismatch (expected: %s, got: %s)", video.fps, self.sub_frame_rate)
+                # not a single match :)
+                return set()
+            logger.debug("Legendasdivx :: Frame rate mismatch (expected: %s, got: %s, but continuing...)", video.fps, self.sub_frame_rate)
+
        description = sanitize(self.description)

        if sanitize(self.video_filename) in description:
@@ -105,8 +130,6 @@ class LegendasdivxSubtitle(Subtitle):
                    matches.update(['video_codec'])
                    break

-        # running guessit on a huge description may break guessit
-        # matches |= guess_matches(video, guessit(self.description))
        return matches

 class LegendasdivxProvider(Provider):
@@ -118,36 +141,43 @@ class LegendasdivxProvider(Provider):
        'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Origin': 'https://www.legendasdivx.pt',
-        'Referer': 'https://www.legendasdivx.pt',
-        'Pragma': 'no-cache',
-        'Cache-Control': 'no-cache'
+        'Referer': 'https://www.legendasdivx.pt'
    }
    loginpage = site + '/forum/ucp.php?mode=login'
-    logoutpage = site + '/sair.php'
    searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}'
    download_link = site + '/modules.php{link}'

-    def __init__(self, username, password):
+    def __init__(self, username, password, skip_wrong_fps=True):
        # make sure login credentials are configured.
        if any((username, password)) and not all((username, password)):
-            raise ConfigurationError('Username and password must be specified')
+            raise ConfigurationError('Legendasdivx.pt :: Username and password must be specified')
        self.username = username
        self.password = password
-        self.logged_in = False
+        self.skip_wrong_fps = skip_wrong_fps

    def initialize(self):
-        self.session = Session()
+        logger.info("Legendasdivx.pt :: Creating session for requests")
+        self.session = RetryingCFSession()
+        # re-use PHP Session if present
+        prev_cookies = region.get("legendasdivx_cookies2")
+        if prev_cookies != NO_VALUE:
+            logger.debug("Legendasdivx.pt :: Re-using previous legendasdivx cookies: %s", prev_cookies)
+            self.session.cookies.update(prev_cookies)
+        # Login if session has expired
+        else:
+            logger.debug("Legendasdivx.pt :: Session cookies not found!")
            self.session.headers.update(self.headers)
            self.login()

    def terminate(self):
-        self.logout()
+        # session close
        self.session.close()

    def login(self):
-        logger.info('Logging in')
-        
+        logger.info('Legendasdivx.pt :: Logging in')
+        try:
            res = self.session.get(self.loginpage)
+            res.raise_for_status()
            bsoup = ParserBeautifulSoup(res.content, ['lxml'])

            _allinputs = bsoup.findAll('input')
@@ -161,28 +191,32 @@ class LegendasdivxProvider(Provider):

            res = self.session.post(self.loginpage, data)
            res.raise_for_status()
+            #make sure we're logged in
+            logger.debug('Legendasdivx.pt :: Logged in successfully: PHPSESSID: %s', self.session.cookies.get_dict()['PHPSESSID'])
+            cj = self.session.cookies.copy()
+            store_cks = ("PHPSESSID", "phpbb3_2z8zs_sid", "phpbb3_2z8zs_k", "phpbb3_2z8zs_u", "lang")
+            for cn in iter(self.session.cookies.keys()):
+                if cn not in store_cks:
+                    del cj[cn]
+            #store session cookies on cache
+            logger.debug("Legendasdivx.pt :: Storing legendasdivx session cookies: %r", cj)
+            region.set("legendasdivx_cookies2", cj)

-        try:
-            logger.debug('Logged in successfully: PHPSESSID: %s' %
-                         self.session.cookies.get_dict()['PHPSESSID'])
-            self.logged_in = True   
        except KeyError:
-            logger.error("Couldn't retrieve session ID, check your credentials")
-            raise AuthenticationError("Please check your credentials.")
-        except Exception as e:
-            if 'bloqueado' in res.text.lower(): # blocked IP address 
+            logger.error("Legendasdivx.pt :: Couldn't get session ID, check your credentials")
+            raise AuthenticationError("Legendasdivx.pt :: Couldn't get session ID, check your credentials")
+        except HTTPError as e:
+            if "bloqueado" in res.text.lower(): # ip blocked on server
                logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
-                raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)
-            logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e))
-            raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e))
-
-    def logout(self):
-        if self.logged_in:
-            logger.info('Legendasdivx:: Logging out')
-            r = self.session.get(self.logoutpage, timeout=10)
-            r.raise_for_status()
-            logger.debug('Legendasdivx :: Logged out')
-            self.logged_in = False
+                raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
+            if 'limite' in res.text.lower(): # daily downloads limit reached
+                logger.error("LegendasDivx.pt :: Daily download limit reached!")
+                raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!")
+            logger.error("Legendasdivx.pt :: HTTP Error %s", e)
+            raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s", e)
+        except Exception as e:
+            logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
+            raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e)

    def _process_page(self, video, bsoup, video_filename):

@@ -192,30 +226,32 @@ class LegendasdivxProvider(Provider):

        for _subbox in _allsubs:
            hits = 0
-            for th in _subbox.findAll("th", {"class": "color2"}):
-                if th.string == 'Hits:':
-                    hits = int(th.parent.find("td").string)
-                if th.string == 'Idioma:':
-                    lang = th.parent.find("td").find("img").get('src')
+            for th in _subbox.findAll("th"):
+                if th.text == 'Hits:':
+                    hits = int(th.find_next("td").text)
+                if th.text == 'Idioma:':
+                    lang = th.find_next("td").find("img").get('src')
                    if 'brazil' in lang.lower():
                        lang = Language.fromopensubtitles('pob')
                    elif 'portugal' in lang.lower():
                        lang = Language.fromopensubtitles('por')
                    else:
                        continue
+                if th.text == "Frame Rate:":
+                    frame_rate = th.find_next("td").text.strip()
+
            # get description for matches
            description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text()
            #get subtitle link
            download = _subbox.find("a", {"class": "sub_download"})

            # sometimes BSoup can't find 'a' tag and returns None.
-            i = 0
-            while not (download): # must get it... trying again...
-                download = _subbox.find("a", {"class": "sub_download"})
-                i=+1
-                logger.debug("Try number {0} try!".format(str(i)))
-            dl = download.get('href')
-            logger.debug("Found subtitle on: %s" % self.download_link.format(link=dl))
+            try:
+                download_link = self.download_link.format(link=download.get('href'))
+                logger.debug("Legendasdivx.pt :: Found subtitle link on: %s ", download_link)
+            except:
+                logger.debug("Legendasdivx.pt :: Couldn't find download link. Trying next...")
+                continue

            # get subtitle uploader
            sub_header = _subbox.find("div", {"class" :"sub_header"})
@@ -224,15 +260,16 @@ class LegendasdivxProvider(Provider):
            exact_match = False
            if video.name.lower() in description.lower():
                exact_match = True
-            data = {'link': self.site + '/modules.php' + download.get('href'),
+            data = {'link': download_link,
                    'exact_match': exact_match,
                    'hits': hits,
                    'uploader': uploader,
+                    'frame_rate': frame_rate,
                    'video_filename': video_filename,
                    'description': description
                    }
            subtitles.append(
-                LegendasdivxSubtitle(lang, video, data)
+                LegendasdivxSubtitle(lang, video, data, skip_wrong_fps=self.skip_wrong_fps)
            )
        return subtitles

@@ -264,31 +301,58 @@ class LegendasdivxProvider(Provider):

        querytext = querytext + lang_filter if lang_filter else querytext

+        try:
+            # sleep for 1 second before another request
+            sleep(1)
            self.headers['Referer'] = self.site + '/index.php'
-        self.session.headers.update(self.headers.items())
-        res = self.session.get(_searchurl.format(query=querytext))
-
-        if "A legenda não foi encontrada" in res.text:
-            logger.warning('%s not found', querytext)
+            self.session.headers.update(self.headers)
+            res = self.session.get(_searchurl.format(query=querytext), allow_redirects=False)
+            res.raise_for_status()
+            if (res.status_code == 200 and "A legenda não foi encontrada" in res.text):
+                logger.warning('Legendasdivx.pt :: %s not found', querytext)
                return []
+            if res.status_code == 302: # got redirected to login page.
+                # Seems that our session cookies are no longer valid... clean them from cache
+                region.delete("legendasdivx_cookies2")
+                logger.debug("Legendasdivx.pt :: Logging in again. Cookies have expired!")
+                self.login() # login and try again
+                res = self.session.get(_searchurl.format(query=querytext))
+                res.raise_for_status()
+        except HTTPError as e:
+            if "bloqueado" in res.text.lower(): # ip blocked on server
+                logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
+                raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
+            if 'limite' in res.text.lower(): # daily downloads limit reached
+                logger.error("LegendasDivx.pt :: Daily download limit reached!")
+                raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!")
+            logger.error("Legendasdivx.pt :: HTTP Error %s", e)
+            raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s", e)
+        except Exception as e:
+            logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
+            raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e)

        bsoup = ParserBeautifulSoup(res.content, ['html.parser'])
-        subtitles = self._process_page(video, bsoup, video_filename)

        # search for more than 10 results (legendasdivx uses pagination)
        # don't throttle - maximum results = 6 * 10
        MAX_PAGES = 6

-        #get number of pages bases on results found
+        # get number of pages based on results found
        page_header = bsoup.find("div", {"class": "pager_bar"})
-        results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1)
+        results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1) if page_header else 0
+        logger.debug("Legendasdivx.pt :: Found %s subtitles", str(results_found))
        num_pages = (int(results_found) // 10) + 1
        num_pages = min(MAX_PAGES, num_pages)

+        # process first page
+        subtitles = self._process_page(video, bsoup, video_filename)
+
+        # more pages?
        if num_pages > 1:
-            for num_page in range(2, num_pages+2):
+            for num_page in range(2, num_pages+1):
+                sleep(1) # another 1 sec before requesting...
                _search_next = self.searchurl.format(query=querytext) + "&page={0}".format(str(num_page))
-                logger.debug("Moving to next page: %s" % _search_next)
+                logger.debug("Legendasdivx.pt :: Moving on to next page: %s", _search_next)
                res = self.session.get(_search_next)
                next_page = ParserBeautifulSoup(res.content, ['html.parser'])
                subs = self._process_page(video, next_page, video_filename)
@@ -301,14 +365,21 @@ class LegendasdivxProvider(Provider):

    def download_subtitle(self, subtitle):
        res = self.session.get(subtitle.page_link)
+
+        try:
            res.raise_for_status()
-        if res:
-            if res.status_code in ['500', '503']:
-                raise ServiceUnavailable("Legendasdivx.pt :: 503 - Service Unavailable")
-            elif 'limite' in res.text.lower(): # daily downloads limit reached
-                raise DownloadLimitReached("Legendasdivx.pt :: Download limit reached")
-            elif 'bloqueado' in res.text.lower(): # blocked IP address 
-                raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)
+        except HTTPError as e:
+            if "bloqueado" in res.text.lower(): # ip blocked on server
+                logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
+                raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
+            if 'limite' in res.text.lower(): # daily downloads limit reached
+                logger.error("LegendasDivx.pt :: Daily download limit reached!")
+                raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!")
+            logger.error("Legendasdivx.pt :: HTTP Error %s", e)
+            raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s", e)
+        except Exception as e:
+            logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
+            raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e)

        archive = self._get_archive(res.content)
        # extract the subtitle
@@ -318,9 +389,6 @@ class LegendasdivxProvider(Provider):

        return subtitle

-        logger.error("Legendasdivx.pt :: there was a problem retrieving subtitle (status %s)" % res.status_code)
-        return
-
    def _get_archive(self, content):
        # open the archive
        # stole^H^H^H^H^H inspired from subvix provider
@@ -353,26 +421,26 @@ class LegendasdivxProvider(Provider):
            if not name.lower().endswith(_subtitle_extensions):
                continue

-            _guess = guessit (name)
+            _guess = guessit(name)
            if isinstance(subtitle.video, Episode):
-                logger.debug ("guessing %s" % name)
-                logger.debug("subtitle S{}E{} video S{}E{}".format(_guess['season'],_guess['episode'],subtitle.video.season,subtitle.video.episode))
+                logger.debug("guessing %s", name)
+                logger.debug("subtitle S%sE%s video S%sE%s", _guess['season'], _guess['episode'], subtitle.video.season, subtitle.video.episode)

                if subtitle.video.episode != _guess['episode'] or subtitle.video.season != _guess['season']:
                    logger.debug('subtitle does not match video, skipping')
                    continue

            matches = set()
-            matches |= guess_matches (subtitle.video, _guess)
-            logger.debug('srt matches: %s' % matches)
-            _score = sum ((_scores.get (match, 0) for match in matches))
+            matches |= guess_matches(subtitle.video, _guess)
+            logger.debug('srt matches: %s', matches)
+            _score = sum((_scores.get(match, 0) for match in matches))
            if _score > _max_score:
                _max_name = name
                _max_score = _score
-                logger.debug("new max: {} {}".format(name, _score))
+                logger.debug("new max: %s %s", name, _score)

        if _max_score > 0:
-            logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score))
+            logger.debug("returning from archive: %s scored %s", _max_name, _max_score)
            return archive.read(_max_name)

        raise ValueError("No subtitle found on compressed file. Max score was 0")