Fixed parsed exceptions, refactored code, and implemented paginated search

5 years ago · 2dcfc433c3
parent 21bd7bc792
commit 2dcfc433c3
2 changed files with 137 additions and 99 deletions
--- a/bazarr/get_providers.py
+++ b/bazarr/get_providers.py
@ -35,6 +35,11 @@ PROVIDER_THROTTLE_MAP = {
    },
    "titulky": {
        DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours")
+    },
+    "legendasdivx": {
+        TooManyRequests: (datetime.timedelta(hours=2), "2 hours"),
+        DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"),
+        ParseResponseError: (datetime.timedelta(hours=1), "1 hours"),
    }
 }

--- a/libs/subliminal_patch/providers/legendasdivx.py
+++ b/libs/subliminal_patch/providers/legendasdivx.py
@ -2,13 +2,14 @@
 from __future__ import absolute_import
 import logging
 import io
+import re
 import os
 import rarfile
 import zipfile

 from requests import Session
 from guessit import guessit
-from subliminal_patch.exceptions import ParseResponseError
+from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded
 from subliminal_patch.providers import Provider
 from subliminal.providers import ParserBeautifulSoup
 from subliminal_patch.subtitle import Subtitle
@ -16,6 +17,7 @@ from subliminal.video import Episode, Movie
 from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending, guess_matches
 from subzero.language import Language
 from subliminal_patch.score import get_scores
+from subliminal.utils import sanitize, sanitize_release_group

 logger = logging.getLogger(__name__)

@ -29,9 +31,9 @@ class LegendasdivxSubtitle(Subtitle):
        self.page_link = data['link']
        self.hits = data['hits']
        self.exact_match = data['exact_match']
-        self.description = data['description'].lower()
+        self.description = data['description']
        self.video = video
-        self.videoname = data['videoname']
+        self.video_filename = data['video_filename']
        self.uploader = data['uploader']

    @property
@ -45,40 +47,37 @@ class LegendasdivxSubtitle(Subtitle):
    def get_matches(self, video):
        matches = set()

-        if self.videoname.lower() in self.description:
+        description = sanitize(self.description)
+
+        if sanitize(self.video_filename) in description:
            matches.update(['title'])
            matches.update(['season'])
            matches.update(['episode'])

        # episode
-        if video.title and video.title.lower() in self.description:
+        if video.title and sanitize(video.title) in description:
            matches.update(['title'])
-        if video.year and '{:04d}'.format(video.year) in self.description:
+        if video.year and '{:04d}'.format(video.year) in description:
            matches.update(['year'])

        if isinstance(video, Episode):
            # already matched in search query
-            if video.season and 's{:02d}'.format(video.season) in self.description:
+            if video.season and 's{:02d}'.format(video.season) in description:
                matches.update(['season'])
-            if video.episode and 'e{:02d}'.format(video.episode) in self.description:
+            if video.episode and 'e{:02d}'.format(video.episode) in description:
                matches.update(['episode'])
            if video.episode and video.season and video.series:
-                if '{}.s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description:
-                        matches.update(['series'])
-                        matches.update(['season'])
-                        matches.update(['episode'])
-                if '{} s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description:
+                if '{} s{:02d}e{:02d}'.format(sanitize(video.series), video.season, video.episode) in description:
                    matches.update(['series'])
                    matches.update(['season'])
                    matches.update(['episode'])

        # release_group
-        if video.release_group  and video.release_group.lower() in self.description:
+        if video.release_group and sanitize_release_group(video.release_group) in sanitize_release_group(description):
            matches.update(['release_group'])

        # resolution
-
-        if video.resolution and video.resolution.lower() in self.description:
+        if video.resolution and video.resolution.lower() in description:
            matches.update(['resolution'])

        # format
@ -88,9 +87,9 @@ class LegendasdivxSubtitle(Subtitle):
            if formats[0] == "web-dl":
                formats.append("webdl")
                formats.append("webrip")
-                formats.append("web ")
+                formats.append("web")
            for frmt in formats:
-                if frmt.lower() in self.description:
+                if frmt in description:
                    matches.update(['format'])
                    break

@ -98,11 +97,11 @@ class LegendasdivxSubtitle(Subtitle):
        if video.video_codec:
            video_codecs = [video.video_codec.lower()]
            if video_codecs[0] == "h264":
-                formats.append("x264")
+                video_codecs.append("x264")
            elif video_codecs[0] == "h265":
-                formats.append("x265")
-            for vc in formats:
-                if vc.lower() in self.description:
+                video_codecs.append("x265")
+            for vc in video_codecs:
+                if vc in description:
                    matches.update(['video_codec'])
                    break

@ -124,15 +123,21 @@ class LegendasdivxProvider(Provider):
        'Cache-Control': 'no-cache'
    }
    loginpage = site + '/forum/ucp.php?mode=login'
+    logoutpage = site + '/sair.php'
    searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}'
-    language_list = list(languages)
+    download_link = site + '/modules.php{link}'

    def __init__(self, username, password):
+        # make sure login credentials are configured.
+        if any((username, password)) and not all((username, password)):
+            raise ConfigurationError('Username and password must be specified')
        self.username = username
        self.password = password
+        self.logged_in = False

    def initialize(self):
        self.session = Session()
+        self.session.headers.update(self.headers)
        self.login()

    def terminate(self):
@ -141,106 +146,103 @@ class LegendasdivxProvider(Provider):

    def login(self):
        logger.info('Logging in')
-        self.headers['Referer'] = self.site + '/index.php'
-        self.session.headers.update(self.headers.items())
+        
        res = self.session.get(self.loginpage)
        bsoup = ParserBeautifulSoup(res.content, ['lxml'])
-
+        
        _allinputs = bsoup.findAll('input')
-        fields = {}
+        data = {}
+        # necessary to set 'sid' for POST request
        for field in _allinputs:
-            fields[field.get('name')] = field.get('value')
-
-        fields['username'] = self.username
-        fields['password'] = self.password
-        fields['autologin'] = 'on'
-        fields['viewonline'] = 'on'
-
-        self.headers['Referer'] = self.loginpage
-        self.session.headers.update(self.headers.items())
-        res = self.session.post(self.loginpage, fields)
+            data[field.get('name')] = field.get('value')
+        
+        data['username'] = self.username
+        data['password'] = self.password
+
+        res = self.session.post(self.loginpage, data)
+        res.raise_for_status()
+        
        try:
-            logger.debug('Got session id %s' %
+            logger.debug('Logged in successfully: PHPSESSID: %s' %
                         self.session.cookies.get_dict()['PHPSESSID'])
-        except KeyError as e:
-            logger.error(repr(e))
-            logger.error("Didn't get session id, check your credentials")
-            return False
+            self.logged_in = True   
+        except KeyError:
+            logger.error("Couldn't retrieve session ID, check your credentials")
+            raise AuthenticationError("Please check your credentials.")
        except Exception as e:
-            logger.error(repr(e))
-            logger.error('uncached error #legendasdivx #AA')
-            return False
-
-        return True
+            if 'bloqueado' in res.text.lower(): # blocked IP address 
+                logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
+                raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)
+            logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e))
+            raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e))

    def logout(self):
-        # need to figure this out
-        return True
+        if self.logged_in:
+            logger.info('Legendasdivx:: Logging out')
+            r = self.session.get(self.logoutpage, timeout=10)
+            r.raise_for_status()
+            logger.debug('Legendasdivx :: Logged out')
+            self.logged_in = False
+
+    def _process_page(self, video, bsoup, video_filename):

-    def _process_page(self, video, bsoup, querytext, videoname):
        subtitles = []
+
        _allsubs = bsoup.findAll("div", {"class": "sub_box"})
-        lang = Language.fromopensubtitles("pob")
+
        for _subbox in _allsubs:
-            hits=0
+            hits = 0
            for th in _subbox.findAll("th", {"class": "color2"}):
                if th.string == 'Hits:':
                    hits = int(th.parent.find("td").string)
                if th.string == 'Idioma:':
-                    lang = th.parent.find("td").find ("img").get ('src')
-                    if 'brazil' in lang:
+                    lang = th.parent.find("td").find("img").get('src')
+                    if 'brazil' in lang.lower():
                        lang = Language.fromopensubtitles('pob')
-                    else:
+                    elif 'portugal' in lang.lower():
                        lang = Language.fromopensubtitles('por')
-
-            description = _subbox.find("td", {"class": "td_desc brd_up"})
+                    else:
+                        continue
+            # get description for matches
+            description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text()
+            #get subtitle link
            download = _subbox.find("a", {"class": "sub_download"})
-            try:
-                # sometimes BSoup just doesn't get the link
-                logger.debug(download.get('href'))
-            except Exception as e:
-                logger.warning('skipping subbox on %s' % self.searchurl.format(query=querytext))
-                continue
+            
+            # sometimes BSoup can't find 'a' tag and returns None. 
+            i = 0
+            while not (download): # must get it... trying again...
+                download = _subbox.find("a", {"class": "sub_download"})
+                i=+1
+                logger.debug("Try number {0} try!".format(str(i)))
+            dl = download.get('href')
+            logger.debug("Found subtitle on: %s" % self.download_link.format(link=dl))

            # get subtitle uploader
            sub_header = _subbox.find("div", {"class" :"sub_header"}) 
            uploader = sub_header.find("a").text if sub_header else '<n/a>'

            exact_match = False
-            if video.name.lower() in description.get_text().lower():
+            if video.name.lower() in description.lower():
                exact_match = True
            data = {'link': self.site + '/modules.php' + download.get('href'),
                    'exact_match': exact_match,
                    'hits': hits,
                    'uploader': uploader,
-                    'videoname': videoname,
-                    'description': description.get_text()
+                    'video_filename': video_filename,
+                    'description': description
                    }
            subtitles.append(
                LegendasdivxSubtitle(lang, video, data)
            )
        return subtitles

-    def query(self, video, language):
-        try:
-            logger.debug('Got session id %s' %
-                         self.session.cookies.get_dict()['PHPSESSID'])
-        except Exception as e:
-            self.login()
-
-        language_ids = '0'
-        if isinstance(language, (tuple, list, set)):
-            if len(language) == 1:
-                language_ids = ','.join(sorted(l.opensubtitles for l in language))
-                if language_ids == 'por':
-                    language_ids = '&form_cat=28'
-                else:
-                    language_ids = '&form_cat=29'
-
-        videoname = video.name
-        videoname = os.path.basename(videoname)
-        videoname, _ = os.path.splitext(videoname)
-        # querytext = videoname.lower()
+    def query(self, video, languages):
+
+        video_filename = video.name
+        video_filename = os.path.basename(video_filename)
+        video_filename, _ = os.path.splitext(video_filename)
+        video_filename = sanitize_release_group(video_filename)
+
        _searchurl = self.searchurl
        if video.imdb_id is None:
            if isinstance(video, Episode):
@ -250,22 +252,47 @@ class LegendasdivxProvider(Provider):
        else:
            querytext = video.imdb_id

+        # language query filter
+        if isinstance(languages, (tuple, list, set)):
+            language_ids = ','.join(sorted(l.opensubtitles for l in languages))
+            if 'por' in language_ids: # prioritize portuguese subtitles
+                lang_filter = '&form_cat=28' # pt
+            elif 'pob' in language_ids:
+                lang_filter = '&form_cat=29' # br
+            else:
+                lang_filter = ''
+
+        querytext = querytext + lang_filter if lang_filter else querytext

-        # querytext = querytext.replace(
-        #     ".", "+").replace("[", "").replace("]", "")
-        if language_ids != '0':
-            querytext = querytext + language_ids
        self.headers['Referer'] = self.site + '/index.php'
        self.session.headers.update(self.headers.items())
        res = self.session.get(_searchurl.format(query=querytext))
-        # form_cat=28 = br
-        # form_cat=29 = pt
+
        if "A legenda não foi encontrada" in res.text:
            logger.warning('%s not found', querytext)
            return []

        bsoup = ParserBeautifulSoup(res.content, ['html.parser'])
-        subtitles = self._process_page(video, bsoup, querytext, videoname)
+        subtitles = self._process_page(video, bsoup, video_filename)
+
+        # search for more than 10 results (legendasdivx uses pagination)
+        # don't throttle - maximum results = 6 * 10
+        MAX_PAGES = 6
+        
+        #get number of pages bases on results found
+        page_header = bsoup.find("div", {"class": "pager_bar"})
+        results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1)
+        num_pages = (int(results_found) // 10) + 1
+        num_pages = min(MAX_PAGES, num_pages)
+
+        if num_pages > 1:
+            for num_page in range(2, num_pages+2):
+                _search_next = self.searchurl.format(query=querytext) + "&page={0}".format(str(num_page))
+                logger.debug("Moving to next page: %s" % _search_next)
+                res = self.session.get(_search_next)
+                next_page = ParserBeautifulSoup(res.content, ['html.parser'])
+                subs = self._process_page(video, next_page, video_filename)
+                subtitles.extend(subs)

        return subtitles

@ -274,9 +301,14 @@ class LegendasdivxProvider(Provider):

    def download_subtitle(self, subtitle):
        res = self.session.get(subtitle.page_link)
+        res.raise_for_status()
        if res:
-            if res.text == '500':
-                raise ValueError('Error 500 on server')
+            if res.status_code in ['500', '503']:
+                raise ServiceUnavailable("Legendasdivx.pt :: 503 - Service Unavailable")
+            elif 'limite' in res.text.lower(): # daily downloads limit reached
+                raise DownloadLimitReached("Legendasdivx.pt :: Download limit reached")
+            elif 'bloqueado' in res.text.lower(): # blocked IP address 
+                raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)

            archive = self._get_archive(res.content)
            # extract the subtitle
@ -285,7 +317,9 @@ class LegendasdivxProvider(Provider):
            subtitle.normalize()

            return subtitle
-        raise ValueError('Problems conecting to the server')
+
+        logger.error("Legendasdivx.pt :: there was a problem retrieving subtitle (status %s)" % res.status_code)
+        return

    def _get_archive(self, content):
        # open the archive
@ -298,7 +332,6 @@ class LegendasdivxProvider(Provider):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
-            # raise ParseResponseError('Unsupported compressed format')
            raise Exception('Unsupported compressed format')

        return archive
@ -309,7 +342,7 @@ class LegendasdivxProvider(Provider):
        _tmp.remove('.txt')
        _subtitle_extensions = tuple(_tmp)
        _max_score = 0
-        _scores = get_scores (subtitle.video)
+        _scores = get_scores(subtitle.video)

        for name in archive.namelist():
            # discard hidden files
@ -342,4 +375,4 @@ class LegendasdivxProvider(Provider):
            logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score))
            return archive.read(_max_name)

-        raise ParseResponseError('Can not find the subtitle in the compressed file')
+        raise ValueError("No subtitle found on compressed file. Max score was 0")