Fixed parsed exceptions, refactored code, and implemented paginated search

pull/960/head
Bazarr 4 years ago
parent 21bd7bc792
commit 2dcfc433c3

@ -35,6 +35,11 @@ PROVIDER_THROTTLE_MAP = {
},
"titulky": {
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours")
},
"legendasdivx": {
TooManyRequests: (datetime.timedelta(hours=2), "2 hours"),
DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"),
ParseResponseError: (datetime.timedelta(hours=1), "1 hours"),
}
}

@ -2,13 +2,14 @@
from __future__ import absolute_import
import logging
import io
import re
import os
import rarfile
import zipfile
from requests import Session
from guessit import guessit
from subliminal_patch.exceptions import ParseResponseError
from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded
from subliminal_patch.providers import Provider
from subliminal.providers import ParserBeautifulSoup
from subliminal_patch.subtitle import Subtitle
@ -16,6 +17,7 @@ from subliminal.video import Episode, Movie
from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending, guess_matches
from subzero.language import Language
from subliminal_patch.score import get_scores
from subliminal.utils import sanitize, sanitize_release_group
logger = logging.getLogger(__name__)
@ -29,9 +31,9 @@ class LegendasdivxSubtitle(Subtitle):
self.page_link = data['link']
self.hits = data['hits']
self.exact_match = data['exact_match']
self.description = data['description'].lower()
self.description = data['description']
self.video = video
self.videoname = data['videoname']
self.video_filename = data['video_filename']
self.uploader = data['uploader']
@property
@ -45,40 +47,37 @@ class LegendasdivxSubtitle(Subtitle):
def get_matches(self, video):
matches = set()
if self.videoname.lower() in self.description:
description = sanitize(self.description)
if sanitize(self.video_filename) in description:
matches.update(['title'])
matches.update(['season'])
matches.update(['episode'])
# episode
if video.title and video.title.lower() in self.description:
if video.title and sanitize(video.title) in description:
matches.update(['title'])
if video.year and '{:04d}'.format(video.year) in self.description:
if video.year and '{:04d}'.format(video.year) in description:
matches.update(['year'])
if isinstance(video, Episode):
# already matched in search query
if video.season and 's{:02d}'.format(video.season) in self.description:
if video.season and 's{:02d}'.format(video.season) in description:
matches.update(['season'])
if video.episode and 'e{:02d}'.format(video.episode) in self.description:
if video.episode and 'e{:02d}'.format(video.episode) in description:
matches.update(['episode'])
if video.episode and video.season and video.series:
if '{}.s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description:
matches.update(['series'])
matches.update(['season'])
matches.update(['episode'])
if '{} s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description:
if '{} s{:02d}e{:02d}'.format(sanitize(video.series), video.season, video.episode) in description:
matches.update(['series'])
matches.update(['season'])
matches.update(['episode'])
# release_group
if video.release_group and video.release_group.lower() in self.description:
if video.release_group and sanitize_release_group(video.release_group) in sanitize_release_group(description):
matches.update(['release_group'])
# resolution
if video.resolution and video.resolution.lower() in self.description:
if video.resolution and video.resolution.lower() in description:
matches.update(['resolution'])
# format
@ -88,9 +87,9 @@ class LegendasdivxSubtitle(Subtitle):
if formats[0] == "web-dl":
formats.append("webdl")
formats.append("webrip")
formats.append("web ")
formats.append("web")
for frmt in formats:
if frmt.lower() in self.description:
if frmt in description:
matches.update(['format'])
break
@ -98,11 +97,11 @@ class LegendasdivxSubtitle(Subtitle):
if video.video_codec:
video_codecs = [video.video_codec.lower()]
if video_codecs[0] == "h264":
formats.append("x264")
video_codecs.append("x264")
elif video_codecs[0] == "h265":
formats.append("x265")
for vc in formats:
if vc.lower() in self.description:
video_codecs.append("x265")
for vc in video_codecs:
if vc in description:
matches.update(['video_codec'])
break
@ -124,15 +123,21 @@ class LegendasdivxProvider(Provider):
'Cache-Control': 'no-cache'
}
loginpage = site + '/forum/ucp.php?mode=login'
logoutpage = site + '/sair.php'
searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}'
language_list = list(languages)
download_link = site + '/modules.php{link}'
def __init__(self, username, password):
    """Remember the account credentials and start in the logged-out state.

    :param username: account name used for the site login form.
    :param password: password used for the site login form.
    :raises ConfigurationError: if exactly one of the two values is supplied.
    """
    credentials = (username, password)

    # Both-or-neither: a half-filled pair is a configuration mistake.
    if any(credentials) and not all(credentials):
        raise ConfigurationError('Username and password must be specified')

    self.username, self.password = credentials
    self.logged_in = False
def initialize(self):
    """Open the HTTP session, apply the provider's default headers and log in."""
    session = Session()
    session.headers.update(self.headers)
    self.session = session

    self.login()
def terminate(self):
@ -141,106 +146,103 @@ class LegendasdivxProvider(Provider):
def login(self):
    """Authenticate against the site's login form and flag the session as logged in.

    The login page is fetched first so that every named form input it carries
    (the comment in the original code mentions the 'sid' token) can be echoed
    back together with the credentials in the POST request.

    :raises ParseResponseError: no session cookie was issued and the response
        text contains 'bloqueado' (the site reports the client IP as blocked).
    :raises AuthenticationError: no 'PHPSESSID' cookie was issued for any
        other reason, which the code treats as wrong credentials.

    Non-success HTTP statuses propagate from ``raise_for_status()``.
    """
    logger.info('Logging in')

    res = self.session.get(self.loginpage)
    res.raise_for_status()
    bsoup = ParserBeautifulSoup(res.content, ['lxml'])

    # necessary to set 'sid' for POST request
    data = {}
    for field in bsoup.findAll('input'):
        name = field.get('name')
        # Inputs without a name cannot be submitted; skipping them avoids
        # sending a bogus 'None' form key.
        if name:
            data[name] = field.get('value')
    data['username'] = self.username
    data['password'] = self.password

    res = self.session.post(self.loginpage, data)
    res.raise_for_status()

    session_id = self.session.cookies.get_dict().get('PHPSESSID')
    if session_id is None:
        # Tell a blocked IP apart from bad credentials. This check only runs
        # when the login actually failed, so the word appearing on a regular
        # page cannot trigger a false positive.
        if 'bloqueado' in res.text.lower():  # blocked IP address
            logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
            raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)
        logger.error("Couldn't retrieve session ID, check your credentials")
        raise AuthenticationError("Please check your credentials.")

    logger.debug('Logged in successfully: PHPSESSID: %s', session_id)
    self.logged_in = True
def logout(self):
    """Close the authenticated session on the site, if one is open.

    Does nothing when ``self.logged_in`` is false. Otherwise requests the
    logout page (10 second timeout) and clears the ``logged_in`` flag once the
    request succeeded; a non-success HTTP status propagates from
    ``raise_for_status()`` and leaves the flag untouched.
    """
    if not self.logged_in:
        return

    logger.info('Legendasdivx:: Logging out')
    r = self.session.get(self.logoutpage, timeout=10)
    r.raise_for_status()
    logger.debug('Legendasdivx :: Logged out')
    self.logged_in = False
def _process_page(self, video, bsoup, video_filename):
    """Convert one parsed search-results page into subtitle objects.

    Every ``div.sub_box`` element on the page is read as one result: its
    'Hits:' and 'Idioma:' rows, the description cell, the download anchor and
    the uploader link. Results whose language flag is neither Brazilian nor
    European Portuguese, or that carry no download link, are skipped.

    :param video: the video being searched for; ``video.name`` is compared
        against the description to set ``exact_match``.
    :param bsoup: parsed results page to scan.
    :param video_filename: value stored under ``'video_filename'`` in each
        subtitle's data.
    :return: list of ``LegendasdivxSubtitle`` built from the page (may be empty).
    """
    subtitles = []

    for _subbox in bsoup.findAll("div", {"class": "sub_box"}):
        hits = 0
        # Resolved per result so a language never leaks in from the previous box.
        lang = None

        for th in _subbox.findAll("th", {"class": "color2"}):
            cell = th.parent.find("td")
            if th.string == 'Hits:':
                try:
                    hits = int(cell.string)
                except (AttributeError, TypeError, ValueError):
                    # Missing or non-numeric counter: keep the default.
                    hits = 0
            elif th.string == 'Idioma:':
                flag = cell.find("img") if cell else None
                flag_src = (flag.get('src') or '').lower() if flag else ''
                if 'brazil' in flag_src:
                    lang = Language.fromopensubtitles('pob')
                elif 'portugal' in flag_src:
                    lang = Language.fromopensubtitles('por')

        if lang is None:
            # Unknown or missing language flag: skip the whole result rather
            # than handing a raw image URL to the subtitle as its language.
            logger.debug('Legendasdivx :: skipping result with unsupported language')
            continue

        # get description for matches
        desc_cell = _subbox.find("td", {"class": "td_desc brd_up"})
        description = desc_cell.get_text() if desc_cell else ''

        # get subtitle link
        # Re-running find() on the same parsed tree cannot return a different
        # result, so a missing anchor is skipped instead of retried forever.
        download = _subbox.find("a", {"class": "sub_download"})
        dl = download.get('href') if download else None
        if not dl:
            logger.warning('Legendasdivx :: skipping result without a download link')
            continue
        link = self.download_link.format(link=dl)
        logger.debug("Found subtitle on: %s", link)

        # get subtitle uploader
        sub_header = _subbox.find("div", {"class": "sub_header"})
        uploader = sub_header.find("a").text if sub_header else '<n/a>'

        exact_match = video.name.lower() in description.lower()

        data = {
            'link': link,
            'exact_match': exact_match,
            'hits': hits,
            'uploader': uploader,
            'video_filename': video_filename,
            'description': description,
        }
        subtitles.append(LegendasdivxSubtitle(lang, video, data))

    return subtitles
def query(self, video, language):
try:
logger.debug('Got session id %s' %
self.session.cookies.get_dict()['PHPSESSID'])
except Exception as e:
self.login()
language_ids = '0'
if isinstance(language, (tuple, list, set)):
if len(language) == 1:
language_ids = ','.join(sorted(l.opensubtitles for l in language))
if language_ids == 'por':
language_ids = '&form_cat=28'
else:
language_ids = '&form_cat=29'
videoname = video.name
videoname = os.path.basename(videoname)
videoname, _ = os.path.splitext(videoname)
# querytext = videoname.lower()
def query(self, video, languages):
video_filename = video.name
video_filename = os.path.basename(video_filename)
video_filename, _ = os.path.splitext(video_filename)
video_filename = sanitize_release_group(video_filename)
_searchurl = self.searchurl
if video.imdb_id is None:
if isinstance(video, Episode):
@ -250,22 +252,47 @@ class LegendasdivxProvider(Provider):
else:
querytext = video.imdb_id
# language query filter
if isinstance(languages, (tuple, list, set)):
language_ids = ','.join(sorted(l.opensubtitles for l in languages))
if 'por' in language_ids: # prioritize portuguese subtitles
lang_filter = '&form_cat=28' # pt
elif 'pob' in language_ids:
lang_filter = '&form_cat=29' # br
else:
lang_filter = ''
querytext = querytext + lang_filter if lang_filter else querytext
# querytext = querytext.replace(
# ".", "+").replace("[", "").replace("]", "")
if language_ids != '0':
querytext = querytext + language_ids
self.headers['Referer'] = self.site + '/index.php'
self.session.headers.update(self.headers.items())
res = self.session.get(_searchurl.format(query=querytext))
# form_cat=28 = br
# form_cat=29 = pt
if "A legenda não foi encontrada" in res.text:
logger.warning('%s not found', querytext)
return []
bsoup = ParserBeautifulSoup(res.content, ['html.parser'])
subtitles = self._process_page(video, bsoup, querytext, videoname)
subtitles = self._process_page(video, bsoup, video_filename)
# search for more than 10 results (legendasdivx uses pagination)
# don't throttle - maximum results = 6 * 10
MAX_PAGES = 6
#get number of pages bases on results found
page_header = bsoup.find("div", {"class": "pager_bar"})
results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1)
num_pages = (int(results_found) // 10) + 1
num_pages = min(MAX_PAGES, num_pages)
if num_pages > 1:
for num_page in range(2, num_pages+2):
_search_next = self.searchurl.format(query=querytext) + "&page={0}".format(str(num_page))
logger.debug("Moving to next page: %s" % _search_next)
res = self.session.get(_search_next)
next_page = ParserBeautifulSoup(res.content, ['html.parser'])
subs = self._process_page(video, next_page, video_filename)
subtitles.extend(subs)
return subtitles
@ -274,9 +301,14 @@ class LegendasdivxProvider(Provider):
def download_subtitle(self, subtitle):
res = self.session.get(subtitle.page_link)
res.raise_for_status()
if res:
if res.text == '500':
raise ValueError('Error 500 on server')
if res.status_code in ['500', '503']:
raise ServiceUnavailable("Legendasdivx.pt :: 503 - Service Unavailable")
elif 'limite' in res.text.lower(): # daily downloads limit reached
raise DownloadLimitReached("Legendasdivx.pt :: Download limit reached")
elif 'bloqueado' in res.text.lower(): # blocked IP address
raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)
archive = self._get_archive(res.content)
# extract the subtitle
@ -285,7 +317,9 @@ class LegendasdivxProvider(Provider):
subtitle.normalize()
return subtitle
raise ValueError('Problems conecting to the server')
logger.error("Legendasdivx.pt :: there was a problem retrieving subtitle (status %s)" % res.status_code)
return
def _get_archive(self, content):
# open the archive
@ -298,7 +332,6 @@ class LegendasdivxProvider(Provider):
logger.debug('Identified zip archive')
archive = zipfile.ZipFile(archive_stream)
else:
# raise ParseResponseError('Unsupported compressed format')
raise Exception('Unsupported compressed format')
return archive
@ -309,7 +342,7 @@ class LegendasdivxProvider(Provider):
_tmp.remove('.txt')
_subtitle_extensions = tuple(_tmp)
_max_score = 0
_scores = get_scores (subtitle.video)
_scores = get_scores(subtitle.video)
for name in archive.namelist():
# discard hidden files
@ -342,4 +375,4 @@ class LegendasdivxProvider(Provider):
logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score))
return archive.read(_max_name)
raise ParseResponseError('Can not find the subtitle in the compressed file')
raise ValueError("No subtitle found on compressed file. Max score was 0")
Loading…
Cancel
Save