|
|
|
@ -2,20 +2,22 @@
|
|
|
|
|
from __future__ import absolute_import
|
|
|
|
|
import logging
|
|
|
|
|
import io
|
|
|
|
|
import re
|
|
|
|
|
import os
|
|
|
|
|
import rarfile
|
|
|
|
|
import zipfile
|
|
|
|
|
|
|
|
|
|
from requests import Session
|
|
|
|
|
from guessit import guessit
|
|
|
|
|
from subliminal_patch.exceptions import ParseResponseError
|
|
|
|
|
from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded
|
|
|
|
|
from subliminal_patch.providers import Provider
|
|
|
|
|
from subliminal.providers import ParserBeautifulSoup
|
|
|
|
|
from subliminal_patch.subtitle import Subtitle
|
|
|
|
|
from subliminal.video import Episode, Movie
|
|
|
|
|
from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending,guess_matches
|
|
|
|
|
from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending, guess_matches
|
|
|
|
|
from subzero.language import Language
|
|
|
|
|
from subliminal_patch.score import get_scores
|
|
|
|
|
from subliminal.utils import sanitize, sanitize_release_group
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
@ -27,11 +29,12 @@ class LegendasdivxSubtitle(Subtitle):
|
|
|
|
|
super(LegendasdivxSubtitle, self).__init__(language)
|
|
|
|
|
self.language = language
|
|
|
|
|
self.page_link = data['link']
|
|
|
|
|
self.hits=data['hits']
|
|
|
|
|
self.exact_match=data['exact_match']
|
|
|
|
|
self.description=data['description'].lower()
|
|
|
|
|
self.hits = data['hits']
|
|
|
|
|
self.exact_match = data['exact_match']
|
|
|
|
|
self.description = data['description']
|
|
|
|
|
self.video = video
|
|
|
|
|
self.videoname =data['videoname']
|
|
|
|
|
self.video_filename = data['video_filename']
|
|
|
|
|
self.uploader = data['uploader']
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
def id(self):
|
|
|
|
@ -44,40 +47,37 @@ class LegendasdivxSubtitle(Subtitle):
|
|
|
|
|
def get_matches(self, video):
|
|
|
|
|
matches = set()
|
|
|
|
|
|
|
|
|
|
if self.videoname.lower() in self.description:
|
|
|
|
|
description = sanitize(self.description)
|
|
|
|
|
|
|
|
|
|
if sanitize(self.video_filename) in description:
|
|
|
|
|
matches.update(['title'])
|
|
|
|
|
matches.update(['season'])
|
|
|
|
|
matches.update(['episode'])
|
|
|
|
|
|
|
|
|
|
# episode
|
|
|
|
|
if video.title and video.title.lower() in self.description:
|
|
|
|
|
if video.title and sanitize(video.title) in description:
|
|
|
|
|
matches.update(['title'])
|
|
|
|
|
if video.year and '{:04d}'.format(video.year) in self.description:
|
|
|
|
|
if video.year and '{:04d}'.format(video.year) in description:
|
|
|
|
|
matches.update(['year'])
|
|
|
|
|
|
|
|
|
|
if isinstance(video, Episode):
|
|
|
|
|
# already matched in search query
|
|
|
|
|
if video.season and 's{:02d}'.format(video.season) in self.description:
|
|
|
|
|
if video.season and 's{:02d}'.format(video.season) in description:
|
|
|
|
|
matches.update(['season'])
|
|
|
|
|
if video.episode and 'e{:02d}'.format(video.episode) in self.description:
|
|
|
|
|
if video.episode and 'e{:02d}'.format(video.episode) in description:
|
|
|
|
|
matches.update(['episode'])
|
|
|
|
|
if video.episode and video.season and video.series:
|
|
|
|
|
if '{}.s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description:
|
|
|
|
|
matches.update(['series'])
|
|
|
|
|
matches.update(['season'])
|
|
|
|
|
matches.update(['episode'])
|
|
|
|
|
if '{} s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description:
|
|
|
|
|
if '{} s{:02d}e{:02d}'.format(sanitize(video.series), video.season, video.episode) in description:
|
|
|
|
|
matches.update(['series'])
|
|
|
|
|
matches.update(['season'])
|
|
|
|
|
matches.update(['episode'])
|
|
|
|
|
|
|
|
|
|
# release_group
|
|
|
|
|
if video.release_group and video.release_group.lower() in self.description:
|
|
|
|
|
if video.release_group and sanitize_release_group(video.release_group) in sanitize_release_group(description):
|
|
|
|
|
matches.update(['release_group'])
|
|
|
|
|
|
|
|
|
|
# resolution
|
|
|
|
|
|
|
|
|
|
if video.resolution and video.resolution.lower() in self.description:
|
|
|
|
|
if video.resolution and video.resolution.lower() in description:
|
|
|
|
|
matches.update(['resolution'])
|
|
|
|
|
|
|
|
|
|
# format
|
|
|
|
@ -87,9 +87,9 @@ class LegendasdivxSubtitle(Subtitle):
|
|
|
|
|
if formats[0] == "web-dl":
|
|
|
|
|
formats.append("webdl")
|
|
|
|
|
formats.append("webrip")
|
|
|
|
|
formats.append("web ")
|
|
|
|
|
formats.append("web")
|
|
|
|
|
for frmt in formats:
|
|
|
|
|
if frmt.lower() in self.description:
|
|
|
|
|
if frmt in description:
|
|
|
|
|
matches.update(['format'])
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
@ -97,11 +97,11 @@ class LegendasdivxSubtitle(Subtitle):
|
|
|
|
|
if video.video_codec:
|
|
|
|
|
video_codecs = [video.video_codec.lower()]
|
|
|
|
|
if video_codecs[0] == "h264":
|
|
|
|
|
formats.append("x264")
|
|
|
|
|
video_codecs.append("x264")
|
|
|
|
|
elif video_codecs[0] == "h265":
|
|
|
|
|
formats.append("x265")
|
|
|
|
|
for vc in formats:
|
|
|
|
|
if vc.lower() in self.description:
|
|
|
|
|
video_codecs.append("x265")
|
|
|
|
|
for vc in video_codecs:
|
|
|
|
|
if vc in description:
|
|
|
|
|
matches.update(['video_codec'])
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
@ -109,9 +109,6 @@ class LegendasdivxSubtitle(Subtitle):
|
|
|
|
|
# matches |= guess_matches(video, guessit(self.description))
|
|
|
|
|
return matches
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LegendasdivxProvider(Provider):
|
|
|
|
|
"""Legendasdivx Provider."""
|
|
|
|
|
languages = {Language('por', 'BR')} | {Language('por')}
|
|
|
|
@ -126,15 +123,21 @@ class LegendasdivxProvider(Provider):
|
|
|
|
|
'Cache-Control': 'no-cache'
|
|
|
|
|
}
|
|
|
|
|
loginpage = site + '/forum/ucp.php?mode=login'
|
|
|
|
|
logoutpage = site + '/sair.php'
|
|
|
|
|
searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}'
|
|
|
|
|
language_list = list(languages)
|
|
|
|
|
download_link = site + '/modules.php{link}'
|
|
|
|
|
|
|
|
|
|
def __init__(self, username, password):
    """Store the account credentials used by ``login``.

    A half-configured account (exactly one of ``username`` / ``password``
    truthy) is rejected. Note that this check lets the "both empty" case
    through unchanged.

    Raises:
        ConfigurationError: only one of the two credentials was supplied.
    """
    # make sure login credentials are configured.
    if bool(username) != bool(password):
        raise ConfigurationError('Username and password must be specified')

    self.username, self.password = username, password
    # No authenticated session exists until login() succeeds.
    self.logged_in = False
|
|
|
|
|
|
|
|
|
|
def initialize(self):
    """Create the HTTP session, apply the provider's headers and log in."""
    session = Session()
    session.headers.update(self.headers)
    # Publish the session on the instance before login(), which uses it.
    self.session = session
    self.login()
|
|
|
|
|
|
|
|
|
|
def terminate(self):
|
|
|
|
@ -143,100 +146,103 @@ class LegendasdivxProvider(Provider):
|
|
|
|
|
|
|
|
|
|
def login(self):
    """Authenticate ``self.session`` through the site's forum login form.

    The login page is fetched first and every ``<input>`` name/value pair
    found on it is copied into the form payload (needed so the 'sid' field
    is sent back with the POST). The configured credentials are then added
    and the form is posted once. A ``PHPSESSID`` cookie on the session is
    taken as proof of success, after which ``self.logged_in`` becomes True.

    Raises:
        AuthenticationError: no ``PHPSESSID`` cookie is present after the POST.
        ParseResponseError: an unexpected error occurred and the response
            text contains 'bloqueado' (blocked IP address).
        ServiceUnavailable: any other unexpected error.
        Anything ``res.raise_for_status()`` raises for the login POST.
    """
    logger.info('Logging in')

    res = self.session.get(self.loginpage)
    bsoup = ParserBeautifulSoup(res.content, ['lxml'])

    # necessary to set 'sid' for POST request
    data = {}
    for field in bsoup.findAll('input'):
        data[field.get('name')] = field.get('value')

    data['username'] = self.username
    data['password'] = self.password

    res = self.session.post(self.loginpage, data)
    res.raise_for_status()

    try:
        logger.debug('Logged in successfully: PHPSESSID: %s',
                     self.session.cookies.get_dict()['PHPSESSID'])
        self.logged_in = True
    except KeyError:
        logger.error("Couldn't retrieve session ID, check your credentials")
        raise AuthenticationError("Please check your credentials.")
    except Exception as e:
        # NOTE(review): the guarded statements are a cookie lookup and a log
        # call, so this handler looks hard to reach in practice -- confirm
        # whether the blocked-IP check should run before the cookie lookup.
        if 'bloqueado' in res.text.lower():  # blocked IP address
            logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
            raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)
        logger.error("LegendasDivx.pt :: Uncaught error: %r", repr(e))
        raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e))
|
|
|
|
|
|
|
|
|
|
def logout(self):
    """End the authenticated session on the site, if there is one.

    Does nothing when ``self.logged_in`` is false. Otherwise requests the
    logout page (10 second timeout) and clears ``self.logged_in`` once the
    request has succeeded. Propagates whatever ``raise_for_status()``
    raises, in which case ``self.logged_in`` is left unchanged.
    """
    if not self.logged_in:
        return

    logger.info('Legendasdivx:: Logging out')
    r = self.session.get(self.logoutpage, timeout=10)
    r.raise_for_status()
    logger.debug('Legendasdivx :: Logged out')
    self.logged_in = False
|
|
|
|
|
|
|
|
|
|
def _process_page(self, video, bsoup, video_filename):
    """Turn one parsed search-results page into subtitle objects.

    Every ``div.sub_box`` on the page is treated as one result. A result is
    skipped when its language flag is neither Brazilian nor Portuguese, or
    when no ``a.sub_download`` link can be found in it.

    Args:
        video: the video being searched for; ``video.name`` is compared with
            the result description, and the object is handed to each
            ``LegendasdivxSubtitle``.
        bsoup: the parsed results page.
        video_filename: stored under ``'video_filename'`` in each result's data.

    Returns:
        A list of ``LegendasdivxSubtitle``, one per usable result.
    """
    subtitles = []

    for _subbox in bsoup.findAll("div", {"class": "sub_box"}):
        hits = 0
        # Reset per result so an unsupported result can neither reuse the
        # previous result's language nor carry a raw image path as language.
        lang = None
        for th in _subbox.findAll("th", {"class": "color2"}):
            if th.string == 'Hits:':
                hits = int(th.parent.find("td").string)
            if th.string == 'Idioma:':
                flag_src = (th.parent.find("td").find("img").get('src') or '').lower()
                if 'brazil' in flag_src:
                    lang = Language.fromopensubtitles('pob')
                elif 'portugal' in flag_src:
                    lang = Language.fromopensubtitles('por')

        if lang is None:
            # Unsupported (or missing) language: skip the whole result, not
            # just the current header cell.
            continue

        # get description for matches
        description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text()

        # get subtitle link
        download = _subbox.find("a", {"class": "sub_download"})
        if download is None:
            # Searching the same parsed tree again cannot give a different
            # answer, so retrying would loop forever; skip this result.
            logger.warning("Skipping result without a download link")
            continue
        dl = download.get('href')
        logger.debug("Found subtitle on: %s", self.download_link.format(link=dl))

        # get subtitle uploader
        sub_header = _subbox.find("div", {"class": "sub_header"})
        uploader = sub_header.find("a").text if sub_header else 'anonymous'

        exact_match = video.name.lower() in description.lower()

        data = {
            'link': self.site + '/modules.php' + dl,
            'exact_match': exact_match,
            'hits': hits,
            'uploader': uploader,
            'video_filename': video_filename,
            'description': description,
        }
        subtitles.append(LegendasdivxSubtitle(lang, video, data))

    return subtitles
|
|
|
|
|
|
|
|
|
|
def query(self, video, language):
|
|
|
|
|
try:
|
|
|
|
|
logger.debug('Got session id %s' %
|
|
|
|
|
self.session.cookies.get_dict()['PHPSESSID'])
|
|
|
|
|
except Exception as e:
|
|
|
|
|
self.login()
|
|
|
|
|
|
|
|
|
|
language_ids = '0'
|
|
|
|
|
if isinstance(language, (tuple, list, set)):
|
|
|
|
|
if len(language) == 1:
|
|
|
|
|
language_ids = ','.join(sorted(l.opensubtitles for l in language))
|
|
|
|
|
if language_ids == 'por':
|
|
|
|
|
language_ids = '&form_cat=28'
|
|
|
|
|
else:
|
|
|
|
|
language_ids = '&form_cat=29'
|
|
|
|
|
|
|
|
|
|
videoname = video.name
|
|
|
|
|
videoname = os.path.basename(videoname)
|
|
|
|
|
videoname, _ = os.path.splitext(videoname)
|
|
|
|
|
# querytext = videoname.lower()
|
|
|
|
|
def query(self, video, languages):
|
|
|
|
|
|
|
|
|
|
video_filename = video.name
|
|
|
|
|
video_filename = os.path.basename(video_filename)
|
|
|
|
|
video_filename, _ = os.path.splitext(video_filename)
|
|
|
|
|
video_filename = sanitize_release_group(video_filename)
|
|
|
|
|
|
|
|
|
|
_searchurl = self.searchurl
|
|
|
|
|
if video.imdb_id is None:
|
|
|
|
|
if isinstance(video, Episode):
|
|
|
|
@ -246,22 +252,47 @@ class LegendasdivxProvider(Provider):
|
|
|
|
|
else:
|
|
|
|
|
querytext = video.imdb_id
|
|
|
|
|
|
|
|
|
|
# language query filter
|
|
|
|
|
if isinstance(languages, (tuple, list, set)):
|
|
|
|
|
language_ids = ','.join(sorted(l.opensubtitles for l in languages))
|
|
|
|
|
if 'por' in language_ids: # prioritize portuguese subtitles
|
|
|
|
|
lang_filter = '&form_cat=28' # pt
|
|
|
|
|
elif 'pob' in language_ids:
|
|
|
|
|
lang_filter = '&form_cat=29' # br
|
|
|
|
|
else:
|
|
|
|
|
lang_filter = ''
|
|
|
|
|
|
|
|
|
|
querytext = querytext + lang_filter if lang_filter else querytext
|
|
|
|
|
|
|
|
|
|
# querytext = querytext.replace(
|
|
|
|
|
# ".", "+").replace("[", "").replace("]", "")
|
|
|
|
|
if language_ids != '0':
|
|
|
|
|
querytext = querytext + language_ids
|
|
|
|
|
self.headers['Referer'] = self.site + '/index.php'
|
|
|
|
|
self.session.headers.update(self.headers.items())
|
|
|
|
|
res = self.session.get(_searchurl.format(query=querytext))
|
|
|
|
|
# form_cat=28 = br
|
|
|
|
|
# form_cat=29 = pt
|
|
|
|
|
|
|
|
|
|
if "A legenda não foi encontrada" in res.text:
|
|
|
|
|
logger.warning('%s not found', querytext)
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
bsoup = ParserBeautifulSoup(res.content, ['html.parser'])
|
|
|
|
|
subtitles = self._process_page(video, bsoup, querytext, videoname)
|
|
|
|
|
subtitles = self._process_page(video, bsoup, video_filename)
|
|
|
|
|
|
|
|
|
|
# search for more than 10 results (legendasdivx uses pagination)
|
|
|
|
|
# don't throttle - maximum results = 6 * 10
|
|
|
|
|
MAX_PAGES = 6
|
|
|
|
|
|
|
|
|
|
#get number of pages bases on results found
|
|
|
|
|
page_header = bsoup.find("div", {"class": "pager_bar"})
|
|
|
|
|
results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1)
|
|
|
|
|
num_pages = (int(results_found) // 10) + 1
|
|
|
|
|
num_pages = min(MAX_PAGES, num_pages)
|
|
|
|
|
|
|
|
|
|
if num_pages > 1:
|
|
|
|
|
for num_page in range(2, num_pages+2):
|
|
|
|
|
_search_next = self.searchurl.format(query=querytext) + "&page={0}".format(str(num_page))
|
|
|
|
|
logger.debug("Moving to next page: %s" % _search_next)
|
|
|
|
|
res = self.session.get(_search_next)
|
|
|
|
|
next_page = ParserBeautifulSoup(res.content, ['html.parser'])
|
|
|
|
|
subs = self._process_page(video, next_page, video_filename)
|
|
|
|
|
subtitles.extend(subs)
|
|
|
|
|
|
|
|
|
|
return subtitles
|
|
|
|
|
|
|
|
|
@ -270,9 +301,14 @@ class LegendasdivxProvider(Provider):
|
|
|
|
|
|
|
|
|
|
def download_subtitle(self, subtitle):
|
|
|
|
|
res = self.session.get(subtitle.page_link)
|
|
|
|
|
res.raise_for_status()
|
|
|
|
|
if res:
|
|
|
|
|
if res.text == '500':
|
|
|
|
|
raise ValueError('Error 500 on server')
|
|
|
|
|
if res.status_code in ['500', '503']:
|
|
|
|
|
raise ServiceUnavailable("Legendasdivx.pt :: 503 - Service Unavailable")
|
|
|
|
|
elif 'limite' in res.text.lower(): # daily downloads limit reached
|
|
|
|
|
raise DownloadLimitReached("Legendasdivx.pt :: Download limit reached")
|
|
|
|
|
elif 'bloqueado' in res.text.lower(): # blocked IP address
|
|
|
|
|
raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)
|
|
|
|
|
|
|
|
|
|
archive = self._get_archive(res.content)
|
|
|
|
|
# extract the subtitle
|
|
|
|
@ -281,7 +317,9 @@ class LegendasdivxProvider(Provider):
|
|
|
|
|
subtitle.normalize()
|
|
|
|
|
|
|
|
|
|
return subtitle
|
|
|
|
|
raise ValueError('Problems conecting to the server')
|
|
|
|
|
|
|
|
|
|
logger.error("Legendasdivx.pt :: there was a problem retrieving subtitle (status %s)" % res.status_code)
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
def _get_archive(self, content):
|
|
|
|
|
# open the archive
|
|
|
|
@ -294,7 +332,6 @@ class LegendasdivxProvider(Provider):
|
|
|
|
|
logger.debug('Identified zip archive')
|
|
|
|
|
archive = zipfile.ZipFile(archive_stream)
|
|
|
|
|
else:
|
|
|
|
|
# raise ParseResponseError('Unsupported compressed format')
|
|
|
|
|
raise Exception('Unsupported compressed format')
|
|
|
|
|
|
|
|
|
|
return archive
|
|
|
|
@ -305,7 +342,7 @@ class LegendasdivxProvider(Provider):
|
|
|
|
|
_tmp.remove('.txt')
|
|
|
|
|
_subtitle_extensions = tuple(_tmp)
|
|
|
|
|
_max_score = 0
|
|
|
|
|
_scores = get_scores (subtitle.video)
|
|
|
|
|
_scores = get_scores(subtitle.video)
|
|
|
|
|
|
|
|
|
|
for name in archive.namelist():
|
|
|
|
|
# discard hidden files
|
|
|
|
@ -338,4 +375,4 @@ class LegendasdivxProvider(Provider):
|
|
|
|
|
logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score))
|
|
|
|
|
return archive.read(_max_name)
|
|
|
|
|
|
|
|
|
|
raise ParseResponseError('Can not find the subtitle in the compressed file')
|
|
|
|
|
raise ValueError("No subtitle found on compressed file. Max score was 0")
|