Merge branch 'development' into hermes

pull/997/head
Louis Vézina 5 years ago
commit 442f5d2a5e

@ -209,7 +209,7 @@ class TVsubtitlesProvider(Provider):
if subtitles: if subtitles:
return subtitles return subtitles
else: else:
logger.error('No show id found for %r (%r)', video.series, {'year': video.year}) logger.debug('No show id found for %r (%r)', video.series, {'year': video.year})
return [] return []

@ -55,7 +55,7 @@ class ArgenteamSubtitle(Subtitle):
return self._release_info return self._release_info
combine = [] combine = []
for attr in ("format", "version", "video_codec"): for attr in ("format", "version"):
value = getattr(self, attr) value = getattr(self, attr)
if value: if value:
combine.append(value) combine.append(value)
@ -76,9 +76,11 @@ class ArgenteamSubtitle(Subtitle):
if video.series and (sanitize(self.title) in ( if video.series and (sanitize(self.title) in (
sanitize(name) for name in [video.series] + video.alternative_series)): sanitize(name) for name in [video.series] + video.alternative_series)):
matches.add('series') matches.add('series')
# season # season
if video.season and self.season == video.season: if video.season and self.season == video.season:
matches.add('season') matches.add('season')
# episode # episode
if video.episode and self.episode == video.episode: if video.episode and self.episode == video.episode:
matches.add('episode') matches.add('episode')
@ -87,6 +89,9 @@ class ArgenteamSubtitle(Subtitle):
if video.tvdb_id and str(self.tvdb_id) == str(video.tvdb_id): if video.tvdb_id and str(self.tvdb_id) == str(video.tvdb_id):
matches.add('tvdb_id') matches.add('tvdb_id')
# year (year is not available for series, but we assume it matches)
matches.add('year')
elif isinstance(video, Movie) and self.movie_kind == 'movie': elif isinstance(video, Movie) and self.movie_kind == 'movie':
# title # title
if video.title and (sanitize(self.title) in ( if video.title and (sanitize(self.title) in (
@ -230,29 +235,29 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
has_multiple_ids = len(argenteam_ids) > 1 has_multiple_ids = len(argenteam_ids) > 1
for aid in argenteam_ids: for aid in argenteam_ids:
response = self.session.get(url, params={'id': aid}, timeout=10) response = self.session.get(url, params={'id': aid}, timeout=10)
response.raise_for_status() response.raise_for_status()
content = response.json() content = response.json()
imdb_id = year = None if content is not None: # eg https://argenteam.net/api/v1/episode?id=11534
returned_title = title imdb_id = year = None
if not is_episode and "info" in content: returned_title = title
imdb_id = content["info"].get("imdb") if not is_episode and "info" in content:
year = content["info"].get("year") imdb_id = content["info"].get("imdb")
returned_title = content["info"].get("title", title) year = content["info"].get("year")
returned_title = content["info"].get("title", title)
for r in content['releases']:
for s in r['subtitles']: for r in content['releases']:
movie_kind = "episode" if is_episode else "movie" for s in r['subtitles']:
page_link = self.BASE_URL + movie_kind + "/" + str(aid) movie_kind = "episode" if is_episode else "movie"
# use https and new domain page_link = self.BASE_URL + movie_kind + "/" + str(aid)
download_link = s['uri'].replace('http://www.argenteam.net/', self.BASE_URL) # use https and new domain
sub = ArgenteamSubtitle(language, page_link, download_link, movie_kind, returned_title, download_link = s['uri'].replace('http://www.argenteam.net/', self.BASE_URL)
season, episode, year, r.get('team'), r.get('tags'), sub = ArgenteamSubtitle(language, page_link, download_link, movie_kind, returned_title,
r.get('source'), r.get('codec'), content.get("tvdb"), imdb_id, season, episode, year, r.get('team'), r.get('tags'),
asked_for_release_group=video.release_group, r.get('source'), r.get('codec'), content.get("tvdb"), imdb_id,
asked_for_episode=episode) asked_for_release_group=video.release_group,
subtitles.append(sub) asked_for_episode=episode)
subtitles.append(sub)
if has_multiple_ids: if has_multiple_ids:
time.sleep(self.multi_result_throttle) time.sleep(self.multi_result_throttle)

@ -6,6 +6,7 @@ import os
import re import re
import zipfile import zipfile
from time import sleep from time import sleep
from urllib.parse import quote
from requests.exceptions import HTTPError from requests.exceptions import HTTPError
import rarfile import rarfile
@ -39,7 +40,6 @@ class LegendasdivxSubtitle(Subtitle):
self.description = data['description'] self.description = data['description']
self.video = video self.video = video
self.sub_frame_rate = data['frame_rate'] self.sub_frame_rate = data['frame_rate']
self.video_filename = data['video_filename']
self.uploader = data['uploader'] self.uploader = data['uploader']
self.wrong_fps = False self.wrong_fps = False
self.skip_wrong_fps = skip_wrong_fps self.skip_wrong_fps = skip_wrong_fps
@ -74,28 +74,41 @@ class LegendasdivxSubtitle(Subtitle):
description = sanitize(self.description) description = sanitize(self.description)
if sanitize(self.video_filename) in description: video_filename = video.name
matches.update(['title']) video_filename = os.path.basename(video_filename)
matches.update(['season']) video_filename, _ = os.path.splitext(video_filename)
matches.update(['episode']) video_filename = sanitize_release_group(video_filename)
# episode if sanitize(video_filename) in description:
if video.title and sanitize(video.title) in description:
matches.update(['title']) matches.update(['title'])
# relying on people not using just S01E01 as the file name
if isinstance(video, Episode):
matches.update(['series'])
matches.update(['season'])
matches.update(['episode'])
# can match both movies and series
if video.year and '{:04d}'.format(video.year) in description: if video.year and '{:04d}'.format(video.year) in description:
matches.update(['year']) matches.update(['year'])
# match movie title (include alternative movie names)
if isinstance(video, Movie):
if video.title:
for movie_name in [video.title] + video.alternative_titles:
if sanitize(movie_name) in description:
matches.update(['title'])
if isinstance(video, Episode): if isinstance(video, Episode):
# already matched in search query if video.title and sanitize(video.title) in description:
matches.update(['title'])
if video.series:
for series_name in [video.series] + video.alternative_series:
if sanitize(series_name) in description:
matches.update(['series'])
if video.season and 's{:02d}'.format(video.season) in description: if video.season and 's{:02d}'.format(video.season) in description:
matches.update(['season']) matches.update(['season'])
if video.episode and 'e{:02d}'.format(video.episode) in description: if video.episode and 'e{:02d}'.format(video.episode) in description:
matches.update(['episode']) matches.update(['episode'])
if video.episode and video.season and video.series:
if '{} s{:02d}e{:02d}'.format(sanitize(video.series), video.season, video.episode) in description:
matches.update(['series'])
matches.update(['season'])
matches.update(['episode'])
# release_group # release_group
if video.release_group and sanitize_release_group(video.release_group) in sanitize_release_group(description): if video.release_group and sanitize_release_group(video.release_group) in sanitize_release_group(description):
@ -156,14 +169,14 @@ class LegendasdivxProvider(Provider):
self.skip_wrong_fps = skip_wrong_fps self.skip_wrong_fps = skip_wrong_fps
def initialize(self): def initialize(self):
logger.info("Legendasdivx.pt :: Creating session for requests") logger.debug("Legendasdivx.pt :: Creating session for requests")
self.session = RetryingCFSession() self.session = RetryingCFSession()
# re-use PHP Session if present # re-use PHP Session if present
prev_cookies = region.get("legendasdivx_cookies2") prev_cookies = region.get("legendasdivx_cookies2")
if prev_cookies != NO_VALUE: if prev_cookies != NO_VALUE:
logger.debug("Legendasdivx.pt :: Re-using previous legendasdivx cookies: %s", prev_cookies) logger.debug("Legendasdivx.pt :: Re-using previous legendasdivx cookies: %s", prev_cookies)
self.session.cookies.update(prev_cookies) self.session.cookies.update(prev_cookies)
# Login if session has expired # login if session has expired
else: else:
logger.debug("Legendasdivx.pt :: Session cookies not found!") logger.debug("Legendasdivx.pt :: Session cookies not found!")
self.session.headers.update(self.headers) self.session.headers.update(self.headers)
@ -174,7 +187,7 @@ class LegendasdivxProvider(Provider):
self.session.close() self.session.close()
def login(self): def login(self):
logger.info('Legendasdivx.pt :: Logging in') logger.debug('Legendasdivx.pt :: Logging in')
try: try:
res = self.session.get(self.loginpage) res = self.session.get(self.loginpage)
res.raise_for_status() res.raise_for_status()
@ -191,14 +204,14 @@ class LegendasdivxProvider(Provider):
res = self.session.post(self.loginpage, data) res = self.session.post(self.loginpage, data)
res.raise_for_status() res.raise_for_status()
#make sure we're logged in # make sure we're logged in
logger.debug('Legendasdivx.pt :: Logged in successfully: PHPSESSID: %s', self.session.cookies.get_dict()['PHPSESSID']) logger.debug('Legendasdivx.pt :: Logged in successfully: PHPSESSID: %s', self.session.cookies.get_dict()['PHPSESSID'])
cj = self.session.cookies.copy() cj = self.session.cookies.copy()
store_cks = ("PHPSESSID", "phpbb3_2z8zs_sid", "phpbb3_2z8zs_k", "phpbb3_2z8zs_u", "lang") store_cks = ("PHPSESSID", "phpbb3_2z8zs_sid", "phpbb3_2z8zs_k", "phpbb3_2z8zs_u", "lang")
for cn in iter(self.session.cookies.keys()): for cn in iter(self.session.cookies.keys()):
if cn not in store_cks: if cn not in store_cks:
del cj[cn] del cj[cn]
#store session cookies on cache # store session cookies on cache
logger.debug("Legendasdivx.pt :: Storing legendasdivx session cookies: %r", cj) logger.debug("Legendasdivx.pt :: Storing legendasdivx session cookies: %r", cj)
region.set("legendasdivx_cookies2", cj) region.set("legendasdivx_cookies2", cj)
@ -206,7 +219,7 @@ class LegendasdivxProvider(Provider):
logger.error("Legendasdivx.pt :: Couldn't get session ID, check your credentials") logger.error("Legendasdivx.pt :: Couldn't get session ID, check your credentials")
raise AuthenticationError("Legendasdivx.pt :: Couldn't get session ID, check your credentials") raise AuthenticationError("Legendasdivx.pt :: Couldn't get session ID, check your credentials")
except HTTPError as e: except HTTPError as e:
if "bloqueado" in res.text.lower(): # ip blocked on server if "bloqueado" in res.text.lower():
logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.") raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
logger.error("Legendasdivx.pt :: HTTP Error %s", e) logger.error("Legendasdivx.pt :: HTTP Error %s", e)
@ -215,13 +228,14 @@ class LegendasdivxProvider(Provider):
logger.error("LegendasDivx.pt :: Uncaught error: %r", e) logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e) raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e)
def _process_page(self, video, bsoup, video_filename): def _process_page(self, video, bsoup):
subtitles = [] subtitles = []
_allsubs = bsoup.findAll("div", {"class": "sub_box"}) _allsubs = bsoup.findAll("div", {"class": "sub_box"})
for _subbox in _allsubs: for _subbox in _allsubs:
hits = 0 hits = 0
for th in _subbox.findAll("th"): for th in _subbox.findAll("th"):
if th.text == 'Hits:': if th.text == 'Hits:':
@ -239,10 +253,12 @@ class LegendasdivxProvider(Provider):
# get description for matches # get description for matches
description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text() description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text()
#get subtitle link
download = _subbox.find("a", {"class": "sub_download"})
# sometimes BSoup can't find 'a' tag and returns None. # get subtitle link from footer
sub_footer = _subbox.find("div", {"class": "sub_footer"})
download = sub_footer.find("a", {"class": "sub_download"}) if sub_footer else None
# sometimes 'a' tag is not found and returns None. Most likely HTML format error!
try: try:
download_link = self.download_link.format(link=download.get('href')) download_link = self.download_link.format(link=download.get('href'))
logger.debug("Legendasdivx.pt :: Found subtitle link on: %s ", download_link) logger.debug("Legendasdivx.pt :: Found subtitle link on: %s ", download_link)
@ -257,12 +273,12 @@ class LegendasdivxProvider(Provider):
exact_match = False exact_match = False
if video.name.lower() in description.lower(): if video.name.lower() in description.lower():
exact_match = True exact_match = True
data = {'link': download_link, data = {'link': download_link,
'exact_match': exact_match, 'exact_match': exact_match,
'hits': hits, 'hits': hits,
'uploader': uploader, 'uploader': uploader,
'frame_rate': frame_rate, 'frame_rate': frame_rate,
'video_filename': video_filename,
'description': description 'description': description
} }
subtitles.append( subtitles.append(
@ -272,27 +288,22 @@ class LegendasdivxProvider(Provider):
def query(self, video, languages): def query(self, video, languages):
video_filename = video.name
video_filename = os.path.basename(video_filename)
video_filename, _ = os.path.splitext(video_filename)
video_filename = sanitize_release_group(video_filename)
_searchurl = self.searchurl _searchurl = self.searchurl
if video.imdb_id is None:
if isinstance(video, Episode): if isinstance(video, Movie):
querytext = "{} S{:02d}E{:02d}".format(video.series, video.season, video.episode) querytext = video.imdb_id if video.imdb_id else video.title
elif isinstance(video, Movie):
querytext = video.title if isinstance(video, Episode):
else: querytext = '"{} S{:02d}E{:02d}"'.format(video.series, video.season, video.episode)
querytext = video.imdb_id querytext = quote(quote(querytext))
# language query filter # language query filter
if isinstance(languages, (tuple, list, set)): if isinstance(languages, (tuple, list, set)):
language_ids = ','.join(sorted(l.opensubtitles for l in languages)) language_ids = ','.join(sorted(l.opensubtitles for l in languages))
if 'por' in language_ids: # prioritize portuguese subtitles if 'por' in language_ids: # prioritize portuguese subtitles
lang_filter = '&form_cat=28' # pt lang_filter = '&form_cat=28'
elif 'pob' in language_ids: elif 'pob' in language_ids:
lang_filter = '&form_cat=29' # br lang_filter = '&form_cat=29'
else: else:
lang_filter = '' lang_filter = ''
@ -306,17 +317,26 @@ class LegendasdivxProvider(Provider):
res = self.session.get(_searchurl.format(query=querytext), allow_redirects=False) res = self.session.get(_searchurl.format(query=querytext), allow_redirects=False)
res.raise_for_status() res.raise_for_status()
if (res.status_code == 200 and "A legenda não foi encontrada" in res.text): if (res.status_code == 200 and "A legenda não foi encontrada" in res.text):
logger.warning('Legendasdivx.pt :: %s not found', querytext) logger.warning('Legendasdivx.pt :: query %s return no results!', querytext)
return [] # for series, if no results found, try again just with series and season (subtitle packs)
if isinstance(video, Episode):
logger.debug("Legendasdivx.pt :: trying again with just series and season on query.")
querytext = re.sub("(e|E)(\d{2})", "", querytext)
res = self.session.get(_searchurl.format(query=querytext), allow_redirects=False)
res.raise_for_status()
if (res.status_code == 200 and "A legenda não foi encontrada" in res.text):
logger.warning('Legendasdivx.pt :: query %s return no results (for series and season only).', querytext)
return []
if res.status_code == 302: # got redirected to login page. if res.status_code == 302: # got redirected to login page.
# Seems that our session cookies are no longer valid... clean them from cache # seems that our session cookies are no longer valid... clean them from cache
region.delete("legendasdivx_cookies2") region.delete("legendasdivx_cookies2")
logger.debug("Legendasdivx.pt :: Logging in again. Cookies have expired!") logger.debug("Legendasdivx.pt :: Logging in again. Cookies have expired!")
self.login() # login and try again # login and try again
self.login()
res = self.session.get(_searchurl.format(query=querytext)) res = self.session.get(_searchurl.format(query=querytext))
res.raise_for_status() res.raise_for_status()
except HTTPError as e: except HTTPError as e:
if "bloqueado" in res.text.lower(): # ip blocked on server if "bloqueado" in res.text.lower():
logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.") raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
logger.error("Legendasdivx.pt :: HTTP Error %s", e) logger.error("Legendasdivx.pt :: HTTP Error %s", e)
@ -339,7 +359,7 @@ class LegendasdivxProvider(Provider):
num_pages = min(MAX_PAGES, num_pages) num_pages = min(MAX_PAGES, num_pages)
# process first page # process first page
subtitles = self._process_page(video, bsoup, video_filename) subtitles = self._process_page(video, bsoup)
# more pages? # more pages?
if num_pages > 1: if num_pages > 1:
@ -349,7 +369,7 @@ class LegendasdivxProvider(Provider):
logger.debug("Legendasdivx.pt :: Moving on to next page: %s", _search_next) logger.debug("Legendasdivx.pt :: Moving on to next page: %s", _search_next)
res = self.session.get(_search_next) res = self.session.get(_search_next)
next_page = ParserBeautifulSoup(res.content, ['html.parser']) next_page = ParserBeautifulSoup(res.content, ['html.parser'])
subs = self._process_page(video, next_page, video_filename) subs = self._process_page(video, next_page)
subtitles.extend(subs) subtitles.extend(subs)
return subtitles return subtitles
@ -363,7 +383,7 @@ class LegendasdivxProvider(Provider):
res = self.session.get(subtitle.page_link) res = self.session.get(subtitle.page_link)
res.raise_for_status() res.raise_for_status()
except HTTPError as e: except HTTPError as e:
if "bloqueado" in res.text.lower(): # ip blocked on server if "bloqueado" in res.text.lower():
logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.") raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
logger.error("Legendasdivx.pt :: HTTP Error %s", e) logger.error("Legendasdivx.pt :: HTTP Error %s", e)
@ -373,21 +393,22 @@ class LegendasdivxProvider(Provider):
raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e) raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e)
# make sure we haven't maxed out our daily limit # make sure we haven't maxed out our daily limit
if (res.status_code == 200 and 'limite' in res.text.lower()): # daily downloads limit reached if (res.status_code == 200 and 'limite de downloads diário atingido' in res.text.lower()):
logger.error("LegendasDivx.pt :: Daily download limit reached!") logger.error("LegendasDivx.pt :: Daily download limit reached!")
raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!") raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!")
archive = self._get_archive(res.content) archive = self._get_archive(res.content)
# extract the subtitle # extract the subtitle
subtitle_content = self._get_subtitle_from_archive(archive, subtitle) if archive:
subtitle.content = fix_line_ending(subtitle_content) subtitle_content = self._get_subtitle_from_archive(archive, subtitle)
subtitle.normalize() if subtitle_content:
subtitle.content = fix_line_ending(subtitle_content)
return subtitle subtitle.normalize()
return subtitle
return
def _get_archive(self, content): def _get_archive(self, content):
# open the archive # open the archive
# stole^H^H^H^H^H inspired from subvix provider
archive_stream = io.BytesIO(content) archive_stream = io.BytesIO(content)
if rarfile.is_rarfile(archive_stream): if rarfile.is_rarfile(archive_stream):
logger.debug('Legendasdivx.pt :: Identified rar archive') logger.debug('Legendasdivx.pt :: Identified rar archive')
@ -396,8 +417,8 @@ class LegendasdivxProvider(Provider):
logger.debug('Legendasdivx.pt :: Identified zip archive') logger.debug('Legendasdivx.pt :: Identified zip archive')
archive = zipfile.ZipFile(archive_stream) archive = zipfile.ZipFile(archive_stream)
else: else:
raise ValueError('Legendasdivx.pt :: Unsupported compressed format') logger.error('Legendasdivx.pt :: Unsupported compressed format')
return None
return archive return archive
def _get_subtitle_from_archive(self, archive, subtitle): def _get_subtitle_from_archive(self, archive, subtitle):
@ -428,7 +449,7 @@ class LegendasdivxProvider(Provider):
matches = set() matches = set()
matches |= guess_matches(subtitle.video, _guess) matches |= guess_matches(subtitle.video, _guess)
logger.debug('Legendasdivx.pt :: srt matches: %s', matches) logger.debug('Legendasdivx.pt :: sub matches: %s', matches)
_score = sum((_scores.get(match, 0) for match in matches)) _score = sum((_scores.get(match, 0) for match in matches))
if _score > _max_score: if _score > _max_score:
_max_name = name _max_name = name
@ -439,4 +460,5 @@ class LegendasdivxProvider(Provider):
logger.debug("Legendasdivx.pt :: returning from archive: %s scored %s", _max_name, _max_score) logger.debug("Legendasdivx.pt :: returning from archive: %s scored %s", _max_name, _max_score)
return archive.read(_max_name) return archive.read(_max_name)
raise ValueError("Legendasdivx.pt :: No subtitle found on compressed file. Max score was 0") logger.error("Legendasdivx.pt :: No subtitle found on compressed file. Max score was 0")
return None

Loading…
Cancel
Save