From 392855b4eb799a0fc87f828e71b432dd03922c14 Mon Sep 17 00:00:00 2001 From: Bazarr Date: Tue, 12 May 2020 01:26:39 +0100 Subject: [PATCH 1/5] refactor matches --- .../providers/legendasdivx.py | 101 ++++++++++-------- 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/libs/subliminal_patch/providers/legendasdivx.py b/libs/subliminal_patch/providers/legendasdivx.py index c30c9317b..500e7e6a3 100644 --- a/libs/subliminal_patch/providers/legendasdivx.py +++ b/libs/subliminal_patch/providers/legendasdivx.py @@ -6,6 +6,7 @@ import os import re import zipfile from time import sleep +from urllib.parse import quote from requests.exceptions import HTTPError import rarfile @@ -39,7 +40,6 @@ class LegendasdivxSubtitle(Subtitle): self.description = data['description'] self.video = video self.sub_frame_rate = data['frame_rate'] - self.video_filename = data['video_filename'] self.uploader = data['uploader'] self.wrong_fps = False self.skip_wrong_fps = skip_wrong_fps @@ -74,28 +74,41 @@ class LegendasdivxSubtitle(Subtitle): description = sanitize(self.description) - if sanitize(self.video_filename) in description: - matches.update(['title']) - matches.update(['season']) - matches.update(['episode']) + video_filename = video.name + video_filename = os.path.basename(video_filename) + video_filename, _ = os.path.splitext(video_filename) + video_filename = sanitize_release_group(video_filename) - # episode - if video.title and sanitize(video.title) in description: + if sanitize(video_filename) in description: matches.update(['title']) + # relying people won' use just S01E01 for the file name + if isinstance(video, Episode): + matches.update(['series']) + matches.update(['season']) + matches.update(['episode']) + + # can match both movies and series if video.year and '{:04d}'.format(video.year) in description: matches.update(['year']) + # match movie title (include alternative movie names) + if isinstance(video, Movie): + if video.title: + for movie_name in [video.title] + video.alternative_titles: + if sanitize(movie_name) in description: + matches.update(['title']) + if isinstance(video, Episode): - # already matched in search query + if video.title and sanitize(video.title) in description: + matches.update(['title']) + if video.series: + for series_name in [video.series] + video.alternative_series: + if sanitize(series_name) in description: + matches.update(['series']) if video.season and 's{:02d}'.format(video.season) in description: matches.update(['season']) if video.episode and 'e{:02d}'.format(video.episode) in description: matches.update(['episode']) - if video.episode and video.season and video.series: - if '{} s{:02d}e{:02d}'.format(sanitize(video.series), video.season, video.episode) in description: - matches.update(['series']) - matches.update(['season']) - matches.update(['episode']) # release_group if video.release_group and sanitize_release_group(video.release_group) in sanitize_release_group(description): @@ -163,7 +176,7 @@ class LegendasdivxProvider(Provider): if prev_cookies != NO_VALUE: logger.debug("Legendasdivx.pt :: Re-using previous legendasdivx cookies: %s", prev_cookies) self.session.cookies.update(prev_cookies) - # Login if session has expired + # login if session has expired else: logger.debug("Legendasdivx.pt :: Session cookies not found!") self.session.headers.update(self.headers) @@ -191,14 +204,14 @@ class LegendasdivxProvider(Provider): res = self.session.post(self.loginpage, data) res.raise_for_status() - #make sure we're logged in + # make sure we're logged in logger.debug('Legendasdivx.pt :: Logged in successfully: PHPSESSID: %s', self.session.cookies.get_dict()['PHPSESSID']) cj = self.session.cookies.copy() store_cks = ("PHPSESSID", "phpbb3_2z8zs_sid", "phpbb3_2z8zs_k", "phpbb3_2z8zs_u", "lang") for cn in iter(self.session.cookies.keys()): if cn not in store_cks: del cj[cn] - #store session cookies on cache + # store session cookies on cache logger.debug("Legendasdivx.pt :: Storing legendasdivx session cookies: %r", cj) region.set("legendasdivx_cookies2", cj) @@ -206,7 +219,7 @@ class LegendasdivxProvider(Provider): logger.error("Legendasdivx.pt :: Couldn't get session ID, check your credentials") raise AuthenticationError("Legendasdivx.pt :: Couldn't get session ID, check your credentials") except HTTPError as e: - if "bloqueado" in res.text.lower(): # ip blocked on server + if "bloqueado" in res.text.lower(): logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.") logger.error("Legendasdivx.pt :: HTTP Error %s", e) @@ -215,13 +228,14 @@ class LegendasdivxProvider(Provider): logger.error("LegendasDivx.pt :: Uncaught error: %r", e) raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e) - def _process_page(self, video, bsoup, video_filename): + def _process_page(self, video, bsoup): subtitles = [] _allsubs = bsoup.findAll("div", {"class": "sub_box"}) for _subbox in _allsubs: + hits = 0 for th in _subbox.findAll("th"): if th.text == 'Hits:': @@ -239,10 +253,12 @@ class LegendasdivxProvider(Provider): # get description for matches description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text() - #get subtitle link - download = _subbox.find("a", {"class": "sub_download"}) - # sometimes BSoup can't find 'a' tag and returns None. + # get subtitle link from footer + sub_footer = _subbox.find("div", {"class": "sub_footer"}) + download = sub_footer.find("a", {"class": "sub_download"}) if sub_footer else None + + # sometimes 'a' tag is not found and returns None. Most likely HTML format error! try: download_link = self.download_link.format(link=download.get('href')) logger.debug("Legendasdivx.pt :: Found subtitle link on: %s ", download_link) @@ -257,12 +273,12 @@ class LegendasdivxProvider(Provider): exact_match = False if video.name.lower() in description.lower(): exact_match = True + data = {'link': download_link, 'exact_match': exact_match, 'hits': hits, 'uploader': uploader, 'frame_rate': frame_rate, - 'video_filename': video_filename, 'description': description } subtitles.append( @@ -272,27 +288,22 @@ class LegendasdivxProvider(Provider): def query(self, video, languages): - video_filename = video.name - video_filename = os.path.basename(video_filename) - video_filename, _ = os.path.splitext(video_filename) - video_filename = sanitize_release_group(video_filename) - _searchurl = self.searchurl - if video.imdb_id is None: - if isinstance(video, Episode): - querytext = "{} S{:02d}E{:02d}".format(video.series, video.season, video.episode) - elif isinstance(video, Movie): - querytext = video.title - else: - querytext = video.imdb_id + + if isinstance(video, Movie): + querytext = video.imdb_id if video.imdb_id else video.title + + if isinstance(video, Episode): + querytext = '"{} S{:02d}E{:02d}"'.format(video.series, video.season, video.episode) + querytext = quote(quote(querytext)) # language query filter if isinstance(languages, (tuple, list, set)): language_ids = ','.join(sorted(l.opensubtitles for l in languages)) if 'por' in language_ids: # prioritize portuguese subtitles - lang_filter = '&form_cat=28' # pt + lang_filter = '&form_cat=28' elif 'pob' in language_ids: - lang_filter = '&form_cat=29' # br + lang_filter = '&form_cat=29' else: lang_filter = '' @@ -309,14 +320,15 @@ class LegendasdivxProvider(Provider): logger.warning('Legendasdivx.pt :: %s not found', querytext) return [] if res.status_code == 302: # got redirected to login page. - # Seems that our session cookies are no longer valid... clean them from cache + # seems that our session cookies are no longer valid... clean them from cache region.delete("legendasdivx_cookies2") logger.debug("Legendasdivx.pt :: Logging in again. Cookies have expired!") - self.login() # login and try again + # login and try again + self.login() res = self.session.get(_searchurl.format(query=querytext)) res.raise_for_status() except HTTPError as e: - if "bloqueado" in res.text.lower(): # ip blocked on server + if "bloqueado" in res.text.lower(): logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.") logger.error("Legendasdivx.pt :: HTTP Error %s", e) @@ -339,7 +351,7 @@ class LegendasdivxProvider(Provider): num_pages = min(MAX_PAGES, num_pages) # process first page - subtitles = self._process_page(video, bsoup, video_filename) + subtitles = self._process_page(video, bsoup) # more pages? if num_pages > 1: @@ -349,7 +361,7 @@ class LegendasdivxProvider(Provider): logger.debug("Legendasdivx.pt :: Moving on to next page: %s", _search_next) res = self.session.get(_search_next) next_page = ParserBeautifulSoup(res.content, ['html.parser']) - subs = self._process_page(video, next_page, video_filename) + subs = self._process_page(video, next_page) subtitles.extend(subs) return subtitles @@ -363,7 +375,7 @@ class LegendasdivxProvider(Provider): res = self.session.get(subtitle.page_link) res.raise_for_status() except HTTPError as e: - if "bloqueado" in res.text.lower(): # ip blocked on server + if "bloqueado" in res.text.lower(): logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.") logger.error("Legendasdivx.pt :: HTTP Error %s", e) @@ -373,7 +385,7 @@ class LegendasdivxProvider(Provider): raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e) # make sure we haven't maxed out our daily limit - if (res.status_code == 200 and 'limite' in res.text.lower()): # daily downloads limit reached + if (res.status_code == 200 and 'limite' in res.text.lower()): logger.error("LegendasDivx.pt :: Daily download limit reached!") raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!") @@ -387,7 +399,6 @@ class LegendasdivxProvider(Provider): def _get_archive(self, content): # open the archive - # stole^H^H^H^H^H inspired from subvix provider archive_stream = io.BytesIO(content) if rarfile.is_rarfile(archive_stream): logger.debug('Legendasdivx.pt :: Identified rar archive') @@ -428,7 +439,7 @@ class LegendasdivxProvider(Provider): matches = set() matches |= guess_matches(subtitle.video, _guess) - logger.debug('Legendasdivx.pt :: srt matches: %s', matches) + logger.debug('Legendasdivx.pt :: sub matches: %s', matches) _score = sum((_scores.get(match, 0) for match in matches)) if _score > _max_score: _max_name = name From 82b41f8115dc1a90d9f6d95bf775dadf67e49a53 Mon Sep 17 00:00:00 2001 From: Bazarr Date: Thu, 14 May 2020 00:19:39 +0100 Subject: [PATCH 2/5] search for season packs and stop unnecessary exceptions --- .../providers/legendasdivx.py | 37 ++++++++++++------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/libs/subliminal_patch/providers/legendasdivx.py b/libs/subliminal_patch/providers/legendasdivx.py index 500e7e6a3..3864d1547 100644 --- a/libs/subliminal_patch/providers/legendasdivx.py +++ b/libs/subliminal_patch/providers/legendasdivx.py @@ -169,7 +169,7 @@ class LegendasdivxProvider(Provider): self.skip_wrong_fps = skip_wrong_fps def initialize(self): - logger.info("Legendasdivx.pt :: Creating session for requests") + logger.debug("Legendasdivx.pt :: Creating session for requests") self.session = RetryingCFSession() # re-use PHP Session if present prev_cookies = region.get("legendasdivx_cookies2") @@ -187,7 +187,7 @@ class LegendasdivxProvider(Provider): self.session.close() def login(self): - logger.info('Legendasdivx.pt :: Logging in') + logger.debug('Legendasdivx.pt :: Logging in') try: res = self.session.get(self.loginpage) res.raise_for_status() @@ -317,8 +317,16 @@ class LegendasdivxProvider(Provider): res = self.session.get(_searchurl.format(query=querytext), allow_redirects=False) res.raise_for_status() if (res.status_code == 200 and "A legenda não foi encontrada" in res.text): - logger.warning('Legendasdivx.pt :: %s not found', querytext) - return [] + logger.warning('Legendasdivx.pt :: query %s return no results!', querytext) + # for series, if no results found, try again just with series and season (subtitle packs) + if isinstance(video, Episode): + logger.debug("Legendasdivx.pt :: trying again with just series and season on query.") + querytext = re.sub("(e|E)(\d{2})", "", querytext) + res = self.session.get(_searchurl.format(query=querytext), allow_redirects=False) + res.raise_for_status() + if (res.status_code == 200 and "A legenda não foi encontrada" in res.text): + logger.warning('Legendasdivx.pt :: query %s return no results (for series and season only).', querytext) + return [] if res.status_code == 302: # got redirected to login page. # seems that our session cookies are no longer valid... clean them from cache region.delete("legendasdivx_cookies2") @@ -385,17 +393,19 @@ class LegendasdivxProvider(Provider): raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r", e) # make sure we haven't maxed out our daily limit - if (res.status_code == 200 and 'limite' in res.text.lower()): + if (res.status_code == 200 and 'limite de downloads diário atingido' in res.text.lower()): logger.error("LegendasDivx.pt :: Daily download limit reached!") raise DownloadLimitExceeded("Legendasdivx.pt :: Daily download limit reached!") archive = self._get_archive(res.content) # extract the subtitle - subtitle_content = self._get_subtitle_from_archive(archive, subtitle) - subtitle.content = fix_line_ending(subtitle_content) - subtitle.normalize() - - return subtitle + if archive: + subtitle_content = self._get_subtitle_from_archive(archive, subtitle) + if subtitle_content: + subtitle.content = fix_line_ending(subtitle_content) + subtitle.normalize() + return subtitle + return def _get_archive(self, content): # open the archive @@ -407,8 +417,8 @@ class LegendasdivxProvider(Provider): logger.debug('Legendasdivx.pt :: Identified zip archive') archive = zipfile.ZipFile(archive_stream) else: - raise ValueError('Legendasdivx.pt :: Unsupported compressed format') - + logger.error('Legendasdivx.pt :: Unsupported compressed format') + return None return archive def _get_subtitle_from_archive(self, archive, subtitle): @@ -450,4 +460,5 @@ class LegendasdivxProvider(Provider): logger.debug("Legendasdivx.pt :: returning from archive: %s scored %s", _max_name, _max_score) return archive.read(_max_name) - raise ValueError("Legendasdivx.pt :: No subtitle found on compressed file. Max score was 0") + logger.error("Legendasdivx.pt :: No subtitle found on compressed file. Max score was 0") + return None From f3c3655dae8f062d176d82cbd2219b4bdc44d5f2 Mon Sep 17 00:00:00 2001 From: ngosang Date: Sat, 16 May 2020 15:27:49 +0200 Subject: [PATCH 3/5] argenteam provider: fix null exception and improve scoring --- libs/subliminal_patch/providers/argenteam.py | 45 +++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/libs/subliminal_patch/providers/argenteam.py b/libs/subliminal_patch/providers/argenteam.py index c3707925f..4c7f1fc0c 100644 --- a/libs/subliminal_patch/providers/argenteam.py +++ b/libs/subliminal_patch/providers/argenteam.py @@ -76,9 +76,11 @@ class ArgenteamSubtitle(Subtitle): if video.series and (sanitize(self.title) in ( sanitize(name) for name in [video.series] + video.alternative_series)): matches.add('series') + # season if video.season and self.season == video.season: matches.add('season') + # episode if video.episode and self.episode == video.episode: matches.add('episode') @@ -87,6 +89,9 @@ class ArgenteamSubtitle(Subtitle): if video.tvdb_id and str(self.tvdb_id) == str(video.tvdb_id): matches.add('tvdb_id') + # year (year is not available for series, but we assume it matches) + matches.add('year') + elif isinstance(video, Movie) and self.movie_kind == 'movie': # title if video.title and (sanitize(self.title) in ( @@ -230,29 +235,29 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin): has_multiple_ids = len(argenteam_ids) > 1 for aid in argenteam_ids: response = self.session.get(url, params={'id': aid}, timeout=10) - response.raise_for_status() content = response.json() - imdb_id = year = None - returned_title = title - if not is_episode and "info" in content: - imdb_id = content["info"].get("imdb") - year = content["info"].get("year") - returned_title = content["info"].get("title", title) - - for r in content['releases']: - for s in r['subtitles']: - movie_kind = "episode" if is_episode else "movie" - page_link = self.BASE_URL + movie_kind + "/" + str(aid) - # use https and new domain - download_link = s['uri'].replace('http://www.argenteam.net/', self.BASE_URL) - sub = ArgenteamSubtitle(language, page_link, download_link, movie_kind, returned_title, - season, episode, year, r.get('team'), r.get('tags'), - r.get('source'), r.get('codec'), content.get("tvdb"), imdb_id, - asked_for_release_group=video.release_group, - asked_for_episode=episode) - subtitles.append(sub) + if content is not None: # eg https://argenteam.net/api/v1/episode?id=11534 + imdb_id = year = None + returned_title = title + if not is_episode and "info" in content: + imdb_id = content["info"].get("imdb") + year = content["info"].get("year") + returned_title = content["info"].get("title", title) + + for r in content['releases']: + for s in r['subtitles']: + movie_kind = "episode" if is_episode else "movie" + page_link = self.BASE_URL + movie_kind + "/" + str(aid) + # use https and new domain + download_link = s['uri'].replace('http://www.argenteam.net/', self.BASE_URL) + sub = ArgenteamSubtitle(language, page_link, download_link, movie_kind, returned_title, + season, episode, year, r.get('team'), r.get('tags'), + r.get('source'), r.get('codec'), content.get("tvdb"), imdb_id, + asked_for_release_group=video.release_group, + asked_for_episode=episode) + subtitles.append(sub) if has_multiple_ids: time.sleep(self.multi_result_throttle) From 499ec5ac12514489cf204b682eb243520520d7ef Mon Sep 17 00:00:00 2001 From: ngosang Date: Sat, 16 May 2020 15:43:05 +0200 Subject: [PATCH 4/5] argenteam provider: fix null exception and improve scoring (part 2) --- libs/subliminal_patch/providers/argenteam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/subliminal_patch/providers/argenteam.py b/libs/subliminal_patch/providers/argenteam.py index 4c7f1fc0c..5faf027ba 100644 --- a/libs/subliminal_patch/providers/argenteam.py +++ b/libs/subliminal_patch/providers/argenteam.py @@ -55,7 +55,7 @@ class ArgenteamSubtitle(Subtitle): return self._release_info combine = [] - for attr in ("format", "version", "video_codec"): + for attr in ("format", "version"): value = getattr(self, attr) if value: combine.append(value) From 55bb3791da81c883bf3ceaea0f56a8ab88e3e218 Mon Sep 17 00:00:00 2001 From: ngosang Date: Sat, 16 May 2020 15:44:43 +0200 Subject: [PATCH 5/5] tvsubtitles provider: hide error message when not found Test: The Boys --- libs/subliminal/providers/tvsubtitles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/subliminal/providers/tvsubtitles.py b/libs/subliminal/providers/tvsubtitles.py index 277dbdaa6..188d4448c 100644 --- a/libs/subliminal/providers/tvsubtitles.py +++ b/libs/subliminal/providers/tvsubtitles.py @@ -209,7 +209,7 @@ class TVsubtitlesProvider(Provider): if subtitles: return subtitles else: - logger.error('No show id found for %r (%r)', video.series, {'year': video.year}) + logger.debug('No show id found for %r (%r)', video.series, {'year': video.year}) return []