From bf836ad521d79fdc15a9332933e126001d50a78b Mon Sep 17 00:00:00 2001 From: ngosang Date: Sun, 4 Oct 2020 01:41:41 +0200 Subject: [PATCH 1/2] Fix Subdivx provider. resolves #1133 --- libs/subliminal_patch/providers/subdivx.py | 83 ++++++++++++---------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/libs/subliminal_patch/providers/subdivx.py b/libs/subliminal_patch/providers/subdivx.py index f144a4cdc..eb3cac855 100644 --- a/libs/subliminal_patch/providers/subdivx.py +++ b/libs/subliminal_patch/providers/subdivx.py @@ -9,11 +9,6 @@ import zipfile import rarfile from subzero.language import Language from requests import Session -from six import PY2 -if PY2: - from urlparse import urlparse -else: - from urllib.parse import urlparse from subliminal import __short_version__ from subliminal.exceptions import ServiceUnavailable @@ -74,22 +69,25 @@ class SubdivxSubtitle(Subtitle): formats = [video.source.lower()] if formats[0] == "web": formats.append("webdl") + formats.append("web-dl") formats.append("webrip") formats.append("web ") for frmt in formats: - if frmt.lower() in self.description: + if frmt in self.description: matches.add('source') break # video_codec if video.video_codec: video_codecs = [video.video_codec.lower()] - if video_codecs[0] == "H.264": - formats.append("x264") - elif video_codecs[0] == "H.265": - formats.append("x265") - for vc in formats: - if vc.lower() in self.description: + if video_codecs[0] == "h.264": + video_codecs.append("h264") + video_codecs.append("x264") + elif video_codecs[0] == "h.265": + video_codecs.append("h265") + video_codecs.append("x265") + for vc in video_codecs: + if vc in self.description: matches.add('video_codec') break @@ -99,7 +97,7 @@ class SubdivxSubtitle(Subtitle): class SubdivxSubtitlesProvider(Provider): provider_name = 'subdivx' hash_verifiable = False - languages = {Language.fromalpha2(l) for l in ['es']} + languages = {Language.fromalpha2(lang) for lang in ['es']} subtitle_class = 
SubdivxSubtitle server_url = 'https://www.subdivx.com/' @@ -117,7 +115,6 @@ class SubdivxSubtitlesProvider(Provider): self.session.close() def query(self, video, languages): - if isinstance(video, Episode): query = "{} S{:02d}E{:02d}".format(video.series, video.season, video.episode) else: @@ -179,14 +176,10 @@ class SubdivxSubtitlesProvider(Provider): subtitle_content = self._get_subtitle_from_archive(archive, subtitle) subtitle.content = fix_line_ending(subtitle_content) - def _check_response(self, response): - if response.status_code != 200: - raise ServiceUnavailable('Bad status code: ' + str(response.status_code)) - def _parse_subtitles_page(self, video, response, language): subtitles = [] - page_soup = ParserBeautifulSoup(response.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser']) + page_soup = ParserBeautifulSoup(response.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser']) title_soups = page_soup.find_all("div", {'id': 'menu_detalle_buscador'}) body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'}) @@ -215,7 +208,7 @@ class SubdivxSubtitlesProvider(Provider): response = self.session.get(subtitle.page_link, timeout=20) self._check_response(response) try: - page_soup = ParserBeautifulSoup(response.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser']) + page_soup = ParserBeautifulSoup(response.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser']) links_soup = page_soup.find_all("a", {'class': 'detalle_link'}) for link_soup in links_soup: if link_soup['href'].startswith('bajar'): @@ -229,7 +222,13 @@ class SubdivxSubtitlesProvider(Provider): raise APIThrottled('Download link not found') - def _get_archive(self, content): + @staticmethod + def _check_response(response): + if response.status_code != 200: + raise ServiceUnavailable('Bad status code: ' + str(response.status_code)) + + @staticmethod + def _get_archive(content): # open the archive archive_stream = io.BytesIO(content) if 
rarfile.is_rarfile(archive_stream): @@ -243,35 +242,47 @@ class SubdivxSubtitlesProvider(Provider): return archive - def _get_subtitle_from_archive(self, archive, subtitle): - _max_score = 0 - _scores = get_scores (subtitle.video) - + @staticmethod + def _get_subtitle_from_archive(archive, subtitle): + _valid_names = [] for name in archive.namelist(): # discard hidden files - if os.path.split(name)[-1].startswith('.'): - continue - # discard non-subtitle files - if not name.lower().endswith(SUBTITLE_EXTENSIONS): - continue + if not os.path.split(name)[-1].startswith('.') and name.lower().endswith(SUBTITLE_EXTENSIONS): + _valid_names.append(name) + + # archive with only 1 subtitle + if len(_valid_names) == 1: + logger.debug("returning from archive: {} (single subtitle file)".format(_valid_names[0])) + return archive.read(_valid_names[0]) + + # in archives with more than 1 subtitle (season pack) we try to guess the best subtitle file + _scores = get_scores(subtitle.video) + _max_score = 0 + _max_name = "" + for name in _valid_names: + _guess = guessit(name) + if 'season' not in _guess: + _guess['season'] = -1 + if 'episode' not in _guess: + _guess['episode'] = -1 - _guess = guessit (name) if isinstance(subtitle.video, Episode): - logger.debug ("guessing %s" % name) - logger.debug("subtitle S{}E{} video S{}E{}".format(_guess['season'],_guess['episode'],subtitle.video.season,subtitle.video.episode)) + logger.debug("guessing %s" % name) + logger.debug("subtitle S{}E{} video S{}E{}".format( + _guess['season'], _guess['episode'], subtitle.video.season, subtitle.video.episode)) if subtitle.video.episode != _guess['episode'] or subtitle.video.season != _guess['season']: logger.debug('subtitle does not match video, skipping') continue matches = set() - matches |= guess_matches (subtitle.video, _guess) - _score = sum ((_scores.get (match, 0) for match in matches)) + matches |= guess_matches(subtitle.video, _guess) + _score = sum((_scores.get(match, 0) for match in matches)) 
logger.debug('srt matches: %s, score %d' % (matches, _score)) if _score > _max_score: - _max_name = name _max_score = _score + _max_name = name logger.debug("new max: {} {}".format(name, _score)) if _max_score > 0: From c87c4766cfb6beaaf37f413f8b12fa25572c1c7f Mon Sep 17 00:00:00 2001 From: ngosang Date: Sun, 4 Oct 2020 04:05:05 +0200 Subject: [PATCH 2/2] Rewrite Subtitulamos provider. resolves #1132 --- .../providers/subtitulamostv.py | 139 +++++++++--------- 1 file changed, 68 insertions(+), 71 deletions(-) diff --git a/libs/subliminal_patch/providers/subtitulamostv.py b/libs/subliminal_patch/providers/subtitulamostv.py index 952f89e4c..a533ff039 100644 --- a/libs/subliminal_patch/providers/subtitulamostv.py +++ b/libs/subliminal_patch/providers/subtitulamostv.py @@ -1,75 +1,69 @@ # -*- coding: utf-8 -*- -import json import logging import os -import re -import io -from babelfish import language_converters -from guessit import guessit from requests import Session from subzero.language import Language from subliminal import Movie, Episode, ProviderError, __short_version__ -from subliminal.exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, ProviderError from subliminal_patch.subtitle import Subtitle, guess_matches +from subliminal.providers import ParserBeautifulSoup from subliminal.subtitle import fix_line_ending, SUBTITLE_EXTENSIONS from subliminal_patch.providers import Provider logger = logging.getLogger(__name__) -server_url = 'https://subtitulamos.tv/' - class SubtitulamosTVSubtitle(Subtitle): provider_name = 'subtitulamostv' hash_verifiable = False - def __init__(self, language, page_link, download_link, description, title, matches, release_info): - super(SubtitulamosTVSubtitle, self).__init__(language, hearing_impaired=False, - page_link=page_link) + def __init__(self, language, page_link, download_link, title, release_info): + super(SubtitulamosTVSubtitle, self).__init__(language, hearing_impaired=False, page_link=page_link) 
self.download_link = download_link - self.description = description.lower() self.title = title self.release_info = release_info - self.found_matches = matches @property def id(self): return self.download_link def get_matches(self, video): - matches = self.found_matches + matches = {'series', 'season', 'episode', 'year'} + + title_lower = self.title.lower() + release_info_lower = self.release_info.lower() - # release_group - if video.release_group and video.release_group.lower() in self.description: + if video.title and video.title.lower() in title_lower: + matches.add('title') + + if video.release_group and video.release_group.lower() in release_info_lower: matches.add('release_group') - # resolution - if video.resolution and video.resolution.lower() in self.description: + if video.resolution and video.resolution.lower() in release_info_lower: matches.add('resolution') - # source if video.source: formats = [video.source.lower()] if formats[0] == "web": formats.append("webdl") + formats.append("web-dl") formats.append("webrip") - formats.append("web ") for frmt in formats: - if frmt.lower() in self.description: + if frmt in release_info_lower: matches.add('source') break - # video_codec if video.video_codec: video_codecs = [video.video_codec.lower()] - if video_codecs[0] == "H.264": - formats.append("x264") - elif video_codecs[0] == "H.265": - formats.append("x265") - for vc in formats: - if vc.lower() in self.description: + if video_codecs[0] == "h.264": + video_codecs.append("h264") + video_codecs.append("x264") + elif video_codecs[0] == "h.265": + video_codecs.append("h265") + video_codecs.append("x265") + for vc in video_codecs: + if vc in release_info_lower: matches.add('video_codec') break @@ -78,9 +72,14 @@ class SubtitulamosTVSubtitle(Subtitle): class SubtitulamosTVProvider(Provider): """Subtitulamostv Provider""" - languages = {Language.fromietf(l) for l in ['en','es']} + languages = {Language.fromietf(lang) for lang in ['en', 'es']} video_types = 
(Episode,) + server_url = 'https://subtitulamos.tv' + + def __init__(self): + self.session = None + def initialize(self): self.session = Session() self.session.headers = { @@ -90,58 +89,56 @@ class SubtitulamosTVProvider(Provider): self.session.close() def query(self, languages, video): - # query the server - result = None - year = (" (%d)" % video.year) if video.year else "" - q = "%s%s %dx%02d" % (video.series, year, video.season, video.episode) - logger.debug('Searching subtitles "%s"', q) + subtitle_name = "%s %dx%02d" % (video.series, video.season, video.episode) + logger.debug('Searching subtitles "%s"' % subtitle_name) - res = self.session.get( - server_url + 'search/query', params={'q':q}, timeout=10) - res.raise_for_status() - result = res.json() + response = self.session.get(self.server_url + '/search/query', params={'q': video.series}, timeout=10) + response.raise_for_status() + result = response.json() subtitles = [] - for s in [s for s in result if len(s['episodes'])]: - for e in s['episodes']: - res = self.session.get( - server_url + 'episodes/%d' % e['id'], timeout=10) - res.raise_for_status() - html = res.text - for lang_m in re.finditer(r"
(.*?)<\/div>.*?(?=
|
)", html, re.S): - lang = lang_m.group(1) - language = "es" - if "English" in lang: + for serie in result: + # skip non-matching series + if video.series.lower() != serie['name'].lower(): + continue + + response = self.session.get(self.server_url + "/shows/%d/season/%d" % (serie['id'], video.season), + timeout=10) + response.raise_for_status() + soup = ParserBeautifulSoup(response.text, ['lxml', 'html.parser']) + + for episode in soup.select('div.episode'): + episode_soup = episode.find('a') + episode_name = episode_soup.text + episode_url = episode_soup['href'] + + # skip non-matching episodes + if subtitle_name.lower() not in episode_name.lower(): + continue + + for lang in episode.select("div.subtitle-language"): + if "English" in lang.text: language = "en" + elif "Español" in lang.text: + language = "es" + else: + continue # not supported yet logger.debug('Found subtitles in "%s" language.', language) - for subt_m in re.finditer(r"
(.*?)
.*?(?:.*?
.*?(.*?)

)?", lang_m.group(0), re.S): - matches = set() - if video.alternative_series is None: - if video.series.lower() == s['name'].lower(): - matches.add('series') - elif s['name'].lower() in [video.series.lower()]+list(map(lambda name: name.lower(), video.alternative_series)): - matches.add('series') - if video.season == e['season']: - matches.add('season') - if video.episode == e['number']: - matches.add('episode') - if video.title == e['name']: - matches.add('title') - #if video.year is None or ("(%d)" % video.year) in s['name']: - matches.add('year') + for release in lang.find_next_sibling("div").select("div.sub"): + release_name = release.select('div.version-name')[0].text + release_url = release.select('a[href*="/download"]')[0]['href'] + subtitles.append( SubtitulamosTVSubtitle( - Language.fromietf(language), - server_url + 'episodes/%d' % e['id'], - server_url + subt_m.group(2), - subt_m.group(1)+(subt_m.group(3) if not subt_m.group(3) is None else ""), - e['name'], - matches, - '%s %dx%d,%s,%s' % (s['name'], e['season'], e['number'], subt_m.group(1), lang_m.group(1)), + Language.fromietf(language), + self.server_url + episode_url, + self.server_url + release_url, + episode_name, + release_name ) ) - + return subtitles def list_subtitles(self, video, languages):