diff --git a/libs/subliminal_patch/providers/subdivx.py b/libs/subliminal_patch/providers/subdivx.py index 6b693f391..8d04ad0fc 100644 --- a/libs/subliminal_patch/providers/subdivx.py +++ b/libs/subliminal_patch/providers/subdivx.py @@ -17,11 +17,15 @@ else: from subliminal import __short_version__ from subliminal.exceptions import ServiceUnavailable -from subliminal.providers import ParserBeautifulSoup, Provider -from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending,guess_matches +from subliminal.providers import ParserBeautifulSoup +from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending,guess_matches from subliminal.video import Episode, Movie from subliminal_patch.exceptions import APIThrottled from six.moves import range +from subliminal_patch.score import get_scores +from subliminal_patch.subtitle import Subtitle +from subliminal_patch.providers import Provider +from guessit import guessit logger = logging.getLogger(__name__) @@ -30,20 +34,20 @@ class SubdivxSubtitle(Subtitle): provider_name = 'subdivx' hash_verifiable = False - def __init__(self, language, page_link, title, description, uploader): + def __init__(self, language, video, page_link, title, description, uploader): super(SubdivxSubtitle, self).__init__(language, hearing_impaired=False, page_link=page_link) + self.video = video self.title = title self.description = description self.uploader = uploader + self.release_info = self.title + if self.description and self.description.strip(): + self.release_info += ' | ' + self.description @property def id(self): return self.page_link - @property - def release_info(self): - return self.description - def get_matches(self, video): matches = set() @@ -112,12 +116,14 @@ class SubdivxSubtitlesProvider(Provider): def terminate(self): self.session.close() - def query(self, keyword, season=None, episode=None, year=None): - query = keyword - if season and episode: - query += ' S{season:02d}E{episode:02d}'.format(season=season, episode=episode) - elif year: - query += ' {:4d}'.format(year) + def query(self, video, languages): + + if isinstance(video, Episode): + query = "{} S{:02d}E{:02d}".format(video.series, video.season, video.episode) + else: + query = video.title + if video.year: + query += ' {:4d}'.format(video.year) params = { 'q': query, # search string @@ -135,7 +141,7 @@ class SubdivxSubtitlesProvider(Provider): self._check_response(response) try: - page_subtitles = self._parse_subtitles_page(response, language) + page_subtitles = self._parse_subtitles_page(video, response, language) except Exception as e: logger.error('Error parsing subtitles list: ' + str(e)) break @@ -151,24 +157,7 @@ class SubdivxSubtitlesProvider(Provider): return subtitles def list_subtitles(self, video, languages): - if isinstance(video, Episode): - titles = [video.series] + video.alternative_series - elif isinstance(video, Movie): - titles = [video.title] + video.alternative_titles - else: - titles = [] - - subtitles = [] - for title in titles: - if isinstance(video, Episode): - subtitles += [s for s in self.query(title, season=video.season, - episode=video.episode, year=video.year) - if s.language in languages] - elif isinstance(video, Movie): - subtitles += [s for s in self.query(title, year=video.year) - if s.language in languages] - - return subtitles + return self.query(video, languages) def download_subtitle(self, subtitle): if isinstance(subtitle, SubdivxSubtitle): @@ -186,14 +175,14 @@ class SubdivxSubtitlesProvider(Provider): archive = self._get_archive(response.content) # extract the subtitle - subtitle_content = self._get_subtitle_from_archive(archive) + subtitle_content = self._get_subtitle_from_archive(archive, subtitle) subtitle.content = fix_line_ending(subtitle_content) def _check_response(self, response): if response.status_code != 200: raise ServiceUnavailable('Bad status code: ' + str(response.status_code)) - def _parse_subtitles_page(self, response, language): + def _parse_subtitles_page(self, video, response, language): subtitles = [] page_soup = ParserBeautifulSoup(response.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser']) @@ -214,7 +203,7 @@ class SubdivxSubtitlesProvider(Provider): # uploader uploader = body_soup.find("a", {'class': 'link1'}).text - subtitle = self.subtitle_class(language, page_link, title, description, uploader) + subtitle = self.subtitle_class(language, video, page_link, title, description, uploader) logger.debug('Found subtitle %r', subtitle) subtitles.append(subtitle) @@ -253,7 +242,10 @@ class SubdivxSubtitlesProvider(Provider): return archive - def _get_subtitle_from_archive(self, archive): + def _get_subtitle_from_archive(self, archive, subtitle): + _max_score = 0 + _scores = get_scores (subtitle.video) + for name in archive.namelist(): # discard hidden files if os.path.split(name)[-1].startswith('.'): @@ -263,6 +255,26 @@ class SubdivxSubtitlesProvider(Provider): if not name.lower().endswith(SUBTITLE_EXTENSIONS): continue - return archive.read(name) + _guess = guessit (name) + if isinstance(subtitle.video, Episode): + logger.debug ("guessing %s" % name) + logger.debug("subtitle S{}E{} video S{}E{}".format(_guess['season'],_guess['episode'],subtitle.video.season,subtitle.video.episode)) + + if subtitle.video.episode != _guess['episode'] or subtitle.video.season != _guess['season']: + logger.debug('subtitle does not match video, skipping') + continue + + matches = set() + matches |= guess_matches (subtitle.video, _guess) + _score = sum ((_scores.get (match, 0) for match in matches)) + logger.debug('srt matches: %s, score %d' % (matches, _score)) + if _score > _max_score: + _max_name = name + _max_score = _score + logger.debug("new max: {} {}".format(name, _score)) + + if _max_score > 0: + logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score)) + return archive.read(_max_name) raise APIThrottled('Can not find the subtitle in the compressed file')