From ee1506ed54b5a610dca7ca47ff6a09f13773c84e Mon Sep 17 00:00:00 2001 From: Vitiko Date: Tue, 13 Jun 2023 02:13:42 -0400 Subject: [PATCH] Subf2m provider: add support for IMDB IDs --- libs/subliminal_patch/providers/subf2m.py | 124 ++++++++++++++++------ tests/subliminal_patch/test_subf2m.py | 39 ++++--- 2 files changed, 115 insertions(+), 48 deletions(-) diff --git a/libs/subliminal_patch/providers/subf2m.py b/libs/subliminal_patch/providers/subf2m.py index 7f8cb6bfd..11a60f99b 100644 --- a/libs/subliminal_patch/providers/subf2m.py +++ b/libs/subliminal_patch/providers/subf2m.py @@ -7,12 +7,10 @@ import re import time import urllib.parse -from guessit import guessit - -from requests import Session from bs4 import BeautifulSoup as bso from guessit import guessit from requests import Session +from subliminal.exceptions import ConfigurationError from subliminal_patch.core import Episode from subliminal_patch.core import Movie from subliminal_patch.exceptions import APIThrottled @@ -38,9 +36,9 @@ class Subf2mSubtitle(Subtitle): self.episode_title = None self._matches = set( - ("title", "year") + ("title", "year", "imdb_id") if episode_number is None - else ("title", "series", "year", "season", "episode") + else ("title", "series", "year", "season", "episode", "imdb_id") ) def get_matches(self, video): @@ -153,10 +151,11 @@ class Subf2mProvider(Provider): video_types = (Episode, Movie) subtitle_class = Subf2mSubtitle - def __init__(self, verify_ssl=True, user_agent=None, session_factory=None): + def __init__(self, user_agent, verify_ssl=True, session_factory=None): super().__init__() - if not user_agent: - raise ValueError("User-agent config missing") + + if not (user_agent or "").strip(): + raise ConfigurationError("User-agent config missing") self._user_agent = user_agent self._verify_ssl = verify_ssl @@ -214,18 +213,17 @@ class Subf2mProvider(Provider): for title in soup.select("li div[class='title'] a"): yield title - def _search_movie(self, title, year): + def _search_movie(self, title, year, return_len=3): title = title.lower() year = str(year) - found_movie = None - results = [] for result in self._gen_results(title): text = result.text.lower() match = self._movie_title_regex.match(text) if not match: continue + match_title = match.group(1) match_year = match.group(3) if year == match_year: @@ -238,19 +236,21 @@ class Subf2mProvider(Provider): if results: results.sort(key=lambda x: x["similarity"], reverse=True) - found_movie = results[0]["href"] - logger.debug("Movie found: %s", results[0]) - return found_movie + results = [result["href"] for result in results] + if results: + results = set(results[:return_len]) + logger.debug("Results: %s", results) + return results - def _search_tv_show_season(self, title, season, year=None): + return [] + + def _search_tv_show_season(self, title, season, year=None, return_len=3): try: season_str = _SEASONS[season - 1].lower() except IndexError: logger.debug("Season number not supported: %s", season) return None - found_tv_show_season = None - results = [] for result in self._gen_results(title): text = result.text.lower() @@ -278,13 +278,20 @@ class Subf2mProvider(Provider): if results: results.sort(key=lambda x: x["similarity"], reverse=True) - found_tv_show_season = results[0]["href"] - logger.debug("TV Show season found: %s", results[0]) + results = [result["href"] for result in results] + if results: + results = set(results[:return_len]) + logger.debug("Results: %s", results) + return results - return found_tv_show_season + return [] - def _find_movie_subtitles(self, path, language): + def _find_movie_subtitles(self, path, language, imdb_id): soup = self._get_subtitle_page_soup(path, language) + imdb_matched = _match_imdb(soup, imdb_id) + if not imdb_matched: + return [] + subtitles = [] for item in soup.select("li.item"): @@ -298,9 +305,12 @@ class Subf2mProvider(Provider): return subtitles def _find_episode_subtitles( - self, path, season, episode, language, episode_title=None + self, path, season, episode, language, episode_title=None, imdb_id=None ): soup = self._get_subtitle_page_soup(path, language) + imdb_matched = _match_imdb(soup, imdb_id) + if not imdb_matched: + return [] subtitles = [] @@ -359,27 +369,45 @@ class Subf2mProvider(Provider): is_episode = isinstance(video, Episode) if is_episode: - result = self._search_tv_show_season(video.series, video.season, video.year) + paths = self._search_tv_show_season(video.series, video.season, video.year) else: - result = self._search_movie(video.title, video.year) + paths = self._search_movie(video.title, video.year) - if result is None: + if not paths: logger.debug("No results") return [] - subtitles = [] + subs = [] + for path in paths: + must_break = False + + logger.debug("Looking for subs from %s", path) + + for language in languages: + if is_episode: + subs.extend( + self._find_episode_subtitles( + path, + video.season, + video.episode, + language, + video.title, + video.series_imdb_id, + ) + ) - for language in languages: - if is_episode: - subtitles.extend( - self._find_episode_subtitles( - result, video.season, video.episode, language, video.title + else: + subs.extend( + self._find_movie_subtitles(path, language, video.imdb_id) ) - ) - else: - subtitles.extend(self._find_movie_subtitles(result, language)) - return subtitles + must_break = subs != [] + + if must_break: + logger.debug("Good path found: %s. Not running over others.", path) + break + + return subs def download_subtitle(self, subtitle): # TODO: add MustGetBlacklisted support @@ -426,6 +454,32 @@ _EPISODE_SPECIAL_RE = re.compile( ) +def _match_imdb(soup, imdb_id): + try: + parsed_imdb_id = ( + soup.select_one( + "#content > div.subtitles.byFilm > div.box.clearfix > div.top.left > div.header > h2 > a" + ) + .get("href") # type: ignore + .split("/")[-1] # type: ignore + .strip() + ) + except AttributeError: + logger.debug("Couldn't get IMDB ID") + parsed_imdb_id = None + + if parsed_imdb_id is not None and parsed_imdb_id != imdb_id: + logger.debug("Wrong IMDB ID: '%s' != '%s'", parsed_imdb_id, imdb_id) + return False + + if parsed_imdb_id is None: + logger.debug("Matching subtitles as IMDB ID was not parsed.") + else: + logger.debug("Good IMDB ID: '%s' == '%s'", parsed_imdb_id, imdb_id) + + return True + + def _get_episode_from_release(release: str): match = _EPISODE_SPECIAL_RE.search(release) if match is None: diff --git a/tests/subliminal_patch/test_subf2m.py b/tests/subliminal_patch/test_subf2m.py index cdf201734..2df17af2d 100644 --- a/tests/subliminal_patch/test_subf2m.py +++ b/tests/subliminal_patch/test_subf2m.py @@ -1,10 +1,10 @@ import pytest from subliminal_patch.providers import subf2m +from subliminal_patch.providers.subf2m import ConfigurationError from subliminal_patch.providers.subf2m import Subf2mProvider from subliminal_patch.providers.subf2m import Subf2mSubtitle from subzero.language import Language - _U_A = "Mozilla/5.0 (Linux; Android 10; SM-G996U Build/QP1A.190711.020; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Mobile Safari/537.36" @@ -26,13 +26,15 @@ def provider(): ("Cure", 1997, "/subtitles/cure-kyua"), ], ) -def test_search_movie(provider, movies, title, year, expected_url): - movie = list(movies.values())[0] - movie.title = title - movie.year = year +def test_search_movie(provider, title, year, expected_url): + result = provider._search_movie(title, year) + assert expected_url in result + - result = provider._search_movie(movie.title, movie.year) - assert result == expected_url +def test_init_empty_user_agent_raises_configurationerror(): + with pytest.raises(ConfigurationError): + with Subf2mProvider(user_agent=" ") as provider: + assert provider @pytest.mark.parametrize( @@ -52,27 +54,37 @@ def test_search_movie(provider, movies, title, year, expected_url): ) def test_search_tv_show_season(provider, series_title, season, year, expected_url): result = provider._search_tv_show_season(series_title, season, year) - assert result == expected_url + assert expected_url in result @pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")]) -def test_find_movie_subtitles(provider, language): +def test_find_movie_subtitles(provider, language, movies): path = "/subtitles/dune-2021" - for sub in provider._find_movie_subtitles(path, language): + for sub in provider._find_movie_subtitles(path, language, movies["dune"].imdb_id): assert sub.language == language @pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")]) -def test_find_episode_subtitles(provider, language): +def test_find_episode_subtitles(provider, language, episodes): path = "/subtitles/breaking-bad-first-season" - for sub in provider._find_episode_subtitles(path, 1, 1, language): + subs = provider._find_episode_subtitles( + path, 1, 1, language, imdb_id=episodes["breaking_bad_s01e01"].series_imdb_id + ) + assert subs + + for sub in subs: assert sub.language == language def test_find_episode_subtitles_from_complete_series_path(provider): path = "/subtitles/courage-the-cowardly-dog" - for sub in provider._find_episode_subtitles(path, 1, 1, Language.fromalpha2("en")): + subs = provider._find_episode_subtitles( + path, 1, 1, Language.fromalpha2("en"), imdb_id="tt0220880" + ) + assert subs + + for sub in subs: assert sub.language == Language.fromalpha2("en") @@ -82,6 +94,7 @@ def test_list_and_download_subtitles_complete_series_pack(provider, episodes): episode.series = "Sam & Max: Freelance Police" episode.name = "The Glazed McGuffin Affair" episode.title = "The Glazed McGuffin Affair" + episode.series_imdb_id = "tt0125646" episode.season = 1 episode.episode = 21