Subf2m provider: add support for IMDB IDs

pull/2131/head
Vitiko 1 year ago
parent e3c4def89a
commit ee1506ed54

@ -7,12 +7,10 @@ import re
import time
import urllib.parse
from guessit import guessit
from requests import Session
from bs4 import BeautifulSoup as bso
from guessit import guessit
from requests import Session
from subliminal.exceptions import ConfigurationError
from subliminal_patch.core import Episode
from subliminal_patch.core import Movie
from subliminal_patch.exceptions import APIThrottled
@ -38,9 +36,9 @@ class Subf2mSubtitle(Subtitle):
self.episode_title = None
self._matches = set(
("title", "year")
("title", "year", "imdb_id")
if episode_number is None
else ("title", "series", "year", "season", "episode")
else ("title", "series", "year", "season", "episode", "imdb_id")
)
def get_matches(self, video):
@ -153,10 +151,11 @@ class Subf2mProvider(Provider):
video_types = (Episode, Movie)
subtitle_class = Subf2mSubtitle
def __init__(self, verify_ssl=True, user_agent=None, session_factory=None):
def __init__(self, user_agent, verify_ssl=True, session_factory=None):
super().__init__()
if not user_agent:
raise ValueError("User-agent config missing")
if not (user_agent or "").strip():
raise ConfigurationError("User-agent config missing")
self._user_agent = user_agent
self._verify_ssl = verify_ssl
@ -214,18 +213,17 @@ class Subf2mProvider(Provider):
for title in soup.select("li div[class='title'] a"):
yield title
def _search_movie(self, title, year):
def _search_movie(self, title, year, return_len=3):
title = title.lower()
year = str(year)
found_movie = None
results = []
for result in self._gen_results(title):
text = result.text.lower()
match = self._movie_title_regex.match(text)
if not match:
continue
match_title = match.group(1)
match_year = match.group(3)
if year == match_year:
@ -238,19 +236,21 @@ class Subf2mProvider(Provider):
if results:
results.sort(key=lambda x: x["similarity"], reverse=True)
found_movie = results[0]["href"]
logger.debug("Movie found: %s", results[0])
return found_movie
results = [result["href"] for result in results]
if results:
results = set(results[:return_len])
logger.debug("Results: %s", results)
return results
def _search_tv_show_season(self, title, season, year=None):
return []
def _search_tv_show_season(self, title, season, year=None, return_len=3):
try:
season_str = _SEASONS[season - 1].lower()
except IndexError:
logger.debug("Season number not supported: %s", season)
return None
found_tv_show_season = None
results = []
for result in self._gen_results(title):
text = result.text.lower()
@ -278,13 +278,20 @@ class Subf2mProvider(Provider):
if results:
results.sort(key=lambda x: x["similarity"], reverse=True)
found_tv_show_season = results[0]["href"]
logger.debug("TV Show season found: %s", results[0])
results = [result["href"] for result in results]
if results:
results = set(results[:return_len])
logger.debug("Results: %s", results)
return results
return found_tv_show_season
return []
def _find_movie_subtitles(self, path, language):
def _find_movie_subtitles(self, path, language, imdb_id):
soup = self._get_subtitle_page_soup(path, language)
imdb_matched = _match_imdb(soup, imdb_id)
if not imdb_matched:
return []
subtitles = []
for item in soup.select("li.item"):
@ -298,9 +305,12 @@ class Subf2mProvider(Provider):
return subtitles
def _find_episode_subtitles(
self, path, season, episode, language, episode_title=None
self, path, season, episode, language, episode_title=None, imdb_id=None
):
soup = self._get_subtitle_page_soup(path, language)
imdb_matched = _match_imdb(soup, imdb_id)
if not imdb_matched:
return []
subtitles = []
@ -359,27 +369,45 @@ class Subf2mProvider(Provider):
is_episode = isinstance(video, Episode)
if is_episode:
result = self._search_tv_show_season(video.series, video.season, video.year)
paths = self._search_tv_show_season(video.series, video.season, video.year)
else:
result = self._search_movie(video.title, video.year)
paths = self._search_movie(video.title, video.year)
if result is None:
if not paths:
logger.debug("No results")
return []
subtitles = []
subs = []
for path in paths:
must_break = False
logger.debug("Looking for subs from %s", path)
for language in languages:
if is_episode:
subs.extend(
self._find_episode_subtitles(
path,
video.season,
video.episode,
language,
video.title,
video.series_imdb_id,
)
)
for language in languages:
if is_episode:
subtitles.extend(
self._find_episode_subtitles(
result, video.season, video.episode, language, video.title
else:
subs.extend(
self._find_movie_subtitles(path, language, video.imdb_id)
)
)
else:
subtitles.extend(self._find_movie_subtitles(result, language))
return subtitles
must_break = subs != []
if must_break:
logger.debug("Good path found: %s. Not running over others.", path)
break
return subs
def download_subtitle(self, subtitle):
# TODO: add MustGetBlacklisted support
@ -426,6 +454,32 @@ _EPISODE_SPECIAL_RE = re.compile(
)
def _match_imdb(soup, imdb_id):
    """Tell whether a subtitle page belongs to the expected IMDB title.

    The page's IMDB ID is taken from the last path segment of the header
    link's ``href``.

    :param soup: parsed subtitle page; only ``select_one`` is used.
    :param imdb_id: IMDB ID of the wanted video, or ``None`` when unknown.
    :return: ``False`` only when both IDs are known and differ. ``True`` when
        they are equal, or when either side is missing, since nothing can be
        compared and the page is given the benefit of the doubt.
    """
    link = soup.select_one(
        "#content > div.subtitles.byFilm > div.box.clearfix > div.top.left > div.header > h2 > a"
    )
    href = link.get("href") if link is not None else None

    parsed_imdb_id = None
    if isinstance(href, str):
        # Strip trailing slashes before splitting so a link such as
        # ".../title/tt0903747/" yields the ID rather than an empty segment.
        parsed_imdb_id = href.strip().rstrip("/").split("/")[-1].strip() or None

    if parsed_imdb_id is None:
        logger.debug("Couldn't get IMDB ID")
        logger.debug("Matching subtitles as IMDB ID was not parsed.")
        return True

    if not imdb_id:
        # Without an ID on the video side there is nothing to compare with;
        # rejecting here would discard every subtitle for such videos.
        logger.debug(
            "Matching subtitles as no IMDB ID was provided (page ID: '%s')",
            parsed_imdb_id,
        )
        return True

    if parsed_imdb_id != imdb_id:
        logger.debug("Wrong IMDB ID: '%s' != '%s'", parsed_imdb_id, imdb_id)
        return False

    logger.debug("Good IMDB ID: '%s' == '%s'", parsed_imdb_id, imdb_id)
    return True
def _get_episode_from_release(release: str):
match = _EPISODE_SPECIAL_RE.search(release)
if match is None:

@ -1,10 +1,10 @@
import pytest
from subliminal_patch.providers import subf2m
from subliminal_patch.providers.subf2m import ConfigurationError
from subliminal_patch.providers.subf2m import Subf2mProvider
from subliminal_patch.providers.subf2m import Subf2mSubtitle
from subzero.language import Language
_U_A = "Mozilla/5.0 (Linux; Android 10; SM-G996U Build/QP1A.190711.020; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Mobile Safari/537.36"
@ -26,13 +26,15 @@ def provider():
("Cure", 1997, "/subtitles/cure-kyua"),
],
)
def test_search_movie(provider, movies, title, year, expected_url):
movie = list(movies.values())[0]
movie.title = title
movie.year = year
def test_search_movie(provider, title, year, expected_url):
result = provider._search_movie(title, year)
assert expected_url in result
result = provider._search_movie(movie.title, movie.year)
assert result == expected_url
def test_init_empty_user_agent_raises_configurationerror():
    """A whitespace-only user agent must be rejected with ConfigurationError."""
    # Both context managers are entered left to right, so the provider is
    # still constructed inside the ``pytest.raises`` guard.
    with pytest.raises(ConfigurationError), Subf2mProvider(user_agent=" ") as provider:
        assert provider
@pytest.mark.parametrize(
@ -52,27 +54,37 @@ def test_search_movie(provider, movies, title, year, expected_url):
)
def test_search_tv_show_season(provider, series_title, season, year, expected_url):
result = provider._search_tv_show_season(series_title, season, year)
assert result == expected_url
assert expected_url in result
@pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")])
def test_find_movie_subtitles(provider, language):
def test_find_movie_subtitles(provider, language, movies):
path = "/subtitles/dune-2021"
for sub in provider._find_movie_subtitles(path, language):
for sub in provider._find_movie_subtitles(path, language, movies["dune"].imdb_id):
assert sub.language == language
@pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")])
def test_find_episode_subtitles(provider, language):
def test_find_episode_subtitles(provider, language, episodes):
path = "/subtitles/breaking-bad-first-season"
for sub in provider._find_episode_subtitles(path, 1, 1, language):
subs = provider._find_episode_subtitles(
path, 1, 1, language, imdb_id=episodes["breaking_bad_s01e01"].series_imdb_id
)
assert subs
for sub in subs:
assert sub.language == language
def test_find_episode_subtitles_from_complete_series_path(provider):
path = "/subtitles/courage-the-cowardly-dog"
for sub in provider._find_episode_subtitles(path, 1, 1, Language.fromalpha2("en")):
subs = provider._find_episode_subtitles(
path, 1, 1, Language.fromalpha2("en"), imdb_id="tt0220880"
)
assert subs
for sub in subs:
assert sub.language == Language.fromalpha2("en")
@ -82,6 +94,7 @@ def test_list_and_download_subtitles_complete_series_pack(provider, episodes):
episode.series = "Sam & Max: Freelance Police"
episode.name = "The Glazed McGuffin Affair"
episode.title = "The Glazed McGuffin Affair"
episode.series_imdb_id = "tt0125646"
episode.season = 1
episode.episode = 21

Loading…
Cancel
Save