Subf2m provider improvements (#1973)

* subf2m provider: add more languages

* subf2m provider: use urllib.parse quoting rather than string replacement

* subf2m provider: change movie title matching to match the exact year and use similarity-based title matching

* subf2m provider: change TV show title matching to match the exact season and use similarity-based title matching

* no log: Subf2m Provider: add tests

* Subf2m Provider: add Serbian support

Co-authored-by: Vitiko <averroista@protonmail.com>
pull/1974/head v1.1.3-beta.4
silentcommitter 2 years ago committed by GitHub
parent 21359b32b5
commit d4203ee7cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -2,10 +2,13 @@
import functools import functools
import logging import logging
import urllib.parse
import re
from bs4 import BeautifulSoup as bso from bs4 import BeautifulSoup as bso
from guessit import guessit from guessit import guessit
from requests import Session from requests import Session
from difflib import SequenceMatcher
from subliminal_patch.core import Episode from subliminal_patch.core import Episode
from subliminal_patch.core import Movie from subliminal_patch.core import Movie
from subliminal_patch.exceptions import APIThrottled from subliminal_patch.exceptions import APIThrottled
@ -82,12 +85,37 @@ _LANGUAGE_MAP = {
"dutch": "dut", "dutch": "dut",
"hebrew": "heb", "hebrew": "heb",
"indonesian": "ind", "indonesian": "ind",
"danish": "dan",
"norwegian": "nor",
"bengali": "ben",
"bulgarian": "bul",
"croatian": "hrv",
"swedish": "swe",
"vietnamese": "vie",
"czech": "cze",
"finnish": "fin",
"french": "fre",
"german": "ger",
"greek": "gre",
"hungarian": "hun",
"icelandic": "ice",
"japanese": "jpn",
"macedonian": "mac",
"malay": "may",
"polish": "pol",
"romanian": "rum",
"russian": "rus",
"serbian": "srp",
"thai": "tha",
"turkish": "tur",
} }
class Subf2mProvider(Provider): class Subf2mProvider(Provider):
provider_name = "subf2m" provider_name = "subf2m"
_movie_title_regex = re.compile(r"^(.+?)( \((\d{4})\))?$")
_tv_show_title_regex = re.compile(r"^(.+?) - (.*?) season( \((\d{4})\))?$")
_supported_languages = {} _supported_languages = {}
_supported_languages["brazillian-portuguese"] = Language("por", "BR") _supported_languages["brazillian-portuguese"] = Language("por", "BR")
@ -112,7 +140,7 @@ class Subf2mProvider(Provider):
def _gen_results(self, query): def _gen_results(self, query):
req = self._session.get( req = self._session.get(
f"{_BASE_URL}/subtitles/searchbytitle?query={query.replace(' ', '+')}&l=", f"{_BASE_URL}/subtitles/searchbytitle?query={urllib.parse.quote(query)}&l=",
stream=True, stream=True,
) )
text = "\n".join(line for line in req.iter_lines(decode_unicode=True) if line) text = "\n".join(line for line in req.iter_lines(decode_unicode=True) if line)
@ -123,35 +151,61 @@ class Subf2mProvider(Provider):
def _search_movie(self, title, year): def _search_movie(self, title, year):
title = title.lower() title = title.lower()
year = f"({year})" year = str(year)
found_movie = None found_movie = None
results = []
for result in self._gen_results(title): for result in self._gen_results(title):
text = result.text.lower() text = result.text.lower()
if title.lower() in text and year in text: match = self._movie_title_regex.match(text)
found_movie = result.get("href") if not match:
logger.debug("Movie found: %s", found_movie) continue
break match_title = match.group(1)
match_year = match.group(3)
if year == match_year:
results.append(
{
"href": result.get("href"),
"similarity": SequenceMatcher(None, title, match_title).ratio(),
}
)
if results:
results.sort(key=lambda x: x["similarity"], reverse=True)
found_movie = results[0]["href"]
logger.debug("Movie found: %s", results[0])
return found_movie return found_movie
def _search_tv_show_season(self, title, season): def _search_tv_show_season(self, title, season):
try: try:
season_str = f"{_SEASONS[season - 1]} Season" season_str = _SEASONS[season - 1].lower()
except IndexError: except IndexError:
logger.debug("Season number not supported: %s", season) logger.debug("Season number not supported: %s", season)
return None return None
expected_result = f"{title} - {season_str}".lower()
found_tv_show_season = None found_tv_show_season = None
results = []
for result in self._gen_results(title): for result in self._gen_results(title):
if expected_result in result.text.lower(): text = result.text.lower()
found_tv_show_season = result.get("href") match = self._tv_show_title_regex.match(text)
logger.debug("TV Show season found: %s", found_tv_show_season) if not match:
break continue
match_title = match.group(1)
match_season = match.group(2)
if season_str == match_season:
results.append(
{
"href": result.get("href"),
"similarity": SequenceMatcher(None, title, match_title).ratio(),
}
)
if results:
results.sort(key=lambda x: x["similarity"], reverse=True)
found_tv_show_season = results[0]["href"]
logger.debug("TV Show season found: %s", results[0])
return found_tv_show_season return found_tv_show_season

@ -5,20 +5,45 @@ from subliminal_patch.providers.subf2m import Subf2mSubtitle
from subzero.language import Language from subzero.language import Language
def test_search_movie(movies): @pytest.mark.parametrize(
movie = movies["dune"] "title,year,expected_url",
[
(
"Dead Man's Chest",
2006,
"/subtitles/pirates-of-the-caribbean-2-dead-mans-chest",
),
("Dune", 2021, "/subtitles/dune-2021"),
("Cure", 1997, "/subtitles/cure-kyua"),
],
)
def test_search_movie(movies, title, year, expected_url):
movie = list(movies.values())[0]
movie.title = title
movie.year = year
with Subf2mProvider() as provider: with Subf2mProvider() as provider:
result = provider._search_movie(movie.title, movie.year) result = provider._search_movie(movie.title, movie.year)
assert result == "/subtitles/dune-2021" assert result == expected_url
def test_search_tv_show_season(episodes): @pytest.mark.parametrize(
episode = episodes["breaking_bad_s01e01"] "title,season,expected_url",
[
("Breaking Bad", 1, "/subtitles/breaking-bad-first-season"),
("House Of The Dragon", 1, "/subtitles/house-of-the-dragon-first-season"),
("The Bear", 1, "/subtitles/the-bear-first-season"),
],
)
def test_search_tv_show_season(episodes, title, season, expected_url):
episode = list(episodes.values())[0]
episode.name = title
episode.series = title
episode.season = season
with Subf2mProvider() as provider: with Subf2mProvider() as provider:
result = provider._search_tv_show_season(episode.series, episode.season) result = provider._search_tv_show_season(episode.series, episode.season)
assert result == "/subtitles/breaking-bad-first-season" assert result == expected_url
@pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")]) @pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")])

Loading…
Cancel
Save