From d3d7991db74fa0468d3d945f3b0d8ac7878136ae Mon Sep 17 00:00:00 2001 From: josdion Date: Fri, 20 Mar 2020 09:56:18 +0200 Subject: [PATCH] subssabbz, subsunacs - various fixes to improve match-finding - fix inconsistent names of some TV shows - sanitize movie names - do not remove apostrophe when sanitizing TV and movie names --- libs/subliminal_patch/providers/subssabbz.py | 28 +++++++++++++++----- libs/subliminal_patch/providers/subsunacs.py | 27 ++++++++++++++----- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/libs/subliminal_patch/providers/subssabbz.py b/libs/subliminal_patch/providers/subssabbz.py index 709029169..2f9321c12 100644 --- a/libs/subliminal_patch/providers/subssabbz.py +++ b/libs/subliminal_patch/providers/subssabbz.py @@ -12,7 +12,7 @@ from requests import Session from guessit import guessit from subliminal_patch.providers import Provider from subliminal_patch.subtitle import Subtitle -from subliminal_patch.utils import sanitize +from subliminal_patch.utils import sanitize, fix_inconsistent_naming from subliminal.exceptions import ProviderError from subliminal.utils import sanitize_release_group from subliminal.subtitle import guess_matches @@ -23,6 +23,21 @@ from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST logger = logging.getLogger(__name__) +def fix_tv_naming(title): + """Fix TV show titles with inconsistent naming using dictionary, but do not sanitize them. + + :param str title: original title. + :return: new title. 
+ :rtype: str + + """ + return fix_inconsistent_naming(title, {"Marvel's Daredevil": "Daredevil", + "Marvel's Luke Cage": "Luke Cage", + "Marvel's Iron Fist": "Iron Fist", + "Marvel's Jessica Jones": "Jessica Jones", + "DC's Legends of Tomorrow": "Legends of Tomorrow" + }, True) + class SubsSabBzSubtitle(Subtitle): """SubsSabBz Subtitle.""" provider_name = 'subssabbz' @@ -34,6 +49,7 @@ class SubsSabBzSubtitle(Subtitle): self.page_link = link self.type = type self.video = video + self.release_info = os.path.splitext(filename)[0] @property def id(self): @@ -60,8 +76,6 @@ class SubsSabBzSubtitle(Subtitle): matches.add('hash') matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) - - matches.add(id(self)) return matches @@ -99,10 +113,10 @@ class SubsSabBzProvider(Provider): } if isEpisode: - params['movie'] = "%s %02d %02d" % (sanitize(video.series), video.season, video.episode) + params['movie'] = "%s %02d %02d" % (sanitize(fix_tv_naming(video.series), {'\''}), video.season, video.episode) else: params['yr'] = video.year - params['movie'] = (video.title) + params['movie'] = sanitize(video.title, {'\''}) if language == 'en' or language == 'eng': params['select-language'] = 1 @@ -121,8 +135,8 @@ class SubsSabBzProvider(Provider): soup = BeautifulSoup(response.content, 'html.parser') rows = soup.findAll('tr', {'class': 'subs-row'}) - # Search on first 10 rows only - for row in rows[:10]: + # Search on first 20 rows only + for row in rows[:20]: a_element_wrapper = row.find('td', { 'class': 'c2field' }) if a_element_wrapper: element = a_element_wrapper.find('a') diff --git a/libs/subliminal_patch/providers/subsunacs.py b/libs/subliminal_patch/providers/subsunacs.py index 5af116d99..72e0febc7 100644 --- a/libs/subliminal_patch/providers/subsunacs.py +++ b/libs/subliminal_patch/providers/subsunacs.py @@ -12,7 +12,7 @@ from requests import Session from guessit import guessit from subliminal_patch.providers import Provider from subliminal_patch.subtitle 
import Subtitle -from subliminal_patch.utils import sanitize +from subliminal_patch.utils import sanitize, fix_inconsistent_naming from subliminal.exceptions import ProviderError from subliminal.utils import sanitize_release_group from subliminal.subtitle import guess_matches @@ -23,6 +23,20 @@ from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST logger = logging.getLogger(__name__) +def fix_tv_naming(title): + """Fix TV show titles with inconsistent naming using dictionary, but do not sanitize them. + + :param str title: original title. + :return: new title. + :rtype: str + + """ + return fix_inconsistent_naming(title, {"Marvel's Daredevil": "Daredevil", + "Marvel's Luke Cage": "Luke Cage", + "Marvel's Iron Fist": "Iron Fist", + "DC's Legends of Tomorrow": "Legends of Tomorrow" + }, True) + class SubsUnacsSubtitle(Subtitle): """SubsUnacs Subtitle.""" provider_name = 'subsunacs' @@ -34,6 +48,7 @@ class SubsUnacsSubtitle(Subtitle): self.page_link = link self.type = type self.video = video + self.release_info = os.path.splitext(filename)[0] @property def id(self): @@ -60,8 +75,6 @@ class SubsUnacsSubtitle(Subtitle): matches.add('hash') matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) - - matches.add(id(self)) return matches @@ -103,10 +116,10 @@ class SubsUnacsProvider(Provider): 'imdbcheck': 1} if isEpisode: - params['m'] = "%s %02d %02d" % (sanitize(video.series), video.season, video.episode) + params['m'] = "%s %02d %02d" % (sanitize(fix_tv_naming(video.series), {'\''}), video.season, video.episode) else: params['y'] = video.year - params['m'] = (video.title) + params['m'] = sanitize(video.title, {'\''}) if language == 'en' or language == 'eng': params['l'] = 1 @@ -125,8 +138,8 @@ class SubsUnacsProvider(Provider): soup = BeautifulSoup(response.content, 'html.parser') rows = soup.findAll('td', {'class': 'tdMovie'}) - # Search on first 10 rows only - for row in rows[:10]: + # Search on first 20 rows only + for row in 
rows[:20]: element = row.find('a', {'class': 'tooltip'}) if element: link = element.get('href')