Merge pull request #1248 from vitiko98/fix-providers

Fix TuSubtitulo provider (#1242) and improve various spanish providers
pull/1249/head
morpheus65535 4 years ago committed by GitHub
commit 229f978a23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -9,141 +9,45 @@ from zipfile import ZipFile
from guessit import guessit
from requests import Session
from subliminal import Episode, Movie
from subliminal.score import get_equivalent_release_groups
from subliminal.utils import sanitize_release_group, sanitize
from subliminal.utils import sanitize
from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle, guess_matches
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subzero.language import Language
BASE_URL = "https://argenteam.net/"
API_URL = BASE_URL + "api/v1/"
logger = logging.getLogger(__name__)
class ArgenteamSubtitle(Subtitle):
provider_name = 'argenteam'
provider_name = "argenteam"
hearing_impaired_verifiable = False
_release_info = None
def __init__(self, language, page_link, download_link, movie_kind, title, season, episode, year, release, version, source,
video_codec, tvdb_id, imdb_id, asked_for_episode=None, asked_for_release_group=None, *args, **kwargs):
super(ArgenteamSubtitle, self).__init__(language, page_link=page_link, *args, **kwargs)
def __init__(self, language, page_link, download_link, release_info, matches):
super(ArgenteamSubtitle, self).__init__(language, page_link=page_link)
self.page_link = page_link
self.download_link = download_link
self.movie_kind = movie_kind
self.title = title
self.year = year
self.season = season
self.episode = episode
self.release = release
self.version = version
self.asked_for_release_group = asked_for_release_group
self.asked_for_episode = asked_for_episode
self.matches = None
self.source = source
self.video_codec = video_codec
self.tvdb_id = tvdb_id
self.imdb_id = "tt" + imdb_id if imdb_id else None
self.releases = self.release_info
self.found_matches = matches
self.release_info = release_info
@property
def id(self):
    # The download URL is unique per subtitle, so it serves as the id.
    return self.download_link
@property
def release_info(self):
if self._release_info:
return self._release_info
combine = []
for attr in ("source", "version"):
value = getattr(self, attr)
if value:
combine.append(value)
self._release_info = u".".join(combine) + (u"-"+self.release if self.release else "")
return self._release_info
def __repr__(self):
ep_addon = (" S%02dE%02d" % (self.season, self.episode)) if self.episode else ""
return '<%s %r [%s]>' % (
self.__class__.__name__, u"%s%s%s." % (self.title, " (%s)" % self.year if self.year else "", ep_addon) +
self.release_info, self.language)
def get_matches(self, video):
matches = set()
# series
if isinstance(video, Episode) and self.movie_kind == 'episode':
if video.series and (sanitize(self.title) in (
sanitize(name) for name in [video.series] + video.alternative_series)):
matches.add('series')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# tvdb_id
if video.tvdb_id and str(self.tvdb_id) == str(video.tvdb_id):
matches.add('tvdb_id')
# year (year is not available for series, but we assume it matches)
matches.add('year')
elif isinstance(video, Movie) and self.movie_kind == 'movie':
# title
if video.title and (sanitize(self.title) in (
sanitize(name) for name in [video.title] + video.alternative_titles)):
matches.add('title')
# imdb_id
if video.imdb_id and self.imdb_id and str(self.imdb_id) == str(video.imdb_id):
matches.add('imdb_id')
# year
if video.year and self.year == video.year:
matches.add('year')
else:
logger.info('%r is not a valid movie_kind', self.movie_kind)
return matches
# release_group
if video.release_group and self.release:
rg = sanitize_release_group(video.release_group)
if any(r in sanitize_release_group(self.release) for r in get_equivalent_release_groups(rg)):
matches.add('release_group')
# blatantly assume we've got a matching source if the release group matches
# fixme: smart?
#matches.add('source')
# resolution
if video.resolution and self.version and str(video.resolution) in self.version.lower():
matches.add('resolution')
# source
if video.source and self.source:
formats = [video.source]
if video.source == "Web":
formats.append("WEB")
for fmt in formats:
if fmt.lower() in self.source.lower():
matches.add('source')
break
matches |= guess_matches(video, guessit(self.release_info), partial=True)
self.matches = matches
return matches
# Download links always have the srt filename with the release info.
# We combine it with the release info as guessit will return the first key match.
new_file = self.download_link.split("/")[-1] + self.release_info
self.found_matches |= guess_matches(video, guessit(new_file))
return self.found_matches
class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
provider_name = 'argenteam'
languages = {Language.fromalpha2(l) for l in ['es']}
provider_name = "argenteam"
languages = {Language.fromalpha2(l) for l in ["es"]}
video_types = (Episode, Movie)
BASE_URL = "https://argenteam.net/"
API_URL = BASE_URL + "api/v1/"
subtitle_class = ArgenteamSubtitle
hearing_impaired_verifiable = False
language_list = list(languages)
@ -155,77 +59,130 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
def initialize(self):
self.session = Session()
self.session.headers = {'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")}
self.session.headers = {
"User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
}
def terminate(self):
    # Release the HTTP session created in initialize().
    self.session.close()
def search_ids(self, title, year=None, imdb_id=None, season=None, episode=None, titles=None):
"""Search movie or episode id from the `title`, `season` and `episode`.
:param imdb_id: imdb id of the given movie
:param titles: all titles of the given series or movie
:param year: release year of the given movie
:param str title: series of the episode or movie name
:param int season: season of the episode.
:param int episode: episode number.
:return: list of ids
:rtype: list
"""
# make the search
def search_ids(self, title, **kwargs):
query = title
titles = titles or []
titles = kwargs.get("titles") or []
is_episode = False
if season and episode:
if kwargs.get("season") and kwargs.get("episode"):
is_episode = True
query = '%s S%#02dE%#02d' % (title, season, episode)
query = f"{title} S{kwargs['season']:02}E{kwargs['episode']:02}"
logger.info(f"Searching ID (episode: {is_episode}) for {query}")
logger.info(u'Searching %s ID for %r', "episode" if is_episode else "movie", query)
r = self.session.get(self.API_URL + 'search', params={'q': query}, timeout=10)
r = self.session.get(API_URL + "search", params={"q": query}, timeout=10)
r.raise_for_status()
results = r.json()
match_ids = []
if results['total'] >= 1:
if results["total"] >= 1:
for result in results["results"]:
if (result['type'] == "episode" and not is_episode) or (result['type'] == "movie" and is_episode):
if (result["type"] == "episode" and not is_episode) or (
result["type"] == "movie" and is_episode
):
continue
# shortcut in case of matching imdb id
if not is_episode and imdb_id and "imdb" in result and "tt%s" % result["imdb"] == str(imdb_id):
logger.debug("Movie matched by IMDB ID %s, taking shortcut", imdb_id)
match_ids = [result['id']]
# shortcut in case of matching imdb id (don't match NoneType)
if not is_episode and f"tt{result.get('imdb', 'n/a')}" == kwargs.get(
"imdb_id"
):
logger.debug(f"Movie matched by IMDB ID, taking shortcut")
match_ids = [result["id"]]
break
# advanced title check in case of multiple movie results
if results['total'] > 1:
if not is_episode and year:
if result["title"] and not (sanitize(result["title"]) in (u"%s %s" % (sanitize(name), year)
for name in titles)):
if results["total"] > 1:
if not is_episode and kwargs.get("year"):
if result["title"] and not (
sanitize(result["title"])
in (
"%s %s" % (sanitize(name), kwargs.get("year"))
for name in titles
)
):
continue
match_ids.append(result['id'])
match_ids.append(result["id"])
else:
logger.error(u'No episode ID found for %r', query)
logger.error(f"No episode ID found for {query}")
if match_ids:
logger.debug(u"Found matching IDs: %s", ", ".join(str(id) for id in match_ids))
logger.debug(
f"Found matching IDs: {', '.join(str(id) for id in match_ids)}"
)
return match_ids
def get_query_matches(self, video, **kwargs):
    """Compute the set of matched properties between a video and an
    Argenteam API result.

    :param video: the subliminal ``Episode`` or ``Movie`` being searched.
    :param kwargs: metadata taken from the API response: ``movie_kind``
        (``"episode"`` or ``"movie"``), ``title``, ``season``, ``episode``,
        ``year``, ``imdb_id`` and ``tvdb_id``.
    :return: set of matched property names (``"series"``, ``"title"``, ...).
    """
    matches = set()
    if isinstance(video, Episode) and kwargs.get("movie_kind") == "episode":
        if video.series and (
            sanitize(kwargs.get("title"))
            in (
                sanitize(name) for name in [video.series] + video.alternative_series
            )
        ):
            matches.add("series")
        if video.season and kwargs.get("season") == video.season:
            matches.add("season")
        if video.episode and kwargs.get("episode") == video.episode:
            matches.add("episode")
        # Coerce BOTH sides to str: the API returns tvdb as an int, so a
        # bare `int == str` comparison would never match.
        if video.tvdb_id and str(kwargs.get("tvdb_id")) == str(video.tvdb_id):
            matches.add("tvdb_id")
        # year (year is not available for series, but we assume it matches)
        matches.add("year")
    elif isinstance(video, Movie) and kwargs.get("movie_kind") == "movie":
        if video.title and (
            sanitize(kwargs.get("title"))
            in (sanitize(name) for name in [video.title] + video.alternative_titles)
        ):
            matches.add("title")
        # video.imdb_id carries the "tt" prefix; the API value does not.
        if video.imdb_id and f"tt{kwargs.get('imdb_id')}" == str(video.imdb_id):
            matches.add("imdb_id")
        if video.year and kwargs.get("year") == video.year:
            matches.add("year")
    else:
        logger.info(f"{kwargs.get('movie_kind')} is not a valid movie_kind")
    return matches
def combine_release_info(self, release_dict):
    """Build a dotted release string from a release payload.

    Joins the values of the ``source``, ``codec``, ``tags`` and ``team``
    fields (in that order), skipping fields that are missing or empty.
    Returns ``"Unknown"`` when none of the fields carry a value.
    """
    parts = []
    for key in ("source", "codec", "tags", "team"):
        value = release_dict.get(key)
        if value:
            parts.append(value)
    if not parts:
        return "Unknown"
    return ".".join(parts)
def query(self, title, video, titles=None):
is_episode = isinstance(video, Episode)
season = episode = None
url = self.API_URL + 'movie'
url = API_URL + "movie"
if is_episode:
season = video.season
episode = video.episode
url = self.API_URL + 'episode'
argenteam_ids = self.search_ids(title, season=season, episode=episode, titles=titles)
url = API_URL + "episode"
argenteam_ids = self.search_ids(
title, season=season, episode=episode, titles=titles
)
else:
argenteam_ids = self.search_ids(title, year=video.year, imdb_id=video.imdb_id, titles=titles)
argenteam_ids = self.search_ids(
title, year=video.year, imdb_id=video.imdb_id, titles=titles
)
if not argenteam_ids:
return []
@ -234,30 +191,45 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
subtitles = []
has_multiple_ids = len(argenteam_ids) > 1
for aid in argenteam_ids:
response = self.session.get(url, params={'id': aid}, timeout=10)
response = self.session.get(url, params={"id": aid}, timeout=10)
response.raise_for_status()
content = response.json()
if content is not None: # eg https://argenteam.net/api/v1/episode?id=11534
imdb_id = year = None
returned_title = title
if not is_episode and "info" in content:
imdb_id = content["info"].get("imdb")
year = content["info"].get("year")
returned_title = content["info"].get("title", title)
for r in content['releases']:
for s in r['subtitles']:
movie_kind = "episode" if is_episode else "movie"
page_link = self.BASE_URL + movie_kind + "/" + str(aid)
# use https and new domain
download_link = s['uri'].replace('http://www.argenteam.net/', self.BASE_URL)
sub = ArgenteamSubtitle(language, page_link, download_link, movie_kind, returned_title,
season, episode, year, r.get('team'), r.get('tags'),
r.get('source'), r.get('codec'), content.get("tvdb"), imdb_id,
asked_for_release_group=video.release_group,
asked_for_episode=episode)
subtitles.append(sub)
if not content:
continue
imdb_id = year = None
returned_title = title
if not is_episode and "info" in content:
imdb_id = content["info"].get("imdb")
year = content["info"].get("year")
returned_title = content["info"].get("title", title)
for r in content["releases"]:
for s in r["subtitles"]:
movie_kind = "episode" if is_episode else "movie"
page_link = f"{BASE_URL}{movie_kind}/{aid}"
release_info = self.combine_release_info(r)
download_link = s["uri"].replace("http", "https")
matches_ = self.get_query_matches(
video,
movie_kind=movie_kind,
season=season,
episode=episode,
title=returned_title,
year=year,
imdb_id=imdb_id,
tvdb_id=content.get("tvdb"),
)
subtitles.append(
ArgenteamSubtitle(
language,
page_link,
download_link,
release_info,
matches_,
)
)
if has_multiple_ids:
time.sleep(self.multi_result_throttle)
@ -280,7 +252,7 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
def download_subtitle(self, subtitle):
# download as a zip
logger.info('Downloading subtitle %r', subtitle)
logger.info("Downloading subtitle %r", subtitle)
r = self.session.get(subtitle.download_link, timeout=10)
r.raise_for_status()

@ -13,7 +13,7 @@ from requests import Session
from subliminal import __short_version__
from subliminal.exceptions import ServiceUnavailable
from subliminal.providers import ParserBeautifulSoup
from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending,guess_matches
from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending, guess_matches
from subliminal.video import Episode, Movie
from subliminal_patch.exceptions import APIThrottled
from six.moves import range
@ -26,18 +26,20 @@ logger = logging.getLogger(__name__)
class SubdivxSubtitle(Subtitle):
provider_name = 'subdivx'
provider_name = "subdivx"
hash_verifiable = False
def __init__(self, language, video, page_link, title, description, uploader):
super(SubdivxSubtitle, self).__init__(language, hearing_impaired=False, page_link=page_link)
super(SubdivxSubtitle, self).__init__(
language, hearing_impaired=False, page_link=page_link
)
self.video = video
self.title = title
self.description = description
self.uploader = uploader
self.release_info = self.title
if self.description and self.description.strip():
self.release_info += ' | ' + self.description
self.release_info += " | " + self.description
@property
def id(self):
@ -49,60 +51,33 @@ class SubdivxSubtitle(Subtitle):
# episode
if isinstance(video, Episode):
# already matched in search query
matches.update(['title', 'series', 'season', 'episode', 'year'])
matches.update(["title", "series", "season", "episode", "year"])
# movie
elif isinstance(video, Movie):
# already matched in search query
matches.update(['title', 'year'])
# release_group
if video.release_group and video.release_group.lower() in self.description:
matches.add('release_group')
# resolution
if video.resolution and video.resolution.lower() in self.description:
matches.add('resolution')
# source
if video.source:
formats = [video.source.lower()]
if formats[0] == "web":
formats.append("webdl")
formats.append("web-dl")
formats.append("webrip")
formats.append("web ")
for frmt in formats:
if frmt in self.description:
matches.add('source')
break
# video_codec
if video.video_codec:
video_codecs = [video.video_codec.lower()]
if video_codecs[0] == "h.264":
video_codecs.append("h264")
video_codecs.append("x264")
elif video_codecs[0] == "h.265":
video_codecs.append("h265")
video_codecs.append("x265")
elif video_codecs[0] == "divx":
video_codecs.append("divx")
for vc in video_codecs:
if vc in self.description:
matches.add('video_codec')
break
matches.update(["title", "year"])
# Special string comparisons are unnecessary. Guessit can match keys
# from any string and find even more keywords.
matches |= guess_matches(
video,
guessit(
self.description,
{"type": "episode" if isinstance(video, Episode) else "movie"},
),
)
return matches
class SubdivxSubtitlesProvider(Provider):
provider_name = 'subdivx'
provider_name = "subdivx"
hash_verifiable = False
languages = {Language.fromalpha2(lang) for lang in ['es']}
languages = {Language.fromalpha2(lang) for lang in ["es"]}
subtitle_class = SubdivxSubtitle
server_url = 'https://www.subdivx.com/'
server_url = "https://www.subdivx.com/"
multi_result_throttle = 2
language_list = list(languages)
@ -111,36 +86,31 @@ class SubdivxSubtitlesProvider(Provider):
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)
self.session.headers["User-Agent"] = f"Subliminal/{__short_version__}"
def terminate(self):
self.session.close()
def query(self, video, languages):
if isinstance(video, Episode):
query = "{} S{:02d}E{:02d}".format(video.series, video.season, video.episode)
query = f"{video.series} S{video.season:02}E{video.episode:02}"
else:
# Subdvix has problems searching foreign movies if the year is
# appended. For example: if we search "Memories of Murder 2003",
# Subdix won't return any results; but if we search "Memories of
# Murder", it will. That's because in Subdvix foreign titles have
# the year after the original title ("Salinui chueok (2003) aka
# Memories of Murder").
# A proper solution would be filtering results with the year in
# _parse_subtitles_page.
# appended. A proper solution would be filtering results with the
# year in self._parse_subtitles_page.
query = video.title
params = {
'q': query, # search string
'accion': 5, # action search
'oxdown': 1, # order by downloads descending
'pg': 1 # page 1
"q": query, # search string
"accion": 5, # action search
"oxdown": 1, # order by downloads descending
"pg": 1, # page 1
}
logger.debug('Searching subtitles %r', query)
logger.debug(f"Searching subtitles: {query}")
subtitles = []
language = self.language_list[0]
search_link = self.server_url + 'index.php'
search_link = self.server_url + "index.php"
while True:
response = self.session.get(search_link, params=params, timeout=20)
self._check_response(response)
@ -148,7 +118,7 @@ class SubdivxSubtitlesProvider(Provider):
try:
page_subtitles = self._parse_subtitles_page(video, response, language)
except Exception as e:
logger.error('Error parsing subtitles list: ' + str(e))
logger.error(f"Error parsing subtitles list: {e}")
break
subtitles += page_subtitles
@ -156,7 +126,7 @@ class SubdivxSubtitlesProvider(Provider):
if len(page_subtitles) < 100:
break # this is the last page
params['pg'] += 1 # search next page
params["pg"] += 1 # search next page
time.sleep(self.multi_result_throttle)
return subtitles
@ -167,14 +137,17 @@ class SubdivxSubtitlesProvider(Provider):
def download_subtitle(self, subtitle):
if isinstance(subtitle, SubdivxSubtitle):
# download the subtitle
logger.info('Downloading subtitle %r', subtitle)
logger.info("Downloading subtitle %r", subtitle)
# get download link
download_link = self._get_download_link(subtitle)
# download zip / rar file with the subtitle
response = self.session.get(self.server_url + download_link, headers={'Referer': subtitle.page_link},
timeout=30)
response = self.session.get(
self.server_url + download_link,
headers={"Referer": subtitle.page_link},
timeout=30,
)
self._check_response(response)
# open the compressed archive
@ -187,9 +160,11 @@ class SubdivxSubtitlesProvider(Provider):
def _parse_subtitles_page(self, video, response, language):
subtitles = []
page_soup = ParserBeautifulSoup(response.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
title_soups = page_soup.find_all("div", {'id': 'menu_detalle_buscador'})
body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})
page_soup = ParserBeautifulSoup(
response.content.decode("utf-8", "ignore"), ["lxml", "html.parser"]
)
title_soups = page_soup.find_all("div", {"id": "menu_detalle_buscador"})
body_soups = page_soup.find_all("div", {"id": "buscador_detalle"})
for subtitle in range(0, len(title_soups)):
title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]
@ -204,15 +179,17 @@ class SubdivxSubtitlesProvider(Provider):
page_link = title_soup.find("a")["href"]
# description
description = body_soup.find("div", {'id': 'buscador_detalle_sub'}).text
description = body_soup.find("div", {"id": "buscador_detalle_sub"}).text
description = description.replace(",", " ").lower()
# uploader
uploader = body_soup.find("a", {'class': 'link1'}).text
uploader = body_soup.find("a", {"class": "link1"}).text
subtitle = self.subtitle_class(language, video, page_link, title, description, uploader)
subtitle = self.subtitle_class(
language, video, page_link, title, description, uploader
)
logger.debug('Found subtitle %r', subtitle)
logger.debug("Found subtitle %r", subtitle)
subtitles.append(subtitle)
return subtitles
@ -221,37 +198,39 @@ class SubdivxSubtitlesProvider(Provider):
response = self.session.get(subtitle.page_link, timeout=20)
self._check_response(response)
try:
page_soup = ParserBeautifulSoup(response.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
links_soup = page_soup.find_all("a", {'class': 'detalle_link'})
page_soup = ParserBeautifulSoup(
response.content.decode("utf-8", "ignore"), ["lxml", "html.parser"]
)
links_soup = page_soup.find_all("a", {"class": "detalle_link"})
for link_soup in links_soup:
if link_soup['href'].startswith('bajar'):
return self.server_url + link_soup['href']
links_soup = page_soup.find_all("a", {'class': 'link1'})
if link_soup["href"].startswith("bajar"):
return self.server_url + link_soup["href"]
links_soup = page_soup.find_all("a", {"class": "link1"})
for link_soup in links_soup:
if "bajar.php" in link_soup['href']:
return link_soup['href']
if "bajar.php" in link_soup["href"]:
return link_soup["href"]
except Exception as e:
raise APIThrottled('Error parsing download link: ' + str(e))
raise APIThrottled(f"Error parsing download link: {e}")
raise APIThrottled('Download link not found')
raise APIThrottled("Download link not found")
@staticmethod
def _check_response(response):
if response.status_code != 200:
raise ServiceUnavailable('Bad status code: ' + str(response.status_code))
raise ServiceUnavailable(f"Bad status code: {response.status_code}")
@staticmethod
def _get_archive(content):
# open the archive
archive_stream = io.BytesIO(content)
if rarfile.is_rarfile(archive_stream):
logger.debug('Identified rar archive')
logger.debug("Identified rar archive")
archive = rarfile.RarFile(archive_stream)
elif zipfile.is_zipfile(archive_stream):
logger.debug('Identified zip archive')
logger.debug("Identified zip archive")
archive = zipfile.ZipFile(archive_stream)
else:
raise APIThrottled('Unsupported compressed format')
raise APIThrottled("Unsupported compressed format")
return archive
@ -261,12 +240,16 @@ class SubdivxSubtitlesProvider(Provider):
for name in archive.namelist():
# discard hidden files
# discard non-subtitle files
if not os.path.split(name)[-1].startswith('.') and name.lower().endswith(SUBTITLE_EXTENSIONS):
if not os.path.split(name)[-1].startswith(".") and name.lower().endswith(
SUBTITLE_EXTENSIONS
):
_valid_names.append(name)
# archive with only 1 subtitle
if len(_valid_names) == 1:
logger.debug("returning from archive: {} (single subtitle file)".format(_valid_names[0]))
logger.debug(
f"returning from archive: {_valid_names[0]} (single subtitle file)"
)
return archive.read(_valid_names[0])
# in archives with more than 1 subtitle (season pack) we try to guess the best subtitle file
@ -275,31 +258,36 @@ class SubdivxSubtitlesProvider(Provider):
_max_name = ""
for name in _valid_names:
_guess = guessit(name)
if 'season' not in _guess:
_guess['season'] = -1
if 'episode' not in _guess:
_guess['episode'] = -1
if "season" not in _guess:
_guess["season"] = -1
if "episode" not in _guess:
_guess["episode"] = -1
if isinstance(subtitle.video, Episode):
logger.debug("guessing %s" % name)
logger.debug("subtitle S{}E{} video S{}E{}".format(
_guess['season'], _guess['episode'], subtitle.video.season, subtitle.video.episode))
if subtitle.video.episode != _guess['episode'] or subtitle.video.season != _guess['season']:
logger.debug('subtitle does not match video, skipping')
logger.debug(
f"subtitle S{_guess['season']}E{_guess['episode']} video "
f"S{subtitle.video.season}E{subtitle.video.episode}"
)
if (
subtitle.video.episode != _guess["episode"]
or subtitle.video.season != _guess["season"]
):
logger.debug("subtitle does not match video, skipping")
continue
matches = set()
matches |= guess_matches(subtitle.video, _guess)
_score = sum((_scores.get(match, 0) for match in matches))
logger.debug('srt matches: %s, score %d' % (matches, _score))
logger.debug("srt matches: %s, score %d" % (matches, _score))
if _score > _max_score:
_max_score = _score
_max_name = name
logger.debug("new max: {} {}".format(name, _score))
logger.debug(f"new max: {name} {_score}")
if _max_score > 0:
logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score))
logger.debug(f"returning from archive: {_max_name} scored {_max_score}")
return archive.read(_max_name)
raise APIThrottled('Can not find the subtitle in the compressed file')
raise APIThrottled("Can not find the subtitle in the compressed file")

@ -17,8 +17,9 @@ from subzero.language import Language
logger = logging.getLogger(__name__)
server_url = "http://sapidb.caretas.club/"
page_url = "https://sucha.caretas.club/"
SERVER_URL = "http://sapidb.caretas.club/"
PAGE_URL = "https://sucha.caretas.club/"
UNDESIRED_FILES = ("[eng]", ".en.", ".eng.", ".fr.", ".pt.")
class SuchaSubtitle(Subtitle):
@ -35,7 +36,7 @@ class SuchaSubtitle(Subtitle):
matches,
):
super(SuchaSubtitle, self).__init__(
language, hearing_impaired=False, page_link=page_url
language, hearing_impaired=False, page_link=PAGE_URL
)
self.download_id = download_id
self.download_type = download_type
@ -71,7 +72,6 @@ class SuchaSubtitle(Subtitle):
class SuchaProvider(Provider):
"""Sucha Provider"""
languages = {Language.fromalpha2(l) for l in ["es"]}
language_list = list(languages)
video_types = (Episode, Movie)
@ -89,22 +89,21 @@ class SuchaProvider(Provider):
movie_year = video.year if video.year else "0"
is_episode = isinstance(video, Episode)
language = self.language_list[0]
if is_episode:
q = {
"query": "{} S{:02}E{:02}".format(
video.series, video.season, video.episode
)
}
q = {"query": f"{video.series} S{video.season:02}E{video.episode:02}"}
else:
q = {"query": video.title, "year": movie_year}
logger.debug("Searching subtitles: {}".format(q["query"]))
res = self.session.get(
server_url + ("episode" if is_episode else "movie"), params=q, timeout=10
logger.debug(f"Searching subtitles: {q}")
result = self.session.get(
SERVER_URL + ("episode" if is_episode else "movie"), params=q, timeout=10
)
res.raise_for_status()
result = res.json()
result.raise_for_status()
result_ = result.json()
subtitles = []
for i in result:
for i in result_:
matches = set()
try:
if (
@ -115,18 +114,18 @@ class SuchaProvider(Provider):
except TypeError:
logger.debug("No subtitles found")
return []
if is_episode:
if (
q["query"].lower() in i["title"].lower()
or q["query"].lower() in i["alt_title"].lower()
):
matches.add("title")
matches.add("series")
matches.add("season")
matches.add("episode")
matches.add("year")
matches_ = ("title", "series", "season", "episode", "year")
[matches.add(match) for match in matches_]
if str(i["year"]) == video.year:
matches.add("year")
subtitles.append(
SuchaSubtitle(
language,
@ -144,40 +143,41 @@ class SuchaProvider(Provider):
def _check_response(self, response):
if response.status_code != 200:
raise ServiceUnavailable("Bad status code: " + str(response.status_code))
raise ServiceUnavailable(f"Bad status code: {response.status_code}")
def _get_archive(self, content):
archive_stream = io.BytesIO(content)
if rarfile.is_rarfile(archive_stream):
logger.debug("Identified rar archive")
archive = rarfile.RarFile(archive_stream)
elif zipfile.is_zipfile(archive_stream):
return rarfile.RarFile(archive_stream)
if zipfile.is_zipfile(archive_stream):
logger.debug("Identified zip archive")
archive = zipfile.ZipFile(archive_stream)
else:
raise APIThrottled("Unsupported compressed format")
return archive
return zipfile.ZipFile(archive_stream)
raise APIThrottled("Unsupported compressed format")
def get_file(self, archive):
for name in archive.namelist():
if os.path.split(name)[-1].startswith("."):
continue
if not name.lower().endswith(SUBTITLE_EXTENSIONS):
continue
if (
"[eng]" in name.lower()
or ".en." in name.lower()
or ".eng." in name.lower()
):
if any(undesired in name.lower() for undesired in UNDESIRED_FILES):
continue
logger.debug("Returning from archive: {}".format(name))
logger.debug(f"Returning from archive: {name}")
return archive.read(name)
raise APIThrottled("Can not find the subtitle in the compressed file")
def download_subtitle(self, subtitle):
logger.info("Downloading subtitle %r", subtitle)
response = self.session.get(
server_url + "download",
SERVER_URL + "download",
params={"id": subtitle.download_id, "type": subtitle.download_type},
timeout=10,
)

@ -1,75 +1,64 @@
# -*- coding: utf-8 -*-
import logging
from urllib import parse
import random
import re
from bs4 import BeautifulSoup as bso
from urllib import parse
from bs4 import BeautifulSoup as bso
from requests import Session
from subzero.language import Language
from guessit import guessit
from subliminal import Episode
from subliminal.exceptions import ServiceUnavailable
from subliminal_patch.subtitle import Subtitle
from subliminal.subtitle import fix_line_ending
from subliminal_patch.exceptions import APIThrottled
from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle
from subliminal.subtitle import fix_line_ending, guess_matches
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
logger = logging.getLogger(__name__)
BASE = "https://www.tusubtitulo.com/series.php?/"
BASE_URL = "https://www.tusubtitulo.com"
CSS1 = "span.iconos-subtitulos"
CSS2 = "ul > li.rng.download.green > a.fas.fa-bullhorn.notifi_icon"
class TuSubtituloSubtitle(Subtitle):
provider_name = "tusubtitulo"
hash_verifiable = False
def __init__(self, language, filename, download_link, page_link, matches):
def __init__(self, language, sub_dict, matches):
super(TuSubtituloSubtitle, self).__init__(
language, hearing_impaired=False, page_link=page_link
language, hearing_impaired=False, page_link=sub_dict["download_url"]
)
self.download_link = download_link
self.page_link = page_link
self.language = language
self.release_info = filename
self.filename = filename
self.sub_dict = sub_dict
self.release_info = sub_dict["metadata"]
self.found_matches = matches
@property
def id(self):
return self.download_link
return self.sub_dict["download_url"]
def get_matches(self, video):
if video.resolution and video.resolution.lower() in self.release_info.lower():
self.found_matches.add("resolution")
if video.source and video.source.lower() in self.release_info.lower():
self.found_matches.add("source")
if video.video_codec:
if video.video_codec == "H.264" and "x264" in self.release_info.lower():
self.found_matches.add("video_codec")
elif video.video_codec == "H.265" and "x265" in self.release_info.lower():
self.found_matches.add("video_codec")
elif video.video_codec.lower() in self.release_info.lower():
self.found_matches.add("video_codec")
if (
video.release_group
and video.release_group.lower() in self.release_info.lower()
):
self.found_matches.add("release_group")
if video.audio_codec:
if video.audio_codec.lower().replace(" ", ".") in self.release_info.lower():
self.found_matches.add("audio_codec")
self.found_matches |= guess_matches(
video,
guessit(
self.release_info,
{"type": "episode"},
),
)
return self.found_matches
class TuSubtituloProvider(Provider):
"""TuSubtitulo.com Provider"""
BASE = "https://www.tusubtitulo.com/series.php?/"
languages = {Language.fromietf(lang) for lang in ["en", "es"]}
logger.debug(languages)
video_types = (Episode,)
@ -77,41 +66,29 @@ class TuSubtituloProvider(Provider):
def initialize(self):
self.session = Session()
self.session.headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
"referer": "https://www.tusubtitulo.com",
"User-Agent": random.choice(AGENT_LIST),
"referer": BASE_URL,
}
def terminate(self):
self.session.close()
def index_titles(self):
    """Yield ``{"title", "url"}`` dicts for every show linked on the index page.

    Scrapes the series listing and yields every anchor whose href
    contains ``"show"``.  Raises ``requests.HTTPError`` on a bad
    response.
    """
    r = self.session.get(f"{BASE_URL}/series.php?/")
    r.raise_for_status()

    soup = bso(r.content, "html.parser")
    for a in soup.find_all("a"):
        href_url = a.get("href")
        # Guard against anchors without an href (get() returns None),
        # which would make the membership test raise TypeError.
        if href_url and "show" in href_url:
            yield {"title": a.text, "url": href_url}
def title_available(self, item):
    """Extract episode info from one table row, or None if absent.

    :param item: list of ``<td>`` cells from a table row; the third cell
        is expected to hold the episode link whose text looks like
        ``"<season>x<episode> - <name>"``.
    :return: ``{"episode_number", "episode_id"}`` dict or ``None`` when
        the row does not describe an episode.
    """
    try:
        title = item[2].find_all("a")[0]
        # Strip leading zeros from the episode number via group(2).
        episode_number = re.search(r".*\d+x(0+)?(\d+) - .*?", title.text).group(2)
        episode_id = title.get("href").split("/")[4]
        return {"episode_number": episode_number, "episode_id": episode_id}
    except (AttributeError, IndexError):
        # AttributeError: re.search found no match (returned None);
        # IndexError: the row has no third cell / no anchor.
        return None
@ -121,86 +98,84 @@ class TuSubtituloProvider(Provider):
if "Vers" in text:
source = text.replace("Versión ", "")
if not source:
source = "Unknown"
return "Unknown"
return source
except IndexError:
return
def get_episode_dicts(self, episodes, season_subs, season_number):
    """Join scraped episodes with their subtitle entries.

    Yields one dict per (episode, subtitle) pair whose ``episode_id``
    values match, carrying the episode number, season, release metadata,
    download URL and language.
    """
    for episode in episodes:
        wanted_id = episode["episode_id"]
        for sub in (s for s in season_subs if s["episode_id"] == wanted_id):
            yield {
                "episode": episode["episode_number"],
                "season": season_number,
                "metadata": sub["metadata"],
                "download_url": sub["download_url"],
                "language": sub["language"],
            }
def scrape_episode_info(self, source_var, tables, tr):
    """Yield subtitle dicts from the rows following a version header.

    Walks the rows after index *tr* until the table layout runs out
    (any IndexError ends the scan), yielding only entries that are both
    language-recognized (en/es) and 100% completed.

    :param source_var: release metadata string attached to this version.
    :param tables: all ``<tr>`` rows of the season page.
    :param tr: index of the version-header row.
    """
    row = tr + 1
    while True:
        try:
            cells = tables[row].find_all("td")

            raw_language = cells[4].text.lower()
            if "eng" in raw_language:
                language = "en"
            elif "esp" in raw_language:
                language = "es"
            else:
                language = None

            # A percentage in the status cell means the translation is
            # still in progress.
            completed = "%" not in cells[5].text

            href = cells[6].find_all("a")[1].get("href")
            download_url = href.split("?sub=")[-1]
            episode_id = download_url.split("/")[4]

            if language and completed:
                yield {
                    "episode_id": episode_id,
                    "metadata": source_var,
                    "download_url": download_url,
                    "language": language,
                }
            row += 1
        except IndexError:
            # Either past the last row or a row with an unexpected
            # shape: both terminate this version's block.
            break
def get_episodes(self, show_id, season):
    """Scrape every completed subtitle entry for a show's season.

    :param show_id: site id of the show (from the show URL).
    :param season: season number as a string.
    :return: list of dicts from :meth:`get_episode_dicts`, or ``None``
        when the season is not listed for this show.
    :raises requests.HTTPError: on a bad HTTP response.
    """
    r = self.session.get(f"{BASE_URL}/show/{show_id}/{season}")
    r.raise_for_status()

    sopa = bso(r.content, "lxml")
    tables = sopa.find_all("tr")

    # The second row links the seasons that actually have subtitles.
    seasons = [i.text for i in tables[1].find_all("a")]
    if season not in seasons:
        logger.debug("Season not found")
        return None

    season_subs = []
    episodes = []
    for tr in range(len(tables)):
        data = tables[tr].find_all("td")

        title = self.title_available(data)
        if title:
            episodes.append(title)

        # A "Versión ..." row marks the start of a release block; the
        # rows after it hold that release's per-language subtitles.
        source_var = self.source_separator(data)
        if not source_var:
            continue
        season_subs += list(self.scrape_episode_info(source_var, tables, tr))

    return list(self.get_episode_dicts(episodes, season_subs, season))
def search(self, title, season, episode):
    """Find subtitle entries for one episode of *title*.

    :param title: series name, matched case-insensitively against the
        site's show index.
    :param season: season number as a string.
    :param episode: episode number as a string.
    :return: list of episode dicts for the requested episode, or
        ``None`` when the show/episode is not found.
    """
    found_tv_show = None
    for candidate in self.index_titles():
        if title.lower() == candidate["title"].lower():
            found_tv_show = candidate
            break

    if not found_tv_show:
        logger.debug("Show not found")
        return None

    # The show URL looks like "/show/<id>"; take the id segment.
    tv_show_id = found_tv_show["url"].split("/")[2].replace(" ", "")
    results = self.get_episodes(tv_show_id, season)

    if results:
        episode_list = [r for r in results if r["episode"] == episode]
        if episode_list:
            return episode_list
        logger.debug("Episode not found")
    logger.debug("No results")
    return None
def scrape_download_url(self, episode_dict):
    """Resolve the final subtitle download URL for an episode entry.

    Fetches the episode page and matches each listed version's release
    metadata against the entry's URL; for the matching version, rebuilds
    the "/updated/<lang>/<id>/<version>" download URL from the link's
    query string.

    :param episode_dict: dict produced by :meth:`get_episode_dicts`.
    :return: download URL string, or ``None`` when no version matches.
    """
    logger.debug("Scraping download URL")
    r = self.session.get(episode_dict["download_url"])
    r.raise_for_status()

    # ".<season>.<episode>." separates the show part of the file name
    # from the release metadata we compare against.
    discriminator = f".{episode_dict['season']}.{episode_dict['episode']}."
    soup = bso(r.content, "lxml")

    for url, selected in zip(soup.select(CSS1), soup.select(CSS2)):
        meta = ".".join(
            selected.get("href").split(discriminator)[-1].split(".")[:-1]
        )
        if meta not in episode_dict["download_url"]:
            continue

        id_url = url.find_all("a")[0].get("href")
        # Parse the query string once instead of three times.
        params = parse.parse_qs(parse.urlparse(id_url).query)
        sub_id = params["id"][0]
        lang_id = params["lang"][0]
        version_ = params["fversion"][0]
        return f"{BASE_URL}/updated/{lang_id}/{sub_id}/{version_}"
def query(self, languages, video):
    """Build :class:`TuSubtituloSubtitle` objects for *video*.

    :param languages: requested languages (currently not used for
        filtering here — presumably done upstream; TODO confirm).
    :param video: a subliminal ``Episode`` video.
    :return: list of subtitles (possibly empty).
    """
    query = f"{video.series} {video.season} {video.episode}"
    logger.debug(f"Searching subtitles: {query}")

    results = self.search(video.series, str(video.season), str(video.episode))
    if results:
        subtitles = []
        for sub in results:
            # self.search only returns results for the specific episode,
            # so these matches are implied for every hit.
            matches = {"title", "series", "season", "episode", "year"}
            subtitles.append(
                TuSubtituloSubtitle(
                    Language.fromietf(sub["language"]),
                    sub,
                    matches,
                )
            )
        return subtitles

    logger.debug("No subtitles found")
    return []
def list_subtitles(self, video, languages):
    """Provider entry point: delegate to :meth:`query`."""
    return self.query(languages, video)
def _check_response(self, response):
if response.status_code != 200:
raise ServiceUnavailable("Bad status code: " + str(response.status_code))
raise ServiceUnavailable(f"Bad status code: {response.status_code}")
def download_subtitle(self, subtitle):
    """Download and store the subtitle content.

    Scrapes the real download URL first (the stored URL is only the
    episode page), then fetches it and normalizes line endings into
    ``subtitle.content``.

    :raises APIThrottled: when the download URL cannot be scraped.
    :raises ServiceUnavailable: on a non-200 download response.
    """
    logger.info("Downloading subtitle %r", subtitle)

    download_url_ = self.scrape_download_url(subtitle.sub_dict)
    if not download_url_:
        raise APIThrottled("Can't scrape download url")

    response = self.session.get(download_url_, timeout=10, allow_redirects=True)
    self._check_response(response)
    subtitle.content = fix_line_ending(response.content)