# coding=utf-8 import io import logging import os import time import traceback from urllib import parse import requests import inflect import re import json import html import zipfile import rarfile from babelfish import language_converters from guessit import guessit from dogpile.cache.api import NO_VALUE from subliminal import Episode, ProviderError from subliminal.exceptions import ConfigurationError, ServiceUnavailable from subliminal.utils import sanitize_release_group from subliminal.cache import region from subliminal_patch.http import RetryingCFSession from subliminal_patch.providers import Provider from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin from subliminal_patch.subtitle import Subtitle, guess_matches from subliminal_patch.converters.subscene import language_ids, supported_languages from subscene_api.subscene import search, SearchTypes, Subtitle as APISubtitle, SITE_DOMAIN from subzero.language import Language p = inflect.engine() language_converters.register('subscene = subliminal_patch.converters.subscene:SubsceneConverter') logger = logging.getLogger(__name__) class SubsceneSubtitle(Subtitle): provider_name = 'subscene' hearing_impaired_verifiable = True is_pack = False page_link = None season = None episode = None releases = None def __init__(self, language, release_info, hearing_impaired=False, page_link=None, encoding=None, mods=None, asked_for_release_group=None, asked_for_episode=None): super(SubsceneSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link, encoding=encoding, mods=mods) self.release_info = self.releases = release_info self.asked_for_episode = asked_for_episode self.asked_for_release_group = asked_for_release_group self.season = None self.episode = None @classmethod def from_api(cls, s): return cls(Language.fromsubscene(s.language.strip()), s.title, hearing_impaired=s.hearing_impaired, page_link=s.url) @property def id(self): return self.page_link @property def numeric_id(self): return self.page_link.split("/")[-1] def get_matches(self, video): matches = set() if self.release_info.strip() == get_video_filename(video): logger.debug("Using hash match as the release name is the same") matches |= {"hash"} # episode if isinstance(video, Episode): guess = guessit(self.release_info, {'type': 'episode'}) self.season = guess.get("season") self.episode = guess.get("episode") matches |= guess_matches(video, guess) if "season" in matches and "episode" not in guess: # pack matches.add("episode") logger.debug("%r is a pack", self) self.is_pack = True if "title" in guess and "year" in matches: if video.series in guess['title']: matches.add("series") # movie else: guess = guessit(self.release_info, {'type': 'movie'}) matches |= guess_matches(video, guess) if video.release_group and "release_group" not in matches and "release_group" in guess: if sanitize_release_group(video.release_group) in sanitize_release_group(guess["release_group"]): matches.add("release_group") self.matches = matches return matches def get_download_link(self, session): return APISubtitle.get_zipped_url(self.page_link, session) def get_video_filename(video): return os.path.splitext(os.path.basename(video.original_name))[0] class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin): """ This currently only searches for the filename on SubScene. It doesn't open every found subtitle page to avoid massive hammering, thus it can't determine whether a subtitle is only-foreign or not. """ subtitle_class = SubsceneSubtitle languages = supported_languages languages.update(set(Language.rebuild(l, forced=True) for l in languages)) languages.update(set(Language.rebuild(l, hi=True) for l in languages)) session = None skip_wrong_fps = False hearing_impaired_verifiable = True only_foreign = False username = None password = None search_throttle = 8 # seconds def __init__(self, only_foreign=False, username=None, password=None): if not all((username, password)): raise ConfigurationError('Username and password must be specified') self.only_foreign = only_foreign self.username = username self.password = password def initialize(self): logger.info("Creating session") self.session = RetryingCFSession() prev_cookies = region.get("subscene_cookies2") if prev_cookies != NO_VALUE: logger.debug("Re-using old subscene cookies: %r", prev_cookies) self.session.cookies.update(prev_cookies) else: logger.debug("Logging in") self.login() def login(self): r = self.session.get("https://subscene.com/account/login") if "Server Error" in r.text: logger.error("Login unavailable; Maintenance?") raise ServiceUnavailable("Login unavailable; Maintenance?") match = re.search(r"", r.text) if match: h = html data = json.loads(h.unescape(match.group(1))) login_url = parse.urljoin(data["siteUrl"], data["loginUrl"]) time.sleep(1.0) r = self.session.post(login_url, { "username": self.username, "password": self.password, data["antiForgery"]["name"]: data["antiForgery"]["value"] }) pep_content = re.search(r"
" r".+name=\"id_token\".+?value=\"(?P.+?)\".*?" r"access_token\".+?value=\"(?P.+?)\".+?" r"token_type.+?value=\"(?P.+?)\".+?" r"expires_in.+?value=\"(?P.+?)\".+?" r"scope.+?value=\"(?P.+?)\".+?" r"state.+?value=\"(?P.+?)\".+?" r"session_state.+?value=\"(?P.+?)\"", r.text, re.MULTILINE | re.DOTALL) if pep_content: r = self.session.post(SITE_DOMAIN, pep_content.groupdict()) try: r.raise_for_status() except Exception: raise ProviderError("Something went wrong when trying to log in: %s", traceback.format_exc()) else: cj = self.session.cookies.copy() store_cks = ("scene", "idsrv", "idsrv.xsrf", "idsvr.clients", "idsvr.session", "idsvr.username") for cn in self.session.cookies.keys(): if cn not in store_cks: del cj[cn] logger.debug("Storing cookies: %r", cj) region.set("subscene_cookies2", cj) return raise ProviderError("Something went wrong when trying to log in #1") def terminate(self): logger.info("Closing session") self.session.close() def _create_filters(self, languages): self.filters = dict(HearingImpaired="2") acc_filters = self.filters.copy() if self.only_foreign: self.filters["ForeignOnly"] = "True" acc_filters["ForeignOnly"] = self.filters["ForeignOnly"].lower() logger.info("Only searching for foreign/forced subtitles") selected_ids = [] for l in languages: lid = language_ids.get(l.basename, language_ids.get(l.alpha3, None)) if lid: selected_ids.append(str(lid)) acc_filters["SelectedIds"] = selected_ids self.filters["LanguageFilter"] = ",".join(acc_filters["SelectedIds"]) last_filters = region.get("subscene_filters") if last_filters != acc_filters: region.set("subscene_filters", acc_filters) logger.debug("Setting account filters to %r", acc_filters) self.session.post("https://u.subscene.com/filter", acc_filters, allow_redirects=False) logger.debug("Filter created: '%s'" % self.filters) def _enable_filters(self): self.session.cookies.update(self.filters) logger.debug("Filters applied") def list_subtitles(self, video, languages): if not video.original_name: logger.info("Skipping search because we don't know the original release name") return [] self._create_filters(languages) self._enable_filters() return [s for s in self.query(video) if s.language in languages] def download_subtitle(self, subtitle): if subtitle.pack_data: logger.info("Using previously downloaded pack data") if rarfile.is_rarfile(io.BytesIO(subtitle.pack_data)): logger.debug('Identified rar archive') archive = rarfile.RarFile(io.BytesIO(subtitle.pack_data)) elif zipfile.is_zipfile(io.BytesIO(subtitle.pack_data)): logger.debug('Identified zip archive') archive = zipfile.ZipFile(io.BytesIO(subtitle.pack_data)) else: logger.error('Unsupported compressed format') return subtitle.pack_data = None try: subtitle.content = self.get_subtitle_from_archive(subtitle, archive) return except ProviderError: pass # open the archive r = self.session.get(subtitle.get_download_link(self.session), timeout=10) r.raise_for_status() archive_stream = io.BytesIO(r.content) if rarfile.is_rarfile(archive_stream): logger.debug('Identified rar archive') archive = rarfile.RarFile(archive_stream) elif zipfile.is_zipfile(archive_stream): logger.debug('Identified zip archive') archive = zipfile.ZipFile(archive_stream) else: logger.error('Unsupported compressed format') return subtitle.content = self.get_subtitle_from_archive(subtitle, archive) # store archive as pack_data for later caching subtitle.pack_data = r.content def parse_results(self, video, film): subtitles = [] for s in film.subtitles: try: subtitle = SubsceneSubtitle.from_api(s) except NotImplementedError as e: logger.info(e) continue subtitle.asked_for_release_group = video.release_group if isinstance(video, Episode): subtitle.asked_for_episode = video.episode if self.only_foreign: subtitle.language = Language.rebuild(subtitle.language, forced=True) # set subtitle language to hi if it's hearing_impaired if subtitle.hearing_impaired: subtitle.language = Language.rebuild(subtitle.language, hi=True) subtitles.append(subtitle) logger.debug('Found subtitle %r', subtitle) return subtitles def do_search(self, *args, **kwargs): try: return search(*args, **kwargs) except requests.HTTPError: region.delete("subscene_cookies2") def query(self, video): subtitles = [] if isinstance(video, Episode): titles = list(set([video.series] + video.alternative_series[:1])) more_than_one = len(titles) > 1 for series in titles: term = u"%s - %s Season" % (series, p.number_to_words("%sth" % video.season).capitalize()) logger.debug('Searching with series and season: %s', term) film = self.do_search(term, session=self.session, release=False, throttle=self.search_throttle, limit_to=SearchTypes.TvSerie) if not film and video.season == 1: logger.debug('Searching with series name: %s', series) film = self.do_search(series, session=self.session, release=False, throttle=self.search_throttle, limit_to=SearchTypes.TvSerie) if film and film.subtitles: logger.debug('Searching found: %s', len(film.subtitles)) subtitles += self.parse_results(video, film) else: logger.debug('No results found') if more_than_one: time.sleep(self.search_throttle) else: titles = list(set([video.title] + video.alternative_titles[:1])) more_than_one = len(titles) > 1 for title in titles: logger.debug('Searching for movie results: %r', title) film = self.do_search(title, year=video.year, session=self.session, limit_to=None, release=False, throttle=self.search_throttle) if film and film.subtitles: subtitles += self.parse_results(video, film) if more_than_one: time.sleep(self.search_throttle) logger.info("%s subtitles found" % len(subtitles)) return subtitles