You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
439 lines
17 KiB
439 lines
17 KiB
# coding=utf-8
|
|
from __future__ import absolute_import
|
|
import base64
|
|
import logging
|
|
import os
|
|
import traceback
|
|
import re
|
|
import zlib
|
|
import time
|
|
import requests
|
|
|
|
from babelfish import language_converters
|
|
from dogpile.cache.api import NO_VALUE
|
|
from guessit import guessit
|
|
from subliminal.exceptions import ConfigurationError, ServiceUnavailable
|
|
from subliminal.providers.opensubtitles import OpenSubtitlesProvider as _OpenSubtitlesProvider,\
|
|
OpenSubtitlesSubtitle as _OpenSubtitlesSubtitle, Episode, Movie, ServerProxy, Unauthorized, NoSession, \
|
|
DownloadLimitReached, InvalidImdbid, UnknownUserAgent, DisabledUserAgent, OpenSubtitlesError
|
|
from .mixins import ProviderRetryMixin
|
|
from subliminal.subtitle import fix_line_ending
|
|
from subliminal_patch.http import SubZeroRequestsTransport
|
|
from subliminal_patch.utils import sanitize, fix_inconsistent_naming
|
|
from subliminal.cache import region
|
|
from subliminal_patch.score import framerate_equal
|
|
from subliminal_patch.subtitle import guess_matches
|
|
from subzero.language import Language
|
|
|
|
from ..exceptions import TooManyRequests, APIThrottled
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def fix_tv_naming(title):
|
|
"""Fix TV show titles with inconsistent naming using dictionary, but do not sanitize them.
|
|
|
|
:param str title: original title.
|
|
:return: new title.
|
|
:rtype: str
|
|
|
|
"""
|
|
return fix_inconsistent_naming(title, {"Superman & Lois": "Superman and Lois",
|
|
}, True)
|
|
|
|
|
|
def fix_movie_naming(title):
|
|
return fix_inconsistent_naming(title, {
|
|
}, True)
|
|
|
|
|
|
class OpenSubtitlesSubtitle(_OpenSubtitlesSubtitle):
|
|
hash_verifiable = True
|
|
hearing_impaired_verifiable = True
|
|
|
|
def __init__(self, language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind, hash, movie_name,
|
|
movie_release_name, movie_year, movie_imdb_id, series_season, series_episode, query_parameters,
|
|
filename, encoding, fps, skip_wrong_fps=True):
|
|
super(OpenSubtitlesSubtitle, self).__init__(language, hearing_impaired, page_link, subtitle_id,
|
|
matched_by, movie_kind, hash,
|
|
movie_name, movie_release_name, movie_year, movie_imdb_id,
|
|
series_season, series_episode, filename, encoding)
|
|
self.query_parameters = query_parameters or {}
|
|
self.fps = fps
|
|
self.release_info = movie_release_name
|
|
self.wrong_fps = False
|
|
self.skip_wrong_fps = skip_wrong_fps
|
|
self.movie_imdb_id = movie_imdb_id
|
|
|
|
def get_fps(self):
|
|
try:
|
|
return float(self.fps)
|
|
except:
|
|
return None
|
|
|
|
def get_matches(self, video, hearing_impaired=False):
|
|
matches = super(OpenSubtitlesSubtitle, self).get_matches(video)
|
|
|
|
type_ = "episode" if isinstance(video, Episode) else "movie"
|
|
matches |= guess_matches(video, guessit(self.movie_release_name, {'type': type_}))
|
|
matches |= guess_matches(video, guessit(self.filename, {'type': type_}))
|
|
|
|
# episode
|
|
if type_ == "episode" and self.movie_kind == "episode":
|
|
# series
|
|
if fix_tv_naming(video.series) and (sanitize(self.series_name) in (
|
|
sanitize(name) for name in [fix_tv_naming(video.series)] + video.alternative_series)):
|
|
matches.add('series')
|
|
# movie
|
|
elif type_ == "movie" and self.movie_kind == "movie":
|
|
# title
|
|
if fix_movie_naming(video.title) and (sanitize(self.movie_name) in (
|
|
sanitize(name) for name in [fix_movie_naming(video.title)] + video.alternative_titles)):
|
|
matches.add('title')
|
|
|
|
sub_fps = None
|
|
try:
|
|
sub_fps = float(self.fps)
|
|
except ValueError:
|
|
pass
|
|
|
|
# video has fps info, sub also, and sub's fps is greater than 0
|
|
if video.fps and sub_fps and not framerate_equal(video.fps, self.fps):
|
|
self.wrong_fps = True
|
|
|
|
if self.skip_wrong_fps:
|
|
logger.debug("Wrong FPS (expected: %s, got: %s, lowering score massively)", video.fps, self.fps)
|
|
# fixme: may be too harsh
|
|
return set()
|
|
else:
|
|
logger.debug("Wrong FPS (expected: %s, got: %s, continuing)", video.fps, self.fps)
|
|
|
|
# matched by tag?
|
|
if self.matched_by == "tag":
|
|
# treat a tag match equally to a hash match
|
|
logger.debug("Subtitle matched by tag, treating it as a hash-match. Tag: '%s'",
|
|
self.query_parameters.get("tag", None))
|
|
matches.add("hash")
|
|
|
|
# imdb_id match so we'll consider year as matching
|
|
if self.movie_imdb_id and video.imdb_id and (self.movie_imdb_id == video.imdb_id):
|
|
matches.add("year")
|
|
|
|
return matches
|
|
|
|
|
|
class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
|
|
only_foreign = False
|
|
also_foreign = False
|
|
subtitle_class = OpenSubtitlesSubtitle
|
|
hash_verifiable = True
|
|
hearing_impaired_verifiable = True
|
|
skip_wrong_fps = True
|
|
is_vip = False
|
|
use_ssl = True
|
|
timeout = 15
|
|
|
|
default_url = "//api.opensubtitles.org/xml-rpc"
|
|
vip_url = "//vip-api.opensubtitles.org/xml-rpc"
|
|
|
|
languages = {Language.fromopensubtitles(l) for l in language_converters['szopensubtitles'].codes}
|
|
languages.update(set(Language.rebuild(l, forced=True) for l in languages))
|
|
languages.update(set(Language.rebuild(l, hi=True) for l in languages))
|
|
|
|
def __init__(self, username=None, password=None, use_tag_search=False, only_foreign=False, also_foreign=False,
|
|
skip_wrong_fps=True, is_vip=False, use_ssl=True, timeout=15):
|
|
if any((username, password)) and not all((username, password)):
|
|
raise ConfigurationError('Username and password must be specified')
|
|
|
|
self.username = username or ''
|
|
self.password = password or ''
|
|
self.use_tag_search = use_tag_search
|
|
self.only_foreign = only_foreign
|
|
self.also_foreign = also_foreign
|
|
self.skip_wrong_fps = skip_wrong_fps
|
|
self.token = None
|
|
self.is_vip = is_vip
|
|
self.use_ssl = use_ssl
|
|
self.timeout = timeout
|
|
|
|
logger.debug("Using timeout: %d", timeout)
|
|
|
|
if use_ssl:
|
|
logger.debug("Using HTTPS connection")
|
|
|
|
self.default_url = ("https:" if use_ssl else "http:") + self.default_url
|
|
self.vip_url = ("https:" if use_ssl else "http:") + self.vip_url
|
|
|
|
if use_tag_search:
|
|
logger.info("Using tag/exact filename search")
|
|
|
|
if only_foreign:
|
|
logger.info("Only searching for foreign/forced subtitles")
|
|
|
|
def get_server_proxy(self, url, timeout=None):
|
|
return ServerProxy(url, SubZeroRequestsTransport(use_https=self.use_ssl, timeout=timeout or self.timeout,
|
|
user_agent=os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")))
|
|
|
|
def log_in_url(self, server_url):
|
|
self.token = None
|
|
self.server = self.get_server_proxy(server_url)
|
|
|
|
response = self.retry(
|
|
lambda: checked(
|
|
lambda: self.server.LogIn(self.username, self.password, 'eng',
|
|
os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"))
|
|
)
|
|
)
|
|
|
|
self.token = response['token']
|
|
logger.debug('Logged in with token %r', self.token[:10]+"X"*(len(self.token)-10))
|
|
|
|
region.set("os_token", bytearray(self.token, encoding='utf-8'))
|
|
region.set("os_server_url", bytearray(server_url, encoding='utf-8'))
|
|
|
|
def log_in(self):
|
|
logger.info('Logging in')
|
|
|
|
try:
|
|
self.log_in_url(self.vip_url if self.is_vip else self.default_url)
|
|
|
|
except Unauthorized:
|
|
if self.is_vip:
|
|
logger.info("VIP server login failed, falling back")
|
|
try:
|
|
self.log_in_url(self.default_url)
|
|
except Unauthorized:
|
|
pass
|
|
|
|
if not self.token:
|
|
logger.error("Login failed, please check your credentials")
|
|
raise Unauthorized
|
|
|
|
def use_token_or_login(self, func):
|
|
if not self.token:
|
|
self.log_in()
|
|
return func()
|
|
try:
|
|
return func()
|
|
except Unauthorized:
|
|
self.log_in()
|
|
return func()
|
|
|
|
def initialize(self):
|
|
token_cache = region.get("os_token")
|
|
url_cache = region.get("os_server_url")
|
|
|
|
if token_cache is not NO_VALUE and url_cache is not NO_VALUE:
|
|
self.token = token_cache.decode("utf-8")
|
|
self.server = self.get_server_proxy(url_cache.decode("utf-8"))
|
|
logger.debug("Using previous login token: %r", self.token[:10] + "X" * (len(self.token) - 10))
|
|
else:
|
|
self.server = None
|
|
self.token = None
|
|
|
|
def terminate(self):
|
|
self.server = None
|
|
self.token = None
|
|
|
|
def list_subtitles(self, video, languages):
|
|
"""
|
|
:param video:
|
|
:param languages:
|
|
:return:
|
|
|
|
patch: query movies even if hash is known; add tag parameter
|
|
"""
|
|
|
|
season = episode = None
|
|
if isinstance(video, Episode):
|
|
query = [video.series] + video.alternative_series
|
|
season = video.season
|
|
episode = episode = min(video.episode) if isinstance(video.episode, list) else video.episode
|
|
|
|
if video.is_special:
|
|
season = None
|
|
episode = None
|
|
query = [u"%s %s" % (series, video.title) for series in [video.series] + video.alternative_series]
|
|
logger.info("%s: Searching for special: %r", self.__class__, query)
|
|
# elif ('opensubtitles' not in video.hashes or not video.size) and not video.imdb_id:
|
|
# query = video.name.split(os.sep)[-1]
|
|
else:
|
|
query = [video.title] + video.alternative_titles
|
|
|
|
if isinstance(video, Episode):
|
|
imdb_id = video.series_imdb_id
|
|
else:
|
|
imdb_id = video.imdb_id
|
|
|
|
return self.query(video, languages, hash=video.hashes.get('opensubtitles'), size=video.size,
|
|
imdb_id=imdb_id, query=query, season=season, episode=episode, tag=video.original_name,
|
|
use_tag_search=self.use_tag_search, only_foreign=self.only_foreign,
|
|
also_foreign=self.also_foreign)
|
|
|
|
def query(self, video, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None,
|
|
tag=None, use_tag_search=False, only_foreign=False, also_foreign=False):
|
|
# fill the search criteria
|
|
criteria = []
|
|
if hash and size:
|
|
criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
|
|
if use_tag_search and tag:
|
|
criteria.append({'tag': tag})
|
|
if imdb_id:
|
|
if season and episode:
|
|
criteria.append({'imdbid': imdb_id[2:], 'season': season, 'episode': episode})
|
|
else:
|
|
criteria.append({'imdbid': imdb_id[2:]})
|
|
# Commented out after the issue with episode released after October 17th 2020.
|
|
# if query and season and episode:
|
|
# for q in query:
|
|
# criteria.append({'query': q.replace('\'', ''), 'season': season, 'episode': episode})
|
|
# elif query:
|
|
# for q in query:
|
|
# criteria.append({'query': q.replace('\'', '')})
|
|
if not criteria:
|
|
raise ValueError('Not enough information')
|
|
|
|
# add the language
|
|
for criterion in criteria:
|
|
criterion['sublanguageid'] = ','.join(sorted(l.opensubtitles for l in languages))
|
|
|
|
# query the server
|
|
logger.info('Searching subtitles %r', criteria)
|
|
response = self.use_token_or_login(
|
|
lambda: self.retry(lambda: checked(lambda: self.server.SearchSubtitles(self.token, criteria)))
|
|
)
|
|
|
|
subtitles = []
|
|
|
|
# exit if no data
|
|
if not response['data']:
|
|
logger.info('No subtitles found')
|
|
return subtitles
|
|
|
|
# loop over subtitle items
|
|
for subtitle_item in response['data']:
|
|
_subtitle_item = subtitle_item
|
|
|
|
# in case OS messes their API results up again, check whether we've got a dict or a string as subtitle_item
|
|
if hasattr(_subtitle_item, "startswith"):
|
|
_subtitle_item = response["data"][subtitle_item]
|
|
|
|
# read the item
|
|
language = Language.fromopensubtitles(_subtitle_item['SubLanguageID'])
|
|
hearing_impaired = bool(int(_subtitle_item['SubHearingImpaired']))
|
|
page_link = _subtitle_item['SubtitlesLink']
|
|
subtitle_id = int(_subtitle_item['IDSubtitleFile'])
|
|
matched_by = _subtitle_item['MatchedBy']
|
|
movie_kind = _subtitle_item['MovieKind']
|
|
hash = _subtitle_item['MovieHash']
|
|
movie_name = _subtitle_item['MovieName']
|
|
movie_release_name = _subtitle_item['MovieReleaseName']
|
|
movie_year = int(_subtitle_item['MovieYear']) if _subtitle_item['MovieYear'] else None
|
|
if season or episode:
|
|
movie_imdb_id = 'tt' + _subtitle_item['SeriesIMDBParent']
|
|
else:
|
|
movie_imdb_id = 'tt' + _subtitle_item['IDMovieImdb']
|
|
movie_fps = _subtitle_item.get('MovieFPS')
|
|
series_season = int(_subtitle_item['SeriesSeason']) if _subtitle_item['SeriesSeason'] else None
|
|
series_episode = int(_subtitle_item['SeriesEpisode']) if _subtitle_item['SeriesEpisode'] else None
|
|
filename = _subtitle_item['SubFileName']
|
|
encoding = _subtitle_item.get('SubEncoding') or None
|
|
foreign_parts_only = bool(int(_subtitle_item.get('SubForeignPartsOnly', 0)))
|
|
|
|
# foreign/forced subtitles only wanted
|
|
if only_foreign and not foreign_parts_only:
|
|
continue
|
|
|
|
# foreign/forced not wanted
|
|
elif not only_foreign and not also_foreign and foreign_parts_only:
|
|
continue
|
|
|
|
# set subtitle language to forced if it's foreign_parts_only
|
|
elif (also_foreign or only_foreign) and foreign_parts_only:
|
|
language = Language.rebuild(language, forced=True)
|
|
|
|
# set subtitle language to hi if it's hearing_impaired
|
|
if hearing_impaired:
|
|
language = Language.rebuild(language, hi=True)
|
|
|
|
if language not in languages:
|
|
continue
|
|
|
|
if video.imdb_id and (movie_imdb_id != re.sub("(?<![^a-zA-Z])0+","", video.imdb_id)):
|
|
continue
|
|
|
|
query_parameters = _subtitle_item.get("QueryParameters")
|
|
|
|
subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by,
|
|
movie_kind,
|
|
hash, movie_name, movie_release_name, movie_year, movie_imdb_id,
|
|
series_season, series_episode, query_parameters, filename, encoding,
|
|
movie_fps, skip_wrong_fps=self.skip_wrong_fps)
|
|
subtitle.uploader = _subtitle_item['UserNickName'] if _subtitle_item['UserNickName'] else 'anonymous'
|
|
logger.debug('Found subtitle %r by %s', subtitle, matched_by)
|
|
subtitles.append(subtitle)
|
|
|
|
return subtitles
|
|
|
|
def download_subtitle(self, subtitle):
|
|
logger.info('Downloading subtitle %r', subtitle)
|
|
response = self.use_token_or_login(
|
|
lambda: checked(
|
|
lambda: self.server.DownloadSubtitles(self.token, [str(subtitle.subtitle_id)])
|
|
)
|
|
)
|
|
subtitle.content = fix_line_ending(zlib.decompress(base64.b64decode(response['data'][0]['data']), 47))
|
|
|
|
|
|
def checked(fn, raise_api_limit=False):
|
|
"""Run :fn: and check the response status before returning it.
|
|
|
|
:param fn: the function to make an XMLRPC call to OpenSubtitles.
|
|
:return: the response.
|
|
:raise: :class:`OpenSubtitlesError`
|
|
|
|
"""
|
|
response = None
|
|
try:
|
|
try:
|
|
response = fn()
|
|
except APIThrottled:
|
|
if not raise_api_limit:
|
|
logger.info("API request limit hit, waiting and trying again once.")
|
|
time.sleep(12)
|
|
return checked(fn, raise_api_limit=True)
|
|
raise
|
|
|
|
except requests.RequestException as e:
|
|
status_code = e.response.status_code
|
|
else:
|
|
status_code = int(response['status'][:3])
|
|
except:
|
|
status_code = None
|
|
|
|
if status_code == 401:
|
|
raise Unauthorized
|
|
if status_code == 406:
|
|
raise NoSession
|
|
if status_code == 407:
|
|
raise DownloadLimitReached
|
|
if status_code == 413:
|
|
raise InvalidImdbid
|
|
if status_code == 414:
|
|
raise UnknownUserAgent
|
|
if status_code == 415:
|
|
raise DisabledUserAgent
|
|
if status_code == 429:
|
|
if not raise_api_limit:
|
|
raise TooManyRequests
|
|
else:
|
|
raise APIThrottled
|
|
if status_code == 503:
|
|
raise ServiceUnavailable(str(status_code))
|
|
if status_code != 200:
|
|
if response and "status" in response:
|
|
raise OpenSubtitlesError(response['status'])
|
|
raise ServiceUnavailable("Unknown Error, empty response: %s: %r" % (status_code, response))
|
|
|
|
return response
|