Improved opensubtitles.com resilience and properly handled the Invalid JSON/Bad Gateway exceptions.

pull/1704/head v1.0.3-beta.21
morpheus65535 3 years ago
parent 63f3454c8f
commit 669bd3376a

@ -5,13 +5,14 @@ import time
import datetime import datetime
from requests import Session, ConnectionError, Timeout, ReadTimeout, RequestException from requests import Session, ConnectionError, Timeout, ReadTimeout, RequestException
from requests.exceptions import JSONDecodeError
from subzero.language import Language from subzero.language import Language
from babelfish import language_converters from babelfish import language_converters
from subliminal import Episode, Movie from subliminal import Episode, Movie
from subliminal.score import get_equivalent_release_groups from subliminal.score import get_equivalent_release_groups
from subliminal.utils import sanitize_release_group, sanitize from subliminal.utils import sanitize_release_group, sanitize
from subliminal_patch.exceptions import TooManyRequests from subliminal_patch.exceptions import TooManyRequests, APIThrottled
from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError, ConfigurationError, ServiceUnavailable, \ from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError, ConfigurationError, ServiceUnavailable, \
ProviderError ProviderError
from .mixins import ProviderRetryMixin from .mixins import ProviderRetryMixin
@ -29,6 +30,8 @@ logger = logging.getLogger(__name__)
SHOW_EXPIRATION_TIME = datetime.timedelta(weeks=1).total_seconds() SHOW_EXPIRATION_TIME = datetime.timedelta(weeks=1).total_seconds()
TOKEN_EXPIRATION_TIME = datetime.timedelta(hours=12).total_seconds() TOKEN_EXPIRATION_TIME = datetime.timedelta(hours=12).total_seconds()
retry_amount=5
def fix_tv_naming(title): def fix_tv_naming(title):
"""Fix TV show titles with inconsistent naming using dictionary, but do not sanitize them. """Fix TV show titles with inconsistent naming using dictionary, but do not sanitize them.
@ -166,17 +169,16 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
lambda: self.session.post(self.server_url + 'login', lambda: self.session.post(self.server_url + 'login',
json={"username": self.username, "password": self.password}, json={"username": self.username, "password": self.password},
allow_redirects=False, allow_redirects=False,
timeout=30) timeout=30),
) validate_json=True,
json_key_name='token'
),
amount=retry_amount
) )
try: self.token = r.json()['token']
self.token = r.json()['token'] region.set("oscom_token", self.token)
except ValueError: return
raise ProviderError('Invalid JSON returned by provider')
else:
region.set("oscom_token", self.token)
return
@staticmethod @staticmethod
def sanitize_external_ids(external_id): def sanitize_external_ids(external_id):
@ -190,13 +192,16 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
title_id = None title_id = None
parameters = {'query': title.lower()} parameters = {'query': title.lower()}
logging.debug('Searching using this title: {}'.format(title)) logging.debug(f'Searching using this title: {title}')
results = self.retry( results = self.retry(
lambda: checked( lambda: checked(
lambda: self.session.get(self.server_url + 'features', params=parameters, timeout=30), lambda: self.session.get(self.server_url + 'features', params=parameters, timeout=30),
validate_token=True validate_token=True,
) validate_json=True,
json_key_name='data'
),
amount=retry_amount
) )
if results == 401: if results == 401:
@ -206,44 +211,44 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
results = self.retry( results = self.retry(
lambda: checked( lambda: checked(
lambda: self.session.get(self.server_url + 'features', params=parameters, timeout=30) lambda: self.session.get(self.server_url + 'features', params=parameters, timeout=30),
) validate_json=True,
json_key_name='data'
),
amount=retry_amount
) )
# deserialize results # deserialize results
try: results_dict = results.json()['data']
results_dict = results.json()['data']
except ValueError: # loop over results
raise ProviderError('Invalid JSON returned by provider') for result in results_dict:
else: if 'title' in result['attributes']:
# loop over results if isinstance(self.video, Episode):
for result in results_dict: if fix_tv_naming(title).lower() == result['attributes']['title'].lower() and \
if 'title' in result['attributes']: (not self.video.year or self.video.year == int(result['attributes']['year'])):
if isinstance(self.video, Episode): title_id = result['id']
if fix_tv_naming(title).lower() == result['attributes']['title'].lower() and \ break
(not self.video.year or self.video.year == int(result['attributes']['year'])):
title_id = result['id']
break
else:
if fix_movie_naming(title).lower() == result['attributes']['title'].lower() and \
(not self.video.year or self.video.year == int(result['attributes']['year'])):
title_id = result['id']
break
else: else:
continue if fix_movie_naming(title).lower() == result['attributes']['title'].lower() and \
(not self.video.year or self.video.year == int(result['attributes']['year'])):
title_id = result['id']
break
else:
continue
if title_id: if title_id:
logging.debug('Found this title ID: {}'.format(title_id)) logging.debug(f'Found this title ID: {title_id}')
return self.sanitize_external_ids(title_id) return self.sanitize_external_ids(title_id)
finally:
if not title_id: if not title_id:
logger.debug('No match found for {}'.format(title)) logger.debug(f'No match found for {title}')
def query(self, languages, video): def query(self, languages, video):
self.video = video self.video = video
if self.use_hash: if self.use_hash:
file_hash = self.video.hashes.get('opensubtitlescom') file_hash = self.video.hashes.get('opensubtitlescom')
logging.debug('Searching using this hash: {}'.format(hash)) logging.debug(f'Searching using this hash: {hash}')
else: else:
file_hash = None file_hash = None
@ -275,7 +280,7 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
forced = 'exclude' forced = 'exclude'
langs = ','.join(lang_strings) langs = ','.join(lang_strings)
logging.debug('Searching for this languages: {}'.format(lang_strings)) logging.debug(f'Searching for this languages: {lang_strings}')
# query the server # query the server
if isinstance(self.video, Episode): if isinstance(self.video, Episode):
@ -289,8 +294,11 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
('parent_feature_id', title_id) if title_id else ('imdb_id', imdb_id), ('parent_feature_id', title_id) if title_id else ('imdb_id', imdb_id),
('season_number', self.video.season), ('season_number', self.video.season),
('query', os.path.basename(self.video.name))), ('query', os.path.basename(self.video.name))),
timeout=30) timeout=30),
) validate_json=True,
json_key_name='data'
),
amount=retry_amount
) )
else: else:
res = self.retry( res = self.retry(
@ -301,55 +309,53 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
('languages', langs.lower()), ('languages', langs.lower()),
('moviehash', file_hash), ('moviehash', file_hash),
('query', os.path.basename(self.video.name))), ('query', os.path.basename(self.video.name))),
timeout=30) timeout=30),
) validate_json=True,
json_key_name='data'
),
amount=retry_amount
) )
subtitles = [] subtitles = []
try: result = res.json()
result = res.json()
if 'data' not in result: logging.debug(f"Query returned {len(result['data'])} subtitles")
raise ValueError
except ValueError: if len(result['data']):
raise ProviderError('Invalid JSON returned by provider') for item in result['data']:
else: if 'season_number' in item['attributes']['feature_details']:
logging.debug('Query returned {} subtitles'.format(len(result['data']))) season_number = item['attributes']['feature_details']['season_number']
else:
if len(result['data']): season_number = None
for item in result['data']:
if 'season_number' in item['attributes']['feature_details']: if 'episode_number' in item['attributes']['feature_details']:
season_number = item['attributes']['feature_details']['season_number'] episode_number = item['attributes']['feature_details']['episode_number']
else: else:
season_number = None episode_number = None
if 'episode_number' in item['attributes']['feature_details']: if 'moviehash_match' in item['attributes']:
episode_number = item['attributes']['feature_details']['episode_number'] moviehash_match = item['attributes']['moviehash_match']
else: else:
episode_number = None moviehash_match = False
if 'moviehash_match' in item['attributes']: if len(item['attributes']['files']):
moviehash_match = item['attributes']['moviehash_match'] subtitle = OpenSubtitlesComSubtitle(
else: language=Language.fromietf(item['attributes']['language']),
moviehash_match = False forced=item['attributes']['foreign_parts_only'],
hearing_impaired=item['attributes']['hearing_impaired'],
if len(item['attributes']['files']): page_link=item['attributes']['url'],
subtitle = OpenSubtitlesComSubtitle( file_id=item['attributes']['files'][0]['file_id'],
language=Language.fromietf(item['attributes']['language']), releases=item['attributes']['release'],
forced=item['attributes']['foreign_parts_only'], uploader=item['attributes']['uploader']['name'],
hearing_impaired=item['attributes']['hearing_impaired'], title=item['attributes']['feature_details']['movie_name'],
page_link=item['attributes']['url'], year=item['attributes']['feature_details']['year'],
file_id=item['attributes']['files'][0]['file_id'], season=season_number,
releases=item['attributes']['release'], episode=episode_number,
uploader=item['attributes']['uploader']['name'], hash_matched=moviehash_match
title=item['attributes']['feature_details']['movie_name'], )
year=item['attributes']['feature_details']['year'], subtitle.get_matches(self.video)
season=season_number, subtitles.append(subtitle)
episode=episode_number,
hash_matched=moviehash_match
)
subtitle.get_matches(self.video)
subtitles.append(subtitle)
return subtitles return subtitles
@ -373,39 +379,43 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
lambda: self.session.post(self.server_url + 'download', lambda: self.session.post(self.server_url + 'download',
json={'file_id': subtitle.file_id, 'sub_format': 'srt'}, json={'file_id': subtitle.file_id, 'sub_format': 'srt'},
headers=headers, headers=headers,
timeout=30) timeout=30),
) validate_json=True,
json_key_name='link'
),
amount=retry_amount
) )
try: download_data = res.json()
download_data = res.json() subtitle.download_link = download_data['link']
except ValueError:
raise ProviderError('Invalid JSON returned by provider')
else:
if 'link' not in download_data:
return False
subtitle.download_link = download_data['link']
r = self.retry( r = self.retry(
lambda: checked( lambda: checked(
lambda: self.session.get(subtitle.download_link, timeout=30) lambda: self.session.get(subtitle.download_link, timeout=30),
) validate_content=True
) ),
amount=retry_amount
)
if not r:
logger.debug(f'Could not download subtitle from {subtitle.download_link}')
subtitle.content = None
return
else:
subtitle_content = r.content subtitle_content = r.content
subtitle.content = fix_line_ending(subtitle_content)
if subtitle_content:
subtitle.content = fix_line_ending(subtitle_content)
else:
logger.debug('Could not download subtitle from {}'.format(subtitle.download_link))
def checked(fn, raise_api_limit=False, validate_token=False, validate_json=False, json_key_name=None,
def checked(fn, validate_token=False): validate_content=False):
"""Run :fn: and check the response status before returning it. """Run :fn: and check the response status before returning it.
:param fn: the function to make an API call to OpenSubtitles.com. :param fn: the function to make an API call to OpenSubtitles.com.
:param raise_api_limit: if True we wait a little bit longer before running the call again.
:param validate_token: test if token is valid and return 401 if not. :param validate_token: test if token is valid and return 401 if not.
:param validate_json: test if response is valid json.
:param json_key_name: test if returned json contain a specific key.
:param validate_content: test if response have a content (used with download).
:return: the response. :return: the response.
""" """
@ -413,12 +423,19 @@ def checked(fn, validate_token=False):
try: try:
try: try:
response = fn() response = fn()
except APIThrottled:
if not raise_api_limit:
logger.info("API request limit hit, waiting and trying again once.")
time.sleep(2)
return checked(fn, raise_api_limit=True)
raise
except (ConnectionError, Timeout, ReadTimeout): except (ConnectionError, Timeout, ReadTimeout):
raise ServiceUnavailable('Unknown Error, empty response: %s: %r' % (response.status_code, response)) raise ServiceUnavailable(f'Unknown Error, empty response: {response.status_code}: {response}')
except RequestException as e: except Exception:
status_code = e.response.status_code logging.exception('Unhandled exception raised.')
raise ProviderError('Unhandled exception raised. Check log.')
else: else:
status_code = int(response['status'][:3]) status_code = response.status_code
except Exception: except Exception:
status_code = None status_code = None
else: else:
@ -426,15 +443,34 @@ def checked(fn, validate_token=False):
if validate_token: if validate_token:
return 401 return 401
else: else:
raise AuthenticationError('Login failed: {}'.format(response.reason)) raise AuthenticationError(f'Login failed: {response.reason}')
elif status_code == 406: elif status_code == 406:
raise DownloadLimitExceeded("Daily download limit reached") raise DownloadLimitExceeded("Daily download limit reached")
elif status_code == 429: elif status_code == 429:
raise TooManyRequests() raise TooManyRequests()
elif status_code == 502:
raise APIThrottled()
elif 500 <= status_code <= 599: elif 500 <= status_code <= 599:
raise ProviderError(response.reason) raise ProviderError(response.reason)
if status_code != 200: if status_code != 200:
raise ProviderError('Bad status code: {}'.format(response.status_code)) raise ProviderError(f'Bad status code: {response.status_code}')
if validate_json:
try:
json_test = response.json()
except JSONDecodeError:
raise ProviderError('Invalid JSON returned by provider')
else:
if json_key_name not in json_test:
raise ProviderError(f'Invalid JSON returned by provider: no {json_key_name} key in returned json.')
if validate_content:
if not hasattr(response, 'content'):
logging.error('Download link returned no content attribute.')
return False
elif not response.content:
logging.error(f'This download link returned empty content: {response.url}')
return False
return response return response

Loading…
Cancel
Save