|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
from __future__ import absolute_import
|
|
|
|
|
|
|
|
import logging
|
|
|
|
import lzma
|
|
|
|
|
|
|
|
from guessit import guessit
|
|
|
|
from requests import Session
|
|
|
|
from subzero.language import Language
|
|
|
|
|
|
|
|
|
|
|
|
from subliminal.exceptions import ConfigurationError, ProviderError
|
|
|
|
from subliminal_patch.providers import Provider
|
|
|
|
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
|
|
|
|
from subliminal_patch.subtitle import Subtitle, guess_matches
|
|
|
|
from subliminal.video import Episode
|
|
|
|
|
|
|
|
try:
|
|
|
|
from lxml import etree
|
|
|
|
except ImportError:
|
|
|
|
try:
|
|
|
|
import xml.etree.cElementTree as etree
|
|
|
|
except ImportError:
|
|
|
|
import xml.etree.ElementTree as etree
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
supported_languages = [
|
|
|
|
"ara", # Arabic
|
|
|
|
"eng", # English
|
|
|
|
"fin", # Finnish
|
|
|
|
"fra", # French
|
|
|
|
"heb", # Hebrew
|
|
|
|
"ita", # Italian
|
|
|
|
"jpn", # Japanese
|
|
|
|
"por", # Portuguese
|
|
|
|
"pol", # Polish
|
|
|
|
"spa", # Spanish
|
|
|
|
"swe", # Swedish
|
|
|
|
"tha", # Thai
|
|
|
|
"tur", # Turkish
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
class AnimeToshoSubtitle(Subtitle):
|
|
|
|
"""AnimeTosho.org Subtitle."""
|
|
|
|
provider_name = 'animetosho'
|
|
|
|
|
|
|
|
def __init__(self, language, download_link, meta, release_info):
|
|
|
|
super(AnimeToshoSubtitle, self).__init__(language, page_link=download_link)
|
|
|
|
self.meta = meta
|
|
|
|
self.download_link = download_link
|
|
|
|
self.release_info = release_info
|
|
|
|
|
|
|
|
@property
|
|
|
|
def id(self):
|
|
|
|
return self.download_link
|
|
|
|
|
|
|
|
def get_matches(self, video):
|
|
|
|
matches = set()
|
|
|
|
matches |= guess_matches(video, guessit(self.meta['filename']))
|
|
|
|
|
|
|
|
# Add these data are explicit extracted from the API and they always have to match otherwise they wouldn't
|
|
|
|
# arrive at this point and would stop on list_subtitles.
|
|
|
|
matches.update(['title', 'series', 'tvdb_id', 'season', 'episode'])
|
|
|
|
|
|
|
|
return matches
|
|
|
|
|
|
|
|
|
|
|
|
class AnimeToshoProvider(Provider, ProviderSubtitleArchiveMixin):
|
|
|
|
"""AnimeTosho.org Provider."""
|
|
|
|
subtitle_class = AnimeToshoSubtitle
|
|
|
|
languages = {Language('por', 'BR')} | {Language(sl) for sl in supported_languages}
|
|
|
|
video_types = Episode
|
|
|
|
|
|
|
|
def __init__(self, search_threshold=None):
|
|
|
|
self.session = None
|
|
|
|
|
|
|
|
if not all([search_threshold]):
|
|
|
|
raise ConfigurationError("Search threshold, Api Client and Version must be specified!")
|
|
|
|
|
|
|
|
self.search_threshold = search_threshold
|
|
|
|
|
|
|
|
def initialize(self):
|
|
|
|
self.session = Session()
|
|
|
|
|
|
|
|
def terminate(self):
|
|
|
|
self.session.close()
|
|
|
|
|
|
|
|
def list_subtitles(self, video, languages):
|
|
|
|
if not video.series_anidb_episode_id:
|
|
|
|
logger.debug('Skipping video %r. It is not an anime or the anidb_episode_id could not be identified', video)
|
|
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
return [s for s in self._get_series(video.series_anidb_episode_id) if s.language in languages]
|
|
|
|
|
|
|
|
def download_subtitle(self, subtitle):
|
|
|
|
logger.info('Downloading subtitle %r', subtitle)
|
|
|
|
|
|
|
|
r = self.session.get(subtitle.page_link, timeout=10)
|
|
|
|
r.raise_for_status()
|
|
|
|
|
|
|
|
# Check if the bytes content starts with the xz magic number of the xz archives
|
|
|
|
if not self._is_xz_file(r.content):
|
|
|
|
raise ProviderError('Unidentified archive type')
|
|
|
|
|
|
|
|
subtitle.content = lzma.decompress(r.content)
|
|
|
|
|
|
|
|
return subtitle
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
def _is_xz_file(content):
|
|
|
|
return content.startswith(b'\xFD\x37\x7A\x58\x5A\x00')
|
|
|
|
|
|
|
|
def _get_series(self, episode_id):
|
|
|
|
storage_download_url = 'https://animetosho.org/storage/attach/'
|
|
|
|
feed_api_url = 'https://feed.animetosho.org/json'
|
|
|
|
|
|
|
|
subtitles = []
|
|
|
|
|
|
|
|
entries = self._get_series_entries(episode_id)
|
|
|
|
|
|
|
|
for entry in entries:
|
|
|
|
r = self.session.get(
|
|
|
|
feed_api_url,
|
|
|
|
params={
|
|
|
|
'show': 'torrent',
|
|
|
|
'id': entry['id'],
|
|
|
|
},
|
|
|
|
timeout=10
|
|
|
|
)
|
|
|
|
r.raise_for_status()
|
|
|
|
|
|
|
|
for file in r.json()['files']:
|
|
|
|
if 'attachments' not in file:
|
|
|
|
continue
|
|
|
|
|
|
|
|
subtitle_files = list(filter(lambda f: f['type'] == 'subtitle', file['attachments']))
|
|
|
|
|
|
|
|
for subtitle_file in subtitle_files:
|
|
|
|
hex_id = format(subtitle_file['id'], '08x')
|
|
|
|
|
|
|
|
# Animetosho assumes missing languages as english as fallback when not specified.
|
|
|
|
lang = Language.fromalpha3b(subtitle_file['info'].get('lang', 'eng'))
|
|
|
|
|
|
|
|
# For Portuguese and Portuguese Brazilian they both share the same code, the name is the only
|
|
|
|
# identifier AnimeTosho provides. Also, some subtitles does not have name, in this case it could
|
|
|
|
# be a false negative but there is nothing we can use to guarantee it is PT-BR, we rather skip it.
|
|
|
|
if lang.alpha3 == 'por' and subtitle_file['info'].get('name', '').lower().find('brazil'):
|
|
|
|
lang = Language('por', 'BR')
|
|
|
|
|
|
|
|
subtitle = self.subtitle_class(
|
|
|
|
lang,
|
|
|
|
storage_download_url + '{}/{}.xz'.format(hex_id, subtitle_file['id']),
|
|
|
|
meta=file,
|
|
|
|
release_info=entry.get('title'),
|
|
|
|
)
|
|
|
|
|
|
|
|
logger.debug('Found subtitle %r', subtitle)
|
|
|
|
|
|
|
|
subtitles.append(subtitle)
|
|
|
|
|
|
|
|
return subtitles
|
|
|
|
|
|
|
|
def _get_series_entries(self, episode_id):
|
|
|
|
api_url = 'https://feed.animetosho.org/json'
|
|
|
|
|
|
|
|
r = self.session.get(
|
|
|
|
api_url,
|
|
|
|
params={
|
|
|
|
'eid': episode_id,
|
|
|
|
},
|
|
|
|
timeout=10
|
|
|
|
)
|
|
|
|
|
|
|
|
r.raise_for_status()
|
|
|
|
|
|
|
|
j = r.json()
|
|
|
|
|
|
|
|
# Ignore records that are not yet ready or has been abandoned by AnimeTosho.
|
|
|
|
entries = list(filter(lambda t: t['status'] == 'complete', j))[:self.search_threshold]
|
|
|
|
|
|
|
|
# Return the latest entries that have been added as it is used to cutoff via the user configuration threshold
|
|
|
|
entries.sort(key=lambda t: t['timestamp'], reverse=True)
|
|
|
|
|
|
|
|
return entries
|