You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
bazarr/custom_libs/subliminal_patch/providers/animetosho.py

188 lines
6.0 KiB

# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
import lzma
from guessit import guessit
from requests import Session
from subzero.language import Language
from subliminal.exceptions import ConfigurationError, ProviderError
from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.subtitle import Subtitle, guess_matches
from subliminal.video import Episode
try:
from lxml import etree
except ImportError:
try:
import xml.etree.cElementTree as etree
except ImportError:
import xml.etree.ElementTree as etree
logger = logging.getLogger(__name__)
supported_languages = [
"ara", # Arabic
"eng", # English
"fin", # Finnish
"fra", # French
"heb", # Hebrew
"ita", # Italian
"jpn", # Japanese
"por", # Portuguese
"pol", # Polish
"spa", # Spanish
"swe", # Swedish
"tha", # Thai
"tur", # Turkish
]
class AnimeToshoSubtitle(Subtitle):
"""AnimeTosho.org Subtitle."""
provider_name = 'animetosho'
def __init__(self, language, download_link, meta, release_info):
super(AnimeToshoSubtitle, self).__init__(language, page_link=download_link)
self.meta = meta
self.download_link = download_link
self.release_info = release_info
@property
def id(self):
return self.download_link
def get_matches(self, video):
matches = set()
matches |= guess_matches(video, guessit(self.meta['filename']))
# Add these data are explicit extracted from the API and they always have to match otherwise they wouldn't
# arrive at this point and would stop on list_subtitles.
matches.update(['title', 'series', 'tvdb_id', 'season', 'episode'])
return matches
class AnimeToshoProvider(Provider, ProviderSubtitleArchiveMixin):
"""AnimeTosho.org Provider."""
subtitle_class = AnimeToshoSubtitle
languages = {Language('por', 'BR')} | {Language(sl) for sl in supported_languages}
video_types = Episode
def __init__(self, search_threshold=None):
self.session = None
if not all([search_threshold]):
raise ConfigurationError("Search threshold, Api Client and Version must be specified!")
self.search_threshold = search_threshold
def initialize(self):
self.session = Session()
def terminate(self):
self.session.close()
def list_subtitles(self, video, languages):
if not video.series_anidb_episode_id:
logger.debug('Skipping video %r. It is not an anime or the anidb_episode_id could not be identified', video)
return []
return [s for s in self._get_series(video.series_anidb_episode_id) if s.language in languages]
def download_subtitle(self, subtitle):
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(subtitle.page_link, timeout=10)
r.raise_for_status()
# Check if the bytes content starts with the xz magic number of the xz archives
if not self._is_xz_file(r.content):
raise ProviderError('Unidentified archive type')
subtitle.content = lzma.decompress(r.content)
return subtitle
@staticmethod
def _is_xz_file(content):
return content.startswith(b'\xFD\x37\x7A\x58\x5A\x00')
def _get_series(self, episode_id):
storage_download_url = 'https://animetosho.org/storage/attach/'
feed_api_url = 'https://feed.animetosho.org/json'
subtitles = []
entries = self._get_series_entries(episode_id)
for entry in entries:
r = self.session.get(
feed_api_url,
params={
'show': 'torrent',
'id': entry['id'],
},
timeout=10
)
r.raise_for_status()
for file in r.json()['files']:
if 'attachments' not in file:
continue
subtitle_files = list(filter(lambda f: f['type'] == 'subtitle', file['attachments']))
for subtitle_file in subtitle_files:
hex_id = format(subtitle_file['id'], '08x')
# Animetosho assumes missing languages as english as fallback when not specified.
lang = Language.fromalpha3b(subtitle_file['info'].get('lang', 'eng'))
# For Portuguese and Portuguese Brazilian they both share the same code, the name is the only
# identifier AnimeTosho provides. Also, some subtitles does not have name, in this case it could
# be a false negative but there is nothing we can use to guarantee it is PT-BR, we rather skip it.
if lang.alpha3 == 'por' and subtitle_file['info'].get('name', '').lower().find('brazil'):
lang = Language('por', 'BR')
subtitle = self.subtitle_class(
lang,
storage_download_url + '{}/{}.xz'.format(hex_id, subtitle_file['id']),
meta=file,
release_info=entry.get('title'),
)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def _get_series_entries(self, episode_id):
api_url = 'https://feed.animetosho.org/json'
r = self.session.get(
api_url,
params={
'eid': episode_id,
},
timeout=10
)
r.raise_for_status()
j = r.json()
# Ignore records that are not yet ready or has been abandoned by AnimeTosho.
entries = list(filter(lambda t: t['status'] == 'complete', j))[:self.search_threshold]
# Return the latest entries that have been added as it is used to cutoff via the user configuration threshold
entries.sort(key=lambda t: t['timestamp'], reverse=True)
return entries