Merge branch 'development' of https://github.com/morpheus65535/bazarr into development
@@ -1,13 +1,12 @@
 # coding=utf-8
 
 import os
-import re
 
 # set Bazarr user-agent used to make requests
-headers = {"User-Agent": os.environ["SZ_USER_AGENT"]}
-
-# hearing-impaired detection regex
-hi_regex = re.compile(r'[*¶♫♪].{3,}[*¶♫♪]|[\[\(\{].{3,}[\]\)\}](?<!{\\an\d})')
+HEADERS = {"User-Agent": os.environ["SZ_USER_AGENT"]}
 
 # minimum file size for Bazarr to consider it a video
 MINIMUM_VIDEO_SIZE = 20480
+
+# maximum size for a subtitles file
+MAXIMUM_SUBTITLE_SIZE = 1 * 1024 * 1024
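Note: a minimal usage sketch of the constants above (the helper names and the `constants` import path are assumptions for illustration, not part of the diff):

    import os

    from constants import MINIMUM_VIDEO_SIZE, MAXIMUM_SUBTITLE_SIZE  # assumed module path

    def looks_like_video(path):
        # Anything under 20 KiB (20480 bytes) is too small to be a real video file.
        return os.path.getsize(path) >= MINIMUM_VIDEO_SIZE

    def subtitle_size_ok(content):
        # Reject subtitle payloads above the 1 MiB (1 * 1024 * 1024 bytes) cap.
        return len(content) <= MAXIMUM_SUBTITLE_SIZE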
@@ -0,0 +1,195 @@
# coding=utf-8
# fmt: off

import logging
import requests

from collections import namedtuple
from datetime import datetime, timedelta
from requests.exceptions import HTTPError

from app.config import settings
from subliminal import Episode, region
from subliminal.cache import REFINER_EXPIRATION_TIME
from subliminal_patch.exceptions import TooManyRequests

try:
    from lxml import etree
except ImportError:
    try:
        import xml.etree.cElementTree as etree
    except ImportError:
        import xml.etree.ElementTree as etree

refined_providers = {'animetosho'}

api_url = 'http://api.anidb.net:9001/httpapi'

cache_key_refiner = "anidb_refiner"

# Soft limit on the number of API requests per day
daily_limit_request_count = 200


class AniDBClient(object):
    def __init__(self, api_client_key=None, api_client_ver=1, session=None):
        self.session = session or requests.Session()
        self.api_client_key = api_client_key
        self.api_client_ver = api_client_ver
        self.cache = region.get(cache_key_refiner, expiration_time=timedelta(days=1).total_seconds())

    @property
    def is_throttled(self):
        return self.cache and self.cache.get('is_throttled')

    @property
    def daily_api_request_count(self):
        if not self.cache:
            return 0

        return self.cache.get('daily_api_request_count', 0)

    AnimeInfo = namedtuple('AnimeInfo', ['anime', 'episode_offset'])

    @region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
    def get_series_mappings(self):
        r = self.session.get(
            'https://raw.githubusercontent.com/Anime-Lists/anime-lists/master/anime-list.xml',
            timeout=10
        )

        r.raise_for_status()

        return r.content

    @region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
    def get_series_id(self, mappings, tvdb_series_season, tvdb_series_id, episode):
        # Enrich the collection of anime with the episode offset
        animes = [
            self.AnimeInfo(anime, int(anime.attrib.get('episodeoffset', 0)))
            for anime in mappings.findall(
                f".//anime[@tvdbid='{tvdb_series_id}'][@defaulttvdbseason='{tvdb_series_season}']"
            )
        ]

        if not animes:
            return None, None

        # Sort the anime by offset in ascending order
        animes.sort(key=lambda a: a.episode_offset)

        # Unlike TVDB, AniDB uses a separate id for each part of a season,
        # so keep the last mapping whose offset this episode exceeds
        anidb_id = None
        offset = 0

        for index, anime_info in enumerate(animes):
            anime, episode_offset = anime_info

            if episode > episode_offset:
                anidb_id = int(anime.attrib.get('anidbid'))
                offset = episode_offset

        return anidb_id, episode - offset

    @region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
    def get_series_episodes_ids(self, tvdb_series_id, season, episode):
        mappings = etree.fromstring(self.get_series_mappings())

        series_id, episode_no = self.get_series_id(mappings, season, tvdb_series_id, episode)

        if not series_id:
            return None, None

        episodes = etree.fromstring(self.get_episodes(series_id))

        return series_id, int(episodes.find(f".//episode[epno='{episode_no}']").attrib.get('id'))

    @region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
    def get_episodes(self, series_id):
        if self.daily_api_request_count >= daily_limit_request_count:
            raise TooManyRequests('Daily API request limit exceeded')

        r = self.session.get(
            api_url,
            params={
                'request': 'anime',
                'client': self.api_client_key,
                'clientver': self.api_client_ver,
                'protover': 1,
                'aid': series_id
            },
            timeout=10)
        r.raise_for_status()

        xml_root = etree.fromstring(r.content)

        response_code = xml_root.attrib.get('code')
        if response_code == '500':
            raise TooManyRequests('AniDB API Abuse detected. Banned status.')
        elif response_code == '302':
            raise HTTPError('AniDB API Client error. Client is disabled or does not exist.')

        self.increment_daily_quota()

        episode_elements = xml_root.find('episodes')

        if not episode_elements:
            raise ValueError

        return etree.tostring(episode_elements, encoding='utf8', method='xml')

    def increment_daily_quota(self):
        daily_quota = self.daily_api_request_count + 1

        if not self.cache:
            region.set(cache_key_refiner, {'daily_api_request_count': daily_quota})

            return

        self.cache['daily_api_request_count'] = daily_quota

        region.set(cache_key_refiner, self.cache)

    @staticmethod
    def mark_as_throttled():
        region.set(cache_key_refiner, {'is_throttled': True})


def refine_from_anidb(path, video):
    if not isinstance(video, Episode) or not video.series_tvdb_id:
        logging.debug(f'Video is not an Anime TV series, skipping refinement for {video}')

        return

    if refined_providers.intersection(settings.general.enabled_providers) and video.series_anidb_id is None:
        refine_anidb_ids(video)


def refine_anidb_ids(video):
    anidb_client = AniDBClient(settings.anidb.api_client, settings.anidb.api_client_ver)

    season = video.season if video.season else 0

    if anidb_client.is_throttled:
        logging.warning(f'API daily limit reached. Skipping refinement for {video.series}')

        return video

    try:
        anidb_series_id, anidb_episode_id = anidb_client.get_series_episodes_ids(
            video.series_tvdb_id,
            season, video.episode,
        )
    except TooManyRequests:
        logging.error(f'API daily limit reached while refining {video.series}')

        anidb_client.mark_as_throttled()

        return video

    if not anidb_episode_id:
        logging.error(f'Could not find anime series {video.series}')

        return video

    video.series_anidb_id = anidb_series_id
    video.series_anidb_episode_id = anidb_episode_id
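The TVDB-to-AniDB remapping in get_series_id() deserves a worked example: when one TVDB season spans several AniDB entries (season "parts"), the loop keeps the last mapping whose episode offset the requested episode exceeds, then renumbers the episode relative to that offset. A self-contained sketch with made-up ids:

    # (anidb_id, episode_offset) pairs, already sorted by offset ascending
    mappings = [(101, 0), (102, 12)]

    def remap(episode, mappings):
        anidb_id, offset = None, 0
        for mapped_id, episode_offset in mappings:
            if episode > episode_offset:
                anidb_id = mapped_id
                offset = episode_offset
        return anidb_id, episode - offset

    assert remap(5, mappings) == (101, 5)   # episodes 1-12 belong to the first part
    assert remap(15, mappings) == (102, 3)  # episode 15 is episode 3 of the second part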
@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from babelfish import LanguageReverseConverter
from subliminal.exceptions import ConfigurationError


class SubdlConverter(LanguageReverseConverter):
    def __init__(self):
        self.from_subdl = {
            "AR": ("ara", None, None),  # Arabic
            "DA": ("dan", None, None),  # Danish
            "NL": ("nld", None, None),  # Dutch
            "EN": ("eng", None, None),  # English
            "FA": ("fas", None, None),  # Farsi_Persian
            "FI": ("fin", None, None),  # Finnish
            "FR": ("fra", None, None),  # French
            "ID": ("ind", None, None),  # Indonesian
            "IT": ("ita", None, None),  # Italian
            "NO": ("nor", None, None),  # Norwegian
            "RO": ("ron", None, None),  # Romanian
            "ES": ("spa", None, None),  # Spanish
            "SV": ("swe", None, None),  # Swedish
            "VI": ("vie", None, None),  # Vietnamese
            "SQ": ("sqi", None, None),  # Albanian
            "AZ": ("aze", None, None),  # Azerbaijani
            "BE": ("bel", None, None),  # Belarusian
            "BN": ("ben", None, None),  # Bengali
            "BS": ("bos", None, None),  # Bosnian
            "BG": ("bul", None, None),  # Bulgarian
            "MY": ("mya", None, None),  # Burmese
            "CA": ("cat", None, None),  # Catalan
            "ZH": ("zho", None, None),  # Chinese BG code
            "HR": ("hrv", None, None),  # Croatian
            "CS": ("ces", None, None),  # Czech
            "EO": ("epo", None, None),  # Esperanto
            "ET": ("est", None, None),  # Estonian
            "KA": ("kat", None, None),  # Georgian
            "DE": ("deu", None, None),  # German
            "EL": ("ell", None, None),  # Greek
            "KL": ("kal", None, None),  # Greenlandic
            "HE": ("heb", None, None),  # Hebrew
            "HI": ("hin", None, None),  # Hindi
            "HU": ("hun", None, None),  # Hungarian
            "IS": ("isl", None, None),  # Icelandic
            "JA": ("jpn", None, None),  # Japanese
            "KO": ("kor", None, None),  # Korean
            "KU": ("kur", None, None),  # Kurdish
            "LV": ("lav", None, None),  # Latvian
            "LT": ("lit", None, None),  # Lithuanian
            "MK": ("mkd", None, None),  # Macedonian
            "MS": ("msa", None, None),  # Malay
            "ML": ("mal", None, None),  # Malayalam
            "PL": ("pol", None, None),  # Polish
            "PT": ("por", None, None),  # Portuguese
            "RU": ("rus", None, None),  # Russian
            "SR": ("srp", None, None),  # Serbian
            "SI": ("sin", None, None),  # Sinhala
            "SK": ("slk", None, None),  # Slovak
            "SL": ("slv", None, None),  # Slovenian
            "TL": ("tgl", None, None),  # Tagalog
            "TA": ("tam", None, None),  # Tamil
            "TE": ("tel", None, None),  # Telugu
            "TH": ("tha", None, None),  # Thai
            "TR": ("tur", None, None),  # Turkish
            "UK": ("ukr", None, None),  # Ukrainian
            "UR": ("urd", None, None),  # Urdu
            # custom languages
            "BR_PT": ("por", "BR", None),  # Brazilian Portuguese
            "ZH_BG": ("zho", None, "Hant"),  # Big 5 code
            # unsupported language in Bazarr
            # "BG_EN": "Bulgarian_English",
            # "NL_EN": "Dutch_English",
            # "EN_DE": "English_German",
            # "HU_EN": "Hungarian_English",
            # "MNI": "Manipuri",
        }
        self.to_subdl = {v: k for k, v in self.from_subdl.items()}
        self.codes = set(self.from_subdl.keys())

    def convert(self, alpha3, country=None, script=None):
        if (alpha3, country, script) in self.to_subdl:
            return self.to_subdl[(alpha3, country, script)]

        raise ConfigurationError('Unsupported language for subdl: %s, %s, %s' % (alpha3, country, script))

    def reverse(self, subdl):
        if subdl in self.from_subdl:
            return self.from_subdl[subdl]

        raise ConfigurationError('Unsupported language code for subdl: %s' % subdl)
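A short usage sketch for the converter above (assumes babelfish and the module are importable; the commented registration string follows babelfish's entry-point convention and is an assumption here, mirroring the subscene registration removed later in this diff):

    converter = SubdlConverter()

    assert converter.convert('por', 'BR') == 'BR_PT'            # Brazilian Portuguese
    assert converter.reverse('ZH_BG') == ('zho', None, 'Hant')  # Big 5 / Traditional Chinese

    # Providers typically register converters like this:
    # language_converters.register('subdl = subliminal_patch.converters.subdl:SubdlConverter')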
@@ -1,92 +0,0 @@
# coding=utf-8

from __future__ import absolute_import
from babelfish import LanguageReverseConverter
from subliminal.exceptions import ConfigurationError
from subzero.language import Language


# alpha3 codes extracted from `https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes`
# Subscene language list extracted from its upload form
from_subscene = {
    'Farsi/Persian': 'fas', 'Greek': 'ell', 'Greenlandic': 'kal',
    'Malay': 'msa', 'Pashto': 'pus', 'Punjabi': 'pan', 'Swahili': 'swa'
}

from_subscene_with_country = {
    'Brazillian Portuguese': ('por', 'BR')
}

to_subscene_with_country = {val: key for key, val in from_subscene_with_country.items()}

to_subscene = {v: k for k, v in from_subscene.items()}

exact_languages_alpha3 = [
    'ara', 'aze', 'bel', 'ben', 'bos', 'bul', 'cat', 'ces', 'dan', 'deu',
    'eng', 'epo', 'est', 'eus', 'fin', 'fra', 'heb', 'hin', 'hrv', 'hun',
    'hye', 'ind', 'isl', 'ita', 'jpn', 'kat', 'kor', 'kur', 'lav', 'lit',
    'mal', 'mkd', 'mni', 'mon', 'mya', 'nld', 'nor', 'pol', 'por', 'ron',
    'rus', 'sin', 'slk', 'slv', 'som', 'spa', 'sqi', 'srp', 'sun', 'swe',
    'tam', 'tel', 'tgl', 'tha', 'tur', 'ukr', 'urd', 'vie', 'yor'
]

language_ids = {
    'ara': 2, 'dan': 10, 'nld': 11, 'eng': 13, 'fas': 46, 'fin': 17,
    'fra': 18, 'heb': 22, 'ind': 44, 'ita': 26, 'msa': 50, 'nor': 30,
    'ron': 33, 'spa': 38, 'swe': 39, 'vie': 45, 'sqi': 1, 'hye': 73,
    'aze': 55, 'eus': 74, 'bel': 68, 'ben': 54, 'bos': 60, 'bul': 5,
    'mya': 61, 'cat': 49, 'hrv': 8, 'ces': 9, 'epo': 47, 'est': 16,
    'kat': 62, 'deu': 19, 'ell': 21, 'kal': 57, 'hin': 51, 'hun': 23,
    'isl': 25, 'jpn': 27, 'kor': 28, 'kur': 52, 'lav': 29, 'lit': 43,
    'mkd': 48, 'mal': 64, 'mni': 65, 'mon': 72, 'pus': 67, 'pol': 31,
    'por': 32, 'pan': 66, 'rus': 34, 'srp': 35, 'sin': 58, 'slk': 36,
    'slv': 37, 'som': 70, 'tgl': 53, 'tam': 59, 'tel': 63, 'tha': 40,
    'tur': 41, 'ukr': 56, 'urd': 42, 'yor': 71, 'pt-BR': 4
}

# TODO: specify codes for unspecified_languages
unspecified_languages = [
    'Big 5 code', 'Bulgarian/ English',
    'Chinese BG code', 'Dutch/ English', 'English/ German',
    'Hungarian/ English', 'Rohingya'
]

supported_languages = {Language(l) for l in exact_languages_alpha3}

alpha3_of_code = {l.name: l.alpha3 for l in supported_languages}

supported_languages.update({Language(l) for l in to_subscene})

supported_languages.update({Language(lang, cr) for lang, cr in to_subscene_with_country})


class SubsceneConverter(LanguageReverseConverter):
    codes = {l.name for l in supported_languages}

    def convert(self, alpha3, country=None, script=None):
        if alpha3 in exact_languages_alpha3:
            return Language(alpha3).name

        if alpha3 in to_subscene:
            return to_subscene[alpha3]

        if (alpha3, country) in to_subscene_with_country:
            return to_subscene_with_country[(alpha3, country)]

        raise ConfigurationError('Unsupported language for subscene: %s, %s, %s' % (alpha3, country, script))

    def reverse(self, code):
        if code in from_subscene_with_country:
            return from_subscene_with_country[code]

        if code in from_subscene:
            return (from_subscene[code],)

        if code in alpha3_of_code:
            return (alpha3_of_code[code],)

        if code in unspecified_languages:
            raise NotImplementedError("currently this language is unspecified: %s" % code)

        raise ConfigurationError('Unsupported language code for subscene: %s' % code)
@@ -0,0 +1,186 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import logging
import lzma

from guessit import guessit
from requests import Session
from subzero.language import Language

from subliminal.exceptions import ConfigurationError, ProviderError
from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.subtitle import Subtitle, guess_matches
from subliminal.video import Episode

try:
    from lxml import etree
except ImportError:
    try:
        import xml.etree.cElementTree as etree
    except ImportError:
        import xml.etree.ElementTree as etree

logger = logging.getLogger(__name__)

supported_languages = [
    "ara",  # Arabic
    "eng",  # English
    "fin",  # Finnish
    "fra",  # French
    "heb",  # Hebrew
    "ita",  # Italian
    "jpn",  # Japanese
    "por",  # Portuguese
    "pol",  # Polish
    "spa",  # Spanish
    "swe",  # Swedish
    "tha",  # Thai
    "tur",  # Turkish
]


class AnimeToshoSubtitle(Subtitle):
    """AnimeTosho.org Subtitle."""
    provider_name = 'animetosho'

    def __init__(self, language, download_link, meta, release_info):
        super(AnimeToshoSubtitle, self).__init__(language, page_link=download_link)
        self.meta = meta
        self.download_link = download_link
        self.release_info = release_info

    @property
    def id(self):
        return self.download_link

    def get_matches(self, video):
        matches = set()
        matches |= guess_matches(video, guessit(self.meta['filename']))

        # These attributes are extracted directly from the API, so they always match;
        # otherwise the entry would already have been filtered out in list_subtitles.
        matches.update(['title', 'series', 'tvdb_id', 'season', 'episode'])

        return matches


class AnimeToshoProvider(Provider, ProviderSubtitleArchiveMixin):
    """AnimeTosho.org Provider."""
    subtitle_class = AnimeToshoSubtitle
    languages = {Language('por', 'BR')} | {Language(sl) for sl in supported_languages}
    video_types = Episode

    def __init__(self, search_threshold=None):
        self.session = None

        if not all([search_threshold]):
            raise ConfigurationError("Search threshold must be specified!")

        self.search_threshold = search_threshold

    def initialize(self):
        self.session = Session()

    def terminate(self):
        self.session.close()

    def list_subtitles(self, video, languages):
        if not video.series_anidb_episode_id:
            logger.debug('Skipping video %r. It is not an anime or the anidb_episode_id could not be identified', video)

            return []

        return [s for s in self._get_series(video.series_anidb_episode_id) if s.language in languages]

    def download_subtitle(self, subtitle):
        logger.info('Downloading subtitle %r', subtitle)

        r = self.session.get(subtitle.page_link, timeout=10)
        r.raise_for_status()

        # Check whether the payload starts with the magic number of an xz archive
        if not self._is_xz_file(r.content):
            raise ProviderError('Unidentified archive type')

        subtitle.content = lzma.decompress(r.content)

        return subtitle

    @staticmethod
    def _is_xz_file(content):
        return content.startswith(b'\xFD\x37\x7A\x58\x5A\x00')

    def _get_series(self, episode_id):
        storage_download_url = 'https://animetosho.org/storage/attach/'
        feed_api_url = 'https://feed.animetosho.org/json'

        subtitles = []

        entries = self._get_series_entries(episode_id)

        for entry in entries:
            r = self.session.get(
                feed_api_url,
                params={
                    'show': 'torrent',
                    'id': entry['id'],
                },
                timeout=10
            )
            r.raise_for_status()

            for file in r.json()['files']:
                if 'attachments' not in file:
                    continue

                subtitle_files = list(filter(lambda f: f['type'] == 'subtitle', file['attachments']))

                for subtitle_file in subtitle_files:
                    hex_id = format(subtitle_file['id'], '08x')

                    lang = Language.fromalpha3b(subtitle_file['info']['lang'])

                    # Portuguese and Brazilian Portuguese share the same alpha3 code; the
                    # attachment name is the only identifier AnimeTosho provides. Some
                    # subtitles have no name at all; those could be false negatives, but with
                    # nothing to guarantee they are pt-BR, we'd rather skip them.
                    if lang.alpha3 == 'por' and 'brazil' in subtitle_file['info'].get('name', '').lower():
                        lang = Language('por', 'BR')

                    subtitle = self.subtitle_class(
                        lang,
                        storage_download_url + '{}/{}.xz'.format(hex_id, subtitle_file['id']),
                        meta=file,
                        release_info=entry.get('title'),
                    )

                    logger.debug('Found subtitle %r', subtitle)

                    subtitles.append(subtitle)

        return subtitles

    def _get_series_entries(self, episode_id):
        api_url = 'https://feed.animetosho.org/json'

        r = self.session.get(
            api_url,
            params={
                'eid': episode_id,
            },
            timeout=10
        )

        r.raise_for_status()

        j = r.json()

        # Ignore records that are not yet ready or have been abandoned by AnimeTosho.
        entries = list(filter(lambda t: t['status'] == 'complete', j))[:self.search_threshold]

        # Keep the most recently added entries first, since the user-configured
        # threshold is meant to cut off at the latest uploads.
        entries.sort(key=lambda t: t['timestamp'], reverse=True)

        return entries
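The xz check in download_subtitle() keys off the 6-byte magic number that every xz stream starts with (FD 37 7A 58 5A 00). A standalone sketch using only the standard library:

    import lzma

    XZ_MAGIC = b'\xFD\x37\x7A\x58\x5A\x00'

    # lzma.compress() produces xz-format output by default (FORMAT_XZ)
    payload = lzma.compress(b'1\n00:00:01,000 --> 00:00:02,000\nHello\n')
    assert payload.startswith(XZ_MAGIC)
    assert lzma.decompress(payload).endswith(b'Hello\n')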
@@ -1,366 +0,0 @@
# coding=utf-8

import io
import logging
import os
import time
import traceback
from urllib import parse

import requests

import inflect
import re
import json

import html

import zipfile
import rarfile
from babelfish import language_converters
from guessit import guessit
from dogpile.cache.api import NO_VALUE
from requests.exceptions import RequestException
from subliminal import Episode, ProviderError
from subliminal.video import Episode, Movie
from subliminal.exceptions import ConfigurationError, ServiceUnavailable
from subliminal.utils import sanitize_release_group
from subliminal.cache import region
from subliminal_patch.http import RetryingCFSession
from subliminal_patch.providers import Provider, reinitialize_on_error
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.subtitle import Subtitle, guess_matches
from subliminal_patch.converters.subscene import language_ids, supported_languages
from subscene_api.subscene import search, SearchTypes, Subtitle as APISubtitle, SITE_DOMAIN
from subzero.language import Language

p = inflect.engine()

language_converters.register('subscene = subliminal_patch.converters.subscene:SubsceneConverter')
logger = logging.getLogger(__name__)


class SubsceneSubtitle(Subtitle):
    provider_name = 'subscene'
    hearing_impaired_verifiable = True
    is_pack = False
    page_link = None
    season = None
    episode = None
    releases = None

    def __init__(self, language, release_info, hearing_impaired=False, page_link=None, encoding=None, mods=None,
                 asked_for_release_group=None, asked_for_episode=None):
        super(SubsceneSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link,
                                               encoding=encoding, mods=mods)
        self.release_info = self.releases = release_info
        self.asked_for_episode = asked_for_episode
        self.asked_for_release_group = asked_for_release_group
        self.season = None
        self.episode = None

    @classmethod
    def from_api(cls, s):
        return cls(Language.fromsubscene(s.language.strip()), s.title, hearing_impaired=s.hearing_impaired,
                   page_link=s.url)

    @property
    def id(self):
        return self.page_link

    @property
    def numeric_id(self):
        return self.page_link.split("/")[-1]

    def get_matches(self, video):
        matches = set()

        if self.release_info.strip() == get_video_filename(video):
            logger.debug("Using hash match as the release name is the same")
            matches |= {"hash"}

        # episode
        if isinstance(video, Episode):
            guess = guessit(self.release_info, {'type': 'episode'})
            self.season = guess.get("season")
            self.episode = guess.get("episode")

            matches |= guess_matches(video, guess)
            if "season" in matches and "episode" not in guess:
                # pack
                matches.add("episode")
                logger.debug("%r is a pack", self)
                self.is_pack = True

            if "title" in guess and "year" in matches:
                if video.series in guess['title']:
                    matches.add("series")

        # movie
        else:
            guess = guessit(self.release_info, {'type': 'movie'})
            matches |= guess_matches(video, guess)

        if video.release_group and "release_group" not in matches and "release_group" in guess:
            if sanitize_release_group(video.release_group) in sanitize_release_group(guess["release_group"]):
                matches.add("release_group")

        self.matches = matches

        return matches

    def get_download_link(self, session):
        return APISubtitle.get_zipped_url(self.page_link, session)


def get_video_filename(video):
    return os.path.splitext(os.path.basename(video.original_name))[0]


class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    This currently only searches for the filename on SubScene. It doesn't open every found subtitle page to avoid
    massive hammering, thus it can't determine whether a subtitle is only-foreign or not.
    """
    subtitle_class = SubsceneSubtitle
    languages = supported_languages
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))
    video_types = (Episode, Movie)
    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False
    username = None
    password = None

    search_throttle = 8  # seconds

    def __init__(self, only_foreign=False, username=None, password=None):
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.only_foreign = only_foreign
        self.username = username
        self.password = password

    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()

        prev_cookies = region.get("subscene_cookies2")
        if prev_cookies != NO_VALUE:
            logger.debug("Re-using old subscene cookies: %r", prev_cookies)
            self.session.cookies.update(prev_cookies)

        else:
            logger.debug("Logging in")
            self.login()

    def login(self):
        r = self.session.get("https://subscene.com/account/login")
        if "Server Error" in r.text:
            logger.error("Login unavailable; Maintenance?")
            raise ServiceUnavailable("Login unavailable; Maintenance?")

        match = re.search(r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>", r.text)

        if match:
            h = html
            data = json.loads(h.unescape(match.group(1)))
            login_url = parse.urljoin(data["siteUrl"], data["loginUrl"])
            time.sleep(1.0)

            r = self.session.post(login_url,
                                  {
                                      "username": self.username,
                                      "password": self.password,
                                      data["antiForgery"]["name"]: data["antiForgery"]["value"]
                                  })
            pep_content = re.search(r"<form method=\"post\" action=\"https://subscene\.com/\">"
                                    r".+name=\"id_token\".+?value=\"(?P<id_token>.+?)\".*?"
                                    r"access_token\".+?value=\"(?P<access_token>.+?)\".+?"
                                    r"token_type.+?value=\"(?P<token_type>.+?)\".+?"
                                    r"expires_in.+?value=\"(?P<expires_in>.+?)\".+?"
                                    r"scope.+?value=\"(?P<scope>.+?)\".+?"
                                    r"state.+?value=\"(?P<state>.+?)\".+?"
                                    r"session_state.+?value=\"(?P<session_state>.+?)\"",
                                    r.text, re.MULTILINE | re.DOTALL)

            if pep_content:
                r = self.session.post(SITE_DOMAIN, pep_content.groupdict())
                try:
                    r.raise_for_status()
                except Exception:
                    raise ProviderError("Something went wrong when trying to log in: %s", traceback.format_exc())
                else:
                    cj = self.session.cookies.copy()
                    store_cks = ("scene", "idsrv", "idsrv.xsrf", "idsvr.clients", "idsvr.session", "idsvr.username")
                    for cn in self.session.cookies.keys():
                        if cn not in store_cks:
                            del cj[cn]

                    logger.debug("Storing cookies: %r", cj)
                    region.set("subscene_cookies2", cj)
                    return
        raise ProviderError("Something went wrong when trying to log in #1")

    def terminate(self):
        logger.info("Closing session")
        self.session.close()

    def _create_filters(self, languages):
        self.filters = dict(HearingImpaired="2")
        acc_filters = self.filters.copy()
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            acc_filters["ForeignOnly"] = self.filters["ForeignOnly"].lower()
            logger.info("Only searching for foreign/forced subtitles")

        selected_ids = []
        for l in languages:
            lid = language_ids.get(l.basename, language_ids.get(l.alpha3, None))
            if lid:
                selected_ids.append(str(lid))

        acc_filters["SelectedIds"] = selected_ids
        self.filters["LanguageFilter"] = ",".join(acc_filters["SelectedIds"])

        last_filters = region.get("subscene_filters")
        if last_filters != acc_filters:
            region.set("subscene_filters", acc_filters)
            logger.debug("Setting account filters to %r", acc_filters)
            self.session.post("https://u.subscene.com/filter", acc_filters, allow_redirects=False)

        logger.debug("Filter created: '%s'" % self.filters)

    def _enable_filters(self):
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        if not video.original_name:
            logger.info("Skipping search because we don't know the original release name")
            return []

        self._create_filters(languages)
        self._enable_filters()

        if isinstance(video, Episode):
            international_titles = list(set([video.series] + video.alternative_series[:1]))
            subtitles = [s for s in self.query(video, international_titles) if s.language in languages]
            if not len(subtitles):
                us_titles = [x + ' (US)' for x in international_titles]
                subtitles = [s for s in self.query(video, us_titles) if s.language in languages]
            return subtitles
        else:
            titles = list(set([video.title] + video.alternative_titles[:1]))
            return [s for s in self.query(video, titles) if s.language in languages]

    def download_subtitle(self, subtitle):
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            if rarfile.is_rarfile(io.BytesIO(subtitle.pack_data)):
                logger.debug('Identified rar archive')
                archive = rarfile.RarFile(io.BytesIO(subtitle.pack_data))
            elif zipfile.is_zipfile(io.BytesIO(subtitle.pack_data)):
                logger.debug('Identified zip archive')
                archive = zipfile.ZipFile(io.BytesIO(subtitle.pack_data))
            else:
                logger.error('Unsupported compressed format')
                return
            subtitle.pack_data = None

            try:
                subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
                return
            except ProviderError:
                pass

        # open the archive
        r = self.session.get(subtitle.get_download_link(self.session), timeout=10)
        r.raise_for_status()
        archive_stream = io.BytesIO(r.content)

        if rarfile.is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
        else:
            logger.error('Unsupported compressed format')
            return

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        subtitles = []
        for s in film.subtitles:
            try:
                subtitle = SubsceneSubtitle.from_api(s)
            except NotImplementedError as e:
                logger.info(e)
                continue
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language, forced=True)

            # set subtitle language to hi if it's hearing_impaired
            if subtitle.hearing_impaired:
                subtitle.language = Language.rebuild(subtitle.language, hi=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles

    def do_search(self, *args, **kwargs):
        try:
            return search(*args, **kwargs)
        except requests.HTTPError:
            region.delete("subscene_cookies2")
            raise

    @reinitialize_on_error((RequestException,), attempts=1)
    def query(self, video, titles):
        subtitles = []
        if isinstance(video, Episode):
            more_than_one = len(titles) > 1
            for series in titles:
                term = u"%s - %s Season" % (series, p.number_to_words("%sth" % video.season).capitalize())
                logger.debug('Searching with series and season: %s', term)
                film = self.do_search(term, session=self.session, release=False, throttle=self.search_throttle,
                                      limit_to=SearchTypes.TvSerie)
                if not film and video.season == 1:
                    logger.debug('Searching with series name: %s', series)
                    film = self.do_search(series, session=self.session, release=False, throttle=self.search_throttle,
                                          limit_to=SearchTypes.TvSerie)

                if film and film.subtitles:
                    logger.debug('Searching found: %s', len(film.subtitles))
                    subtitles += self.parse_results(video, film)
                else:
                    logger.debug('No results found')

                if more_than_one:
                    time.sleep(self.search_throttle)
        else:
            more_than_one = len(titles) > 1
            for title in titles:
                logger.debug('Searching for movie results: %r', title)
                film = self.do_search(title, year=video.year, session=self.session, limit_to=None, release=False,
                                      throttle=self.search_throttle)
                if film and film.subtitles:
                    subtitles += self.parse_results(video, film)
                if more_than_one:
                    time.sleep(self.search_throttle)

        logger.info("%s subtitles found" % len(subtitles))
        return subtitles
@@ -1,410 +0,0 @@
# -*- coding: utf-8 -*-

from difflib import SequenceMatcher
import functools
import logging
import re
import time
import urllib.parse

from bs4 import BeautifulSoup as bso
import cloudscraper
from guessit import guessit
from requests import Session
from requests.exceptions import HTTPError
from subliminal.exceptions import ProviderError
from subliminal_patch.core import Episode
from subliminal_patch.core import Movie
from subliminal_patch.exceptions import APIThrottled
from subliminal_patch.providers import Provider
from subliminal_patch.providers.utils import get_archive_from_bytes
from subliminal_patch.providers.utils import get_subtitle_from_archive
from subliminal_patch.providers.utils import update_matches
from subliminal_patch.subtitle import Subtitle
from subzero.language import Language

logger = logging.getLogger(__name__)


class SubsceneSubtitle(Subtitle):
    provider_name = "subscene_cloudscraper"
    hash_verifiable = False

    def __init__(self, language, page_link, release_info, episode_number=None):
        super().__init__(language, page_link=page_link)

        self.release_info = release_info
        self.episode_number = episode_number
        self.episode_title = None

        self._matches = set(
            ("title", "year")
            if episode_number is None
            else ("title", "series", "year", "season", "episode")
        )

    def get_matches(self, video):
        update_matches(self._matches, video, self.release_info)

        return self._matches

    @property
    def id(self):
        return self.page_link


_BASE_URL = "https://subscene.com"

# TODO: add more seasons and languages

_SEASONS = (
    "First",
    "Second",
    "Third",
    "Fourth",
    "Fifth",
    "Sixth",
    "Seventh",
    "Eighth",
    "Ninth",
    "Tenth",
    "Eleventh",
    "Twelfth",
    "Thirdteenth",
    "Fourthteenth",
    "Fifteenth",
    "Sixteenth",
    "Seventeenth",
    "Eightheenth",
    "Nineteenth",
    "Tweentieth",
)

_LANGUAGE_MAP = {
    "english": "eng",
    "farsi_persian": "per",
    "arabic": "ara",
    "spanish": "spa",
    "portuguese": "por",
    "italian": "ita",
    "dutch": "dut",
    "hebrew": "heb",
    "indonesian": "ind",
    "danish": "dan",
    "norwegian": "nor",
    "bengali": "ben",
    "bulgarian": "bul",
    "croatian": "hrv",
    "swedish": "swe",
    "vietnamese": "vie",
    "czech": "cze",
    "finnish": "fin",
    "french": "fre",
    "german": "ger",
    "greek": "gre",
    "hungarian": "hun",
    "icelandic": "ice",
    "japanese": "jpn",
    "macedonian": "mac",
    "malay": "may",
    "polish": "pol",
    "romanian": "rum",
    "russian": "rus",
    "serbian": "srp",
    "thai": "tha",
    "turkish": "tur",
}


class SubsceneProvider(Provider):
    provider_name = "subscene_cloudscraper"

    _movie_title_regex = re.compile(r"^(.+?)( \((\d{4})\))?$")
    _tv_show_title_regex = re.compile(
        r"^(.+?) [-\(]\s?(.*?) (season|series)\)?( \((\d{4})\))?$"
    )
    _supported_languages = {}
    _supported_languages["brazillian-portuguese"] = Language("por", "BR")

    for key, val in _LANGUAGE_MAP.items():
        _supported_languages[key] = Language.fromalpha3b(val)

    _supported_languages_reversed = {
        val: key for key, val in _supported_languages.items()
    }

    languages = set(_supported_languages.values())

    video_types = (Episode, Movie)
    subtitle_class = SubsceneSubtitle

    def initialize(self):
        pass

    def terminate(self):
        pass

    def _scraper_call(self, url, retry=7, method="GET", sleep=5, **kwargs):
        last_exc = None

        for n in range(retry):
            # Creating an instance for every try in order to avoid dropped connections.
            # This could probably be improved!
            scraper = cloudscraper.create_scraper()
            if method == "GET":
                req = scraper.get(url, **kwargs)
            elif method == "POST":
                req = scraper.post(url, **kwargs)
            else:
                raise NotImplementedError(f"{method} not allowed")

            try:
                req.raise_for_status()
            except HTTPError as error:
                logger.debug(
                    "'%s' returned. Trying again [%d] in %s", error, n + 1, sleep
                )
                last_exc = error
                time.sleep(sleep)
            else:
                return req

        raise ProviderError("403 Retry count exceeded") from last_exc

    def _gen_results(self, query):
        url = (
            f"{_BASE_URL}/subtitles/searchbytitle?query={urllib.parse.quote(query)}&l="
        )

        result = self._scraper_call(url, method="POST")
        soup = bso(result.content, "html.parser")

        for title in soup.select("li div[class='title'] a"):
            yield title

    def _search_movie(self, title, year):
        title = title.lower()
        year = str(year)

        found_movie = None

        results = []
        for result in self._gen_results(title):
            text = result.text.lower()
            match = self._movie_title_regex.match(text)
            if not match:
                continue
            match_title = match.group(1)
            match_year = match.group(3)
            if year == match_year:
                results.append(
                    {
                        "href": result.get("href"),
                        "similarity": SequenceMatcher(None, title, match_title).ratio(),
                    }
                )

        if results:
            results.sort(key=lambda x: x["similarity"], reverse=True)
            found_movie = results[0]["href"]
            logger.debug("Movie found: %s", results[0])
        return found_movie

    def _search_tv_show_season(self, title, season, year=None):
        try:
            season_str = _SEASONS[season - 1].lower()
        except IndexError:
            logger.debug("Season number not supported: %s", season)
            return None

        found_tv_show_season = None

        results = []
        for result in self._gen_results(title):
            text = result.text.lower()

            match = self._tv_show_title_regex.match(text)
            if not match:
                logger.debug("Series title not matched: %s", text)
                continue
            else:
                logger.debug("Series title matched: %s", text)

            match_title = match.group(1)
            match_season = match.group(2)

            # Match "complete series" titles as they usually contain season packs
            if season_str == match_season or "complete" in match_season:
                plus = 0.1 if year and str(year) in text else 0
                results.append(
                    {
                        "href": result.get("href"),
                        "similarity": SequenceMatcher(None, title, match_title).ratio()
                        + plus,
                    }
                )

        if results:
            results.sort(key=lambda x: x["similarity"], reverse=True)
            found_tv_show_season = results[0]["href"]
            logger.debug("TV Show season found: %s", results[0])

        return found_tv_show_season

    def _find_movie_subtitles(self, path, language):
        soup = self._get_subtitle_page_soup(path, language)

        subtitles = []
        for item in soup.select("tr"):
            subtitle = _get_subtitle_from_item(item, language)
            if subtitle is None:
                continue

            logger.debug("Found subtitle: %s", subtitle)
            subtitles.append(subtitle)

        return subtitles

    def _find_episode_subtitles(
        self, path, season, episode, language, episode_title=None
    ):
        soup = self._get_subtitle_page_soup(path, language)

        subtitles = []

        for item in soup.select("tr"):
            valid_item = None
            clean_text = " ".join(item.text.split())

            if not clean_text:
                continue

            # It will return list values
            guess = _memoized_episode_guess(clean_text)

            if "season" not in guess:
                if "complete series" in clean_text.lower():
                    logger.debug("Complete series pack found: %s", clean_text)
                    guess["season"] = [season]
                else:
                    logger.debug("Nothing guessed from release: %s", clean_text)
                    continue

            if season in guess["season"] and episode in guess.get("episode", []):
                logger.debug("Episode match found: %s - %s", guess, clean_text)
                valid_item = item

            elif season in guess["season"] and not "episode" in guess:
                logger.debug("Season pack found: %s", clean_text)
                valid_item = item

            if valid_item is None:
                continue

            subtitle = _get_subtitle_from_item(item, language, episode)

            if subtitle is None:
                continue

            subtitle.episode_title = episode_title

            logger.debug("Found subtitle: %s", subtitle)
            subtitles.append(subtitle)

        return subtitles

    def _get_subtitle_page_soup(self, path, language):
        language_path = self._supported_languages_reversed[language]
        result = self._scraper_call(f"{_BASE_URL}{path}/{language_path}")
        return bso(result.content, "html.parser")

    def list_subtitles(self, video, languages):
        is_episode = isinstance(video, Episode)

        if is_episode:
            result = self._search_tv_show_season(video.series, video.season, video.year)
        else:
            result = self._search_movie(video.title, video.year)

        if result is None:
            logger.debug("No results")
            return []

        subtitles = []

        for language in languages:
            if is_episode:
                subtitles.extend(
                    self._find_episode_subtitles(
                        result, video.season, video.episode, language, video.title
                    )
                )
            else:
                subtitles.extend(self._find_movie_subtitles(result, language))

        return subtitles

    def download_subtitle(self, subtitle):
        # TODO: add MustGetBlacklisted support

        result = self._scraper_call(subtitle.page_link)
        soup = bso(result.content, "html.parser")
        try:
            download_url = _BASE_URL + str(
                soup.select_one("a[id='downloadButton']")["href"]  # type: ignore
            )
        except (AttributeError, KeyError, TypeError):
            raise APIThrottled(f"Couldn't get download url from {subtitle.page_link}")

        downloaded = self._scraper_call(download_url)
        archive = get_archive_from_bytes(downloaded.content)

        if archive is None:
            raise APIThrottled(f"Invalid archive: {subtitle.page_link}")

        subtitle.content = get_subtitle_from_archive(
            archive,
            episode=subtitle.episode_number,
            episode_title=subtitle.episode_title,
        )


@functools.lru_cache(2048)
def _memoized_episode_guess(content):
    # Use include to save time from unnecessary checks
    return guessit(
        content,
        {
            "type": "episode",
            # Add codec keys to avoid matching x264, 5.1, etc as episode info
            "includes": ["season", "episode", "video_codec", "audio_codec"],
            "enforce_list": True,
        },
    )


def _get_subtitle_from_item(item, language, episode_number=None):
    release_infos = []

    try:
        release_infos.append(item.find("td", {"class": "a6"}).text.strip())
    except (AttributeError, KeyError):
        pass

    try:
        release_infos.append(
            item.find("td", {"class": "a1"}).find_all("span")[-1].text.strip()
        )
    except (AttributeError, KeyError):
        pass

    release_info = "".join(r_info for r_info in release_infos if r_info)

    try:
        path = item.find("td", {"class": "a1"}).find("a")["href"]
    except (AttributeError, KeyError):
        logger.debug("Couldn't get path: %s", item)
        return None

    return SubsceneSubtitle(language, _BASE_URL + path, release_info, episode_number)
@ -1,299 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# vim: fenc=utf-8 ts=4 et sw=4 sts=4
|
|
||||||
|
|
||||||
# This file is part of Subscene-API.
|
|
||||||
#
|
|
||||||
# Subscene-API is free software: you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU General Public License as published by
|
|
||||||
# the Free Software Foundation, either version 3 of the License, or
|
|
||||||
# (at your option) any later version.
|
|
||||||
#
|
|
||||||
# Subscene-API is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# GNU General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU General Public License
|
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
"""
|
|
||||||
Python wrapper for Subscene subtitle database.
|
|
||||||
|
|
||||||
since Subscene doesn't provide an official API, I wrote
|
|
||||||
this script that does the job by parsing the website"s pages.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# imports
|
|
||||||
import re
|
|
||||||
import enum
|
|
||||||
import sys
|
|
||||||
import requests
|
|
||||||
import time
|
|
||||||
import logging
|
|
||||||
|
|
||||||
is_PY2 = sys.version_info[0] < 3
|
|
||||||
if is_PY2:
|
|
||||||
from contextlib2 import suppress
|
|
||||||
from urllib2 import Request, urlopen
|
|
||||||
else:
|
|
||||||
from contextlib import suppress
|
|
||||||
from urllib.request import Request, urlopen
|
|
||||||
|
|
||||||
from dogpile.cache.api import NO_VALUE
|
|
||||||
from subliminal.cache import region
|
|
||||||
from bs4 import BeautifulSoup, NavigableString
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# constants
|
|
||||||
HEADERS = {
|
|
||||||
}
|
|
||||||
SITE_DOMAIN = "https://subscene.com"
|
|
||||||
|
|
||||||
DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWeb"\
|
|
||||||
"Kit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
|
|
||||||
|
|
||||||
|
|
||||||
ENDPOINT_RE = re.compile(r'(?uis)<form.+?action="/subtitles/(.+)">.*?<input type="text"')
|
|
||||||
|
|
||||||
|
|
||||||
class NewEndpoint(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# utils
|
|
||||||
def soup_for(url, data=None, session=None, user_agent=DEFAULT_USER_AGENT):
|
|
||||||
url = re.sub("\s", "+", url)
|
|
||||||
if not session:
|
|
||||||
r = Request(url, data=None, headers=dict(HEADERS, **{"User-Agent": user_agent}))
|
|
||||||
html = urlopen(r).read().decode("utf-8")
|
|
||||||
else:
|
|
||||||
ret = session.post(url, data=data)
|
|
||||||
ret.raise_for_status()
|
|
||||||
html = ret.text
|
|
||||||
return BeautifulSoup(html, "html.parser")
|
|
||||||
|
|
||||||
|
|
||||||
class AttrDict(object):
|
|
||||||
def __init__(self, *attrs):
|
|
||||||
self._attrs = attrs
|
|
||||||
|
|
||||||
for attr in attrs:
|
|
||||||
setattr(self, attr, "")
|
|
||||||
|
|
||||||
def to_dict(self):
|
|
||||||
return {k: getattr(self, k) for k in self._attrs}
|
|
||||||
|
|
||||||
|
|
||||||
# models
|
|
||||||
@enum.unique
|
|
||||||
class SearchTypes(enum.Enum):
|
|
||||||
Exact = 1
|
|
||||||
TvSerie = 2
|
|
||||||
Popular = 3
|
|
||||||
Close = 4
|
|
||||||
|
|
||||||
|
|
||||||
SectionsParts = {
|
|
||||||
SearchTypes.Exact: "Exact",
|
|
||||||
SearchTypes.TvSerie: "TV-Series",
|
|
||||||
SearchTypes.Popular: "Popular",
|
|
||||||
SearchTypes.Close: "Close"
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||


class Subtitle(object):
    def __init__(self, title, url, language, owner_username, owner_url,
                 description, hearing_impaired):
        self.title = title
        self.url = url
        self.language = language
        self.owner_username = owner_username
        self.owner_url = owner_url
        self.description = description
        self.hearing_impaired = hearing_impaired

        self._zipped_url = None

    def __str__(self):
        return self.title

    @classmethod
    def from_rows(cls, rows):
        subtitles = []

        for row in rows:
            if row.td.a is not None and row.td.get("class", ["lazy"])[0] != "empty":
                subtitles.append(cls.from_row(row))

        return subtitles

    @classmethod
    def from_row(cls, row):
        attrs = AttrDict("title", "url", "language", "owner_username",
                         "owner_url", "description", "hearing_impaired")

        with suppress(Exception):
            attrs.title = row.find("td", "a1").a.find_all("span")[1].text \
                .strip()

        with suppress(Exception):
            attrs.url = SITE_DOMAIN + row.find("td", "a1").a.get("href")

        with suppress(Exception):
            attrs.language = row.find("td", "a1").a.find_all("span")[0].text \
                .strip()

        with suppress(Exception):
            attrs.owner_username = row.find("td", "a5").a.text.strip()

        with suppress(Exception):
            # was `attrs.owner_page`, which AttrDict never passes through;
            # the attribute the constructor expects is `owner_url`
            attrs.owner_url = SITE_DOMAIN + row.find("td", "a5").a \
                .get("href").strip()

        with suppress(Exception):
            attrs.description = row.find("td", "a6").div.text.strip()

        with suppress(Exception):
            attrs.hearing_impaired = bool(row.find("td", "a41"))

        return cls(**attrs.to_dict())

    @classmethod
    def get_zipped_url(cls, url, session=None):
        soup = soup_for(url, session=session)
        return SITE_DOMAIN + soup.find("div", "download").a.get("href")

    @property
    def zipped_url(self):
        if self._zipped_url:
            return self._zipped_url

        self._zipped_url = Subtitle.get_zipped_url(self.url)
        return self._zipped_url
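
# Sketch: downloading and extracting a subtitle from its `zipped_url`
# (not part of the original module; assumes the `requests` import above,
# the stdlib zipfile/io modules, and a hypothetical target directory):
#
#     import io, zipfile
#     data = requests.get(sub.zipped_url,
#                         headers={"User-Agent": DEFAULT_USER_AGENT}).content
#     with zipfile.ZipFile(io.BytesIO(data)) as zf:
#         zf.extractall("/tmp/subs")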


class Film(object):
    def __init__(self, title, year=None, imdb=None, cover=None,
                 subtitles=None):
        self.title = title
        self.year = year
        self.imdb = imdb
        self.cover = cover
        self.subtitles = subtitles

    def __str__(self):
        return self.title

    @classmethod
    def from_url(cls, url, session=None):
        soup = soup_for(url, session=session)

        content = soup.find("div", "subtitles")
        header = content.find("div", "box clearfix")
        cover = None

        try:
            cover = header.find("div", "poster").img.get("src")
        except AttributeError:
            pass

        # the page header carries a fixed 12-character suffix that is not
        # part of the title
        title = header.find("div", "header").h2.text[:-12].strip()

        imdb = header.find("div", "header").h2.find("a", "imdb").get("href")

        year = header.find("div", "header").ul.li.text
        year = int(re.findall(r"[0-9]+", year)[0])

        rows = content.find("table").tbody.find_all("tr")
        subtitles = Subtitle.from_rows(rows)

        return cls(title, year, imdb, cover, subtitles)
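
# Sketch: `Film.from_url` can also be called directly with a known film page
# (the URL shown is hypothetical; assumes the page layout parsed above):
#
#     film = Film.from_url(SITE_DOMAIN + "/subtitles/breaking-bad-first-season")
#     print(film.title, film.year, len(film.subtitles))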


# functions
def section_exists(soup, section):
    tag_part = SectionsParts[section]

    try:
        headers = soup.find("div", "search-result").find_all("h2")
    except AttributeError:
        return False

    for header in headers:
        if tag_part in header.text:
            return True

    return False


def get_first_film(soup, section, year=None, session=None):
    tag_part = SectionsParts[section]
    tag = None

    headers = soup.find("div", "search-result").find_all("h2")
    for header in headers:
        if tag_part in header.text:
            tag = header
            break

    if not tag:
        return

    url = SITE_DOMAIN + tag.findNext("ul").find("li").div.a.get("href")
    for t in tag.findNext("ul").findAll("li"):
        if isinstance(t, NavigableString) or not t.div:
            continue

        # prefer the entry whose link text mentions the requested year;
        # guard against links without a plain string child
        if t.div.a.string and str(year) in t.div.a.string:
            url = SITE_DOMAIN + t.div.a.get("href")
            break

    return Film.from_url(url, session=session)


def find_endpoint(session, content=None):
    endpoint = region.get("subscene_endpoint2")
    if endpoint is NO_VALUE:
        if not content:
            content = session.get(SITE_DOMAIN).text

        m = ENDPOINT_RE.search(content)
        if m:
            endpoint = m.group(1).strip()
            logger.debug("Switching main endpoint to %s", endpoint)
            region.set("subscene_endpoint2", endpoint)
    return endpoint


def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact, throttle=0):
    # note to subscene: if you actually start to randomize the endpoint, we'll have to query your server even more

    if release:
        endpoint = "release"
    else:
        endpoint = find_endpoint(session)
        time.sleep(throttle)

    if not endpoint:
        logger.error("Couldn't find endpoint, exiting")
        return

    soup = soup_for("%s/subtitles/%s" % (SITE_DOMAIN, endpoint), data={"query": term},
                    session=session)

    if soup:
        if "Subtitle search by" in str(soup):
            rows = soup.find("table").tbody.find_all("tr")
            subtitles = Subtitle.from_rows(rows)
            return Film(term, subtitles=subtitles)

        for _, search_type in SearchTypes.__members__.items():
            if section_exists(soup, search_type):
                return get_first_film(soup, search_type, year=year, session=session)

            if limit_to == search_type:
                return
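
# Sketch: a title search restricted to exact matches, with throttling between
# endpoint-discovery requests (title, year, and throttle value are
# illustrative):
#
#     session = requests.Session()
#     film = search("Dune", release=False, session=session, year=2021,
#                   limit_to=SearchTypes.Exact, throttle=2)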
@ -1,7 +1,7 @@
*.local
*.tsbuildinfo
build
coverage
dev-dist
dist
node_modules
@ -0,0 +1 @@
20.13
@ -0,0 +1,14 @@
module.exports = {
  plugins: {
    "postcss-preset-mantine": {},
    "postcss-simple-vars": {
      variables: {
        "mantine-breakpoint-xs": "36em",
        "mantine-breakpoint-sm": "48em",
        "mantine-breakpoint-md": "62em",
        "mantine-breakpoint-lg": "75em",
        "mantine-breakpoint-xl": "88em",
      },
    },
  },
};