Updated subliminal_patch and subzero to latest dev version

pull/614/head
Halali 5 years ago
parent 4e5e7d6744
commit b3c6def6bc

@@ -27,16 +27,6 @@ class TitloviConverter(LanguageReverseConverter):
            }
        self.codes = set(self.from_titlovi.keys())
-       # temporary fix, should be removed as soon as API is used
-       self.lang_from_countrycode = {'ba': ('bos',),
-                                     'en': ('eng',),
-                                     'hr': ('hrv',),
-                                     'mk': ('mkd',),
-                                     'rs': ('srp',),
-                                     'rsc': ('srp', None, 'Cyrl'),
-                                     'si': ('slv',)
-                                     }

    def convert(self, alpha3, country=None, script=None):
        if (alpha3, country, script) in self.to_titlovi:
            return self.to_titlovi[(alpha3, country, script)]
@@ -49,9 +39,5 @@ class TitloviConverter(LanguageReverseConverter):
        if titlovi in self.from_titlovi:
            return self.from_titlovi[titlovi]
-       # temporary fix, should be removed as soon as API is used
-       if titlovi in self.lang_from_countrycode:
-           return self.lang_from_countrycode[titlovi]
        raise ConfigurationError('Unsupported language number for titlovi: %s' % titlovi)
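With the country-code fallback removed, reverse() only accepts codes present in from_titlovi. A minimal round-trip sketch (not part of the commit), using only names that appear elsewhere in this diff:

from babelfish import language_converters
from subzero.language import Language

# registration as done in the provider module further down in this commit
language_converters.register('titlovi = subliminal_patch.converters.titlovi:TitloviConverter')

code = next(iter(language_converters['titlovi'].codes))   # any supported Titlovi code
lang = Language.fromtitlovi(code)      # reverse(): Titlovi code -> Language
site_code = lang.titlovi               # convert(): Language -> Titlovi code
# codes that were only handled by the removed fallback now raise ConfigurationError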

@@ -30,7 +30,7 @@ from subliminal.core import guessit, ProviderPool, io, is_windows_special_path,
    ThreadPoolExecutor, check_video
from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError
-from subzero.language import Language
+from subzero.language import Language, ENDSWITH_LANGUAGECODE_RE
from scandir import scandir, scandir_generic as _scandir_generic

logger = logging.getLogger(__name__)
@@ -571,12 +571,14 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski
    return video


-def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False):
+def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False, match_strictness="strict"):
    dirpath, filename = os.path.split(path)
    dirpath = dirpath or '.'
-   fileroot, fileext = os.path.splitext(filename)
+   fn_no_ext, fileext = os.path.splitext(filename)
+   fn_no_ext_lower = fn_no_ext.lower()
    subtitles = {}
    _scandir = _scandir_generic if scandir_generic else scandir
    for entry in _scandir(dirpath):
        if (not entry.name or entry.name in ('\x0c', '$', ',', '\x7f')) and not scandir_generic:
            logger.debug('Could not determine the name of the file, retrying with scandir_generic')
@@ -587,9 +589,11 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
        p = entry.name

        # keep only valid subtitle filenames
-       if not p.lower().startswith(fileroot.lower()) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
+       if not p.lower().endswith(SUBTITLE_EXTENSIONS):
            continue

+       # not p.lower().startswith(fileroot.lower()) or not
        p_root, p_ext = os.path.splitext(p)
        if not INCLUDE_EXOTIC_SUBS and p_ext not in (".srt", ".ass", ".ssa", ".vtt"):
            continue
@@ -608,7 +612,19 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
        forced = "forced" in adv_tag

        # extract the potential language code
-       language_code = p_root[len(fileroot):].replace('_', '-')[1:]
+       language_code = p_root.rsplit(".", 1)[1].replace('_', '-')
+
+       # remove possible language code for matching
+       p_root_bare = ENDSWITH_LANGUAGECODE_RE.sub("", p_root)
+       p_root_lower = p_root_bare.lower()
+
+       filename_matches = p_root_lower == fn_no_ext_lower
+       filename_contains = p_root_lower in fn_no_ext_lower
+
+       if not filename_matches:
+           if match_strictness == "strict" or (match_strictness == "loose" and not filename_contains):
+               continue

        # default language is undefined
        language = Language('und')
@@ -632,7 +648,7 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
    return subtitles


-def search_external_subtitles(path, languages=None, only_one=False):
+def search_external_subtitles(path, languages=None, only_one=False, match_strictness="strict"):
    """
    wrap original search_external_subtitles function to search multiple paths for one given video
    # todo: cleanup and merge with _search_external_subtitles
@@ -653,10 +669,11 @@ def search_external_subtitles(path, languages=None, only_one=False):
        if os.path.isdir(os.path.dirname(abspath)):
            try:
                subtitles.update(_search_external_subtitles(abspath, languages=languages,
-                                                           only_one=only_one))
+                                                           only_one=only_one, match_strictness=match_strictness))
            except OSError:
                subtitles.update(_search_external_subtitles(abspath, languages=languages,
-                                                           only_one=only_one, scandir_generic=True))
+                                                           only_one=only_one, match_strictness=match_strictness,
+                                                           scandir_generic=True))
    logger.debug("external subs: found %s", subtitles)
    return subtitles
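The new match_strictness keyword only changes which neighbouring files survive the filename check; a hedged sketch with a made-up path:

from subliminal_patch.core import search_external_subtitles

video = '/media/Movie.2019.mkv'   # hypothetical path

# "strict" (default): keep only names equal to the video name once a trailing
# language code is stripped, e.g. Movie.2019.srt or Movie.2019.en.srt
subs = search_external_subtitles(video)

# "loose": additionally keep names merely contained in the video name,
# e.g. Movie.srt; everything else is still skipped
subs = search_external_subtitles(video, match_strictness="loose")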

@@ -2,42 +2,35 @@
import io
import logging
-import math
import re
-import time
+from datetime import datetime
+import dateutil.parser

import rarfile
-from bs4 import BeautifulSoup
from zipfile import ZipFile, is_zipfile
from rarfile import RarFile, is_rarfile
from babelfish import language_converters, Script
-from requests import RequestException
+from requests import RequestException, codes as request_codes
from guessit import guessit
from subliminal_patch.http import RetryingCFSession
from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.subtitle import Subtitle
from subliminal_patch.utils import sanitize, fix_inconsistent_naming as _fix_inconsistent_naming
-from subliminal.exceptions import ProviderError
+from subliminal.exceptions import ProviderError, AuthenticationError, ConfigurationError
from subliminal.score import get_equivalent_release_groups
from subliminal.utils import sanitize_release_group
from subliminal.subtitle import guess_matches
from subliminal.video import Episode, Movie
from subliminal.subtitle import fix_line_ending
-from subliminal_patch.pitcher import pitchers, load_verification, store_verification
-from subzero.language import Language
-from random import randint
-from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
+from subzero.language import Language
+from dogpile.cache.api import NO_VALUE
+from subliminal.cache import region

# parsing regex definitions
title_re = re.compile(r'(?P<title>(?:.+(?= [Aa][Kk][Aa] ))|.+)(?:(?:.+)(?P<altitle>(?<= [Aa][Kk][Aa] ).+))?')
-lang_re = re.compile(r'(?<=flags/)(?P<lang>.{2})(?:.)(?P<script>c?)(?:.+)')
-season_re = re.compile(r'Sezona (?P<season>\d+)')
-episode_re = re.compile(r'Epizoda (?P<episode>\d+)')
-year_re = re.compile(r'(?P<year>\d+)')
-fps_re = re.compile(r'fps: (?P<fps>.+)')


def fix_inconsistent_naming(title):
@@ -51,6 +44,7 @@ def fix_inconsistent_naming(title):
    return _fix_inconsistent_naming(title, {"DC's Legends of Tomorrow": "Legends of Tomorrow",
                                            "Marvel's Jessica Jones": "Jessica Jones"})


logger = logging.getLogger(__name__)

# Configure :mod:`rarfile` to use the same path separator as :mod:`zipfile`
@@ -62,9 +56,9 @@ language_converters.register('titlovi = subliminal_patch.converters.titlovi:Titl

class TitloviSubtitle(Subtitle):
    provider_name = 'titlovi'

-   def __init__(self, language, page_link, download_link, sid, releases, title, alt_title=None, season=None,
-                episode=None, year=None, fps=None, asked_for_release_group=None, asked_for_episode=None):
-       super(TitloviSubtitle, self).__init__(language, page_link=page_link)
+   def __init__(self, language, download_link, sid, releases, title, alt_title=None, season=None,
+                episode=None, year=None, rating=None, download_count=None, asked_for_release_group=None, asked_for_episode=None):
+       super(TitloviSubtitle, self).__init__(language)
        self.sid = sid
        self.releases = self.release_info = releases
        self.title = title
@@ -73,11 +67,21 @@ class TitloviSubtitle(Subtitle):
        self.episode = episode
        self.year = year
        self.download_link = download_link
-       self.fps = fps
+       self.rating = rating
+       self.download_count = download_count
        self.matches = None
        self.asked_for_release_group = asked_for_release_group
        self.asked_for_episode = asked_for_episode

+   def __repr__(self):
+       if self.season and self.episode:
+           return '<%s "%s (%r)" s%.2de%.2d [%s:%s] ID:%r R:%.2f D:%r>' % (
+               self.__class__.__name__, self.title, self.year, self.season, self.episode, self.language,
+               self._guessed_encoding, self.sid, self.rating, self.download_count)
+       else:
+           return '<%s "%s (%r)" [%s:%s] ID:%r R:%.2f D:%r>' % (
+               self.__class__.__name__, self.title, self.year, self.language, self._guessed_encoding, self.sid,
+               self.rating, self.download_count)
+
    @property
    def id(self):
        return self.sid
@@ -134,20 +138,62 @@ class TitloviSubtitle(Subtitle):
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    subtitle_class = TitloviSubtitle
    languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')}
-   server_url = 'https://titlovi.com'
-   search_url = server_url + '/titlovi/?'
-   download_url = server_url + '/download/?type=1&mediaid='
+   api_url = 'https://kodi.titlovi.com/api/subtitles'
+   api_gettoken_url = api_url + '/gettoken'
+   api_search_url = api_url + '/search'
+
+   def __init__(self, username=None, password=None):
+       if not all((username, password)):
+           raise ConfigurationError('Username and password must be specified')
+
+       self.username = username
+       self.password = password
+
+       self.session = None
+       self.user_id = None
+       self.login_token = None
+       self.token_exp = None

    def initialize(self):
        self.session = RetryingCFSession()
        #load_verification("titlovi", self.session)

+       token = region.get("titlovi_token")
+       if token is not NO_VALUE:
+           self.user_id, self.login_token, self.token_exp = token
+           if datetime.now() > self.token_exp:
+               logger.debug('Token expired')
+               self.log_in()
+           else:
+               logger.debug('Use cached token')
+       else:
+           logger.debug('Token not found in cache')
+           self.log_in()
+
+   def log_in(self):
+       login_params = dict(username=self.username, password=self.password, json=True)
+       try:
+           response = self.session.post(self.api_gettoken_url, params=login_params)
+           if response.status_code == request_codes.ok:
+               resp_json = response.json()
+               self.login_token = resp_json.get('Token')
+               self.user_id = resp_json.get('UserId')
+               self.token_exp = dateutil.parser.parse(resp_json.get('ExpirationDate'))
+
+               region.set("titlovi_token", [self.user_id, self.login_token, self.token_exp])
+               logger.debug('New token obtained')
+
+           elif response.status_code == request_codes.unauthorized:
+               raise AuthenticationError('Login failed')
+
+       except RequestException as e:
+           logger.error(e)
+
    def terminate(self):
        self.session.close()

-   def query(self, languages, title, season=None, episode=None, year=None, video=None):
-       items_per_page = 10
-       current_page = 1
+   def query(self, languages, title, season=None, episode=None, year=None, imdb_id=None, video=None):
+       search_params = dict()

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]
@@ -162,135 +208,73 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
-       params = {'prijevod': title, 'jezik': langs}
+       search_params['query'] = title
+       search_params['lang'] = langs
        is_episode = False
        if season and episode:
            is_episode = True
-           params['s'] = season
-           params['e'] = episode
-       if year:
-           params['g'] = year
+           search_params['season'] = season
+           search_params['episode'] = episode
+       #if year:
+       #    search_params['year'] = year
+       if imdb_id:
+           search_params['imdbID'] = imdb_id

        # loop through paginated results
-       logger.info('Searching subtitles %r', params)
+       logger.info('Searching subtitles %r', search_params)
        subtitles = []
+       query_results = []

-       while True:
-           # query the server
-           try:
-               r = self.session.get(self.search_url, params=params, timeout=10)
-               r.raise_for_status()
-           except RequestException as e:
-               logger.exception('RequestException %s', e)
-               break
-           else:
-               try:
-                   soup = BeautifulSoup(r.content, 'lxml')
-
-                   # number of results
-                   result_count = int(soup.select_one('.results_count b').string)
-               except:
-                   result_count = None
-
-           # exit if no results
-           if not result_count:
-               if not subtitles:
-                   logger.debug('No subtitles found')
-               else:
-                   logger.debug("No more subtitles found")
-               break
-
-           # number of pages with results
-           pages = int(math.ceil(result_count / float(items_per_page)))
-
-           # get current page
-           if 'pg' in params:
-               current_page = int(params['pg'])
-
-           try:
-               sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
-               for sub in sublist:
-                   # subtitle id
-                   sid = sub.find(attrs={'data-id': True}).attrs['data-id']
-                   # get download link
-                   download_link = self.download_url + sid
-                   # title and alternate title
-                   match = title_re.search(sub.a.string)
-                   if match:
-                       _title = match.group('title')
-                       alt_title = match.group('altitle')
-                   else:
-                       continue
-
-                   # page link
-                   page_link = self.server_url + sub.a.attrs['href']
-
-                   # subtitle language
-                   _lang = sub.select_one('.lang')
-                   match = lang_re.search(_lang.attrs.get('src', _lang.attrs.get('data-cfsrc', '')))
-                   if match:
-                       try:
-                           # decode language
-                           lang = Language.fromtitlovi(match.group('lang')+match.group('script'))
-                       except ValueError:
-                           continue
-
-                   # relase year or series start year
-                   match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
-                   if match:
-                       r_year = int(match.group('year'))
-                   # fps
-                   match = fps_re.search(sub.select_one('.fps').string)
-                   if match:
-                       fps = match.group('fps')
-
-                   # releases
-                   releases = str(sub.select_one('.fps').parent.contents[0].string)
-
-                   # handle movies and series separately
-                   if is_episode:
-                       # season and episode info
-                       sxe = sub.select_one('.s0xe0y').string
-                       r_season = None
-                       r_episode = None
-                       if sxe:
-                           match = season_re.search(sxe)
-                           if match:
-                               r_season = int(match.group('season'))
-                           match = episode_re.search(sxe)
-                           if match:
-                               r_episode = int(match.group('episode'))
-
-                       subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
-                                                      alt_title=alt_title, season=r_season, episode=r_episode,
-                                                      year=r_year, fps=fps,
-                                                      asked_for_release_group=video.release_group,
-                                                      asked_for_episode=episode)
-                   else:
-                       subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
-                                                      alt_title=alt_title, year=r_year, fps=fps,
-                                                      asked_for_release_group=video.release_group)
-                   logger.debug('Found subtitle %r', subtitle)
-
-                   # prime our matches so we can use the values later
-                   subtitle.get_matches(video)
-
-                   # add found subtitles
-                   subtitles.append(subtitle)
-           finally:
-               soup.decompose()
-
-           # stop on last page
-           if current_page >= pages:
-               break
-
-           # increment current page
-           params['pg'] = current_page + 1
-           logger.debug('Getting page %d', params['pg'])
+       try:
+           search_params['token'] = self.login_token
+           search_params['userid'] = self.user_id
+           search_params['json'] = True
+
+           response = self.session.get(self.api_search_url, params=search_params)
+           resp_json = response.json()
+           if resp_json['SubtitleResults']:
+               query_results.extend(resp_json['SubtitleResults'])
+
+       except Exception as e:
+           logger.error(e)
+
+       for sub in query_results:
+           # title and alternate title
+           match = title_re.search(sub.get('Title'))
+           if match:
+               _title = match.group('title')
+               alt_title = match.group('altitle')
+           else:
+               continue
+
+           # handle movies and series separately
+           if is_episode:
+               subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')), sub.get('Link'), sub.get('Id'),
+                                              sub.get('Release'), _title,
+                                              alt_title=alt_title, season=sub.get('Season'), episode=sub.get('Episode'),
+                                              year=sub.get('Year'), rating=sub.get('Rating'),
+                                              download_count=sub.get('DownloadCount'),
+                                              asked_for_release_group=video.release_group,
+                                              asked_for_episode=episode)
+           else:
+               subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')), sub.get('Link'), sub.get('Id'),
+                                              sub.get('Release'), _title,
+                                              alt_title=alt_title, year=sub.get('Year'), rating=sub.get('Rating'),
+                                              download_count=sub.get('DownloadCount'),
+                                              asked_for_release_group=video.release_group)
+           logger.debug('Found subtitle %r', subtitle)
+
+           # prime our matches so we can use the values later
+           subtitle.get_matches(video)
+
+           # add found subtitles
+           subtitles.append(subtitle)

        return subtitles
    def list_subtitles(self, video, languages):
        season = episode = None

        if isinstance(video, Episode):
            title = video.series
            season = video.season
@@ -300,6 +284,7 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
        return [s for s in
                self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode, year=video.year,
+                          imdb_id=video.imdb_id,
                           video=video)]
    def download_subtitle(self, subtitle):
@@ -337,10 +322,12 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
        sub_to_extract = None

        for sub_name in subs_in_archive:
-           if not ('.cyr' in sub_name or '.cir' in sub_name):
+           _sub_name = sub_name.lower()
+
+           if not ('.cyr' in _sub_name or '.cir' in _sub_name or 'cyr)' in _sub_name):
                sr_lat_subs.append(sub_name)

-           if ('.cyr' in sub_name or '.cir' in sub_name) and not '.lat' in sub_name:
+           if ('.cyr' in sub_name or '.cir' in _sub_name) and not '.lat' in _sub_name.lower():
                sr_cyr_subs.append(sub_name)

        if subtitle.language == 'sr':
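For reference, the token/search exchange the provider now performs can be reproduced with plain requests; the endpoints and field names are the ones used above, the credentials and query are placeholders:

import requests

API = 'https://kodi.titlovi.com/api/subtitles'
USERNAME, PASSWORD = 'user', 'secret'   # placeholders

# gettoken returns Token, UserId and ExpirationDate (the provider caches all three)
auth = requests.post(API + '/gettoken',
                     params={'username': USERNAME, 'password': PASSWORD, 'json': True}).json()

# search: token and userid accompany every query
results = requests.get(API + '/search',
                       params={'token': auth.get('Token'), 'userid': auth.get('UserId'),
                               'query': 'south park', 'json': True}).json()

for sub in results.get('SubtitleResults') or []:
    print(sub.get('Title'), sub.get('Year'), sub.get('Lang'), sub.get('Release'), sub.get('Link'))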

@@ -1,5 +1,6 @@
# coding=utf-8
import types
+import re

from babelfish.exceptions import LanguageError
from babelfish import Language as Language_, basestr
@@ -134,3 +135,16 @@ class Language(Language_):
            return Language(*Language_.fromietf(s).__getstate__())

        return Language(*Language_.fromalpha3b(s).__getstate__())
+
+
+IETF_MATCH = ".+\.([^-.]+)(?:-[A-Za-z]+)?$"
+ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
+
+
+def match_ietf_language(s, ietf=False):
+    language_match = re.match(".+\.([^\.]+)$" if not ietf
+                              else IETF_MATCH, s)
+    if language_match and len(language_match.groups()) == 1:
+        language = language_match.groups()[0]
+        return language
+    return s
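On typical external-subtitle names the two helpers behave as follows (the results follow directly from the patterns above):

from subzero.language import ENDSWITH_LANGUAGECODE_RE, match_ietf_language

ENDSWITH_LANGUAGECODE_RE.sub("", "Movie.2019.en")      # 'Movie.2019'
ENDSWITH_LANGUAGECODE_RE.sub("", "Movie.2019.pt-BR")   # 'Movie.2019'
ENDSWITH_LANGUAGECODE_RE.sub("", "Movie.2019")         # unchanged: '2019' is too long for a code

match_ietf_language("Movie.2019.en")                   # 'en'
match_ietf_language("Movie.2019.pt-BR", ietf=True)     # 'pt' (region suffix ignored)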

@@ -107,6 +107,12 @@ class Dicked(object):
            for key, value in entries.iteritems():
                self.__dict__[key] = (Dicked(**value) if isinstance(value, dict) else value)

+   def has(self, key):
+       return self._entries is not None and key in self._entries
+
+   def get(self, key, default=None):
+       return self._entries.get(key, default) if self._entries else default
+
    def __repr__(self):
        return str(self)
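A hedged usage sketch; it assumes the constructor (not shown in this hunk) keeps the original keyword arguments in self._entries, which is what has() and get() read:

prefs = Dicked(providers={"titlovi": {"enabled": True}})

prefs.providers.titlovi.enabled     # nested dicts become nested Dicked attributes
prefs.has("providers")              # True, looked up in self._entries
prefs.get("missing", default=[])    # falls back to the default instead of raising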

@@ -17,7 +17,8 @@ def has_external_subtitle(part_id, stored_subs, language):

def set_existing_languages(video, video_info, external_subtitles=False, embedded_subtitles=False, known_embedded=None,
-                          stored_subs=None, languages=None, only_one=False, known_metadata_subs=None):
+                          stored_subs=None, languages=None, only_one=False, known_metadata_subs=None,
+                          match_strictness="strict"):
    logger.debug(u"Determining existing subtitles for %s", video.name)

    external_langs_found = set()
@@ -27,7 +28,8 @@ def set_existing_languages(video, video_info, external_subtitles=False, embedded
        external_langs_found = known_metadata_subs

    external_langs_found.update(set(search_external_subtitles(video.name, languages=languages,
-                                                              only_one=only_one).values()))
+                                                              only_one=only_one,
+                                                              match_strictness=match_strictness).values()))

    # found external subtitles should be considered?
    if external_subtitles:
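The new keyword is simply threaded through to search_external_subtitles(); a hypothetical call (video, video_info and the language set come from the surrounding plugin code):

set_existing_languages(video, video_info,
                       external_subtitles=True,
                       languages={Language('eng')},
                       only_one=False,
                       match_strictness="loose")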
