diff --git a/.gitignore b/.gitignore index 0bac645ee..452c43001 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ cachefile.dbm *.log *.log.* *.db +*.pyc .idea/* bazarr.pid /venv @@ -10,6 +11,7 @@ bazarr.pid static/scss/.sass-cache/* static/scss/.sass-cache *.scssc +/.vscode # Allow !*.dll \ No newline at end of file diff --git a/README.md b/README.md index 378ac78c7..62b5fc94a 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,7 @@ If you need something that is not already part of Bazarr, feel free to create a * TVSubtitles * Wizdom * XSubs +* Yavka.net * Zimuku ## Screenshot diff --git a/bazarr.py b/bazarr.py index d4929937a..f9e5d1c3f 100644 --- a/bazarr.py +++ b/bazarr.py @@ -80,6 +80,9 @@ if __name__ == '__main__': while True: check_status() try: - time.sleep(5) + if sys.platform.startswith('win'): + time.sleep(5) + else: + os.wait() except (KeyboardInterrupt, SystemExit): pass diff --git a/bazarr/get_providers.py b/bazarr/get_providers.py index e8348f81c..b25267c05 100644 --- a/bazarr/get_providers.py +++ b/bazarr/get_providers.py @@ -35,6 +35,11 @@ PROVIDER_THROTTLE_MAP = { }, "titulky": { DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours") + }, + "legendasdivx": { + TooManyRequests: (datetime.timedelta(hours=2), "2 hours"), + DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"), + ParseResponseError: (datetime.timedelta(hours=1), "1 hours"), } } diff --git a/bazarr/list_subtitles.py b/bazarr/list_subtitles.py index dedd7fd4f..fe3f4488c 100644 --- a/bazarr/list_subtitles.py +++ b/bazarr/list_subtitles.py @@ -387,7 +387,7 @@ def guess_external_subtitles(dest_folder, subtitles): logging.debug('BAZARR detected encoding %r', guess) if guess["confidence"] < 0.6: raise UnicodeError - if guess["confidence"] < 0.7 or guess["encoding"] == "ascii": + if guess["encoding"] == "ascii": guess["encoding"] = "utf-8" text = text.decode(guess["encoding"]) detected_language = guess_language(text) diff --git a/bazarr/main.py b/bazarr/main.py index 5ade8af14..8cfb73718 100644 --- a/bazarr/main.py +++ b/bazarr/main.py @@ -45,6 +45,8 @@ from get_movies import * from scheduler import Scheduler from check_update import check_and_apply_update from subliminal_patch.extensions import provider_registry as provider_manager +from subliminal_patch.core import SUBTITLE_EXTENSIONS +from subliminal.cache import region from functools import wraps from app import create_app, socketio diff --git a/libs/subliminal_patch/providers/legendasdivx.py b/libs/subliminal_patch/providers/legendasdivx.py index 6247792af..e9f505bdf 100644 --- a/libs/subliminal_patch/providers/legendasdivx.py +++ b/libs/subliminal_patch/providers/legendasdivx.py @@ -2,20 +2,22 @@ from __future__ import absolute_import import logging import io +import re import os import rarfile import zipfile from requests import Session from guessit import guessit -from subliminal_patch.exceptions import ParseResponseError +from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded from subliminal_patch.providers import Provider from subliminal.providers import ParserBeautifulSoup from subliminal_patch.subtitle import Subtitle from subliminal.video import Episode, Movie -from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending,guess_matches +from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending, guess_matches from subzero.language import Language from subliminal_patch.score import get_scores +from subliminal.utils import sanitize, sanitize_release_group logger = logging.getLogger(__name__) @@ -27,11 +29,12 @@ class LegendasdivxSubtitle(Subtitle): super(LegendasdivxSubtitle, self).__init__(language) self.language = language self.page_link = data['link'] - self.hits=data['hits'] - self.exact_match=data['exact_match'] - self.description=data['description'].lower() + self.hits = data['hits'] + self.exact_match = data['exact_match'] + self.description = data['description'] self.video = video - self.videoname =data['videoname'] + self.video_filename = data['video_filename'] + self.uploader = data['uploader'] @property def id(self): @@ -44,40 +47,37 @@ class LegendasdivxSubtitle(Subtitle): def get_matches(self, video): matches = set() - if self.videoname.lower() in self.description: + description = sanitize(self.description) + + if sanitize(self.video_filename) in description: matches.update(['title']) matches.update(['season']) matches.update(['episode']) # episode - if video.title and video.title.lower() in self.description: + if video.title and sanitize(video.title) in description: matches.update(['title']) - if video.year and '{:04d}'.format(video.year) in self.description: + if video.year and '{:04d}'.format(video.year) in description: matches.update(['year']) if isinstance(video, Episode): # already matched in search query - if video.season and 's{:02d}'.format(video.season) in self.description: + if video.season and 's{:02d}'.format(video.season) in description: matches.update(['season']) - if video.episode and 'e{:02d}'.format(video.episode) in self.description: + if video.episode and 'e{:02d}'.format(video.episode) in description: matches.update(['episode']) if video.episode and video.season and video.series: - if '{}.s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description: - matches.update(['series']) - matches.update(['season']) - matches.update(['episode']) - if '{} s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description: + if '{} s{:02d}e{:02d}'.format(sanitize(video.series), video.season, video.episode) in description: matches.update(['series']) matches.update(['season']) matches.update(['episode']) # release_group - if video.release_group and video.release_group.lower() in self.description: + if video.release_group and sanitize_release_group(video.release_group) in sanitize_release_group(description): matches.update(['release_group']) # resolution - - if video.resolution and video.resolution.lower() in self.description: + if video.resolution and video.resolution.lower() in description: matches.update(['resolution']) # format @@ -87,9 +87,9 @@ class LegendasdivxSubtitle(Subtitle): if formats[0] == "web-dl": formats.append("webdl") formats.append("webrip") - formats.append("web ") + formats.append("web") for frmt in formats: - if frmt.lower() in self.description: + if frmt in description: matches.update(['format']) break @@ -97,11 +97,11 @@ class LegendasdivxSubtitle(Subtitle): if video.video_codec: video_codecs = [video.video_codec.lower()] if video_codecs[0] == "h264": - formats.append("x264") + video_codecs.append("x264") elif video_codecs[0] == "h265": - formats.append("x265") - for vc in formats: - if vc.lower() in self.description: + video_codecs.append("x265") + for vc in video_codecs: + if vc in description: matches.update(['video_codec']) break @@ -109,9 +109,6 @@ class LegendasdivxSubtitle(Subtitle): # matches |= guess_matches(video, guessit(self.description)) return matches - - - class LegendasdivxProvider(Provider): """Legendasdivx Provider.""" languages = {Language('por', 'BR')} | {Language('por')} @@ -126,15 +123,21 @@ class LegendasdivxProvider(Provider): 'Cache-Control': 'no-cache' } loginpage = site + '/forum/ucp.php?mode=login' + logoutpage = site + '/sair.php' searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}' - language_list = list(languages) + download_link = site + '/modules.php{link}' def __init__(self, username, password): + # make sure login credentials are configured. + if any((username, password)) and not all((username, password)): + raise ConfigurationError('Username and password must be specified') self.username = username self.password = password + self.logged_in = False def initialize(self): self.session = Session() + self.session.headers.update(self.headers) self.login() def terminate(self): @@ -143,100 +146,103 @@ class LegendasdivxProvider(Provider): def login(self): logger.info('Logging in') - self.headers['Referer'] = self.site + '/index.php' - self.session.headers.update(self.headers.items()) + res = self.session.get(self.loginpage) bsoup = ParserBeautifulSoup(res.content, ['lxml']) - + _allinputs = bsoup.findAll('input') - fields = {} + data = {} + # necessary to set 'sid' for POST request for field in _allinputs: - fields[field.get('name')] = field.get('value') - - fields['username'] = self.username - fields['password'] = self.password - fields['autologin'] = 'on' - fields['viewonline'] = 'on' - - self.headers['Referer'] = self.loginpage - self.session.headers.update(self.headers.items()) - res = self.session.post(self.loginpage, fields) + data[field.get('name')] = field.get('value') + + data['username'] = self.username + data['password'] = self.password + + res = self.session.post(self.loginpage, data) + res.raise_for_status() + try: - logger.debug('Got session id %s' % + logger.debug('Logged in successfully: PHPSESSID: %s' % self.session.cookies.get_dict()['PHPSESSID']) - except KeyError as e: - logger.error(repr(e)) - logger.error("Didn't get session id, check your credentials") - return False + self.logged_in = True + except KeyError: + logger.error("Couldn't retrieve session ID, check your credentials") + raise AuthenticationError("Please check your credentials.") except Exception as e: - logger.error(repr(e)) - logger.error('uncached error #legendasdivx #AA') - return False - - return True + if 'bloqueado' in res.text.lower(): # blocked IP address + logger.error("LegendasDivx.pt :: Your IP is blocked on this server.") + raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) + logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) + raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e)) def logout(self): - # need to figure this out - return True + if self.logged_in: + logger.info('Legendasdivx:: Logging out') + r = self.session.get(self.logoutpage, timeout=10) + r.raise_for_status() + logger.debug('Legendasdivx :: Logged out') + self.logged_in = False + + def _process_page(self, video, bsoup, video_filename): - def _process_page(self, video, bsoup, querytext, videoname): subtitles = [] + _allsubs = bsoup.findAll("div", {"class": "sub_box"}) - lang = Language.fromopensubtitles("pob") + for _subbox in _allsubs: - hits=0 + hits = 0 for th in _subbox.findAll("th", {"class": "color2"}): if th.string == 'Hits:': hits = int(th.parent.find("td").string) if th.string == 'Idioma:': - lang = th.parent.find("td").find ("img").get ('src') - if 'brazil' in lang: + lang = th.parent.find("td").find("img").get('src') + if 'brazil' in lang.lower(): lang = Language.fromopensubtitles('pob') - else: + elif 'portugal' in lang.lower(): lang = Language.fromopensubtitles('por') - - description = _subbox.find("td", {"class": "td_desc brd_up"}) + else: + continue + # get description for matches + description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text() + #get subtitle link download = _subbox.find("a", {"class": "sub_download"}) - try: - # sometimes BSoup just doesn't get the link - logger.debug(download.get('href')) - except Exception as e: - logger.warning('skipping subbox on %s' % self.searchurl.format(query=querytext)) - continue + + # sometimes BSoup can't find 'a' tag and returns None. + i = 0 + while not (download): # must get it... trying again... + download = _subbox.find("a", {"class": "sub_download"}) + i=+1 + logger.debug("Try number {0} try!".format(str(i))) + dl = download.get('href') + logger.debug("Found subtitle on: %s" % self.download_link.format(link=dl)) + + # get subtitle uploader + sub_header = _subbox.find("div", {"class" :"sub_header"}) + uploader = sub_header.find("a").text if sub_header else 'anonymous' exact_match = False - if video.name.lower() in description.get_text().lower(): + if video.name.lower() in description.lower(): exact_match = True data = {'link': self.site + '/modules.php' + download.get('href'), 'exact_match': exact_match, 'hits': hits, - 'videoname': videoname, - 'description': description.get_text() } + 'uploader': uploader, + 'video_filename': video_filename, + 'description': description + } subtitles.append( LegendasdivxSubtitle(lang, video, data) ) return subtitles - def query(self, video, language): - try: - logger.debug('Got session id %s' % - self.session.cookies.get_dict()['PHPSESSID']) - except Exception as e: - self.login() - - language_ids = '0' - if isinstance(language, (tuple, list, set)): - if len(language) == 1: - language_ids = ','.join(sorted(l.opensubtitles for l in language)) - if language_ids == 'por': - language_ids = '&form_cat=28' - else: - language_ids = '&form_cat=29' - - videoname = video.name - videoname = os.path.basename(videoname) - videoname, _ = os.path.splitext(videoname) - # querytext = videoname.lower() + def query(self, video, languages): + + video_filename = video.name + video_filename = os.path.basename(video_filename) + video_filename, _ = os.path.splitext(video_filename) + video_filename = sanitize_release_group(video_filename) + _searchurl = self.searchurl if video.imdb_id is None: if isinstance(video, Episode): @@ -246,22 +252,47 @@ class LegendasdivxProvider(Provider): else: querytext = video.imdb_id + # language query filter + if isinstance(languages, (tuple, list, set)): + language_ids = ','.join(sorted(l.opensubtitles for l in languages)) + if 'por' in language_ids: # prioritize portuguese subtitles + lang_filter = '&form_cat=28' # pt + elif 'pob' in language_ids: + lang_filter = '&form_cat=29' # br + else: + lang_filter = '' + + querytext = querytext + lang_filter if lang_filter else querytext - # querytext = querytext.replace( - # ".", "+").replace("[", "").replace("]", "") - if language_ids != '0': - querytext = querytext + language_ids self.headers['Referer'] = self.site + '/index.php' self.session.headers.update(self.headers.items()) res = self.session.get(_searchurl.format(query=querytext)) - # form_cat=28 = br - # form_cat=29 = pt + if "A legenda não foi encontrada" in res.text: logger.warning('%s not found', querytext) return [] bsoup = ParserBeautifulSoup(res.content, ['html.parser']) - subtitles = self._process_page(video, bsoup, querytext, videoname) + subtitles = self._process_page(video, bsoup, video_filename) + + # search for more than 10 results (legendasdivx uses pagination) + # don't throttle - maximum results = 6 * 10 + MAX_PAGES = 6 + + #get number of pages bases on results found + page_header = bsoup.find("div", {"class": "pager_bar"}) + results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1) + num_pages = (int(results_found) // 10) + 1 + num_pages = min(MAX_PAGES, num_pages) + + if num_pages > 1: + for num_page in range(2, num_pages+2): + _search_next = self.searchurl.format(query=querytext) + "&page={0}".format(str(num_page)) + logger.debug("Moving to next page: %s" % _search_next) + res = self.session.get(_search_next) + next_page = ParserBeautifulSoup(res.content, ['html.parser']) + subs = self._process_page(video, next_page, video_filename) + subtitles.extend(subs) return subtitles @@ -270,9 +301,14 @@ class LegendasdivxProvider(Provider): def download_subtitle(self, subtitle): res = self.session.get(subtitle.page_link) + res.raise_for_status() if res: - if res.text == '500': - raise ValueError('Error 500 on server') + if res.status_code in ['500', '503']: + raise ServiceUnavailable("Legendasdivx.pt :: 503 - Service Unavailable") + elif 'limite' in res.text.lower(): # daily downloads limit reached + raise DownloadLimitReached("Legendasdivx.pt :: Download limit reached") + elif 'bloqueado' in res.text.lower(): # blocked IP address + raise ParseResponseError("Legendasdivx.pt :: %r" % res.text) archive = self._get_archive(res.content) # extract the subtitle @@ -281,7 +317,9 @@ class LegendasdivxProvider(Provider): subtitle.normalize() return subtitle - raise ValueError('Problems conecting to the server') + + logger.error("Legendasdivx.pt :: there was a problem retrieving subtitle (status %s)" % res.status_code) + return def _get_archive(self, content): # open the archive @@ -294,7 +332,6 @@ class LegendasdivxProvider(Provider): logger.debug('Identified zip archive') archive = zipfile.ZipFile(archive_stream) else: - # raise ParseResponseError('Unsupported compressed format') raise Exception('Unsupported compressed format') return archive @@ -305,7 +342,7 @@ class LegendasdivxProvider(Provider): _tmp.remove('.txt') _subtitle_extensions = tuple(_tmp) _max_score = 0 - _scores = get_scores (subtitle.video) + _scores = get_scores(subtitle.video) for name in archive.namelist(): # discard hidden files @@ -338,4 +375,4 @@ class LegendasdivxProvider(Provider): logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score)) return archive.read(_max_name) - raise ParseResponseError('Can not find the subtitle in the compressed file') + raise ValueError("No subtitle found on compressed file. Max score was 0") \ No newline at end of file diff --git a/libs/subliminal_patch/providers/opensubtitles.py b/libs/subliminal_patch/providers/opensubtitles.py index 012fe6c13..bcda1db85 100644 --- a/libs/subliminal_patch/providers/opensubtitles.py +++ b/libs/subliminal_patch/providers/opensubtitles.py @@ -44,6 +44,12 @@ class OpenSubtitlesSubtitle(_OpenSubtitlesSubtitle): self.wrong_fps = False self.skip_wrong_fps = skip_wrong_fps + def get_fps(self): + try: + return float(self.fps) + except: + return None + def get_matches(self, video, hearing_impaired=False): matches = super(OpenSubtitlesSubtitle, self).get_matches(video) @@ -138,11 +144,9 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider): return ServerProxy(url, SubZeroRequestsTransport(use_https=self.use_ssl, timeout=timeout or self.timeout, user_agent=os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"))) - def log_in(self, server_url=None): - if server_url: - self.terminate() - - self.server = self.get_server_proxy(server_url) + def log_in_url(self, server_url): + self.token = None + self.server = self.get_server_proxy(server_url) response = self.retry( lambda: checked( @@ -155,6 +159,25 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider): logger.debug('Logged in with token %r', self.token[:10]+"X"*(len(self.token)-10)) region.set("os_token", bytearray(self.token, encoding='utf-8')) + region.set("os_server_url", bytearray(server_url, encoding='utf-8')) + + def log_in(self): + logger.info('Logging in') + + try: + self.log_in_url(self.vip_url if self.is_vip else self.default_url) + + except Unauthorized: + if self.is_vip: + logger.info("VIP server login failed, falling back") + try: + self.log_in_url(self.default_url) + except Unauthorized: + pass + + if not self.token: + logger.error("Login failed, please check your credentials") + raise Unauthorized def use_token_or_login(self, func): if not self.token: @@ -167,45 +190,18 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider): return func() def initialize(self): - if self.is_vip: - self.server = self.get_server_proxy(self.vip_url) - logger.info("Using VIP server") - else: - self.server = self.get_server_proxy(self.default_url) - - logger.info('Logging in') - - token = str(region.get("os_token")) - if token is not NO_VALUE: - try: - logger.debug('Trying previous token: %r', token[:10]+"X"*(len(token)-10)) - checked(lambda: self.server.NoOperation(token)) - self.token = token - logger.debug("Using previous login token: %r", token[:10]+"X"*(len(token)-10)) - return - except (NoSession, Unauthorized): - logger.debug('Token not valid.') - pass - - try: - self.log_in() + token_cache = region.get("os_token") + url_cache = region.get("os_server_url") - except Unauthorized: - if self.is_vip: - logger.info("VIP server login failed, falling back") - self.log_in(self.default_url) - if self.token: - return + if token_cache is not NO_VALUE and url_cache is not NO_VALUE: + self.token = token_cache.decode("utf-8") + self.server = self.get_server_proxy(url_cache.decode("utf-8")) + logger.debug("Using previous login token: %r", self.token[:10] + "X" * (len(self.token) - 10)) + else: + self.server = None + self.token = None - logger.error("Login failed, please check your credentials") - def terminate(self): - if self.token: - try: - checked(lambda: self.server.LogOut(self.token)) - except: - logger.error("Logout failed: %s", traceback.format_exc()) - self.server = None self.token = None diff --git a/libs/subliminal_patch/providers/subssabbz.py b/libs/subliminal_patch/providers/subssabbz.py index 44fa42a46..66edc1c4f 100644 --- a/libs/subliminal_patch/providers/subssabbz.py +++ b/libs/subliminal_patch/providers/subssabbz.py @@ -13,7 +13,6 @@ from guessit import guessit from subliminal_patch.providers import Provider from subliminal_patch.subtitle import Subtitle from subliminal_patch.utils import sanitize, fix_inconsistent_naming -from subliminal.exceptions import ProviderError from subliminal.utils import sanitize_release_group from subliminal.subtitle import guess_matches from subliminal.video import Episode, Movie @@ -43,18 +42,23 @@ class SubsSabBzSubtitle(Subtitle): """SubsSabBz Subtitle.""" provider_name = 'subssabbz' - def __init__(self, langauge, filename, type, video, link): + def __init__(self, langauge, filename, type, video, link, fps, num_cds): super(SubsSabBzSubtitle, self).__init__(langauge) self.langauge = langauge self.filename = filename self.page_link = link self.type = type self.video = video + self.fps = fps + self.num_cds = num_cds self.release_info = os.path.splitext(filename)[0] @property def id(self): - return self.filename + return self.page_link + self.filename + + def get_fps(self): + return self.fps def make_picklable(self): self.content = None @@ -76,13 +80,21 @@ class SubsSabBzSubtitle(Subtitle): if video_filename == subtitle_filename: matches.add('hash') - matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) + if video.year and self.year == video.year: + matches.add('year') + + if isinstance(video, Movie): + if video.imdb_id and self.imdb_id == video.imdb_id: + matches.add('imdb_id') + + matches |= guess_matches(video, guessit(self.title, {'type': self.type, 'allowed_countries': [None]})) + matches |= guess_matches(video, guessit(self.filename, {'type': self.type, 'allowed_countries': [None]})) return matches class SubsSabBzProvider(Provider): """SubsSabBz Provider.""" - languages = {Language('por', 'BR')} | {Language(l) for l in [ + languages = {Language(l) for l in [ 'bul', 'eng' ]} @@ -136,19 +148,51 @@ class SubsSabBzProvider(Provider): soup = BeautifulSoup(response.content, 'lxml') rows = soup.findAll('tr', {'class': 'subs-row'}) - # Search on first 20 rows only - for row in rows[:20]: + # Search on first 25 rows only + for row in rows[:25]: a_element_wrapper = row.find('td', { 'class': 'c2field' }) if a_element_wrapper: element = a_element_wrapper.find('a') if element: link = element.get('href') - element = row.find('a', href = re.compile(r'.*showuser=.*')) - uploader = element.get_text() if element else None + notes = element.get('onmouseover') + title = element.get_text() + + try: + year = int(str(element.next_sibling).strip(' ()')) + except: + year = None + + td = row.findAll('td') + + try: + num_cds = int(td[6].get_text()) + except: + num_cds = None + + try: + fps = float(td[7].get_text()) + except: + fps = None + + try: + uploader = td[8].get_text() + except: + uploader = None + + try: + imdb_id = re.findall(r'imdb.com/title/(tt\d+)/?$', td[9].find('a').get('href'))[0] + except: + imdb_id = None + logger.info('Found subtitle link %r', link) - sub = self.download_archive_and_add_subtitle_files(link, language, video) - for s in sub: + sub = self.download_archive_and_add_subtitle_files(link, language, video, fps, num_cds) + for s in sub: + s.title = title + s.notes = notes + s.year = year s.uploader = uploader + s.imdb_id = imdb_id subtitles = subtitles + sub return subtitles @@ -160,23 +204,24 @@ class SubsSabBzProvider(Provider): pass else: seeking_subtitle_file = subtitle.filename - arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video) + arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video, + subtitle.fps, subtitle.num_cds) for s in arch: if s.filename == seeking_subtitle_file: subtitle.content = s.content - def process_archive_subtitle_files(self, archiveStream, language, video, link): + def process_archive_subtitle_files(self, archiveStream, language, video, link, fps, num_cds): subtitles = [] type = 'episode' if isinstance(video, Episode) else 'movie' - for file_name in archiveStream.namelist(): + for file_name in sorted(archiveStream.namelist()): if file_name.lower().endswith(('.srt', '.sub')): logger.info('Found subtitle file %r', file_name) - subtitle = SubsSabBzSubtitle(language, file_name, type, video, link) - subtitle.content = archiveStream.read(file_name) + subtitle = SubsSabBzSubtitle(language, file_name, type, video, link, fps, num_cds) + subtitle.content = fix_line_ending(archiveStream.read(file_name)) subtitles.append(subtitle) return subtitles - def download_archive_and_add_subtitle_files(self, link, language, video ): + def download_archive_and_add_subtitle_files(self, link, language, video, fps, num_cds): logger.info('Downloading subtitle %r', link) request = self.session.get(link, headers={ 'Referer': 'http://subs.sab.bz/index.php?' @@ -185,9 +230,9 @@ class SubsSabBzProvider(Provider): archive_stream = io.BytesIO(request.content) if is_rarfile(archive_stream): - return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link ) + return self.process_archive_subtitle_files(RarFile(archive_stream), language, video, link, fps, num_cds) elif is_zipfile(archive_stream): - return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link ) + return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps, num_cds) else: logger.error('Ignore unsupported archive %r', request.headers) return [] diff --git a/libs/subliminal_patch/providers/subsunacs.py b/libs/subliminal_patch/providers/subsunacs.py index 1d5187895..b4edc1af1 100644 --- a/libs/subliminal_patch/providers/subsunacs.py +++ b/libs/subliminal_patch/providers/subsunacs.py @@ -13,7 +13,6 @@ from guessit import guessit from subliminal_patch.providers import Provider from subliminal_patch.subtitle import Subtitle from subliminal_patch.utils import sanitize, fix_inconsistent_naming -from subliminal.exceptions import ProviderError from subliminal.utils import sanitize_release_group from subliminal.subtitle import guess_matches from subliminal.video import Episode, Movie @@ -42,18 +41,23 @@ class SubsUnacsSubtitle(Subtitle): """SubsUnacs Subtitle.""" provider_name = 'subsunacs' - def __init__(self, langauge, filename, type, video, link): + def __init__(self, langauge, filename, type, video, link, fps, num_cds): super(SubsUnacsSubtitle, self).__init__(langauge) self.langauge = langauge self.filename = filename self.page_link = link self.type = type self.video = video + self.fps = fps + self.num_cds = num_cds self.release_info = os.path.splitext(filename)[0] @property def id(self): - return self.filename + return self.page_link + self.filename + + def get_fps(self): + return self.fps def make_picklable(self): self.content = None @@ -75,13 +79,17 @@ class SubsUnacsSubtitle(Subtitle): if video_filename == subtitle_filename: matches.add('hash') - matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) + if video.year and self.year == video.year: + matches.add('year') + + matches |= guess_matches(video, guessit(self.title, {'type': self.type, 'allowed_countries': [None]})) + matches |= guess_matches(video, guessit(self.filename, {'type': self.type, 'allowed_countries': [None]})) return matches class SubsUnacsProvider(Provider): """SubsUnacs Provider.""" - languages = {Language('por', 'BR')} | {Language(l) for l in [ + languages = {Language(l) for l in [ 'bul', 'eng' ]} @@ -146,11 +154,43 @@ class SubsUnacsProvider(Provider): element = a_element_wrapper.find('a', {'class': 'tooltip'}) if element: link = element.get('href') - element = row.find('a', href = re.compile(r'.*/search\.php\?t=1\&(memid|u)=.*')) - uploader = element.get_text() if element else None + notes = element.get('title') + title = element.get_text() + + try: + year = int(element.find_next_sibling('span', {'class' : 'smGray'}).text.strip('\xa0()')) + except: + year = None + + td = row.findAll('td') + + try: + num_cds = int(td[1].get_text()) + except: + num_cds = None + + try: + fps = float(td[2].get_text()) + except: + fps = None + + try: + rating = float(td[3].find('img').get('title')) + except: + rating = None + + try: + uploader = td[5].get_text() + except: + uploader = None + logger.info('Found subtitle link %r', link) - sub = self.download_archive_and_add_subtitle_files('https://subsunacs.net' + link, language, video) - for s in sub: + sub = self.download_archive_and_add_subtitle_files('https://subsunacs.net' + link, language, video, fps, num_cds) + for s in sub: + s.title = title + s.notes = notes + s.year = year + s.rating = rating s.uploader = uploader subtitles = subtitles + sub return subtitles @@ -163,28 +203,29 @@ class SubsUnacsProvider(Provider): pass else: seeking_subtitle_file = subtitle.filename - arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video) + arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video, + subtitle.fps, subtitle.num_cds) for s in arch: if s.filename == seeking_subtitle_file: subtitle.content = s.content - def process_archive_subtitle_files(self, archiveStream, language, video, link): + def process_archive_subtitle_files(self, archiveStream, language, video, link, fps, num_cds): subtitles = [] type = 'episode' if isinstance(video, Episode) else 'movie' - for file_name in archiveStream.namelist(): + for file_name in sorted(archiveStream.namelist()): if file_name.lower().endswith(('.srt', '.sub', '.txt')): file_is_txt = True if file_name.lower().endswith('.txt') else False if file_is_txt and re.search(r'subsunacs\.net|танете част|прочети|^read ?me|procheti', file_name, re.I): logger.info('Ignore readme txt file %r', file_name) continue logger.info('Found subtitle file %r', file_name) - subtitle = SubsUnacsSubtitle(language, file_name, type, video, link) - subtitle.content = archiveStream.read(file_name) + subtitle = SubsUnacsSubtitle(language, file_name, type, video, link, fps, num_cds) + subtitle.content = fix_line_ending(archiveStream.read(file_name)) if file_is_txt == False or subtitle.is_valid(): subtitles.append(subtitle) return subtitles - def download_archive_and_add_subtitle_files(self, link, language, video ): + def download_archive_and_add_subtitle_files(self, link, language, video, fps, num_cds): logger.info('Downloading subtitle %r', link) request = self.session.get(link, headers={ 'Referer': 'https://subsunacs.net/search.php' @@ -193,9 +234,9 @@ class SubsUnacsProvider(Provider): archive_stream = io.BytesIO(request.content) if is_rarfile(archive_stream): - return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link ) + return self.process_archive_subtitle_files(RarFile(archive_stream), language, video, link, fps, num_cds) elif is_zipfile(archive_stream): - return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link ) + return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps, num_cds) else: logger.error('Ignore unsupported archive %r', request.headers) return [] diff --git a/libs/subliminal_patch/providers/xsubs.py b/libs/subliminal_patch/providers/xsubs.py index 9f8854b92..98160d62c 100644 --- a/libs/subliminal_patch/providers/xsubs.py +++ b/libs/subliminal_patch/providers/xsubs.py @@ -19,6 +19,8 @@ from subliminal.video import Episode logger = logging.getLogger(__name__) article_re = re.compile(r'^([A-Za-z]{1,3}) (.*)$') episode_re = re.compile(r'^(\d+)(-(\d+))*$') +episode_name_re = re.compile(r'^(.*?)( [\[(].{2,4}[\])])*$') +series_sanitize_re = re.compile(r'^(.*?)( \[\D+\])*$') class XSubsSubtitle(Subtitle): @@ -143,7 +145,11 @@ class XSubsProvider(Provider): for show_category in soup.findAll('seriesl'): if show_category.attrs['category'] == u'Σειρές': for show in show_category.findAll('series'): - show_ids[sanitize(show.text)] = int(show['srsid']) + series = show.text + series_match = series_sanitize_re.match(series) + if series_match: + series = series_match.group(1) + show_ids[sanitize(series)] = int(show['srsid']) break logger.debug('Found %d show ids', len(show_ids)) @@ -195,6 +201,9 @@ class XSubsProvider(Provider): soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) series = soup.find('name').text + series_match = episode_name_re.match(series) + if series_match: + series = series_match.group(1) # loop over season rows seasons = soup.findAll('series_group') diff --git a/libs/subliminal_patch/providers/yavkanet.py b/libs/subliminal_patch/providers/yavkanet.py index 1ea0d2ca0..42029634e 100644 --- a/libs/subliminal_patch/providers/yavkanet.py +++ b/libs/subliminal_patch/providers/yavkanet.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import import logging -import re import io import os from random import randint @@ -13,7 +12,6 @@ from guessit import guessit from subliminal_patch.providers import Provider from subliminal_patch.subtitle import Subtitle from subliminal_patch.utils import sanitize -from subliminal.exceptions import ProviderError from subliminal.utils import sanitize_release_group from subliminal.subtitle import guess_matches from subliminal.video import Episode, Movie @@ -27,18 +25,22 @@ class YavkaNetSubtitle(Subtitle): """YavkaNet Subtitle.""" provider_name = 'yavkanet' - def __init__(self, langauge, filename, type, video, link): + def __init__(self, langauge, filename, type, video, link, fps): super(YavkaNetSubtitle, self).__init__(langauge) self.langauge = langauge self.filename = filename self.page_link = link self.type = type self.video = video + self.fps = fps self.release_info = os.path.splitext(filename)[0] @property def id(self): - return self.filename + return self.page_link + self.filename + + def get_fps(self): + return self.fps def make_picklable(self): self.content = None @@ -60,7 +62,11 @@ class YavkaNetSubtitle(Subtitle): if video_filename == subtitle_filename: matches.add('hash') - matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) + if video.year and self.year == video.year: + matches.add('year') + + matches |= guess_matches(video, guessit(self.title, {'type': self.type, 'allowed_countries': [None]})) + matches |= guess_matches(video, guessit(self.filename, {'type': self.type, 'allowed_countries': [None]})) return matches @@ -122,18 +128,34 @@ class YavkaNetProvider(Provider): return subtitles soup = BeautifulSoup(response.content, 'lxml') - rows = soup.findAll('tr', {'class': 'info'}) + rows = soup.findAll('tr') - # Search on first 20 rows only - for row in rows[:20]: + # Search on first 25 rows only + for row in rows[:25]: element = row.find('a', {'class': 'selector'}) if element: link = element.get('href') + notes = element.get('content') + title = element.get_text() + + try: + year = int(element.find_next_sibling('span').text.strip('()')) + except: + year = None + + try: + fps = float(row.find('span', {'title': 'Кадри в секунда'}).text.strip()) + except: + fps = None + element = row.find('a', {'class': 'click'}) uploader = element.get_text() if element else None logger.info('Found subtitle link %r', link) - sub = self.download_archive_and_add_subtitle_files('http://yavka.net/' + link, language, video) - for s in sub: + sub = self.download_archive_and_add_subtitle_files('http://yavka.net/' + link, language, video, fps) + for s in sub: + s.title = title + s.notes = notes + s.year = year s.uploader = uploader subtitles = subtitles + sub return subtitles @@ -146,23 +168,24 @@ class YavkaNetProvider(Provider): pass else: seeking_subtitle_file = subtitle.filename - arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video) + arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video, + subtitle.fps) for s in arch: if s.filename == seeking_subtitle_file: subtitle.content = s.content - def process_archive_subtitle_files(self, archiveStream, language, video, link): + def process_archive_subtitle_files(self, archiveStream, language, video, link, fps): subtitles = [] type = 'episode' if isinstance(video, Episode) else 'movie' for file_name in archiveStream.namelist(): if file_name.lower().endswith(('.srt', '.sub')): logger.info('Found subtitle file %r', file_name) - subtitle = YavkaNetSubtitle(language, file_name, type, video, link) - subtitle.content = archiveStream.read(file_name) + subtitle = YavkaNetSubtitle(language, file_name, type, video, link, fps) + subtitle.content = fix_line_ending(archiveStream.read(file_name)) subtitles.append(subtitle) return subtitles - def download_archive_and_add_subtitle_files(self, link, language, video ): + def download_archive_and_add_subtitle_files(self, link, language, video, fps): logger.info('Downloading subtitle %r', link) request = self.session.get(link, headers={ 'Referer': 'http://yavka.net/subtitles.php' @@ -171,9 +194,9 @@ class YavkaNetProvider(Provider): archive_stream = io.BytesIO(request.content) if is_rarfile(archive_stream): - return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link ) + return self.process_archive_subtitle_files(RarFile(archive_stream), language, video, link, fps) elif is_zipfile(archive_stream): - return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link ) + return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps) else: logger.error('Ignore unsupported archive %r', request.headers) return [] diff --git a/libs/subliminal_patch/subtitle.py b/libs/subliminal_patch/subtitle.py index ce89e74d3..249fe633f 100644 --- a/libs/subliminal_patch/subtitle.py +++ b/libs/subliminal_patch/subtitle.py @@ -89,6 +89,13 @@ class Subtitle(Subtitle_): def numeric_id(self): raise NotImplemented + def get_fps(self): + """ + :return: frames per second or None if not supported + :rtype: float + """ + return None + def make_picklable(self): """ some subtitle instances might have unpicklable objects stored; clean them up here @@ -264,10 +271,14 @@ class Subtitle(Subtitle_): else: logger.info("Got format: %s", subs.format) except pysubs2.UnknownFPSError: - # if parsing failed, suggest our media file's fps - logger.info("No FPS info in subtitle. Using our own media FPS for the MicroDVD subtitle: %s", - self.plex_media_fps) - subs = pysubs2.SSAFile.from_string(text, fps=self.plex_media_fps) + # if parsing failed, use frame rate from provider + sub_fps = self.get_fps() + if not isinstance(sub_fps, float) or sub_fps < 10.0: + # or use our media file's fps as a fallback + sub_fps = self.plex_media_fps + logger.info("No FPS info in subtitle. Using our own media FPS for the MicroDVD subtitle: %s", + self.plex_media_fps) + subs = pysubs2.SSAFile.from_string(text, fps=sub_fps) unicontent = self.pysubs2_to_unicode(subs) self.content = unicontent.encode(self._guessed_encoding)