From e7ccafe9589d939e7db786224519a7611179436e Mon Sep 17 00:00:00 2001 From: josdion Date: Sat, 30 May 2020 17:26:26 +0300 Subject: [PATCH 1/7] YIFY Subtitles Provider added --- .../providers/yifysubtitles.py | 188 ++++++++++++++++++ views/settingsproviders.html | 18 +- 2 files changed, 203 insertions(+), 3 deletions(-) create mode 100644 libs/subliminal_patch/providers/yifysubtitles.py diff --git a/libs/subliminal_patch/providers/yifysubtitles.py b/libs/subliminal_patch/providers/yifysubtitles.py new file mode 100644 index 000000000..3e085851d --- /dev/null +++ b/libs/subliminal_patch/providers/yifysubtitles.py @@ -0,0 +1,188 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import +import logging +import re +import io +import codecs +from hashlib import sha1 +from random import randint +from zipfile import ZipFile, is_zipfile +from bs4 import BeautifulSoup +from requests import Session +from guessit import guessit +from dogpile.cache.api import NO_VALUE +from subliminal import Movie, region +from subliminal.subtitle import fix_line_ending +from subliminal_patch.providers import Provider +from subliminal_patch.subtitle import Subtitle, guess_matches +from subzero.language import Language +from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST + +logger = logging.getLogger(__name__) + + +class YifySubtitle(Subtitle): + """YIFY Subtitles""" + provider_name = 'yifysubtitles' + + def __init__(self, language, page_link, release, uploader, sub_link, rating, hi): + super(YifySubtitle, self).__init__(language) + self.page_link = page_link + self.hearing_impaired = hi + self.release_info = release + self.uploader = uploader + self.sub_link = sub_link + self.rating = rating + + @property + def id(self): + return self.page_link + + def make_picklable(self): + self.content = None + self._is_valid = False + return self + + def get_matches(self, video): + matches = set() + matches.add('imdb_id') + matches |= guess_matches(video, guessit(self.release_info, 
video.hints)) + return matches + + +class YifySubtitlesProvider(Provider): + """YIFY Subtitles Provider.""" + + YifyLanguages = [ + ('Albanian', 'sqi', None), + ('Arabic', 'ara', None), + ('Bengali', 'ben', None), + ('Brazilian Portuguese', 'por', 'BR'), + ('Bulgarian', 'bul', None), + ('Chinese', 'zho', None), + ('Croatian', 'hrv', None), + ('Czech', 'ces', None), + ('Danish', 'dan', None), + ('Dutch', 'nld', None), + ('English', 'eng', None), + ('Farsi/Persian', 'fas', None), + ('Finnish', 'fin', None), + ('French', 'fra', None), + ('German', 'deu', None), + ('Greek', 'ell', None), + ('Hebrew', 'heb', None), + ('Hungarian', 'hun', None), + ('Indonesian', 'ind', None), + ('Italian', 'ita', None), + ('Japanese', 'jpn', None), + ('Korean', 'kor', None), + ('Lithuanian', 'lit', None), + ('Macedonian', 'mkd', None), + ('Malay', 'msa', None), + ('Norwegian', 'nor', None), + ('Polish', 'pol', None), + ('Portuguese', 'por', None), + ('Romanian', 'ron', None), + ('Russian', 'rus', None), + ('Serbian', 'srp', None), + ('Slovenian', 'slv', None), + ('Spanish', 'spa', None), + ('Swedish', 'swe', None), + ('Thai', 'tha', None), + ('Turkish', 'tur', None), + ('Urdu', 'urd', None), + ('Vietnamese', 'vie', None) + ] + + languages = {Language(l, c) for (_, l, c) in YifyLanguages} + server_url = 'https://www.yifysubtitles.com' + + def initialize(self): + self.session = Session() + self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)] + self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" + self.session.headers["Accept-Language"] = "en-US,en;q=0.5" + self.session.headers["Accept-Encoding"] = "gzip, deflate" + self.session.headers["DNT"] = "1" + self.session.headers["Connection"] = "keep-alive" + self.session.headers["Upgrade-Insecure-Requests"] = "1" + self.session.headers["Cache-Control"] = "max-age=0" + + def terminate(self): + self.session.close() + + def _parse_row(self, row, languages): + td = 
row.findAll('td') + rating = int(td[0].text) + sub_lang = td[1].text + release = re.sub(r'^subtitle ', '', td[2].text) + sub_link = td[2].find('a').get('href') + sub_link = re.sub(r'^/subtitles/', self.server_url + '/subtitle/', sub_link) + '.zip' + hi = True if td[3].find('span', {'class': 'hi-subtitle'}) else False + uploader = td[4].text + page_link = self.server_url + td[5].find('a').get('href') + + _, l, c = next(x for x in self.YifyLanguages if x[0] == sub_lang) + lang = Language(l, c) + if languages & set([lang]): + return [YifySubtitle(lang, page_link, release, uploader, sub_link, rating, hi)] + + return [] + + def _query(self, languages, video): + subtitles = [] + + logger.info('Searching subtitle %r', video.imdb_id) + response = self.session.get(self.server_url + '/movie-imdb/' + video.imdb_id, + allow_redirects=False, timeout=10, + headers={'Referer': self.server_url}) + response.raise_for_status() + + if response.status_code != 200: + logger.debug('No subtitles found') + return subtitles + + soup = BeautifulSoup(response.content, 'lxml') + tbl = soup.find('table', {'class': 'other-subs'}) + tbl_body = tbl.find('tbody') if tbl else None + rows = tbl_body.findAll('tr') if tbl_body else [] + + for row in rows: + try: + subtitles = subtitles + self._parse_row(row, languages) + except Exception as e: + pass + + subtitles.sort(key=lambda x: x.rating, reverse=True) + return subtitles + + def list_subtitles(self, video, languages): + return self._query(languages, video) if isinstance(video, Movie) and video.imdb_id else [] + + def download_subtitle(self, subtitle): + logger.info('Downloading subtitle %r', subtitle.sub_link) + cache_key = sha1(subtitle.sub_link.encode("utf-8")).digest() + request = region.get(cache_key) + if request is NO_VALUE: + request = self.session.get(subtitle.sub_link, headers={ + 'Referer': subtitle.page_link + }) + request.raise_for_status() + region.set(cache_key, request) + else: + logger.info('Cache file: %s', 
codecs.encode(cache_key, 'hex_codec').decode('utf-8')) + + archive_stream = io.BytesIO(request.content) + if is_zipfile(archive_stream): + self._process_archive(ZipFile(archive_stream), subtitle) + else: + logger.error('Ignore unsupported archive %r', request.headers) + + def _process_archive(self, archive_stream, subtitle): + for file_name in archive_stream.namelist(): + if file_name.lower().endswith(('.srt', '.sub')): + logger.info('Found subtitle file %r', file_name) + subtitle.content = fix_line_ending(archive_stream.read(file_name)) + if subtitle.is_valid(): + return + diff --git a/views/settingsproviders.html b/views/settingsproviders.html index b073645a5..0f43c51c2 100644 --- a/views/settingsproviders.html +++ b/views/settingsproviders.html @@ -398,7 +398,7 @@
@@ -478,7 +478,7 @@
@@ -626,7 +626,19 @@
+
+ + +
+
+ YIFY Subtitles +
+
+
From 9be61914784d16668ebc6289e3d965ece1e0a578 Mon Sep 17 00:00:00 2001 From: josdion Date: Sun, 31 May 2020 11:43:41 +0300 Subject: [PATCH 2/7] set yifysubtitles video_types to movie only --- README.md | 1 + libs/subliminal_patch/providers/yifysubtitles.py | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 542e769dc..84bd854fd 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,7 @@ If you need something that is not already part of Bazarr, feel free to create a * Wizdom * XSubs * Yavka.net +* YIFY Subtitles * Zimuku ## Screenshot diff --git a/libs/subliminal_patch/providers/yifysubtitles.py b/libs/subliminal_patch/providers/yifysubtitles.py index 3e085851d..560806efe 100644 --- a/libs/subliminal_patch/providers/yifysubtitles.py +++ b/libs/subliminal_patch/providers/yifysubtitles.py @@ -96,6 +96,7 @@ class YifySubtitlesProvider(Provider): languages = {Language(l, c) for (_, l, c) in YifyLanguages} server_url = 'https://www.yifysubtitles.com' + video_types = (Movie,) def initialize(self): self.session = Session() @@ -129,11 +130,11 @@ class YifySubtitlesProvider(Provider): return [] - def _query(self, languages, video): + def query(self, languages, imdb_id): subtitles = [] - logger.info('Searching subtitle %r', video.imdb_id) - response = self.session.get(self.server_url + '/movie-imdb/' + video.imdb_id, + logger.info('Searching subtitle %r', imdb_id) + response = self.session.get(self.server_url + '/movie-imdb/' + imdb_id, allow_redirects=False, timeout=10, headers={'Referer': self.server_url}) response.raise_for_status() @@ -157,7 +158,7 @@ class YifySubtitlesProvider(Provider): return subtitles def list_subtitles(self, video, languages): - return self._query(languages, video) if isinstance(video, Movie) and video.imdb_id else [] + return self.query(languages, video.imdb_id) if isinstance(video, Movie) and video.imdb_id else [] def download_subtitle(self, subtitle): logger.info('Downloading subtitle %r', 
subtitle.sub_link) From c89d542479af2205dc90f59ab71cf52e89b09b1a Mon Sep 17 00:00:00 2001 From: josdion Date: Sun, 31 May 2020 13:46:16 +0300 Subject: [PATCH 3/7] fix encoding problems in subtitle class - don't use directly self._guessed_encoding, but instead call the function get_encoding() as the variable can be None. - use self.guess_encoding() only if self.encoding is not set. --- libs/subliminal_patch/subtitle.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/libs/subliminal_patch/subtitle.py b/libs/subliminal_patch/subtitle.py index 4b2c5510d..4ef52edbe 100644 --- a/libs/subliminal_patch/subtitle.py +++ b/libs/subliminal_patch/subtitle.py @@ -82,9 +82,7 @@ class Subtitle(Subtitle_): return if not isinstance(self.content, text_type): - if self.encoding: - return self.content.decode(self.encoding, errors='replace') - return self.content.decode(self.guess_encoding(), errors='replace') + return self.content.decode(self.get_encoding(), errors='replace') return self.content @@ -106,8 +104,11 @@ class Subtitle(Subtitle_): """ return self + def get_encoding(self): + return self.encoding if self.encoding else self.guess_encoding() + def set_encoding(self, encoding): - ge = self.guess_encoding() + ge = self.get_encoding() if encoding == ge: return @@ -115,6 +116,7 @@ class Subtitle(Subtitle_): logger.debug("Changing encoding: to %s, from %s", encoding, ge) self.content = unicontent.encode(encoding) self._guessed_encoding = encoding + self.encoding = encoding def normalize(self): """ @@ -284,7 +286,7 @@ class Subtitle(Subtitle_): subs = pysubs2.SSAFile.from_string(text, fps=sub_fps) unicontent = self.pysubs2_to_unicode(subs) - self.content = unicontent.encode(self._guessed_encoding) + self.content = unicontent.encode(self.get_encoding()) except: logger.exception("Couldn't convert subtitle %s to .srt format: %s", self, traceback.format_exc()) return False @@ -364,8 +366,8 @@ class Subtitle(Subtitle_): :return: string """ if
not self.mods: - return fix_text(self.content.decode(encoding=self._guessed_encoding), **ftfy_defaults).encode( - encoding=self._guessed_encoding) + return fix_text(self.content.decode(encoding=self.get_encoding()), **ftfy_defaults).encode( + encoding=self.get_encoding()) submods = SubtitleModifications(debug=debug) if submods.load(content=self.text, language=self.language): @@ -374,7 +376,7 @@ class Subtitle(Subtitle_): self.mods = submods.mods_used content = fix_text(self.pysubs2_to_unicode(submods.f, format=format), **ftfy_defaults)\ - .encode(encoding=self._guessed_encoding) + .encode(encoding=self.get_encoding()) submods.f = None del submods return content From a9624fb81890530cda2168b33c29a8aaa6a4ee58 Mon Sep 17 00:00:00 2001 From: josdion Date: Sun, 31 May 2020 17:41:18 +0300 Subject: [PATCH 4/7] added "Show Only Desired Languages" option for embedded subtitles --- bazarr/api.py | 9 +++++++++ bazarr/config.py | 1 + views/settingssubtitles.html | 13 +++++++++++++ 3 files changed, 23 insertions(+) diff --git a/bazarr/api.py b/bazarr/api.py index cd11e45aa..ffddb5e49 100644 --- a/bazarr/api.py +++ b/bazarr/api.py @@ -424,6 +424,10 @@ class Episodes(Resource): "code2": subtitle[0], "code3": alpha3_from_alpha2(subtitle[0]), "forced": True if len(subtitle) > 1 else False} + + if settings.general.getboolean('embedded_subs_show_desired'): + item['subtitles'] = [x for x in item['subtitles'] if + x[0]['code2'] in ast.literal_eval(desired_languages) or x[1]] else: item.update({"subtitles": []}) @@ -713,6 +717,11 @@ class Movies(Resource): "code2": language[0], "code3": alpha3_from_alpha2(language[0]), "forced": True if len(language) > 1 else False} + + if settings.general.getboolean('embedded_subs_show_desired'): + item['subtitles'] = [x for x in item['subtitles'] if + x['code2'] in [y['code2'] for y in item['languages']] or x['path']] + item['subtitles'] = sorted(item['subtitles'], key=itemgetter('name', 'forced')) else: item.update({"subtitles": []}) diff --git 
a/bazarr/config.py b/bazarr/config.py index 1ac551e76..b38365d3f 100644 --- a/bazarr/config.py +++ b/bazarr/config.py @@ -39,6 +39,7 @@ defaults = { 'page_size_manual_search': '10', 'minimum_score_movie': '70', 'use_embedded_subs': 'True', + 'embedded_subs_show_desired': 'True', 'utf8_encode': 'True', 'ignore_pgs_subs': 'False', 'adaptive_searching': 'False', diff --git a/views/settingssubtitles.html b/views/settingssubtitles.html index 4e768e011..34c50c9f0 100644 --- a/views/settingssubtitles.html +++ b/views/settingssubtitles.html @@ -208,6 +208,18 @@ +
+
+ Show Only Desired Languages +
+
+ + +
+

Post-Processing

@@ -426,6 +438,7 @@ $('#settings-general-multithreading').prop('checked', {{'true' if settings.general.getboolean('multithreading') else 'false'}}).trigger('change'); $('#settings-general-use_embedded_subs').prop('checked', {{'true' if settings.general.getboolean('use_embedded_subs') else 'false'}}).trigger('change'); $('#settings-general-ignore_pgs_subs').prop('checked', {{'true' if settings.general.getboolean('ignore_pgs_subs') else 'false'}}).trigger('change'); + $('#settings-general-embedded_subs_show_desired').prop('checked', {{'true' if settings.general.getboolean('embedded_subs_show_desired') else 'false'}}).trigger('change'); $('#settings-general-utf8_encode').prop('checked', {{'true' if settings.general.getboolean('utf8_encode') else 'false'}}).trigger('change'); $('#settings-general-chmod_enabled').prop('checked', {{'true' if settings.general.getboolean('chmod_enabled') else 'false'}}).trigger('change'); $('#settings-general-use_postprocessing').prop('checked', {{'true' if settings.general.getboolean('use_postprocessing') else 'false'}}).trigger('change'); From 197b84c36e541e31514498c0792bf81094b9770d Mon Sep 17 00:00:00 2001 From: josdion Date: Sun, 31 May 2020 18:07:59 +0300 Subject: [PATCH 5/7] subssabbz, subsunacs - reduce score of subtitles for multi-disc movie releases --- libs/subliminal_patch/providers/subssabbz.py | 18 +++++++++++++----- libs/subliminal_patch/providers/subsunacs.py | 9 ++++++++- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/libs/subliminal_patch/providers/subssabbz.py b/libs/subliminal_patch/providers/subssabbz.py index 3c095bcff..1b365ea2d 100644 --- a/libs/subliminal_patch/providers/subssabbz.py +++ b/libs/subliminal_patch/providers/subssabbz.py @@ -25,6 +25,7 @@ from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST logger = logging.getLogger(__name__) + def fix_tv_naming(title): """Fix TV show titles with inconsistent naming using dictionary, but do not sanitize them. 
@@ -41,13 +42,13 @@ def fix_tv_naming(title): "Doctor Who (2005)": "Doctor Who", }, True) + class SubsSabBzSubtitle(Subtitle): """SubsSabBz Subtitle.""" provider_name = 'subssabbz' - def __init__(self, langauge, filename, type, video, link, fps, num_cds): - super(SubsSabBzSubtitle, self).__init__(langauge) - self.langauge = langauge + def __init__(self, language, filename, type, video, link, fps, num_cds): + super(SubsSabBzSubtitle, self).__init__(language) self.filename = filename self.page_link = link self.type = type @@ -83,7 +84,7 @@ class SubsSabBzSubtitle(Subtitle): if ((video_filename == subtitle_filename) or (self.single_file is True and video_filename in self.notes.upper())): - matches.add('hash') + matches.add('hash') if video.year and self.year == video.year: matches.add('year') @@ -93,7 +94,14 @@ class SubsSabBzSubtitle(Subtitle): matches.add('imdb_id') matches |= guess_matches(video, guessit(self.title, {'type': self.type})) - matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) + + guess_filename = guessit(self.filename, video.hints) + matches |= guess_matches(video, guess_filename) + + if isinstance(video, Movie) and (self.num_cds > 1 or 'cd' in guess_filename): + # reduce score of subtitles for multi-disc movie releases + return set() + return matches diff --git a/libs/subliminal_patch/providers/subsunacs.py b/libs/subliminal_patch/providers/subsunacs.py index 93f43f9d5..59656a08b 100644 --- a/libs/subliminal_patch/providers/subsunacs.py +++ b/libs/subliminal_patch/providers/subsunacs.py @@ -90,7 +90,14 @@ class SubsUnacsSubtitle(Subtitle): matches.add('year') matches |= guess_matches(video, guessit(self.title, {'type': self.type})) - matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) + + guess_filename = guessit(self.filename, video.hints) + matches |= guess_matches(video, guess_filename) + + if isinstance(video, Movie) and (self.num_cds > 1 or 'cd' in guess_filename): + # reduce score of subtitles 
for multi-disc movie releases + return set() + return matches From 3e4a75aeca0c1d5fa70a35d8d878761592bb6d34 Mon Sep 17 00:00:00 2001 From: josdion Date: Sun, 31 May 2020 18:24:48 +0300 Subject: [PATCH 6/7] fix "Show Only Desired Languages" for movies without selected languages --- bazarr/api.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bazarr/api.py b/bazarr/api.py index ffddb5e49..438fdcdb5 100644 --- a/bazarr/api.py +++ b/bazarr/api.py @@ -719,8 +719,10 @@ class Movies(Resource): "forced": True if len(language) > 1 else False} if settings.general.getboolean('embedded_subs_show_desired'): - item['subtitles'] = [x for x in item['subtitles'] if - x['code2'] in [y['code2'] for y in item['languages']] or x['path']] + desired_lang_list = [] + if item['languages'] and item['languages'] != 'None': + desired_lang_list = [x['code2'] for x in item['languages']] + item['subtitles'] = [x for x in item['subtitles'] if x['code2'] in desired_lang_list or x['path']] item['subtitles'] = sorted(item['subtitles'], key=itemgetter('name', 'forced')) else: From 8acb488fba1bc804c7ef379c7d683a1eb786039e Mon Sep 17 00:00:00 2001 From: josdion Date: Sun, 31 May 2020 19:01:53 +0300 Subject: [PATCH 7/7] delete unsupported subtitle archive files from cache --- libs/subliminal_patch/providers/subssabbz.py | 1 + libs/subliminal_patch/providers/subsunacs.py | 1 + libs/subliminal_patch/providers/yavkanet.py | 9 +++++---- libs/subliminal_patch/providers/yifysubtitles.py | 1 + 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/libs/subliminal_patch/providers/subssabbz.py b/libs/subliminal_patch/providers/subssabbz.py index 1b365ea2d..386e3c19f 100644 --- a/libs/subliminal_patch/providers/subssabbz.py +++ b/libs/subliminal_patch/providers/subssabbz.py @@ -255,4 +255,5 @@ class SubsSabBzProvider(Provider): return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps, num_cds) else: logger.error('Ignore unsupported archive %r', 
request.headers) + region.delete(cache_key) return [] diff --git a/libs/subliminal_patch/providers/subsunacs.py b/libs/subliminal_patch/providers/subsunacs.py index 59656a08b..19c5eff86 100644 --- a/libs/subliminal_patch/providers/subsunacs.py +++ b/libs/subliminal_patch/providers/subsunacs.py @@ -277,4 +277,5 @@ class SubsUnacsProvider(Provider): return self.process_archive_subtitle_files(SevenZipFile(archive_stream), language, video, link, fps, num_cds) else: logger.error('Ignore unsupported archive %r', request.headers) + region.delete(cache_key) return [] diff --git a/libs/subliminal_patch/providers/yavkanet.py b/libs/subliminal_patch/providers/yavkanet.py index 70a8ad830..6de60ef35 100644 --- a/libs/subliminal_patch/providers/yavkanet.py +++ b/libs/subliminal_patch/providers/yavkanet.py @@ -25,13 +25,13 @@ from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST logger = logging.getLogger(__name__) + class YavkaNetSubtitle(Subtitle): """YavkaNet Subtitle.""" provider_name = 'yavkanet' - def __init__(self, langauge, filename, type, video, link, fps): - super(YavkaNetSubtitle, self).__init__(langauge) - self.langauge = langauge + def __init__(self, language, filename, type, video, link, fps): + super(YavkaNetSubtitle, self).__init__(language) self.filename = filename self.page_link = link self.type = type @@ -66,7 +66,7 @@ class YavkaNetSubtitle(Subtitle): if ((video_filename == subtitle_filename) or (self.single_file is True and video_filename in self.notes.upper())): - matches.add('hash') + matches.add('hash') if video.year and self.year == video.year: matches.add('year') @@ -212,4 +212,5 @@ class YavkaNetProvider(Provider): return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps) else: logger.error('Ignore unsupported archive %r', request.headers) + region.delete(cache_key) return [] diff --git a/libs/subliminal_patch/providers/yifysubtitles.py b/libs/subliminal_patch/providers/yifysubtitles.py index 
560806efe..59d683577 100644 --- a/libs/subliminal_patch/providers/yifysubtitles.py +++ b/libs/subliminal_patch/providers/yifysubtitles.py @@ -178,6 +178,7 @@ class YifySubtitlesProvider(Provider): self._process_archive(ZipFile(archive_stream), subtitle) else: logger.error('Ignore unsupported archive %r', request.headers) + region.delete(cache_key) def _process_archive(self, archive_stream, subtitle): for file_name in archive_stream.namelist():