You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
bazarr/custom_libs/subliminal_patch/providers/hosszupuska.py

252 lines
9.9 KiB

# coding: utf-8
from __future__ import absolute_import
import io
import six
import logging
import re
import os
import time
from babelfish import language_converters
from subzero.language import Language
from requests import Session
from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal.providers import ParserBeautifulSoup
from subliminal_patch.exceptions import ProviderError
from subliminal.score import get_equivalent_release_groups
from subliminal_patch.subtitle import Subtitle, guess_matches
from subliminal.utils import sanitize, sanitize_release_group
from subliminal.video import Episode
from zipfile import ZipFile, is_zipfile
from rarfile import RarFile, is_rarfile
from subliminal_patch.utils import sanitize, fix_inconsistent_naming as _fix_inconsistent_naming
from guessit import guessit
def fix_inconsistent_naming(title):
"""Fix titles with inconsistent naming using dictionary and sanitize them.
:param str title: original title.
:return: new title.
:rtype: str
"""
return _fix_inconsistent_naming(title, {"Stargate Origins": "Stargate: Origins",
"Marvel's Agents of S.H.I.E.L.D.": "Marvels+Agents+of+S.H.I.E.L.D",
"Mayans M.C.": "Mayans MC"}, True)
logger = logging.getLogger(__name__)
language_converters.register('hosszupuska = subliminal_patch.converters.hosszupuska:HosszupuskaConverter')
_SUB_ENGLISH_NAME_RE = re.compile(r's(\d{1,2})e(\d{1,2})')
_SUB_YEAR_RE = re.compile(r"(?<=\()(\d{4})(?=\))")
class HosszupuskaSubtitle(Subtitle):
"""Hosszupuska Subtitle."""
provider_name = 'hosszupuska'
def __str__(self):
subtit = (f"Subtitle id: {self.subtitle_id} Series: {self.series} "
f"Season: {self.season} Episode: {self.episode} "
f"Releases: {self.releases}")
if self.year:
subtit = f"{subtit} Year: {self.year}"
if six.PY3:
return subtit
return subtit.encode('utf-8')
def __init__(self, language, page_link, subtitle_id, series, season, episode, version,
releases, year, asked_for_release_group=None, asked_for_episode=None):
super(HosszupuskaSubtitle, self).__init__(language, page_link=page_link)
self.subtitle_id = subtitle_id
self.series = series
self.season = season
self.episode = episode
self.version = version
self.releases = releases
self.year = year
if year:
self.year = int(year)
self.release_info = u", ".join(releases)
self.page_link = page_link
self.asked_for_release_group = asked_for_release_group
self.asked_for_episode = asked_for_episode
def __repr__(self):
ep_addon = (" S%02dE%02d" % (self.season, self.episode)) if self.episode else ""
return '<%s %r [%s]>' % (
self.__class__.__name__, u"%s%s%s [%s]" % (self.series, " (%s)" % self.year if self.year else "", ep_addon,
self.release_info), self.language)
@property
def id(self):
return str(self.subtitle_id)
def get_matches(self, video):
matches = set()
# series
if video.series and ( sanitize(self.series) == sanitize(fix_inconsistent_naming(video.series)) or sanitize(self.series) == sanitize(video.series)):
matches.add('series')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# year
if ('series' in matches and video.original_series and self.year is None or
video.year and video.year == self.year):
matches.add('year')
logger.debug("Matches: %s", matches)
# release_group
if (video.release_group and self.version and
any(r in sanitize_release_group(self.version)
for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
matches.add('release_group')
matches |= guess_matches(video, guessit(self.release_info), {"type": "episode"})
return matches
class HosszupuskaProvider(Provider, ProviderSubtitleArchiveMixin):
"""Hosszupuska Provider."""
languages = {Language('hun', 'HU')} | {Language(l) for l in [
'hun', 'eng'
]}
video_types = (Episode,)
server_url = 'http://hosszupuskasub.com/'
subtitle_class = HosszupuskaSubtitle
hearing_impaired_verifiable = False
multi_result_throttle = 2 # seconds
def initialize(self):
self.session = Session()
self.session.headers = {'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")}
def terminate(self):
self.session.close()
def get_language(self, text):
if text == '1.gif':
return Language.fromhosszupuska('hu')
if text == '2.gif':
return Language.fromhosszupuska('en')
return None
def query(self, series, season, episode, year=None, video=None):
# Search for s01e03 instead of s1e3
seasona = "%02d" % season
episodea = "%02d" % episode
seriesa = fix_inconsistent_naming(series)
seriesa = series.replace(' ', '+')
# get the episode page
logger.info('Getting the page for episode %s', episode)
url = self.server_url + "sorozatok.php?cim=" + seriesa + "&evad="+str(seasona) + \
"&resz="+str(episodea)+"&nyelvtipus=%25&x=24&y=8"
logger.info('Url %s', url)
r = self.session.get(url, timeout=10).content
soup = ParserBeautifulSoup(r, ['lxml'])
subtitles = []
for num, temp in enumerate(soup.find_all("table")):
if "this.style.backgroundImage='url(css/over2.jpg)" in str(temp) and "css/infooldal.png" in str(temp):
logger.debug("Found valid table (%d index)", num)
subtitles += self._loop_over_table(temp, season, episode, video)
return subtitles
def _loop_over_table(self, table, season, episode, video):
i = 0
for row in table.find_all("tr"):
i = i + 1
if "this.style.backgroundImage='url(css/over2.jpg)" in str(row): #and i > 5:
datas = row.find_all("td")
# Currently subliminal not use these params, but maybe later will come in handy
# hunagrian_name = re.split('s(\d{1,2})', datas[1].find_all('b')[0].getText())[0]
# Translator of subtitle
# sub_translator = datas[3].getText()
# Posting date of subtitle
# sub_date = datas[4].getText()
sub_year = sub_english_name = sub_version = None
# Handle the case when '(' in subtitle
if datas[1].getText().count('(') == 1:
sub_english_name = _SUB_ENGLISH_NAME_RE.split(datas[1].getText())[3]
if datas[1].getText().count('(') == 2:
sub_year_search = _SUB_YEAR_RE.findall(datas[1].getText().strip())
if sub_year_search and len(sub_year_search):
sub_year = sub_year_search[0]
sub_english_name = _SUB_ENGLISH_NAME_RE.split(datas[1].getText().split('(')[0])[0]
if not sub_english_name:
continue
sub_season = int((re.findall(r's(\d{1,2})', datas[1].find_all('b')[0].getText(), re.VERBOSE)[0])
.lstrip('0'))
sub_episode = int((re.findall(r'e(\d{1,2})', datas[1].find_all('b')[0].getText(), re.VERBOSE)[0])
.lstrip('0'))
if sub_season == season and sub_episode == episode:
sub_language = self.get_language(datas[2].find_all('img')[0]['src'].split('/')[1])
sub_downloadlink = datas[6].find_all('a')[0]['href']
sub_id = sub_downloadlink.split('=')[1].split('.')[0]
if datas[1].getText().count('(') == 1:
sub_version = datas[1].getText().split('(')[1].split(')')[0]
if datas[1].getText().count('(') == 2:
sub_version = datas[1].getText().split('(')[2].split(')')[0]
# One subtitle can be used for several releases
sub_releases = [s.strip() for s in sub_version.split(',')]
subtitle = self.subtitle_class(sub_language, sub_downloadlink, sub_id, sub_english_name.strip(),
sub_season, sub_episode, sub_version, sub_releases, sub_year,
asked_for_release_group=video.release_group,
asked_for_episode=episode)
logger.debug('Found subtitle: %r', subtitle)
yield subtitle
def list_subtitles(self, video, languages):
titles = [video.series] + video.alternative_series
for title in titles:
subs = self.query(title, video.season, video.episode, video.year, video=video)
if subs:
return subs
time.sleep(self.multi_result_throttle)
return []
def download_subtitle(self, subtitle):
r = self.session.get(subtitle.page_link, timeout=10)
r.raise_for_status()
# open the archive
archive_stream = io.BytesIO(r.content)
if is_rarfile(archive_stream):
logger.debug('Archive identified as rar')
archive = RarFile(archive_stream)
elif is_zipfile(archive_stream):
logger.debug('Archive identified as zip')
archive = ZipFile(archive_stream)
else:
raise ProviderError('Unidentified archive type')
subtitle.content = self.get_subtitle_from_archive(subtitle, archive)