Continuing development.

pull/384/head
Louis Vézina 6 years ago
parent f5d9a868a1
commit 8aef7bc0d3

@@ -61,8 +61,7 @@ class CertifiSession(CloudflareScraper):
         cache_key = "cf_data_%s" % domain
-        if not self.cookies.get("__cfduid", "", domain=domain) or not self.cookies.get("cf_clearance", "",
-                                                                                       domain=domain):
+        if not self.cookies.get("__cfduid", "", domain=domain):
             cf_data = region.get(cache_key)
             if cf_data is not NO_VALUE:
                 cf_cookies, user_agent = cf_data
@@ -78,7 +77,8 @@ class CertifiSession(CloudflareScraper):
         except:
             pass
         else:
-            if cf_data != region.get(cache_key):
+            if cf_data != region.get(cache_key) and self.cookies.get("__cfduid", "", domain=domain)\
+                    and self.cookies.get("cf_clearance", "", domain=domain):
                 logger.debug("Storing cf data for %s: %s", domain, cf_data)
                 region.set(cache_key, cf_data)
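Reviewer note: the change above relaxes the read path (re-use cached data whenever __cfduid is missing) but tightens the write path, which now requires both Cloudflare cookies to be present. A minimal sketch of that presence check against a requests cookie jar (domain value is illustrative):

from requests import Session

session = Session()
domain = "example.com"  # illustrative
# region.set(...) is now gated on both cookies actually existing:
has_both = bool(session.cookies.get("__cfduid", "", domain=domain)
                and session.cookies.get("cf_clearance", "", domain=domain))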

@@ -1,9 +1,11 @@
 # coding=utf-8
+import os
 import time
 import logging
 import json
-import requests
+from subliminal.cache import region
+from dogpile.cache.api import NO_VALUE
 from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask, NoCaptchaTask, AnticaptchaException,\
     Proxy
 from deathbycaptcha import SocketClient as DBCClient, DEFAULT_TOKEN_TIMEOUT
@@ -13,14 +15,29 @@ logger = logging.getLogger(__name__)
 class PitcherRegistry(object):
-    pitchers = {}
+    pitchers = []
+    pitchers_by_key = {}
 
     def register(self, cls):
-        self.pitchers[cls.name] = cls
+        idx = len(self.pitchers)
+        self.pitchers.append(cls)
+        key = "%s_%s" % (cls.name, cls.needs_proxy)
+        key_by_source = "%s_%s" % (cls.source, cls.needs_proxy)
+        self.pitchers_by_key[key] = idx
+        self.pitchers_by_key[key_by_source] = idx
         return cls
 
-    def get_pitcher(self, name):
-        return self.pitchers[name]
+    def get_pitcher(self, name_or_site=None, with_proxy=False):
+        name_or_site = name_or_site or os.environ.get("ANTICAPTCHA_CLASS")
+        if not name_or_site:
+            raise Exception("AntiCaptcha class not given, exiting")
+
+        key = "%s_%s" % (name_or_site, with_proxy)
+        if key not in self.pitchers_by_key:
+            raise Exception("Pitcher %s not found (proxy: %s)" % (name_or_site, with_proxy))
+
+        return self.pitchers[self.pitchers_by_key.get(key)]
 
 
 registry = pitchers = PitcherRegistry()
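Reviewer note: each pitcher class is now registered under two keys, "<name>_<needs_proxy>" and "<source>_<needs_proxy>", so callers can resolve either by class name or by captcha-service domain. Assuming the module path stays subliminal_patch.pitcher, lookups read like this:

from subliminal_patch.pitcher import pitchers

# by class name, proxy-less variant:
cls = pitchers.get_pitcher("AntiCaptchaProxyLess")
# by service domain, proxied variant:
cls = pitchers.get_pitcher("anti-captcha.com", with_proxy=True)
# with no argument, the class name is read from $ANTICAPTCHA_CLASS:
cls = pitchers.get_pitcher()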
@@ -28,17 +45,24 @@ registry = pitchers = PitcherRegistry()
 class Pitcher(object):
     name = None
+    source = None
+    needs_proxy = False
     tries = 3
     job = None
     client = None
+    client_key = None
     website_url = None
     website_key = None
     website_name = None
     solve_time = None
     success = False
 
-    def __init__(self, website_name, website_url, website_key, tries=3, *args, **kwargs):
+    def __init__(self, website_name, website_url, website_key, tries=3, client_key=None, *args, **kwargs):
         self.tries = tries
+        self.client_key = client_key or os.environ.get("ANTICAPTCHA_ACCOUNT_KEY")
+        if not self.client_key:
+            raise Exception("AntiCaptcha key not given, exiting")
         self.website_name = website_name
         self.website_key = website_key
         self.website_url = website_url
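Reviewer note: Pitcher now pulls its account key from the environment when no client_key argument is passed, matching the get_pitcher() fallback above. A sketch of the environment-only configuration this enables (the key value is a placeholder):

import os

os.environ["ANTICAPTCHA_CLASS"] = "AntiCaptchaProxyLess"   # read by PitcherRegistry.get_pitcher()
os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = "your-api-key"     # read by Pitcher.__init__()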
@ -67,17 +91,17 @@ class Pitcher(object):
@registry.register @registry.register
class AntiCaptchaProxyLessPitcher(Pitcher): class AntiCaptchaProxyLessPitcher(Pitcher):
name = "AntiCaptchaProxyLess" name = "AntiCaptchaProxyLess"
source = "anti-captcha.com"
host = "api.anti-captcha.com" host = "api.anti-captcha.com"
language_pool = "en" language_pool = "en"
client_key = None tries = 5
use_ssl = True use_ssl = True
is_invisible = False is_invisible = False
def __init__(self, website_name, client_key, website_url, website_key, tries=3, host=None, language_pool=None, def __init__(self, website_name, website_url, website_key, tries=3, host=None, language_pool=None,
use_ssl=True, is_invisible=False, *args, **kwargs): use_ssl=True, is_invisible=False, *args, **kwargs):
super(AntiCaptchaProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries, *args, super(AntiCaptchaProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries, *args,
**kwargs) **kwargs)
self.client_key = client_key
self.host = host or self.host self.host = host or self.host
self.language_pool = language_pool or self.language_pool self.language_pool = language_pool or self.language_pool
self.use_ssl = use_ssl self.use_ssl = use_ssl
@ -134,12 +158,12 @@ class AntiCaptchaProxyLessPitcher(Pitcher):
class AntiCaptchaPitcher(AntiCaptchaProxyLessPitcher): class AntiCaptchaPitcher(AntiCaptchaProxyLessPitcher):
name = "AntiCaptcha" name = "AntiCaptcha"
proxy = None proxy = None
needs_proxy = True
user_agent = None user_agent = None
cookies = None cookies = None
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
self.proxy = Proxy.parse_url(kwargs.pop("proxy")) self.proxy = Proxy.parse_url(kwargs.pop("proxy"))
print self.proxy.__dict__
self.user_agent = kwargs.pop("user_agent") self.user_agent = kwargs.pop("user_agent")
cookies = kwargs.pop("cookies", {}) cookies = kwargs.pop("cookies", {})
if isinstance(cookies, dict): if isinstance(cookies, dict):
@@ -156,14 +180,15 @@ class AntiCaptchaPitcher(AntiCaptchaProxyLessPitcher):
 @registry.register
 class DBCProxyLessPitcher(Pitcher):
     name = "DeathByCaptchaProxyLess"
+    source = "deathbycaptcha.com"
     username = None
     password = None
 
-    def __init__(self, website_name, client_key, website_url, website_key,
+    def __init__(self, website_name, website_url, website_key,
                  timeout=DEFAULT_TOKEN_TIMEOUT, tries=3, *args, **kwargs):
         super(DBCProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries)
 
-        self.username, self.password = client_key.split(":", 1)
+        self.username, self.password = self.client_key.split(":", 1)
         self.timeout = timeout
 
     def get_client(self):
@@ -182,19 +207,22 @@ class DBCProxyLessPitcher(Pitcher):
     def _throw(self):
         super(DBCProxyLessPitcher, self)._throw()
         payload = json.dumps(self.payload_dict)
-        try:
-            #balance = self.client.get_balance()
-            data = self.client.decode(timeout=self.timeout, type=4, token_params=payload)
-            if data and data["is_correct"]:
-                self.success = True
-                return data["text"]
-        except:
-            raise
+        for i in range(self.tries):
+            try:
+                #balance = self.client.get_balance()
+                data = self.client.decode(timeout=self.timeout, type=4, token_params=payload)
+                if data and data["is_correct"] and data["text"]:
+                    self.success = True
+                    return data["text"]
+            except:
+                raise
 
 
 @registry.register
 class DBCPitcher(DBCProxyLessPitcher):
+    name = "DeathByCaptcha"
     proxy = None
+    needs_proxy = True
     proxy_type = "HTTP"
 
     def __init__(self, *args, **kwargs):
@@ -210,3 +238,20 @@ class DBCPitcher(DBCProxyLessPitcher):
         })
         return payload
+
+
+def load_verification(site_name, session, callback=lambda x: None):
+    ccks = region.get("%s_data" % site_name, expiration_time=15552000)  # 6m
+    if ccks != NO_VALUE:
+        cookies, user_agent = ccks
+        logger.debug("%s: Re-using previous user agent: %s", site_name.capitalize(), user_agent)
+        session.headers["User-Agent"] = user_agent
+        try:
+            session.cookies._cookies.update(cookies)
+            return callback(region)
+        except:
+            return False
+    return False
+
+
+def store_verification(site_name, session):
+    region.set("%s_data" % site_name, (session.cookies._cookies, session.headers["User-Agent"]))

@@ -1,24 +1,20 @@
 # coding=utf-8
 import logging
 import re
-import os
 import datetime
 import subliminal
 import time
-import requests
 from random import randint
-from dogpile.cache.api import NO_VALUE
 from requests import Session
-from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask, NoCaptchaTask, AnticaptchaException
-from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded, AuthenticationError
+from subliminal.cache import region
+from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError
 from subliminal.providers.addic7ed import Addic7edProvider as _Addic7edProvider, \
     Addic7edSubtitle as _Addic7edSubtitle, ParserBeautifulSoup, show_cells_re
-from subliminal.cache import region
 from subliminal.subtitle import fix_line_ending
 from subliminal_patch.utils import sanitize
 from subliminal_patch.exceptions import TooManyRequests
-from subliminal_patch.pitcher import pitchers
+from subliminal_patch.pitcher import pitchers, load_verification, store_verification
 from subzero.language import Language
 
 logger = logging.getLogger(__name__)
@@ -86,24 +82,19 @@ class Addic7edProvider(_Addic7edProvider):
         # login
         if self.username and self.password:
-            ccks = region.get("addic7ed_data", expiration_time=15552000)  # 6m
-            if ccks != NO_VALUE:
-                cookies, user_agent = ccks
-                logger.debug("Addic7ed: Re-using previous user agent")
-                self.session.headers["User-Agent"] = user_agent
-                try:
-                    self.session.cookies._cookies.update(cookies)
-                    r = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10,
-                                         headers={"Referer": self.server_url})
-                    if r.status_code == 302:
-                        logger.info('Addic7ed: Login expired')
-                        region.delete("addic7ed_data")
-                    else:
-                        logger.info('Addic7ed: Re-using old login')
-                        self.logged_in = True
-                        return
-                except:
-                    pass
+            def check_verification(cache_region):
+                rr = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10,
+                                      headers={"Referer": self.server_url})
+                if rr.status_code == 302:
+                    logger.info('Addic7ed: Login expired')
+                    cache_region.delete("addic7ed_data")
+                else:
+                    logger.info('Addic7ed: Re-using old login')
+                    self.logged_in = True
+                    return True
+
+            if load_verification("addic7ed", self.session, callback=check_verification):
+                return
 
             logger.info('Addic7ed: Logging in')
             data = {'username': self.username, 'password': self.password, 'Submit': 'Log in', 'url': '',
@@ -115,25 +106,16 @@ class Addic7edProvider(_Addic7edProvider):
             if "grecaptcha" in r.content:
                 logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, but should only '
                             'happen once every so often')
-                anticaptcha_key = os.environ.get("ANTICAPTCHA_ACCOUNT_KEY")
-                if not anticaptcha_key:
-                    logger.error("AntiCaptcha key not given, exiting")
-                    return
-
-                anticaptcha_proxy = os.environ.get("ANTICAPTCHA_PROXY")
 
                 site_key = re.search(r'grecaptcha.execute\(\'(.+?)\',', r.content).group(1)
                 if not site_key:
                     logger.error("Addic7ed: Captcha site-key not found!")
                     return
 
-                #pitcher_cls = pitchers.get_pitcher("AntiCaptchaProxyLess")
-                #pitcher = pitcher_cls("Addic7ed", anticaptcha_key, self.server_url + 'login.php', site_key)
-                pitcher_cls = pitchers.get_pitcher("AntiCaptchaProxyLess")
-                pitcher = pitcher_cls("Addic7ed", anticaptcha_key, self.server_url + 'login.php', site_key,
-                                      user_agent=self.session.headers["User-Agent"],
-                                      cookies=self.session.cookies.get_dict(),
-                                      is_invisible=True)
+                pitcher = pitchers.get_pitcher()("Addic7ed", self.server_url + 'login.php', site_key,
+                                                 user_agent=self.session.headers["User-Agent"],
+                                                 cookies=self.session.cookies.get_dict(),
+                                                 is_invisible=True)
 
                 result = pitcher.throw()
                 if not result:
@ -156,13 +138,13 @@ class Addic7edProvider(_Addic7edProvider):
raise AuthenticationError(self.username) raise AuthenticationError(self.username)
break break
region.set("addic7ed_data", (self.session.cookies._cookies, self.session.headers["User-Agent"])) store_verification("addic7ed", self.session)
logger.debug('Addic7ed: Logged in') logger.debug('Addic7ed: Logged in')
self.logged_in = True self.logged_in = True
def terminate(self): def terminate(self):
pass self.session.close()
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME) @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def _get_show_ids(self): def _get_show_ids(self):
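Reviewer note: pitchers.get_pitcher()(...) above is a double call: the first call resolves a pitcher class (from $ANTICAPTCHA_CLASS, since no name is passed), the second instantiates it. Spelled out, with login_url and session as stand-ins for the provider's attributes:

pitcher_cls = pitchers.get_pitcher()  # resolved via ANTICAPTCHA_CLASS
pitcher = pitcher_cls("Addic7ed", login_url, site_key,
                      user_agent=session.headers["User-Agent"],
                      cookies=session.cookies.get_dict(),
                      is_invisible=True)
result = pitcher.throw()  # the solved token, or a falsy value on failure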

@@ -0,0 +1,184 @@
# -*- coding: utf-8 -*-
import io
import logging
import os
import zipfile
import rarfile
from subzero.language import Language
from guessit import guessit
from requests import Session
from six import text_type
from subliminal import __short_version__
from subliminal.providers import ParserBeautifulSoup, Provider
from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches
from subliminal.video import Episode, Movie
logger = logging.getLogger(__name__)
class GreekSubtitlesSubtitle(Subtitle):
"""GreekSubtitles Subtitle."""
provider_name = 'greeksubtitles'
def __init__(self, language, page_link, version, download_link):
super(GreekSubtitlesSubtitle, self).__init__(language, page_link=page_link)
self.version = version
self.download_link = download_link
self.hearing_impaired = None
self.encoding = 'windows-1253'
@property
def id(self):
return self.download_link
def get_matches(self, video):
matches = set()
# episode
if isinstance(video, Episode):
# other properties
matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True)
# movie
elif isinstance(video, Movie):
# other properties
matches |= guess_matches(video, guessit(self.version, {'type': 'movie'}), partial=True)
return matches
class GreekSubtitlesProvider(Provider):
"""GreekSubtitles Provider."""
languages = {Language(l) for l in ['ell', 'eng']}
server_url = 'http://gr.greek-subtitles.com/'
search_url = 'search.php?name={}'
download_url = 'http://www.greeksubtitles.info/getp.php?id={:d}'
subtitle_class = GreekSubtitlesSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)
def terminate(self):
self.session.close()
def query(self, keyword, season=None, episode=None, year=None):
params = keyword
if season and episode:
params += ' S{season:02d}E{episode:02d}'.format(season=season, episode=episode)
elif year:
params += ' {:4d}'.format(year)
logger.debug('Searching subtitles %r', params)
subtitles = []
search_link = self.server_url + text_type(self.search_url).format(params)
while True:
r = self.session.get(search_link, timeout=30)
r.raise_for_status()
if not r.content:
logger.debug('No data returned from provider')
return []
soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
# loop over subtitles cells
for cell in soup.select('td.latest_name > a:nth-of-type(1)'):
# read the item
subtitle_id = int(cell['href'].rsplit('/', 2)[1])
page_link = cell['href']
language = Language.fromalpha2(cell.parent.find('img')['src'].split('/')[-1].split('.')[0])
version = cell.text.strip() or None
if version is None:
version = ""
subtitle = self.subtitle_class(language, page_link, version, self.download_url.format(subtitle_id))
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
anchors = soup.select('td a')
next_page_available = False
for anchor in anchors:
if 'Next' in anchor.text and 'search.php' in anchor['href']:
search_link = self.server_url + anchor['href']
next_page_available = True
break
if not next_page_available:
break
return subtitles
def list_subtitles(self, video, languages):
if isinstance(video, Episode):
titles = [video.series] + video.alternative_series
elif isinstance(video, Movie):
titles = [video.title] + video.alternative_titles
else:
titles = []
subtitles = []
# query for subtitles with the show_id
for title in titles:
if isinstance(video, Episode):
subtitles += [s for s in self.query(title, season=video.season, episode=video.episode,
year=video.year)
if s.language in languages]
elif isinstance(video, Movie):
subtitles += [s for s in self.query(title, year=video.year)
if s.language in languages]
return subtitles
def download_subtitle(self, subtitle):
if isinstance(subtitle, GreekSubtitlesSubtitle):
# download the subtitle
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link},
timeout=30)
r.raise_for_status()
if not r.content:
logger.debug('Unable to download subtitle. No data returned from provider')
return
archive = _get_archive(r.content)
subtitle_content = _get_subtitle_from_archive(archive)
if subtitle_content:
subtitle.content = fix_line_ending(subtitle_content)
else:
logger.debug('Could not extract subtitle from %r', archive)
def _get_archive(content):
# open the archive
archive_stream = io.BytesIO(content)
archive = None
if rarfile.is_rarfile(archive_stream):
logger.debug('Identified rar archive')
archive = rarfile.RarFile(archive_stream)
elif zipfile.is_zipfile(archive_stream):
logger.debug('Identified zip archive')
archive = zipfile.ZipFile(archive_stream)
return archive
def _get_subtitle_from_archive(archive):
for name in archive.namelist():
# discard hidden files
if os.path.split(name)[-1].startswith('.'):
continue
# discard non-subtitle files
if not name.lower().endswith(SUBTITLE_EXTENSIONS):
continue
return archive.read(name)
return None
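Reviewer note: _get_archive/_get_subtitle_from_archive are duplicated across several of the new providers. A self-contained check of the zip path (the rar path additionally needs the external unrar tool that rarfile drives):

import io
import zipfile

# build an in-memory zip with a single .srt, mimicking a provider download
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as zf:
    zf.writestr("Movie.2018.720p.srt", "1\n00:00:01,000 --> 00:00:02,000\nHello\n")

archive = _get_archive(buf.getvalue())      # -> zipfile.ZipFile instance
print(_get_subtitle_from_archive(archive))  # -> the .srt bytes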

@@ -0,0 +1,283 @@
# -*- coding: utf-8 -*-
# encoding=utf8
import io
import logging
import os
import random
import rarfile
import re
import zipfile
from subzero.language import Language
from guessit import guessit
from requests import Session
from six import text_type
from subliminal.providers import ParserBeautifulSoup, Provider
from subliminal import __short_version__
from subliminal.cache import SHOW_EXPIRATION_TIME, region
from subliminal.score import get_equivalent_release_groups
from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches
from subliminal.utils import sanitize, sanitize_release_group
from subliminal.video import Movie
logger = logging.getLogger(__name__)
year_re = re.compile(r'^\((\d{4})\)$')
class Subs4FreeSubtitle(Subtitle):
"""Subs4Free Subtitle."""
provider_name = 'subs4free'
def __init__(self, language, page_link, title, year, version, download_link):
super(Subs4FreeSubtitle, self).__init__(language, page_link=page_link)
self.title = title
self.year = year
self.version = version
self.download_link = download_link
self.hearing_impaired = None
self.encoding = 'utf8'
@property
def id(self):
return self.download_link
def get_matches(self, video):
matches = set()
# movie
if isinstance(video, Movie):
# title
if video.title and (sanitize(self.title) in (
sanitize(name) for name in [video.title] + video.alternative_titles)):
matches.add('title')
# year
if video.year and self.year == video.year:
matches.add('year')
# release_group
if (video.release_group and self.version and
any(r in sanitize_release_group(self.version)
for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
matches.add('release_group')
# other properties
matches |= guess_matches(video, guessit(self.version, {'type': 'movie'}), partial=True)
return matches
class Subs4FreeProvider(Provider):
"""Subs4Free Provider."""
languages = {Language(l) for l in ['ell', 'eng']}
video_types = (Movie,)
server_url = 'https://www.sf4-industry.com'
download_url = '/getSub.html'
search_url = '/search_report.php?search={}&searchType=1'
subtitle_class = Subs4FreeSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)
def terminate(self):
self.session.close()
def get_show_ids(self, title, year=None):
"""Get the best matching show id for `series` and `year``.
First search in the result of :meth:`_get_show_suggestions`.
:param title: show title.
:param year: year of the show, if any.
:type year: int
:return: the show id, if found.
:rtype: str
"""
title_sanitized = sanitize(title).lower()
show_ids = self._get_suggestions(title)
matched_show_ids = []
for show in show_ids:
show_id = None
show_title = sanitize(show['title'])
# attempt with year
if not show_id and year:
logger.debug('Getting show id with year')
show_id = show['link'].split('?p=')[-1] if show_title == '{title} {year:d}'.format(
title=title_sanitized, year=year) else None
# attempt clean
if not show_id:
logger.debug('Getting show id')
show_id = show['link'].split('?p=')[-1] if show_title == title_sanitized else None
if show_id:
matched_show_ids.append(show_id)
return matched_show_ids
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, to_str=text_type,
should_cache_fn=lambda value: value)
def _get_suggestions(self, title):
"""Search the show or movie id from the `title` and `year`.
:param str title: title of the show.
:return: the show suggestions found.
:rtype: dict
"""
# make the search
logger.info('Searching show ids with %r', title)
r = self.session.get(self.server_url + text_type(self.search_url).format(title),
headers={'Referer': self.server_url}, timeout=10)
r.raise_for_status()
if not r.content:
logger.debug('No data returned from provider')
return {}
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
suggestions = [{'link': l.attrs['value'], 'title': l.text}
for l in soup.select('select[name="Mov_sel"] > option[value]')]
logger.debug('Found suggestions: %r', suggestions)
return suggestions
def query(self, movie_id, title, year):
# get the season list of the show
logger.info('Getting the subtitle list of show id %s', movie_id)
if movie_id:
page_link = self.server_url + '/' + movie_id
else:
page_link = self.server_url + text_type(self.search_url).format(' '.join([title, str(year)]))
r = self.session.get(page_link, timeout=10)
r.raise_for_status()
if not r.content:
logger.debug('No data returned from provider')
return []
soup = ParserBeautifulSoup(r.content, ['html.parser'])
year_num = None
year_element = soup.select_one('td#dates_header > table div')
matches = False
if year_element:
matches = year_re.match(str(year_element.contents[2]).strip())
if matches:
year_num = int(matches.group(1))
title_element = soup.select_one('td#dates_header > table u')
show_title = str(title_element.contents[0]).strip() if title_element else None
subtitles = []
# loop over episode rows
for subtitle in soup.select('table.table_border div[align="center"] > div'):
# read common info
version = subtitle.find('b').text
download_link = self.server_url + subtitle.find('a')['href']
language = Language.fromalpha2(subtitle.find('img')['src'].split('/')[-1].split('.')[0])
subtitle = self.subtitle_class(language, page_link, show_title, year_num, version, download_link)
logger.debug('Found subtitle {!r}'.format(subtitle))
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
# lookup show_id
titles = [video.title] + video.alternative_titles if isinstance(video, Movie) else []
show_ids = None
for title in titles:
show_ids = self.get_show_ids(title, video.year)
if show_ids and len(show_ids) > 0:
break
subtitles = []
# query for subtitles with the show_id
if show_ids and len(show_ids) > 0:
for show_id in show_ids:
subtitles += [s for s in self.query(show_id, video.title, video.year) if s.language in languages]
else:
subtitles += [s for s in self.query(None, video.title, video.year) if s.language in languages]
return subtitles
def download_subtitle(self, subtitle):
if isinstance(subtitle, Subs4FreeSubtitle):
# download the subtitle
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10)
r.raise_for_status()
if not r.content:
logger.debug('Unable to download subtitle. No data returned from provider')
return
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
download_element = soup.select_one('input[name="id"]')
image_element = soup.select_one('input[type="image"]')
subtitle_id = download_element['value'] if download_element else None
width = int(str(image_element['width']).strip('px')) if image_element else 0
height = int(str(image_element['height']).strip('px')) if image_element else 0
if not subtitle_id:
logger.debug('Unable to download subtitle. No download link found')
return
download_url = self.server_url + self.download_url
r = self.session.post(download_url, data={'utf8': 1, 'id': subtitle_id, 'x': random.randint(0, width),
'y': random.randint(0, height)},
headers={'Referer': subtitle.download_link}, timeout=10)
r.raise_for_status()
if not r.content:
logger.debug('Unable to download subtitle. No data returned from provider')
return
archive = _get_archive(r.content)
subtitle_content = _get_subtitle_from_archive(archive) if archive else r.content
if subtitle_content:
subtitle.content = fix_line_ending(subtitle_content)
else:
logger.debug('Could not extract subtitle from %r', archive)
def _get_archive(content):
# open the archive
archive_stream = io.BytesIO(content)
archive = None
if rarfile.is_rarfile(archive_stream):
logger.debug('Identified rar archive')
archive = rarfile.RarFile(archive_stream)
elif zipfile.is_zipfile(archive_stream):
logger.debug('Identified zip archive')
archive = zipfile.ZipFile(archive_stream)
return archive
def _get_subtitle_from_archive(archive):
for name in archive.namelist():
# discard hidden files
if os.path.split(name)[-1].startswith('.'):
continue
# discard non-subtitle files
if not name.lower().endswith(SUBTITLE_EXTENSIONS):
continue
return archive.read(name)
return None
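Reviewer note: Subs4Free's download form submits an image button, so download_subtitle emulates a click by posting random x/y coordinates bounded by the button's advertised size. Trimmed to just that request, with the scraped values replaced by placeholders:

import random
from requests import Session

session = Session()
subtitle_id, width, height = "12345", 120, 40   # normally scraped from the download page
r = session.post("https://www.sf4-industry.com/getSub.html",
                 data={"utf8": 1, "id": subtitle_id,
                       "x": random.randint(0, width),
                       "y": random.randint(0, height)},
                 headers={"Referer": "https://www.sf4-industry.com"},  # the real code uses the subtitle page
                 timeout=10)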

@@ -0,0 +1,272 @@
# -*- coding: utf-8 -*-
import io
import logging
import os
import rarfile
import re
import zipfile
from subzero.language import Language
from guessit import guessit
from requests import Session
from six import text_type
from subliminal.providers import ParserBeautifulSoup, Provider
from subliminal import __short_version__
from subliminal.cache import SHOW_EXPIRATION_TIME, region
from subliminal.score import get_equivalent_release_groups
from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches
from subliminal.utils import sanitize, sanitize_release_group
from subliminal.video import Episode
logger = logging.getLogger(__name__)
year_re = re.compile(r'^\((\d{4})\)$')
class Subs4SeriesSubtitle(Subtitle):
"""Subs4Series Subtitle."""
provider_name = 'subs4series'
def __init__(self, language, page_link, series, year, version, download_link):
super(Subs4SeriesSubtitle, self).__init__(language, page_link=page_link)
self.series = series
self.year = year
self.version = version
self.download_link = download_link
self.hearing_impaired = None
self.encoding = 'windows-1253'
@property
def id(self):
return self.download_link
def get_matches(self, video):
matches = set()
# episode
if isinstance(video, Episode):
# series name
if video.series and sanitize(self.series) in (
sanitize(name) for name in [video.series] + video.alternative_series):
matches.add('series')
# year
if video.original_series and self.year is None or video.year and video.year == self.year:
matches.add('year')
# release_group
if (video.release_group and self.version and
any(r in sanitize_release_group(self.version)
for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
matches.add('release_group')
# other properties
matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True)
return matches
class Subs4SeriesProvider(Provider):
"""Subs4Series Provider."""
languages = {Language(l) for l in ['ell', 'eng']}
video_types = (Episode,)
server_url = 'https://www.subs4series.com'
search_url = '/search_report.php?search={}&searchType=1'
episode_link = '/tv-series/{show_id}/season-{season:d}/episode-{episode:d}'
subtitle_class = Subs4SeriesSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)
def terminate(self):
self.session.close()
def get_show_ids(self, title, year=None):
"""Get the best matching show id for `series` and `year`.
First search in the result of :meth:`_get_show_suggestions`.
:param title: show title.
:param year: year of the show, if any.
:type year: int
:return: the show id, if found.
:rtype: str
"""
title_sanitized = sanitize(title).lower()
show_ids = self._get_suggestions(title)
matched_show_ids = []
for show in show_ids:
show_id = None
show_title = sanitize(show['title'])
# attempt with year
if not show_id and year:
logger.debug('Getting show id with year')
show_id = '/'.join(show['link'].rsplit('/', 2)[1:]) if show_title == '{title} {year:d}'.format(
title=title_sanitized, year=year) else None
# attempt clean
if not show_id:
logger.debug('Getting show id')
show_id = '/'.join(show['link'].rsplit('/', 2)[1:]) if show_title == title_sanitized else None
if show_id:
matched_show_ids.append(show_id)
return matched_show_ids
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, to_str=text_type,
should_cache_fn=lambda value: value)
def _get_suggestions(self, title):
"""Search the show or movie id from the `title` and `year`.
:param str title: title of the show.
:return: the show suggestions found.
:rtype: dict
"""
# make the search
logger.info('Searching show ids with %r', title)
r = self.session.get(self.server_url + text_type(self.search_url).format(title),
headers={'Referer': self.server_url}, timeout=10)
r.raise_for_status()
if not r.content:
logger.debug('No data returned from provider')
return {}
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
series = [{'link': l.attrs['value'], 'title': l.text}
for l in soup.select('select[name="Mov_sel"] > option[value]')]
logger.debug('Found suggestions: %r', series)
return series
def query(self, show_id, series, season, episode, title):
# get the season list of the show
logger.info('Getting the subtitle list of show id %s', show_id)
if all((show_id, season, episode)):
page_link = self.server_url + self.episode_link.format(show_id=show_id, season=season, episode=episode)
else:
return []
r = self.session.get(page_link, timeout=10)
r.raise_for_status()
if not r.content:
logger.debug('No data returned from provider')
return []
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
year_num = None
matches = year_re.match(str(soup.select_one('#dates_header_br > table div').contents[2]).strip())
if matches:
year_num = int(matches.group(1))
show_title = str(soup.select_one('#dates_header_br > table u').contents[0]).strip()
subtitles = []
# loop over episode rows
for subtitle in soup.select('table.table_border div[align="center"] > div'):
# read common info
version = subtitle.find('b').text
download_link = self.server_url + subtitle.find('a')['href']
language = Language.fromalpha2(subtitle.find('img')['src'].split('/')[-1].split('.')[0])
subtitle = self.subtitle_class(language, page_link, show_title, year_num, version, download_link)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
# lookup show_id
titles = [video.series] + video.alternative_series if isinstance(video, Episode) else []
show_ids = None
for title in titles:
show_ids = self.get_show_ids(title, video.year)
if show_ids and len(show_ids) > 0:
break
subtitles = []
# query for subtitles with the show_id
for show_id in show_ids:
subtitles += [s for s in self.query(show_id, video.series, video.season, video.episode, video.title)
if s.language in languages]
return subtitles
def download_subtitle(self, subtitle):
if isinstance(subtitle, Subs4SeriesSubtitle):
# download the subtitle
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10)
r.raise_for_status()
if not r.content:
logger.debug('Unable to download subtitle. No data returned from provider')
return
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
download_element = soup.select_one('a.style55ws')
if not download_element:
download_element = soup.select_one('form[method="post"]')
target = download_element['action'] if download_element else None
else:
target = download_element['href']
if not target:
logger.debug('Unable to download subtitle. No download link found')
return
download_url = self.server_url + target
r = self.session.get(download_url, headers={'Referer': subtitle.download_link}, timeout=10)
r.raise_for_status()
if not r.content:
logger.debug('Unable to download subtitle. No data returned from provider')
return
archive = _get_archive(r.content)
subtitle_content = _get_subtitle_from_archive(archive) if archive else r.content
if subtitle_content:
subtitle.content = fix_line_ending(subtitle_content)
else:
logger.debug('Could not extract subtitle from %r', archive)
def _get_archive(content):
# open the archive
archive_stream = io.BytesIO(content)
archive = None
if rarfile.is_rarfile(archive_stream):
logger.debug('Identified rar archive')
archive = rarfile.RarFile(archive_stream)
elif zipfile.is_zipfile(archive_stream):
logger.debug('Identified zip archive')
archive = zipfile.ZipFile(archive_stream)
return archive
def _get_subtitle_from_archive(archive):
for name in archive.namelist():
# discard hidden files
if os.path.split(name)[-1].startswith('.'):
continue
# discard non-subtitle files
if not name.lower().endswith(SUBTITLE_EXTENSIONS):
continue
return archive.read(name)
return None
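Reviewer note: get_show_ids matches suggestion titles by comparing sanitized strings against "<title> <year>". A worked example of that comparison using subliminal's sanitize (the suggestion data is made up):

from subliminal.utils import sanitize

title, year = "Some Show", 2017
show = {"link": "/tv-series/some-show-2017/123", "title": "Some Show (2017)"}
if sanitize(show["title"]) == "{title} {year:d}".format(title=sanitize(title).lower(), year=year):
    show_id = "/".join(show["link"].rsplit("/", 2)[1:])  # -> "some-show-2017/123"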

@@ -0,0 +1,159 @@
# -*- coding: utf-8 -*-
import logging
import re
import io
import os
from random import randint
from bs4 import BeautifulSoup
from zipfile import ZipFile, is_zipfile
from rarfile import RarFile, is_rarfile
from requests import Session
from guessit import guessit
from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle
from subliminal_patch.utils import sanitize
from subliminal.exceptions import ProviderError
from subliminal.utils import sanitize_release_group
from subliminal.subtitle import guess_matches
from subliminal.video import Episode, Movie
from subliminal.subtitle import fix_line_ending
from subzero.language import Language
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
logger = logging.getLogger(__name__)
class SubsSabBzSubtitle(Subtitle):
"""SubsSabBz Subtitle."""
provider_name = 'subssabbz'
def __init__(self, language, filename, type):
super(SubsSabBzSubtitle, self).__init__(language)
self.language = language
self.filename = filename
self.type = type
@property
def id(self):
return self.filename
def get_matches(self, video):
matches = set()
video_filename = video.name
video_filename = os.path.basename(video_filename)
video_filename, _ = os.path.splitext(video_filename)
video_filename = sanitize_release_group(video_filename)
subtitle_filename = self.filename
subtitle_filename = os.path.basename(subtitle_filename)
subtitle_filename, _ = os.path.splitext(subtitle_filename)
subtitle_filename = sanitize_release_group(subtitle_filename)
if video_filename == subtitle_filename:
matches.add('hash')
matches |= guess_matches(video, guessit(self.filename, {'type': self.type}))
matches.add(id(self))
return matches
class SubsSabBzProvider(Provider):
"""SubsSabBz Provider."""
languages = {Language('por', 'BR')} | {Language(l) for l in [
'bul', 'eng'
]}
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
self.session.headers["Accept-Encoding"] = "gzip, deflate, br"
self.session.headers["DNT"] = "1"
self.session.headers["Connection"] = "keep-alive"
self.session.headers["Upgrade-Insecure-Requests"] = "1"
self.session.headers["Cache-Control"] = "max-age=0"
def terminate(self):
self.session.close()
def query(self, language, video):
subtitles = []
isEpisode = isinstance(video, Episode)
params = {
'act': 'search',
'movie': '',
'select-language': '2',
'upldr': '',
'yr': '',
'release': ''
}
if isEpisode:
params['movie'] = "%s %02d %02d" % (sanitize(video.series), video.season, video.episode)
else:
params['yr'] = video.year
params['movie'] = (video.title)
if language == 'en' or language == 'eng':
params['select-language'] = 1
logger.info('Searching subtitle %r', params)
response = self.session.post('http://subs.sab.bz/index.php?', params=params, allow_redirects=False, timeout=10, headers={
'Referer': 'http://subs.sab.bz/',
})
response.raise_for_status()
if response.status_code != 200:
logger.debug('No subtitles found')
return subtitles
soup = BeautifulSoup(response.content, 'html.parser')
rows = soup.findAll('tr', {'class': 'subs-row'})
# Search on first 10 rows only
for row in rows[:10]:
a_element_wrapper = row.find('td', { 'class': 'c2field' })
if a_element_wrapper:
element = a_element_wrapper.find('a')
if element:
link = element.get('href')
logger.info('Found subtitle link %r', link)
subtitles = subtitles + self.download_archive_and_add_subtitle_files(link, language, video)
return subtitles
def list_subtitles(self, video, languages):
return [s for l in languages for s in self.query(l, video)]
def download_subtitle(self, subtitle):
pass
def process_archive_subtitle_files(self, archiveStream, language, video):
subtitles = []
type = 'episode' if isinstance(video, Episode) else 'movie'
for file_name in archiveStream.namelist():
if file_name.lower().endswith(('.srt', '.sub')):
logger.info('Found subtitle file %r', file_name)
subtitle = SubsSabBzSubtitle(language, file_name, type)
subtitle.content = archiveStream.read(file_name)
subtitles.append(subtitle)
return subtitles
def download_archive_and_add_subtitle_files(self, link, language, video ):
logger.info('Downloading subtitle %r', link)
request = self.session.get(link, headers={
'Referer': 'http://subs.sab.bz/index.php?'
})
request.raise_for_status()
archive_stream = io.BytesIO(request.content)
if is_rarfile(archive_stream):
return self.process_archive_subtitle_files( RarFile(archive_stream), language, video )
elif is_zipfile(archive_stream):
return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video )
else:
raise ValueError('Not a valid archive')
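Reviewer note: a hypothetical driver for the new provider, following the initialize/list/terminate lifecycle the class implements; it performs live HTTP against subs.sab.bz, and assumes subliminal's Movie(name, title, year=...) signature:

from subliminal.video import Movie
from subzero.language import Language

provider = SubsSabBzProvider()
provider.initialize()
try:
    video = Movie("/films/Movie.2018.720p.mkv", "Movie", year=2018)  # illustrative paths/titles
    found = provider.list_subtitles(video, {Language("bul")})
finally:
    provider.terminate()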

@@ -0,0 +1,161 @@
# -*- coding: utf-8 -*-
import logging
import re
import io
import os
from random import randint
from bs4 import BeautifulSoup
from zipfile import ZipFile, is_zipfile
from rarfile import RarFile, is_rarfile
from requests import Session
from guessit import guessit
from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle
from subliminal_patch.utils import sanitize
from subliminal.exceptions import ProviderError
from subliminal.utils import sanitize_release_group
from subliminal.subtitle import guess_matches
from subliminal.video import Episode, Movie
from subliminal.subtitle import fix_line_ending
from subzero.language import Language
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
logger = logging.getLogger(__name__)
class SubsUnacsSubtitle(Subtitle):
"""SubsUnacs Subtitle."""
provider_name = 'subsunacs'
def __init__(self, language, filename, type):
super(SubsUnacsSubtitle, self).__init__(language)
self.language = language
self.filename = filename
self.type = type
@property
def id(self):
return self.filename
def get_matches(self, video):
matches = set()
video_filename = video.name
video_filename = os.path.basename(video_filename)
video_filename, _ = os.path.splitext(video_filename)
video_filename = sanitize_release_group(video_filename)
subtitle_filename = self.filename
subtitle_filename = os.path.basename(subtitle_filename)
subtitle_filename, _ = os.path.splitext(subtitle_filename)
subtitle_filename = sanitize_release_group(subtitle_filename)
if video_filename == subtitle_filename:
matches.add('hash')
matches |= guess_matches(video, guessit(self.filename, {'type': self.type}))
matches.add(id(self))
return matches
class SubsUnacsProvider(Provider):
"""SubsUnacs Provider."""
languages = {Language('por', 'BR')} | {Language(l) for l in [
'bul', 'eng'
]}
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
self.session.headers["Accept-Encoding"] = "gzip, deflate, br"
self.session.headers["DNT"] = "1"
self.session.headers["Connection"] = "keep-alive"
self.session.headers["Upgrade-Insecure-Requests"] = "1"
self.session.headers["Cache-Control"] = "max-age=0"
def terminate(self):
self.session.close()
def query(self, language, video):
subtitles = []
isEpisode = isinstance(video, Episode)
params = {
'm': '',
'l': 0,
'c': '',
'y': '',
'action': " Търси ",
'a': '',
'd': '',
'u': '',
'g': '',
't': '',
'imdbcheck': 1}
if isEpisode:
params['m'] = "%s %02d %02d" % (sanitize(video.series), video.season, video.episode)
else:
params['y'] = video.year
params['m'] = (video.title)
if language == 'en' or language == 'eng':
params['l'] = 1
logger.info('Searching subtitle %r', params)
response = self.session.post('https://subsunacs.net/search.php', params=params, allow_redirects=False, timeout=10, headers={
'Referer': 'https://subsunacs.net/index.php',
})
response.raise_for_status()
if response.status_code != 200:
logger.debug('No subtitles found')
return subtitles
soup = BeautifulSoup(response.content, 'html.parser')
rows = soup.findAll('td', {'class': 'tdMovie'})
# Search on first 10 rows only
for row in rows[:10]:
element = row.find('a', {'class': 'tooltip'})
if element:
link = element.get('href')
logger.info('Found subtitle link %r', link)
subtitles = subtitles + self.download_archive_and_add_subtitle_files('https://subsunacs.net' + link, language, video)
return subtitles
def list_subtitles(self, video, languages):
return [s for l in languages for s in self.query(l, video)]
def download_subtitle(self, subtitle):
pass
def process_archive_subtitle_files(self, archiveStream, language, video):
subtitles = []
type = 'episode' if isinstance(video, Episode) else 'movie'
for file_name in archiveStream.namelist():
if file_name.lower().endswith(('.srt', '.sub')):
logger.info('Found subtitle file %r', file_name)
subtitle = SubsUnacsSubtitle(language, file_name, type)
subtitle.content = archiveStream.read(file_name)
subtitles.append(subtitle)
return subtitles
def download_archive_and_add_subtitle_files(self, link, language, video ):
logger.info('Downloading subtitle %r', link)
request = self.session.get(link, headers={
'Referer': 'https://subsunacs.net/search.php'
})
request.raise_for_status()
archive_stream = io.BytesIO(request.content)
if is_rarfile(archive_stream):
return self.process_archive_subtitle_files( RarFile(archive_stream), language, video )
elif is_zipfile(archive_stream):
return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video )
else:
raise ValueError('Not a valid archive')

@@ -0,0 +1,318 @@
# -*- coding: utf-8 -*-
import io
import json
import logging
import os
import rarfile
import re
import zipfile
from subzero.language import Language
from guessit import guessit
from requests import Session
from six import text_type
from subliminal.providers import ParserBeautifulSoup, Provider
from subliminal import __short_version__
from subliminal.cache import SHOW_EXPIRATION_TIME, region
from subliminal.score import get_equivalent_release_groups
from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches
from subliminal.utils import sanitize, sanitize_release_group
from subliminal.video import Episode, Movie
logger = logging.getLogger(__name__)
episode_re = re.compile(r'^S(\d{2})E(\d{2})$')
class SubzSubtitle(Subtitle):
"""Subz Subtitle."""
provider_name = 'subz'
def __init__(self, language, page_link, series, season, episode, title, year, version, download_link):
super(SubzSubtitle, self).__init__(language, page_link=page_link)
self.series = series
self.season = season
self.episode = episode
self.title = title
self.year = year
self.version = version
self.download_link = download_link
self.hearing_impaired = None
self.encoding = 'windows-1253'
@property
def id(self):
return self.download_link
def get_matches(self, video):
matches = set()
video_type = None
# episode
if isinstance(video, Episode):
video_type = 'episode'
# series name
if video.series and sanitize(self.series) in (
sanitize(name) for name in [video.series] + video.alternative_series):
matches.add('series')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# title of the episode
if video.title and sanitize(self.title) == sanitize(video.title):
matches.add('title')
# year
if video.original_series and self.year is None or video.year and video.year == self.year:
matches.add('year')
# movie
elif isinstance(video, Movie):
video_type = 'movie'
# title
if video.title and (sanitize(self.title) in (
sanitize(name) for name in [video.title] + video.alternative_titles)):
matches.add('title')
# year
if video.year and self.year == video.year:
matches.add('year')
# release_group
if (video.release_group and self.version and
any(r in sanitize_release_group(self.version)
for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
matches.add('release_group')
# other properties
matches |= guess_matches(video, guessit(self.version, {'type': video_type}), partial=True)
return matches
class SubzProvider(Provider):
"""Subz Provider."""
languages = {Language(l) for l in ['ell']}
server_url = 'https://subz.xyz'
sign_in_url = '/sessions'
sign_out_url = '/logout'
search_url = '/typeahead/{}'
episode_link = '/series/{show_id}/seasons/{season:d}/episodes/{episode:d}'
movie_link = '/movies/{}'
subtitle_class = SubzSubtitle
def __init__(self):
self.logged_in = False
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)
def terminate(self):
self.session.close()
def get_show_ids(self, title, year=None, is_episode=True, country_code=None):
"""Get the best matching show id for `series`, `year` and `country_code`.
First search in the result of :meth:`_get_show_suggestions`.
:param title: show title.
:param year: year of the show, if any.
:type year: int
:param is_episode: if the search is for episode.
:type is_episode: bool
:param country_code: country code of the show, if any.
:type country_code: str
:return: the show id, if found.
:rtype: str
"""
title_sanitized = sanitize(title).lower()
show_ids = self._get_suggestions(title, is_episode)
matched_show_ids = []
for show in show_ids:
show_id = None
# attempt with country
if not show_id and country_code:
logger.debug('Getting show id with country')
if sanitize(show['title']) == text_type('{title} {country}').format(title=title_sanitized,
country=country_code.lower()):
show_id = show['link'].split('/')[-1]
# attempt with year
if not show_id and year:
logger.debug('Getting show id with year')
if sanitize(show['title']) == text_type('{title} {year}').format(title=title_sanitized, year=year):
show_id = show['link'].split('/')[-1]
# attempt clean
if not show_id:
logger.debug('Getting show id')
show_id = show['link'].split('/')[-1] if sanitize(show['title']) == title_sanitized else None
if show_id:
matched_show_ids.append(show_id)
return matched_show_ids
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, to_str=text_type,
should_cache_fn=lambda value: value)
def _get_suggestions(self, title, is_episode=True):
"""Search the show or movie id from the `title` and `year`.
:param str title: title of the show.
:param is_episode: if the search is for episode.
:type is_episode: bool
:return: the show suggestions found.
:rtype: dict
"""
# make the search
logger.info('Searching show ids with %r', title)
r = self.session.get(self.server_url + text_type(self.search_url).format(title), timeout=10)
r.raise_for_status()
if not r.content:
logger.debug('No data returned from provider')
return {}
show_type = 'series' if is_episode else 'movie'
parsed_suggestions = [s for s in json.loads(r.text) if 'type' in s and s['type'] == show_type]
logger.debug('Found suggestions: %r', parsed_suggestions)
return parsed_suggestions
def query(self, show_id, series, season, episode, title):
# get the season list of the show
logger.info('Getting the subtitle list of show id %s', show_id)
is_episode = False
if all((show_id, season, episode)):
is_episode = True
page_link = self.server_url + self.episode_link.format(show_id=show_id, season=season, episode=episode)
elif all((show_id, title)):
page_link = self.server_url + self.movie_link.format(show_id)
else:
return []
r = self.session.get(page_link, timeout=10)
r.raise_for_status()
if not r.content:
logger.debug('No data returned from provider')
return []
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
year_num = None
if not is_episode:
year_num = int(soup.select_one('span.year').text)
show_title = str(soup.select_one('#summary-wrapper > div.summary h1').contents[0]).strip()
subtitles = []
# loop over episode rows
for subtitle in soup.select('div[id="subtitles"] tr[data-id]'):
# read common info
version = subtitle.find('td', {'class': 'name'}).text
download_link = subtitle.find('a', {'class': 'btn-success'})['href'].strip('\'')
# read the episode info
if is_episode:
episode_numbers = soup.select_one('#summary-wrapper > div.container.summary span.main-title-sxe').text
season_num = None
episode_num = None
matches = episode_re.match(episode_numbers.strip())
if matches:
season_num = int(matches.group(1))
episode_num = int(matches.group(2))
episode_title = soup.select_one('#summary-wrapper > div.container.summary span.main-title').text
subtitle = self.subtitle_class(Language.fromalpha2('el'), page_link, show_title, season_num,
episode_num, episode_title, year_num, version, download_link)
# read the movie info
else:
subtitle = self.subtitle_class(Language.fromalpha2('el'), page_link, None, None, None, show_title,
year_num, version, download_link)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
# lookup show_id
if isinstance(video, Episode):
titles = [video.series] + video.alternative_series
elif isinstance(video, Movie):
titles = [video.title] + video.alternative_titles
else:
titles = []
show_ids = None
for title in titles:
show_ids = self.get_show_ids(title, video.year, isinstance(video, Episode))
if show_ids is not None and len(show_ids) > 0:
break
subtitles = []
# query for subtitles with the show_id
for show_id in show_ids:
if isinstance(video, Episode):
subtitles += [s for s in self.query(show_id, video.series, video.season, video.episode, video.title)
if s.language in languages and s.season == video.season and s.episode == video.episode]
elif isinstance(video, Movie):
subtitles += [s for s in self.query(show_id, None, None, None, video.title)
if s.language in languages and s.year == video.year]
return subtitles
def download_subtitle(self, subtitle):
if isinstance(subtitle, SubzSubtitle):
# download the subtitle
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10)
r.raise_for_status()
if not r.content:
logger.debug('Unable to download subtitle. No data returned from provider')
return
archive = _get_archive(r.content)
subtitle_content = _get_subtitle_from_archive(archive)
if subtitle_content:
subtitle.content = fix_line_ending(subtitle_content)
else:
logger.debug('Could not extract subtitle from %r', archive)
def _get_archive(content):
# open the archive
archive_stream = io.BytesIO(content)
archive = None
if rarfile.is_rarfile(archive_stream):
logger.debug('Identified rar archive')
archive = rarfile.RarFile(archive_stream)
elif zipfile.is_zipfile(archive_stream):
logger.debug('Identified zip archive')
archive = zipfile.ZipFile(archive_stream)
return archive
def _get_subtitle_from_archive(archive):
for name in archive.namelist():
# discard hidden files
if os.path.split(name)[-1].startswith('.'):
continue
# discard non-subtitle files
if not name.lower().endswith(SUBTITLE_EXTENSIONS):
continue
return archive.read(name)
return None
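Reviewer note: Subz's _get_suggestions hits a typeahead endpoint that answers JSON, then filters client-side on the type field; in miniature (payload is fabricated):

import json

payload = json.loads('[{"type": "series", "title": "Show", "link": "/series/show"},'
                     ' {"type": "movie", "title": "Show", "link": "/movies/show"}]')
series_only = [s for s in payload if "type" in s and s["type"] == "series"]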

@@ -4,6 +4,7 @@ import io
 import logging
 import math
 import re
+import time
 import rarfile
@@ -23,6 +24,7 @@ from subliminal.utils import sanitize_release_group
 from subliminal.subtitle import guess_matches
 from subliminal.video import Episode, Movie
 from subliminal.subtitle import fix_line_ending
+from subliminal_patch.pitcher import pitchers, load_verification, store_verification
 from subzero.language import Language
 from random import randint
@@ -142,6 +144,7 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
         logger.debug('User-Agent set to %s', self.session.headers['User-Agent'])
         self.session.headers['Referer'] = self.server_url
         logger.debug('Referer set to %s', self.session.headers['Referer'])
+        load_verification("titlovi", self.session)
 
     def terminate(self):
         self.session.close()
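Reviewer note: the next hunk teaches query() to recover from Cloudflare's reCAPTCHA interstitial. It scrapes three values off the 403 page, has a pitcher solve the captcha, then submits the token to Cloudflare's check endpoint. Condensed, with the HTML and server URL as placeholders:

import re
from requests import Session

html = 'data-sitekey="KEY" ... type="hidden" name="s" value="S" ... data-ray="RAY"'  # stand-in 403 body
site_key = re.search(r'data-sitekey="(.+?)"', html).group(1)
challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', html).group(1)
challenge_ray = re.search(r'data-ray="(.+?)"', html).group(1)

server_url = "https://titlovi.com"  # placeholder; the provider uses its configured server_url
session = Session()
session.get(server_url + "/cdn-cgi/l/chk_captcha",
            params={"s": challenge_s, "id": challenge_ray, "g-recaptcha-response": "TOKEN"},
            allow_redirects=False, timeout=10)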
@@ -182,110 +185,144 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
                 r = self.session.get(self.search_url, params=params, timeout=10)
                 r.raise_for_status()
             except RequestException as e:
-                logger.exception('RequestException %s', e)
-                break
-
-            try:
-                soup = BeautifulSoup(r.content, 'lxml')
-
-                # number of results
-                result_count = int(soup.select_one('.results_count b').string)
-            except:
-                result_count = None
-
-            # exit if no results
-            if not result_count:
-                if not subtitles:
-                    logger.debug('No subtitles found')
-                else:
-                    logger.debug("No more subtitles found")
-                break
-
-            # number of pages with results
-            pages = int(math.ceil(result_count / float(items_per_page)))
-
-            # get current page
-            if 'pg' in params:
-                current_page = int(params['pg'])
-
-            try:
-                sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
-                for sub in sublist:
-                    # subtitle id
-                    sid = sub.find(attrs={'data-id': True}).attrs['data-id']
-                    # get download link
-                    download_link = self.download_url + sid
-                    # title and alternate title
-                    match = title_re.search(sub.a.string)
-                    if match:
-                        _title = match.group('title')
-                        alt_title = match.group('altitle')
-                    else:
-                        continue
-
-                    # page link
-                    page_link = self.server_url + sub.a.attrs['href']
-                    # subtitle language
-                    match = lang_re.search(sub.select_one('.lang').attrs['src'])
-                    if match:
-                        try:
-                            # decode language
-                            lang = Language.fromtitlovi(match.group('lang')+match.group('script'))
-                        except ValueError:
-                            continue
-
-                    # relase year or series start year
-                    match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
-                    if match:
-                        r_year = int(match.group('year'))
-                    # fps
-                    match = fps_re.search(sub.select_one('.fps').string)
-                    if match:
-                        fps = match.group('fps')
-
-                    # releases
-                    releases = str(sub.select_one('.fps').parent.contents[0].string)
-
-                    # handle movies and series separately
-                    if is_episode:
-                        # season and episode info
-                        sxe = sub.select_one('.s0xe0y').string
-                        r_season = None
-                        r_episode = None
-                        if sxe:
-                            match = season_re.search(sxe)
-                            if match:
-                                r_season = int(match.group('season'))
-                            match = episode_re.search(sxe)
-                            if match:
-                                r_episode = int(match.group('episode'))
-
-                        subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
-                                                       alt_title=alt_title, season=r_season, episode=r_episode,
-                                                       year=r_year, fps=fps,
-                                                       asked_for_release_group=video.release_group,
-                                                       asked_for_episode=episode)
-                    else:
-                        subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
-                                                       alt_title=alt_title, year=r_year, fps=fps,
-                                                       asked_for_release_group=video.release_group)
+                captcha_passed = False
+                if e.response.status_code == 403 and "data-sitekey" in e.response.content:
+                    logger.info('titlovi: Solving captcha. This might take a couple of minutes, but should only '
+                                'happen once every so often')
+
+                    site_key = re.search(r'data-sitekey="(.+?)"', e.response.content).group(1)
+                    challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', e.response.content).group(1)
+                    challenge_ray = re.search(r'data-ray="(.+?)"', e.response.content).group(1)
+                    if not all([site_key, challenge_s, challenge_ray]):
+                        raise Exception("titlovi: Captcha site-key not found!")
+
+                    pitcher = pitchers.get_pitcher()("titlovi", e.request.url, site_key,
+                                                     user_agent=self.session.headers["User-Agent"],
+                                                     cookies=self.session.cookies.get_dict(),
+                                                     is_invisible=True)
+
+                    result = pitcher.throw()
+                    if not result:
+                        raise Exception("titlovi: Couldn't solve captcha!")
+
+                    s_params = {
+                        "s": challenge_s,
+                        "id": challenge_ray,
+                        "g-recaptcha-response": result,
+                    }
+                    r = self.session.get(self.server_url + "/cdn-cgi/l/chk_captcha", params=s_params, timeout=10,
+                                         allow_redirects=False)
+                    r.raise_for_status()
+                    r = self.session.get(self.search_url, params=params, timeout=10)
+                    r.raise_for_status()
+                    store_verification("titlovi", self.session)
+                    captcha_passed = True
+
+                if not captcha_passed:
+                    logger.exception('RequestException %s', e)
+                    break
+            else:
+                try:
+                    soup = BeautifulSoup(r.content, 'lxml')
+
+                    # number of results
+                    result_count = int(soup.select_one('.results_count b').string)
+                except:
+                    result_count = None
+
+                # exit if no results
+                if not result_count:
+                    if not subtitles:
+                        logger.debug('No subtitles found')
+                    else:
+                        logger.debug("No more subtitles found")
+                    break
+
+                # number of pages with results
+                pages = int(math.ceil(result_count / float(items_per_page)))
+
+                # get current page
+                if 'pg' in params:
+                    current_page = int(params['pg'])
+
+                try:
+                    sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
+                    for sub in sublist:
+                        # subtitle id
+                        sid = sub.find(attrs={'data-id': True}).attrs['data-id']
+                        # get download link
+                        download_link = self.download_url + sid
+                        # title and alternate title
+                        match = title_re.search(sub.a.string)
+                        if match:
+                            _title = match.group('title')
+                            alt_title = match.group('altitle')
+                        else:
+                            continue
+
+                        # page link
+                        page_link = self.server_url + sub.a.attrs['href']
+                        # subtitle language
+                        match = lang_re.search(sub.select_one('.lang').attrs['src'])
+                        if match:
+                            try:
+                                # decode language
+                                lang = Language.fromtitlovi(match.group('lang')+match.group('script'))
+                            except ValueError:
+                                continue
+
+                        # relase year or series start year
+                        match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
+                        if match:
+                            r_year = int(match.group('year'))
+                        # fps
+                        match = fps_re.search(sub.select_one('.fps').string)
+                        if match:
+                            fps = match.group('fps')
+
+                        # releases
+                        releases = str(sub.select_one('.fps').parent.contents[0].string)
+
+                        # handle movies and series separately
+                        if is_episode:
+                            # season and episode info
+                            sxe = sub.select_one('.s0xe0y').string
+                            r_season = None
+                            r_episode = None
+                            if sxe:
+                                match = season_re.search(sxe)
+                                if match:
+                                    r_season = int(match.group('season'))
+                                match = episode_re.search(sxe)
+                                if match:
logger.debug('Found subtitle %r', subtitle) r_episode = int(match.group('episode'))
# prime our matches so we can use the values later subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
subtitle.get_matches(video) alt_title=alt_title, season=r_season, episode=r_episode,
year=r_year, fps=fps,
# add found subtitles asked_for_release_group=video.release_group,
subtitles.append(subtitle) asked_for_episode=episode)
else:
finally: subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
soup.decompose() alt_title=alt_title, year=r_year, fps=fps,
asked_for_release_group=video.release_group)
# stop on last page logger.debug('Found subtitle %r', subtitle)
if current_page >= pages:
break # prime our matches so we can use the values later
subtitle.get_matches(video)
# increment current page
params['pg'] = current_page + 1 # add found subtitles
logger.debug('Getting page %d', params['pg']) subtitles.append(subtitle)
finally:
soup.decompose()
# stop on last page
if current_page >= pages:
break
# increment current page
params['pg'] = current_page + 1
logger.debug('Getting page %d', params['pg'])
return subtitles return subtitles
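The 403 handler above scrapes three values out of Cloudflare's challenge page before handing them to the captcha pitcher. A standalone sketch of that extraction step against canned markup (the HTML here is illustrative; real challenge pages vary):

import re

challenge_html = '''
<form class="challenge-form">
  <div class="g-recaptcha" data-sitekey="6Lc-EXAMPLE" data-ray="4a5b6c7d8e9f0000"></div>
  <input type="hidden" name="s" value="0123456789abcdef">
</form>
'''

site_key = re.search(r'data-sitekey="(.+?)"', challenge_html).group(1)                      # '6Lc-EXAMPLE'
challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', challenge_html).group(1)   # '0123456789abcdef'
challenge_ray = re.search(r'data-ray="(.+?)"', challenge_html).group(1)                     # '4a5b6c7d8e9f0000'

Note that re.search returns None on a miss, so .group(1) raises AttributeError before the all() guard in the handler ever sees an empty value; the guard is belt-and-braces at best.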

@ -0,0 +1,302 @@
# -*- coding: utf-8 -*-
import logging
import re

from subzero.language import Language
from guessit import guessit
from requests import Session

from subliminal.providers import ParserBeautifulSoup, Provider
from subliminal import __short_version__
from subliminal.cache import SHOW_EXPIRATION_TIME, region
from subliminal.exceptions import AuthenticationError, ConfigurationError
from subliminal.score import get_equivalent_release_groups
from subliminal.subtitle import Subtitle, fix_line_ending, guess_matches
from subliminal.utils import sanitize, sanitize_release_group
from subliminal.video import Episode

logger = logging.getLogger(__name__)

article_re = re.compile(r'^([A-Za-z]{1,3}) (.*)$')
class XSubsSubtitle(Subtitle):
    """XSubs Subtitle."""
    provider_name = 'xsubs'

    def __init__(self, language, page_link, series, season, episode, year, title, version, download_link):
        super(XSubsSubtitle, self).__init__(language, page_link=page_link)
        self.series = series
        self.season = season
        self.episode = episode
        self.year = year
        self.title = title
        self.version = version
        self.download_link = download_link
        self.hearing_impaired = None
        self.encoding = 'windows-1253'

    @property
    def id(self):
        return self.download_link
    def get_matches(self, video):
        matches = set()

        if isinstance(video, Episode):
            # series name
            if video.series and sanitize(self.series) in (
                    sanitize(name) for name in [video.series] + video.alternative_series):
                matches.add('series')
            # season
            if video.season and self.season == video.season:
                matches.add('season')
            # episode
            if video.episode and self.episode == video.episode:
                matches.add('episode')
            # title of the episode
            if video.title and sanitize(self.title) == sanitize(video.title):
                matches.add('title')
            # year
            if (video.original_series and self.year is None) or (video.year and video.year == self.year):
                matches.add('year')
            # release_group
            if (video.release_group and self.version and
                    any(r in sanitize_release_group(self.version)
                        for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
                matches.add('release_group')
            # other properties
            matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True)

        return matches
class XSubsProvider(Provider):
    """XSubs Provider."""
    languages = {Language(l) for l in ['ell']}
    video_types = (Episode,)
    server_url = 'http://xsubs.tv'
    sign_in_url = '/xforum/account/signin/'
    sign_out_url = '/xforum/account/signout/'
    all_series_url = '/series/all.xml'
    series_url = '/series/{:d}/main.xml'
    season_url = '/series/{show_id:d}/{season:d}.xml'
    page_link = '/ice/xsw.xml?srsid={show_id:d}#{season_id:d};{season:d}'
    download_link = '/xthru/getsub/{:d}'
    subtitle_class = XSubsSubtitle

    def __init__(self, username=None, password=None):
        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password
        self.logged_in = False
        self.session = None
    def initialize(self):
        self.session = Session()
        self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)

        # login
        if self.username and self.password:
            logger.info('Logging in')
            self.session.get(self.server_url + self.sign_in_url)
            data = {'username': self.username,
                    'password': self.password,
                    'csrfmiddlewaretoken': self.session.cookies['csrftoken']}
            r = self.session.post(self.server_url + self.sign_in_url, data, allow_redirects=False, timeout=10)

            if r.status_code != 302:
                raise AuthenticationError(self.username)

            logger.debug('Logged in')
            self.logged_in = True

    def terminate(self):
        # logout
        if self.logged_in:
            logger.info('Logging out')
            r = self.session.get(self.server_url + self.sign_out_url, timeout=10)
            r.raise_for_status()
            logger.debug('Logged out')
            self.logged_in = False

        self.session.close()
    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value)
    def _get_show_ids(self):
        # get the shows page
        logger.info('Getting show ids')
        r = self.session.get(self.server_url + self.all_series_url, timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        # populate the show ids
        show_ids = {}
        for show_category in soup.findAll('seriesl'):
            if show_category.attrs['category'] == u'Σειρές':
                for show in show_category.findAll('series'):
                    show_ids[sanitize(show.text)] = int(show['srsid'])
                break
        logger.debug('Found %d show ids', len(show_ids))

        return show_ids
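The element names here ('seriesl', 'series', 'srsid') are only documented by the parser itself; the sample below is a guess at the feed's shape, for illustration:

from subliminal.providers import ParserBeautifulSoup
from subliminal.utils import sanitize

sample = u'<seriesl category="Σειρές"><series srsid="1234">The Wire</series></seriesl>'
soup = ParserBeautifulSoup(sample, ['lxml', 'html.parser'])

show_ids = {}
for show_category in soup.findAll('seriesl'):
    if show_category.attrs['category'] == u'Σειρές':
        for show in show_category.findAll('series'):
            show_ids[sanitize(show.text)] = int(show['srsid'])
        break
# show_ids == {'the wire': 1234}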
    def get_show_id(self, series_names, year=None, country_code=None):
        series_sanitized_names = []
        for name in series_names:
            sanitized_name = sanitize(name)
            series_sanitized_names.append(sanitized_name)
            alternative_name = _get_alternative_name(sanitized_name)
            if alternative_name:
                series_sanitized_names.append(alternative_name)

        show_ids = self._get_show_ids()
        show_id = None

        for series_sanitized in series_sanitized_names:
            # attempt with country
            if not show_id and country_code:
                logger.debug('Getting show id with country')
                show_id = show_ids.get('{series} {country}'.format(series=series_sanitized,
                                                                   country=country_code.lower()))

            # attempt with year
            if not show_id and year:
                logger.debug('Getting show id with year')
                show_id = show_ids.get('{series} {year:d}'.format(series=series_sanitized, year=year))

            # attempt with year in brackets
            if not show_id and year:
                logger.debug('Getting show id with year in brackets')
                show_id = show_ids.get('{series} [{year:d}]'.format(series=series_sanitized, year=year))

            # attempt clean
            if not show_id:
                logger.debug('Getting show id')
                show_id = show_ids.get(series_sanitized)

            if show_id:
                break

        return int(show_id) if show_id else None
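The ladder tries progressively less specific keys until one hits. For a hypothetical sanitized name 'castle' with year=2009 and country_code='us', the keys are probed in this order:

# hypothetical probe order, derived from the branches above
candidates = [
    'castle us',      # series + country
    'castle 2009',    # series + year
    'castle [2009]',  # series + year in brackets
    'castle',         # bare sanitized name
]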
    def query(self, show_id, series, season, year=None, country=None):
        # get the season list of the show
        logger.info('Getting the season list of show id %d', show_id)
        r = self.session.get(self.server_url + self.series_url.format(show_id), timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        series_title = soup.find('name').text

        # loop over season rows
        seasons = soup.findAll('series_group')
        season_id = None

        for season_row in seasons:
            try:
                parsed_season = int(season_row['ssnnum'])
                if parsed_season == season:
                    season_id = int(season_row['ssnid'])
                    break
            except (ValueError, TypeError):
                continue

        if season_id is None:
            logger.debug('Season not found in provider')
            return []

        # get the subtitle list of the season
        logger.info('Getting the subtitle list of season %d', season)
        r = self.session.get(self.server_url + self.season_url.format(show_id=show_id, season=season_id), timeout=10)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

        subtitles = []
        # loop over episode rows
        for episode in soup.findAll('subg'):
            # read the episode info
            etitle = episode.find('etitle')
            if etitle is None:
                continue

            episode_num = int(etitle['number'].split('-')[0])

            sgt = episode.find('sgt')
            if sgt is None:
                continue

            season_num = int(sgt['ssnnum'])

            # filter out unreleased subtitles
            for subtitle in episode.findAll('sr'):
                if subtitle['published_on'] == '':
                    continue

                page_link = self.server_url + self.page_link.format(show_id=show_id, season_id=season_id,
                                                                    season=season_num)
                episode_title = etitle['title']
                version = subtitle.fmt.text + ' ' + subtitle.team.text
                download_link = self.server_url + self.download_link.format(int(subtitle['rlsid']))

                subtitle = self.subtitle_class(Language.fromalpha2('el'), page_link, series_title, season_num,
                                               episode_num, year, episode_title, version, download_link)
                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)

        return subtitles
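The season feed's element names (subg, etitle, sgt, sr, fmt, team, rlsid) are likewise inferred from the parser; a canned document in that assumed shape shows how one version string falls out:

sample = u'<subg><etitle number="1-2" title="Pilot"></etitle><sgt ssnnum="1"></sgt>' \
         u'<sr rlsid="777" published_on="2018-01-01"><fmt>720p</fmt><team>GRP</team></sr></subg>'
soup = ParserBeautifulSoup(sample, ['lxml', 'html.parser'])
for episode in soup.findAll('subg'):
    etitle = episode.find('etitle')
    for sr in episode.findAll('sr'):
        if sr['published_on']:
            print(etitle['title'] + ': ' + sr.fmt.text + ' ' + sr.team.text)  # Pilot: 720p GRP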
    def list_subtitles(self, video, languages):
        if isinstance(video, Episode):
            # lookup show_id
            titles = [video.series] + video.alternative_series
            show_id = self.get_show_id(titles, video.year)

            # query for subtitles with the show_id
            if show_id:
                subtitles = [s for s in self.query(show_id, video.series, video.season, video.year)
                             if s.language in languages and s.season == video.season and s.episode == video.episode]
                if subtitles:
                    return subtitles
            else:
                logger.error('No show id found for %r (%r)', video.series, {'year': video.year})

        return []
    def download_subtitle(self, subtitle):
        if isinstance(subtitle, XSubsSubtitle):
            # download the subtitle
            logger.info('Downloading subtitle %r', subtitle)
            r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link},
                                 timeout=10)
            r.raise_for_status()

            if not r.content:
                logger.debug('Unable to download subtitle. No data returned from provider')
                return

            subtitle.content = fix_line_ending(r.content)
def _get_alternative_name(series):
    article_match = article_re.match(series)
    if article_match:
        return '{series} {article}'.format(series=article_match.group(2), article=article_match.group(1))

    return None
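And the article rewrite in action; inputs arrive already sanitized (lowercased) from get_show_id:

_get_alternative_name('the wire')         # -> 'wire the'
_get_alternative_name('la femme nikita')  # -> 'femme nikita la'
_get_alternative_name('seinfeld')         # -> None (no 1-3 letter leading word to move)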