Merge remote-tracking branch 'origin/development' into halali

pull/387/head
commit d49a04ce15

@@ -41,7 +41,8 @@ defaults = {
'subfolder_custom': '',
'upgrade_subs': 'True',
'days_to_upgrade_subs': '7',
'upgrade_manual': 'True'
'upgrade_manual': 'True',
'anti_captcha_provider': 'None'
},
'auth': {
'type': 'None',
@@ -98,7 +99,15 @@ defaults = {
},
'assrt': {
'token': ''
}}
},
'anticaptcha': {
'anti_captcha_key': ''
},
'deathbycaptcha': {
'username': '',
'password': ''
}
}
settings = simpleconfigparser(defaults=defaults)
settings.read(os.path.join(args.config_dir, 'config', 'config.ini'))
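The new keys land in config.ini under the sections mirrored by the defaults above; an illustrative file fragment (values invented):

[general]
anti_captcha_provider = anti-captcha

[anticaptcha]
anti_captcha_key = YOUR_ANTI_CAPTCHA_KEY

[deathbycaptcha]
username = your_dbc_user
password = your_dbc_pass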

@@ -17,6 +17,16 @@ from get_args import args
# set subliminal_patch user agent
os.environ["SZ_USER_AGENT"] = "Bazarr/1"
# set anti-captcha provider and key
if settings.general.anti_captcha_provider == 'anti-captcha':
os.environ["ANTICAPTCHA_CLASS"] = 'AntiCaptchaProxyLess'
os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = settings.anticaptcha.anti_captcha_key
elif settings.general.anti_captcha_provider == 'AntiCaptchaProxyLessPitcher':
os.environ["ANTICAPTCHA_CLASS"] = 'DBCProxyLess'
os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = ':'.join({settings.deathbycaptcha.username, settings.deathbycaptcha.password})
else:
os.environ["ANTICAPTCHA_CLASS"] = ''
# Check if args.config_dir exists
if not os.path.exists(args.config_dir):
# Create config_dir directory tree

@@ -67,6 +67,8 @@ def configure_logging(debug=False):
fh.setFormatter(f)
fh.addFilter(BlacklistFilter())
fh.addFilter(PublicIPFilter())
fh.setLevel(log_level)
logger.addHandler(fh)
if debug:
logging.getLogger("apscheduler").setLevel(logging.DEBUG)
@@ -90,8 +92,7 @@ def configure_logging(debug=False):
logging.getLogger("rebulk").setLevel(logging.WARNING)
logging.getLogger("stevedore.extension").setLevel(logging.CRITICAL)
logging.getLogger("geventwebsocket.handler").setLevel(logging.WARNING)
fh.setLevel(log_level)
logger.addHandler(fh)
class MyFilter(logging.Filter):

@@ -602,17 +602,17 @@ def search_json(query):
search_list = []
if settings.general.getboolean('use_sonarr'):
c.execute("SELECT title, sonarrSeriesId FROM table_shows WHERE title LIKE ? ORDER BY title",
c.execute("SELECT title, sonarrSeriesId, year FROM table_shows WHERE title LIKE ? ORDER BY title",
('%' + query + '%',))
series = c.fetchall()
for serie in series:
search_list.append(dict([('name', serie[0]), ('url', base_url + 'episodes/' + str(serie[1]))]))
search_list.append(dict([('name', serie[0] + ' (' + serie[2] + ')'), ('url', base_url + 'episodes/' + str(serie[1]))]))
if settings.general.getboolean('use_radarr'):
c.execute("SELECT title, radarrId FROM table_movies WHERE title LIKE ? ORDER BY title", ('%' + query + '%',))
c.execute("SELECT title, radarrId, year FROM table_movies WHERE title LIKE ? ORDER BY title", ('%' + query + '%',))
movies = c.fetchall()
for movie in movies:
search_list.append(dict([('name', movie[0]), ('url', base_url + 'movie/' + str(movie[1]))]))
search_list.append(dict([('name', movie[0] + ' (' + movie[2] + ')'), ('url', base_url + 'movie/' + str(movie[1]))]))
c.close()
response.content_type = 'application/json'
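For reference, the resulting payload is a flat list of name/url pairs; an illustrative result (titles invented):

# search_json('some') ->
# [{'name': 'Some Show (2016)', 'url': base_url + 'episodes/42'},
#  {'name': 'Some Movie (2018)', 'url': base_url + 'movie/7'}]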
@@ -1275,6 +1275,10 @@ def save_settings():
settings_upgrade_manual = 'False'
else:
settings_upgrade_manual = 'True'
settings_anti_captcha_provider = request.forms.get('settings_anti_captcha_provider')
settings_anti_captcha_key = request.forms.get('settings_anti_captcha_key')
settings_death_by_captcha_username = request.forms.get('settings_death_by_captcha_username')
settings_death_by_captcha_password = request.forms.get('settings_death_by_captcha_password')
before = (unicode(settings.general.ip), int(settings.general.port), unicode(settings.general.base_url),
unicode(settings.general.path_mappings), unicode(settings.general.getboolean('use_sonarr')),
@@ -1306,6 +1310,22 @@ def save_settings():
settings.general.upgrade_subs = text_type(settings_upgrade_subs)
settings.general.days_to_upgrade_subs = text_type(settings_days_to_upgrade_subs)
settings.general.upgrade_manual = text_type(settings_upgrade_manual)
settings.general.anti_captcha_provider = text_type(settings_anti_captcha_provider)
settings.anticaptcha.anti_captcha_key = text_type(settings_anti_captcha_key)
settings.deathbycaptcha.username = text_type(settings_death_by_captcha_username)
settings.deathbycaptcha.password = text_type(settings_death_by_captcha_password)
# set anti-captcha provider and key
if settings.general.anti_captcha_provider == 'anti-captcha':
os.environ["ANTICAPTCHA_CLASS"] = 'AntiCaptchaProxyLess'
os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = settings.anticaptcha.anti_captcha_key
elif settings.general.anti_captcha_provider == 'AntiCaptchaProxyLessPitcher':
os.environ["ANTICAPTCHA_CLASS"] = 'DBCProxyLess'
os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = ':'.join(
{settings.deathbycaptcha.username, settings.deathbycaptcha.password})
else:
os.environ["ANTICAPTCHA_CLASS"] = ''
settings.general.minimum_score_movie = text_type(settings_general_minimum_score_movies)
settings.general.use_embedded_subs = text_type(settings_general_embedded)
settings.general.adaptive_searching = text_type(settings_general_adaptive_searching)

@@ -0,0 +1,279 @@
import logging
import random
import time
import re
# based off of https://gist.github.com/doko-desuka/58d9212461f62583f8df9bc6387fade2
# and https://github.com/Anorov/cloudflare-scrape
# and https://github.com/VeNoMouS/cloudflare-scrape-js2py
# Disable "InsecureRequestWarning: Unverified HTTPS request is being made" warnings.
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
from requests.sessions import Session
from copy import deepcopy
try:
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse
DEFAULT_USER_AGENTS = [
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/65.0.3325.181 Chrome/65.0.3325.181 Safari/537.36",
"Mozilla/5.0 (Linux; Android 7.0; Moto G (5) Build/NPPS25.137-93-8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.137 Mobile Safari/537.36",
"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B554a Safari/9537.53",
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:59.0) Gecko/20100101 Firefox/59.0",
"Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0"
]
DEFAULT_USER_AGENT = random.choice(DEFAULT_USER_AGENTS)
BUG_REPORT = (
"Cloudflare may have changed their technique, or there may be a bug in the script.\n\nPlease read " "https://github.com/Anorov/cloudflare-scrape#updates, then file a "
"bug report at https://github.com/Anorov/cloudflare-scrape/issues.")
class CloudflareScraper(Session):
def __init__(self, *args, **kwargs):
super(CloudflareScraper, self).__init__(*args, **kwargs)
if "requests" in self.headers["User-Agent"]:
# Spoof Firefox on Linux if no custom User-Agent has been set
self.headers["User-Agent"] = random.choice(DEFAULT_USER_AGENTS)
def request(self, method, url, *args, **kwargs):
resp = super(CloudflareScraper, self).request(method, url, *args, **kwargs)
# Check if Cloudflare anti-bot is on
if (resp.status_code in (503, 429)
and resp.headers.get("Server", "").startswith("cloudflare")
and b"jschl_vc" in resp.content
and b"jschl_answer" in resp.content
):
return self.solve_cf_challenge(resp, **kwargs)
# Otherwise, no Cloudflare anti-bot detected
return resp
def solve_cf_challenge(self, resp, **original_kwargs):
body = resp.text
parsed_url = urlparse(resp.url)
domain = parsed_url.netloc
submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme, domain)
cloudflare_kwargs = deepcopy(original_kwargs)
params = cloudflare_kwargs.setdefault("params", {})
headers = cloudflare_kwargs.setdefault("headers", {})
headers["Referer"] = resp.url
try:
cf_delay = float(re.search('submit.*?(\d+)', body, re.DOTALL).group(1)) / 1000.0
form_index = body.find('id="challenge-form"')
if form_index == -1:
raise Exception('CF form not found')
sub_body = body[form_index:]
s_match = re.search('name="s" value="(.+?)"', sub_body)
if s_match:
params["s"] = s_match.group(1) # On older variants this parameter is absent.
params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"', sub_body).group(1)
params["pass"] = re.search(r'name="pass" value="(.+?)"', sub_body).group(1)
if body.find('id="cf-dn-', form_index) != -1:
extra_div_expression = re.search('id="cf-dn-.*?>(.+?)<', sub_body).group(1)
# Initial value.
js_answer = self.cf_parse_expression(
re.search('setTimeout\(function\(.*?:(.*?)}', body, re.DOTALL).group(1)
)
# Extract the arithmetic operations.
builder = re.search("challenge-form'\);\s*;(.*);a.value", body, re.DOTALL).group(1)
# Remove a function semicolon before splitting on semicolons, else it messes up the order.
lines = builder.replace(' return +(p)}();', '', 1).split(';')
for line in lines:
if len(line) and '=' in line:
heading, expression = line.split('=', 1)
if 'eval(eval(atob' in expression:
# Uses the expression in an external <div>.
expression_value = self.cf_parse_expression(extra_div_expression)
elif '(function(p' in expression:
# Expression + domain sampling function.
expression_value = self.cf_parse_expression(expression, domain)
else:
expression_value = self.cf_parse_expression(expression)
js_answer = self.cf_arithmetic_op(heading[-1], js_answer, expression_value)
if '+ t.length' in body:
js_answer += len(domain) # Only older variants add the domain length.
params["jschl_answer"] = '%.10f' % js_answer
except Exception as e:
# Something is wrong with the page.
# This may indicate Cloudflare has changed their anti-bot
# technique. If you see this and are running the latest version,
# please open a GitHub issue so I can update the code accordingly.
logging.error("[!] %s Unable to parse Cloudflare anti-bots page. "
"Try upgrading cloudflare-scrape, or submit a bug report "
"if you are running the latest version. Please read "
"https://github.com/Anorov/cloudflare-scrape#updates "
"before submitting a bug report." % e)
raise
# Cloudflare requires a delay before solving the challenge.
# Always wait the full delay + 1s because of 'time.sleep()' imprecision.
time.sleep(cf_delay + 1.0)
# Requests transforms any request into a GET after a redirect,
# so the redirect has to be handled manually here to allow for
# performing other types of requests even as the first request.
method = resp.request.method
cloudflare_kwargs["allow_redirects"] = False
redirect = self.request(method, submit_url, **cloudflare_kwargs)
if 'Location' in redirect.headers:
redirect_location = urlparse(redirect.headers["Location"])
if not redirect_location.netloc:
redirect_url = "%s://%s%s" % (parsed_url.scheme, domain, redirect_location.path)
return self.request(method, redirect_url, **original_kwargs)
return self.request(method, redirect.headers["Location"], **original_kwargs)
else:
return redirect
def cf_sample_domain_function(self, func_expression, domain):
parameter_start_index = func_expression.find('}(') + 2
# Send the expression with the "+" char and enclosing parenthesis included, as they are
# stripped inside '.cf_parse_expression()'.
sample_index = self.cf_parse_expression(
func_expression[parameter_start_index: func_expression.rfind(')))')]
)
return ord(domain[int(sample_index)])
def cf_arithmetic_op(self, op, a, b):
if op == '+':
return a + b
elif op == '/':
return a / float(b)
elif op == '*':
return a * float(b)
elif op == '-':
return a - b
else:
raise Exception('Unknown operation')
def cf_parse_expression(self, expression, domain=None):
def _get_jsfuck_number(section):
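# JSFuck-style tokens: '!+[]' and '+!![]' each evaluate to 1, '+[]' to 0;
# e.g. '(!+[]+!![]+!![])' collapses to '(111)', whose digits sum to 3.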
digit_expressions = section.replace('!+[]', '1').replace('+!![]', '1').replace('+[]', '0').split('+')
return int(
# Form a number string, with each digit as the sum of the values inside each parenthesis block.
''.join(
str(sum(int(digit_char) for digit_char in digit_expression[1:-1])) # Strip the parenthesis.
for digit_expression in digit_expressions
)
)
if '/' in expression:
dividend, divisor = expression.split('/')
dividend = dividend[2:-1] # Strip the leading '+' char and the enclosing parenthesis.
if domain:
# 2019-04-02: At this moment, this extra domain sampling function always appears on the
# divisor side, at the end.
divisor_a, divisor_b = divisor.split('))+(')
divisor_a = _get_jsfuck_number(divisor_a[5:]) # Left-strip the sequence of "(+(+(".
divisor_b = self.cf_sample_domain_function(divisor_b, domain)
return _get_jsfuck_number(dividend) / float(divisor_a + divisor_b)
else:
divisor = divisor[2:-1]
return _get_jsfuck_number(dividend) / float(_get_jsfuck_number(divisor))
else:
return _get_jsfuck_number(expression[2:-1])
@classmethod
def create_scraper(cls, sess=None, **kwargs):
"""
Convenience function for creating a ready-to-go requests.Session (subclass) object.
"""
scraper = cls()
if sess:
attrs = ["auth", "cert", "cookies", "headers", "hooks", "params", "proxies", "data"]
for attr in attrs:
val = getattr(sess, attr, None)
if val:
setattr(scraper, attr, val)
return scraper
## Functions for integrating cloudflare-scrape with other applications and scripts
@classmethod
def get_tokens(cls, url, user_agent=None, **kwargs):
scraper = cls.create_scraper()
if user_agent:
scraper.headers["User-Agent"] = user_agent
try:
resp = scraper.get(url, **kwargs)
resp.raise_for_status()
except Exception as e:
logging.error("'%s' returned an error. Could not collect tokens." % url)
raise
domain = urlparse(resp.url).netloc
cookie_domain = None
for d in scraper.cookies.list_domains():
if d.startswith(".") and d in ("." + domain):
cookie_domain = d
break
else:
raise ValueError(
"Unable to find Cloudflare cookies. Does the site actually have Cloudflare IUAM (\"I'm Under Attack Mode\") enabled?")
return ({
"__cfduid": scraper.cookies.get("__cfduid", "", domain=cookie_domain),
"cf_clearance": scraper.cookies.get("cf_clearance", "", domain=cookie_domain)
},
scraper.headers["User-Agent"]
)
def get_live_tokens(self, domain):
for d in self.cookies.list_domains():
if d.startswith(".") and d in ("." + domain):
cookie_domain = d
break
else:
raise ValueError(
"Unable to find Cloudflare cookies. Does the site actually have Cloudflare IUAM (\"I'm Under Attack Mode\") enabled?")
return ({
"__cfduid": self.cookies.get("__cfduid", "", domain=cookie_domain),
"cf_clearance": self.cookies.get("cf_clearance", "", domain=cookie_domain)
},
self.headers["User-Agent"]
)
@classmethod
def get_cookie_string(cls, url, user_agent=None, **kwargs):
"""
Convenience function for building a Cookie HTTP header value.
"""
tokens, user_agent = cls.get_tokens(url, user_agent=user_agent, **kwargs)
return "; ".join("=".join(pair) for pair in tokens.items()), user_agent
create_scraper = CloudflareScraper.create_scraper
get_tokens = CloudflareScraper.get_tokens
get_cookie_string = CloudflareScraper.get_cookie_string
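For reference, a minimal usage sketch of the module-level helpers above (the URL is a placeholder):

import cfscrape

scraper = cfscrape.create_scraper()  # drop-in requests.Session replacement
html = scraper.get("http://cf-protected.example/").content  # challenge solved transparently
# Or harvest the clearance cookies and the matching User-Agent for reuse elsewhere:
tokens, user_agent = cfscrape.get_tokens("http://cf-protected.example/")
cookie_header, user_agent = cfscrape.get_cookie_string("http://cf-protected.example/")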

@@ -0,0 +1,516 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Death by Captcha HTTP and socket API clients.
There are two types of Death by Captcha (DBC hereinafter) API: HTTP and
socket ones. Both offer the same functionality, with the socket API
sporting faster responses and using far fewer connections.
To access the socket API, use SocketClient class; for the HTTP API, use
HttpClient class. Both are thread-safe. SocketClient keeps a persistent
connection opened and serializes all API requests sent through it, thus
it is advised to keep a pool of them if your script is heavily
multithreaded.
Both SocketClient and HttpClient give you the following methods:
get_user()
Returns your DBC account details as a dict with the following keys:
"user": your account numeric ID; if login fails, it will be the only
item with the value of 0;
"rate": your CAPTCHA rate, i.e. how much you will be charged for one
solved CAPTCHA in US cents;
"balance": your DBC account balance in US cents;
"is_banned": flag indicating whether your account is suspended or not.
get_balance()
Returns your DBC account balance in US cents.
get_captcha(cid)
Returns an uploaded CAPTCHA details as a dict with the following keys:
"captcha": the CAPTCHA numeric ID; if no such CAPTCHAs found, it will
be the only item with the value of 0;
"text": the CAPTCHA text, if solved, otherwise None;
"is_correct": flag indicating whether the CAPTCHA was solved correctly
(DBC can detect that in rare cases).
The only argument `cid` is the CAPTCHA numeric ID.
get_text(cid)
Returns an uploaded CAPTCHA text (None if not solved). The only argument
`cid` is the CAPTCHA numeric ID.
report(cid)
Reports an incorrectly solved CAPTCHA. The only argument `cid` is the
CAPTCHA numeric ID. Returns True on success, False otherwise.
upload(captcha)
Uploads a CAPTCHA. The only argument `captcha` can be either a file-like
object (any object with a `read` method defined, actually, so StringIO
will do) or a CAPTCHA image file name. On successful upload you'll get
the CAPTCHA details dict (see get_captcha() method).
NOTE: AT THIS POINT THE UPLOADED CAPTCHA IS NOT SOLVED YET! You have
to poll for its status periodically using get_captcha() or get_text()
method until the CAPTCHA is solved and you get the text.
decode(captcha, timeout=DEFAULT_TIMEOUT)
A convenient method that uploads a CAPTCHA and polls for its status
periodically, but no longer than `timeout` (defaults to 60 seconds).
If solved, you'll get the CAPTCHA details dict (see get_captcha()
method for details). See upload() method for details on `captcha`
argument.
Visit http://www.deathbycaptcha.com/user/api for updates.
"""
import base64
import binascii
import errno
import imghdr
import random
import os
import select
import socket
import sys
import threading
import time
import urllib
import urllib2
try:
from json import read as json_decode, write as json_encode
except ImportError:
try:
from json import loads as json_decode, dumps as json_encode
except ImportError:
from simplejson import loads as json_decode, dumps as json_encode
# API version and unique software ID
API_VERSION = 'DBC/Python v4.6'
# Default CAPTCHA timeout and decode() polling interval
DEFAULT_TIMEOUT = 60
DEFAULT_TOKEN_TIMEOUT = 120
POLLS_INTERVAL = [1, 1, 2, 3, 2, 2, 3, 2, 2]
DFLT_POLL_INTERVAL = 3
# Base HTTP API url
HTTP_BASE_URL = 'http://api.dbcapi.me/api'
# Preferred HTTP API server's response content type, do not change
HTTP_RESPONSE_TYPE = 'application/json'
# Socket API server's host & ports range
SOCKET_HOST = 'api.dbcapi.me'
SOCKET_PORTS = range(8123, 8131)
def _load_image(captcha):
if hasattr(captcha, 'read'):
img = captcha.read()
elif type(captcha) == bytearray:
img = captcha
else:
img = ''
try:
captcha_file = open(captcha, 'rb')
except Exception:
raise
else:
img = captcha_file.read()
captcha_file.close()
if not len(img):
raise ValueError('CAPTCHA image is empty')
elif imghdr.what(None, img) is None:
raise TypeError('Unknown CAPTCHA image type')
else:
return img
class AccessDeniedException(Exception):
pass
class Client(object):
"""Death by Captcha API Client."""
def __init__(self, username, password):
self.is_verbose = False
self.userpwd = {'username': username, 'password': password}
def _log(self, cmd, msg=''):
if self.is_verbose:
print '%d %s %s' % (time.time(), cmd, msg.rstrip())
return self
def close(self):
pass
def connect(self):
pass
def get_user(self):
"""Fetch user details -- ID, balance, rate and banned status."""
raise NotImplementedError()
def get_balance(self):
"""Fetch user balance (in US cents)."""
return self.get_user().get('balance')
def get_captcha(self, cid):
"""Fetch a CAPTCHA details -- ID, text and correctness flag."""
raise NotImplementedError()
def get_text(self, cid):
"""Fetch a CAPTCHA text."""
return self.get_captcha(cid).get('text') or None
def report(self, cid):
"""Report a CAPTCHA as incorrectly solved."""
raise NotImplementedError()
def upload(self, captcha):
"""Upload a CAPTCHA.
Accepts file names and file-like objects. Returns CAPTCHA details
dict on success.
"""
raise NotImplementedError()
def decode(self, captcha=None, timeout=None, **kwargs):
"""
Try to solve a CAPTCHA.
See Client.upload() for arguments details.
Uploads a CAPTCHA, polls for its status periodically with arbitrary
timeout (in seconds), returns CAPTCHA details if (correctly) solved.
"""
if not timeout:
if not captcha:
timeout = DEFAULT_TOKEN_TIMEOUT
else:
timeout = DEFAULT_TIMEOUT
deadline = time.time() + (max(0, timeout) or DEFAULT_TIMEOUT)
uploaded_captcha = self.upload(captcha, **kwargs)
if uploaded_captcha:
intvl_idx = 0 # POLL_INTERVAL index
while deadline > time.time() and not uploaded_captcha.get('text'):
intvl, intvl_idx = self._get_poll_interval(intvl_idx)
time.sleep(intvl)
pulled = self.get_captcha(uploaded_captcha['captcha'])
if pulled['captcha'] == uploaded_captcha['captcha']:
uploaded_captcha = pulled
if uploaded_captcha.get('text') and \
uploaded_captcha.get('is_correct'):
return uploaded_captcha
def _get_poll_interval(self, idx):
"""Returns poll interval and next index depending on index provided"""
if len(POLLS_INTERVAL) > idx:
intvl = POLLS_INTERVAL[idx]
else:
intvl = DFLT_POLL_INTERVAL
idx += 1
return intvl, idx
class HttpClient(Client):
"""Death by Captcha HTTP API client."""
def __init__(self, *args):
Client.__init__(self, *args)
self.opener = urllib2.build_opener(urllib2.HTTPRedirectHandler())
def _call(self, cmd, payload=None, headers=None):
if headers is None:
headers = {}
headers['Accept'] = HTTP_RESPONSE_TYPE
headers['User-Agent'] = API_VERSION
if hasattr(payload, 'items'):
payload = urllib.urlencode(payload)
self._log('SEND', '%s %d %s' % (cmd, len(payload), payload))
else:
self._log('SEND', '%s' % cmd)
if payload is not None:
headers['Content-Length'] = len(payload)
try:
response = self.opener.open(urllib2.Request(
HTTP_BASE_URL + '/' + cmd.strip('/'),
data=payload,
headers=headers
)).read()
except urllib2.HTTPError, err:
if 403 == err.code:
raise AccessDeniedException('Access denied, please check'
' your credentials and/or balance')
elif 400 == err.code or 413 == err.code:
raise ValueError("CAPTCHA was rejected by the service, check"
" if it's a valid image")
elif 503 == err.code:
raise OverflowError("CAPTCHA was rejected due to service"
" overload, try again later")
else:
raise err
else:
self._log('RECV', '%d %s' % (len(response), response))
try:
return json_decode(response)
except Exception:
raise RuntimeError('Invalid API response')
return {}
def get_user(self):
return self._call('user', self.userpwd.copy()) or {'user': 0}
def get_captcha(self, cid):
return self._call('captcha/%d' % cid) or {'captcha': 0}
def report(self, cid):
return not self._call('captcha/%d/report' % cid,
self.userpwd.copy()).get('is_correct')
def upload(self, captcha=None, **kwargs):
boundary = binascii.hexlify(os.urandom(16))
banner = kwargs.get('banner', '')
if banner:
kwargs['banner'] = 'base64:' + base64.b64encode(_load_image(banner))
body = '\r\n'.join(('\r\n'.join((
'--%s' % boundary,
'Content-Disposition: form-data; name="%s"' % k,
'Content-Type: text/plain',
'Content-Length: %d' % len(str(v)),
'',
str(v)
))) for k, v in self.userpwd.items())
body += '\r\n'.join(('\r\n'.join((
'--%s' % boundary,
'Content-Disposition: form-data; name="%s"' % k,
'Content-Type: text/plain',
'Content-Length: %d' % len(str(v)),
'',
str(v)
))) for k, v in kwargs.items())
if captcha:
img = _load_image(captcha)
body += '\r\n'.join((
'',
'--%s' % boundary,
'Content-Disposition: form-data; name="captchafile"; '
'filename="captcha"',
'Content-Type: application/octet-stream',
'Content-Length: %d' % len(img),
'',
img,
'--%s--' % boundary,
''
))
response = self._call('captcha', body, {
'Content-Type': 'multipart/form-data; boundary="%s"' % boundary
}) or {}
if response.get('captcha'):
return response
class SocketClient(Client):
"""Death by Captcha socket API client."""
TERMINATOR = '\r\n'
def __init__(self, *args):
Client.__init__(self, *args)
self.socket_lock = threading.Lock()
self.socket = None
def close(self):
if self.socket:
self._log('CLOSE')
try:
self.socket.shutdown(socket.SHUT_RDWR)
except socket.error:
pass
finally:
self.socket.close()
self.socket = None
def connect(self):
if not self.socket:
self._log('CONN')
host = (socket.gethostbyname(SOCKET_HOST),
random.choice(SOCKET_PORTS))
self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.socket.settimeout(0)
try:
self.socket.connect(host)
except socket.error, err:
if (err.args[0] not in
(errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
self.close()
raise err
return self.socket
def __del__(self):
self.close()
def _sendrecv(self, sock, buf):
self._log('SEND', buf)
fds = [sock]
buf += self.TERMINATOR
response = ''
intvl_idx = 0
while True:
intvl, intvl_idx = self._get_poll_interval(intvl_idx)
rds, wrs, exs = select.select((not buf and fds) or [],
(buf and fds) or [],
fds,
intvl)
if exs:
raise IOError('select() failed')
try:
if wrs:
while buf:
buf = buf[wrs[0].send(buf):]
elif rds:
while True:
s = rds[0].recv(256)
if not s:
raise IOError('recv(): connection lost')
else:
response += s
except socket.error, err:
if (err.args[0] not in
(errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
raise err
if response.endswith(self.TERMINATOR):
self._log('RECV', response)
return response.rstrip(self.TERMINATOR)
raise IOError('send/recv timed out')
def _call(self, cmd, data=None):
if data is None:
data = {}
data['cmd'] = cmd
data['version'] = API_VERSION
request = json_encode(data)
response = None
for _ in range(2):
if not self.socket and cmd != 'login':
self._call('login', self.userpwd.copy())
self.socket_lock.acquire()
try:
sock = self.connect()
response = self._sendrecv(sock, request)
except IOError, err:
sys.stderr.write(str(err) + "\n")
self.close()
except socket.error, err:
sys.stderr.write(str(err) + "\n")
self.close()
raise IOError('Connection refused')
else:
break
finally:
self.socket_lock.release()
if response is None:
raise IOError('Connection lost or timed out during API request')
try:
response = json_decode(response)
except Exception:
raise RuntimeError('Invalid API response')
if not response.get('error'):
return response
error = response['error']
if error in ('not-logged-in', 'invalid-credentials'):
raise AccessDeniedException('Access denied, check your credentials')
elif 'banned' == error:
raise AccessDeniedException('Access denied, account is suspended')
elif 'insufficient-funds' == error:
raise AccessDeniedException(
'CAPTCHA was rejected due to low balance')
elif 'invalid-captcha' == error:
raise ValueError('CAPTCHA is not a valid image')
elif 'service-overload' == error:
raise OverflowError(
'CAPTCHA was rejected due to service overload, try again later')
else:
self.socket_lock.acquire()
self.close()
self.socket_lock.release()
raise RuntimeError('API server error occurred: %s' % error)
def get_user(self):
return self._call('user') or {'user': 0}
def get_captcha(self, cid):
return self._call('captcha', {'captcha': cid}) or {'captcha': 0}
def upload(self, captcha=None, **kwargs):
data = {}
if captcha:
data['captcha'] = base64.b64encode(_load_image(captcha))
if kwargs:
banner = kwargs.get('banner', '')
if banner:
kwargs['banner'] = base64.b64encode(_load_image(banner))
data.update(kwargs)
response = self._call('upload', data)
if response.get('captcha'):
uploaded_captcha = dict(
(k, response.get(k))
for k in ('captcha', 'text', 'is_correct')
)
if not uploaded_captcha['text']:
uploaded_captcha['text'] = None
return uploaded_captcha
def report(self, cid):
return not self._call('report', {'captcha': cid}).get('is_correct')
if '__main__' == __name__:
# Put your DBC username & password here:
# client = HttpClient(sys.argv[1], sys.argv[2])
client = SocketClient(sys.argv[1], sys.argv[2])
client.is_verbose = True
print 'Your balance is %s US cents' % client.get_balance()
for fn in sys.argv[3:]:
try:
# Put your CAPTCHA image file name or file-like object, and optional
# solving timeout (in seconds) here:
captcha = client.decode(fn, DEFAULT_TIMEOUT)
except Exception, e:
sys.stderr.write('Failed uploading CAPTCHA: %s\n' % (e, ))
captcha = None
if captcha:
print 'CAPTCHA %d solved: %s' % \
(captcha['captcha'], captcha['text'])
# Report as incorrectly solved if needed. Make sure the CAPTCHA was
# in fact incorrectly solved!
# try:
# client.report(captcha['captcha'])
# except Exception, e:
# sys.stderr.write('Failed reporting CAPTCHA: %s\n' % (e, ))
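Beyond the image-CAPTCHA demo above, the pitcher added later in this commit drives this client through DBC's token API (type 4). A minimal sketch of that flow, with invented credentials and site key:

import json
from deathbycaptcha import SocketClient, DEFAULT_TOKEN_TIMEOUT

client = SocketClient('dbc_user', 's3cret')  # invented credentials
payload = json.dumps({'googlekey': 'SITE_KEY', 'pageurl': 'https://example.com/login'})
result = client.decode(timeout=DEFAULT_TOKEN_TIMEOUT, type=4, token_params=payload)
if result and result['is_correct']:
    print(result['text'])  # the g-recaptcha-response token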

@@ -0,0 +1,7 @@
from .base import AnticaptchaClient
from .tasks import NoCaptchaTask, NoCaptchaTaskProxylessTask, ImageToTextTask, FunCaptchaTask
from .proxy import Proxy
from .exceptions import AnticaptchaException
from .fields import SimpleText, Image, WebLink, TextInput, Textarea, Checkbox, Select, Radio, ImageUpload
AnticatpchaException = AnticaptchaException

@@ -0,0 +1,114 @@
import requests
import time
from six.moves.urllib_parse import urljoin
from .exceptions import AnticaptchaException
SLEEP_EVERY_CHECK_FINISHED = 3
MAXIMUM_JOIN_TIME = 60 * 5
class Job(object):
client = None
task_id = None
_last_result = None
def __init__(self, client, task_id):
self.client = client
self.task_id = task_id
def _update(self):
self._last_result = self.client.getTaskResult(self.task_id)
def check_is_ready(self):
self._update()
return self._last_result['status'] == 'ready'
def get_solution_response(self): # Recaptcha
return self._last_result['solution']['gRecaptchaResponse']
def get_token_response(self): # Funcaptcha
return self._last_result['solution']['token']
def get_answers(self):
return self._last_result['solution']['answers']
def get_captcha_text(self): # Image
return self._last_result['solution']['text']
def report_incorrect(self):
return self.client.reportIncorrectImage(self.task_id)
def join(self, maximum_time=None):
elapsed_time = 0
maximum_time = maximum_time or MAXIMUM_JOIN_TIME
while not self.check_is_ready():
time.sleep(SLEEP_EVERY_CHECK_FINISHED)
elapsed_time += SLEEP_EVERY_CHECK_FINISHED
if elapsed_time is not None and elapsed_time > maximum_time:
raise AnticaptchaException(None, 250,
"The execution time exceeded a maximum time of {} seconds. It takes {} seconds.".format(
maximum_time, elapsed_time))
class AnticaptchaClient(object):
client_key = None
CREATE_TASK_URL = "/createTask"
TASK_RESULT_URL = "/getTaskResult"
BALANCE_URL = "/getBalance"
REPORT_IMAGE_URL = "/reportIncorrectImageCaptcha"
SOFT_ID = 847
language_pool = "en"
def __init__(self, client_key, language_pool="en", host="api.anti-captcha.com", use_ssl=True):
self.client_key = client_key
self.language_pool = language_pool
self.base_url = "{proto}://{host}/".format(proto="https" if use_ssl else "http",
host=host)
self.session = requests.Session()
@property
def client_ip(self):
if not hasattr(self, '_client_ip'):
self._client_ip = self.session.get('http://httpbin.org/ip').json()['origin']
return self._client_ip
def _check_response(self, response):
if response.get('errorId', False) == 11:
response['errorDescription'] = "{} Your client IP address is {}.".format(response['errorDescription'],
self.client_ip)
if response.get('errorId', False):
raise AnticaptchaException(response['errorId'],
response['errorCode'],
response['errorDescription'])
def createTask(self, task):
request = {"clientKey": self.client_key,
"task": task.serialize(),
"softId": self.SOFT_ID,
"languagePool": self.language_pool,
}
response = self.session.post(urljoin(self.base_url, self.CREATE_TASK_URL), json=request).json()
self._check_response(response)
return Job(self, response['taskId'])
def getTaskResult(self, task_id):
request = {"clientKey": self.client_key,
"taskId": task_id}
response = self.session.post(urljoin(self.base_url, self.TASK_RESULT_URL), json=request).json()
self._check_response(response)
return response
def getBalance(self):
request = {"clientKey": self.client_key}
response = self.session.post(urljoin(self.base_url, self.BALANCE_URL), json=request).json()
self._check_response(response)
return response['balance']
def reportIncorrectImage(self, task_id):
request = {"clientKey": self.client_key,
"taskId": task_id
}
response = self.session.post(urljoin(self.base_url, self.REPORT_IMAGE_URL), json=request).json()
self._check_response(response)
return response.get('status', False) != False
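A minimal sketch of the client/job flow defined above, using the proxyless task from tasks.py (key and site data invented):

from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask

client = AnticaptchaClient('API_KEY')  # invented key
task = NoCaptchaTaskProxylessTask('https://example.com/login', 'SITE_KEY')
job = client.createTask(task)
job.join()                          # polls getTaskResult every 3 s, up to 5 min
print(job.get_solution_response())  # the g-recaptcha-response token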

@@ -0,0 +1,23 @@
class AnticaptchaException(Exception):
def __init__(self, error_id, error_code, error_description, *args):
super(AnticaptchaException, self).__init__("[{}:{}]{}".format(error_code, error_id, error_description))
self.error_description = error_description
self.error_id = error_id
self.error_code = error_code
AnticatpchaException = AnticaptchaException
class InvalidWidthException(AnticaptchaException):
def __init__(self, width):
self.width = width
msg = 'Invalid width (%s). Can be one of these: 100, 50, 33, 25.' % (self.width,)
super(InvalidWidthException, self).__init__("AC-1", 1, msg)
class MissingNameException(AnticaptchaException):
def __init__(self, cls):
self.cls = cls
msg = 'Missing name data in {0}. Provide {0}.__init__(name="X") or {0}.serialize(name="X")'.format(str(self.cls))
super(MissingNameException, self).__init__("AC-2", 2, msg)

@@ -0,0 +1,199 @@
import six
from python_anticaptcha.exceptions import InvalidWidthException, MissingNameException
class BaseField(object):
label = None
labelHint = None
def serialize(self, name=None):
data = {}
if self.label:
data['label'] = self.label or False
if self.labelHint:
data['labelHint'] = self.labelHint or False
return data
class NameBaseField(BaseField):
name = None
def serialize(self, name=None):
data = super(NameBaseField, self).serialize(name)
if name:
data['name'] = name
elif self.name:
data['name'] = self.name
else:
raise MissingNameException(cls=self.__class__)
return data
class SimpleText(BaseField):
contentType = 'text'
def __init__(self, content, label=None, labelHint=None, width=None):
self.label = label
self.labelHint = labelHint
self.content = content
self.width = width
def serialize(self, name=None):
data = super(SimpleText, self).serialize(name)
data['contentType'] = self.contentType
data['content'] = self.content
if self.width:
if self.width not in [100, 50, 33, 25]:
raise InvalidWidthException(self.width)
data['inputOptions'] = {}
data['width'] = self.width
return data
class Image(BaseField):
contentType = 'image'
def __init__(self, imageUrl, label=None, labelHint=None):
self.label = label
self.labelHint = labelHint
self.imageUrl = imageUrl
def serialize(self, name=None):
data = super(Image, self).serialize(name)
data['contentType'] = self.contentType
data['content'] = self.imageUrl
return data
class WebLink(BaseField):
contentType = 'link'
def __init__(self, linkText, linkUrl, label=None, labelHint=None, width=None):
self.label = label
self.labelHint = labelHint
self.linkText = linkText
self.linkUrl = linkUrl
self.width = width
def serialize(self, name=None):
data = super(WebLink, self).serialize(name)
data['contentType'] = self.contentType
if self.width:
if self.width not in [100, 50, 33, 25]:
raise InvalidWidthException(self.width)
data['inputOptions'] = {}
data['width'] = self.width
data.update({'content': {'url': self.linkUrl,
'text': self.linkText}})
return data
class TextInput(NameBaseField):
def __init__(self, placeHolder=None, label=None, labelHint=None, width=None):
self.label = label
self.labelHint = labelHint
self.placeHolder = placeHolder
self.width = width
def serialize(self, name=None):
data = super(TextInput, self).serialize(name)
data['inputType'] = 'text'
data['inputOptions'] = {}
if self.width:
if self.width not in [100, 50, 33, 25]:
raise InvalidWidthException(self.width)
data['inputOptions']['width'] = str(self.width)
if self.placeHolder:
data['inputOptions']['placeHolder'] = self.placeHolder
return data
class Textarea(NameBaseField):
def __init__(self, placeHolder=None, rows=None, label=None, width=None, labelHint=None):
self.label = label
self.labelHint = labelHint
self.placeHolder = placeHolder
self.rows = rows
self.width = width
def serialize(self, name=None):
data = super(Textarea, self).serialize(name)
data['inputType'] = 'textarea'
data['inputOptions'] = {}
if self.rows:
data['inputOptions']['rows'] = str(self.rows)
if self.placeHolder:
data['inputOptions']['placeHolder'] = self.placeHolder
if self.width:
data['inputOptions']['width'] = str(self.width)
return data
class Checkbox(NameBaseField):
def __init__(self, text, label=None, labelHint=None):
self.label = label
self.labelHint = labelHint
self.text = text
def serialize(self, name=None):
data = super(Checkbox, self).serialize(name)
data['inputType'] = 'checkbox'
data['inputOptions'] = {'label': self.text}
return data
class Select(NameBaseField):
type = 'select'
def __init__(self, label=None, choices=None, labelHint=None):
self.label = label
self.labelHint = labelHint
self.choices = choices or ()
def get_choices(self):
for choice in self.choices:
if isinstance(choice, six.text_type):
yield choice, choice
else:
yield choice
def serialize(self, name=None):
data = super(Select, self).serialize(name)
data['inputType'] = self.type
data['inputOptions'] = []
for value, caption in self.get_choices():
data['inputOptions'].append({"value": value,
"caption": caption})
return data
class Radio(Select):
type = 'radio'
class ImageUpload(NameBaseField):
def __init__(self, label=None, labelHint=None):
self.label = label
self.labelHint = labelHint
def serialize(self, name=None):
data = super(ImageUpload, self).serialize(name)
data['inputType'] = 'imageUpload'
return data
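Illustrative only: how one of these fields serializes once named (dict ordering may differ):

from python_anticaptcha import TextInput

field = TextInput(placeHolder='What does the image say?', label='Answer', width=50)
print(field.serialize(name='answer'))
# {'label': 'Answer', 'name': 'answer', 'inputType': 'text',
#  'inputOptions': {'width': '50', 'placeHolder': 'What does the image say?'}}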

@@ -0,0 +1,28 @@
from six.moves.urllib_parse import urlparse
class Proxy(object):
def __init__(self, proxy_type, proxy_address, proxy_port, proxy_login, proxy_password):
self.proxyType = proxy_type
self.proxyAddress = proxy_address
self.proxyPort = proxy_port
self.proxyLogin = proxy_login
self.proxyPassword = proxy_password
def serialize(self):
result = {'proxyType': self.proxyType,
'proxyAddress': self.proxyAddress,
'proxyPort': self.proxyPort}
if self.proxyLogin or self.proxyPassword:
result['proxyLogin'] = self.proxyLogin
result['proxyPassword'] = self.proxyPassword
return result
@classmethod
def parse_url(cls, url):
parsed = urlparse(url)
return cls(proxy_type=parsed.scheme,
proxy_address=parsed.hostname,
proxy_port=parsed.port,
proxy_login=parsed.username,
proxy_password=parsed.password)
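For instance (address and credentials invented), parse_url() decomposes a proxy URL into the fields the API expects:

from python_anticaptcha import Proxy

proxy = Proxy.parse_url('http://user:pass@10.0.0.1:8080')
print(proxy.serialize())
# {'proxyType': 'http', 'proxyAddress': '10.0.0.1', 'proxyPort': 8080,
#  'proxyLogin': 'user', 'proxyPassword': 'pass'}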

@@ -0,0 +1,128 @@
import base64
from .fields import BaseField
class BaseTask(object):
def serialize(self, **result):
return result
class ProxyMixin(BaseTask):
def __init__(self, *args, **kwargs):
self.proxy = kwargs.pop('proxy')
self.userAgent = kwargs.pop('user_agent')
self.cookies = kwargs.pop('cookies', '')
super(ProxyMixin, self).__init__(*args, **kwargs)
def serialize(self, **result):
result = super(ProxyMixin, self).serialize(**result)
result.update(self.proxy.serialize())
result['userAgent'] = self.userAgent
if self.cookies:
result['cookies'] = self.cookies
return result
class NoCaptchaTaskProxylessTask(BaseTask):
type = "NoCaptchaTaskProxyless"
websiteURL = None
websiteKey = None
websiteSToken = None
def __init__(self, website_url, website_key, website_s_token=None, is_invisible=None):
self.websiteURL = website_url
self.websiteKey = website_key
self.websiteSToken = website_s_token
self.isInvisible = is_invisible
def serialize(self):
data = {'type': self.type,
'websiteURL': self.websiteURL,
'websiteKey': self.websiteKey}
if self.websiteSToken is not None:
data['websiteSToken'] = self.websiteSToken
if self.isInvisible is not None:
data['isInvisible'] = self.isInvisible
return data
class FunCaptchaTask(ProxyMixin):
type = "FunCaptchaTask"
websiteURL = None
websiteKey = None
def __init__(self, website_url, website_key, *args, **kwargs):
self.websiteURL = website_url
self.websiteKey = website_key
super(FunCaptchaTask, self).__init__(*args, **kwargs)
def serialize(self, **result):
result = super(FunCaptchaTask, self).serialize(**result)
result.update({'type': self.type,
'websiteURL': self.websiteURL,
'websitePublicKey': self.websiteKey})
return result
class NoCaptchaTask(ProxyMixin, NoCaptchaTaskProxylessTask):
type = "NoCaptchaTask"
class ImageToTextTask(object):
type = "ImageToTextTask"
fp = None
phrase = None
case = None
numeric = None
math = None
minLength = None
maxLength = None
def __init__(self, fp, phrase=None, case=None, numeric=None, math=None, min_length=None, max_length=None):
self.fp = fp
self.phrase = phrase
self.case = case
self.numeric = numeric
self.math = math
self.minLength = min_length
self.maxLength = max_length
def serialize(self):
return {'type': self.type,
'body': base64.b64encode(self.fp.read()).decode('utf-8'),
'phrase': self.phrase,
'case': self.case,
'numeric': self.numeric,
'math': self.math,
'minLength': self.minLength,
'maxLength': self.maxLength}
class CustomCaptchaTask(BaseTask):
type = 'CustomCaptchaTask'
imageUrl = None
assignment = None
form = None
def __init__(self, imageUrl, form=None, assignment=None):
self.imageUrl = imageUrl
self.form = form or {}
self.assignment = assignment
def serialize(self):
data = super(CustomCaptchaTask, self).serialize()
data.update({'type': self.type,
'imageUrl': self.imageUrl})
if self.form:
forms = []
for name, field in self.form.items():
if isinstance(field, BaseField):
forms.append(field.serialize(name))
else:
field = field.copy()
field['name'] = name
forms.append(field)
data['forms'] = forms
if self.assignment:
data['assignment'] = self.assignment
return data
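A small sketch of the image task these classes produce (file name invented); the image body is inlined as base64:

from python_anticaptcha import ImageToTextTask

with open('captcha.png', 'rb') as fp:  # invented file
    payload = ImageToTextTask(fp).serialize()
# payload == {'type': 'ImageToTextTask', 'body': '<base64 of the file>', 'phrase': None, ...}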

@@ -518,10 +518,20 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski
hints["expected_title"] = [hints["title"]]
guessed_result = guessit(guess_from, options=hints)
logger.debug('GuessIt found: %s', json.dumps(guessed_result, cls=GuessitEncoder, indent=4, ensure_ascii=False))
video = Video.fromguess(path, guessed_result)
video.hints = hints
# get a possible alternative title from the filename itself
alt_guess = guessit(filename, options=hints)
if "title" in alt_guess and alt_guess["title"] != guessed_result["title"]:
if video_type == "episode":
video.alternative_series.append(alt_guess["title"])
else:
video.alternative_titles.append(alt_guess["title"])
logger.debug("Adding alternative title: %s", alt_guess["title"])
if dont_use_actual_file:
return video
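The fallback matters when the folder-derived guess and the bare filename disagree; an illustrative guess (filename invented):

from guessit import guessit

print(guessit('Some.Show.S01E01.720p.mkv')['title'])  # expected: 'Some Show'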

@@ -8,10 +8,18 @@ import requests
import xmlrpclib
import dns.resolver
from requests import Session, exceptions
from requests import exceptions
from urllib3.util import connection
from retry.api import retry_call
from exceptions import APIThrottled
from dogpile.cache.api import NO_VALUE
from subliminal.cache import region
from cfscrape import CloudflareScraper
try:
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse
from subzero.lib.io import get_viable_encoding
@@ -30,24 +38,58 @@ custom_resolver = dns.resolver.Resolver(configure=False)
custom_resolver.nameservers = ['8.8.8.8', '1.1.1.1']
class CertifiSession(Session):
class CertifiSession(CloudflareScraper):
timeout = 10
def __init__(self):
super(CertifiSession, self).__init__()
self.verify = pem_file
def request(self, *args, **kwargs):
self.headers.update({
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Cache-Control': 'no-cache',
'Pragma': 'no-cache',
'DNT': '1'
})
def request(self, method, url, *args, **kwargs):
if kwargs.get('timeout') is None:
kwargs['timeout'] = self.timeout
return super(CertifiSession, self).request(*args, **kwargs)
parsed_url = urlparse(url)
domain = parsed_url.netloc
cache_key = "cf_data_%s" % domain
if not self.cookies.get("__cfduid", "", domain=domain):
cf_data = region.get(cache_key)
if cf_data is not NO_VALUE:
cf_cookies, user_agent = cf_data
logger.debug("Trying to use old cf data for %s: %s", domain, cf_data)
for cookie, value in cf_cookies.iteritems():
self.cookies.set(cookie, value, domain=domain)
self.headers['User-Agent'] = user_agent
ret = super(CertifiSession, self).request(method, url, *args, **kwargs)
try:
cf_data = self.get_live_tokens(domain)
except:
pass
else:
if cf_data != region.get(cache_key) and self.cookies.get("__cfduid", "", domain=domain)\
and self.cookies.get("cf_clearance", "", domain=domain):
logger.debug("Storing cf data for %s: %s", domain, cf_data)
region.set(cache_key, cf_data)
return ret
class RetryingSession(CertifiSession):
proxied_functions = ("get", "post")
def __init__(self):
super(CertifiSession, self).__init__()
super(RetryingSession, self).__init__()
self.verify = pem_file
proxy = os.environ.get('SZ_HTTP_PROXY')
@@ -62,7 +104,7 @@ class RetryingSession(CertifiSession):
# fixme: may be a little loud
logger.debug("Using proxy %s for: %s", self.proxies["http"], args[0])
return retry_call(getattr(super(CertifiSession, self), method), fargs=args, fkwargs=kwargs, tries=3, delay=5,
return retry_call(getattr(super(RetryingSession, self), method), fargs=args, fkwargs=kwargs, tries=3, delay=5,
exceptions=(exceptions.ConnectionError,
exceptions.ProxyError,
exceptions.SSLError,

@@ -0,0 +1,257 @@
# coding=utf-8
import os
import time
import logging
import json
from subliminal.cache import region
from dogpile.cache.api import NO_VALUE
from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask, NoCaptchaTask, AnticaptchaException,\
Proxy
from deathbycaptcha import SocketClient as DBCClient, DEFAULT_TOKEN_TIMEOUT
logger = logging.getLogger(__name__)
class PitcherRegistry(object):
pitchers = []
pitchers_by_key = {}
def register(self, cls):
idx = len(self.pitchers)
self.pitchers.append(cls)
key = "%s_%s" % (cls.name, cls.needs_proxy)
key_by_source = "%s_%s" % (cls.source, cls.needs_proxy)
self.pitchers_by_key[key] = idx
self.pitchers_by_key[key_by_source] = idx
return cls
def get_pitcher(self, name_or_site=None, with_proxy=False):
name_or_site = name_or_site or os.environ.get("ANTICAPTCHA_CLASS")
if not name_or_site:
raise Exception("AntiCaptcha class not given, exiting")
key = "%s_%s" % (name_or_site, with_proxy)
if key not in self.pitchers_by_key:
raise Exception("Pitcher %s not found (proxy: %s)" % (name_or_site, with_proxy))
return self.pitchers[self.pitchers_by_key.get(key)]
registry = pitchers = PitcherRegistry()
class Pitcher(object):
name = None
source = None
needs_proxy = False
tries = 3
job = None
client = None
client_key = None
website_url = None
website_key = None
website_name = None
solve_time = None
success = False
def __init__(self, website_name, website_url, website_key, tries=3, client_key=None, *args, **kwargs):
self.tries = tries
self.client_key = client_key or os.environ.get("ANTICAPTCHA_ACCOUNT_KEY")
if not self.client_key:
raise Exception("AntiCaptcha key not given, exiting")
self.website_name = website_name
self.website_key = website_key
self.website_url = website_url
self.success = False
self.solve_time = None
def get_client(self):
raise NotImplementedError
def get_job(self):
raise NotImplementedError
def _throw(self):
self.client = self.get_client()
self.job = self.get_job()
def throw(self):
t = time.time()
data = self._throw()
if self.success:
self.solve_time = time.time() - t
logger.info("%s: Solving took %ss", self.website_name, int(self.solve_time))
return data
@registry.register
class AntiCaptchaProxyLessPitcher(Pitcher):
name = "AntiCaptchaProxyLess"
source = "anti-captcha.com"
host = "api.anti-captcha.com"
language_pool = "en"
tries = 5
use_ssl = True
is_invisible = False
def __init__(self, website_name, website_url, website_key, tries=3, host=None, language_pool=None,
use_ssl=True, is_invisible=False, *args, **kwargs):
super(AntiCaptchaProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries, *args,
**kwargs)
self.host = host or self.host
self.language_pool = language_pool or self.language_pool
self.use_ssl = use_ssl
self.is_invisible = is_invisible
def get_client(self):
return AnticaptchaClient(self.client_key, self.language_pool, self.host, self.use_ssl)
def get_job(self):
task = NoCaptchaTaskProxylessTask(website_url=self.website_url, website_key=self.website_key,
is_invisible=self.is_invisible)
return self.client.createTask(task)
def _throw(self):
for i in range(self.tries):
try:
super(AntiCaptchaProxyLessPitcher, self)._throw()
self.job.join()
ret = self.job.get_solution_response()
if ret:
self.success = True
return ret
except AnticaptchaException as e:
if i >= self.tries - 1:
logger.error("%s: Captcha solving finally failed. Exiting", self.website_name)
return
if e.error_code == 'ERROR_ZERO_BALANCE':
logger.error("%s: No balance left on captcha solving service. Exiting", self.website_name)
return
elif e.error_code == 'ERROR_NO_SLOT_AVAILABLE':
logger.info("%s: No captcha solving slot available, retrying", self.website_name)
time.sleep(5.0)
continue
elif e.error_code == 'ERROR_KEY_DOES_NOT_EXIST':
logger.error("%s: Bad AntiCaptcha API key", self.website_name)
return
elif e.error_id is None and e.error_code == 250:
# timeout
if i < self.tries - 1:
    logger.info("%s: Captcha solving timed out, retrying", self.website_name)
    time.sleep(1.0)
    continue
else:
    logger.error("%s: Captcha solving timed out %s times; bailing out",
                 self.website_name, self.tries)
    return
raise
@registry.register
class AntiCaptchaPitcher(AntiCaptchaProxyLessPitcher):
name = "AntiCaptcha"
proxy = None
needs_proxy = True
user_agent = None
cookies = None
def __init__(self, *args, **kwargs):
self.proxy = Proxy.parse_url(kwargs.pop("proxy"))
self.user_agent = kwargs.pop("user_agent")
cookies = kwargs.pop("cookies", {})
if isinstance(cookies, dict):
self.cookies = ";".join(["%s=%s" % (k, v) for k, v in cookies.iteritems()])
super(AntiCaptchaPitcher, self).__init__(*args, **kwargs)
def get_job(self):
task = NoCaptchaTask(website_url=self.website_url, website_key=self.website_key, proxy=self.proxy,
user_agent=self.user_agent, cookies=self.cookies, is_invisible=self.is_invisible)
return self.client.createTask(task)
@registry.register
class DBCProxyLessPitcher(Pitcher):
name = "DeathByCaptchaProxyLess"
source = "deathbycaptcha.com"
username = None
password = None
def __init__(self, website_name, website_url, website_key,
timeout=DEFAULT_TOKEN_TIMEOUT, tries=3, *args, **kwargs):
super(DBCProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries)
self.username, self.password = self.client_key.split(":", 1)
self.timeout = timeout
def get_client(self):
return DBCClient(self.username, self.password)
def get_job(self):
pass
@property
def payload_dict(self):
return {
"googlekey": self.website_key,
"pageurl": self.website_url
}
def _throw(self):
super(DBCProxyLessPitcher, self)._throw()
payload = json.dumps(self.payload_dict)
for i in range(self.tries):
try:
#balance = self.client.get_balance()
data = self.client.decode(timeout=self.timeout, type=4, token_params=payload)
if data and data["is_correct"] and data["text"]:
self.success = True
return data["text"]
except:
raise
@registry.register
class DBCPitcher(DBCProxyLessPitcher):
name = "DeathByCaptcha"
proxy = None
needs_proxy = True
proxy_type = "HTTP"
def __init__(self, *args, **kwargs):
self.proxy = kwargs.pop("proxy")
super(DBCPitcher, self).__init__(*args, **kwargs)
@property
def payload_dict(self):
payload = super(DBCPitcher, self).payload_dict
payload.update({
"proxytype": self.proxy_type,
"proxy": self.proxy
})
return payload
def load_verification(site_name, session, callback=lambda x: None):
ccks = region.get("%s_data" % site_name, expiration_time=15552000)  # 6 months
if ccks != NO_VALUE:
cookies, user_agent = ccks
logger.debug("%s: Re-using previous user agent: %s", site_name.capitalize(), user_agent)
session.headers["User-Agent"] = user_agent
try:
session.cookies._cookies.update(cookies)
return callback(region)
except:
return False
return False
def store_verification(site_name, session):
region.set("%s_data" % site_name, (session.cookies._cookies, session.headers["User-Agent"]))

@@ -4,18 +4,17 @@ import re
import datetime
import subliminal
import time
from random import randint
from dogpile.cache.api import NO_VALUE
from requests import Session
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded, AuthenticationError
from subliminal.cache import region
from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError
from subliminal.providers.addic7ed import Addic7edProvider as _Addic7edProvider, \
Addic7edSubtitle as _Addic7edSubtitle, ParserBeautifulSoup, show_cells_re
from subliminal.cache import region
from subliminal.subtitle import fix_line_ending
from subliminal_patch.utils import sanitize
from subliminal_patch.exceptions import TooManyRequests
from subliminal_patch.pitcher import pitchers, load_verification, store_verification
from subzero.language import Language
logger = logging.getLogger(__name__)
@@ -64,6 +63,7 @@ class Addic7edProvider(_Addic7edProvider):
USE_ADDICTED_RANDOM_AGENTS = False
hearing_impaired_verifiable = True
subtitle_class = Addic7edSubtitle
server_url = 'https://www.addic7ed.com/'
sanitize_characters = {'-', ':', '(', ')', '.', '/'}
@@ -75,45 +75,76 @@ class Addic7edProvider(_Addic7edProvider):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__
if self.USE_ADDICTED_RANDOM_AGENTS:
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
logger.debug("Addic7ed: using random user agents")
self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
self.session.headers['Referer'] = self.server_url
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
logger.debug("Addic7ed: using random user agents")
self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
self.session.headers['Referer'] = self.server_url
# login
if self.username and self.password:
ccks = region.get("addic7ed_cookies", expiration_time=86400)
if ccks != NO_VALUE:
try:
self.session.cookies._cookies.update(ccks)
r = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10)
if r.status_code == 302:
logger.info('Addic7ed: Login expired')
region.delete("addic7ed_cookies")
else:
logger.info('Addic7ed: Reusing old login')
self.logged_in = True
return
except:
pass
def check_verification(cache_region):
rr = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10,
headers={"Referer": self.server_url})
if rr.status_code == 302:
logger.info('Addic7ed: Login expired')
cache_region.delete("addic7ed_data")
else:
logger.info('Addic7ed: Re-using old login')
self.logged_in = True
return True
if load_verification("addic7ed", self.session, callback=check_verification):
return
logger.info('Addic7ed: Logging in')
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
headers={"Referer": self.server_url + "login.php"})
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in', 'url': '',
'remember': 'true'}
tries = 0
while tries < 3:
r = self.session.get(self.server_url + 'login.php', timeout=10, headers={"Referer": self.server_url})
if "grecaptcha" in r.content:
logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, but should only '
'happen once every so often')
site_key_match = re.search(r'grecaptcha.execute\(\'(.+?)\',', r.content)
if not site_key_match:
    logger.error("Addic7ed: Captcha site-key not found!")
    return
site_key = site_key_match.group(1)
if "relax, slow down" in r.content:
raise TooManyRequests(self.username)
pitcher = pitchers.get_pitcher()("Addic7ed", self.server_url + 'login.php', site_key,
user_agent=self.session.headers["User-Agent"],
cookies=self.session.cookies.get_dict(),
is_invisible=True)
if r.status_code != 302:
raise AuthenticationError(self.username)
result = pitcher.throw()
if not result:
raise Exception("Addic7ed: Couldn't solve captcha!")
data["recaptcha_response"] = result
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
headers={"Referer": self.server_url + "login.php"})
if "relax, slow down" in r.content:
raise TooManyRequests(self.username)
if r.status_code != 302:
if "User <b></b> doesn't exist" in r.content and tries <= 2:
logger.info("Addic7ed: Error, trying again. (%s/%s)", tries+1, 3)
tries += 1
continue
raise AuthenticationError(self.username)
break
region.set("addic7ed_cookies", self.session.cookies._cookies)
store_verification("addic7ed", self.session)
logger.debug('Addic7ed: Logged in')
self.logged_in = True
def terminate(self):
self.session.close()
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def _get_show_ids(self):
@@ -140,7 +171,7 @@ class Addic7edProvider(_Addic7edProvider):
# populate the show ids
show_ids = {}
for show in soup.select('td.version > h3 > a[href^="/show/"]'):
for show in soup.select('td > h3 > a[href^="/show/"]'):
show_clean = sanitize(show.text, default_characters=self.sanitize_characters)
try:
show_id = int(show['href'][6:])

@@ -11,8 +11,8 @@ from babelfish import language_converters
from dogpile.cache.api import NO_VALUE
from subliminal.exceptions import ConfigurationError, ServiceUnavailable
from subliminal.providers.opensubtitles import OpenSubtitlesProvider as _OpenSubtitlesProvider,\
OpenSubtitlesSubtitle as _OpenSubtitlesSubtitle, Episode, ServerProxy, Unauthorized, NoSession, \
DownloadLimitReached, InvalidImdbid, UnknownUserAgent, DisabledUserAgent, OpenSubtitlesError
OpenSubtitlesSubtitle as _OpenSubtitlesSubtitle, Episode, Movie, ServerProxy, Unauthorized, NoSession, \
DownloadLimitReached, InvalidImdbid, UnknownUserAgent, DisabledUserAgent, OpenSubtitlesError, sanitize
from mixins import ProviderRetryMixin
from subliminal.subtitle import fix_line_ending
from subliminal_patch.http import SubZeroRequestsTransport
@ -45,6 +45,19 @@ class OpenSubtitlesSubtitle(_OpenSubtitlesSubtitle):
def get_matches(self, video, hearing_impaired=False):
matches = super(OpenSubtitlesSubtitle, self).get_matches(video)
# episode
if isinstance(video, Episode) and self.movie_kind == 'episode':
# series
if video.series and (sanitize(self.series_name) in (
sanitize(name) for name in [video.series] + video.alternative_series)):
matches.add('series')
# movie
elif isinstance(video, Movie) and self.movie_kind == 'movie':
# title
if video.title and (sanitize(self.movie_name) in (
sanitize(name) for name in [video.title] + video.alternative_titles)):
matches.add('title')
sub_fps = None
try:
sub_fps = float(self.fps)
@ -205,19 +218,19 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
season = episode = None
if isinstance(video, Episode):
query = video.series
query = [video.series] + video.alternative_series
season = video.season
episode = min(video.episode) if isinstance(video.episode, list) else video.episode
if video.is_special:
season = None
episode = None
query = u"%s %s" % (video.series, video.title)
query = [u"%s %s" % (series, video.title) for series in [video.series] + video.alternative_series]
logger.info("%s: Searching for special: %r", self.__class__, query)
# elif ('opensubtitles' not in video.hashes or not video.size) and not video.imdb_id:
# query = video.name.split(os.sep)[-1]
else:
query = video.title
query = [video.title] + video.alternative_titles
return self.query(languages, hash=video.hashes.get('opensubtitles'), size=video.size, imdb_id=video.imdb_id,
query=query, season=season, episode=episode, tag=video.original_name,
@ -238,9 +251,11 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
else:
criteria.append({'imdbid': imdb_id[2:]})
if query and season and episode:
criteria.append({'query': query.replace('\'', ''), 'season': season, 'episode': episode})
for q in query:
criteria.append({'query': q.replace('\'', ''), 'season': season, 'episode': episode})
elif query:
criteria.append({'query': query.replace('\'', '')})
for q in query:
criteria.append({'query': q.replace('\'', '')})
if not criteria:
raise ValueError('Not enough information')
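Sketched standalone, the criteria expansion above turns every candidate title (main plus alternatives) into its own XML-RPC query dict; this is a simplified illustration of that loop, not the provider's full method:

def build_criteria(queries, season=None, episode=None, imdb_id=None):
    criteria = []
    if imdb_id:
        criteria.append({'imdbid': imdb_id[2:]})  # drop the leading "tt"
    for q in queries or []:
        entry = {'query': q.replace("'", '')}
        if season and episode:
            entry.update({'season': season, 'episode': episode})
        criteria.append(entry)
    if not criteria:
        raise ValueError('Not enough information')
    return criteria

# build_criteria(['The Office', 'The Office US'], season=2, episode=3)
# -> one {'query': ..., 'season': 2, 'episode': 3} dict per title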

@ -5,6 +5,7 @@ import logging
import os
import time
import inflect
import cfscrape
from random import randint
from zipfile import ZipFile
@ -12,7 +13,9 @@ from zipfile import ZipFile
from babelfish import language_converters
from guessit import guessit
from requests import Session
from dogpile.cache.api import NO_VALUE
from subliminal import Episode, ProviderError
from subliminal.cache import region
from subliminal.utils import sanitize_release_group
from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
@ -125,6 +128,7 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
self.session = Session()
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
self.session.headers['Referer'] = "https://subscene.com"
def terminate(self):
logger.info("Closing session")
@ -198,43 +202,48 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
subtitles = []
logger.debug(u"Searching for: %s", vfn)
film = search(vfn, session=self.session)
if film and film.subtitles:
logger.debug('Release results found: %s', len(film.subtitles))
subtitles = self.parse_results(video, film)
else:
logger.debug('No release results found')
time.sleep(self.search_throttle)
# re-search for episodes without explicit release name
if isinstance(video, Episode):
#term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
term = u"%s - %s Season" % (video.series, p.number_to_words("%sth" % video.season).capitalize())
time.sleep(self.search_throttle)
logger.debug('Searching for alternative results: %s', term)
film = search(term, session=self.session, release=False)
if film and film.subtitles:
logger.debug('Alternative results found: %s', len(film.subtitles))
subtitles += self.parse_results(video, film)
else:
logger.debug('No alternative results found')
# packs
if video.season_fully_aired:
term = u"%s S%02i" % (video.series, video.season)
logger.debug('Searching for packs: %s', term)
for series in [video.series] + video.alternative_series:
term = u"%s - %s Season" % (series, p.number_to_words("%sth" % video.season).capitalize())
time.sleep(self.search_throttle)
film = search(term, session=self.session)
logger.debug('Searching for alternative results: %s', term)
film = search(term, session=self.session, release=False)
if film and film.subtitles:
logger.debug('Pack results found: %s', len(film.subtitles))
logger.debug('Alternative results found: %s', len(film.subtitles))
subtitles += self.parse_results(video, film)
else:
logger.debug('No pack results found')
else:
logger.debug("Not searching for packs, because the season hasn't fully aired")
logger.debug('No alternative results found')
# packs
if video.season_fully_aired:
term = u"%s S%02i" % (series, video.season)
logger.debug('Searching for packs: %s', term)
time.sleep(self.search_throttle)
film = search(term, session=self.session)
if film and film.subtitles:
logger.debug('Pack results found: %s', len(film.subtitles))
subtitles += self.parse_results(video, film)
else:
logger.debug('No pack results found')
else:
logger.debug("Not searching for packs, because the season hasn't fully aired")
else:
logger.debug('Searching for movie results: %s', video.title)
film = search(video.title, year=video.year, session=self.session, limit_to=None, release=False)
if film and film.subtitles:
subtitles += self.parse_results(video, film)
for title in [video.title] + video.alternative_titles:
logger.debug('Searching for movie results: %s', title)
film = search(title, year=video.year, session=self.session, limit_to=None, release=False)
if film and film.subtitles:
subtitles += self.parse_results(video, film)
logger.info("%s subtitles found" % len(subtitles))
return subtitles
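The alternative search term above relies on inflect to spell out the season number the way Subscene lists packs ("Third Season" rather than "S03"). A hedged sketch of that construction, assuming inflect keeps accepting the "3th"-style ordinal input used here:

import inflect

p = inflect.engine()

def season_term(series, season):
    # "3th" is intentional: inflect treats any st/nd/rd/th suffix as an
    # ordinal marker, so number_to_words("3th") yields "third"
    return u"%s - %s Season" % (series, p.number_to_words("%sth" % season).capitalize())

# season_term(u"Breaking Bad", 3) -> u"Breaking Bad - Third Season"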

@ -26,16 +26,22 @@ class SubsSabBzSubtitle(Subtitle):
"""SubsSabBz Subtitle."""
provider_name = 'subssabbz'
def __init__(self, language, filename, type):
def __init__(self, language, filename, type, video, link):
super(SubsSabBzSubtitle, self).__init__(language)
self.language = language
self.filename = filename
self.page_link = link
self.type = type
self.video = video
@property
def id(self):
return self.filename
def make_picklable(self):
self.content = None
return self
def get_matches(self, video):
matches = set()
@ -118,7 +124,7 @@ class SubsSabBzProvider(Provider):
for row in rows[:10]:
a_element_wrapper = row.find('td', { 'class': 'c2field' })
if a_element_wrapper:
element = row.find('a')
element = a_element_wrapper.find('a')
if element:
link = element.get('href')
logger.info('Found subtitle link %r', link)
@ -130,15 +136,22 @@ class SubsSabBzProvider(Provider):
return [s for l in languages for s in self.query(l, video)]
def download_subtitle(self, subtitle):
pass
if not subtitle.content:
seeking_subtitle_file = subtitle.filename
arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video)
for s in arch:
if s.filename == seeking_subtitle_file:
subtitle.content = s.content
def process_archive_subtitle_files(self, archiveStream, language, video):
def process_archive_subtitle_files(self, archiveStream, language, video, link):
subtitles = []
type = 'episode' if isinstance(video, Episode) else 'movie'
for file_name in archiveStream.namelist():
if file_name.lower().endswith(('.srt', '.sub')):
logger.info('Found subtitle file %r', file_name)
subtitle = SubsSabBzSubtitle(language, file_name, type)
subtitle = SubsSabBzSubtitle(language, file_name, type, video, link)
subtitle.content = archiveStream.read(file_name)
subtitles.append(subtitle)
return subtitles
@ -152,8 +165,8 @@ class SubsSabBzProvider(Provider):
archive_stream = io.BytesIO(request.content)
if is_rarfile(archive_stream):
return self.process_archive_subtitle_files( RarFile(archive_stream), language, video )
return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link )
elif is_zipfile(archive_stream):
return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video )
return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link )
else:
raise ValueError('Not a valid archive')
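Both Bulgarian providers share the archive dispatch above; a minimal standalone sketch of the sniff-then-open order (RAR first, then ZIP):

import io
from zipfile import ZipFile, is_zipfile
from rarfile import RarFile, is_rarfile

def open_archive(content):
    # sniff the raw bytes rather than trusting a file extension
    stream = io.BytesIO(content)
    if is_rarfile(stream):
        return RarFile(stream)
    if is_zipfile(stream):
        return ZipFile(stream)
    raise ValueError('Not a valid archive')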

@ -26,19 +26,25 @@ class SubsUnacsSubtitle(Subtitle):
"""SubsUnacs Subtitle."""
provider_name = 'subsunacs'
def __init__(self, language, filename, type):
def __init__(self, language, filename, type, video, link):
super(SubsUnacsSubtitle, self).__init__(language)
self.language = language
self.filename = filename
self.page_link = link
self.type = type
self.video = video
@property
def id(self):
return self.filename
def make_picklable(self):
self.content = None
return self
def get_matches(self, video):
matches = set()
video_filename = video.name
video_filename = os.path.basename(video_filename)
video_filename, _ = os.path.splitext(video_filename)
@ -77,11 +83,11 @@ class SubsUnacsProvider(Provider):
def terminate(self):
self.session.close()
def query(self, language, video):
subtitles = []
isEpisode = isinstance(video, Episode)
params = {
'm': '',
'l': 0,
@ -117,7 +123,7 @@ class SubsUnacsProvider(Provider):
soup = BeautifulSoup(response.content, 'html.parser')
rows = soup.findAll('td', {'class': 'tdMovie'})
# Search on first 10 rows only
for row in rows[:10]:
element = row.find('a', {'class': 'tooltip'})
@ -125,37 +131,44 @@ class SubsUnacsProvider(Provider):
link = element.get('href')
logger.info('Found subtitle link %r', link)
subtitles = subtitles + self.download_archive_and_add_subtitle_files('https://subsunacs.net' + link, language, video)
return subtitles
def list_subtitles(self, video, languages):
return [s for l in languages for s in self.query(l, video)]
def download_subtitle(self, subtitle):
pass
def process_archive_subtitle_files(self, archiveStream, language, video):
if not subtitle.content:
seeking_subtitle_file = subtitle.filename
arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video)
for s in arch:
if s.filename == seeking_subtitle_file:
subtitle.content = s.content
def process_archive_subtitle_files(self, archiveStream, language, video, link):
subtitles = []
type = 'episode' if isinstance(video, Episode) else 'movie'
for file_name in archiveStream.namelist():
if file_name.lower().endswith(('.srt', '.sub')):
logger.info('Found subtitle file %r', file_name)
subtitle = SubsUnacsSubtitle(language, file_name, type)
subtitle = SubsUnacsSubtitle(language, file_name, type, video, link)
subtitle.content = archiveStream.read(file_name)
subtitles.append(subtitle)
return subtitles
def download_archive_and_add_subtitle_files(self, link, language, video ):
logger.info('Downloading subtitle %r', link)
request = self.session.get(link, headers={
'Referer': 'https://subsunacs.net/search.php'
})
request.raise_for_status()
archive_stream = io.BytesIO(request.content)
if is_rarfile(archive_stream):
return self.process_archive_subtitle_files( RarFile(archive_stream), language, video )
return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link )
elif is_zipfile(archive_stream):
return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video )
return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link )
else:
raise ValueError('Not a valid archive')

@ -4,6 +4,7 @@ import io
import logging
import math
import re
import time
import rarfile
@ -23,6 +24,7 @@ from subliminal.utils import sanitize_release_group
from subliminal.subtitle import guess_matches
from subliminal.video import Episode, Movie
from subliminal.subtitle import fix_line_ending
from subliminal_patch.pitcher import pitchers, load_verification, store_verification
from subzero.language import Language
from random import randint
@ -142,6 +144,7 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
logger.debug('User-Agent set to %s', self.session.headers['User-Agent'])
self.session.headers['Referer'] = self.server_url
logger.debug('Referer set to %s', self.session.headers['Referer'])
load_verification("titlovi", self.session)
def terminate(self):
self.session.close()
@ -182,110 +185,144 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
r = self.session.get(self.search_url, params=params, timeout=10)
r.raise_for_status()
except RequestException as e:
logger.exception('RequestException %s', e)
break
try:
soup = BeautifulSoup(r.content, 'lxml')
# number of results
result_count = int(soup.select_one('.results_count b').string)
except:
result_count = None
# exit if no results
if not result_count:
if not subtitles:
logger.debug('No subtitles found')
else:
logger.debug("No more subtitles found")
break
# number of pages with results
pages = int(math.ceil(result_count / float(items_per_page)))
# get current page
if 'pg' in params:
current_page = int(params['pg'])
try:
sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
for sub in sublist:
# subtitle id
sid = sub.find(attrs={'data-id': True}).attrs['data-id']
# get download link
download_link = self.download_url + sid
# title and alternate title
match = title_re.search(sub.a.string)
if match:
_title = match.group('title')
alt_title = match.group('altitle')
captcha_passed = False
if e.response.status_code == 403 and "data-sitekey" in e.response.content:
logger.info('titlovi: Solving captcha. This might take a couple of minutes, but should only '
'happen once every so often')
site_key = re.search(r'data-sitekey="(.+?)"', e.response.content)
challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', e.response.content)
challenge_ray = re.search(r'data-ray="(.+?)"', e.response.content)
if not all([site_key, challenge_s, challenge_ray]):
raise Exception("titlovi: Captcha challenge data not found!")
site_key, challenge_s, challenge_ray = site_key.group(1), challenge_s.group(1), challenge_ray.group(1)
pitcher = pitchers.get_pitcher()("titlovi", e.request.url, site_key,
user_agent=self.session.headers["User-Agent"],
cookies=self.session.cookies.get_dict(),
is_invisible=True)
result = pitcher.throw()
if not result:
raise Exception("titlovi: Couldn't solve captcha!")
s_params = {
"s": challenge_s,
"id": challenge_ray,
"g-recaptcha-response": result,
}
r = self.session.get(self.server_url + "/cdn-cgi/l/chk_captcha", params=s_params, timeout=10,
allow_redirects=False)
r.raise_for_status()
r = self.session.get(self.search_url, params=params, timeout=10)
r.raise_for_status()
store_verification("titlovi", self.session)
captcha_passed = True
if not captcha_passed:
logger.exception('RequestException %s', e)
break
else:
try:
soup = BeautifulSoup(r.content, 'lxml')
# number of results
result_count = int(soup.select_one('.results_count b').string)
except:
result_count = None
# exit if no results
if not result_count:
if not subtitles:
logger.debug('No subtitles found')
else:
continue
# page link
page_link = self.server_url + sub.a.attrs['href']
# subtitle language
match = lang_re.search(sub.select_one('.lang').attrs['src'])
if match:
try:
# decode language
lang = Language.fromtitlovi(match.group('lang')+match.group('script'))
except ValueError:
logger.debug("No more subtitles found")
break
# number of pages with results
pages = int(math.ceil(result_count / float(items_per_page)))
# get current page
if 'pg' in params:
current_page = int(params['pg'])
try:
sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
for sub in sublist:
# subtitle id
sid = sub.find(attrs={'data-id': True}).attrs['data-id']
# get download link
download_link = self.download_url + sid
# title and alternate title
match = title_re.search(sub.a.string)
if match:
_title = match.group('title')
alt_title = match.group('altitle')
else:
continue
# release year or series start year
match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
if match:
r_year = int(match.group('year'))
# fps
match = fps_re.search(sub.select_one('.fps').string)
if match:
fps = match.group('fps')
# releases
releases = str(sub.select_one('.fps').parent.contents[0].string)
# handle movies and series separately
if is_episode:
# season and episode info
sxe = sub.select_one('.s0xe0y').string
r_season = None
r_episode = None
if sxe:
match = season_re.search(sxe)
if match:
r_season = int(match.group('season'))
match = episode_re.search(sxe)
if match:
r_episode = int(match.group('episode'))
subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
alt_title=alt_title, season=r_season, episode=r_episode,
year=r_year, fps=fps,
asked_for_release_group=video.release_group,
asked_for_episode=episode)
else:
subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
alt_title=alt_title, year=r_year, fps=fps,
asked_for_release_group=video.release_group)
logger.debug('Found subtitle %r', subtitle)
# prime our matches so we can use the values later
subtitle.get_matches(video)
# add found subtitles
subtitles.append(subtitle)
finally:
soup.decompose()
# stop on last page
if current_page >= pages:
break
# increment current page
params['pg'] = current_page + 1
logger.debug('Getting page %d', params['pg'])
# page link
page_link = self.server_url + sub.a.attrs['href']
# subtitle language
match = lang_re.search(sub.select_one('.lang').attrs['src'])
if match:
try:
# decode language
lang = Language.fromtitlovi(match.group('lang')+match.group('script'))
except ValueError:
continue
# release year or series start year
match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
if match:
r_year = int(match.group('year'))
# fps
match = fps_re.search(sub.select_one('.fps').string)
if match:
fps = match.group('fps')
# releases
releases = str(sub.select_one('.fps').parent.contents[0].string)
# handle movies and series separately
if is_episode:
# season and episode info
sxe = sub.select_one('.s0xe0y').string
r_season = None
r_episode = None
if sxe:
match = season_re.search(sxe)
if match:
r_season = int(match.group('season'))
match = episode_re.search(sxe)
if match:
r_episode = int(match.group('episode'))
subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
alt_title=alt_title, season=r_season, episode=r_episode,
year=r_year, fps=fps,
asked_for_release_group=video.release_group,
asked_for_episode=episode)
else:
subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
alt_title=alt_title, year=r_year, fps=fps,
asked_for_release_group=video.release_group)
logger.debug('Found subtitle %r', subtitle)
# prime our matches so we can use the values later
subtitle.get_matches(video)
# add found subtitles
subtitles.append(subtitle)
finally:
soup.decompose()
# stop on last page
if current_page >= pages:
break
# increment current page
params['pg'] = current_page + 1
logger.debug('Getting page %d', params['pg'])
return subtitles
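The paging loop above stops once the current page reaches the computed total; a small standalone sketch of that arithmetic:

import math

def page_count(result_count, items_per_page):
    # ceil-divide; the float() cast matters on Python 2, where "/" on
    # two ints truncates instead of producing a float
    return int(math.ceil(result_count / float(items_per_page)))

# page_count(45, 20) -> 3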

@ -0,0 +1,208 @@
# -*- coding: utf-8 -*-
import io
import logging
import os
import zipfile
import rarfile
from subzero.language import Language
from guessit import guessit
from requests import Session
from six import text_type
from subliminal import __short_version__
from subliminal.providers import ParserBeautifulSoup, Provider
from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches
from subliminal.video import Episode, Movie
logger = logging.getLogger(__name__)
class ZimukuSubtitle(Subtitle):
"""Zimuku Subtitle."""
provider_name = 'zimuku'
def __init__(self, language, page_link, version, download_link):
super(ZimukuSubtitle, self).__init__(language, page_link=page_link)
self.version = version
self.download_link = download_link
self.hearing_impaired = None
self.encoding = 'utf-8'
@property
def id(self):
return self.download_link
def get_matches(self, video):
matches = set()
# episode
if isinstance(video, Episode):
# other properties
matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True)
# movie
elif isinstance(video, Movie):
# other properties
matches |= guess_matches(video, guessit(self.version, {'type': 'movie'}), partial=True)
return matches
class ZimukuProvider(Provider):
"""Zimuku Provider."""
languages = {Language(l) for l in ['zho', 'eng']}
server_url = 'http://www.zimuku.la'
search_url = '/search?q={}'
download_url = 'http://www.zimuku.la/'
UserAgent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)'
subtitle_class = ZimukuSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)
def terminate(self):
self.session.close()
def query(self, keyword, season=None, episode=None, year=None):
params = keyword
if season and episode:
params += ' S{season:02d}E{episode:02d}'.format(season=season, episode=episode)
elif year:
params += ' {:4d}'.format(year)
logger.debug('Searching subtitles %r', params)
subtitles = []
search_link = self.server_url + text_type(self.search_url).format(params)
r = self.session.get(search_link, timeout=30)
r.raise_for_status()
if not r.content:
logger.debug('No data returned from provider')
return []
soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
for entity in soup.select('div.item.prel.clearfix a:nth-of-type(2)'):
moviename = entity.text
entity_url = self.server_url + entity['href']
logger.debug(entity_url)
r = self.session.get(entity_url, timeout=30)
r.raise_for_status()
logger.debug('looking into ' + entity_url)
soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser']).find("div", class_="subs box clearfix")
# loop over subtitles cells
subs = soup.tbody.find_all("tr")
for sub in subs:
page_link = '%s%s' % (self.server_url, sub.a.get('href').encode('utf-8'))
version = sub.a.text.encode('utf-8') or None
if version is None:
version = ""
try:
td = sub.find("td", class_="tac lang")
r2 = td.find_all("img")
langs = [x.get('title').encode('utf-8') for x in r2]
except:
langs = ['未知']  # wrap the fallback label in a list so the join below doesn't split it
name = '%s (%s)' % (version, ",".join(langs))
if ('English' in langs) and not(('简体中文' in langs) or ('繁體中文' in langs)):
language = Language('eng')
else:
language = Language('zho')
# read the item
subtitle = self.subtitle_class(language, page_link, version, page_link.replace("detail","dld"))
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
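The language decision above defaults to Chinese and only yields English when the row carries an English flag without either Chinese variant; a hedged standalone sketch (unicode literals here are illustrative, the provider works on raw page bytes):

from subzero.language import Language

def pick_language(langs):
    # default to Chinese; only call it English when an English flag is
    # present and neither the Simplified nor the Traditional flag is
    chinese_tags = (u'简体中文', u'繁體中文')
    if u'English' in langs and not any(tag in langs for tag in chinese_tags):
        return Language('eng')
    return Language('zho')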
def list_subtitles(self, video, languages):
if isinstance(video, Episode):
titles = [video.series] + video.alternative_series
elif isinstance(video, Movie):
titles = [video.title] + video.alternative_titles
else:
titles = []
subtitles = []
# query for subtitles with the show_id
for title in titles:
if isinstance(video, Episode):
subtitles += [s for s in self.query(title, season=video.season, episode=video.episode,
year=video.year)
if s.language in languages]
elif isinstance(video, Movie):
subtitles += [s for s in self.query(title, year=video.year)
if s.language in languages]
return subtitles
def download_subtitle(self, subtitle):
if isinstance(subtitle, ZimukuSubtitle):
# download the subtitle
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link},
timeout=30)
r.raise_for_status()
if not r.content:
logger.debug('Unable to download subtitle. No data returned from provider')
return
soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
links = soup.find("div", {"class":"clearfix"}).find_all('a')
# TODO: add settings for choice
for down_link in links:
url = down_link.get('href').encode('utf-8')
url = self.server_url + url
r = self.session.get(url, headers={'Referer': subtitle.download_link},
timeout=30)
r.raise_for_status()
if len(r.content) > 1024:
break
archive_stream = io.BytesIO(r.content)
archive = None
if rarfile.is_rarfile(archive_stream):
logger.debug('Identified rar archive')
archive = rarfile.RarFile(archive_stream)
subtitle_content = _get_subtitle_from_archive(archive)
elif zipfile.is_zipfile(archive_stream):
logger.debug('Identified zip archive')
archive = zipfile.ZipFile(archive_stream)
subtitle_content = _get_subtitle_from_archive(archive)
else:
subtitle_content = r.content
if subtitle_content:
subtitle.content = fix_line_ending(subtitle_content)
else:
logger.debug('Could not extract subtitle from %r', archive)
def _get_subtitle_from_archive(archive):
for name in archive.namelist():
# discard hidden files
if os.path.split(name)[-1].startswith('.'):
continue
# discard non-subtitle files
if not name.lower().endswith(SUBTITLE_EXTENSIONS):
continue
return archive.read(name)
return None
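A quick usage sketch for _get_subtitle_from_archive: hidden entries and non-subtitle files are skipped, and the first subtitle's bytes win (the in-memory zip below is purely illustrative):

import io
import zipfile

buf = io.BytesIO()
with zipfile.ZipFile(buf, 'w') as z:
    z.writestr('.hidden.srt', b'skipped: hidden file')
    z.writestr('notes.txt', b'skipped: not a subtitle extension')
    z.writestr('movie.srt', b'1\n00:00:01,000 --> 00:00:02,000\nHi\n')

archive = zipfile.ZipFile(io.BytesIO(buf.getvalue()))
# _get_subtitle_from_archive(archive) would return the movie.srt bytes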

@ -4,7 +4,7 @@ import subliminal
import base64
import zlib
from subliminal import __short_version__
from subliminal.refiners.omdb import OMDBClient, refine
from subliminal.refiners.omdb import OMDBClient, refine as refine_orig, Episode, Movie
class SZOMDBClient(OMDBClient):
@ -63,5 +63,13 @@ class SZOMDBClient(OMDBClient):
return j
def refine(video, **kwargs):
refine_orig(video, **kwargs)
if isinstance(video, Episode) and video.series_imdb_id:
video.series_imdb_id = video.series_imdb_id.strip()
elif isinstance(video, Movie) and video.imdb_id:
video.imdb_id = video.imdb_id.strip()
omdb_client = SZOMDBClient(headers={'User-Agent': 'Subliminal/%s' % __short_version__})
subliminal.refiners.omdb.omdb_client = omdb_client
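The wrapped refine() above presumably guards against imdb ids coming back padded with whitespace from upstream metadata, which would break later lookups; the normalization amounts to a strip:

def normalize_imdb_id(imdb_id):
    # ' tt0475784 ' -> 'tt0475784'; None or '' pass through unchanged
    return imdb_id.strip() if imdb_id else imdb_id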

@ -38,6 +38,8 @@ class Subtitle(Subtitle_):
plex_media_fps = None
skip_wrong_fps = False
wrong_fps = False
wrong_series = False
wrong_season_ep = False
is_pack = False
asked_for_release_group = None
asked_for_episode = None
@ -356,7 +358,8 @@ def guess_matches(video, guess, partial=False):
matches = set()
if isinstance(video, Episode):
# series
if video.series and 'title' in guess and sanitize(guess['title']) == sanitize(video.series):
if video.series and 'title' in guess and sanitize(guess['title']) in (
sanitize(name) for name in [video.series] + video.alternative_series):
matches.add('series')
# title
if video.title and 'episode_title' in guess and sanitize(guess['episode_title']) == sanitize(video.title):
@ -384,7 +387,8 @@ def guess_matches(video, guess, partial=False):
if video.year and 'year' in guess and guess['year'] == video.year:
matches.add('year')
# title
if video.title and 'title' in guess and sanitize(guess['title']) == sanitize(video.title):
if video.title and 'title' in guess and sanitize(guess['title']) in (
sanitize(name) for name in [video.title] + video.alternative_titles):
matches.add('title')
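Both hunks above apply the same rule: a guessed title matches if its sanitized form equals any sanitized candidate, main or alternative. A standalone sketch:

from subliminal.utils import sanitize

def title_matches(guessed_title, title, alternative_titles):
    candidates = [title] + (alternative_titles or [])
    return sanitize(guessed_title) in (sanitize(name) for name in candidates)

# title_matches('The Office US', 'The Office', ['The Office US']) -> True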
# release_group

@ -1228,12 +1228,104 @@
</div>
</div>
</div>
<div class="ui dividing header">Anti-captcha options</div>
<div class="twelve wide column">
<div class="ui grid">
<div class="middle aligned row">
<div class="right aligned four wide column">
<label>Provider</label>
</div>
<div class="five wide column">
<select name="settings_anti_captcha_provider" id="settings_anti_captcha_provider" class="ui fluid selection dropdown">
<option value="None">None</option>
<option value="anti-captcha">Anti-Captcha</option>
<option value="death-by-captcha">Death by Captcha</option>
</select>
</div>
<div class="collapsed center aligned column">
<div class="ui basic icon"
data-tooltip='Choose the anti-captcha provider you want to use.'
data-inverted="">
<i class="help circle large icon"></i>
</div>
</div>
</div>
<div class="middle aligned row anticaptcha">
<div class="two wide column"></div>
<div class="right aligned four wide column">
<label>Provider website</label>
</div>
<div class="five wide column">
<a href="http://getcaptchasolution.com/eixxo1rsnw" target="_blank">Anti-Captcha.com</a>
</div>
</div>
<div class="middle aligned row anticaptcha">
<div class="two wide column"></div>
<div class="right aligned four wide column">
<label>Account Key</label>
</div>
<div class="five wide column">
<div class='field'>
<div class="ui fluid input">
<input id="settings_anti_captcha_key" name="settings_anti_captcha_key"
type="text" value="{{ settings.anticaptcha.anti_captcha_key }}">
</div>
</div>
</div>
</div>
<div class="middle aligned row deathbycaptcha">
<div class="two wide column"></div>
<div class="right aligned four wide column">
<label>Provider website</label>
</div>
<div class="five wide column">
<a href="https://www.deathbycaptcha.com" target="_blank">DeathByCaptcha.com</a>
</div>
</div>
<div class="middle aligned row deathbycaptcha">
<div class="two wide column"></div>
<div class="right aligned four wide column">
<label>Username</label>
</div>
<div class="five wide column">
<div class='field'>
<div class="ui fluid input">
<input id="settings_death_by_captcha_username" name="settings_death_by_captcha_username"
type="text" value="{{ settings.deathbycaptcha.username }}">
</div>
</div>
</div>
</div>
<div class="middle aligned row deathbycaptcha">
<div class="two wide column"></div>
<div class="right aligned four wide column">
<label>Password</label>
</div>
<div class="five wide column">
<div class='field'>
<div class="ui fluid input">
<input id="settings_death_by_captcha_password" name="settings_death_by_captcha_password"
type="text" value="{{ settings.deathbycaptcha.password }}">
</div>
</div>
</div>
</div>
</div>
</div>
<div class="ui dividing header">Subtitles providers</div>
<div class="twelve wide column">
<div class="ui grid">
<div class="middle aligned row">
<div class="right aligned four wide column">
<label>Addic7ed</label>
<label>Addic7ed (requires anti-captcha)</label>
</div>
<div class="one wide column">
<div id="addic7ed" class="ui toggle checkbox provider">
@ -1703,7 +1795,7 @@
<div class="middle aligned row">
<div class="right aligned four wide column">
<label>Titlovi</label>
<label>Titlovi (requires anti-captcha)</label>
</div>
<div class="one wide column">
<div id="titlovi" class="ui toggle checkbox provider">
@ -1772,6 +1864,28 @@
</div>
</div>
<div class="middle aligned row">
<div class="right aligned four wide column">
<label>Zimuku</label>
</div>
<div class="one wide column">
<div id="zimuku" class="ui toggle checkbox provider">
<input type="checkbox">
<label></label>
</div>
</div>
<div class="collapsed column">
<div class="collapsed center aligned column">
<div class="ui basic icon" data-tooltip="Chinese subtitles provider." data-inverted="">
<i class="help circle large icon"></i>
</div>
</div>
</div>
</div>
<div id="zimuku_option" class="ui grid container">
</div>
<div class="middle aligned row">
<div class="eleven wide column">
<div class='field' hidden>
@ -2235,6 +2349,30 @@
}
});
if ($('#settings_anti_captcha_provider').val() === "None") {
$('.anticaptcha').hide();
$('.deathbycaptcha').hide();
} else if ($('#settings_anti_captcha_provider').val() === "anti-captcha") {
$('.anticaptcha').show();
$('.deathbycaptcha').hide();
} else if ($('#settings_anti_captcha_provider').val() === "death-by-cCaptcha") {
$('.deathbycaptcha').show();
$('.anticaptcha').hide();
}
$('#settings_anti_captcha_provider').dropdown('setting', 'onChange', function(){
if ($('#settings_anti_captcha_provider').val() === "None") {
$('.anticaptcha').hide();
$('.deathbycaptcha').hide();
} else if ($('#settings_anti_captcha_provider').val() === "anti-captcha") {
$('.anticaptcha').show();
$('.deathbycaptcha').hide();
} else if ($('#settings_anti_captcha_provider').val() === "death-by-captcha") {
$('.deathbycaptcha').show();
$('.anticaptcha').hide();
}
});
if ($('#settings_use_postprocessing').data("postprocessing") === "True") {
$('.postprocessing').show();
} else {
@ -2445,6 +2583,8 @@
$('#settings_page_size').dropdown('set selected','{{!settings.general.page_size}}');
$('#settings_subfolder').dropdown('clear');
$('#settings_subfolder').dropdown('set selected', '{{!settings.general.subfolder}}');
$('#settings_anti_captcha_provider').dropdown('clear');
$('#settings_anti_captcha_provider').dropdown('set selected', '{{!settings.general.anti_captcha_provider}}');
$('#settings_proxy_type').dropdown('clear');
$('#settings_proxy_type').dropdown('set selected','{{!settings.proxy.type}}');
$('#settings_providers').dropdown('clear');
@ -2616,6 +2756,7 @@
]
},
settings_days_to_upgrade_subs : {
depends: 'settings_upgrade_subs',
rules : [
{
type : 'integer[1..30]'

@ -414,7 +414,7 @@
<div class="ui grid">
<div class="middle aligned row">
<div class="right aligned four wide column">
<label>Addic7ed</label>
<label>Addic7ed (requires anti-captcha)</label>
</div>
<div class="one wide column">
<div id="addic7ed" class="ui toggle checkbox provider">
@ -884,7 +884,7 @@
<div class="middle aligned row">
<div class="right aligned four wide column">
<label>Titlovi</label>
<label>Titlovi (requires anti-captcha)</label>
</div>
<div class="one wide column">
<div id="titlovi" class="ui toggle checkbox provider">
