diff --git a/bazarr/config.py b/bazarr/config.py
index 2d3405f04..07a8d6965 100644
--- a/bazarr/config.py
+++ b/bazarr/config.py
@@ -41,7 +41,8 @@ defaults = {
'subfolder_custom': '',
'upgrade_subs': 'True',
'days_to_upgrade_subs': '7',
- 'upgrade_manual': 'True'
+ 'upgrade_manual': 'True',
+ 'anti_captcha_provider': 'None'
},
'auth': {
'type': 'None',
@@ -98,7 +99,15 @@ defaults = {
},
'assrt': {
'token': ''
- }}
+ },
+ 'anticaptcha': {
+ 'anti_captcha_key': ''
+ },
+ 'deathbycaptcha': {
+ 'username': '',
+ 'password': ''
+ }
+}
settings = simpleconfigparser(defaults=defaults)
settings.read(os.path.join(args.config_dir, 'config', 'config.ini'))
diff --git a/bazarr/init.py b/bazarr/init.py
index eb3af0ce3..284159c0a 100644
--- a/bazarr/init.py
+++ b/bazarr/init.py
@@ -17,6 +17,16 @@ from get_args import args
# set subliminal_patch user agent
os.environ["SZ_USER_AGENT"] = "Bazarr/1"
+# set anti-captcha provider and key
+if settings.general.anti_captcha_provider == 'anti-captcha':
+ os.environ["ANTICAPTCHA_CLASS"] = 'AntiCaptchaProxyLess'
+ os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = settings.anticaptcha.anti_captcha_key
+elif settings.general.anti_captcha_provider == 'death-by-captcha':
+ os.environ["ANTICAPTCHA_CLASS"] = 'DBCProxyLess'
+ os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = ':'.join({settings.deathbycaptcha.username, settings.deathbycaptcha.password})
+else:
+ os.environ["ANTICAPTCHA_CLASS"] = ''
+
# Check if args.config_dir exist
if not os.path.exists(args.config_dir):
# Create config_dir directory tree
diff --git a/bazarr/logger.py b/bazarr/logger.py
index 3507767bd..e95d2259f 100644
--- a/bazarr/logger.py
+++ b/bazarr/logger.py
@@ -67,6 +67,8 @@ def configure_logging(debug=False):
fh.setFormatter(f)
fh.addFilter(BlacklistFilter())
fh.addFilter(PublicIPFilter())
+ fh.setLevel(log_level)
+ logger.addHandler(fh)
if debug:
logging.getLogger("apscheduler").setLevel(logging.DEBUG)
@@ -90,8 +92,7 @@ def configure_logging(debug=False):
logging.getLogger("rebulk").setLevel(logging.WARNING)
logging.getLogger("stevedore.extension").setLevel(logging.CRITICAL)
logging.getLogger("geventwebsocket.handler").setLevel(logging.WARNING)
- fh.setLevel(log_level)
- logger.addHandler(fh)
+
class MyFilter(logging.Filter):
diff --git a/bazarr/main.py b/bazarr/main.py
index ea3e4414a..326daf65b 100644
--- a/bazarr/main.py
+++ b/bazarr/main.py
@@ -602,17 +602,17 @@ def search_json(query):
search_list = []
if settings.general.getboolean('use_sonarr'):
- c.execute("SELECT title, sonarrSeriesId FROM table_shows WHERE title LIKE ? ORDER BY title",
+ c.execute("SELECT title, sonarrSeriesId, year FROM table_shows WHERE title LIKE ? ORDER BY title",
('%' + query + '%',))
series = c.fetchall()
for serie in series:
- search_list.append(dict([('name', serie[0]), ('url', base_url + 'episodes/' + str(serie[1]))]))
+            search_list.append(dict([('name', serie[0] + ' (' + str(serie[2]) + ')'), ('url', base_url + 'episodes/' + str(serie[1]))]))
if settings.general.getboolean('use_radarr'):
- c.execute("SELECT title, radarrId FROM table_movies WHERE title LIKE ? ORDER BY title", ('%' + query + '%',))
+ c.execute("SELECT title, radarrId, year FROM table_movies WHERE title LIKE ? ORDER BY title", ('%' + query + '%',))
movies = c.fetchall()
for movie in movies:
- search_list.append(dict([('name', movie[0]), ('url', base_url + 'movie/' + str(movie[1]))]))
+            search_list.append(dict([('name', movie[0] + ' (' + str(movie[2]) + ')'), ('url', base_url + 'movie/' + str(movie[1]))]))
c.close()
response.content_type = 'application/json'
@@ -1275,6 +1275,10 @@ def save_settings():
settings_upgrade_manual = 'False'
else:
settings_upgrade_manual = 'True'
+ settings_anti_captcha_provider = request.forms.get('settings_anti_captcha_provider')
+ settings_anti_captcha_key = request.forms.get('settings_anti_captcha_key')
+ settings_death_by_captcha_username = request.forms.get('settings_death_by_captcha_username')
+ settings_death_by_captcha_password = request.forms.get('settings_death_by_captcha_password')
before = (unicode(settings.general.ip), int(settings.general.port), unicode(settings.general.base_url),
unicode(settings.general.path_mappings), unicode(settings.general.getboolean('use_sonarr')),
@@ -1306,6 +1310,22 @@ def save_settings():
settings.general.upgrade_subs = text_type(settings_upgrade_subs)
settings.general.days_to_upgrade_subs = text_type(settings_days_to_upgrade_subs)
settings.general.upgrade_manual = text_type(settings_upgrade_manual)
+ settings.general.anti_captcha_provider = text_type(settings_anti_captcha_provider)
+ settings.anticaptcha.anti_captcha_key = text_type(settings_anti_captcha_key)
+ settings.deathbycaptcha.username = text_type(settings_death_by_captcha_username)
+ settings.deathbycaptcha.password = text_type(settings_death_by_captcha_password)
+
+ # set anti-captcha provider and key
+ if settings.general.anti_captcha_provider == 'anti-captcha':
+ os.environ["ANTICAPTCHA_CLASS"] = 'AntiCaptchaProxyLess'
+ os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = settings.anticaptcha.anti_captcha_key
+    elif settings.general.anti_captcha_provider == 'death-by-captcha':
+        os.environ["ANTICAPTCHA_CLASS"] = 'DeathByCaptchaProxyLess'
+        os.environ["ANTICAPTCHA_ACCOUNT_KEY"] = ':'.join(
+            (settings.deathbycaptcha.username, settings.deathbycaptcha.password))
+ else:
+ os.environ["ANTICAPTCHA_CLASS"] = ''
+
settings.general.minimum_score_movie = text_type(settings_general_minimum_score_movies)
settings.general.use_embedded_subs = text_type(settings_general_embedded)
settings.general.adaptive_searching = text_type(settings_general_adaptive_searching)
diff --git a/libs/cfscrape.py b/libs/cfscrape.py
new file mode 100644
index 000000000..15986f03a
--- /dev/null
+++ b/libs/cfscrape.py
@@ -0,0 +1,279 @@
+import logging
+import random
+import time
+import re
+
+# based off of https://gist.github.com/doko-desuka/58d9212461f62583f8df9bc6387fade2
+# and https://github.com/Anorov/cloudflare-scrape
+# and https://github.com/VeNoMouS/cloudflare-scrape-js2py
+
+# Disable "InsecureRequestWarning: Unverified HTTPS request is being made" warnings.
+import requests
+from requests.packages.urllib3.exceptions import InsecureRequestWarning
+
+requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
+from requests.sessions import Session
+from copy import deepcopy
+
+try:
+ from urlparse import urlparse
+except ImportError:
+ from urllib.parse import urlparse
+
+DEFAULT_USER_AGENTS = [
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/65.0.3325.181 Chrome/65.0.3325.181 Safari/537.36",
+ "Mozilla/5.0 (Linux; Android 7.0; Moto G (5) Build/NPPS25.137-93-8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.137 Mobile Safari/537.36",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B554a Safari/9537.53",
+ "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:59.0) Gecko/20100101 Firefox/59.0",
+ "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0"
+]
+
+DEFAULT_USER_AGENT = random.choice(DEFAULT_USER_AGENTS)
+
+BUG_REPORT = (
+ "Cloudflare may have changed their technique, or there may be a bug in the script.\n\nPlease read " "https://github.com/Anorov/cloudflare-scrape#updates, then file a "
+ "bug report at https://github.com/Anorov/cloudflare-scrape/issues.")
+
+
+class CloudflareScraper(Session):
+ def __init__(self, *args, **kwargs):
+ super(CloudflareScraper, self).__init__(*args, **kwargs)
+
+ if "requests" in self.headers["User-Agent"]:
+ # Spoof Firefox on Linux if no custom User-Agent has been set
+ self.headers["User-Agent"] = random.choice(DEFAULT_USER_AGENTS)
+
+ def request(self, method, url, *args, **kwargs):
+ resp = super(CloudflareScraper, self).request(method, url, *args, **kwargs)
+
+ # Check if Cloudflare anti-bot is on
+ if (resp.status_code in (503, 429)
+ and resp.headers.get("Server", "").startswith("cloudflare")
+ and b"jschl_vc" in resp.content
+ and b"jschl_answer" in resp.content
+ ):
+ return self.solve_cf_challenge(resp, **kwargs)
+
+ # Otherwise, no Cloudflare anti-bot detected
+ return resp
+
+ def solve_cf_challenge(self, resp, **original_kwargs):
+ body = resp.text
+ parsed_url = urlparse(resp.url)
+ domain = parsed_url.netloc
+ submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme, domain)
+
+ cloudflare_kwargs = deepcopy(original_kwargs)
+ params = cloudflare_kwargs.setdefault("params", {})
+ headers = cloudflare_kwargs.setdefault("headers", {})
+ headers["Referer"] = resp.url
+
+ try:
+ cf_delay = float(re.search('submit.*?(\d+)', body, re.DOTALL).group(1)) / 1000.0
+
+ form_index = body.find('id="challenge-form"')
+ if form_index == -1:
+ raise Exception('CF form not found')
+ sub_body = body[form_index:]
+
+ s_match = re.search('name="s" value="(.+?)"', sub_body)
+ if s_match:
+ params["s"] = s_match.group(1) # On older variants this parameter is absent.
+ params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"', sub_body).group(1)
+ params["pass"] = re.search(r'name="pass" value="(.+?)"', sub_body).group(1)
+
+ if body.find('id="cf-dn-', form_index) != -1:
+ extra_div_expression = re.search('id="cf-dn-.*?>(.+?)<', sub_body).group(1)
+
+ # Initial value.
+ js_answer = self.cf_parse_expression(
+ re.search('setTimeout\(function\(.*?:(.*?)}', body, re.DOTALL).group(1)
+ )
+ # Extract the arithmetic operations.
+ builder = re.search("challenge-form'\);\s*;(.*);a.value", body, re.DOTALL).group(1)
+ # Remove a function semicolon before splitting on semicolons, else it messes the order.
+ lines = builder.replace(' return +(p)}();', '', 1).split(';')
+
+ for line in lines:
+ if len(line) and '=' in line:
+ heading, expression = line.split('=', 1)
+ if 'eval(eval(atob' in expression:
+                        # Uses the expression in an external <div>.
+ expression_value = self.cf_parse_expression(extra_div_expression)
+ elif '(function(p' in expression:
+ # Expression + domain sampling function.
+ expression_value = self.cf_parse_expression(expression, domain)
+ else:
+ expression_value = self.cf_parse_expression(expression)
+ js_answer = self.cf_arithmetic_op(heading[-1], js_answer, expression_value)
+
+ if '+ t.length' in body:
+ js_answer += len(domain) # Only older variants add the domain length.
+
+ params["jschl_answer"] = '%.10f' % js_answer
+
+ except Exception as e:
+ # Something is wrong with the page.
+ # This may indicate Cloudflare has changed their anti-bot
+ # technique. If you see this and are running the latest version,
+ # please open a GitHub issue so I can update the code accordingly.
+ logging.error("[!] %s Unable to parse Cloudflare anti-bots page. "
+ "Try upgrading cloudflare-scrape, or submit a bug report "
+ "if you are running the latest version. Please read "
+ "https://github.com/Anorov/cloudflare-scrape#updates "
+ "before submitting a bug report." % e)
+ raise
+
+ # Cloudflare requires a delay before solving the challenge.
+ # Always wait the full delay + 1s because of 'time.sleep()' imprecision.
+ time.sleep(cf_delay + 1.0)
+
+ # Requests transforms any request into a GET after a redirect,
+ # so the redirect has to be handled manually here to allow for
+ # performing other types of requests even as the first request.
+ method = resp.request.method
+ cloudflare_kwargs["allow_redirects"] = False
+
+ redirect = self.request(method, submit_url, **cloudflare_kwargs)
+
+ if 'Location' in redirect.headers:
+ redirect_location = urlparse(redirect.headers["Location"])
+ if not redirect_location.netloc:
+ redirect_url = "%s://%s%s" % (parsed_url.scheme, domain, redirect_location.path)
+ return self.request(method, redirect_url, **original_kwargs)
+ return self.request(method, redirect.headers["Location"], **original_kwargs)
+ else:
+ return redirect
+
+ def cf_sample_domain_function(self, func_expression, domain):
+ parameter_start_index = func_expression.find('}(') + 2
+ # Send the expression with the "+" char and enclosing parenthesis included, as they are
+ # stripped inside ".cf_parse_expression()'.
+ sample_index = self.cf_parse_expression(
+ func_expression[parameter_start_index: func_expression.rfind(')))')]
+ )
+ return ord(domain[int(sample_index)])
+
+ def cf_arithmetic_op(self, op, a, b):
+ if op == '+':
+ return a + b
+ elif op == '/':
+ return a / float(b)
+ elif op == '*':
+ return a * float(b)
+ elif op == '-':
+ return a - b
+ else:
+ raise Exception('Unknown operation')
+
+ def cf_parse_expression(self, expression, domain=None):
+
+ def _get_jsfuck_number(section):
+ digit_expressions = section.replace('!+[]', '1').replace('+!![]', '1').replace('+[]', '0').split('+')
+ return int(
+ # Form a number string, with each digit as the sum of the values inside each parenthesis block.
+ ''.join(
+ str(sum(int(digit_char) for digit_char in digit_expression[1:-1])) # Strip the parenthesis.
+ for digit_expression in digit_expressions
+ )
+ )
+
+ if '/' in expression:
+ dividend, divisor = expression.split('/')
+ dividend = dividend[2:-1] # Strip the leading '+' char and the enclosing parenthesis.
+
+ if domain:
+ # 2019-04-02: At this moment, this extra domain sampling function always appears on the
+ # divisor side, at the end.
+ divisor_a, divisor_b = divisor.split('))+(')
+ divisor_a = _get_jsfuck_number(divisor_a[5:]) # Left-strip the sequence of "(+(+(".
+ divisor_b = self.cf_sample_domain_function(divisor_b, domain)
+ return _get_jsfuck_number(dividend) / float(divisor_a + divisor_b)
+ else:
+ divisor = divisor[2:-1]
+ return _get_jsfuck_number(dividend) / float(_get_jsfuck_number(divisor))
+ else:
+ return _get_jsfuck_number(expression[2:-1])
+
+ @classmethod
+ def create_scraper(cls, sess=None, **kwargs):
+ """
+ Convenience function for creating a ready-to-go requests.Session (subclass) object.
+ """
+ scraper = cls()
+
+ if sess:
+ attrs = ["auth", "cert", "cookies", "headers", "hooks", "params", "proxies", "data"]
+ for attr in attrs:
+ val = getattr(sess, attr, None)
+ if val:
+ setattr(scraper, attr, val)
+
+ return scraper
+
+ ## Functions for integrating cloudflare-scrape with other applications and scripts
+
+ @classmethod
+ def get_tokens(cls, url, user_agent=None, **kwargs):
+ scraper = cls.create_scraper()
+ if user_agent:
+ scraper.headers["User-Agent"] = user_agent
+
+ try:
+ resp = scraper.get(url, **kwargs)
+ resp.raise_for_status()
+ except Exception as e:
+ logging.error("'%s' returned an error. Could not collect tokens." % url)
+ raise
+
+ domain = urlparse(resp.url).netloc
+ cookie_domain = None
+
+ for d in scraper.cookies.list_domains():
+ if d.startswith(".") and d in ("." + domain):
+ cookie_domain = d
+ break
+ else:
+ raise ValueError(
+ "Unable to find Cloudflare cookies. Does the site actually have Cloudflare IUAM (\"I'm Under Attack Mode\") enabled?")
+
+ return ({
+ "__cfduid": scraper.cookies.get("__cfduid", "", domain=cookie_domain),
+ "cf_clearance": scraper.cookies.get("cf_clearance", "", domain=cookie_domain)
+ },
+ scraper.headers["User-Agent"]
+ )
+
+ def get_live_tokens(self, domain):
+ for d in self.cookies.list_domains():
+ if d.startswith(".") and d in ("." + domain):
+ cookie_domain = d
+ break
+ else:
+ raise ValueError(
+ "Unable to find Cloudflare cookies. Does the site actually have Cloudflare IUAM (\"I'm Under Attack Mode\") enabled?")
+
+ return ({
+ "__cfduid": self.cookies.get("__cfduid", "", domain=cookie_domain),
+ "cf_clearance": self.cookies.get("cf_clearance", "", domain=cookie_domain)
+ },
+ self.headers["User-Agent"]
+ )
+
+ @classmethod
+ def get_cookie_string(cls, url, user_agent=None, **kwargs):
+ """
+ Convenience function for building a Cookie HTTP header value.
+ """
+ tokens, user_agent = cls.get_tokens(url, user_agent=user_agent, **kwargs)
+ return "; ".join("=".join(pair) for pair in tokens.items()), user_agent
+
+
+create_scraper = CloudflareScraper.create_scraper
+get_tokens = CloudflareScraper.get_tokens
+get_cookie_string = CloudflareScraper.get_cookie_string
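+
+
+# A minimal usage sketch (the URL below is a placeholder):
+#
+#   scraper = create_scraper()  # drop-in replacement for requests.Session()
+#   html = scraper.get("http://example.com/").content
+#
+#   # or fetch the clearance cookies plus matching User-Agent for reuse elsewhere:
+#   tokens, user_agent = get_tokens("http://example.com/")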
diff --git a/libs/deathbycaptcha.py b/libs/deathbycaptcha.py
new file mode 100644
index 000000000..3c2fafb77
--- /dev/null
+++ b/libs/deathbycaptcha.py
@@ -0,0 +1,516 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""Death by Captcha HTTP and socket API clients.
+
+There are two types of Death by Captcha (DBC hereinafter) API: HTTP and
+socket ones. Both offer the same functionalily, with the socket API
+sporting faster responses and using way less connections.
+
+To access the socket API, use SocketClient class; for the HTTP API, use
+HttpClient class. Both are thread-safe. SocketClient keeps a persistent
+connection opened and serializes all API requests sent through it, thus
+it is advised to keep a pool of them if your script is heavily
+multithreaded.
+
+Both SocketClient and HttpClient give you the following methods:
+
+get_user()
+ Returns your DBC account details as a dict with the following keys:
+
+ "user": your account numeric ID; if login fails, it will be the only
+ item with the value of 0;
+ "rate": your CAPTCHA rate, i.e. how much you will be charged for one
+ solved CAPTCHA in US cents;
+ "balance": your DBC account balance in US cents;
+ "is_banned": flag indicating whether your account is suspended or not.
+
+get_balance()
+ Returns your DBC account balance in US cents.
+
+get_captcha(cid)
+    Returns an uploaded CAPTCHA's details as a dict with the following keys:
+
+ "captcha": the CAPTCHA numeric ID; if no such CAPTCHAs found, it will
+ be the only item with the value of 0;
+ "text": the CAPTCHA text, if solved, otherwise None;
+ "is_correct": flag indicating whether the CAPTCHA was solved correctly
+ (DBC can detect that in rare cases).
+
+ The only argument `cid` is the CAPTCHA numeric ID.
+
+get_text(cid)
+ Returns an uploaded CAPTCHA text (None if not solved). The only argument
+ `cid` is the CAPTCHA numeric ID.
+
+report(cid)
+ Reports an incorrectly solved CAPTCHA. The only argument `cid` is the
+ CAPTCHA numeric ID. Returns True on success, False otherwise.
+
+upload(captcha)
+    Uploads a CAPTCHA. The only argument `captcha` can be either a file-like
+ object (any object with `read` method defined, actually, so StringIO
+    will do), or a CAPTCHA image file name. On successful upload you'll get
+ the CAPTCHA details dict (see get_captcha() method).
+
+ NOTE: AT THIS POINT THE UPLOADED CAPTCHA IS NOT SOLVED YET! You have
+ to poll for its status periodically using get_captcha() or get_text()
+ method until the CAPTCHA is solved and you get the text.
+
+decode(captcha, timeout=DEFAULT_TIMEOUT)
+ A convenient method that uploads a CAPTCHA and polls for its status
+ periodically, but no longer than `timeout` (defaults to 60 seconds).
+ If solved, you'll get the CAPTCHA details dict (see get_captcha()
+ method for details). See upload() method for details on `captcha`
+ argument.
+
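+A minimal usage sketch (the credentials and file name below are placeholders):
+
+    client = SocketClient('your_username', 'your_password')
+    result = client.decode('captcha.png', DEFAULT_TIMEOUT)
+    if result:
+        print result['text']
+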
+Visit http://www.deathbycaptcha.com/user/api for updates.
+
+"""
+
+import base64
+import binascii
+import errno
+import imghdr
+import random
+import os
+import select
+import socket
+import sys
+import threading
+import time
+import urllib
+import urllib2
+try:
+ from json import read as json_decode, write as json_encode
+except ImportError:
+ try:
+ from json import loads as json_decode, dumps as json_encode
+ except ImportError:
+ from simplejson import loads as json_decode, dumps as json_encode
+
+
+# API version and unique software ID
+API_VERSION = 'DBC/Python v4.6'
+
+# Default CAPTCHA timeout and decode() polling interval
+DEFAULT_TIMEOUT = 60
+DEFAULT_TOKEN_TIMEOUT = 120
+POLLS_INTERVAL = [1, 1, 2, 3, 2, 2, 3, 2, 2]
+DFLT_POLL_INTERVAL = 3
+
+# Base HTTP API url
+HTTP_BASE_URL = 'http://api.dbcapi.me/api'
+
+# Preferred HTTP API server's response content type, do not change
+HTTP_RESPONSE_TYPE = 'application/json'
+
+# Socket API server's host & ports range
+SOCKET_HOST = 'api.dbcapi.me'
+SOCKET_PORTS = range(8123, 8131)
+
+
+def _load_image(captcha):
+ if hasattr(captcha, 'read'):
+ img = captcha.read()
+ elif type(captcha) == bytearray:
+ img = captcha
+    else:
+        with open(captcha, 'rb') as captcha_file:
+            img = captcha_file.read()
+ if not len(img):
+ raise ValueError('CAPTCHA image is empty')
+ elif imghdr.what(None, img) is None:
+ raise TypeError('Unknown CAPTCHA image type')
+ else:
+ return img
+
+
+class AccessDeniedException(Exception):
+ pass
+
+
+class Client(object):
+
+ """Death by Captcha API Client."""
+
+ def __init__(self, username, password):
+ self.is_verbose = False
+ self.userpwd = {'username': username, 'password': password}
+
+ def _log(self, cmd, msg=''):
+ if self.is_verbose:
+ print '%d %s %s' % (time.time(), cmd, msg.rstrip())
+ return self
+
+ def close(self):
+ pass
+
+ def connect(self):
+ pass
+
+ def get_user(self):
+ """Fetch user details -- ID, balance, rate and banned status."""
+ raise NotImplementedError()
+
+ def get_balance(self):
+ """Fetch user balance (in US cents)."""
+ return self.get_user().get('balance')
+
+ def get_captcha(self, cid):
+ """Fetch a CAPTCHA details -- ID, text and correctness flag."""
+ raise NotImplementedError()
+
+ def get_text(self, cid):
+ """Fetch a CAPTCHA text."""
+ return self.get_captcha(cid).get('text') or None
+
+ def report(self, cid):
+ """Report a CAPTCHA as incorrectly solved."""
+ raise NotImplementedError()
+
+ def upload(self, captcha):
+ """Upload a CAPTCHA.
+
+ Accepts file names and file-like objects. Returns CAPTCHA details
+ dict on success.
+
+ """
+ raise NotImplementedError()
+
+ def decode(self, captcha=None, timeout=None, **kwargs):
+ """
+ Try to solve a CAPTCHA.
+
+ See Client.upload() for arguments details.
+
+ Uploads a CAPTCHA, polls for its status periodically with arbitrary
+ timeout (in seconds), returns CAPTCHA details if (correctly) solved.
+ """
+ if not timeout:
+ if not captcha:
+ timeout = DEFAULT_TOKEN_TIMEOUT
+ else:
+ timeout = DEFAULT_TIMEOUT
+
+ deadline = time.time() + (max(0, timeout) or DEFAULT_TIMEOUT)
+ uploaded_captcha = self.upload(captcha, **kwargs)
+ if uploaded_captcha:
+ intvl_idx = 0 # POLL_INTERVAL index
+ while deadline > time.time() and not uploaded_captcha.get('text'):
+ intvl, intvl_idx = self._get_poll_interval(intvl_idx)
+ time.sleep(intvl)
+ pulled = self.get_captcha(uploaded_captcha['captcha'])
+ if pulled['captcha'] == uploaded_captcha['captcha']:
+ uploaded_captcha = pulled
+ if uploaded_captcha.get('text') and \
+ uploaded_captcha.get('is_correct'):
+ return uploaded_captcha
+
+ def _get_poll_interval(self, idx):
+ """Returns poll interval and next index depending on index provided"""
+
+ if len(POLLS_INTERVAL) > idx:
+ intvl = POLLS_INTERVAL[idx]
+ else:
+ intvl = DFLT_POLL_INTERVAL
+ idx += 1
+
+ return intvl, idx
+
+
+class HttpClient(Client):
+
+ """Death by Captcha HTTP API client."""
+
+ def __init__(self, *args):
+ Client.__init__(self, *args)
+ self.opener = urllib2.build_opener(urllib2.HTTPRedirectHandler())
+
+ def _call(self, cmd, payload=None, headers=None):
+ if headers is None:
+ headers = {}
+ headers['Accept'] = HTTP_RESPONSE_TYPE
+ headers['User-Agent'] = API_VERSION
+ if hasattr(payload, 'items'):
+ payload = urllib.urlencode(payload)
+ self._log('SEND', '%s %d %s' % (cmd, len(payload), payload))
+ else:
+ self._log('SEND', '%s' % cmd)
+ if payload is not None:
+ headers['Content-Length'] = len(payload)
+ try:
+ response = self.opener.open(urllib2.Request(
+ HTTP_BASE_URL + '/' + cmd.strip('/'),
+ data=payload,
+ headers=headers
+ )).read()
+ except urllib2.HTTPError, err:
+ if 403 == err.code:
+ raise AccessDeniedException('Access denied, please check'
+ ' your credentials and/or balance')
+ elif 400 == err.code or 413 == err.code:
+ raise ValueError("CAPTCHA was rejected by the service, check"
+ " if it's a valid image")
+ elif 503 == err.code:
+ raise OverflowError("CAPTCHA was rejected due to service"
+ " overload, try again later")
+ else:
+ raise err
+ else:
+ self._log('RECV', '%d %s' % (len(response), response))
+ try:
+ return json_decode(response)
+ except Exception:
+ raise RuntimeError('Invalid API response')
+ return {}
+
+ def get_user(self):
+ return self._call('user', self.userpwd.copy()) or {'user': 0}
+
+ def get_captcha(self, cid):
+ return self._call('captcha/%d' % cid) or {'captcha': 0}
+
+ def report(self, cid):
+ return not self._call('captcha/%d/report' % cid,
+ self.userpwd.copy()).get('is_correct')
+
+ def upload(self, captcha=None, **kwargs):
+ boundary = binascii.hexlify(os.urandom(16))
+ banner = kwargs.get('banner', '')
+ if banner:
+ kwargs['banner'] = 'base64:' + base64.b64encode(_load_image(banner))
+ body = '\r\n'.join(('\r\n'.join((
+ '--%s' % boundary,
+ 'Content-Disposition: form-data; name="%s"' % k,
+ 'Content-Type: text/plain',
+ 'Content-Length: %d' % len(str(v)),
+ '',
+ str(v)
+ ))) for k, v in self.userpwd.items())
+
+        body += '\r\n' + '\r\n'.join(('\r\n'.join((
+ '--%s' % boundary,
+ 'Content-Disposition: form-data; name="%s"' % k,
+ 'Content-Type: text/plain',
+ 'Content-Length: %d' % len(str(v)),
+ '',
+ str(v)
+ ))) for k, v in kwargs.items())
+
+ if captcha:
+ img = _load_image(captcha)
+ body += '\r\n'.join((
+ '',
+ '--%s' % boundary,
+ 'Content-Disposition: form-data; name="captchafile"; '
+ 'filename="captcha"',
+ 'Content-Type: application/octet-stream',
+ 'Content-Length: %d' % len(img),
+ '',
+ img,
+ '--%s--' % boundary,
+ ''
+ ))
+
+ response = self._call('captcha', body, {
+ 'Content-Type': 'multipart/form-data; boundary="%s"' % boundary
+ }) or {}
+ if response.get('captcha'):
+ return response
+
+
+class SocketClient(Client):
+
+ """Death by Captcha socket API client."""
+
+ TERMINATOR = '\r\n'
+
+ def __init__(self, *args):
+ Client.__init__(self, *args)
+ self.socket_lock = threading.Lock()
+ self.socket = None
+
+ def close(self):
+ if self.socket:
+ self._log('CLOSE')
+ try:
+ self.socket.shutdown(socket.SHUT_RDWR)
+ except socket.error:
+ pass
+ finally:
+ self.socket.close()
+ self.socket = None
+
+ def connect(self):
+ if not self.socket:
+ self._log('CONN')
+ host = (socket.gethostbyname(SOCKET_HOST),
+ random.choice(SOCKET_PORTS))
+ self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ self.socket.settimeout(0)
+ try:
+ self.socket.connect(host)
+ except socket.error, err:
+ if (err.args[0] not in
+ (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
+ self.close()
+ raise err
+ return self.socket
+
+ def __del__(self):
+ self.close()
+
+ def _sendrecv(self, sock, buf):
+ self._log('SEND', buf)
+ fds = [sock]
+ buf += self.TERMINATOR
+ response = ''
+ intvl_idx = 0
+ while True:
+ intvl, intvl_idx = self._get_poll_interval(intvl_idx)
+ rds, wrs, exs = select.select((not buf and fds) or [],
+ (buf and fds) or [],
+ fds,
+ intvl)
+ if exs:
+ raise IOError('select() failed')
+ try:
+ if wrs:
+ while buf:
+ buf = buf[wrs[0].send(buf):]
+ elif rds:
+ while True:
+ s = rds[0].recv(256)
+ if not s:
+ raise IOError('recv(): connection lost')
+ else:
+ response += s
+ except socket.error, err:
+ if (err.args[0] not in
+ (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
+ raise err
+ if response.endswith(self.TERMINATOR):
+ self._log('RECV', response)
+ return response.rstrip(self.TERMINATOR)
+ raise IOError('send/recv timed out')
+
+ def _call(self, cmd, data=None):
+ if data is None:
+ data = {}
+ data['cmd'] = cmd
+ data['version'] = API_VERSION
+ request = json_encode(data)
+
+ response = None
+ for _ in range(2):
+ if not self.socket and cmd != 'login':
+ self._call('login', self.userpwd.copy())
+ self.socket_lock.acquire()
+ try:
+ sock = self.connect()
+ response = self._sendrecv(sock, request)
+ except IOError, err:
+ sys.stderr.write(str(err) + "\n")
+ self.close()
+ except socket.error, err:
+ sys.stderr.write(str(err) + "\n")
+ self.close()
+ raise IOError('Connection refused')
+ else:
+ break
+ finally:
+ self.socket_lock.release()
+
+ if response is None:
+ raise IOError('Connection lost or timed out during API request')
+
+ try:
+ response = json_decode(response)
+ except Exception:
+ raise RuntimeError('Invalid API response')
+
+ if not response.get('error'):
+ return response
+
+ error = response['error']
+ if error in ('not-logged-in', 'invalid-credentials'):
+ raise AccessDeniedException('Access denied, check your credentials')
+ elif 'banned' == error:
+ raise AccessDeniedException('Access denied, account is suspended')
+ elif 'insufficient-funds' == error:
+ raise AccessDeniedException(
+ 'CAPTCHA was rejected due to low balance')
+ elif 'invalid-captcha' == error:
+ raise ValueError('CAPTCHA is not a valid image')
+ elif 'service-overload' == error:
+ raise OverflowError(
+ 'CAPTCHA was rejected due to service overload, try again later')
+ else:
+ self.socket_lock.acquire()
+ self.close()
+ self.socket_lock.release()
+            raise RuntimeError('API server error occurred: %s' % error)
+
+ def get_user(self):
+ return self._call('user') or {'user': 0}
+
+ def get_captcha(self, cid):
+ return self._call('captcha', {'captcha': cid}) or {'captcha': 0}
+
+ def upload(self, captcha=None, **kwargs):
+ data = {}
+ if captcha:
+ data['captcha'] = base64.b64encode(_load_image(captcha))
+ if kwargs:
+ banner = kwargs.get('banner', '')
+ if banner:
+ kwargs['banner'] = base64.b64encode(_load_image(banner))
+ data.update(kwargs)
+ response = self._call('upload', data)
+ if response.get('captcha'):
+ uploaded_captcha = dict(
+ (k, response.get(k))
+ for k in ('captcha', 'text', 'is_correct')
+ )
+ if not uploaded_captcha['text']:
+ uploaded_captcha['text'] = None
+ return uploaded_captcha
+
+ def report(self, cid):
+ return not self._call('report', {'captcha': cid}).get('is_correct')
+
+
+if '__main__' == __name__:
+ # Put your DBC username & password here:
+ # client = HttpClient(sys.argv[1], sys.argv[2])
+ client = SocketClient(sys.argv[1], sys.argv[2])
+ client.is_verbose = True
+
+ print 'Your balance is %s US cents' % client.get_balance()
+
+ for fn in sys.argv[3:]:
+ try:
+ # Put your CAPTCHA image file name or file-like object, and optional
+ # solving timeout (in seconds) here:
+ captcha = client.decode(fn, DEFAULT_TIMEOUT)
+ except Exception, e:
+ sys.stderr.write('Failed uploading CAPTCHA: %s\n' % (e, ))
+ captcha = None
+
+ if captcha:
+ print 'CAPTCHA %d solved: %s' % \
+ (captcha['captcha'], captcha['text'])
+
+ # Report as incorrectly solved if needed. Make sure the CAPTCHA was
+ # in fact incorrectly solved!
+ # try:
+ # client.report(captcha['captcha'])
+ # except Exception, e:
+ # sys.stderr.write('Failed reporting CAPTCHA: %s\n' % (e, ))
diff --git a/libs/python_anticaptcha/__init__.py b/libs/python_anticaptcha/__init__.py
new file mode 100644
index 000000000..ac9f0550f
--- /dev/null
+++ b/libs/python_anticaptcha/__init__.py
@@ -0,0 +1,7 @@
+from .base import AnticaptchaClient
+from .tasks import NoCaptchaTask, NoCaptchaTaskProxylessTask, ImageToTextTask, FunCaptchaTask
+from .proxy import Proxy
+from .exceptions import AnticaptchaException
+from .fields import SimpleText, Image, WebLink, TextInput, Textarea, Checkbox, Select, Radio, ImageUpload
+
+AnticatpchaException = AnticaptchaException
\ No newline at end of file
diff --git a/libs/python_anticaptcha/base.py b/libs/python_anticaptcha/base.py
new file mode 100644
index 000000000..fca8cdf53
--- /dev/null
+++ b/libs/python_anticaptcha/base.py
@@ -0,0 +1,114 @@
+import requests
+import time
+
+from six.moves.urllib_parse import urljoin
+from .exceptions import AnticaptchaException
+
+SLEEP_EVERY_CHECK_FINISHED = 3
+MAXIMUM_JOIN_TIME = 60 * 5
+
+
+class Job(object):
+ client = None
+ task_id = None
+ _last_result = None
+
+ def __init__(self, client, task_id):
+ self.client = client
+ self.task_id = task_id
+
+ def _update(self):
+ self._last_result = self.client.getTaskResult(self.task_id)
+
+ def check_is_ready(self):
+ self._update()
+ return self._last_result['status'] == 'ready'
+
+ def get_solution_response(self): # Recaptcha
+ return self._last_result['solution']['gRecaptchaResponse']
+
+ def get_token_response(self): # Funcaptcha
+ return self._last_result['solution']['token']
+
+ def get_answers(self):
+ return self._last_result['solution']['answers']
+
+ def get_captcha_text(self): # Image
+ return self._last_result['solution']['text']
+
+ def report_incorrect(self):
+ return self.client.reportIncorrectImage(self.task_id)
+
+ def join(self, maximum_time=None):
+ elapsed_time = 0
+ maximum_time = maximum_time or MAXIMUM_JOIN_TIME
+ while not self.check_is_ready():
+ time.sleep(SLEEP_EVERY_CHECK_FINISHED)
+ elapsed_time += SLEEP_EVERY_CHECK_FINISHED
+            if elapsed_time > maximum_time:
+                raise AnticaptchaException(None, 250,
+                                           "The execution time exceeded the maximum of {} seconds (took {} seconds).".format(
+                                               maximum_time, elapsed_time))
+
+
+class AnticaptchaClient(object):
+ client_key = None
+ CREATE_TASK_URL = "/createTask"
+ TASK_RESULT_URL = "/getTaskResult"
+ BALANCE_URL = "/getBalance"
+ REPORT_IMAGE_URL = "/reportIncorrectImageCaptcha"
+ SOFT_ID = 847
+ language_pool = "en"
+
+ def __init__(self, client_key, language_pool="en", host="api.anti-captcha.com", use_ssl=True):
+ self.client_key = client_key
+ self.language_pool = language_pool
+ self.base_url = "{proto}://{host}/".format(proto="https" if use_ssl else "http",
+ host=host)
+ self.session = requests.Session()
+
+ @property
+ def client_ip(self):
+ if not hasattr(self, '_client_ip'):
+ self._client_ip = self.session.get('http://httpbin.org/ip').json()['origin']
+ return self._client_ip
+
+ def _check_response(self, response):
+ if response.get('errorId', False) == 11:
+ response['errorDescription'] = "{} Your missing IP address is {}.".format(response['errorDescription'],
+ self.client_ip)
+ if response.get('errorId', False):
+ raise AnticaptchaException(response['errorId'],
+ response['errorCode'],
+ response['errorDescription'])
+
+ def createTask(self, task):
+ request = {"clientKey": self.client_key,
+ "task": task.serialize(),
+ "softId": self.SOFT_ID,
+ "languagePool": self.language_pool,
+ }
+ response = self.session.post(urljoin(self.base_url, self.CREATE_TASK_URL), json=request).json()
+ self._check_response(response)
+ return Job(self, response['taskId'])
+
+ def getTaskResult(self, task_id):
+ request = {"clientKey": self.client_key,
+ "taskId": task_id}
+ response = self.session.post(urljoin(self.base_url, self.TASK_RESULT_URL), json=request).json()
+ self._check_response(response)
+ return response
+
+ def getBalance(self):
+ request = {"clientKey": self.client_key}
+ response = self.session.post(urljoin(self.base_url, self.BALANCE_URL), json=request).json()
+ self._check_response(response)
+ return response['balance']
+
+ def reportIncorrectImage(self, task_id):
+ request = {"clientKey": self.client_key,
+ "taskId": task_id
+ }
+ response = self.session.post(urljoin(self.base_url, self.REPORT_IMAGE_URL), json=request).json()
+ self._check_response(response)
+ return response.get('status', False) != False
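+
+
+# A minimal usage sketch (the API key and site key below are placeholders; the
+# task classes live in tasks.py):
+#
+#   from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask
+#
+#   client = AnticaptchaClient('api_key_here')
+#   job = client.createTask(NoCaptchaTaskProxylessTask('http://example.com/', 'site_key_here'))
+#   job.join()
+#   print job.get_solution_response()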
diff --git a/libs/python_anticaptcha/exceptions.py b/libs/python_anticaptcha/exceptions.py
new file mode 100644
index 000000000..f37eb372c
--- /dev/null
+++ b/libs/python_anticaptcha/exceptions.py
@@ -0,0 +1,23 @@
+class AnticaptchaException(Exception):
+ def __init__(self, error_id, error_code, error_description, *args):
+ super(AnticaptchaException, self).__init__("[{}:{}]{}".format(error_code, error_id, error_description))
+ self.error_description = error_description
+ self.error_id = error_id
+ self.error_code = error_code
+
+
+AnticatpchaException = AnticaptchaException
+
+
+class InvalidWidthException(AnticaptchaException):
+ def __init__(self, width):
+ self.width = width
+ msg = 'Invalid width (%s). Can be one of these: 100, 50, 33, 25.' % (self.width,)
+ super(InvalidWidthException, self).__init__("AC-1", 1, msg)
+
+
+class MissingNameException(AnticaptchaException):
+ def __init__(self, cls):
+ self.cls = cls
+ msg = 'Missing name data in {0}. Provide {0}.__init__(name="X") or {0}.serialize(name="X")'.format(str(self.cls))
+ super(MissingNameException, self).__init__("AC-2", 2, msg)
diff --git a/libs/python_anticaptcha/fields.py b/libs/python_anticaptcha/fields.py
new file mode 100644
index 000000000..9e6245946
--- /dev/null
+++ b/libs/python_anticaptcha/fields.py
@@ -0,0 +1,199 @@
+import six
+from python_anticaptcha.exceptions import InvalidWidthException, MissingNameException
+
+
+class BaseField(object):
+ label = None
+ labelHint = None
+
+ def serialize(self, name=None):
+ data = {}
+ if self.label:
+ data['label'] = self.label or False
+ if self.labelHint:
+ data['labelHint'] = self.labelHint or False
+ return data
+
+
+class NameBaseField(BaseField):
+ name = None
+
+ def serialize(self, name=None):
+ data = super(NameBaseField, self).serialize(name)
+ if name:
+ data['name'] = name
+ elif self.name:
+ data['name'] = self.name
+ else:
+ raise MissingNameException(cls=self.__class__)
+ return data
+
+
+class SimpleText(BaseField):
+ contentType = 'text'
+
+ def __init__(self, content, label=None, labelHint=None, width=None):
+ self.label = label
+ self.labelHint = labelHint
+
+ self.content = content
+ self.width = width
+
+ def serialize(self, name=None):
+ data = super(SimpleText, self).serialize(name)
+ data['contentType'] = self.contentType
+ data['content'] = self.content
+
+ if self.width:
+ if self.width not in [100, 50, 33, 25]:
+ raise InvalidWidthException(self.width)
+ data['inputOptions'] = {}
+ data['width'] = self.width
+ return data
+
+
+class Image(BaseField):
+ contentType = 'image'
+
+ def __init__(self, imageUrl, label=None, labelHint=None):
+ self.label = label
+ self.labelHint = labelHint
+ self.imageUrl = imageUrl
+
+ def serialize(self, name=None):
+ data = super(Image, self).serialize(name)
+ data['contentType'] = self.contentType
+ data['content'] = self.imageUrl
+ return data
+
+
+class WebLink(BaseField):
+ contentType = 'link'
+
+ def __init__(self, linkText, linkUrl, label=None, labelHint=None, width=None):
+ self.label = label
+ self.labelHint = labelHint
+
+ self.linkText = linkText
+ self.linkUrl = linkUrl
+
+ self.width = width
+
+ def serialize(self, name=None):
+ data = super(WebLink, self).serialize(name)
+ data['contentType'] = self.contentType
+
+ if self.width:
+ if self.width not in [100, 50, 33, 25]:
+ raise InvalidWidthException(self.width)
+ data['inputOptions'] = {}
+ data['width'] = self.width
+
+ data.update({'content': {'url': self.linkUrl,
+ 'text': self.linkText}})
+
+ return data
+
+
+class TextInput(NameBaseField):
+ def __init__(self, placeHolder=None, label=None, labelHint=None, width=None):
+ self.label = label
+ self.labelHint = labelHint
+
+ self.placeHolder = placeHolder
+
+ self.width = width
+
+ def serialize(self, name=None):
+ data = super(TextInput, self).serialize(name)
+ data['inputType'] = 'text'
+
+ data['inputOptions'] = {}
+
+ if self.width:
+ if self.width not in [100, 50, 33, 25]:
+ raise InvalidWidthException(self.width)
+
+ data['inputOptions']['width'] = str(self.width)
+
+ if self.placeHolder:
+ data['inputOptions']['placeHolder'] = self.placeHolder
+ return data
+
+
+class Textarea(NameBaseField):
+ def __init__(self, placeHolder=None, rows=None, label=None, width=None, labelHint=None):
+ self.label = label
+ self.labelHint = labelHint
+
+ self.placeHolder = placeHolder
+ self.rows = rows
+ self.width = width
+
+ def serialize(self, name=None):
+ data = super(Textarea, self).serialize(name)
+ data['inputType'] = 'textarea'
+ data['inputOptions'] = {}
+ if self.rows:
+ data['inputOptions']['rows'] = str(self.rows)
+ if self.placeHolder:
+ data['inputOptions']['placeHolder'] = self.placeHolder
+ if self.width:
+ data['inputOptions']['width'] = str(self.width)
+ return data
+
+
+class Checkbox(NameBaseField):
+ def __init__(self, text, label=None, labelHint=None):
+ self.label = label
+ self.labelHint = labelHint
+
+ self.text = text
+
+ def serialize(self, name=None):
+ data = super(Checkbox, self).serialize(name)
+ data['inputType'] = 'checkbox'
+ data['inputOptions'] = {'label': self.text}
+ return data
+
+
+class Select(NameBaseField):
+ type = 'select'
+
+ def __init__(self, label=None, choices=None, labelHint=None):
+ self.label = label
+ self.labelHint = labelHint
+ self.choices = choices or ()
+
+ def get_choices(self):
+ for choice in self.choices:
+ if isinstance(choice, six.text_type):
+ yield choice, choice
+ else:
+ yield choice
+
+ def serialize(self, name=None):
+ data = super(Select, self).serialize(name)
+ data['inputType'] = self.type
+
+ data['inputOptions'] = []
+ for value, caption in self.get_choices():
+ data['inputOptions'].append({"value": value,
+ "caption": caption})
+
+ return data
+
+
+class Radio(Select):
+ type = 'radio'
+
+
+class ImageUpload(NameBaseField):
+ def __init__(self, label=None, labelHint=None):
+ self.label = label
+ self.labelHint = labelHint
+
+ def serialize(self, name=None):
+ data = super(ImageUpload, self).serialize(name)
+ data['inputType'] = 'imageUpload'
+ return data
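+
+
+# A usage sketch (values below are placeholders): fields serialize into the
+# "forms" entries of an anti-captcha.com CustomCaptchaTask, e.g.:
+#
+#   field = TextInput(placeHolder='Type the word', label='Word', width=50)
+#   field.serialize(name='answer')
+#   # -> {'label': 'Word', 'name': 'answer', 'inputType': 'text',
+#   #     'inputOptions': {'width': '50', 'placeHolder': 'Type the word'}}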
diff --git a/libs/python_anticaptcha/proxy.py b/libs/python_anticaptcha/proxy.py
new file mode 100644
index 000000000..907232f7e
--- /dev/null
+++ b/libs/python_anticaptcha/proxy.py
@@ -0,0 +1,28 @@
+from six.moves.urllib_parse import urlparse
+
+
+class Proxy(object):
+ def __init__(self, proxy_type, proxy_address, proxy_port, proxy_login, proxy_password):
+ self.proxyType = proxy_type
+ self.proxyAddress = proxy_address
+ self.proxyPort = proxy_port
+ self.proxyLogin = proxy_login
+ self.proxyPassword = proxy_password
+
+ def serialize(self):
+ result = {'proxyType': self.proxyType,
+ 'proxyAddress': self.proxyAddress,
+ 'proxyPort': self.proxyPort}
+ if self.proxyLogin or self.proxyPassword:
+ result['proxyLogin'] = self.proxyLogin
+ result['proxyPassword'] = self.proxyPassword
+ return result
+
+ @classmethod
+ def parse_url(cls, url):
+ parsed = urlparse(url)
+ return cls(proxy_type=parsed.scheme,
+ proxy_address=parsed.hostname,
+ proxy_port=parsed.port,
+ proxy_login=parsed.username,
+ proxy_password=parsed.password)
diff --git a/libs/python_anticaptcha/tasks.py b/libs/python_anticaptcha/tasks.py
new file mode 100644
index 000000000..57462763f
--- /dev/null
+++ b/libs/python_anticaptcha/tasks.py
@@ -0,0 +1,128 @@
+import base64
+from .fields import BaseField
+
+
+class BaseTask(object):
+ def serialize(self, **result):
+ return result
+
+
+class ProxyMixin(BaseTask):
+ def __init__(self, *args, **kwargs):
+ self.proxy = kwargs.pop('proxy')
+ self.userAgent = kwargs.pop('user_agent')
+ self.cookies = kwargs.pop('cookies', '')
+ super(ProxyMixin, self).__init__(*args, **kwargs)
+
+ def serialize(self, **result):
+ result = super(ProxyMixin, self).serialize(**result)
+ result.update(self.proxy.serialize())
+ result['userAgent'] = self.userAgent
+ if self.cookies:
+ result['cookies'] = self.cookies
+ return result
+
+
+class NoCaptchaTaskProxylessTask(BaseTask):
+ type = "NoCaptchaTaskProxyless"
+ websiteURL = None
+ websiteKey = None
+ websiteSToken = None
+
+ def __init__(self, website_url, website_key, website_s_token=None, is_invisible=None):
+ self.websiteURL = website_url
+ self.websiteKey = website_key
+ self.websiteSToken = website_s_token
+ self.isInvisible = is_invisible
+
+ def serialize(self):
+ data = {'type': self.type,
+ 'websiteURL': self.websiteURL,
+ 'websiteKey': self.websiteKey}
+ if self.websiteSToken is not None:
+ data['websiteSToken'] = self.websiteSToken
+ if self.isInvisible is not None:
+ data['isInvisible'] = self.isInvisible
+ return data
+
+
+class FunCaptchaTask(ProxyMixin):
+ type = "FunCaptchaTask"
+ websiteURL = None
+ websiteKey = None
+
+ def __init__(self, website_url, website_key, *args, **kwargs):
+ self.websiteURL = website_url
+ self.websiteKey = website_key
+ super(FunCaptchaTask, self).__init__(*args, **kwargs)
+
+ def serialize(self, **result):
+ result = super(FunCaptchaTask, self).serialize(**result)
+ result.update({'type': self.type,
+ 'websiteURL': self.websiteURL,
+ 'websitePublicKey': self.websiteKey})
+ return result
+
+
+class NoCaptchaTask(ProxyMixin, NoCaptchaTaskProxylessTask):
+ type = "NoCaptchaTask"
+
+
+class ImageToTextTask(object):
+ type = "ImageToTextTask"
+ fp = None
+ phrase = None
+ case = None
+ numeric = None
+ math = None
+ minLength = None
+ maxLength = None
+
+ def __init__(self, fp, phrase=None, case=None, numeric=None, math=None, min_length=None, max_length=None):
+ self.fp = fp
+ self.phrase = phrase
+ self.case = case
+ self.numeric = numeric
+ self.math = math
+ self.minLength = min_length
+ self.maxLength = max_length
+
+ def serialize(self):
+ return {'type': self.type,
+ 'body': base64.b64encode(self.fp.read()).decode('utf-8'),
+ 'phrase': self.phrase,
+ 'case': self.case,
+ 'numeric': self.numeric,
+ 'math': self.math,
+ 'minLength': self.minLength,
+ 'maxLength': self.maxLength}
+
+
+class CustomCaptchaTask(BaseTask):
+ type = 'CustomCaptchaTask'
+ imageUrl = None
+ assignment = None
+ form = None
+
+ def __init__(self, imageUrl, form=None, assignment=None):
+ self.imageUrl = imageUrl
+ self.form = form or {}
+ self.assignment = assignment
+
+ def serialize(self):
+ data = super(CustomCaptchaTask, self).serialize()
+ data.update({'type': self.type,
+ 'imageUrl': self.imageUrl})
+ if self.form:
+ forms = []
+ for name, field in self.form.items():
+ if isinstance(field, BaseField):
+ forms.append(field.serialize(name))
+ else:
+ field = field.copy()
+ field['name'] = name
+ forms.append(field)
+ data['forms'] = forms
+ if self.assignment:
+ data['assignment'] = self.assignment
+ return data
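+
+
+# A usage sketch (API key and file name are placeholders; AnticaptchaClient
+# comes from base.py):
+#
+#   from python_anticaptcha import AnticaptchaClient, ImageToTextTask
+#
+#   client = AnticaptchaClient('api_key_here')
+#   with open('captcha.png', 'rb') as fp:
+#       job = client.createTask(ImageToTextTask(fp))
+#   job.join()
+#   print job.get_captcha_text()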
diff --git a/libs/subliminal_patch/core.py b/libs/subliminal_patch/core.py
index 5dda9fb3c..df38b4e09 100644
--- a/libs/subliminal_patch/core.py
+++ b/libs/subliminal_patch/core.py
@@ -518,10 +518,20 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski
hints["expected_title"] = [hints["title"]]
guessed_result = guessit(guess_from, options=hints)
+
logger.debug('GuessIt found: %s', json.dumps(guessed_result, cls=GuessitEncoder, indent=4, ensure_ascii=False))
video = Video.fromguess(path, guessed_result)
video.hints = hints
+    # get a possible alternative title from the filename itself
+ alt_guess = guessit(filename, options=hints)
+ if "title" in alt_guess and alt_guess["title"] != guessed_result["title"]:
+ if video_type == "episode":
+ video.alternative_series.append(alt_guess["title"])
+ else:
+ video.alternative_titles.append(alt_guess["title"])
+ logger.debug("Adding alternative title: %s", alt_guess["title"])
+
if dont_use_actual_file:
return video
diff --git a/libs/subliminal_patch/http.py b/libs/subliminal_patch/http.py
index d6fddb358..c813f5585 100644
--- a/libs/subliminal_patch/http.py
+++ b/libs/subliminal_patch/http.py
@@ -8,10 +8,18 @@ import requests
import xmlrpclib
import dns.resolver
-from requests import Session, exceptions
+from requests import exceptions
from urllib3.util import connection
from retry.api import retry_call
from exceptions import APIThrottled
+from dogpile.cache.api import NO_VALUE
+from subliminal.cache import region
+from cfscrape import CloudflareScraper
+
+try:
+ from urlparse import urlparse
+except ImportError:
+ from urllib.parse import urlparse
from subzero.lib.io import get_viable_encoding
@@ -30,24 +38,58 @@ custom_resolver = dns.resolver.Resolver(configure=False)
custom_resolver.nameservers = ['8.8.8.8', '1.1.1.1']
-class CertifiSession(Session):
+class CertifiSession(CloudflareScraper):
timeout = 10
def __init__(self):
super(CertifiSession, self).__init__()
self.verify = pem_file
-
- def request(self, *args, **kwargs):
+ self.headers.update({
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+ 'Accept-Language': 'en-US,en;q=0.5',
+ 'Cache-Control': 'no-cache',
+ 'Pragma': 'no-cache',
+ 'DNT': '1'
+ })
+
+ def request(self, method, url, *args, **kwargs):
if kwargs.get('timeout') is None:
kwargs['timeout'] = self.timeout
- return super(CertifiSession, self).request(*args, **kwargs)
+
+ parsed_url = urlparse(url)
+ domain = parsed_url.netloc
+
+ cache_key = "cf_data_%s" % domain
+
+ if not self.cookies.get("__cfduid", "", domain=domain):
+ cf_data = region.get(cache_key)
+ if cf_data is not NO_VALUE:
+ cf_cookies, user_agent = cf_data
+ logger.debug("Trying to use old cf data for %s: %s", domain, cf_data)
+ for cookie, value in cf_cookies.iteritems():
+ self.cookies.set(cookie, value, domain=domain)
+
+ self.headers['User-Agent'] = user_agent
+
+ ret = super(CertifiSession, self).request(method, url, *args, **kwargs)
+ try:
+ cf_data = self.get_live_tokens(domain)
+ except:
+ pass
+ else:
+ if cf_data != region.get(cache_key) and self.cookies.get("__cfduid", "", domain=domain)\
+ and self.cookies.get("cf_clearance", "", domain=domain):
+ logger.debug("Storing cf data for %s: %s", domain, cf_data)
+ region.set(cache_key, cf_data)
+
+ return ret
class RetryingSession(CertifiSession):
proxied_functions = ("get", "post")
def __init__(self):
- super(CertifiSession, self).__init__()
+ super(RetryingSession, self).__init__()
self.verify = pem_file
proxy = os.environ.get('SZ_HTTP_PROXY')
@@ -62,7 +104,7 @@ class RetryingSession(CertifiSession):
# fixme: may be a little loud
logger.debug("Using proxy %s for: %s", self.proxies["http"], args[0])
- return retry_call(getattr(super(CertifiSession, self), method), fargs=args, fkwargs=kwargs, tries=3, delay=5,
+ return retry_call(getattr(super(RetryingSession, self), method), fargs=args, fkwargs=kwargs, tries=3, delay=5,
exceptions=(exceptions.ConnectionError,
exceptions.ProxyError,
exceptions.SSLError,
diff --git a/libs/subliminal_patch/pitcher.py b/libs/subliminal_patch/pitcher.py
new file mode 100644
index 000000000..b2cef63b3
--- /dev/null
+++ b/libs/subliminal_patch/pitcher.py
@@ -0,0 +1,257 @@
+# coding=utf-8
+
+import os
+import time
+import logging
+import json
+from subliminal.cache import region
+from dogpile.cache.api import NO_VALUE
+from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask, NoCaptchaTask, AnticaptchaException,\
+ Proxy
+from deathbycaptcha import SocketClient as DBCClient, DEFAULT_TOKEN_TIMEOUT
+
+
+logger = logging.getLogger(__name__)
+
+
+class PitcherRegistry(object):
+ pitchers = []
+ pitchers_by_key = {}
+
+ def register(self, cls):
+ idx = len(self.pitchers)
+ self.pitchers.append(cls)
+ key = "%s_%s" % (cls.name, cls.needs_proxy)
+ key_by_source = "%s_%s" % (cls.source, cls.needs_proxy)
+ self.pitchers_by_key[key] = idx
+ self.pitchers_by_key[key_by_source] = idx
+ return cls
+
+ def get_pitcher(self, name_or_site=None, with_proxy=False):
+ name_or_site = name_or_site or os.environ.get("ANTICAPTCHA_CLASS")
+ if not name_or_site:
+ raise Exception("AntiCaptcha class not given, exiting")
+
+ key = "%s_%s" % (name_or_site, with_proxy)
+
+ if key not in self.pitchers_by_key:
+ raise Exception("Pitcher %s not found (proxy: %s)" % (name_or_site, with_proxy))
+
+ return self.pitchers[self.pitchers_by_key.get(key)]
+
+
+registry = pitchers = PitcherRegistry()
+
+
+class Pitcher(object):
+ name = None
+ source = None
+ needs_proxy = False
+ tries = 3
+ job = None
+ client = None
+ client_key = None
+ website_url = None
+ website_key = None
+ website_name = None
+ solve_time = None
+ success = False
+
+ def __init__(self, website_name, website_url, website_key, tries=3, client_key=None, *args, **kwargs):
+ self.tries = tries
+ self.client_key = client_key or os.environ.get("ANTICAPTCHA_ACCOUNT_KEY")
+ if not self.client_key:
+ raise Exception("AntiCaptcha key not given, exiting")
+
+ self.website_name = website_name
+ self.website_key = website_key
+ self.website_url = website_url
+ self.success = False
+ self.solve_time = None
+
+ def get_client(self):
+ raise NotImplementedError
+
+ def get_job(self):
+ raise NotImplementedError
+
+ def _throw(self):
+ self.client = self.get_client()
+ self.job = self.get_job()
+
+ def throw(self):
+ t = time.time()
+ data = self._throw()
+ if self.success:
+ self.solve_time = time.time() - t
+ logger.info("%s: Solving took %ss", self.website_name, int(self.solve_time))
+ return data
+
+
+@registry.register
+class AntiCaptchaProxyLessPitcher(Pitcher):
+ name = "AntiCaptchaProxyLess"
+ source = "anti-captcha.com"
+ host = "api.anti-captcha.com"
+ language_pool = "en"
+ tries = 5
+ use_ssl = True
+ is_invisible = False
+
+ def __init__(self, website_name, website_url, website_key, tries=3, host=None, language_pool=None,
+ use_ssl=True, is_invisible=False, *args, **kwargs):
+ super(AntiCaptchaProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries, *args,
+ **kwargs)
+ self.host = host or self.host
+ self.language_pool = language_pool or self.language_pool
+ self.use_ssl = use_ssl
+ self.is_invisible = is_invisible
+
+ def get_client(self):
+ return AnticaptchaClient(self.client_key, self.language_pool, self.host, self.use_ssl)
+
+ def get_job(self):
+ task = NoCaptchaTaskProxylessTask(website_url=self.website_url, website_key=self.website_key,
+ is_invisible=self.is_invisible)
+ return self.client.createTask(task)
+
+ def _throw(self):
+ for i in range(self.tries):
+ try:
+ super(AntiCaptchaProxyLessPitcher, self)._throw()
+ self.job.join()
+ ret = self.job.get_solution_response()
+ if ret:
+ self.success = True
+ return ret
+ except AnticaptchaException as e:
+ if i >= self.tries - 1:
+ logger.error("%s: Captcha solving finally failed. Exiting", self.website_name)
+ return
+
+ if e.error_code == 'ERROR_ZERO_BALANCE':
+ logger.error("%s: No balance left on captcha solving service. Exiting", self.website_name)
+ return
+
+ elif e.error_code == 'ERROR_NO_SLOT_AVAILABLE':
+ logger.info("%s: No captcha solving slot available, retrying", self.website_name)
+ time.sleep(5.0)
+ continue
+
+ elif e.error_code == 'ERROR_KEY_DOES_NOT_EXIST':
+ logger.error("%s: Bad AntiCaptcha API key", self.website_name)
+ return
+
+ elif e.error_id is None and e.error_code == 250:
+ # timeout
+ if i < self.tries:
+ logger.info("%s: Captcha solving timed out, retrying", self.website_name)
+ time.sleep(1.0)
+ continue
+ else:
+ logger.error("%s: Captcha solving timed out three times; bailing out", self.website_name)
+ return
+ raise
+
+
+@registry.register
+class AntiCaptchaPitcher(AntiCaptchaProxyLessPitcher):
+ name = "AntiCaptcha"
+ proxy = None
+ needs_proxy = True
+ user_agent = None
+ cookies = None
+
+ def __init__(self, *args, **kwargs):
+ self.proxy = Proxy.parse_url(kwargs.pop("proxy"))
+ self.user_agent = kwargs.pop("user_agent")
+ cookies = kwargs.pop("cookies", {})
+ if isinstance(cookies, dict):
+ self.cookies = ";".join(["%s=%s" % (k, v) for k, v in cookies.iteritems()])
+
+ super(AntiCaptchaPitcher, self).__init__(*args, **kwargs)
+
+ def get_job(self):
+ task = NoCaptchaTask(website_url=self.website_url, website_key=self.website_key, proxy=self.proxy,
+ user_agent=self.user_agent, cookies=self.cookies, is_invisible=self.is_invisible)
+ return self.client.createTask(task)
+
+
+@registry.register
+class DBCProxyLessPitcher(Pitcher):
+ name = "DeathByCaptchaProxyLess"
+ source = "deathbycaptcha.com"
+ username = None
+ password = None
+
+ def __init__(self, website_name, website_url, website_key,
+ timeout=DEFAULT_TOKEN_TIMEOUT, tries=3, *args, **kwargs):
+ super(DBCProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries)
+
+ self.username, self.password = self.client_key.split(":", 1)
+ self.timeout = timeout
+
+ def get_client(self):
+ return DBCClient(self.username, self.password)
+
+ def get_job(self):
+ pass
+
+ @property
+ def payload_dict(self):
+ return {
+ "googlekey": self.website_key,
+ "pageurl": self.website_url
+ }
+
+ def _throw(self):
+ super(DBCProxyLessPitcher, self)._throw()
+ payload = json.dumps(self.payload_dict)
+ for i in range(self.tries):
+            #balance = self.client.get_balance()
+            data = self.client.decode(timeout=self.timeout, type=4, token_params=payload)
+            if data and data["is_correct"] and data["text"]:
+                self.success = True
+                return data["text"]
+
+
+@registry.register
+class DBCPitcher(DBCProxyLessPitcher):
+ name = "DeathByCaptcha"
+ proxy = None
+ needs_proxy = True
+ proxy_type = "HTTP"
+
+ def __init__(self, *args, **kwargs):
+ self.proxy = kwargs.pop("proxy")
+ super(DBCPitcher, self).__init__(*args, **kwargs)
+
+ @property
+ def payload_dict(self):
+ payload = super(DBCPitcher, self).payload_dict
+ payload.update({
+ "proxytype": self.proxy_type,
+ "proxy": self.proxy
+ })
+ return payload
+
+
+def load_verification(site_name, session, callback=lambda x: None):
+ ccks = region.get("%s_data" % site_name, expiration_time=15552000) # 6m
+ if ccks != NO_VALUE:
+ cookies, user_agent = ccks
+ logger.debug("%s: Re-using previous user agent: %s", site_name.capitalize(), user_agent)
+ session.headers["User-Agent"] = user_agent
+ try:
+ session.cookies._cookies.update(cookies)
+ return callback(region)
+ except Exception:
+ return False
+ return False
+
+
+def store_verification(site_name, session):
+ region.set("%s_data" % site_name, (session.cookies._cookies, session.headers["User-Agent"]))
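
Note: a minimal sketch of how the providers below are expected to drive this registry; the site name, URL, key and cookie values are hypothetical, and get_pitcher() is assumed to resolve whichever pitcher class is configured:

    from subliminal_patch.pitcher import pitchers

    # resolve the configured pitcher class and ask it to solve the captcha
    pitcher_cls = pitchers.get_pitcher()
    pitcher = pitcher_cls("ExampleSite", "https://example.com/login.php",
                          "HYPOTHETICAL-SITE-KEY",
                          user_agent="Mozilla/5.0",
                          cookies={"PHPSESSID": "abc123"},
                          is_invisible=True)
    token = pitcher.throw()  # g-recaptcha response token, or None on failure
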
diff --git a/libs/subliminal_patch/providers/addic7ed.py b/libs/subliminal_patch/providers/addic7ed.py
index 51913d887..2d556d877 100644
--- a/libs/subliminal_patch/providers/addic7ed.py
+++ b/libs/subliminal_patch/providers/addic7ed.py
@@ -4,18 +4,17 @@ import re
import datetime
import subliminal
import time
+
from random import randint
-from dogpile.cache.api import NO_VALUE
from requests import Session
-
-from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded, AuthenticationError
+from subliminal.cache import region
+from subliminal.exceptions import DownloadLimitExceeded, AuthenticationError
from subliminal.providers.addic7ed import Addic7edProvider as _Addic7edProvider, \
Addic7edSubtitle as _Addic7edSubtitle, ParserBeautifulSoup, show_cells_re
-from subliminal.cache import region
from subliminal.subtitle import fix_line_ending
from subliminal_patch.utils import sanitize
from subliminal_patch.exceptions import TooManyRequests
-
+from subliminal_patch.pitcher import pitchers, load_verification, store_verification
from subzero.language import Language
logger = logging.getLogger(__name__)
@@ -64,6 +63,7 @@ class Addic7edProvider(_Addic7edProvider):
USE_ADDICTED_RANDOM_AGENTS = False
hearing_impaired_verifiable = True
subtitle_class = Addic7edSubtitle
+ server_url = 'https://www.addic7ed.com/'
sanitize_characters = {'-', ':', '(', ')', '.', '/'}
@@ -75,45 +75,76 @@ class Addic7edProvider(_Addic7edProvider):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % subliminal.__short_version__
- if self.USE_ADDICTED_RANDOM_AGENTS:
- from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
- logger.debug("Addic7ed: using random user agents")
- self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
- self.session.headers['Referer'] = self.server_url
+ from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
+ logger.debug("Addic7ed: using random user agents")
+ self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
+ self.session.headers['Referer'] = self.server_url
# login
if self.username and self.password:
- ccks = region.get("addic7ed_cookies", expiration_time=86400)
- if ccks != NO_VALUE:
- try:
- self.session.cookies._cookies.update(ccks)
- r = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10)
- if r.status_code == 302:
- logger.info('Addic7ed: Login expired')
- region.delete("addic7ed_cookies")
- else:
- logger.info('Addic7ed: Reusing old login')
- self.logged_in = True
- return
- except:
- pass
+ def check_verification(cache_region):
+ rr = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10,
+ headers={"Referer": self.server_url})
+ if rr.status_code == 302:
+ logger.info('Addic7ed: Login expired')
+ cache_region.delete("addic7ed_data")
+ else:
+ logger.info('Addic7ed: Re-using old login')
+ self.logged_in = True
+ return True
+
+ if load_verification("addic7ed", self.session, callback=check_verification):
+ return
logger.info('Addic7ed: Logging in')
- data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
- r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
- headers={"Referer": self.server_url + "login.php"})
+ data = {'username': self.username, 'password': self.password, 'Submit': 'Log in', 'url': '',
+ 'remember': 'true'}
+
+ tries = 0
+ while tries < 3:
+ r = self.session.get(self.server_url + 'login.php', timeout=10, headers={"Referer": self.server_url})
+ if "grecaptcha" in r.content:
+ logger.info('Addic7ed: Solving captcha. This might take a couple of minutes, but should only '
+ 'happen once every so often')
+
+ site_key_match = re.search(r'grecaptcha.execute\(\'(.+?)\',', r.content)
+ if not site_key_match:
+ logger.error("Addic7ed: Captcha site-key not found!")
+ return
+ site_key = site_key_match.group(1)
- if "relax, slow down" in r.content:
- raise TooManyRequests(self.username)
+ pitcher = pitchers.get_pitcher()("Addic7ed", self.server_url + 'login.php', site_key,
+ user_agent=self.session.headers["User-Agent"],
+ cookies=self.session.cookies.get_dict(),
+ is_invisible=True)
- if r.status_code != 302:
- raise AuthenticationError(self.username)
+ result = pitcher.throw()
+ if not result:
+ raise Exception("Addic7ed: Couldn't solve captcha!")
+
+ data["recaptcha_response"] = result
+
+ r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
+ headers={"Referer": self.server_url + "login.php"})
+
+ if "relax, slow down" in r.content:
+ raise TooManyRequests(self.username)
+
+ if r.status_code != 302:
+ if "User doesn't exist" in r.content and tries <= 2:
+ logger.info("Addic7ed: Error, trying again. (%s/%s)", tries+1, 3)
+ tries += 1
+ continue
+
+ raise AuthenticationError(self.username)
+ break
- region.set("addic7ed_cookies", self.session.cookies._cookies)
+ store_verification("addic7ed", self.session)
logger.debug('Addic7ed: Logged in')
self.logged_in = True
+ def terminate(self):
+ self.session.close()
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def _get_show_ids(self):
@@ -140,7 +171,7 @@ class Addic7edProvider(_Addic7edProvider):
# populate the show ids
show_ids = {}
- for show in soup.select('td.version > h3 > a[href^="/show/"]'):
+ for show in soup.select('td > h3 > a[href^="/show/"]'):
show_clean = sanitize(show.text, default_characters=self.sanitize_characters)
try:
show_id = int(show['href'][6:])
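
Note: condensed, the new Addic7ed login path boils down to the sketch below; credentials are placeholders and the retry/error handling from the hunk above is omitted:

    import re
    from requests import Session
    from subliminal_patch.pitcher import pitchers

    server_url = 'https://www.addic7ed.com/'
    session = Session()
    data = {'username': 'user', 'password': 'pass', 'Submit': 'Log in',
            'url': '', 'remember': 'true'}

    r = session.get(server_url + 'login.php', timeout=10)
    if 'grecaptcha' in r.content:
        # the site key sits in the login page's grecaptcha.execute() call
        site_key = re.search(r'grecaptcha.execute\(\'(.+?)\',', r.content).group(1)
        pitcher = pitchers.get_pitcher()("Addic7ed", server_url + 'login.php', site_key,
                                         user_agent=session.headers['User-Agent'],
                                         cookies=session.cookies.get_dict(),
                                         is_invisible=True)
        data['recaptcha_response'] = pitcher.throw()
    r = session.post(server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
                     headers={'Referer': server_url + 'login.php'})
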
diff --git a/libs/subliminal_patch/providers/opensubtitles.py b/libs/subliminal_patch/providers/opensubtitles.py
index 032b89058..4ce3aacea 100644
--- a/libs/subliminal_patch/providers/opensubtitles.py
+++ b/libs/subliminal_patch/providers/opensubtitles.py
@@ -11,8 +11,8 @@ from babelfish import language_converters
from dogpile.cache.api import NO_VALUE
from subliminal.exceptions import ConfigurationError, ServiceUnavailable
from subliminal.providers.opensubtitles import OpenSubtitlesProvider as _OpenSubtitlesProvider,\
- OpenSubtitlesSubtitle as _OpenSubtitlesSubtitle, Episode, ServerProxy, Unauthorized, NoSession, \
- DownloadLimitReached, InvalidImdbid, UnknownUserAgent, DisabledUserAgent, OpenSubtitlesError
+ OpenSubtitlesSubtitle as _OpenSubtitlesSubtitle, Episode, Movie, ServerProxy, Unauthorized, NoSession, \
+ DownloadLimitReached, InvalidImdbid, UnknownUserAgent, DisabledUserAgent, OpenSubtitlesError, sanitize
from mixins import ProviderRetryMixin
from subliminal.subtitle import fix_line_ending
from subliminal_patch.http import SubZeroRequestsTransport
@@ -45,6 +45,19 @@ class OpenSubtitlesSubtitle(_OpenSubtitlesSubtitle):
def get_matches(self, video, hearing_impaired=False):
matches = super(OpenSubtitlesSubtitle, self).get_matches(video)
+ # episode
+ if isinstance(video, Episode) and self.movie_kind == 'episode':
+ # series
+ if video.series and (sanitize(self.series_name) in (
+ sanitize(name) for name in [video.series] + video.alternative_series)):
+ matches.add('series')
+ # movie
+ elif isinstance(video, Movie) and self.movie_kind == 'movie':
+ # title
+ if video.title and (sanitize(self.movie_name) in (
+ sanitize(name) for name in [video.title] + video.alternative_titles)):
+ matches.add('title')
+
sub_fps = None
try:
sub_fps = float(self.fps)
@@ -205,19 +218,19 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
season = episode = None
if isinstance(video, Episode):
- query = video.series
+ query = [video.series] + video.alternative_series
season = video.season
episode = min(video.episode) if isinstance(video.episode, list) else video.episode
if video.is_special:
season = None
episode = None
- query = u"%s %s" % (video.series, video.title)
+ query = [u"%s %s" % (series, video.title) for series in [video.series] + video.alternative_series]
logger.info("%s: Searching for special: %r", self.__class__, query)
# elif ('opensubtitles' not in video.hashes or not video.size) and not video.imdb_id:
# query = video.name.split(os.sep)[-1]
else:
- query = video.title
+ query = [video.title] + video.alternative_titles
return self.query(languages, hash=video.hashes.get('opensubtitles'), size=video.size, imdb_id=video.imdb_id,
query=query, season=season, episode=episode, tag=video.original_name,
@@ -238,9 +251,11 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
else:
criteria.append({'imdbid': imdb_id[2:]})
if query and season and episode:
- criteria.append({'query': query.replace('\'', ''), 'season': season, 'episode': episode})
+ for q in query:
+ criteria.append({'query': q.replace('\'', ''), 'season': season, 'episode': episode})
elif query:
- criteria.append({'query': query.replace('\'', '')})
+ for q in query:
+ criteria.append({'query': q.replace('\'', '')})
if not criteria:
raise ValueError('Not enough information')
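
Note: the effect of the query change is one search criterion per known title variant instead of a single one; a toy illustration with hypothetical names:

    query = [u'The Handler'] + [u'The Handler US']   # video.series + alternative_series
    season, episode = 2, 3
    criteria = []
    for q in query:
        criteria.append({'query': q.replace('\'', ''), 'season': season, 'episode': episode})
    # criteria now carries one entry per title variant -- two entries here
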
diff --git a/libs/subliminal_patch/providers/subscene.py b/libs/subliminal_patch/providers/subscene.py
index 38a97c579..d6a294cdb 100644
--- a/libs/subliminal_patch/providers/subscene.py
+++ b/libs/subliminal_patch/providers/subscene.py
@@ -5,6 +5,7 @@ import logging
import os
import time
import inflect
+import cfscrape
from random import randint
from zipfile import ZipFile
@@ -12,7 +13,9 @@ from zipfile import ZipFile
from babelfish import language_converters
from guessit import guessit
from requests import Session
+from dogpile.cache.api import NO_VALUE
from subliminal import Episode, ProviderError
+from subliminal.cache import region
from subliminal.utils import sanitize_release_group
from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
@@ -125,6 +128,7 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
self.session = Session()
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
+ self.session.headers['Referer'] = "https://subscene.com"
def terminate(self):
logger.info("Closing session")
@@ -198,43 +202,48 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
subtitles = []
logger.debug(u"Searching for: %s", vfn)
film = search(vfn, session=self.session)
+
if film and film.subtitles:
logger.debug('Release results found: %s', len(film.subtitles))
subtitles = self.parse_results(video, film)
else:
logger.debug('No release results found')
+ time.sleep(self.search_throttle)
+
# re-search for episodes without explicit release name
if isinstance(video, Episode):
#term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
- term = u"%s - %s Season" % (video.series, p.number_to_words("%sth" % video.season).capitalize())
- time.sleep(self.search_throttle)
- logger.debug('Searching for alternative results: %s', term)
- film = search(term, session=self.session, release=False)
- if film and film.subtitles:
- logger.debug('Alternative results found: %s', len(film.subtitles))
- subtitles += self.parse_results(video, film)
- else:
- logger.debug('No alternative results found')
-
- # packs
- if video.season_fully_aired:
- term = u"%s S%02i" % (video.series, video.season)
- logger.debug('Searching for packs: %s', term)
+ for series in [video.series] + video.alternative_series:
+ term = u"%s - %s Season" % (series, p.number_to_words("%sth" % video.season).capitalize())
time.sleep(self.search_throttle)
- film = search(term, session=self.session)
+ logger.debug('Searching for alternative results: %s', term)
+ film = search(term, session=self.session, release=False)
if film and film.subtitles:
- logger.debug('Pack results found: %s', len(film.subtitles))
+ logger.debug('Alternative results found: %s', len(film.subtitles))
subtitles += self.parse_results(video, film)
else:
- logger.debug('No pack results found')
- else:
- logger.debug("Not searching for packs, because the season hasn't fully aired")
+ logger.debug('No alternative results found')
+
+ # packs
+ if video.season_fully_aired:
+ term = u"%s S%02i" % (series, video.season)
+ logger.debug('Searching for packs: %s', term)
+ time.sleep(self.search_throttle)
+ film = search(term, session=self.session)
+ if film and film.subtitles:
+ logger.debug('Pack results found: %s', len(film.subtitles))
+ subtitles += self.parse_results(video, film)
+ else:
+ logger.debug('No pack results found')
+ else:
+ logger.debug("Not searching for packs, because the season hasn't fully aired")
else:
- logger.debug('Searching for movie results: %s', video.title)
- film = search(video.title, year=video.year, session=self.session, limit_to=None, release=False)
- if film and film.subtitles:
- subtitles += self.parse_results(video, film)
+ for title in [video.title] + video.alternative_titles:
+ logger.debug('Searching for movie results: %s', title)
+ film = search(title, year=video.year, session=self.session, limit_to=None, release=False)
+ if film and film.subtitles:
+ subtitles += self.parse_results(video, film)
logger.info("%s subtitles found" % len(subtitles))
return subtitles
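
Note: the alternative season search term spells the season number out as an ordinal word via inflect; for example, assuming season 4 of a hypothetical show:

    import inflect

    p = inflect.engine()
    term = u'%s - %s Season' % (u'Example Show',
                                p.number_to_words('%sth' % 4).capitalize())
    # term == u'Example Show - Fourth Season'
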
diff --git a/libs/subliminal_patch/providers/subssabbz.py b/libs/subliminal_patch/providers/subssabbz.py
index 17df5b975..ddcd47a7b 100644
--- a/libs/subliminal_patch/providers/subssabbz.py
+++ b/libs/subliminal_patch/providers/subssabbz.py
@@ -26,16 +26,22 @@ class SubsSabBzSubtitle(Subtitle):
"""SubsSabBz Subtitle."""
provider_name = 'subssabbz'
- def __init__(self, langauge, filename, type):
+ def __init__(self, langauge, filename, type, video, link):
super(SubsSabBzSubtitle, self).__init__(langauge)
self.langauge = langauge
self.filename = filename
+ self.page_link = link
self.type = type
+ self.video = video
@property
def id(self):
return self.filename
+ def make_picklable(self):
+ self.content = None
+ return self
+
def get_matches(self, video):
matches = set()
@@ -118,7 +124,7 @@ class SubsSabBzProvider(Provider):
for row in rows[:10]:
a_element_wrapper = row.find('td', { 'class': 'c2field' })
if a_element_wrapper:
- element = row.find('a')
+ element = a_element_wrapper.find('a')
if element:
link = element.get('href')
logger.info('Found subtitle link %r', link)
@@ -130,15 +136,22 @@ class SubsSabBzProvider(Provider):
return [s for l in languages for s in self.query(l, video)]
def download_subtitle(self, subtitle):
- pass
+ if subtitle.content:
+ return
+ seeking_subtitle_file = subtitle.filename
+ arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video)
+ for s in arch:
+ if s.filename == seeking_subtitle_file:
+ subtitle.content = s.content
- def process_archive_subtitle_files(self, archiveStream, language, video):
+ def process_archive_subtitle_files(self, archiveStream, language, video, link):
subtitles = []
type = 'episode' if isinstance(video, Episode) else 'movie'
for file_name in archiveStream.namelist():
if file_name.lower().endswith(('.srt', '.sub')):
logger.info('Found subtitle file %r', file_name)
- subtitle = SubsSabBzSubtitle(language, file_name, type)
+ subtitle = SubsSabBzSubtitle(language, file_name, type, video, link)
subtitle.content = archiveStream.read(file_name)
subtitles.append(subtitle)
return subtitles
@@ -152,8 +165,8 @@ class SubsSabBzProvider(Provider):
archive_stream = io.BytesIO(request.content)
if is_rarfile(archive_stream):
- return self.process_archive_subtitle_files( RarFile(archive_stream), language, video )
+ return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link )
elif is_zipfile(archive_stream):
- return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video )
+ return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link )
else:
raise ValueError('Not a valid archive')
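
Note: the new make_picklable()/download_subtitle() pair lets results be cached without their payload and re-hydrated on demand; a rough sketch of the round trip, with video, link and the byte payload all hypothetical:

    from subzero.language import Language

    sub = SubsSabBzSubtitle(Language('bul'), 'Film.2018.BDRip.srt', 'movie',
                            video, link)      # video/link come from a real query
    sub.content = srt_bytes          # set while parsing the downloaded archive
    sub.make_picklable()             # content dropped so the object can be cached

    # later, when the user actually wants this subtitle:
    provider.download_subtitle(sub)  # re-fetches the archive via sub.page_link and
                                     # restores content for the matching filename
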
diff --git a/libs/subliminal_patch/providers/subsunacs.py b/libs/subliminal_patch/providers/subsunacs.py
index bbc41f520..d616901eb 100644
--- a/libs/subliminal_patch/providers/subsunacs.py
+++ b/libs/subliminal_patch/providers/subsunacs.py
@@ -26,19 +26,25 @@ class SubsUnacsSubtitle(Subtitle):
"""SubsUnacs Subtitle."""
provider_name = 'subsunacs'
- def __init__(self, langauge, filename, type):
+ def __init__(self, langauge, filename, type, video, link):
super(SubsUnacsSubtitle, self).__init__(langauge)
self.langauge = langauge
self.filename = filename
+ self.page_link = link
self.type = type
+ self.video = video
@property
def id(self):
return self.filename
+ def make_picklable(self):
+ self.content = None
+ return self
+
def get_matches(self, video):
matches = set()
-
+
video_filename = video.name
video_filename = os.path.basename(video_filename)
video_filename, _ = os.path.splitext(video_filename)
@@ -77,11 +83,11 @@ class SubsUnacsProvider(Provider):
def terminate(self):
self.session.close()
-
+
def query(self, language, video):
subtitles = []
isEpisode = isinstance(video, Episode)
-
+
params = {
'm': '',
'l': 0,
@@ -117,7 +123,7 @@ class SubsUnacsProvider(Provider):
soup = BeautifulSoup(response.content, 'html.parser')
rows = soup.findAll('td', {'class': 'tdMovie'})
-
+
# Search on first 10 rows only
for row in rows[:10]:
element = row.find('a', {'class': 'tooltip'})
@@ -125,37 +131,44 @@ class SubsUnacsProvider(Provider):
link = element.get('href')
logger.info('Found subtitle link %r', link)
subtitles = subtitles + self.download_archive_and_add_subtitle_files('https://subsunacs.net' + link, language, video)
-
+
return subtitles
def list_subtitles(self, video, languages):
return [s for l in languages for s in self.query(l, video)]
def download_subtitle(self, subtitle):
- pass
-
- def process_archive_subtitle_files(self, archiveStream, language, video):
+ if subtitle.content:
+ return
+ seeking_subtitle_file = subtitle.filename
+ arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video)
+ for s in arch:
+ if s.filename == seeking_subtitle_file:
+ subtitle.content = s.content
+
+ def process_archive_subtitle_files(self, archiveStream, language, video, link):
subtitles = []
type = 'episode' if isinstance(video, Episode) else 'movie'
for file_name in archiveStream.namelist():
if file_name.lower().endswith(('.srt', '.sub')):
logger.info('Found subtitle file %r', file_name)
- subtitle = SubsUnacsSubtitle(language, file_name, type)
+ subtitle = SubsUnacsSubtitle(language, file_name, type, video, link)
subtitle.content = archiveStream.read(file_name)
subtitles.append(subtitle)
return subtitles
-
+
def download_archive_and_add_subtitle_files(self, link, language, video ):
logger.info('Downloading subtitle %r', link)
request = self.session.get(link, headers={
- 'Referer': 'https://subsunacs.net/search.php'
+ 'Referer': 'https://subsunacs.net/search.php'
})
request.raise_for_status()
archive_stream = io.BytesIO(request.content)
if is_rarfile(archive_stream):
- return self.process_archive_subtitle_files( RarFile(archive_stream), language, video )
+ return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link )
elif is_zipfile(archive_stream):
- return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video )
+ return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link )
else:
raise ValueError('Not a valid archive')
diff --git a/libs/subliminal_patch/providers/titlovi.py b/libs/subliminal_patch/providers/titlovi.py
index ec339fef8..860932ca5 100644
--- a/libs/subliminal_patch/providers/titlovi.py
+++ b/libs/subliminal_patch/providers/titlovi.py
@@ -4,6 +4,7 @@ import io
import logging
import math
import re
+import time
import rarfile
@@ -23,6 +24,7 @@ from subliminal.utils import sanitize_release_group
from subliminal.subtitle import guess_matches
from subliminal.video import Episode, Movie
from subliminal.subtitle import fix_line_ending
+from subliminal_patch.pitcher import pitchers, load_verification, store_verification
from subzero.language import Language
from random import randint
@@ -142,6 +144,7 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
logger.debug('User-Agent set to %s', self.session.headers['User-Agent'])
self.session.headers['Referer'] = self.server_url
logger.debug('Referer set to %s', self.session.headers['Referer'])
+ load_verification("titlovi", self.session)
def terminate(self):
self.session.close()
@@ -182,110 +185,144 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
r = self.session.get(self.search_url, params=params, timeout=10)
r.raise_for_status()
except RequestException as e:
- logger.exception('RequestException %s', e)
- break
-
- try:
- soup = BeautifulSoup(r.content, 'lxml')
-
- # number of results
- result_count = int(soup.select_one('.results_count b').string)
- except:
- result_count = None
-
- # exit if no results
- if not result_count:
- if not subtitles:
- logger.debug('No subtitles found')
- else:
- logger.debug("No more subtitles found")
- break
-
- # number of pages with results
- pages = int(math.ceil(result_count / float(items_per_page)))
-
- # get current page
- if 'pg' in params:
- current_page = int(params['pg'])
-
- try:
- sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
- for sub in sublist:
- # subtitle id
- sid = sub.find(attrs={'data-id': True}).attrs['data-id']
- # get download link
- download_link = self.download_url + sid
- # title and alternate title
- match = title_re.search(sub.a.string)
- if match:
- _title = match.group('title')
- alt_title = match.group('altitle')
+ captcha_passed = False
+ if e.response.status_code == 403 and "data-sitekey" in e.response.content:
+ logger.info('titlovi: Solving captcha. This might take a couple of minutes, but should only '
+ 'happen once every so often')
+
+ site_key_match = re.search(r'data-sitekey="(.+?)"', e.response.content)
+ challenge_s_match = re.search(r'type="hidden" name="s" value="(.+?)"', e.response.content)
+ challenge_ray_match = re.search(r'data-ray="(.+?)"', e.response.content)
+ if not all([site_key_match, challenge_s_match, challenge_ray_match]):
+ raise Exception("titlovi: Captcha challenge data not found!")
+ site_key = site_key_match.group(1)
+ challenge_s = challenge_s_match.group(1)
+ challenge_ray = challenge_ray_match.group(1)
+
+ pitcher = pitchers.get_pitcher()("titlovi", e.request.url, site_key,
+ user_agent=self.session.headers["User-Agent"],
+ cookies=self.session.cookies.get_dict(),
+ is_invisible=True)
+
+ result = pitcher.throw()
+ if not result:
+ raise Exception("titlovi: Couldn't solve captcha!")
+
+ s_params = {
+ "s": challenge_s,
+ "id": challenge_ray,
+ "g-recaptcha-response": result,
+ }
+ r = self.session.get(self.server_url + "/cdn-cgi/l/chk_captcha", params=s_params, timeout=10,
+ allow_redirects=False)
+ r.raise_for_status()
+ r = self.session.get(self.search_url, params=params, timeout=10)
+ r.raise_for_status()
+ store_verification("titlovi", self.session)
+ captcha_passed = True
+
+ if not captcha_passed:
+ logger.exception('RequestException %s', e)
+ break
+ else:
+ try:
+ soup = BeautifulSoup(r.content, 'lxml')
+
+ # number of results
+ result_count = int(soup.select_one('.results_count b').string)
+ except:
+ result_count = None
+
+ # exit if no results
+ if not result_count:
+ if not subtitles:
+ logger.debug('No subtitles found')
else:
- continue
-
- # page link
- page_link = self.server_url + sub.a.attrs['href']
- # subtitle language
- match = lang_re.search(sub.select_one('.lang').attrs['src'])
- if match:
- try:
- # decode language
- lang = Language.fromtitlovi(match.group('lang')+match.group('script'))
- except ValueError:
+ logger.debug("No more subtitles found")
+ break
+
+ # number of pages with results
+ pages = int(math.ceil(result_count / float(items_per_page)))
+
+ # get current page
+ if 'pg' in params:
+ current_page = int(params['pg'])
+
+ try:
+ sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
+ for sub in sublist:
+ # subtitle id
+ sid = sub.find(attrs={'data-id': True}).attrs['data-id']
+ # get download link
+ download_link = self.download_url + sid
+ # title and alternate title
+ match = title_re.search(sub.a.string)
+ if match:
+ _title = match.group('title')
+ alt_title = match.group('altitle')
+ else:
continue
- # relase year or series start year
- match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
- if match:
- r_year = int(match.group('year'))
- # fps
- match = fps_re.search(sub.select_one('.fps').string)
- if match:
- fps = match.group('fps')
- # releases
- releases = str(sub.select_one('.fps').parent.contents[0].string)
-
- # handle movies and series separately
- if is_episode:
- # season and episode info
- sxe = sub.select_one('.s0xe0y').string
- r_season = None
- r_episode = None
- if sxe:
- match = season_re.search(sxe)
- if match:
- r_season = int(match.group('season'))
- match = episode_re.search(sxe)
- if match:
- r_episode = int(match.group('episode'))
-
- subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
- alt_title=alt_title, season=r_season, episode=r_episode,
- year=r_year, fps=fps,
- asked_for_release_group=video.release_group,
- asked_for_episode=episode)
- else:
- subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
- alt_title=alt_title, year=r_year, fps=fps,
- asked_for_release_group=video.release_group)
- logger.debug('Found subtitle %r', subtitle)
-
- # prime our matches so we can use the values later
- subtitle.get_matches(video)
-
- # add found subtitles
- subtitles.append(subtitle)
-
- finally:
- soup.decompose()
-
- # stop on last page
- if current_page >= pages:
- break
-
- # increment current page
- params['pg'] = current_page + 1
- logger.debug('Getting page %d', params['pg'])
+ # page link
+ page_link = self.server_url + sub.a.attrs['href']
+ # subtitle language
+ match = lang_re.search(sub.select_one('.lang').attrs['src'])
+ if match:
+ try:
+ # decode language
+ lang = Language.fromtitlovi(match.group('lang')+match.group('script'))
+ except ValueError:
+ continue
+
+ # release year or series start year
+ match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
+ if match:
+ r_year = int(match.group('year'))
+ # fps
+ match = fps_re.search(sub.select_one('.fps').string)
+ if match:
+ fps = match.group('fps')
+ # releases
+ releases = str(sub.select_one('.fps').parent.contents[0].string)
+
+ # handle movies and series separately
+ if is_episode:
+ # season and episode info
+ sxe = sub.select_one('.s0xe0y').string
+ r_season = None
+ r_episode = None
+ if sxe:
+ match = season_re.search(sxe)
+ if match:
+ r_season = int(match.group('season'))
+ match = episode_re.search(sxe)
+ if match:
+ r_episode = int(match.group('episode'))
+
+ subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
+ alt_title=alt_title, season=r_season, episode=r_episode,
+ year=r_year, fps=fps,
+ asked_for_release_group=video.release_group,
+ asked_for_episode=episode)
+ else:
+ subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
+ alt_title=alt_title, year=r_year, fps=fps,
+ asked_for_release_group=video.release_group)
+ logger.debug('Found subtitle %r', subtitle)
+
+ # prime our matches so we can use the values later
+ subtitle.get_matches(video)
+
+ # add found subtitles
+ subtitles.append(subtitle)
+
+ finally:
+ soup.decompose()
+
+ # stop on last page
+ if current_page >= pages:
+ break
+
+ # increment current page
+ params['pg'] = current_page + 1
+ logger.debug('Getting page %d', params['pg'])
return subtitles
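
Note: the verification helpers from pitcher.py are used in two modes here; a sketch, where the session is a plain requests.Session and the callback body is a placeholder:

    from requests import Session
    from subliminal_patch.pitcher import load_verification, store_verification

    session = Session()

    # titlovi mode: blindly restore any cached cookies and user agent
    load_verification('titlovi', session)

    # addic7ed mode: restore, then let a callback confirm the login still works
    def check_verification(cache_region):
        return True  # e.g. probe a members-only page, as addic7ed does above

    if load_verification('addic7ed', session, callback=check_verification):
        pass  # cached session is still valid

    # after a fresh captcha/login succeeds, persist the session for next time
    store_verification('titlovi', session)
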
diff --git a/libs/subliminal_patch/providers/zimuku.py b/libs/subliminal_patch/providers/zimuku.py
new file mode 100644
index 000000000..5090816f8
--- /dev/null
+++ b/libs/subliminal_patch/providers/zimuku.py
@@ -0,0 +1,208 @@
+# -*- coding: utf-8 -*-
+import io
+import logging
+import os
+import zipfile
+
+import rarfile
+from subzero.language import Language
+from guessit import guessit
+from requests import Session
+from six import text_type
+
+from subliminal import __short_version__
+from subliminal.providers import ParserBeautifulSoup, Provider
+from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches
+from subliminal.video import Episode, Movie
+
+logger = logging.getLogger(__name__)
+
+
+class ZimukuSubtitle(Subtitle):
+ """Zimuku Subtitle."""
+ provider_name = 'zimuku'
+
+ def __init__(self, language, page_link, version, download_link):
+ super(ZimukuSubtitle, self).__init__(language, page_link=page_link)
+ self.version = version
+ self.download_link = download_link
+ self.hearing_impaired = None
+ self.encoding = 'utf-8'
+
+ @property
+ def id(self):
+ return self.download_link
+
+ def get_matches(self, video):
+ matches = set()
+
+ # episode
+ if isinstance(video, Episode):
+ # other properties
+ matches |= guess_matches(video, guessit(self.version, {'type': 'episode'}), partial=True)
+ # movie
+ elif isinstance(video, Movie):
+ # other properties
+ matches |= guess_matches(video, guessit(self.version, {'type': 'movie'}), partial=True)
+
+ return matches
+
+
+class ZimukuProvider(Provider):
+ """Zimuku Provider."""
+ languages = {Language(l) for l in ['zho', 'eng']}
+
+ server_url = 'http://www.zimuku.la'
+ search_url = '/search?q={}'
+ download_url = 'http://www.zimuku.la/'
+
+ UserAgent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)'
+
+ subtitle_class = ZimukuSubtitle
+
+ def __init__(self):
+ self.session = None
+
+ def initialize(self):
+ self.session = Session()
+ self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)
+
+ def terminate(self):
+ self.session.close()
+
+ def query(self, keyword, season=None, episode=None, year=None):
+ params = keyword
+ if season and episode:
+ params += ' S{season:02d}E{episode:02d}'.format(season=season, episode=episode)
+ elif year:
+ params += ' {:4d}'.format(year)
+
+ logger.debug('Searching subtitles %r', params)
+ subtitles = []
+ search_link = self.server_url + text_type(self.search_url).format(params)
+
+ r = self.session.get(search_link, timeout=30)
+ r.raise_for_status()
+
+ if not r.content:
+ logger.debug('No data returned from provider')
+ return []
+
+ soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
+
+ for entity in soup.select('div.item.prel.clearfix a:nth-of-type(2)'):
+ moviename = entity.text
+ entity_url = self.server_url + entity['href']
+ logger.debug(entity_url)
+ r = self.session.get(entity_url, timeout=30)
+ r.raise_for_status()
+ logger.debug('looking into ' + entity_url)
+
+ soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser']).find("div", class_="subs box clearfix")
+ if soup is None or soup.tbody is None:
+ continue
+
+ # loop over subtitle rows
+ subs = soup.tbody.find_all("tr")
+ for sub in subs:
+ page_link = '%s%s' % (self.server_url, sub.a.get('href').encode('utf-8'))
+ version = sub.a.text.encode('utf-8') or None
+ if version is None:
+ version = ""
+ try:
+ td = sub.find("td", class_="tac lang")
+ r2 = td.find_all("img")
+ langs = [x.get('title').encode('utf-8') for x in r2]
+ except Exception:
+ langs = ['未知']
+ name = '%s (%s)' % (version, ",".join(langs))
+
+ if ('English' in langs) and not(('简体中文' in langs) or ('繁體中文' in langs)):
+ language = Language('eng')
+ else:
+ language = Language('zho')
+ # read the item
+ subtitle = self.subtitle_class(language, page_link, version, page_link.replace("detail","dld"))
+
+ logger.debug('Found subtitle %r', subtitle)
+ subtitles.append(subtitle)
+
+ return subtitles
+
+ def list_subtitles(self, video, languages):
+ if isinstance(video, Episode):
+ titles = [video.series] + video.alternative_series
+ elif isinstance(video, Movie):
+ titles = [video.title] + video.alternative_titles
+ else:
+ titles = []
+
+ subtitles = []
+ # query for subtitles with the show_id
+ for title in titles:
+ if isinstance(video, Episode):
+ subtitles += [s for s in self.query(title, season=video.season, episode=video.episode,
+ year=video.year)
+ if s.language in languages]
+ elif isinstance(video, Movie):
+ subtitles += [s for s in self.query(title, year=video.year)
+ if s.language in languages]
+
+ return subtitles
+
+ def download_subtitle(self, subtitle):
+ if isinstance(subtitle, ZimukuSubtitle):
+ # download the subtitle
+ logger.info('Downloading subtitle %r', subtitle)
+ r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link},
+ timeout=30)
+ r.raise_for_status()
+
+ if not r.content:
+ logger.debug('Unable to download subtitle. No data returned from provider')
+ return
+
+ soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
+ links = soup.find("div", {"class":"clearfix"}).find_all('a')
+ # TODO: add settings for choice
+
+ for down_link in links:
+ url = down_link.get('href').encode('utf-8')
+ url = self.server_url + url
+ r = self.session.get(url, headers={'Referer': subtitle.download_link},
+ timeout=30)
+ r.raise_for_status()
+
+ # anything this small is assumed to be an error page, not an archive
+ if len(r.content) > 1024:
+ break
+
+ archive_stream = io.BytesIO(r.content)
+ archive = None
+ if rarfile.is_rarfile(archive_stream):
+ logger.debug('Identified rar archive')
+ archive = rarfile.RarFile(archive_stream)
+ subtitle_content = _get_subtitle_from_archive(archive)
+ elif zipfile.is_zipfile(archive_stream):
+ logger.debug('Identified zip archive')
+ archive = zipfile.ZipFile(archive_stream)
+ subtitle_content = _get_subtitle_from_archive(archive)
+ else:
+ subtitle_content = r.content
+
+ if subtitle_content:
+ subtitle.content = fix_line_ending(subtitle_content)
+ else:
+ logger.debug('Could not extract subtitle from %r', archive)
+
+
+def _get_subtitle_from_archive(archive):
+ for name in archive.namelist():
+ # discard hidden files
+ if os.path.split(name)[-1].startswith('.'):
+ continue
+
+ # discard non-subtitle files
+ if not name.lower().endswith(SUBTITLE_EXTENSIONS):
+ continue
+
+ return archive.read(name)
+
+ return None
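
Note: a minimal usage sketch for the new provider; the video object is hypothetical, and alternative_titles is set explicitly because list_subtitles() reads it:

    from subliminal.video import Movie
    from subzero.language import Language

    video = Movie('Example.Movie.2018.1080p.mkv', 'Example Movie', year=2018)
    video.alternative_titles = []

    provider = ZimukuProvider()
    provider.initialize()
    try:
        subs = provider.list_subtitles(video, {Language('zho'), Language('eng')})
        if subs:
            provider.download_subtitle(subs[0])
    finally:
        provider.terminate()
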
diff --git a/libs/subliminal_patch/refiners/omdb.py b/libs/subliminal_patch/refiners/omdb.py
index 9ecb5155b..bef212f75 100644
--- a/libs/subliminal_patch/refiners/omdb.py
+++ b/libs/subliminal_patch/refiners/omdb.py
@@ -4,7 +4,7 @@ import subliminal
import base64
import zlib
from subliminal import __short_version__
-from subliminal.refiners.omdb import OMDBClient, refine
+from subliminal.refiners.omdb import OMDBClient, refine as refine_orig, Episode, Movie
class SZOMDBClient(OMDBClient):
@@ -63,5 +63,13 @@ class SZOMDBClient(OMDBClient):
return j
+def refine(video, **kwargs):
+ refine_orig(video, **kwargs)
+ if isinstance(video, Episode) and video.series_imdb_id:
+ video.series_imdb_id = video.series_imdb_id.strip()
+ elif isinstance(video, Movie) and video.imdb_id:
+ video.imdb_id = video.imdb_id.strip()
+
+
omdb_client = SZOMDBClient(headers={'User-Agent': 'Subliminal/%s' % __short_version__})
subliminal.refiners.omdb.omdb_client = omdb_client
diff --git a/libs/subliminal_patch/subtitle.py b/libs/subliminal_patch/subtitle.py
index 9a165fe4b..69a3c1e5b 100644
--- a/libs/subliminal_patch/subtitle.py
+++ b/libs/subliminal_patch/subtitle.py
@@ -38,6 +38,8 @@ class Subtitle(Subtitle_):
plex_media_fps = None
skip_wrong_fps = False
wrong_fps = False
+ wrong_series = False
+ wrong_season_ep = False
is_pack = False
asked_for_release_group = None
asked_for_episode = None
@@ -356,7 +358,8 @@ def guess_matches(video, guess, partial=False):
matches = set()
if isinstance(video, Episode):
# series
- if video.series and 'title' in guess and sanitize(guess['title']) == sanitize(video.series):
+ if video.series and 'title' in guess and sanitize(guess['title']) in (
+ sanitize(name) for name in [video.series] + video.alternative_series):
matches.add('series')
# title
if video.title and 'episode_title' in guess and sanitize(guess['episode_title']) == sanitize(video.title):
@@ -384,7 +387,8 @@ def guess_matches(video, guess, partial=False):
if video.year and 'year' in guess and guess['year'] == video.year:
matches.add('year')
# title
- if video.title and 'title' in guess and sanitize(guess['title']) == sanitize(video.title):
+ if video.title and 'title' in guess and sanitize(guess['title']) in (
+ sanitize(name) for name in [video.title] + video.alternative_titles):
matches.add('title')
# release_group
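
Note: with the change above, a guessed title now matches if it equals any sanitized candidate name, not just the primary one; a toy check with hypothetical names:

    from subliminal.utils import sanitize

    candidates = [u'The Handler (US)'] + [u'The Handler US']  # series + alternatives
    matches = set()
    if sanitize(u'The Handler US') in (sanitize(name) for name in candidates):
        matches.add('series')
    # matches == {'series'}
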
diff --git a/views/settings.tpl b/views/settings.tpl
index 29072aeff..430cfc253 100644
--- a/views/settings.tpl
+++ b/views/settings.tpl
@@ -1228,12 +1228,104 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
@@ -1703,7 +1795,7 @@
-
+
+
+
+
+
+
@@ -2235,6 +2349,30 @@
}
});
+ if ($('#settings_anti_captcha_provider').val() === "None") {
+ $('.anticaptcha').hide();
+ $('.deathbycaptcha').hide();
+ } else if ($('#settings_anti_captcha_provider').val() === "anti-captcha") {
+ $('.anticaptcha').show();
+ $('.deathbycaptcha').hide();
+ } else if ($('#settings_anti_captcha_provider').val() === "death-by-cCaptcha") {
+ $('.deathbycaptcha').show();
+ $('.anticaptcha').hide();
+ }
+
+ $('#settings_anti_captcha_provider').dropdown('setting', 'onChange', function(){
+ if ($('#settings_anti_captcha_provider').val() === "None") {
+ $('.anticaptcha').hide();
+ $('.deathbycaptcha').hide();
+ } else if ($('#settings_anti_captcha_provider').val() === "anti-captcha") {
+ $('.anticaptcha').show();
+ $('.deathbycaptcha').hide();
+ } else if ($('#settings_anti_captcha_provider').val() === "death-by-captcha") {
+ $('.deathbycaptcha').show();
+ $('.anticaptcha').hide();
+ }
+ });
+
if ($('#settings_use_postprocessing').data("postprocessing") === "True") {
$('.postprocessing').show();
} else {
@@ -2445,6 +2583,8 @@
$('#settings_page_size').dropdown('set selected','{{!settings.general.page_size}}');
$('#settings_subfolder').dropdown('clear');
$('#settings_subfolder').dropdown('set selected', '{{!settings.general.subfolder}}');
+ $('#settings_anti_captcha_provider').dropdown('clear');
+ $('#settings_anti_captcha_provider').dropdown('set selected', '{{!settings.general.anti_captcha_provider}}');
$('#settings_proxy_type').dropdown('clear');
$('#settings_proxy_type').dropdown('set selected','{{!settings.proxy.type}}');
$('#settings_providers').dropdown('clear');
@@ -2616,6 +2756,7 @@
]
},
settings_days_to_upgrade_subs : {
+ depends: 'settings_upgrade_subs',
rules : [
{
type : 'integer[1..30]'
diff --git a/views/wizard.tpl b/views/wizard.tpl
index 49a215431..6619863d9 100644
--- a/views/wizard.tpl
+++ b/views/wizard.tpl
@@ -414,7 +414,7 @@
-
+
@@ -884,7 +884,7 @@
-
+