commit
b8df31f4a6
@ -0,0 +1,279 @@
|
|||||||
|
import logging
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
import re
|
||||||
|
|
||||||
|
# based off of https://gist.github.com/doko-desuka/58d9212461f62583f8df9bc6387fade2
|
||||||
|
# and https://github.com/Anorov/cloudflare-scrape
|
||||||
|
# and https://github.com/VeNoMouS/cloudflare-scrape-js2py
|
||||||
|
|
||||||
|
'''''''''
|
||||||
|
Disables InsecureRequestWarning: Unverified HTTPS request is being made warnings.
|
||||||
|
'''''''''
|
||||||
|
import requests
|
||||||
|
from requests.packages.urllib3.exceptions import InsecureRequestWarning
|
||||||
|
|
||||||
|
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
|
||||||
|
''''''
|
||||||
|
from requests.sessions import Session
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
try:
|
||||||
|
from urlparse import urlparse
|
||||||
|
except ImportError:
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
# Browser User-Agent strings (desktop and mobile) that the scraper may present
# instead of the default python-requests one.
DEFAULT_USER_AGENTS = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/65.0.3325.181 Chrome/65.0.3325.181 Safari/537.36",
    "Mozilla/5.0 (Linux; Android 7.0; Moto G (5) Build/NPPS25.137-93-8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.137 Mobile Safari/537.36",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B554a Safari/9537.53",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:59.0) Gecko/20100101 Firefox/59.0",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0",
]

# A single agent drawn from the pool once, at import time.
DEFAULT_USER_AGENT = random.choice(DEFAULT_USER_AGENTS)

# Boilerplate text pointing users at the upstream issue tracker.
BUG_REPORT = (
    "Cloudflare may have changed their technique, or there may be a bug in the script.\n\n"
    "Please read https://github.com/Anorov/cloudflare-scrape#updates, then file a "
    "bug report at https://github.com/Anorov/cloudflare-scrape/issues."
)
|
||||||
|
|
||||||
|
|
||||||
|
class CloudflareScraper(Session):
    """A ``requests`` session that detects and solves Cloudflare's JavaScript challenge.

    Every response is inspected in :meth:`request`; when it looks like the
    "jschl" anti-bot page, the arithmetic challenge embedded in the page is
    evaluated in pure Python and the answer is submitted before the original
    request is replayed.
    """

    def __init__(self, *args, **kwargs):
        super(CloudflareScraper, self).__init__(*args, **kwargs)

        if "requests" in self.headers["User-Agent"]:
            # No custom User-Agent has been set: present a randomly chosen browser one.
            self.headers["User-Agent"] = random.choice(DEFAULT_USER_AGENTS)

    def request(self, method, url, *args, **kwargs):
        """Perform the request and transparently solve a Cloudflare challenge if one is returned."""
        resp = super(CloudflareScraper, self).request(method, url, *args, **kwargs)

        # Check if Cloudflare anti-bot is on
        if (resp.status_code in (503, 429)
                and resp.headers.get("Server", "").startswith("cloudflare")
                and b"jschl_vc" in resp.content
                and b"jschl_answer" in resp.content):
            return self.solve_cf_challenge(resp, **kwargs)

        # Otherwise, no Cloudflare anti-bot detected
        return resp

    def solve_cf_challenge(self, resp, **original_kwargs):
        """Compute the challenge answer found in ``resp``, submit it and replay the request.

        ``original_kwargs`` are the keyword arguments of the challenged request;
        a deep copy of them (plus the challenge parameters and a ``Referer``
        header) is used for the submission, the originals for the replay.

        Any parsing failure is logged and re-raised.
        """
        body = resp.text
        parsed_url = urlparse(resp.url)
        domain = parsed_url.netloc
        submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme, domain)

        cloudflare_kwargs = deepcopy(original_kwargs)
        params = cloudflare_kwargs.setdefault("params", {})
        headers = cloudflare_kwargs.setdefault("headers", {})
        headers["Referer"] = resp.url

        try:
            # The delay (in milliseconds) is the first number following "submit" in the page.
            cf_delay = float(re.search(r'submit.*?(\d+)', body, re.DOTALL).group(1)) / 1000.0

            form_index = body.find('id="challenge-form"')
            if form_index == -1:
                raise Exception('CF form not found')
            sub_body = body[form_index:]

            s_match = re.search(r'name="s" value="(.+?)"', sub_body)
            if s_match:
                params["s"] = s_match.group(1)  # On older variants this parameter is absent.
            params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"', sub_body).group(1)
            params["pass"] = re.search(r'name="pass" value="(.+?)"', sub_body).group(1)

            extra_div_expression = None
            if body.find('id="cf-dn-', form_index) != -1:
                extra_div_expression = re.search(r'id="cf-dn-.*?>(.+?)<', sub_body).group(1)

            # Initial value.
            js_answer = self.cf_parse_expression(
                re.search(r'setTimeout\(function\(.*?:(.*?)}', body, re.DOTALL).group(1)
            )
            # Extract the arithmetic operations.
            builder = re.search(r"challenge-form'\);\s*;(.*);a.value", body, re.DOTALL).group(1)
            # Remove a function semicolon before splitting on semicolons, else it messes the order.
            lines = builder.replace(' return +(p)}();', '', 1).split(';')

            for line in lines:
                if line and '=' in line:
                    heading, expression = line.split('=', 1)
                    if 'eval(eval(atob' in expression:
                        # Uses the expression in an external <div>.
                        if extra_div_expression is None:
                            raise Exception('CF external expression <div> not found')
                        expression_value = self.cf_parse_expression(extra_div_expression)
                    elif '(function(p' in expression:
                        # Expression + domain sampling function.
                        expression_value = self.cf_parse_expression(expression, domain)
                    else:
                        expression_value = self.cf_parse_expression(expression)
                    # The operator is the last character before the "=" (e.g. "+=", "*=").
                    js_answer = self.cf_arithmetic_op(heading[-1], js_answer, expression_value)

            if '+ t.length' in body:
                js_answer += len(domain)  # Only older variants add the domain length.

            params["jschl_answer"] = '%.10f' % js_answer

        except Exception as e:
            # Something is wrong with the page.
            # This may indicate Cloudflare has changed their anti-bot
            # technique. If you see this and are running the latest version,
            # please open a GitHub issue so I can update the code accordingly.
            logging.error("[!] %s Unable to parse Cloudflare anti-bots page. "
                          "Try upgrading cloudflare-scrape, or submit a bug report "
                          "if you are running the latest version. Please read "
                          "https://github.com/Anorov/cloudflare-scrape#updates "
                          "before submitting a bug report.", e)
            raise

        # Cloudflare requires a delay before solving the challenge.
        # Always wait the full delay + 1s because of 'time.sleep()' imprecision.
        time.sleep(cf_delay + 1.0)

        # Requests transforms any request into a GET after a redirect,
        # so the redirect has to be handled manually here to allow for
        # performing other types of requests even as the first request.
        method = resp.request.method
        cloudflare_kwargs["allow_redirects"] = False

        redirect = self.request(method, submit_url, **cloudflare_kwargs)

        if 'Location' not in redirect.headers:
            return redirect

        redirect_location = urlparse(redirect.headers["Location"])
        if not redirect_location.netloc:
            # Relative redirect: rebuild an absolute URL on the challenged host,
            # keeping the query string and fragment of the Location header.
            redirect_url = redirect_location._replace(
                scheme=parsed_url.scheme, netloc=domain
            ).geturl()
            return self.request(method, redirect_url, **original_kwargs)
        return self.request(method, redirect.headers["Location"], **original_kwargs)

    def cf_sample_domain_function(self, func_expression, domain):
        """Return the character code of ``domain`` at the index encoded in ``func_expression``."""
        parameter_start_index = func_expression.find('}(') + 2
        # Send the expression with the "+" char and enclosing parenthesis included, as they are
        # stripped inside '.cf_parse_expression()'.
        sample_index = self.cf_parse_expression(
            func_expression[parameter_start_index: func_expression.rfind(')))')]
        )
        return ord(domain[int(sample_index)])

    def cf_arithmetic_op(self, op, a, b):
        """Apply the operator character ``op`` (one of ``+ - * /``) to ``a`` and ``b``."""
        if op == '+':
            return a + b
        elif op == '/':
            return a / float(b)
        elif op == '*':
            return a * float(b)
        elif op == '-':
            return a - b
        else:
            raise Exception('Unknown operation')

    def cf_parse_expression(self, expression, domain=None):
        """Evaluate one obfuscated numeric expression of the challenge.

        ``expression`` is either a single number or a ``dividend/divisor``
        pair.  When ``domain`` is given, the divisor is expected to end with a
        domain-sampling function whose result is added to the divisor.
        """

        def _get_jsfuck_number(section):
            digit_expressions = section.replace('!+[]', '1').replace('+!![]', '1').replace('+[]', '0').split('+')
            return int(
                # Form a number string, with each digit as the sum of the values inside each parenthesis block.
                ''.join(
                    str(sum(int(digit_char) for digit_char in digit_expression[1:-1]))  # Strip the parenthesis.
                    for digit_expression in digit_expressions
                )
            )

        if '/' not in expression:
            return _get_jsfuck_number(expression[2:-1])

        dividend, divisor = expression.split('/')
        dividend = dividend[2:-1]  # Strip the leading '+' char and the enclosing parenthesis.

        if domain:
            # 2019-04-02: At this moment, this extra domain sampling function always appears on the
            # divisor side, at the end.
            divisor_a, divisor_b = divisor.split('))+(')
            divisor_a = _get_jsfuck_number(divisor_a[5:])  # Left-strip the sequence of "(+(+(".
            divisor_b = self.cf_sample_domain_function(divisor_b, domain)
            return _get_jsfuck_number(dividend) / float(divisor_a + divisor_b)

        divisor = divisor[2:-1]
        return _get_jsfuck_number(dividend) / float(_get_jsfuck_number(divisor))

    @classmethod
    def create_scraper(cls, sess=None, **kwargs):
        """
        Convenience function for creating a ready-to-go requests.Session (subclass) object.

        Truthy ``auth``, ``cert``, ``cookies``, ``headers``, ``hooks``,
        ``params``, ``proxies`` and ``data`` attributes of ``sess`` are copied
        onto the new scraper.

        NOTE(review): ``**kwargs`` is accepted but not used by this method.
        """
        scraper = cls()

        if sess:
            attrs = ["auth", "cert", "cookies", "headers", "hooks", "params", "proxies", "data"]
            for attr in attrs:
                val = getattr(sess, attr, None)
                if val:
                    setattr(scraper, attr, val)

        return scraper

    ## Functions for integrating cloudflare-scrape with other applications and scripts

    def _tokens_for_domain(self, domain):
        """Return ``(cookies_dict, user_agent)`` for the cookie domain matching ``domain``.

        Raises ``ValueError`` when the cookie jar holds no dot-prefixed domain
        contained in ``"." + domain``.
        """
        for cookie_domain in self.cookies.list_domains():
            if cookie_domain.startswith(".") and cookie_domain in ("." + domain):
                break
        else:
            raise ValueError(
                "Unable to find Cloudflare cookies. Does the site actually have Cloudflare IUAM (\"I'm Under Attack Mode\") enabled?")

        return ({
                    "__cfduid": self.cookies.get("__cfduid", "", domain=cookie_domain),
                    "cf_clearance": self.cookies.get("cf_clearance", "", domain=cookie_domain)
                },
                self.headers["User-Agent"]
        )

    @classmethod
    def get_tokens(cls, url, user_agent=None, **kwargs):
        """Fetch ``url`` with a fresh scraper and return its Cloudflare cookies and User-Agent.

        Returns a ``(dict, str)`` tuple; request errors are logged and re-raised.
        """
        scraper = cls.create_scraper()
        if user_agent:
            scraper.headers["User-Agent"] = user_agent

        try:
            resp = scraper.get(url, **kwargs)
            resp.raise_for_status()
        except Exception:
            logging.error("'%s' returned an error. Could not collect tokens.", url)
            raise

        return scraper._tokens_for_domain(urlparse(resp.url).netloc)

    def get_live_tokens(self, domain):
        """Return the Cloudflare cookies and User-Agent this session already holds for ``domain``."""
        return self._tokens_for_domain(domain)

    @classmethod
    def get_cookie_string(cls, url, user_agent=None, **kwargs):
        """
        Convenience function for building a Cookie HTTP header value.
        """
        tokens, user_agent = cls.get_tokens(url, user_agent=user_agent, **kwargs)
        return "; ".join("=".join(pair) for pair in tokens.items()), user_agent
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level shortcuts to the CloudflareScraper classmethods of the same name.
create_scraper = CloudflareScraper.create_scraper
|
||||||
|
get_tokens = CloudflareScraper.get_tokens
|
||||||
|
get_cookie_string = CloudflareScraper.get_cookie_string
|
@ -0,0 +1,516 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: UTF-8 -*-
|
||||||
|
|
||||||
|
"""Death by Captcha HTTP and socket API clients.
|
||||||
|
|
||||||
|
There are two types of Death by Captcha (DBC hereinafter) API: HTTP and
|
||||||
|
socket ones. Both offer the same functionality, with the socket API
|
||||||
|
sporting faster responses and using way less connections.
|
||||||
|
|
||||||
|
To access the socket API, use SocketClient class; for the HTTP API, use
|
||||||
|
HttpClient class. Both are thread-safe. SocketClient keeps a persistent
|
||||||
|
connection opened and serializes all API requests sent through it, thus
|
||||||
|
it is advised to keep a pool of them if your script is heavily
|
||||||
|
multithreaded.
|
||||||
|
|
||||||
|
Both SocketClient and HttpClient give you the following methods:
|
||||||
|
|
||||||
|
get_user()
|
||||||
|
Returns your DBC account details as a dict with the following keys:
|
||||||
|
|
||||||
|
"user": your account numeric ID; if login fails, it will be the only
|
||||||
|
item with the value of 0;
|
||||||
|
"rate": your CAPTCHA rate, i.e. how much you will be charged for one
|
||||||
|
solved CAPTCHA in US cents;
|
||||||
|
"balance": your DBC account balance in US cents;
|
||||||
|
"is_banned": flag indicating whether your account is suspended or not.
|
||||||
|
|
||||||
|
get_balance()
|
||||||
|
Returns your DBC account balance in US cents.
|
||||||
|
|
||||||
|
get_captcha(cid)
|
||||||
|
Returns an uploaded CAPTCHA details as a dict with the following keys:
|
||||||
|
|
||||||
|
"captcha": the CAPTCHA numeric ID; if no such CAPTCHAs found, it will
|
||||||
|
be the only item with the value of 0;
|
||||||
|
"text": the CAPTCHA text, if solved, otherwise None;
|
||||||
|
"is_correct": flag indicating whether the CAPTCHA was solved correctly
|
||||||
|
(DBC can detect that in rare cases).
|
||||||
|
|
||||||
|
The only argument `cid` is the CAPTCHA numeric ID.
|
||||||
|
|
||||||
|
get_text(cid)
|
||||||
|
Returns an uploaded CAPTCHA text (None if not solved). The only argument
|
||||||
|
`cid` is the CAPTCHA numeric ID.
|
||||||
|
|
||||||
|
report(cid)
|
||||||
|
Reports an incorrectly solved CAPTCHA. The only argument `cid` is the
|
||||||
|
CAPTCHA numeric ID. Returns True on success, False otherwise.
|
||||||
|
|
||||||
|
upload(captcha)
|
||||||
|
Uploads a CAPTCHA. The only argument `captcha` can be either file-like
|
||||||
|
object (any object with `read` method defined, actually, so StringIO
|
||||||
|
will do), or CAPTCHA image file name. On successful upload you'll get
|
||||||
|
the CAPTCHA details dict (see get_captcha() method).
|
||||||
|
|
||||||
|
NOTE: AT THIS POINT THE UPLOADED CAPTCHA IS NOT SOLVED YET! You have
|
||||||
|
to poll for its status periodically using get_captcha() or get_text()
|
||||||
|
method until the CAPTCHA is solved and you get the text.
|
||||||
|
|
||||||
|
decode(captcha, timeout=DEFAULT_TIMEOUT)
|
||||||
|
A convenient method that uploads a CAPTCHA and polls for its status
|
||||||
|
periodically, but no longer than `timeout` (defaults to 60 seconds).
|
||||||
|
If solved, you'll get the CAPTCHA details dict (see get_captcha()
|
||||||
|
method for details). See upload() method for details on `captcha`
|
||||||
|
argument.
|
||||||
|
|
||||||
|
Visit http://www.deathbycaptcha.com/user/api for updates.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import binascii
|
||||||
|
import errno
|
||||||
|
import imghdr
|
||||||
|
import random
|
||||||
|
import os
|
||||||
|
import select
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import urllib
|
||||||
|
import urllib2
|
||||||
|
try:
|
||||||
|
from json import read as json_decode, write as json_encode
|
||||||
|
except ImportError:
|
||||||
|
try:
|
||||||
|
from json import loads as json_decode, dumps as json_encode
|
||||||
|
except ImportError:
|
||||||
|
from simplejson import loads as json_decode, dumps as json_encode
|
||||||
|
|
||||||
|
|
||||||
|
# Identification string sent with every API request
# (HTTP User-Agent header / socket "version" field).
API_VERSION = 'DBC/Python v4.6'

# decode() timeouts in seconds: with an image, and without one.
DEFAULT_TIMEOUT = 60
DEFAULT_TOKEN_TIMEOUT = 120
# Successive polling delays in seconds; once exhausted, the fallback is used.
POLLS_INTERVAL = [1, 1, 2, 3, 2, 2, 3, 2, 2]
DFLT_POLL_INTERVAL = 3

# Root URL of the HTTP API.
HTTP_BASE_URL = 'http://api.dbcapi.me/api'

# Content type requested from the HTTP API; do not change.
HTTP_RESPONSE_TYPE = 'application/json'

# Host and candidate ports of the socket API.
SOCKET_HOST = 'api.dbcapi.me'
SOCKET_PORTS = range(8123, 8131)
|
||||||
|
|
||||||
|
|
||||||
|
def _load_image(captcha):
    """Return the raw bytes of a CAPTCHA image after basic validation.

    ``captcha`` may be a file-like object (anything with a ``read`` method),
    a ``bytearray`` holding the image data, or a file name.

    Raises ``ValueError`` if the image is empty and ``TypeError`` if
    ``imghdr`` does not recognise its format.  Errors from opening or reading
    a file propagate unchanged.
    """
    if hasattr(captcha, 'read'):
        img = captcha.read()
    elif isinstance(captcha, bytearray):
        img = captcha
    else:
        # Anything else is treated as a path; the context manager closes the
        # handle even when read() fails.
        with open(captcha, 'rb') as captcha_file:
            img = captcha_file.read()

    if not len(img):
        raise ValueError('CAPTCHA image is empty')
    if imghdr.what(None, img) is None:
        raise TypeError('Unknown CAPTCHA image type')
    return img
|
||||||
|
|
||||||
|
|
||||||
|
class AccessDeniedException(Exception):
    """Raised when the API denies access (credentials, balance or account status)."""
|
||||||
|
|
||||||
|
|
||||||
|
class Client(object):

    """Death by Captcha API Client.

    Base class holding the credentials and the polling logic; the transport
    specific methods (get_user, get_captcha, report, upload) are implemented
    by subclasses.
    """

    def __init__(self, username, password):
        self.is_verbose = False
        self.userpwd = {'username': username, 'password': password}

    def _log(self, cmd, msg=''):
        """Print a timestamped trace line when ``is_verbose`` is set; return self."""
        if self.is_verbose:
            # Parenthesised single argument: prints the same text on Python 2 and 3.
            print('%d %s %s' % (time.time(), cmd, msg.rstrip()))
        return self

    def close(self):
        pass

    def connect(self):
        pass

    def get_user(self):
        """Fetch user details -- ID, balance, rate and banned status."""
        raise NotImplementedError()

    def get_balance(self):
        """Fetch user balance (in US cents)."""
        return self.get_user().get('balance')

    def get_captcha(self, cid):
        """Fetch a CAPTCHA details -- ID, text and correctness flag."""
        raise NotImplementedError()

    def get_text(self, cid):
        """Fetch a CAPTCHA text."""
        return self.get_captcha(cid).get('text') or None

    def report(self, cid):
        """Report a CAPTCHA as incorrectly solved."""
        raise NotImplementedError()

    def upload(self, captcha=None, **kwargs):
        """Upload a CAPTCHA.

        Accepts file names and file-like objects.  Returns CAPTCHA details
        dict on success.  Extra keyword arguments are the ones ``decode()``
        forwards; their meaning is defined by the subclass.

        """
        raise NotImplementedError()

    def decode(self, captcha=None, timeout=None, **kwargs):
        """
        Try to solve a CAPTCHA.

        See Client.upload() for arguments details.

        Uploads a CAPTCHA, polls for its status periodically with arbitrary
        timeout (in seconds), returns CAPTCHA details if (correctly) solved.
        Returns None when the upload fails or no correct text arrives in time.
        """
        if not timeout:
            # Without an image the longer token timeout applies.
            timeout = DEFAULT_TIMEOUT if captcha else DEFAULT_TOKEN_TIMEOUT

        # A negative timeout collapses to 0 and then falls back to the default.
        deadline = time.time() + (max(0, timeout) or DEFAULT_TIMEOUT)
        uploaded_captcha = self.upload(captcha, **kwargs)
        if uploaded_captcha:
            intvl_idx = 0  # POLL_INTERVAL index
            while deadline > time.time() and not uploaded_captcha.get('text'):
                intvl, intvl_idx = self._get_poll_interval(intvl_idx)
                time.sleep(intvl)
                pulled = self.get_captcha(uploaded_captcha['captcha'])
                if pulled['captcha'] == uploaded_captcha['captcha']:
                    uploaded_captcha = pulled
            if uploaded_captcha.get('text') and \
                    uploaded_captcha.get('is_correct'):
                return uploaded_captcha

    def _get_poll_interval(self, idx):
        """Returns poll interval and next index depending on index provided"""

        if len(POLLS_INTERVAL) > idx:
            intvl = POLLS_INTERVAL[idx]
        else:
            intvl = DFLT_POLL_INTERVAL
        idx += 1

        return intvl, idx
|
||||||
|
|
||||||
|
|
||||||
|
class HttpClient(Client):

    """Death by Captcha HTTP API client."""

    def __init__(self, *args):
        Client.__init__(self, *args)
        self.opener = urllib2.build_opener(urllib2.HTTPRedirectHandler())

    def _call(self, cmd, payload=None, headers=None):
        """Send one API request and return the decoded JSON response.

        ``payload`` may be a mapping (sent form-encoded), a ready-made body
        string, or None.  ``headers`` is updated in place.

        Raises AccessDeniedException (HTTP 403), ValueError (400/413),
        OverflowError (503), the original HTTPError for other statuses, and
        RuntimeError when the response cannot be decoded.
        """
        if headers is None:
            headers = {}
        headers['Accept'] = HTTP_RESPONSE_TYPE
        headers['User-Agent'] = API_VERSION
        if hasattr(payload, 'items'):
            payload = urllib.urlencode(payload)
            self._log('SEND', '%s %d %s' % (cmd, len(payload), payload))
        else:
            self._log('SEND', '%s' % cmd)
        if payload is not None:
            headers['Content-Length'] = len(payload)
        try:
            response = self.opener.open(urllib2.Request(
                HTTP_BASE_URL + '/' + cmd.strip('/'),
                data=payload,
                headers=headers
            )).read()
        except urllib2.HTTPError as err:
            if 403 == err.code:
                raise AccessDeniedException('Access denied, please check'
                                            ' your credentials and/or balance')
            elif 400 == err.code or 413 == err.code:
                raise ValueError("CAPTCHA was rejected by the service, check"
                                 " if it's a valid image")
            elif 503 == err.code:
                raise OverflowError("CAPTCHA was rejected due to service"
                                    " overload, try again later")
            else:
                raise

        self._log('RECV', '%d %s' % (len(response), response))
        try:
            return json_decode(response)
        except Exception:
            raise RuntimeError('Invalid API response')

    def get_user(self):
        return self._call('user', self.userpwd.copy()) or {'user': 0}

    def get_captcha(self, cid):
        return self._call('captcha/%d' % cid) or {'captcha': 0}

    def report(self, cid):
        return not self._call('captcha/%d/report' % cid,
                              self.userpwd.copy()).get('is_correct')

    @staticmethod
    def _text_part(boundary, name, value):
        """Build one text/plain multipart section (without the trailing CRLF)."""
        text = str(value)
        return '\r\n'.join((
            '--%s' % boundary,
            'Content-Disposition: form-data; name="%s"' % name,
            'Content-Type: text/plain',
            'Content-Length: %d' % len(text),
            '',
            text
        ))

    def upload(self, captcha=None, **kwargs):
        """Upload a CAPTCHA image and/or extra fields as multipart/form-data.

        ``banner``, when given, is loaded like an image and sent base64
        encoded.  Returns the API response dict if it carries a CAPTCHA ID,
        otherwise None.
        """
        boundary = binascii.hexlify(os.urandom(16))
        banner = kwargs.get('banner', '')
        if banner:
            kwargs['banner'] = 'base64:' + base64.b64encode(_load_image(banner))

        # Credentials first, then the caller's extra fields; every section is
        # separated from the next by CRLF so each boundary starts its own line.
        parts = [self._text_part(boundary, k, v)
                 for k, v in list(self.userpwd.items()) + list(kwargs.items())]

        if captcha:
            img = _load_image(captcha)
            parts.append('\r\n'.join((
                '--%s' % boundary,
                'Content-Disposition: form-data; name="captchafile"; '
                'filename="captcha"',
                'Content-Type: application/octet-stream',
                'Content-Length: %d' % len(img),
                '',
                bytes(img)  # _load_image may hand back a bytearray
            )))

        # The closing delimiter is always written, image or not.
        parts.append('--%s--' % boundary)
        parts.append('')
        body = '\r\n'.join(parts)

        response = self._call('captcha', body, {
            'Content-Type': 'multipart/form-data; boundary="%s"' % boundary
        }) or {}
        if response.get('captcha'):
            return response
|
||||||
|
|
||||||
|
|
||||||
|
class SocketClient(Client):

    """Death by Captcha socket API client.

    Keeps one non-blocking TCP connection; requests are JSON documents
    terminated by CRLF and are serialized with ``socket_lock``.
    """

    TERMINATOR = '\r\n'

    def __init__(self, *args):
        Client.__init__(self, *args)
        self.socket_lock = threading.Lock()
        self.socket = None

    def close(self):
        """Shut down and drop the connection, if any."""
        # getattr: __del__ may run on an instance whose __init__ never finished.
        if getattr(self, 'socket', None):
            self._log('CLOSE')
            try:
                self.socket.shutdown(socket.SHUT_RDWR)
            except socket.error:
                pass
            finally:
                self.socket.close()
                self.socket = None

    def connect(self):
        """Open the connection to a randomly chosen API port if needed; return the socket."""
        if not self.socket:
            self._log('CONN')
            host = (socket.gethostbyname(SOCKET_HOST),
                    random.choice(SOCKET_PORTS))
            self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.socket.settimeout(0)
            try:
                self.socket.connect(host)
            except socket.error as err:
                # A non-blocking connect reports "in progress" style errors; only
                # anything else is a real failure.
                if (err.args[0] not in
                        (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
                    self.close()
                    raise err
        return self.socket

    def __del__(self):
        self.close()

    def _sendrecv(self, sock, buf):
        """Send ``buf`` plus the terminator, then read until a terminated response arrives."""
        self._log('SEND', buf)
        fds = [sock]
        buf += self.TERMINATOR
        response = ''
        intvl_idx = 0
        while True:
            intvl, intvl_idx = self._get_poll_interval(intvl_idx)
            # Wait for writability while data is pending, for readability afterwards.
            rds, wrs, exs = select.select([] if buf else fds,
                                          fds if buf else [],
                                          fds,
                                          intvl)
            if exs:
                raise IOError('select() failed')
            try:
                if wrs:
                    while buf:
                        buf = buf[wrs[0].send(buf):]
                elif rds:
                    # Drain the socket; the loop ends with a "would block"
                    # socket.error, handled below.
                    while True:
                        s = rds[0].recv(256)
                        if not s:
                            raise IOError('recv(): connection lost')
                        else:
                            response += s
            except socket.error as err:
                if (err.args[0] not in
                        (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
                    raise err
            if response.endswith(self.TERMINATOR):
                self._log('RECV', response)
                return response.rstrip(self.TERMINATOR)
        # NOTE(review): not reachable as written -- the loop above only exits
        # by returning or raising, so no overall timeout is enforced.
        raise IOError('send/recv timed out')

    def _call(self, cmd, data=None):
        """Send one API command and return the decoded response dict.

        ``data`` is updated in place with the command name and client
        version.  The exchange is attempted at most twice; a login is issued
        first whenever there is no open connection.

        Raises IOError when no response was obtained, RuntimeError for an
        undecodable response or an unknown API error, AccessDeniedException,
        ValueError or OverflowError for the corresponding API errors.
        """
        if data is None:
            data = {}
        data['cmd'] = cmd
        data['version'] = API_VERSION
        request = json_encode(data)

        response = None
        for _ in range(2):
            if not self.socket and cmd != 'login':
                self._call('login', self.userpwd.copy())
            with self.socket_lock:
                try:
                    sock = self.connect()
                    response = self._sendrecv(sock, request)
                except IOError as err:
                    sys.stderr.write(str(err) + "\n")
                    self.close()
                # NOTE(review): where socket.error is a subclass of IOError the
                # clause above catches it first; handler order kept as is.
                except socket.error as err:
                    sys.stderr.write(str(err) + "\n")
                    self.close()
                    raise IOError('Connection refused')
                else:
                    break

        if response is None:
            raise IOError('Connection lost or timed out during API request')

        try:
            response = json_decode(response)
        except Exception:
            raise RuntimeError('Invalid API response')

        if not response.get('error'):
            return response

        error = response['error']
        if error in ('not-logged-in', 'invalid-credentials'):
            raise AccessDeniedException('Access denied, check your credentials')
        elif 'banned' == error:
            raise AccessDeniedException('Access denied, account is suspended')
        elif 'insufficient-funds' == error:
            raise AccessDeniedException(
                'CAPTCHA was rejected due to low balance')
        elif 'invalid-captcha' == error:
            raise ValueError('CAPTCHA is not a valid image')
        elif 'service-overload' == error:
            raise OverflowError(
                'CAPTCHA was rejected due to service overload, try again later')
        else:
            # Unknown error: drop the connection; the lock is released even if
            # close() fails.
            with self.socket_lock:
                self.close()
            raise RuntimeError('API server error occured: %s' % error)

    def get_user(self):
        return self._call('user') or {'user': 0}

    def get_captcha(self, cid):
        return self._call('captcha', {'captcha': cid}) or {'captcha': 0}

    def upload(self, captcha=None, **kwargs):
        """Upload a CAPTCHA image and/or extra fields.

        The image and the optional ``banner`` are sent base64 encoded.
        Returns a dict with the ``captcha``, ``text`` and ``is_correct`` keys
        when the response carries a CAPTCHA ID, otherwise None.
        """
        data = {}
        if captcha:
            data['captcha'] = base64.b64encode(_load_image(captcha))
        if kwargs:
            banner = kwargs.get('banner', '')
            if banner:
                kwargs['banner'] = base64.b64encode(_load_image(banner))
            data.update(kwargs)
        response = self._call('upload', data)
        if response.get('captcha'):
            uploaded_captcha = dict(
                (k, response.get(k))
                for k in ('captcha', 'text', 'is_correct')
            )
            if not uploaded_captcha['text']:
                uploaded_captcha['text'] = None
            return uploaded_captcha

    def report(self, cid):
        return not self._call('report', {'captcha': cid}).get('is_correct')
|
||||||
|
|
||||||
|
|
||||||
|
# Command-line demo: <username> <password> [captcha image file ...]
if '__main__' == __name__:
    # Put your DBC username & password here:
    # client = HttpClient(sys.argv[1], sys.argv[2])
    client = SocketClient(sys.argv[1], sys.argv[2])
    client.is_verbose = True

    # Parenthesised single-argument print works on both Python 2 and 3.
    print('Your balance is %s US cents' % client.get_balance())

    for fn in sys.argv[3:]:
        try:
            # Put your CAPTCHA image file name or file-like object, and optional
            # solving timeout (in seconds) here:
            captcha = client.decode(fn, DEFAULT_TIMEOUT)
        except Exception as e:
            sys.stderr.write('Failed uploading CAPTCHA: %s\n' % (e, ))
            captcha = None

        if captcha:
            print('CAPTCHA %d solved: %s' %
                  (captcha['captcha'], captcha['text']))

            # Report as incorrectly solved if needed.  Make sure the CAPTCHA was
            # in fact incorrectly solved!
            # try:
            #     client.report(captcha['captcha'])
            # except Exception as e:
            #     sys.stderr.write('Failed reporting CAPTCHA: %s\n' % (e, ))
|
@ -0,0 +1,7 @@
|
|||||||
|
from .base import AnticaptchaClient
|
||||||
|
from .tasks import NoCaptchaTask, NoCaptchaTaskProxylessTask, ImageToTextTask, FunCaptchaTask
|
||||||
|
from .proxy import Proxy
|
||||||
|
from .exceptions import AnticaptchaException
|
||||||
|
from .fields import SimpleText, Image, WebLink, TextInput, Textarea, Checkbox, Select, Radio, ImageUpload
|
||||||
|
|
||||||
|
# Second name for AnticaptchaException using the transposed spelling
# "Anticatpcha"; presumably kept so older imports keep working -- confirm
# against callers before removing.
AnticatpchaException = AnticaptchaException
|
@ -0,0 +1,114 @@
|
|||||||
|
import requests
|
||||||
|
import time
|
||||||
|
|
||||||
|
from six.moves.urllib_parse import urljoin
|
||||||
|
from .exceptions import AnticaptchaException
|
||||||
|
|
||||||
|
# Seconds Job.join() sleeps between two readiness checks.
SLEEP_EVERY_CHECK_FINISHED = 3
# Default upper bound, in seconds, on the sleep time accumulated by Job.join().
MAXIMUM_JOIN_TIME = 5 * 60
|
||||||
|
|
||||||
|
|
||||||
|
class Job(object):
    """Handle for one task, identified by ``task_id``, on a client.

    The most recent payload returned by ``client.getTaskResult`` is cached
    in ``_last_result``; the ``get_*`` accessors read from that cache and
    therefore require a prior ``check_is_ready()`` / ``join()`` call.
    """
    client = None
    task_id = None
    _last_result = None

    def __init__(self, client, task_id):
        self.client, self.task_id = client, task_id

    def _update(self):
        """Fetch the current task result from the client and cache it."""
        self._last_result = self.client.getTaskResult(self.task_id)

    def _solution(self, key):
        """Return one entry of the cached result's ``solution`` mapping."""
        return self._last_result['solution'][key]

    def check_is_ready(self):
        """Refresh the cached result and report whether its status is 'ready'."""
        self._update()
        status = self._last_result['status']
        return status == 'ready'

    def get_solution_response(self):
        """Return ``solution['gRecaptchaResponse']`` (Recaptcha tasks)."""
        return self._solution('gRecaptchaResponse')

    def get_token_response(self):
        """Return ``solution['token']`` (Funcaptcha tasks)."""
        return self._solution('token')

    def get_answers(self):
        """Return ``solution['answers']``."""
        return self._solution('answers')

    def get_captcha_text(self):
        """Return ``solution['text']`` (image tasks)."""
        return self._solution('text')

    def report_incorrect(self):
        """Report this task through ``client.reportIncorrectImage``."""
        return self.client.reportIncorrectImage(self.task_id)

    def join(self, maximum_time=None):
        """Poll until the task is ready.

        Sleeps ``SLEEP_EVERY_CHECK_FINISHED`` seconds between checks and
        counts only that sleep time.  Raises ``AnticaptchaException`` with
        ``error_id=None`` and ``error_code=250`` once the counted time
        exceeds ``maximum_time`` (``MAXIMUM_JOIN_TIME`` when falsy).
        """
        waited = 0
        limit = maximum_time or MAXIMUM_JOIN_TIME
        while True:
            if self.check_is_ready():
                return
            time.sleep(SLEEP_EVERY_CHECK_FINISHED)
            waited += SLEEP_EVERY_CHECK_FINISHED
            if waited > limit:
                message = "The execution time exceeded a maximum time of {} seconds. It takes {} seconds.".format(
                    limit, waited)
                raise AnticaptchaException(None, 250, message)
|
||||||
|
|
||||||
|
|
||||||
|
class AnticaptchaClient(object):
    """Small JSON-over-HTTP client built on a ``requests`` session.

    Every API method POSTs a JSON body containing ``clientKey`` to an
    endpoint below ``base_url`` and raises ``AnticaptchaException`` when
    the decoded response carries a truthy ``errorId``.
    """
    client_key = None
    CREATE_TASK_URL = "/createTask"
    TASK_RESULT_URL = "/getTaskResult"
    BALANCE_URL = "/getBalance"
    REPORT_IMAGE_URL = "/reportIncorrectImageCaptcha"
    SOFT_ID = 847
    language_pool = "en"

    def __init__(self, client_key, language_pool="en", host="api.anti-captcha.com", use_ssl=True):
        self.client_key = client_key
        self.language_pool = language_pool
        scheme = "https" if use_ssl else "http"
        self.base_url = "{proto}://{host}/".format(proto=scheme, host=host)
        self.session = requests.Session()

    @property
    def client_ip(self):
        """The ``origin`` value reported by httpbin.org, fetched once and cached."""
        if hasattr(self, '_client_ip'):
            return self._client_ip
        self._client_ip = self.session.get('http://httpbin.org/ip').json()['origin']
        return self._client_ip

    def _check_response(self, response):
        """Raise ``AnticaptchaException`` if ``response`` has a truthy ``errorId``.

        For ``errorId`` 11 the description is first extended (in place)
        with the value of ``client_ip``.
        """
        error_id = response.get('errorId', False)
        if not error_id:
            return
        if error_id == 11:
            response['errorDescription'] = "{} Your missing IP address is {}.".format(
                response['errorDescription'], self.client_ip)
        raise AnticaptchaException(response['errorId'],
                                   response['errorCode'],
                                   response['errorDescription'])

    def _post(self, path, payload):
        """POST ``payload`` as JSON to ``path`` and return the checked, decoded reply."""
        url = urljoin(self.base_url, path)
        reply = self.session.post(url, json=payload).json()
        self._check_response(reply)
        return reply

    def createTask(self, task):
        """Submit ``task.serialize()`` and return a ``Job`` for the returned ``taskId``."""
        payload = {
            "clientKey": self.client_key,
            "task": task.serialize(),
            "softId": self.SOFT_ID,
            "languagePool": self.language_pool,
        }
        reply = self._post(self.CREATE_TASK_URL, payload)
        return Job(self, reply['taskId'])

    def getTaskResult(self, task_id):
        """Return the decoded response of the task-result endpoint for ``task_id``."""
        return self._post(self.TASK_RESULT_URL,
                          {"clientKey": self.client_key, "taskId": task_id})

    def getBalance(self):
        """Return the ``balance`` field of the balance endpoint's response."""
        reply = self._post(self.BALANCE_URL, {"clientKey": self.client_key})
        return reply['balance']

    def reportIncorrectImage(self, task_id):
        """Report ``task_id`` as incorrect; return whether ``status`` differs from False."""
        reply = self._post(self.REPORT_IMAGE_URL,
                           {"clientKey": self.client_key, "taskId": task_id})
        status = reply.get('status', False)
        return status != False
|
@ -0,0 +1,23 @@
|
|||||||
|
class AnticaptchaException(Exception):
    """Error carrying an id, a code and a human-readable description.

    The exception message is formatted as ``[<code>:<id>]<description>``.
    Extra positional arguments are accepted and ignored.
    """

    def __init__(self, error_id, error_code, error_description, *args):
        message = "[{}:{}]{}".format(error_code, error_id, error_description)
        super(AnticaptchaException, self).__init__(message)
        self.error_id = error_id
        self.error_code = error_code
        self.error_description = error_description
|
||||||
|
|
||||||
|
|
||||||
|
# Second name for AnticaptchaException using the transposed spelling
# "Anticatpcha"; presumably kept for backward compatibility -- confirm
# against callers before removing.
AnticatpchaException = AnticaptchaException
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidWidthException(AnticaptchaException):
    """Raised for a field width other than 100, 50, 33 or 25.

    The offending value is kept in ``width``; the base exception is
    initialised with id ``"AC-1"`` and code ``1``.
    """

    def __init__(self, width):
        self.width = width
        super(InvalidWidthException, self).__init__(
            "AC-1",
            1,
            'Invalid width (%s). Can be one of these: 100, 50, 33, 25.' % (width,),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class MissingNameException(AnticaptchaException):
    """Raised when a named field is serialized without any name.

    The field class is kept in ``cls``; the base exception is initialised
    with id ``"AC-2"`` and code ``2``.
    """

    def __init__(self, cls):
        self.cls = cls
        template = 'Missing name data in {0}. Provide {0}.__init__(name="X") or {0}.serialize(name="X")'
        super(MissingNameException, self).__init__("AC-2", 2, template.format(str(cls)))
|
@ -0,0 +1,199 @@
|
|||||||
|
import six
|
||||||
|
from python_anticaptcha.exceptions import InvalidWidthException, MissingNameException
|
||||||
|
|
||||||
|
|
||||||
|
class BaseField(object):
    """Common base of all form fields: optional ``label`` and ``labelHint``."""
    label = None
    labelHint = None

    def serialize(self, name=None):
        """Return a dict holding ``label`` / ``labelHint`` when they are truthy.

        ``name`` is accepted for interface compatibility and not used here.
        """
        serialized = {}
        for attr in ('label', 'labelHint'):
            value = getattr(self, attr)
            if value:
                serialized[attr] = value
        return serialized
|
||||||
|
|
||||||
|
|
||||||
|
class NameBaseField(BaseField):
    """Base for fields whose serialized form must include a ``name``."""
    name = None

    def serialize(self, name=None):
        """Serialize the field, adding ``name``.

        The ``name`` argument wins over the instance attribute; when both
        are falsy a ``MissingNameException`` is raised.
        """
        data = super(NameBaseField, self).serialize(name)
        effective_name = name or self.name
        if not effective_name:
            raise MissingNameException(cls=self.__class__)
        data['name'] = effective_name
        return data
|
||||||
|
|
||||||
|
|
||||||
|
class SimpleText(BaseField):
    """Content field of type ``'text'``."""
    contentType = 'text'

    def __init__(self, content, label=None, labelHint=None, width=None):
        self.content = content
        self.width = width
        self.label = label
        self.labelHint = labelHint

    def serialize(self, name=None):
        """Return the field as a dict.

        A truthy ``width`` must be one of 100, 50, 33, 25 (otherwise
        ``InvalidWidthException``); it is written to the top-level
        ``width`` key next to an empty ``inputOptions`` dict.
        """
        payload = super(SimpleText, self).serialize(name)
        payload['contentType'] = self.contentType
        payload['content'] = self.content
        if not self.width:
            return payload
        if self.width not in (100, 50, 33, 25):
            raise InvalidWidthException(self.width)
        payload['inputOptions'] = {}
        payload['width'] = self.width
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
class Image(BaseField):
    """Content field of type ``'image'`` whose content is ``imageUrl``."""
    contentType = 'image'

    def __init__(self, imageUrl, label=None, labelHint=None):
        self.imageUrl = imageUrl
        self.label = label
        self.labelHint = labelHint

    def serialize(self, name=None):
        """Return the base serialization plus ``contentType`` and ``content``."""
        payload = super(Image, self).serialize(name)
        payload['contentType'] = self.contentType
        payload['content'] = self.imageUrl
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
class WebLink(BaseField):
    """Content field of type ``'link'`` made of a URL and its caption."""
    contentType = 'link'

    def __init__(self, linkText, linkUrl, label=None, labelHint=None, width=None):
        self.linkText = linkText
        self.linkUrl = linkUrl
        self.width = width
        self.label = label
        self.labelHint = labelHint

    def serialize(self, name=None):
        """Return the field as a dict with ``content`` = ``{'url', 'text'}``.

        A truthy ``width`` must be one of 100, 50, 33, 25 (otherwise
        ``InvalidWidthException``); it is written to the top-level
        ``width`` key next to an empty ``inputOptions`` dict.
        """
        payload = super(WebLink, self).serialize(name)
        payload['contentType'] = self.contentType

        if self.width:
            if self.width not in (100, 50, 33, 25):
                raise InvalidWidthException(self.width)
            payload['inputOptions'] = {}
            payload['width'] = self.width

        payload['content'] = {'url': self.linkUrl, 'text': self.linkText}
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
class TextInput(NameBaseField):
    """Named input field of type ``'text'``."""

    def __init__(self, placeHolder=None, label=None, labelHint=None, width=None):
        self.placeHolder = placeHolder
        self.width = width
        self.label = label
        self.labelHint = labelHint

    def serialize(self, name=None):
        """Return the field as a dict with ``inputType`` and ``inputOptions``.

        A truthy ``width`` must be one of 100, 50, 33, 25 (otherwise
        ``InvalidWidthException``) and is stored as a string.
        """
        payload = super(TextInput, self).serialize(name)
        options = {}
        payload['inputType'] = 'text'
        payload['inputOptions'] = options

        if self.width:
            if self.width not in (100, 50, 33, 25):
                raise InvalidWidthException(self.width)
            options['width'] = str(self.width)

        if self.placeHolder:
            options['placeHolder'] = self.placeHolder
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
class Textarea(NameBaseField):
    """Named input field of type ``'textarea'``."""

    def __init__(self, placeHolder=None, rows=None, label=None, width=None, labelHint=None):
        self.placeHolder = placeHolder
        self.rows = rows
        self.width = width
        self.label = label
        self.labelHint = labelHint

    def serialize(self, name=None):
        """Return the field as a dict with ``inputType`` and ``inputOptions``.

        Truthy ``rows`` and ``width`` are stored as strings; unlike
        ``TextInput`` the width is not validated here.
        """
        payload = super(Textarea, self).serialize(name)
        options = {}
        payload['inputType'] = 'textarea'
        payload['inputOptions'] = options
        if self.rows:
            options['rows'] = str(self.rows)
        if self.placeHolder:
            options['placeHolder'] = self.placeHolder
        if self.width:
            options['width'] = str(self.width)
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
class Checkbox(NameBaseField):
    """Named input field of type ``'checkbox'`` captioned with ``text``."""

    def __init__(self, text, label=None, labelHint=None):
        self.text = text
        self.label = label
        self.labelHint = labelHint

    def serialize(self, name=None):
        """Return the field as a dict; ``text`` goes into ``inputOptions['label']``."""
        payload = super(Checkbox, self).serialize(name)
        payload['inputType'] = 'checkbox'
        payload['inputOptions'] = {'label': self.text}
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
class Select(NameBaseField):
    """Named input field of type ``'select'`` offering a list of choices.

    Each choice is either a plain string (used as both value and caption)
    or a ``(value, caption)`` pair.
    """
    type = 'select'

    def __init__(self, label=None, choices=None, labelHint=None):
        self.label = label
        self.labelHint = labelHint
        self.choices = choices or ()

    def get_choices(self):
        """Yield ``(value, caption)`` pairs for every configured choice."""
        for choice in self.choices:
            # string_types (not text_type) so that plain ``str`` choices on
            # Python 2 are also expanded instead of being unpacked
            # character by character by the caller.
            if isinstance(choice, six.string_types):
                yield choice, choice
            else:
                yield choice

    def serialize(self, name=None):
        """Return the field as a dict; ``inputOptions`` is a list of
        ``{"value": ..., "caption": ...}`` dicts, one per choice."""
        data = super(Select, self).serialize(name)
        data['inputType'] = self.type
        data['inputOptions'] = [
            {"value": value, "caption": caption}
            for value, caption in self.get_choices()
        ]
        return data
|
||||||
|
|
||||||
|
|
||||||
|
class Radio(Select):
    """Same as ``Select`` but serialized with ``inputType`` ``'radio'``."""

    type = 'radio'
|
||||||
|
|
||||||
|
|
||||||
|
class ImageUpload(NameBaseField):
    """Named input field of type ``'imageUpload'``."""

    def __init__(self, label=None, labelHint=None):
        self.label, self.labelHint = label, labelHint

    def serialize(self, name=None):
        """Return the named-field serialization plus ``inputType``."""
        payload = super(ImageUpload, self).serialize(name)
        payload['inputType'] = 'imageUpload'
        return payload
|
@ -0,0 +1,28 @@
|
|||||||
|
from six.moves.urllib_parse import urlparse
|
||||||
|
|
||||||
|
|
||||||
|
class Proxy(object):
    """Proxy settings: type, address, port and optional credentials."""

    def __init__(self, proxy_type, proxy_address, proxy_port, proxy_login, proxy_password):
        self.proxyType = proxy_type
        self.proxyAddress = proxy_address
        self.proxyPort = proxy_port
        self.proxyLogin = proxy_login
        self.proxyPassword = proxy_password

    def serialize(self):
        """Return the settings as a dict.

        ``proxyLogin`` and ``proxyPassword`` are both included as soon as
        either of them is truthy, and both omitted otherwise.
        """
        serialized = {
            'proxyType': self.proxyType,
            'proxyAddress': self.proxyAddress,
            'proxyPort': self.proxyPort,
        }
        has_credentials = self.proxyLogin or self.proxyPassword
        if has_credentials:
            serialized.update(proxyLogin=self.proxyLogin,
                              proxyPassword=self.proxyPassword)
        return serialized

    @classmethod
    def parse_url(cls, url):
        """Build an instance from a URL: scheme, hostname, port, username, password."""
        parts = urlparse(url)
        return cls(
            proxy_type=parts.scheme,
            proxy_address=parts.hostname,
            proxy_port=parts.port,
            proxy_login=parts.username,
            proxy_password=parts.password,
        )
|
@ -0,0 +1,128 @@
|
|||||||
|
import base64
|
||||||
|
from .fields import BaseField
|
||||||
|
|
||||||
|
|
||||||
|
class BaseTask(object):
    """Root of the task hierarchy; end of the cooperative ``serialize`` chain."""

    def serialize(self, **result):
        """Return the keyword arguments received, as a dict."""
        return result
|
||||||
|
|
||||||
|
|
||||||
|
class ProxyMixin(BaseTask):
    """Mixin adding proxy, user agent and optional cookies to a task.

    ``proxy`` and ``user_agent`` are required keyword arguments (a missing
    one raises ``KeyError``); ``cookies`` defaults to an empty string.
    Remaining arguments are passed on to the next ``__init__`` in the MRO.
    """

    def __init__(self, *args, **kwargs):
        self.proxy = kwargs.pop('proxy')
        self.userAgent = kwargs.pop('user_agent')
        self.cookies = kwargs.pop('cookies', '')
        super(ProxyMixin, self).__init__(*args, **kwargs)

    def serialize(self, **result):
        """Return the parent serialization merged with ``proxy.serialize()``,
        ``userAgent`` and, when truthy, ``cookies``."""
        serialized = super(ProxyMixin, self).serialize(**result)
        serialized.update(self.proxy.serialize())
        serialized['userAgent'] = self.userAgent
        if not self.cookies:
            return serialized
        serialized['cookies'] = self.cookies
        return serialized
|
||||||
|
|
||||||
|
|
||||||
|
class NoCaptchaTaskProxylessTask(BaseTask):
    """Task of type ``"NoCaptchaTaskProxyless"`` for a site URL and site key."""
    type = "NoCaptchaTaskProxyless"
    websiteURL = None
    websiteKey = None
    websiteSToken = None

    def __init__(self, website_url, website_key, website_s_token=None, is_invisible=None):
        self.websiteURL = website_url
        self.websiteKey = website_key
        self.websiteSToken = website_s_token
        self.isInvisible = is_invisible

    def serialize(self):
        """Return the task as a dict.

        ``websiteSToken`` and ``isInvisible`` are included only when they
        are not ``None``.
        """
        serialized = {
            'type': self.type,
            'websiteURL': self.websiteURL,
            'websiteKey': self.websiteKey,
        }
        optional = (('websiteSToken', self.websiteSToken),
                    ('isInvisible', self.isInvisible))
        for key, value in optional:
            if value is not None:
                serialized[key] = value
        return serialized
|
||||||
|
|
||||||
|
|
||||||
|
class FunCaptchaTask(ProxyMixin):
    """Proxy-using task of type ``"FunCaptchaTask"``."""
    type = "FunCaptchaTask"
    websiteURL = None
    websiteKey = None

    def __init__(self, website_url, website_key, *args, **kwargs):
        self.websiteURL, self.websiteKey = website_url, website_key
        super(FunCaptchaTask, self).__init__(*args, **kwargs)

    def serialize(self, **result):
        """Return the proxy serialization plus ``type``, ``websiteURL`` and
        ``websitePublicKey`` (taken from ``websiteKey``)."""
        serialized = super(FunCaptchaTask, self).serialize(**result)
        serialized['type'] = self.type
        serialized['websiteURL'] = self.websiteURL
        serialized['websitePublicKey'] = self.websiteKey
        return serialized
|
||||||
|
|
||||||
|
|
||||||
|
class NoCaptchaTask(ProxyMixin, NoCaptchaTaskProxylessTask):
    """``NoCaptchaTaskProxylessTask`` combined with ``ProxyMixin``; type ``"NoCaptchaTask"``."""

    type = "NoCaptchaTask"
|
||||||
|
|
||||||
|
|
||||||
|
class ImageToTextTask(object):
    """Task of type ``"ImageToTextTask"`` built from a binary file object.

    ``fp`` must provide ``read()`` returning bytes; the remaining options
    are passed through to the serialized task unchanged.
    """
    type = "ImageToTextTask"
    fp = None
    phrase = None
    case = None
    numeric = None
    math = None
    minLength = None
    maxLength = None

    def __init__(self, fp, phrase=None, case=None, numeric=None, math=None, min_length=None, max_length=None):
        self.fp = fp
        self.phrase, self.case = phrase, case
        self.numeric, self.math = numeric, math
        self.minLength, self.maxLength = min_length, max_length

    def serialize(self):
        """Return the task as a dict; ``body`` is the base64 text of ``fp.read()``.

        Reading consumes the file object.
        """
        encoded_body = base64.b64encode(self.fp.read()).decode('utf-8')
        serialized = {'type': self.type, 'body': encoded_body}
        for key in ('phrase', 'case', 'numeric', 'math', 'minLength', 'maxLength'):
            serialized[key] = getattr(self, key)
        return serialized
|
||||||
|
|
||||||
|
|
||||||
|
class CustomCaptchaTask(BaseTask):
    """Task of type ``'CustomCaptchaTask'``: an image URL plus an optional form.

    ``form`` maps field names to either ``BaseField`` instances or plain
    dicts describing a field.
    """
    type = 'CustomCaptchaTask'
    imageUrl = None
    assignment = None
    form = None

    def __init__(self, imageUrl, form=None, assignment=None):
        self.imageUrl = imageUrl
        self.form = form or {}
        self.assignment = assignment

    def _serialize_field(self, name, field):
        """Return the dict for one form entry, with ``name`` applied."""
        if isinstance(field, BaseField):
            return field.serialize(name)
        # Plain dict: work on a copy so the caller's form is not modified.
        described = field.copy()
        described['name'] = name
        return described

    def serialize(self):
        """Return the task as a dict; ``forms`` and ``assignment`` are
        included only when truthy."""
        serialized = super(CustomCaptchaTask, self).serialize()
        serialized['type'] = self.type
        serialized['imageUrl'] = self.imageUrl
        if self.form:
            serialized['forms'] = [self._serialize_field(name, field)
                                   for name, field in self.form.items()]
        if self.assignment:
            serialized['assignment'] = self.assignment
        return serialized
|
@ -0,0 +1,257 @@
|
|||||||
|
# coding=utf-8
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
import json
|
||||||
|
from subliminal.cache import region
|
||||||
|
from dogpile.cache.api import NO_VALUE
|
||||||
|
from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask, NoCaptchaTask, AnticaptchaException,\
|
||||||
|
Proxy
|
||||||
|
from deathbycaptcha import SocketClient as DBCClient, DEFAULT_TOKEN_TIMEOUT
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class PitcherRegistry(object):
    """Registry of pitcher classes, looked up by name or by source.

    Each registered class is indexed under ``"<name>_<needs_proxy>"`` and
    ``"<source>_<needs_proxy>"``; a later registration under the same key
    replaces the earlier one.
    """

    def __init__(self):
        # Per-instance containers: class-level mutables would be shared by
        # every registry instance.
        self.pitchers = []
        self.pitchers_by_key = {}

    def register(self, cls):
        """Register ``cls`` and return it, so this works as a class decorator."""
        idx = len(self.pitchers)
        self.pitchers.append(cls)
        for label in (cls.name, cls.source):
            self.pitchers_by_key["%s_%s" % (label, cls.needs_proxy)] = idx
        return cls

    def get_pitcher(self, name_or_site=None, with_proxy=False):
        """Return the pitcher class registered for ``name_or_site``.

        ``name_or_site`` falls back to the ``ANTICAPTCHA_CLASS`` environment
        variable.  Raises ``Exception`` when no name is available or when
        nothing is registered for the name / ``with_proxy`` combination.
        """
        name_or_site = name_or_site or os.environ.get("ANTICAPTCHA_CLASS")
        if not name_or_site:
            raise Exception("AntiCaptcha class not given, exiting")

        key = "%s_%s" % (name_or_site, with_proxy)
        if key not in self.pitchers_by_key:
            raise Exception("Pitcher %s not found (proxy: %s)" % (name_or_site, with_proxy))

        return self.pitchers[self.pitchers_by_key[key]]
|
||||||
|
|
||||||
|
|
||||||
|
# One PitcherRegistry instance bound to two module-level names:
# `registry` and `pitchers` refer to the same object.
registry = pitchers = PitcherRegistry()
|
||||||
|
|
||||||
|
|
||||||
|
class Pitcher(object):
    """Base class for captcha-solving backends.

    Subclasses provide ``get_client()`` and ``get_job()``; ``throw()`` runs
    ``_throw()`` and, when ``success`` was set, records and logs the time
    it took.
    """
    name = None
    source = None
    needs_proxy = False
    tries = 3
    job = None
    client = None
    client_key = None
    website_url = None
    website_key = None
    website_name = None
    solve_time = None
    success = False

    def __init__(self, website_name, website_url, website_key, tries=3, client_key=None, *args, **kwargs):
        """Store the target site data and the service key.

        ``client_key`` falls back to the ``ANTICAPTCHA_ACCOUNT_KEY``
        environment variable; an ``Exception`` is raised when neither is
        set.  Extra positional/keyword arguments are accepted and ignored.
        """
        self.tries = tries
        key = client_key or os.environ.get("ANTICAPTCHA_ACCOUNT_KEY")
        self.client_key = key
        if not key:
            raise Exception("AntiCaptcha key not given, exiting")

        self.website_name = website_name
        self.website_url = website_url
        self.website_key = website_key
        self.solve_time = None
        self.success = False

    def get_client(self):
        """Return the service client; must be implemented by subclasses."""
        raise NotImplementedError

    def get_job(self):
        """Return the job to solve; must be implemented by subclasses."""
        raise NotImplementedError

    def _throw(self):
        """Create the client, then the job, storing both on the instance."""
        self.client = self.get_client()
        self.job = self.get_job()

    def throw(self):
        """Run ``_throw()`` and return its result, timing successful runs."""
        started = time.time()
        result = self._throw()
        if not self.success:
            return result
        self.solve_time = time.time() - started
        logger.info("%s: Solving took %ss", self.website_name, int(self.solve_time))
        return result
|
||||||
|
|
||||||
|
|
||||||
|
@registry.register
class AntiCaptchaProxyLessPitcher(Pitcher):
    """Pitcher solving through ``AnticaptchaClient`` without a proxy."""
    name = "AntiCaptchaProxyLess"
    source = "anti-captcha.com"
    host = "api.anti-captcha.com"
    language_pool = "en"
    tries = 5
    use_ssl = True
    is_invisible = False

    def __init__(self, website_name, website_url, website_key, tries=3, host=None, language_pool=None,
                 use_ssl=True, is_invisible=False, *args, **kwargs):
        """Store connection options; ``host`` and ``language_pool`` fall
        back to the class defaults when falsy."""
        super(AntiCaptchaProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries, *args,
                                                          **kwargs)
        self.host = host or self.host
        self.language_pool = language_pool or self.language_pool
        self.use_ssl = use_ssl
        self.is_invisible = is_invisible

    def get_client(self):
        """Return an ``AnticaptchaClient`` configured from this pitcher."""
        return AnticaptchaClient(self.client_key, self.language_pool, self.host, self.use_ssl)

    def get_job(self):
        """Create a ``NoCaptchaTaskProxylessTask`` on the client and return its job."""
        task = NoCaptchaTaskProxylessTask(website_url=self.website_url, website_key=self.website_key,
                                          is_invisible=self.is_invisible)
        return self.client.createTask(task)

    def _throw(self):
        """Try up to ``self.tries`` times to obtain a solution response.

        Returns the response on success (and sets ``success``), ``None``
        when solving is given up.  An ``AnticaptchaException`` that is not
        one of the handled cases is re-raised unless it happens on the
        last attempt.
        """
        for attempt in range(self.tries):
            try:
                super(AntiCaptchaProxyLessPitcher, self)._throw()
                self.job.join()
                ret = self.job.get_solution_response()
                if ret:
                    self.success = True
                    return ret
            except AnticaptchaException as e:
                # Unrecoverable conditions are reported with their own
                # message, whichever attempt they occur on.
                if e.error_code == 'ERROR_ZERO_BALANCE':
                    logger.error("%s: No balance left on captcha solving service. Exiting", self.website_name)
                    return

                if e.error_code == 'ERROR_KEY_DOES_NOT_EXIST':
                    logger.error("%s: Bad AntiCaptcha API key", self.website_name)
                    return

                if attempt >= self.tries - 1:
                    logger.error("%s: Captcha solving finally failed. Exiting", self.website_name)
                    return

                if e.error_code == 'ERROR_NO_SLOT_AVAILABLE':
                    logger.info("%s: No captcha solving slot available, retrying", self.website_name)
                    time.sleep(5.0)
                    continue

                # error_id None with code 250 is the timeout raised by Job.join().
                if e.error_id is None and e.error_code == 250:
                    logger.info("%s: Captcha solving timed out, retrying", self.website_name)
                    time.sleep(1.0)
                    continue

                raise
|
||||||
|
|
||||||
|
|
||||||
|
@registry.register
class AntiCaptchaPitcher(AntiCaptchaProxyLessPitcher):
    """Variant of ``AntiCaptchaProxyLessPitcher`` that solves through a proxy."""
    name = "AntiCaptcha"
    proxy = None
    needs_proxy = True
    user_agent = None
    cookies = None

    def __init__(self, *args, **kwargs):
        """Extract the proxy-related keywords, then defer to the parent.

        ``proxy`` (a URL parsed with ``Proxy.parse_url``) and
        ``user_agent`` are required keywords.  ``cookies``, when a dict,
        is flattened to ``"k=v;k=v"``; any other value leaves
        ``self.cookies`` at the class default.
        """
        self.proxy = Proxy.parse_url(kwargs.pop("proxy"))
        self.user_agent = kwargs.pop("user_agent")
        cookies = kwargs.pop("cookies", {})
        if isinstance(cookies, dict):
            # items() instead of iteritems(): works on Python 2 and 3.
            self.cookies = ";".join(["%s=%s" % (k, v) for k, v in cookies.items()])

        super(AntiCaptchaPitcher, self).__init__(*args, **kwargs)

    def get_job(self):
        """Create a ``NoCaptchaTask`` (with proxy data) on the client and return its job."""
        task = NoCaptchaTask(website_url=self.website_url, website_key=self.website_key, proxy=self.proxy,
                             user_agent=self.user_agent, cookies=self.cookies, is_invisible=self.is_invisible)
        return self.client.createTask(task)
|
||||||
|
|
||||||
|
|
||||||
|
@registry.register
class DBCProxyLessPitcher(Pitcher):
    """Pitcher solving through the DeathByCaptcha client without a proxy.

    The client key is expected in the form ``"<username>:<password>"``.
    """
    name = "DeathByCaptchaProxyLess"
    source = "deathbycaptcha.com"
    username = None
    password = None

    def __init__(self, website_name, website_url, website_key,
                 timeout=DEFAULT_TOKEN_TIMEOUT, tries=3, *args, **kwargs):
        """Store site data and split the client key into username/password.

        Keyword arguments (notably ``client_key``) are forwarded to the
        base class; extra positional arguments are ignored.  Raises
        ``ValueError`` when the key contains no ``":"``.
        """
        super(DBCProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries,
                                                  **kwargs)

        self.username, self.password = self.client_key.split(":", 1)
        self.timeout = timeout

    def get_client(self):
        """Return a DeathByCaptcha client for the stored credentials."""
        return DBCClient(self.username, self.password)

    def get_job(self):
        """No separate job object is used by this backend; returns ``None``."""
        pass

    @property
    def payload_dict(self):
        """Token parameters sent to the service: site key and page URL."""
        return {
            "googlekey": self.website_key,
            "pageurl": self.website_url
        }

    def _throw(self):
        """Try up to ``self.tries`` times to decode a token.

        Returns the solved text (and sets ``success``) as soon as a reply
        is marked correct and carries text; returns ``None`` when every
        attempt comes back unsolved.  Client exceptions propagate.
        """
        super(DBCProxyLessPitcher, self)._throw()
        payload = json.dumps(self.payload_dict)
        for _ in range(self.tries):
            data = self.client.decode(timeout=self.timeout, type=4, token_params=payload)
            if data and data["is_correct"] and data["text"]:
                self.success = True
                return data["text"]
|
||||||
|
|
||||||
|
|
||||||
|
@registry.register
class DBCPitcher(DBCProxyLessPitcher):
    """Variant of ``DBCProxyLessPitcher`` that sends proxy data with the payload."""
    name = "DeathByCaptcha"
    proxy = None
    needs_proxy = True
    proxy_type = "HTTP"

    def __init__(self, *args, **kwargs):
        """Take the required ``proxy`` keyword, then defer to the parent."""
        self.proxy = kwargs.pop("proxy")
        super(DBCPitcher, self).__init__(*args, **kwargs)

    @property
    def payload_dict(self):
        """The parent's token parameters plus ``proxytype`` and ``proxy``."""
        params = super(DBCPitcher, self).payload_dict
        params["proxytype"] = self.proxy_type
        params["proxy"] = self.proxy
        return params
|
||||||
|
|
||||||
|
|
||||||
|
def load_verification(site_name, session, callback=lambda x: None):
    """Restore cached cookies and user agent for ``site_name`` into ``session``.

    Reads the ``"<site_name>_data"`` entry (a ``(cookies, user_agent)``
    pair) from the cache region.  When present, the user agent header and
    the cookies are applied to ``session`` and the result of
    ``callback(region)`` is returned.  Returns ``False`` when nothing is
    cached or when applying the data / running the callback fails.
    """
    ccks = region.get("%s_data" % site_name, expiration_time=15552000)  # 6m
    if ccks != NO_VALUE:
        cookies, user_agent = ccks
        logger.debug("%s: Re-using previous user agent: %s", site_name.capitalize(), user_agent)
        session.headers["User-Agent"] = user_agent
        try:
            session.cookies._cookies.update(cookies)
            return callback(region)
        except Exception:
            # Best effort: a broken cache entry or callback must not abort
            # the caller, but KeyboardInterrupt/SystemExit still propagate.
            logger.debug("%s: Couldn't re-use previous verification data", site_name.capitalize(),
                         exc_info=True)
            return False
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def store_verification(site_name, session):
    """Cache the session's cookies and user agent under ``"<site_name>_data"``."""
    cache_key = "%s_data" % site_name
    snapshot = (session.cookies._cookies, session.headers["User-Agent"])
    region.set(cache_key, snapshot)
|
Loading…
Reference in new issue