parent
0d05000e97
commit
afb2a86810
@ -0,0 +1,516 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
"""Death by Captcha HTTP and socket API clients.
|
||||
|
||||
There are two types of Death by Captcha (DBC hereinafter) API: HTTP and
|
||||
socket ones. Both offer the same functionality, with the socket API
|
||||
sporting faster responses and using way less connections.
|
||||
|
||||
To access the socket API, use SocketClient class; for the HTTP API, use
|
||||
HttpClient class. Both are thread-safe. SocketClient keeps a persistent
|
||||
connection opened and serializes all API requests sent through it, thus
|
||||
it is advised to keep a pool of them if your script is heavily
|
||||
multithreaded.
|
||||
|
||||
Both SocketClient and HttpClient give you the following methods:
|
||||
|
||||
get_user()
|
||||
Returns your DBC account details as a dict with the following keys:
|
||||
|
||||
"user": your account numeric ID; if login fails, it will be the only
|
||||
item with the value of 0;
|
||||
"rate": your CAPTCHA rate, i.e. how much you will be charged for one
|
||||
solved CAPTCHA in US cents;
|
||||
"balance": your DBC account balance in US cents;
|
||||
"is_banned": flag indicating whether your account is suspended or not.
|
||||
|
||||
get_balance()
|
||||
Returns your DBC account balance in US cents.
|
||||
|
||||
get_captcha(cid)
|
||||
Returns an uploaded CAPTCHA details as a dict with the following keys:
|
||||
|
||||
"captcha": the CAPTCHA numeric ID; if no such CAPTCHAs found, it will
|
||||
be the only item with the value of 0;
|
||||
"text": the CAPTCHA text, if solved, otherwise None;
|
||||
"is_correct": flag indicating whether the CAPTCHA was solved correctly
|
||||
(DBC can detect that in rare cases).
|
||||
|
||||
The only argument `cid` is the CAPTCHA numeric ID.
|
||||
|
||||
get_text(cid)
|
||||
Returns an uploaded CAPTCHA text (None if not solved). The only argument
|
||||
`cid` is the CAPTCHA numeric ID.
|
||||
|
||||
report(cid)
|
||||
Reports an incorrectly solved CAPTCHA. The only argument `cid` is the
|
||||
CAPTCHA numeric ID. Returns True on success, False otherwise.
|
||||
|
||||
upload(captcha)
|
||||
Uploads a CAPTCHA. The only argument `captcha` can be either file-like
|
||||
object (any object with `read` method defined, actually, so StringIO
|
||||
will do), or CAPTCHA image file name. On successful upload you'll get
|
||||
the CAPTCHA details dict (see get_captcha() method).
|
||||
|
||||
NOTE: AT THIS POINT THE UPLOADED CAPTCHA IS NOT SOLVED YET! You have
|
||||
to poll for its status periodically using get_captcha() or get_text()
|
||||
method until the CAPTCHA is solved and you get the text.
|
||||
|
||||
decode(captcha, timeout=DEFAULT_TIMEOUT)
|
||||
A convenient method that uploads a CAPTCHA and polls for its status
|
||||
periodically, but no longer than `timeout` (defaults to 60 seconds).
|
||||
If solved, you'll get the CAPTCHA details dict (see get_captcha()
|
||||
method for details). See upload() method for details on `captcha`
|
||||
argument.
|
||||
|
||||
Visit http://www.deathbycaptcha.com/user/api for updates.
|
||||
|
||||
"""
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import errno
|
||||
import imghdr
|
||||
import random
|
||||
import os
|
||||
import select
|
||||
import socket
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import urllib
|
||||
import urllib2
|
||||
try:
|
||||
from json import read as json_decode, write as json_encode
|
||||
except ImportError:
|
||||
try:
|
||||
from json import loads as json_decode, dumps as json_encode
|
||||
except ImportError:
|
||||
from simplejson import loads as json_decode, dumps as json_encode
|
||||
|
||||
|
||||
# API version and unique software ID
|
||||
API_VERSION = 'DBC/Python v4.6'
|
||||
|
||||
# Default CAPTCHA timeout and decode() polling interval
|
||||
DEFAULT_TIMEOUT = 60
|
||||
DEFAULT_TOKEN_TIMEOUT = 120
|
||||
POLLS_INTERVAL = [1, 1, 2, 3, 2, 2, 3, 2, 2]
|
||||
DFLT_POLL_INTERVAL = 3
|
||||
|
||||
# Base HTTP API url
|
||||
HTTP_BASE_URL = 'http://api.dbcapi.me/api'
|
||||
|
||||
# Preferred HTTP API server's response content type, do not change
|
||||
HTTP_RESPONSE_TYPE = 'application/json'
|
||||
|
||||
# Socket API server's host & ports range
|
||||
SOCKET_HOST = 'api.dbcapi.me'
|
||||
SOCKET_PORTS = range(8123, 8131)
|
||||
|
||||
|
||||
def _load_image(captcha):
|
||||
if hasattr(captcha, 'read'):
|
||||
img = captcha.read()
|
||||
elif type(captcha) == bytearray:
|
||||
img = captcha
|
||||
else:
|
||||
img = ''
|
||||
try:
|
||||
captcha_file = open(captcha, 'rb')
|
||||
except Exception:
|
||||
raise
|
||||
else:
|
||||
img = captcha_file.read()
|
||||
captcha_file.close()
|
||||
if not len(img):
|
||||
raise ValueError('CAPTCHA image is empty')
|
||||
elif imghdr.what(None, img) is None:
|
||||
raise TypeError('Unknown CAPTCHA image type')
|
||||
else:
|
||||
return img
|
||||
|
||||
|
||||
class AccessDeniedException(Exception):

    """Raised by the API clients when the service denies access."""
|
||||
|
||||
|
||||
class Client(object):

    """Death by Captcha API Client.

    Abstract base class: stores the credentials and implements the
    operations that can be expressed through get_user(), get_captcha() and
    upload(), which concrete clients must provide.
    """

    def __init__(self, username, password):
        # When True, _log() writes every command to stdout
        self.is_verbose = False
        self.userpwd = {'username': username, 'password': password}

    def _log(self, cmd, msg=''):
        """Write a timestamped line to stdout if verbose; return self."""
        if self.is_verbose:
            sys.stdout.write(
                '%d %s %s\n' % (time.time(), cmd, msg.rstrip()))
        return self

    def close(self):
        """Release the connection, if any (no-op in the base class)."""
        pass

    def connect(self):
        """Open the connection, if any (no-op in the base class)."""
        pass

    def get_user(self):
        """Fetch user details -- ID, balance, rate and banned status."""
        raise NotImplementedError()

    def get_balance(self):
        """Fetch user balance (in US cents)."""
        return self.get_user().get('balance')

    def get_captcha(self, cid):
        """Fetch a CAPTCHA details -- ID, text and correctness flag."""
        raise NotImplementedError()

    def get_text(self, cid):
        """Fetch a CAPTCHA text, or None if there is none yet."""
        return self.get_captcha(cid).get('text') or None

    def report(self, cid):
        """Report a CAPTCHA as incorrectly solved."""
        raise NotImplementedError()

    def upload(self, captcha=None, **kwargs):
        """Upload a CAPTCHA.

        Accepts file names and file-like objects. Returns CAPTCHA details
        dict on success.

        The signature matches the concrete clients and the way decode()
        calls it: `captcha` is optional and extra keyword arguments are
        accepted.

        """
        raise NotImplementedError()

    def decode(self, captcha=None, timeout=None, **kwargs):
        """
        Try to solve a CAPTCHA.

        See Client.upload() for arguments details.

        Uploads a CAPTCHA, polls for its status periodically with arbitrary
        timeout (in seconds), returns CAPTCHA details if (correctly) solved.
        Returns None if the upload yields nothing, or if no text flagged as
        correct is available once polling stops.
        """
        if not timeout:
            # No image given: use the longer token timeout
            timeout = DEFAULT_TIMEOUT if captcha else DEFAULT_TOKEN_TIMEOUT

        # A negative timeout falls back to DEFAULT_TIMEOUT
        deadline = time.time() + (max(0, timeout) or DEFAULT_TIMEOUT)
        uploaded_captcha = self.upload(captcha, **kwargs)
        if not uploaded_captcha:
            return None

        intvl_idx = 0  # POLLS_INTERVAL index
        while deadline > time.time() and not uploaded_captcha.get('text'):
            intvl, intvl_idx = self._get_poll_interval(intvl_idx)
            time.sleep(intvl)
            pulled = self.get_captcha(uploaded_captcha['captcha'])
            # Only accept a reply that is about the CAPTCHA we uploaded
            if pulled['captcha'] == uploaded_captcha['captcha']:
                uploaded_captcha = pulled

        if (uploaded_captcha.get('text') and
                uploaded_captcha.get('is_correct')):
            return uploaded_captcha
        return None

    def _get_poll_interval(self, idx):
        """Returns poll interval and next index depending on index provided"""
        if len(POLLS_INTERVAL) > idx:
            intvl = POLLS_INTERVAL[idx]
        else:
            # Past the end of the schedule: use the default interval
            intvl = DFLT_POLL_INTERVAL
        return intvl, idx + 1
|
||||
|
||||
|
||||
class HttpClient(Client):

    """Death by Captcha HTTP API client."""

    def __init__(self, *args):
        Client.__init__(self, *args)
        self.opener = urllib2.build_opener(urllib2.HTTPRedirectHandler())

    def _call(self, cmd, payload=None, headers=None):
        """Send one API request and return the decoded JSON response.

        `cmd` is the path below HTTP_BASE_URL.  `payload` may be a mapping
        (sent URL-encoded), a ready request body, or None for a request
        without a body.  `headers` are extra request headers.

        Raises AccessDeniedException on HTTP 403, ValueError on 400/413,
        OverflowError on 503, re-raises any other urllib2.HTTPError, and
        raises RuntimeError if the response cannot be decoded.
        """
        # Copy so the caller's dict is not modified
        headers = dict(headers or {})
        headers['Accept'] = HTTP_RESPONSE_TYPE
        headers['User-Agent'] = API_VERSION
        if hasattr(payload, 'items'):
            payload = urllib.urlencode(payload)
            self._log('SEND', '%s %d %s' % (cmd, len(payload), payload))
        else:
            self._log('SEND', '%s' % cmd)
        if payload is not None:
            headers['Content-Length'] = len(payload)
        try:
            response = self.opener.open(urllib2.Request(
                HTTP_BASE_URL + '/' + cmd.strip('/'),
                data=payload,
                headers=headers
            )).read()
        except urllib2.HTTPError as err:
            if 403 == err.code:
                raise AccessDeniedException('Access denied, please check'
                                            ' your credentials and/or balance')
            elif 400 == err.code or 413 == err.code:
                raise ValueError("CAPTCHA was rejected by the service, check"
                                 " if it's a valid image")
            elif 503 == err.code:
                raise OverflowError("CAPTCHA was rejected due to service"
                                    " overload, try again later")
            else:
                raise
        self._log('RECV', '%d %s' % (len(response), response))
        try:
            return json_decode(response)
        except Exception:
            raise RuntimeError('Invalid API response')

    def get_user(self):
        return self._call('user', self.userpwd.copy()) or {'user': 0}

    def get_captcha(self, cid):
        return self._call('captcha/%d' % cid) or {'captcha': 0}

    def report(self, cid):
        return not self._call('captcha/%d/report' % cid,
                              self.userpwd.copy()).get('is_correct')

    def upload(self, captcha=None, **kwargs):
        """Upload a CAPTCHA as a multipart/form-data request.

        `captcha` is an optional image (see _load_image()); extra keyword
        arguments are sent as additional text fields, with an optional
        `banner` image sent base64-encoded.  Returns the response dict if
        it carries a CAPTCHA ID, otherwise None.
        """
        boundary = binascii.hexlify(os.urandom(16))
        banner = kwargs.get('banner', '')
        if banner:
            kwargs['banner'] = 'base64:' + base64.b64encode(_load_image(banner))

        # Collect every line of the body and join once, so each boundary
        # delimiter is preceded by CRLF, including the one between the
        # credential fields and the extra fields.
        lines = []
        for k, v in list(self.userpwd.items()) + list(kwargs.items()):
            value = str(v)
            lines.extend((
                '--%s' % boundary,
                'Content-Disposition: form-data; name="%s"' % k,
                'Content-Type: text/plain',
                'Content-Length: %d' % len(value),
                '',
                value,
            ))

        if captcha:
            img = _load_image(captcha)
            lines.extend((
                '--%s' % boundary,
                'Content-Disposition: form-data; name="captchafile"; '
                'filename="captcha"',
                'Content-Type: application/octet-stream',
                'Content-Length: %d' % len(img),
                '',
                # _load_image() may hand back a bytearray, which cannot be
                # joined with str parts
                str(img),
            ))

        # The closing delimiter is needed whether or not an image was sent
        lines.extend(('--%s--' % boundary, ''))
        body = '\r\n'.join(lines)

        response = self._call('captcha', body, {
            'Content-Type': 'multipart/form-data; boundary="%s"' % boundary
        }) or {}
        if response.get('captcha'):
            return response
        return None
|
||||
|
||||
|
||||
class SocketClient(Client):

    """Death by Captcha socket API client.

    Keeps one non-blocking socket per instance; requests sent through
    _call() are serialized with `socket_lock`.
    """

    # Appended to every request and expected at the end of every response
    TERMINATOR = '\r\n'

    def __init__(self, *args):
        Client.__init__(self, *args)
        self.socket_lock = threading.Lock()
        self.socket = None

    def close(self):
        """Shut down and close the socket, if one is open."""
        if self.socket:
            self._log('CLOSE')
            try:
                self.socket.shutdown(socket.SHUT_RDWR)
            except socket.error:
                # Best effort: the socket is closed below regardless
                pass
            finally:
                self.socket.close()
                self.socket = None

    def connect(self):
        """Return the socket, opening a non-blocking connection if needed."""
        if not self.socket:
            self._log('CONN')
            host = (socket.gethostbyname(SOCKET_HOST),
                    random.choice(SOCKET_PORTS))
            self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.socket.settimeout(0)
            try:
                self.socket.connect(host)
            except socket.error as err:
                # These codes are what a non-blocking connect reports while
                # still in progress; anything else is a real failure
                if (err.args[0] not in
                        (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
                    self.close()
                    raise err
        return self.socket

    def __del__(self):
        # __init__ may have failed before `socket` was assigned
        if getattr(self, 'socket', None):
            self.close()

    def _sendrecv(self, sock, buf):
        """Send `buf` plus TERMINATOR and return the response without it.

        Raises IOError if select() reports an exceptional condition or the
        peer closes the connection; socket errors other than
        EAGAIN/EWOULDBLOCK/EINPROGRESS propagate.
        """
        self._log('SEND', buf)
        fds = [sock]
        buf += self.TERMINATOR
        response = ''
        intvl_idx = 0
        while True:
            intvl, intvl_idx = self._get_poll_interval(intvl_idx)
            # Wait for writability while data is unsent, readability after
            rds, wrs, exs = select.select((not buf and fds) or [],
                                          (buf and fds) or [],
                                          fds,
                                          intvl)
            if exs:
                raise IOError('select() failed')
            try:
                if wrs:
                    while buf:
                        buf = buf[wrs[0].send(buf):]
                elif rds:
                    # Drain the socket; the loop ends when recv() raises
                    # because no more data is available right now
                    while True:
                        s = rds[0].recv(256)
                        if not s:
                            raise IOError('recv(): connection lost')
                        response += s
            except socket.error as err:
                if (err.args[0] not in
                        (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
                    raise
            if response.endswith(self.TERMINATOR):
                self._log('RECV', response)
                return response.rstrip(self.TERMINATOR)
        # NOTE(review): unreachable -- the loop above has no `break`, so the
        # exchange never times out by itself; confirm whether a deadline
        # was intended.
        raise IOError('send/recv timed out')

    def _call(self, cmd, data=None):
        """Send command `cmd` with arguments `data`; return the reply dict.

        Logs in first when no connection is open, and makes a second
        attempt if the first one fails with IOError.

        Raises IOError when no response was obtained, RuntimeError for an
        undecodable response or an unrecognised API error,
        AccessDeniedException, ValueError or OverflowError for the API
        errors handled below.
        """
        # Copy so the caller's dict is not modified
        data = dict(data or {})
        data['cmd'] = cmd
        data['version'] = API_VERSION
        request = json_encode(data)

        response = None
        for _ in range(2):
            if not self.socket and cmd != 'login':
                self._call('login', self.userpwd.copy())
            with self.socket_lock:
                try:
                    sock = self.connect()
                    response = self._sendrecv(sock, request)
                except IOError as err:
                    sys.stderr.write(str(err) + "\n")
                    self.close()
                except socket.error as err:
                    sys.stderr.write(str(err) + "\n")
                    self.close()
                    raise IOError('Connection refused')
                else:
                    break

        if response is None:
            raise IOError('Connection lost or timed out during API request')

        try:
            response = json_decode(response)
        except Exception:
            raise RuntimeError('Invalid API response')

        if not response.get('error'):
            return response

        error = response['error']
        if error in ('not-logged-in', 'invalid-credentials'):
            raise AccessDeniedException('Access denied, check your credentials')
        elif 'banned' == error:
            raise AccessDeniedException('Access denied, account is suspended')
        elif 'insufficient-funds' == error:
            raise AccessDeniedException(
                'CAPTCHA was rejected due to low balance')
        elif 'invalid-captcha' == error:
            raise ValueError('CAPTCHA is not a valid image')
        elif 'service-overload' == error:
            raise OverflowError(
                'CAPTCHA was rejected due to service overload, try again later')
        else:
            # The lock is released even if close() raises
            with self.socket_lock:
                self.close()
            raise RuntimeError('API server error occured: %s' % error)

    def get_user(self):
        return self._call('user') or {'user': 0}

    def get_captcha(self, cid):
        return self._call('captcha', {'captcha': cid}) or {'captcha': 0}

    def upload(self, captcha=None, **kwargs):
        """Upload a CAPTCHA; return its details dict, or None.

        `captcha` is an optional image (see _load_image()); extra keyword
        arguments are sent along with the request, with an optional
        `banner` image sent base64-encoded.
        """
        data = {}
        if captcha:
            data['captcha'] = base64.b64encode(_load_image(captcha))
        if kwargs:
            banner = kwargs.get('banner', '')
            if banner:
                kwargs['banner'] = base64.b64encode(_load_image(banner))
            data.update(kwargs)
        response = self._call('upload', data)
        if not response.get('captcha'):
            return None
        uploaded_captcha = dict(
            (k, response.get(k))
            for k in ('captcha', 'text', 'is_correct')
        )
        # Normalise an empty text to None
        if not uploaded_captcha['text']:
            uploaded_captcha['text'] = None
        return uploaded_captcha

    def report(self, cid):
        return not self._call('report', {'captcha': cid}).get('is_correct')
|
||||
|
||||
|
||||
if '__main__' == __name__:
    # Put your DBC username & password here:
    # client = HttpClient(sys.argv[1], sys.argv[2])
    client = SocketClient(sys.argv[1], sys.argv[2])
    client.is_verbose = True

    print('Your balance is %s US cents' % client.get_balance())

    # Every remaining command-line argument is a CAPTCHA to solve
    for fn in sys.argv[3:]:
        try:
            # Put your CAPTCHA image file name or file-like object, and optional
            # solving timeout (in seconds) here:
            captcha = client.decode(fn, DEFAULT_TIMEOUT)
        except Exception as e:
            sys.stderr.write('Failed uploading CAPTCHA: %s\n' % (e, ))
            captcha = None

        if not captcha:
            continue

        print('CAPTCHA %d solved: %s' %
              (captcha['captcha'], captcha['text']))
|
||||
|
||||
# Report as incorrectly solved if needed. Make sure the CAPTCHA was
|
||||
# in fact incorrectly solved!
|
||||
# try:
|
||||
# client.report(captcha['captcha'])
|
||||
# except Exception, e:
|
||||
# sys.stderr.write('Failed reporting CAPTCHA: %s\n' % (e, ))
|
@ -0,0 +1,7 @@
|
||||
from .base import AnticaptchaClient
|
||||
from .tasks import NoCaptchaTask, NoCaptchaTaskProxylessTask, ImageToTextTask, FunCaptchaTask
|
||||
from .proxy import Proxy
|
||||
from .exceptions import AnticaptchaException
|
||||
from .fields import SimpleText, Image, WebLink, TextInput, Textarea, Checkbox, Select, Radio, ImageUpload
|
||||
|
||||
AnticatpchaException = AnticaptchaException
|
@ -0,0 +1,114 @@
|
||||
import requests
|
||||
import time
|
||||
|
||||
from six.moves.urllib_parse import urljoin
|
||||
from .exceptions import AnticaptchaException
|
||||
|
||||
SLEEP_EVERY_CHECK_FINISHED = 3
|
||||
MAXIMUM_JOIN_TIME = 60 * 5
|
||||
|
||||
|
||||
class Job(object):
    """A task submitted through a client, polled via ``client.getTaskResult``.

    The ``get_*`` accessors read the result stored by the latest poll, so
    they are only meaningful after ``check_is_ready()`` or ``join()``.
    """

    client = None
    task_id = None
    _last_result = None

    def __init__(self, client, task_id):
        self.client, self.task_id = client, task_id

    def _update(self):
        # Store the latest result for the accessors below
        self._last_result = self.client.getTaskResult(self.task_id)

    def check_is_ready(self):
        """Poll once and tell whether the task status is 'ready'."""
        self._update()
        status = self._last_result['status']
        return status == 'ready'

    def get_solution_response(self):  # Recaptcha
        solution = self._last_result['solution']
        return solution['gRecaptchaResponse']

    def get_token_response(self):  # Funcaptcha
        solution = self._last_result['solution']
        return solution['token']

    def get_answers(self):
        solution = self._last_result['solution']
        return solution['answers']

    def get_captcha_text(self):  # Image
        solution = self._last_result['solution']
        return solution['text']

    def report_incorrect(self):
        return self.client.reportIncorrectImage(self.task_id)

    def join(self, maximum_time=None):
        """Block until the task is ready.

        Polls every SLEEP_EVERY_CHECK_FINISHED seconds and raises
        AnticaptchaException once the accumulated sleep time exceeds
        ``maximum_time`` (MAXIMUM_JOIN_TIME when not given).
        """
        limit = maximum_time or MAXIMUM_JOIN_TIME
        waited = 0
        while True:
            if self.check_is_ready():
                return
            time.sleep(SLEEP_EVERY_CHECK_FINISHED)
            waited += SLEEP_EVERY_CHECK_FINISHED
            if waited > limit:
                raise AnticaptchaException(
                    None, 250,
                    "The execution time exceeded a maximum time of {} seconds. It takes {} seconds.".format(
                        limit, waited))
|
||||
|
||||
|
||||
class AnticaptchaClient(object):
    """Client that posts JSON requests to the API endpoints listed below."""

    client_key = None
    CREATE_TASK_URL = "/createTask"
    TASK_RESULT_URL = "/getTaskResult"
    BALANCE_URL = "/getBalance"
    REPORT_IMAGE_URL = "/reportIncorrectImageCaptcha"
    SOFT_ID = 847
    language_pool = "en"

    def __init__(self, client_key, language_pool="en", host="api.anti-captcha.com", use_ssl=True):
        self.client_key = client_key
        self.language_pool = language_pool
        scheme = "https" if use_ssl else "http"
        self.base_url = "{proto}://{host}/".format(proto=scheme, host=host)
        self.session = requests.Session()

    @property
    def client_ip(self):
        """Address reported by httpbin.org, fetched once and then cached."""
        try:
            self._client_ip
        except AttributeError:
            reply = self.session.get('http://httpbin.org/ip')
            self._client_ip = reply.json()['origin']
        return self._client_ip

    def _check_response(self, response):
        """Raise AnticaptchaException if ``response`` has a truthy errorId."""
        error_id = response.get('errorId', False)
        if error_id == 11:
            # Error 11: append the client's address to the description
            response['errorDescription'] = "{} Your missing IP address is {}.".format(
                response['errorDescription'], self.client_ip)
        if error_id:
            raise AnticaptchaException(response['errorId'],
                                       response['errorCode'],
                                       response['errorDescription'])

    def createTask(self, task):
        """Submit ``task`` and return a Job wrapping the new task id."""
        payload = {
            "clientKey": self.client_key,
            "task": task.serialize(),
            "softId": self.SOFT_ID,
            "languagePool": self.language_pool,
        }
        url = urljoin(self.base_url, self.CREATE_TASK_URL)
        response = self.session.post(url, json=payload).json()
        self._check_response(response)
        return Job(self, response['taskId'])

    def getTaskResult(self, task_id):
        """Return the decoded result response for ``task_id``."""
        payload = {"clientKey": self.client_key, "taskId": task_id}
        url = urljoin(self.base_url, self.TASK_RESULT_URL)
        response = self.session.post(url, json=payload).json()
        self._check_response(response)
        return response

    def getBalance(self):
        """Return the ``balance`` field of the balance response."""
        payload = {"clientKey": self.client_key}
        url = urljoin(self.base_url, self.BALANCE_URL)
        response = self.session.post(url, json=payload).json()
        self._check_response(response)
        return response['balance']

    def reportIncorrectImage(self, task_id):
        """Report ``task_id``; True unless the reply's status equals False."""
        payload = {"clientKey": self.client_key, "taskId": task_id}
        url = urljoin(self.base_url, self.REPORT_IMAGE_URL)
        response = self.session.post(url, json=payload).json()
        self._check_response(response)
        status = response.get('status', False)
        return status != False
|
@ -0,0 +1,23 @@
|
||||
class AnticaptchaException(Exception):
    """Error carrying an id, a code and a description.

    The exception message is "[<error_code>:<error_id>]<error_description>".
    """

    def __init__(self, error_id, error_code, error_description, *args):
        message = "[{}:{}]{}".format(error_code, error_id, error_description)
        super(AnticaptchaException, self).__init__(message)
        self.error_id = error_id
        self.error_code = error_code
        self.error_description = error_description
|
||||
|
||||
|
||||
AnticatpchaException = AnticaptchaException
|
||||
|
||||
|
||||
class InvalidWidthException(AnticaptchaException):
    """Raised for a width outside the allowed set; keeps it in ``width``."""

    def __init__(self, width):
        self.width = width
        super(InvalidWidthException, self).__init__(
            "AC-1",
            1,
            'Invalid width (%s). Can be one of these: 100, 50, 33, 25.' % (self.width,),
        )
|
||||
|
||||
|
||||
class MissingNameException(AnticaptchaException):
    """Raised when no name is available; keeps the class in ``cls``."""

    def __init__(self, cls):
        self.cls = cls
        template = 'Missing name data in {0}. Provide {0}.__init__(name="X") or {0}.serialize(name="X")'
        super(MissingNameException, self).__init__(
            "AC-2", 2, template.format(str(self.cls)))
|
@ -0,0 +1,199 @@
|
||||
import six
|
||||
from python_anticaptcha.exceptions import InvalidWidthException, MissingNameException
|
||||
|
||||
|
||||
class BaseField(object):
    """Base of all form fields; serializes the optional label and hint."""

    label = None
    labelHint = None

    def serialize(self, name=None):
        """Return a dict holding ``label``/``labelHint`` when they are truthy.

        ``name`` is accepted for subclasses and is not used here.
        """
        data = {}
        for key in ('label', 'labelHint'):
            value = getattr(self, key)
            if value:
                data[key] = value
        return data
|
||||
|
||||
|
||||
class NameBaseField(BaseField):
    """Field that must be serialized with a name."""

    name = None

    def serialize(self, name=None):
        """Add ``name`` to the base data.

        The argument takes precedence over ``self.name``; raises
        MissingNameException when neither is truthy.
        """
        data = super(NameBaseField, self).serialize(name)
        chosen = name or self.name
        if not chosen:
            raise MissingNameException(cls=self.__class__)
        data['name'] = chosen
        return data
|
||||
|
||||
|
||||
class SimpleText(BaseField):
    """Content field of type 'text'."""

    contentType = 'text'

    def __init__(self, content, label=None, labelHint=None, width=None):
        self.label, self.labelHint = label, labelHint
        self.content = content
        self.width = width

    def serialize(self, name=None):
        """Return the field data; raises InvalidWidthException for a bad width."""
        data = super(SimpleText, self).serialize(name)
        data['contentType'] = self.contentType
        data['content'] = self.content

        if not self.width:
            return data
        if self.width not in (100, 50, 33, 25):
            raise InvalidWidthException(self.width)
        # NOTE(review): width is stored at the top level next to an empty
        # inputOptions, unlike TextInput which nests it -- confirm intent.
        data['inputOptions'] = {}
        data['width'] = self.width
        return data
|
||||
|
||||
|
||||
class Image(BaseField):
    """Content field of type 'image' whose content is the image URL."""

    contentType = 'image'

    def __init__(self, imageUrl, label=None, labelHint=None):
        self.label, self.labelHint = label, labelHint
        self.imageUrl = imageUrl

    def serialize(self, name=None):
        data = super(Image, self).serialize(name)
        data.update(contentType=self.contentType, content=self.imageUrl)
        return data
|
||||
|
||||
|
||||
class WebLink(BaseField):
    """Content field of type 'link' made of a URL and its text."""

    contentType = 'link'

    def __init__(self, linkText, linkUrl, label=None, labelHint=None, width=None):
        self.label, self.labelHint = label, labelHint
        self.linkText, self.linkUrl = linkText, linkUrl
        self.width = width

    def serialize(self, name=None):
        """Return the field data; raises InvalidWidthException for a bad width."""
        data = super(WebLink, self).serialize(name)
        data['contentType'] = self.contentType

        if self.width:
            if self.width not in (100, 50, 33, 25):
                raise InvalidWidthException(self.width)
            # NOTE(review): width is stored at the top level next to an
            # empty inputOptions, unlike TextInput -- confirm intent.
            data['inputOptions'] = {}
            data['width'] = self.width

        data['content'] = {'url': self.linkUrl, 'text': self.linkText}
        return data
|
||||
|
||||
|
||||
class TextInput(NameBaseField):
    """Named input of type 'text' with optional width and placeholder."""

    def __init__(self, placeHolder=None, label=None, labelHint=None, width=None):
        self.label, self.labelHint = label, labelHint
        self.placeHolder = placeHolder
        self.width = width

    def serialize(self, name=None):
        """Return the field data; raises InvalidWidthException for a bad width."""
        data = super(TextInput, self).serialize(name)
        data['inputType'] = 'text'
        options = data['inputOptions'] = {}

        if self.width:
            if self.width not in (100, 50, 33, 25):
                raise InvalidWidthException(self.width)
            # Width is sent as a string
            options['width'] = str(self.width)

        if self.placeHolder:
            options['placeHolder'] = self.placeHolder
        return data
|
||||
|
||||
|
||||
class Textarea(NameBaseField):
    """Named input of type 'textarea' with optional rows, placeholder, width."""

    def __init__(self, placeHolder=None, rows=None, label=None, width=None, labelHint=None):
        self.label, self.labelHint = label, labelHint
        self.placeHolder = placeHolder
        self.rows = rows
        self.width = width

    def serialize(self, name=None):
        data = super(Textarea, self).serialize(name)
        data['inputType'] = 'textarea'
        options = data['inputOptions'] = {}
        if self.rows:
            options['rows'] = str(self.rows)
        if self.placeHolder:
            options['placeHolder'] = self.placeHolder
        # NOTE(review): width is not validated here, unlike TextInput
        if self.width:
            options['width'] = str(self.width)
        return data
|
||||
|
||||
|
||||
class Checkbox(NameBaseField):
    """Named input of type 'checkbox'; ``text`` becomes the option label."""

    def __init__(self, text, label=None, labelHint=None):
        self.label, self.labelHint = label, labelHint
        self.text = text

    def serialize(self, name=None):
        data = super(Checkbox, self).serialize(name)
        data.update(inputType='checkbox', inputOptions={'label': self.text})
        return data
|
||||
|
||||
|
||||
class Select(NameBaseField):
    """Named input of type 'select' offering a list of choices.

    Each choice is either a plain string, used as both value and caption,
    or a ``(value, caption)`` pair.
    """

    type = 'select'

    def __init__(self, label=None, choices=None, labelHint=None):
        self.label = label
        self.labelHint = labelHint
        self.choices = choices or ()

    def get_choices(self):
        """Yield a ``(value, caption)`` pair for every choice."""
        for choice in self.choices:
            # string_types also covers Python 2 byte strings, which
            # text_type missed, so they are no longer unpacked as pairs
            if isinstance(choice, six.string_types):
                yield choice, choice
            else:
                yield choice

    def serialize(self, name=None):
        """Return the field data with one option dict per choice."""
        data = super(Select, self).serialize(name)
        data['inputType'] = self.type
        data['inputOptions'] = [
            {"value": value, "caption": caption}
            for value, caption in self.get_choices()
        ]
        return data
|
||||
|
||||
|
||||
class Radio(Select):
    """Same as Select, serialized with input type 'radio'."""

    type = 'radio'
|
||||
|
||||
|
||||
class ImageUpload(NameBaseField):
    """Named input of type 'imageUpload'."""

    def __init__(self, label=None, labelHint=None):
        self.label, self.labelHint = label, labelHint

    def serialize(self, name=None):
        data = super(ImageUpload, self).serialize(name)
        data.update(inputType='imageUpload')
        return data
|
@ -0,0 +1,28 @@
|
||||
from six.moves.urllib_parse import urlparse
|
||||
|
||||
|
||||
class Proxy(object):
    """Proxy settings that can be serialized into a task dict."""

    def __init__(self, proxy_type, proxy_address, proxy_port, proxy_login, proxy_password):
        self.proxyType = proxy_type
        self.proxyAddress = proxy_address
        self.proxyPort = proxy_port
        self.proxyLogin = proxy_login
        self.proxyPassword = proxy_password

    def serialize(self):
        """Return the settings as a dict.

        Login and password are both included as soon as either is truthy.
        """
        result = {}
        result['proxyType'] = self.proxyType
        result['proxyAddress'] = self.proxyAddress
        result['proxyPort'] = self.proxyPort
        if not (self.proxyLogin or self.proxyPassword):
            return result
        result['proxyLogin'] = self.proxyLogin
        result['proxyPassword'] = self.proxyPassword
        return result

    @classmethod
    def parse_url(cls, url):
        """Build an instance from the components of ``url``."""
        parts = urlparse(url)
        return cls(
            proxy_type=parts.scheme,
            proxy_address=parts.hostname,
            proxy_port=parts.port,
            proxy_login=parts.username,
            proxy_password=parts.password,
        )
|
@ -0,0 +1,128 @@
|
||||
import base64
|
||||
from .fields import BaseField
|
||||
|
||||
|
||||
class BaseTask(object):
    """Root of the task hierarchy; end of the ``serialize`` super() chain."""

    def serialize(self, **result):
        """Return the keyword arguments received, as a dict."""
        return result
|
||||
|
||||
|
||||
class ProxyMixin(BaseTask):
    """Adds proxy, user agent and optional cookies to a task.

    ``proxy`` and ``user_agent`` are required keyword arguments;
    ``cookies`` defaults to an empty string.
    """

    def __init__(self, *args, **kwargs):
        proxy = kwargs.pop('proxy')
        user_agent = kwargs.pop('user_agent')
        cookies = kwargs.pop('cookies', '')
        self.proxy = proxy
        self.userAgent = user_agent
        self.cookies = cookies
        # Whatever is left goes to the next class in the MRO
        super(ProxyMixin, self).__init__(*args, **kwargs)

    def serialize(self, **result):
        """Extend the parent data with the proxy settings and user agent."""
        data = super(ProxyMixin, self).serialize(**result)
        data.update(self.proxy.serialize())
        data['userAgent'] = self.userAgent
        if self.cookies:
            data['cookies'] = self.cookies
        return data
|
||||
|
||||
|
||||
class NoCaptchaTaskProxylessTask(BaseTask):
    """Task of type "NoCaptchaTaskProxyless" for a website URL and key."""

    type = "NoCaptchaTaskProxyless"
    websiteURL = None
    websiteKey = None
    websiteSToken = None

    def __init__(self, website_url, website_key, website_s_token=None, is_invisible=None):
        self.websiteURL = website_url
        self.websiteKey = website_key
        self.websiteSToken = website_s_token
        self.isInvisible = is_invisible

    def serialize(self, **result):
        """Return the task as a dict.

        Accepts and forwards ``**result`` like BaseTask and ProxyMixin do,
        so the method cooperates with them when combined through multiple
        inheritance.  ``websiteSToken`` and ``isInvisible`` are included
        only when they are not None.
        """
        data = super(NoCaptchaTaskProxylessTask, self).serialize(**result)
        data.update({'type': self.type,
                     'websiteURL': self.websiteURL,
                     'websiteKey': self.websiteKey})
        if self.websiteSToken is not None:
            data['websiteSToken'] = self.websiteSToken
        if self.isInvisible is not None:
            data['isInvisible'] = self.isInvisible
        return data
|
||||
|
||||
|
||||
class FunCaptchaTask(ProxyMixin):
    """Task of type "FunCaptchaTask"; needs the ProxyMixin keyword arguments."""

    type = "FunCaptchaTask"
    websiteURL = None
    websiteKey = None

    def __init__(self, website_url, website_key, *args, **kwargs):
        self.websiteURL, self.websiteKey = website_url, website_key
        super(FunCaptchaTask, self).__init__(*args, **kwargs)

    def serialize(self, **result):
        data = super(FunCaptchaTask, self).serialize(**result)
        data['type'] = self.type
        data['websiteURL'] = self.websiteURL
        # The key is sent under the name 'websitePublicKey'
        data['websitePublicKey'] = self.websiteKey
        return data
|
||||
|
||||
|
||||
class NoCaptchaTask(ProxyMixin, NoCaptchaTaskProxylessTask):
    """NoCaptchaTaskProxylessTask combined with ProxyMixin, type "NoCaptchaTask"."""

    type = "NoCaptchaTask"
|
||||
|
||||
|
||||
class ImageToTextTask(object):
    """Task of type "ImageToTextTask" built from a readable image object."""

    type = "ImageToTextTask"
    fp = None
    phrase = None
    case = None
    numeric = None
    math = None
    minLength = None
    maxLength = None

    def __init__(self, fp, phrase=None, case=None, numeric=None, math=None, min_length=None, max_length=None):
        self.fp = fp
        self.phrase, self.case = phrase, case
        self.numeric, self.math = numeric, math
        self.minLength, self.maxLength = min_length, max_length

    def serialize(self):
        """Return the task as a dict.

        Reads ``fp`` when called; the content is sent base64-encoded under
        ``body``.
        """
        encoded = base64.b64encode(self.fp.read())
        data = {'type': self.type, 'body': encoded.decode('utf-8')}
        for key in ('phrase', 'case', 'numeric', 'math', 'minLength', 'maxLength'):
            data[key] = getattr(self, key)
        return data
|
||||
|
||||
|
||||
class CustomCaptchaTask(BaseTask):
    """Task description for a ``CustomCaptchaTask`` job.

    ``form`` maps field names to either ``BaseField`` instances or plain
    dict-like field descriptions; ``assignment`` is optional. Both are
    serialized only when truthy.
    """
    type = 'CustomCaptchaTask'
    imageUrl = None
    assignment = None
    form = None

    def __init__(self, imageUrl, form=None, assignment=None):
        self.imageUrl = imageUrl
        self.form = form if form else {}
        self.assignment = assignment

    @staticmethod
    def _serialize_field(name, field):
        """Turn one form entry into its serialized description carrying ``name``."""
        if isinstance(field, BaseField):
            return field.serialize(name)
        # Work on a copy so the caller's field description is not modified.
        described = field.copy()
        described['name'] = name
        return described

    def serialize(self):
        """Extend the parent payload with type, image URL and the optional parts."""
        payload = super(CustomCaptchaTask, self).serialize()
        payload.update({'type': self.type,
                        'imageUrl': self.imageUrl})
        if self.form:
            payload['forms'] = [self._serialize_field(name, field)
                                for name, field in self.form.items()]
        if self.assignment:
            payload['assignment'] = self.assignment
        return payload
|
@ -0,0 +1,212 @@
|
||||
# coding=utf-8
|
||||
|
||||
import time
|
||||
import logging
|
||||
import json
|
||||
import requests
|
||||
from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask, NoCaptchaTask, AnticaptchaException,\
|
||||
Proxy
|
||||
from deathbycaptcha import SocketClient as DBCClient, DEFAULT_TOKEN_TIMEOUT
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PitcherRegistry(object):
    """Name-indexed registry of pitcher classes.

    Classes are stored under their ``name`` attribute. ``register``
    returns the class it was given, so it can be used as a class
    decorator.
    """

    def __init__(self):
        # Per-instance mapping: a class-level dict would be shared by every
        # registry instance, so entries would leak between them.
        self.pitchers = {}

    def register(self, cls):
        """Store ``cls`` under ``cls.name`` (replacing any previous entry) and return it."""
        self.pitchers[cls.name] = cls
        return cls

    def get_pitcher(self, name):
        """Return the class registered under ``name``.

        Raises:
            KeyError: if nothing is registered under ``name``.
        """
        return self.pitchers[name]


# Module-level singleton, exposed under two names.
registry = pitchers = PitcherRegistry()
|
||||
|
||||
|
||||
class Pitcher(object):
    """Base class for captcha-solving backends.

    Subclasses provide ``get_client`` and ``get_job`` and normally
    override ``_throw`` to perform the actual solve, setting ``success``
    when a solution was obtained. Callers use ``throw``, which also
    records how long a successful solve took in ``solve_time``.
    """
    name = None
    tries = 3
    job = None
    client = None
    website_url = None
    website_key = None
    website_name = None
    solve_time = None
    success = False

    def __init__(self, website_name, website_url, website_key, tries=3, *args, **kwargs):
        # Extra positional and keyword arguments are accepted and ignored.
        self.website_name = website_name
        self.website_url = website_url
        self.website_key = website_key
        self.tries = tries
        self.solve_time = None
        self.success = False

    def get_client(self):
        """Return the service client; must be provided by subclasses."""
        raise NotImplementedError

    def get_job(self):
        """Return the solving job; must be provided by subclasses."""
        raise NotImplementedError

    def _throw(self):
        """Create the client first, then the job, storing both on the instance."""
        self.client = self.get_client()
        self.job = self.get_job()

    def throw(self):
        """Run ``_throw`` and return its result, timing the call when it succeeded."""
        started_at = time.time()
        outcome = self._throw()
        if not self.success:
            return outcome
        self.solve_time = time.time() - started_at
        logger.info("%s: Solving took %ss", self.website_name, int(self.solve_time))
        return outcome
|
||||
|
||||
|
||||
@registry.register
class AntiCaptchaProxyLessPitcher(Pitcher):
    """Pitcher that solves a captcha through Anti-Captcha without a proxy.

    A new client and task are created for every attempt; up to ``tries``
    attempts are made.
    """
    name = "AntiCaptchaProxyLess"
    host = "api.anti-captcha.com"
    language_pool = "en"
    client_key = None
    use_ssl = True
    is_invisible = False

    def __init__(self, website_name, client_key, website_url, website_key, tries=3, host=None, language_pool=None,
                 use_ssl=True, is_invisible=False, *args, **kwargs):
        # ``tries`` is passed positionally: combining ``tries=tries`` with
        # ``*args`` raises TypeError (multiple values for ``tries``) as soon as
        # any extra positional argument is forwarded.
        super(AntiCaptchaProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries,
                                                          *args, **kwargs)
        self.client_key = client_key
        # Falsy host / language_pool fall back to the class-level defaults.
        self.host = host or self.host
        self.language_pool = language_pool or self.language_pool
        self.use_ssl = use_ssl
        self.is_invisible = is_invisible

    def get_client(self):
        """Build an ``AnticaptchaClient`` from the stored key, language pool, host and SSL flag."""
        return AnticaptchaClient(self.client_key, self.language_pool, self.host, self.use_ssl)

    def get_job(self):
        """Create a proxyless task for the configured site and submit it through the client."""
        task = NoCaptchaTaskProxylessTask(website_url=self.website_url, website_key=self.website_key,
                                          is_invisible=self.is_invisible)
        return self.client.createTask(task)

    def _throw(self):
        """Try to solve the captcha, retrying up to ``tries`` times.

        Returns the solution response and sets ``success`` when a truthy
        solution is obtained. Returns ``None`` when giving up: on the final
        failed attempt, on zero balance, or on a bad API key. An empty
        solution response simply leads to the next attempt.

        Raises:
            AnticaptchaException: for an unrecognised error that occurs
                before the final attempt.
        """
        for i in range(self.tries):
            try:
                super(AntiCaptchaProxyLessPitcher, self)._throw()
                self.job.join()
                ret = self.job.get_solution_response()
                if ret:
                    self.success = True
                    return ret
            except AnticaptchaException as e:
                # Last attempt: give up regardless of the error kind.
                if i >= self.tries - 1:
                    logger.error("%s: Captcha solving finally failed. Exiting", self.website_name)
                    return

                if e.error_code == 'ERROR_ZERO_BALANCE':
                    logger.error("%s: No balance left on captcha solving service. Exiting", self.website_name)
                    return

                elif e.error_code == 'ERROR_NO_SLOT_AVAILABLE':
                    logger.info("%s: No captcha solving slot available, retrying", self.website_name)
                    time.sleep(5.0)
                    continue

                elif e.error_code == 'ERROR_KEY_DOES_NOT_EXIST':
                    logger.error("%s: Bad AntiCaptcha API key", self.website_name)
                    return

                elif e.error_id is None and e.error_code == 250:
                    # timeout; the final attempt was already handled above,
                    # so at this point another attempt always remains.
                    logger.info("%s: Captcha solving timed out, retrying", self.website_name)
                    time.sleep(1.0)
                    continue

                raise
|
||||
|
||||
|
||||
@registry.register
class AntiCaptchaPitcher(AntiCaptchaProxyLessPitcher):
    """Anti-Captcha pitcher that has the captcha solved through a given proxy.

    Keyword arguments consumed here (everything else goes to the parent
    constructor):

    * ``proxy`` (required): proxy URL, parsed with ``Proxy.parse_url``.
    * ``user_agent`` (required): user agent string sent with the task.
    * ``cookies`` (optional): dict rendered as ``"k=v;k2=v2"``; a value
      that is not a dict is ignored and ``cookies`` stays ``None``.

    Raises:
        KeyError: if ``proxy`` or ``user_agent`` is not supplied.
    """
    name = "AntiCaptcha"
    proxy = None
    user_agent = None
    cookies = None

    def __init__(self, *args, **kwargs):
        # The parsed proxy is deliberately not printed or logged: it may
        # carry credentials.
        self.proxy = Proxy.parse_url(kwargs.pop("proxy"))
        self.user_agent = kwargs.pop("user_agent")
        cookies = kwargs.pop("cookies", {})
        if isinstance(cookies, dict):
            # ``items()`` exists on both Python 2 and Python 3 dicts.
            self.cookies = ";".join(["%s=%s" % (k, v) for k, v in cookies.items()])

        super(AntiCaptchaPitcher, self).__init__(*args, **kwargs)

    def get_job(self):
        """Create a proxied task for the configured site and submit it through the client."""
        task = NoCaptchaTask(website_url=self.website_url, website_key=self.website_key, proxy=self.proxy,
                             user_agent=self.user_agent, cookies=self.cookies, is_invisible=self.is_invisible)
        return self.client.createTask(task)
|
||||
|
||||
|
||||
@registry.register
class DBCProxyLessPitcher(Pitcher):
    """Pitcher that solves a captcha through Death By Captcha without a proxy.

    ``client_key`` carries the account credentials as
    ``"username:password"``; only the first colon separates the two, so
    the password may itself contain colons.
    """
    name = "DeathByCaptchaProxyLess"
    username = None
    password = None
    timeout = DEFAULT_TOKEN_TIMEOUT

    def __init__(self, website_name, client_key, website_url, website_key,
                 timeout=DEFAULT_TOKEN_TIMEOUT, tries=3, *args, **kwargs):
        super(DBCProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries)

        username, separator, password = client_key.partition(":")
        if not separator:
            # Same exception type the failed tuple unpacking used to raise,
            # with an actionable message; the key itself is not echoed.
            raise ValueError("DeathByCaptcha client_key must have the form 'username:password'")
        self.username, self.password = username, password
        self.timeout = timeout

    def get_client(self):
        """Build a Death By Captcha client from the stored credentials."""
        return DBCClient(self.username, self.password)

    def get_job(self):
        """No separate job object is used by this pitcher; returns ``None``."""
        return None

    @property
    def payload_dict(self):
        """Token parameters sent to the service: the site key and the page URL."""
        return {
            "googlekey": self.website_key,
            "pageurl": self.website_url
        }

    def _throw(self):
        """Request a solution once and return its text.

        Sets ``success`` and returns the solved text when the service
        reports the result as correct; otherwise returns ``None``. Errors
        raised by the client propagate to the caller. ``tries`` is not
        consulted here: a single request is made.
        """
        super(DBCProxyLessPitcher, self)._throw()
        payload = json.dumps(self.payload_dict)
        data = self.client.decode(timeout=self.timeout, type=4, token_params=payload)
        if data and data["is_correct"]:
            self.success = True
            return data["text"]
        return None
|
||||
|
||||
|
||||
@registry.register
class DBCPitcher(DBCProxyLessPitcher):
    """Death By Captcha pitcher that has the captcha solved through a given proxy.

    The ``proxy`` keyword argument is required and is sent to the service
    as given, together with ``proxy_type``.

    Raises:
        KeyError: if ``proxy`` is not supplied.
    """
    # Own registry key: without it the inherited "DeathByCaptchaProxyLess"
    # name makes this class overwrite the proxyless pitcher in the registry.
    name = "DeathByCaptcha"
    proxy = None
    proxy_type = "HTTP"

    def __init__(self, *args, **kwargs):
        self.proxy = kwargs.pop("proxy")
        super(DBCPitcher, self).__init__(*args, **kwargs)

    @property
    def payload_dict(self):
        """Parent token parameters extended with the proxy and its type."""
        payload = super(DBCPitcher, self).payload_dict
        payload.update({
            "proxytype": self.proxy_type,
            "proxy": self.proxy
        })
        return payload
|
||||
|
Loading…
Reference in new issue