Continuing development.

pull/384/head
Louis Vézina 6 years ago
parent 0d05000e97
commit afb2a86810

@ -0,0 +1,516 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Death by Captcha HTTP and socket API clients.
There are two types of Death by Captcha (DBC hereinafter) API: HTTP and
socket ones. Both offer the same functionalily, with the socket API
sporting faster responses and using way less connections.
To access the socket API, use SocketClient class; for the HTTP API, use
HttpClient class. Both are thread-safe. SocketClient keeps a persistent
connection opened and serializes all API requests sent through it, thus
it is advised to keep a pool of them if you're script is heavily
multithreaded.
Both SocketClient and HttpClient give you the following methods:
get_user()
Returns your DBC account details as a dict with the following keys:
"user": your account numeric ID; if login fails, it will be the only
item with the value of 0;
"rate": your CAPTCHA rate, i.e. how much you will be charged for one
solved CAPTCHA in US cents;
"balance": your DBC account balance in US cents;
"is_banned": flag indicating whether your account is suspended or not.
get_balance()
Returns your DBC account balance in US cents.
get_captcha(cid)
Returns an uploaded CAPTCHA details as a dict with the following keys:
"captcha": the CAPTCHA numeric ID; if no such CAPTCHAs found, it will
be the only item with the value of 0;
"text": the CAPTCHA text, if solved, otherwise None;
"is_correct": flag indicating whether the CAPTCHA was solved correctly
(DBC can detect that in rare cases).
The only argument `cid` is the CAPTCHA numeric ID.
get_text(cid)
Returns an uploaded CAPTCHA text (None if not solved). The only argument
`cid` is the CAPTCHA numeric ID.
report(cid)
Reports an incorrectly solved CAPTCHA. The only argument `cid` is the
CAPTCHA numeric ID. Returns True on success, False otherwise.
upload(captcha)
Uploads a CAPTCHA. The only argument `captcha` can be either file-like
object (any object with `read` method defined, actually, so StringIO
will do), or CAPTCHA image file name. On successul upload you'll get
the CAPTCHA details dict (see get_captcha() method).
NOTE: AT THIS POINT THE UPLOADED CAPTCHA IS NOT SOLVED YET! You have
to poll for its status periodically using get_captcha() or get_text()
method until the CAPTCHA is solved and you get the text.
decode(captcha, timeout=DEFAULT_TIMEOUT)
A convenient method that uploads a CAPTCHA and polls for its status
periodically, but no longer than `timeout` (defaults to 60 seconds).
If solved, you'll get the CAPTCHA details dict (see get_captcha()
method for details). See upload() method for details on `captcha`
argument.
Visit http://www.deathbycaptcha.com/user/api for updates.
"""
import base64
import binascii
import errno
import imghdr
import random
import os
import select
import socket
import sys
import threading
import time
import urllib
import urllib2
try:
from json import read as json_decode, write as json_encode
except ImportError:
try:
from json import loads as json_decode, dumps as json_encode
except ImportError:
from simplejson import loads as json_decode, dumps as json_encode
# API version and unique software ID
API_VERSION = 'DBC/Python v4.6'
# Default CAPTCHA timeout and decode() polling interval
DEFAULT_TIMEOUT = 60
DEFAULT_TOKEN_TIMEOUT = 120
POLLS_INTERVAL = [1, 1, 2, 3, 2, 2, 3, 2, 2]
DFLT_POLL_INTERVAL = 3
# Base HTTP API url
HTTP_BASE_URL = 'http://api.dbcapi.me/api'
# Preferred HTTP API server's response content type, do not change
HTTP_RESPONSE_TYPE = 'application/json'
# Socket API server's host & ports range
SOCKET_HOST = 'api.dbcapi.me'
SOCKET_PORTS = range(8123, 8131)
def _load_image(captcha):
if hasattr(captcha, 'read'):
img = captcha.read()
elif type(captcha) == bytearray:
img = captcha
else:
img = ''
try:
captcha_file = open(captcha, 'rb')
except Exception:
raise
else:
img = captcha_file.read()
captcha_file.close()
if not len(img):
raise ValueError('CAPTCHA image is empty')
elif imghdr.what(None, img) is None:
raise TypeError('Unknown CAPTCHA image type')
else:
return img
class AccessDeniedException(Exception):
pass
class Client(object):
"""Death by Captcha API Client."""
def __init__(self, username, password):
self.is_verbose = False
self.userpwd = {'username': username, 'password': password}
def _log(self, cmd, msg=''):
if self.is_verbose:
print '%d %s %s' % (time.time(), cmd, msg.rstrip())
return self
def close(self):
pass
def connect(self):
pass
def get_user(self):
"""Fetch user details -- ID, balance, rate and banned status."""
raise NotImplementedError()
def get_balance(self):
"""Fetch user balance (in US cents)."""
return self.get_user().get('balance')
def get_captcha(self, cid):
"""Fetch a CAPTCHA details -- ID, text and correctness flag."""
raise NotImplementedError()
def get_text(self, cid):
"""Fetch a CAPTCHA text."""
return self.get_captcha(cid).get('text') or None
def report(self, cid):
"""Report a CAPTCHA as incorrectly solved."""
raise NotImplementedError()
def upload(self, captcha):
"""Upload a CAPTCHA.
Accepts file names and file-like objects. Returns CAPTCHA details
dict on success.
"""
raise NotImplementedError()
def decode(self, captcha=None, timeout=None, **kwargs):
"""
Try to solve a CAPTCHA.
See Client.upload() for arguments details.
Uploads a CAPTCHA, polls for its status periodically with arbitrary
timeout (in seconds), returns CAPTCHA details if (correctly) solved.
"""
if not timeout:
if not captcha:
timeout = DEFAULT_TOKEN_TIMEOUT
else:
timeout = DEFAULT_TIMEOUT
deadline = time.time() + (max(0, timeout) or DEFAULT_TIMEOUT)
uploaded_captcha = self.upload(captcha, **kwargs)
if uploaded_captcha:
intvl_idx = 0 # POLL_INTERVAL index
while deadline > time.time() and not uploaded_captcha.get('text'):
intvl, intvl_idx = self._get_poll_interval(intvl_idx)
time.sleep(intvl)
pulled = self.get_captcha(uploaded_captcha['captcha'])
if pulled['captcha'] == uploaded_captcha['captcha']:
uploaded_captcha = pulled
if uploaded_captcha.get('text') and \
uploaded_captcha.get('is_correct'):
return uploaded_captcha
def _get_poll_interval(self, idx):
"""Returns poll interval and next index depending on index provided"""
if len(POLLS_INTERVAL) > idx:
intvl = POLLS_INTERVAL[idx]
else:
intvl = DFLT_POLL_INTERVAL
idx += 1
return intvl, idx
class HttpClient(Client):
"""Death by Captcha HTTP API client."""
def __init__(self, *args):
Client.__init__(self, *args)
self.opener = urllib2.build_opener(urllib2.HTTPRedirectHandler())
def _call(self, cmd, payload=None, headers=None):
if headers is None:
headers = {}
headers['Accept'] = HTTP_RESPONSE_TYPE
headers['User-Agent'] = API_VERSION
if hasattr(payload, 'items'):
payload = urllib.urlencode(payload)
self._log('SEND', '%s %d %s' % (cmd, len(payload), payload))
else:
self._log('SEND', '%s' % cmd)
if payload is not None:
headers['Content-Length'] = len(payload)
try:
response = self.opener.open(urllib2.Request(
HTTP_BASE_URL + '/' + cmd.strip('/'),
data=payload,
headers=headers
)).read()
except urllib2.HTTPError, err:
if 403 == err.code:
raise AccessDeniedException('Access denied, please check'
' your credentials and/or balance')
elif 400 == err.code or 413 == err.code:
raise ValueError("CAPTCHA was rejected by the service, check"
" if it's a valid image")
elif 503 == err.code:
raise OverflowError("CAPTCHA was rejected due to service"
" overload, try again later")
else:
raise err
else:
self._log('RECV', '%d %s' % (len(response), response))
try:
return json_decode(response)
except Exception:
raise RuntimeError('Invalid API response')
return {}
def get_user(self):
return self._call('user', self.userpwd.copy()) or {'user': 0}
def get_captcha(self, cid):
return self._call('captcha/%d' % cid) or {'captcha': 0}
def report(self, cid):
return not self._call('captcha/%d/report' % cid,
self.userpwd.copy()).get('is_correct')
def upload(self, captcha=None, **kwargs):
boundary = binascii.hexlify(os.urandom(16))
banner = kwargs.get('banner', '')
if banner:
kwargs['banner'] = 'base64:' + base64.b64encode(_load_image(banner))
body = '\r\n'.join(('\r\n'.join((
'--%s' % boundary,
'Content-Disposition: form-data; name="%s"' % k,
'Content-Type: text/plain',
'Content-Length: %d' % len(str(v)),
'',
str(v)
))) for k, v in self.userpwd.items())
body += '\r\n'.join(('\r\n'.join((
'--%s' % boundary,
'Content-Disposition: form-data; name="%s"' % k,
'Content-Type: text/plain',
'Content-Length: %d' % len(str(v)),
'',
str(v)
))) for k, v in kwargs.items())
if captcha:
img = _load_image(captcha)
body += '\r\n'.join((
'',
'--%s' % boundary,
'Content-Disposition: form-data; name="captchafile"; '
'filename="captcha"',
'Content-Type: application/octet-stream',
'Content-Length: %d' % len(img),
'',
img,
'--%s--' % boundary,
''
))
response = self._call('captcha', body, {
'Content-Type': 'multipart/form-data; boundary="%s"' % boundary
}) or {}
if response.get('captcha'):
return response
class SocketClient(Client):
"""Death by Captcha socket API client."""
TERMINATOR = '\r\n'
def __init__(self, *args):
Client.__init__(self, *args)
self.socket_lock = threading.Lock()
self.socket = None
def close(self):
if self.socket:
self._log('CLOSE')
try:
self.socket.shutdown(socket.SHUT_RDWR)
except socket.error:
pass
finally:
self.socket.close()
self.socket = None
def connect(self):
if not self.socket:
self._log('CONN')
host = (socket.gethostbyname(SOCKET_HOST),
random.choice(SOCKET_PORTS))
self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.socket.settimeout(0)
try:
self.socket.connect(host)
except socket.error, err:
if (err.args[0] not in
(errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
self.close()
raise err
return self.socket
def __del__(self):
self.close()
def _sendrecv(self, sock, buf):
self._log('SEND', buf)
fds = [sock]
buf += self.TERMINATOR
response = ''
intvl_idx = 0
while True:
intvl, intvl_idx = self._get_poll_interval(intvl_idx)
rds, wrs, exs = select.select((not buf and fds) or [],
(buf and fds) or [],
fds,
intvl)
if exs:
raise IOError('select() failed')
try:
if wrs:
while buf:
buf = buf[wrs[0].send(buf):]
elif rds:
while True:
s = rds[0].recv(256)
if not s:
raise IOError('recv(): connection lost')
else:
response += s
except socket.error, err:
if (err.args[0] not in
(errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)):
raise err
if response.endswith(self.TERMINATOR):
self._log('RECV', response)
return response.rstrip(self.TERMINATOR)
raise IOError('send/recv timed out')
def _call(self, cmd, data=None):
if data is None:
data = {}
data['cmd'] = cmd
data['version'] = API_VERSION
request = json_encode(data)
response = None
for _ in range(2):
if not self.socket and cmd != 'login':
self._call('login', self.userpwd.copy())
self.socket_lock.acquire()
try:
sock = self.connect()
response = self._sendrecv(sock, request)
except IOError, err:
sys.stderr.write(str(err) + "\n")
self.close()
except socket.error, err:
sys.stderr.write(str(err) + "\n")
self.close()
raise IOError('Connection refused')
else:
break
finally:
self.socket_lock.release()
if response is None:
raise IOError('Connection lost or timed out during API request')
try:
response = json_decode(response)
except Exception:
raise RuntimeError('Invalid API response')
if not response.get('error'):
return response
error = response['error']
if error in ('not-logged-in', 'invalid-credentials'):
raise AccessDeniedException('Access denied, check your credentials')
elif 'banned' == error:
raise AccessDeniedException('Access denied, account is suspended')
elif 'insufficient-funds' == error:
raise AccessDeniedException(
'CAPTCHA was rejected due to low balance')
elif 'invalid-captcha' == error:
raise ValueError('CAPTCHA is not a valid image')
elif 'service-overload' == error:
raise OverflowError(
'CAPTCHA was rejected due to service overload, try again later')
else:
self.socket_lock.acquire()
self.close()
self.socket_lock.release()
raise RuntimeError('API server error occured: %s' % error)
def get_user(self):
return self._call('user') or {'user': 0}
def get_captcha(self, cid):
return self._call('captcha', {'captcha': cid}) or {'captcha': 0}
def upload(self, captcha=None, **kwargs):
data = {}
if captcha:
data['captcha'] = base64.b64encode(_load_image(captcha))
if kwargs:
banner = kwargs.get('banner', '')
if banner:
kwargs['banner'] = base64.b64encode(_load_image(banner))
data.update(kwargs)
response = self._call('upload', data)
if response.get('captcha'):
uploaded_captcha = dict(
(k, response.get(k))
for k in ('captcha', 'text', 'is_correct')
)
if not uploaded_captcha['text']:
uploaded_captcha['text'] = None
return uploaded_captcha
def report(self, cid):
return not self._call('report', {'captcha': cid}).get('is_correct')
if '__main__' == __name__:
# Put your DBC username & password here:
# client = HttpClient(sys.argv[1], sys.argv[2])
client = SocketClient(sys.argv[1], sys.argv[2])
client.is_verbose = True
print 'Your balance is %s US cents' % client.get_balance()
for fn in sys.argv[3:]:
try:
# Put your CAPTCHA image file name or file-like object, and optional
# solving timeout (in seconds) here:
captcha = client.decode(fn, DEFAULT_TIMEOUT)
except Exception, e:
sys.stderr.write('Failed uploading CAPTCHA: %s\n' % (e, ))
captcha = None
if captcha:
print 'CAPTCHA %d solved: %s' % \
(captcha['captcha'], captcha['text'])
# Report as incorrectly solved if needed. Make sure the CAPTCHA was
# in fact incorrectly solved!
# try:
# client.report(captcha['captcha'])
# except Exception, e:
# sys.stderr.write('Failed reporting CAPTCHA: %s\n' % (e, ))

@ -0,0 +1,7 @@
from .base import AnticaptchaClient
from .tasks import NoCaptchaTask, NoCaptchaTaskProxylessTask, ImageToTextTask, FunCaptchaTask
from .proxy import Proxy
from .exceptions import AnticaptchaException
from .fields import SimpleText, Image, WebLink, TextInput, Textarea, Checkbox, Select, Radio, ImageUpload
AnticatpchaException = AnticaptchaException

@ -0,0 +1,114 @@
import requests
import time
from six.moves.urllib_parse import urljoin
from .exceptions import AnticaptchaException
SLEEP_EVERY_CHECK_FINISHED = 3
MAXIMUM_JOIN_TIME = 60 * 5
class Job(object):
client = None
task_id = None
_last_result = None
def __init__(self, client, task_id):
self.client = client
self.task_id = task_id
def _update(self):
self._last_result = self.client.getTaskResult(self.task_id)
def check_is_ready(self):
self._update()
return self._last_result['status'] == 'ready'
def get_solution_response(self): # Recaptcha
return self._last_result['solution']['gRecaptchaResponse']
def get_token_response(self): # Funcaptcha
return self._last_result['solution']['token']
def get_answers(self):
return self._last_result['solution']['answers']
def get_captcha_text(self): # Image
return self._last_result['solution']['text']
def report_incorrect(self):
return self.client.reportIncorrectImage(self.task_id)
def join(self, maximum_time=None):
elapsed_time = 0
maximum_time = maximum_time or MAXIMUM_JOIN_TIME
while not self.check_is_ready():
time.sleep(SLEEP_EVERY_CHECK_FINISHED)
elapsed_time += SLEEP_EVERY_CHECK_FINISHED
if elapsed_time is not None and elapsed_time > maximum_time:
raise AnticaptchaException(None, 250,
"The execution time exceeded a maximum time of {} seconds. It takes {} seconds.".format(
maximum_time, elapsed_time))
class AnticaptchaClient(object):
client_key = None
CREATE_TASK_URL = "/createTask"
TASK_RESULT_URL = "/getTaskResult"
BALANCE_URL = "/getBalance"
REPORT_IMAGE_URL = "/reportIncorrectImageCaptcha"
SOFT_ID = 847
language_pool = "en"
def __init__(self, client_key, language_pool="en", host="api.anti-captcha.com", use_ssl=True):
self.client_key = client_key
self.language_pool = language_pool
self.base_url = "{proto}://{host}/".format(proto="https" if use_ssl else "http",
host=host)
self.session = requests.Session()
@property
def client_ip(self):
if not hasattr(self, '_client_ip'):
self._client_ip = self.session.get('http://httpbin.org/ip').json()['origin']
return self._client_ip
def _check_response(self, response):
if response.get('errorId', False) == 11:
response['errorDescription'] = "{} Your missing IP address is {}.".format(response['errorDescription'],
self.client_ip)
if response.get('errorId', False):
raise AnticaptchaException(response['errorId'],
response['errorCode'],
response['errorDescription'])
def createTask(self, task):
request = {"clientKey": self.client_key,
"task": task.serialize(),
"softId": self.SOFT_ID,
"languagePool": self.language_pool,
}
response = self.session.post(urljoin(self.base_url, self.CREATE_TASK_URL), json=request).json()
self._check_response(response)
return Job(self, response['taskId'])
def getTaskResult(self, task_id):
request = {"clientKey": self.client_key,
"taskId": task_id}
response = self.session.post(urljoin(self.base_url, self.TASK_RESULT_URL), json=request).json()
self._check_response(response)
return response
def getBalance(self):
request = {"clientKey": self.client_key}
response = self.session.post(urljoin(self.base_url, self.BALANCE_URL), json=request).json()
self._check_response(response)
return response['balance']
def reportIncorrectImage(self, task_id):
request = {"clientKey": self.client_key,
"taskId": task_id
}
response = self.session.post(urljoin(self.base_url, self.REPORT_IMAGE_URL), json=request).json()
self._check_response(response)
return response.get('status', False) != False

@ -0,0 +1,23 @@
class AnticaptchaException(Exception):
def __init__(self, error_id, error_code, error_description, *args):
super(AnticaptchaException, self).__init__("[{}:{}]{}".format(error_code, error_id, error_description))
self.error_description = error_description
self.error_id = error_id
self.error_code = error_code
AnticatpchaException = AnticaptchaException
class InvalidWidthException(AnticaptchaException):
def __init__(self, width):
self.width = width
msg = 'Invalid width (%s). Can be one of these: 100, 50, 33, 25.' % (self.width,)
super(InvalidWidthException, self).__init__("AC-1", 1, msg)
class MissingNameException(AnticaptchaException):
def __init__(self, cls):
self.cls = cls
msg = 'Missing name data in {0}. Provide {0}.__init__(name="X") or {0}.serialize(name="X")'.format(str(self.cls))
super(MissingNameException, self).__init__("AC-2", 2, msg)

@ -0,0 +1,199 @@
import six
from python_anticaptcha.exceptions import InvalidWidthException, MissingNameException
class BaseField(object):
label = None
labelHint = None
def serialize(self, name=None):
data = {}
if self.label:
data['label'] = self.label or False
if self.labelHint:
data['labelHint'] = self.labelHint or False
return data
class NameBaseField(BaseField):
name = None
def serialize(self, name=None):
data = super(NameBaseField, self).serialize(name)
if name:
data['name'] = name
elif self.name:
data['name'] = self.name
else:
raise MissingNameException(cls=self.__class__)
return data
class SimpleText(BaseField):
contentType = 'text'
def __init__(self, content, label=None, labelHint=None, width=None):
self.label = label
self.labelHint = labelHint
self.content = content
self.width = width
def serialize(self, name=None):
data = super(SimpleText, self).serialize(name)
data['contentType'] = self.contentType
data['content'] = self.content
if self.width:
if self.width not in [100, 50, 33, 25]:
raise InvalidWidthException(self.width)
data['inputOptions'] = {}
data['width'] = self.width
return data
class Image(BaseField):
contentType = 'image'
def __init__(self, imageUrl, label=None, labelHint=None):
self.label = label
self.labelHint = labelHint
self.imageUrl = imageUrl
def serialize(self, name=None):
data = super(Image, self).serialize(name)
data['contentType'] = self.contentType
data['content'] = self.imageUrl
return data
class WebLink(BaseField):
contentType = 'link'
def __init__(self, linkText, linkUrl, label=None, labelHint=None, width=None):
self.label = label
self.labelHint = labelHint
self.linkText = linkText
self.linkUrl = linkUrl
self.width = width
def serialize(self, name=None):
data = super(WebLink, self).serialize(name)
data['contentType'] = self.contentType
if self.width:
if self.width not in [100, 50, 33, 25]:
raise InvalidWidthException(self.width)
data['inputOptions'] = {}
data['width'] = self.width
data.update({'content': {'url': self.linkUrl,
'text': self.linkText}})
return data
class TextInput(NameBaseField):
def __init__(self, placeHolder=None, label=None, labelHint=None, width=None):
self.label = label
self.labelHint = labelHint
self.placeHolder = placeHolder
self.width = width
def serialize(self, name=None):
data = super(TextInput, self).serialize(name)
data['inputType'] = 'text'
data['inputOptions'] = {}
if self.width:
if self.width not in [100, 50, 33, 25]:
raise InvalidWidthException(self.width)
data['inputOptions']['width'] = str(self.width)
if self.placeHolder:
data['inputOptions']['placeHolder'] = self.placeHolder
return data
class Textarea(NameBaseField):
def __init__(self, placeHolder=None, rows=None, label=None, width=None, labelHint=None):
self.label = label
self.labelHint = labelHint
self.placeHolder = placeHolder
self.rows = rows
self.width = width
def serialize(self, name=None):
data = super(Textarea, self).serialize(name)
data['inputType'] = 'textarea'
data['inputOptions'] = {}
if self.rows:
data['inputOptions']['rows'] = str(self.rows)
if self.placeHolder:
data['inputOptions']['placeHolder'] = self.placeHolder
if self.width:
data['inputOptions']['width'] = str(self.width)
return data
class Checkbox(NameBaseField):
def __init__(self, text, label=None, labelHint=None):
self.label = label
self.labelHint = labelHint
self.text = text
def serialize(self, name=None):
data = super(Checkbox, self).serialize(name)
data['inputType'] = 'checkbox'
data['inputOptions'] = {'label': self.text}
return data
class Select(NameBaseField):
type = 'select'
def __init__(self, label=None, choices=None, labelHint=None):
self.label = label
self.labelHint = labelHint
self.choices = choices or ()
def get_choices(self):
for choice in self.choices:
if isinstance(choice, six.text_type):
yield choice, choice
else:
yield choice
def serialize(self, name=None):
data = super(Select, self).serialize(name)
data['inputType'] = self.type
data['inputOptions'] = []
for value, caption in self.get_choices():
data['inputOptions'].append({"value": value,
"caption": caption})
return data
class Radio(Select):
type = 'radio'
class ImageUpload(NameBaseField):
def __init__(self, label=None, labelHint=None):
self.label = label
self.labelHint = labelHint
def serialize(self, name=None):
data = super(ImageUpload, self).serialize(name)
data['inputType'] = 'imageUpload'
return data

@ -0,0 +1,28 @@
from six.moves.urllib_parse import urlparse
class Proxy(object):
def __init__(self, proxy_type, proxy_address, proxy_port, proxy_login, proxy_password):
self.proxyType = proxy_type
self.proxyAddress = proxy_address
self.proxyPort = proxy_port
self.proxyLogin = proxy_login
self.proxyPassword = proxy_password
def serialize(self):
result = {'proxyType': self.proxyType,
'proxyAddress': self.proxyAddress,
'proxyPort': self.proxyPort}
if self.proxyLogin or self.proxyPassword:
result['proxyLogin'] = self.proxyLogin
result['proxyPassword'] = self.proxyPassword
return result
@classmethod
def parse_url(cls, url):
parsed = urlparse(url)
return cls(proxy_type=parsed.scheme,
proxy_address=parsed.hostname,
proxy_port=parsed.port,
proxy_login=parsed.username,
proxy_password=parsed.password)

@ -0,0 +1,128 @@
import base64
from .fields import BaseField
class BaseTask(object):
def serialize(self, **result):
return result
class ProxyMixin(BaseTask):
def __init__(self, *args, **kwargs):
self.proxy = kwargs.pop('proxy')
self.userAgent = kwargs.pop('user_agent')
self.cookies = kwargs.pop('cookies', '')
super(ProxyMixin, self).__init__(*args, **kwargs)
def serialize(self, **result):
result = super(ProxyMixin, self).serialize(**result)
result.update(self.proxy.serialize())
result['userAgent'] = self.userAgent
if self.cookies:
result['cookies'] = self.cookies
return result
class NoCaptchaTaskProxylessTask(BaseTask):
type = "NoCaptchaTaskProxyless"
websiteURL = None
websiteKey = None
websiteSToken = None
def __init__(self, website_url, website_key, website_s_token=None, is_invisible=None):
self.websiteURL = website_url
self.websiteKey = website_key
self.websiteSToken = website_s_token
self.isInvisible = is_invisible
def serialize(self):
data = {'type': self.type,
'websiteURL': self.websiteURL,
'websiteKey': self.websiteKey}
if self.websiteSToken is not None:
data['websiteSToken'] = self.websiteSToken
if self.isInvisible is not None:
data['isInvisible'] = self.isInvisible
return data
class FunCaptchaTask(ProxyMixin):
type = "FunCaptchaTask"
websiteURL = None
websiteKey = None
def __init__(self, website_url, website_key, *args, **kwargs):
self.websiteURL = website_url
self.websiteKey = website_key
super(FunCaptchaTask, self).__init__(*args, **kwargs)
def serialize(self, **result):
result = super(FunCaptchaTask, self).serialize(**result)
result.update({'type': self.type,
'websiteURL': self.websiteURL,
'websitePublicKey': self.websiteKey})
return result
class NoCaptchaTask(ProxyMixin, NoCaptchaTaskProxylessTask):
type = "NoCaptchaTask"
class ImageToTextTask(object):
type = "ImageToTextTask"
fp = None
phrase = None
case = None
numeric = None
math = None
minLength = None
maxLength = None
def __init__(self, fp, phrase=None, case=None, numeric=None, math=None, min_length=None, max_length=None):
self.fp = fp
self.phrase = phrase
self.case = case
self.numeric = numeric
self.math = math
self.minLength = min_length
self.maxLength = max_length
def serialize(self):
return {'type': self.type,
'body': base64.b64encode(self.fp.read()).decode('utf-8'),
'phrase': self.phrase,
'case': self.case,
'numeric': self.numeric,
'math': self.math,
'minLength': self.minLength,
'maxLength': self.maxLength}
class CustomCaptchaTask(BaseTask):
type = 'CustomCaptchaTask'
imageUrl = None
assignment = None
form = None
def __init__(self, imageUrl, form=None, assignment=None):
self.imageUrl = imageUrl
self.form = form or {}
self.assignment = assignment
def serialize(self):
data = super(CustomCaptchaTask, self).serialize()
data.update({'type': self.type,
'imageUrl': self.imageUrl})
if self.form:
forms = []
for name, field in self.form.items():
if isinstance(field, BaseField):
forms.append(field.serialize(name))
else:
field = field.copy()
field['name'] = name
forms.append(field)
data['forms'] = forms
if self.assignment:
data['assignment'] = self.assignment
return data

@ -0,0 +1,212 @@
# coding=utf-8
import time
import logging
import json
import requests
from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask, NoCaptchaTask, AnticaptchaException,\
Proxy
from deathbycaptcha import SocketClient as DBCClient, DEFAULT_TOKEN_TIMEOUT
logger = logging.getLogger(__name__)
class PitcherRegistry(object):
pitchers = {}
def register(self, cls):
self.pitchers[cls.name] = cls
return cls
def get_pitcher(self, name):
return self.pitchers[name]
registry = pitchers = PitcherRegistry()
class Pitcher(object):
name = None
tries = 3
job = None
client = None
website_url = None
website_key = None
website_name = None
solve_time = None
success = False
def __init__(self, website_name, website_url, website_key, tries=3, *args, **kwargs):
self.tries = tries
self.website_name = website_name
self.website_key = website_key
self.website_url = website_url
self.success = False
self.solve_time = None
def get_client(self):
raise NotImplementedError
def get_job(self):
raise NotImplementedError
def _throw(self):
self.client = self.get_client()
self.job = self.get_job()
def throw(self):
t = time.time()
data = self._throw()
if self.success:
self.solve_time = time.time() - t
logger.info("%s: Solving took %ss", self.website_name, int(self.solve_time))
return data
@registry.register
class AntiCaptchaProxyLessPitcher(Pitcher):
name = "AntiCaptchaProxyLess"
host = "api.anti-captcha.com"
language_pool = "en"
client_key = None
use_ssl = True
is_invisible = False
def __init__(self, website_name, client_key, website_url, website_key, tries=3, host=None, language_pool=None,
use_ssl=True, is_invisible=False, *args, **kwargs):
super(AntiCaptchaProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries, *args,
**kwargs)
self.client_key = client_key
self.host = host or self.host
self.language_pool = language_pool or self.language_pool
self.use_ssl = use_ssl
self.is_invisible = is_invisible
def get_client(self):
return AnticaptchaClient(self.client_key, self.language_pool, self.host, self.use_ssl)
def get_job(self):
task = NoCaptchaTaskProxylessTask(website_url=self.website_url, website_key=self.website_key,
is_invisible=self.is_invisible)
return self.client.createTask(task)
def _throw(self):
for i in range(self.tries):
try:
super(AntiCaptchaProxyLessPitcher, self)._throw()
self.job.join()
ret = self.job.get_solution_response()
if ret:
self.success = True
return ret
except AnticaptchaException as e:
if i >= self.tries - 1:
logger.error("%s: Captcha solving finally failed. Exiting", self.website_name)
return
if e.error_code == 'ERROR_ZERO_BALANCE':
logger.error("%s: No balance left on captcha solving service. Exiting", self.website_name)
return
elif e.error_code == 'ERROR_NO_SLOT_AVAILABLE':
logger.info("%s: No captcha solving slot available, retrying", self.website_name)
time.sleep(5.0)
continue
elif e.error_code == 'ERROR_KEY_DOES_NOT_EXIST':
logger.error("%s: Bad AntiCaptcha API key", self.website_name)
return
elif e.error_id is None and e.error_code == 250:
# timeout
if i < self.tries:
logger.info("%s: Captcha solving timed out, retrying", self.website_name)
time.sleep(1.0)
continue
else:
logger.error("%s: Captcha solving timed out three times; bailing out", self.website_name)
return
raise
@registry.register
class AntiCaptchaPitcher(AntiCaptchaProxyLessPitcher):
name = "AntiCaptcha"
proxy = None
user_agent = None
cookies = None
def __init__(self, *args, **kwargs):
self.proxy = Proxy.parse_url(kwargs.pop("proxy"))
print self.proxy.__dict__
self.user_agent = kwargs.pop("user_agent")
cookies = kwargs.pop("cookies", {})
if isinstance(cookies, dict):
self.cookies = ";".join(["%s=%s" % (k, v) for k, v in cookies.iteritems()])
super(AntiCaptchaPitcher, self).__init__(*args, **kwargs)
def get_job(self):
task = NoCaptchaTask(website_url=self.website_url, website_key=self.website_key, proxy=self.proxy,
user_agent=self.user_agent, cookies=self.cookies, is_invisible=self.is_invisible)
return self.client.createTask(task)
@registry.register
class DBCProxyLessPitcher(Pitcher):
name = "DeathByCaptchaProxyLess"
username = None
password = None
def __init__(self, website_name, client_key, website_url, website_key,
timeout=DEFAULT_TOKEN_TIMEOUT, tries=3, *args, **kwargs):
super(DBCProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries)
self.username, self.password = client_key.split(":", 1)
self.timeout = timeout
def get_client(self):
return DBCClient(self.username, self.password)
def get_job(self):
pass
@property
def payload_dict(self):
return {
"googlekey": self.website_key,
"pageurl": self.website_url
}
def _throw(self):
super(DBCProxyLessPitcher, self)._throw()
payload = json.dumps(self.payload_dict)
try:
#balance = self.client.get_balance()
data = self.client.decode(timeout=self.timeout, type=4, token_params=payload)
if data and data["is_correct"]:
self.success = True
return data["text"]
except:
raise
@registry.register
class DBCPitcher(DBCProxyLessPitcher):
proxy = None
proxy_type = "HTTP"
def __init__(self, *args, **kwargs):
self.proxy = kwargs.pop("proxy")
super(DBCPitcher, self).__init__(*args, **kwargs)
@property
def payload_dict(self):
payload = super(DBCPitcher, self).payload_dict
payload.update({
"proxytype": self.proxy_type,
"proxy": self.proxy
})
return payload
Loading…
Cancel
Save