diff --git a/libs/deathbycaptcha.py b/libs/deathbycaptcha.py new file mode 100644 index 000000000..3c2fafb77 --- /dev/null +++ b/libs/deathbycaptcha.py @@ -0,0 +1,516 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +"""Death by Captcha HTTP and socket API clients. + +There are two types of Death by Captcha (DBC hereinafter) API: HTTP and +socket ones. Both offer the same functionality, with the socket API +sporting faster responses and using way fewer connections. + +To access the socket API, use SocketClient class; for the HTTP API, use +HttpClient class. Both are thread-safe. SocketClient keeps a persistent +connection opened and serializes all API requests sent through it, thus +it is advised to keep a pool of them if your script is heavily +multithreaded. + +Both SocketClient and HttpClient give you the following methods: + +get_user() + Returns your DBC account details as a dict with the following keys: + + "user": your account numeric ID; if login fails, it will be the only + item with the value of 0; + "rate": your CAPTCHA rate, i.e. how much you will be charged for one + solved CAPTCHA in US cents; + "balance": your DBC account balance in US cents; + "is_banned": flag indicating whether your account is suspended or not. + +get_balance() + Returns your DBC account balance in US cents. + +get_captcha(cid) + Returns an uploaded CAPTCHA's details as a dict with the following keys: + + "captcha": the CAPTCHA numeric ID; if no such CAPTCHAs found, it will + be the only item with the value of 0; + "text": the CAPTCHA text, if solved, otherwise None; + "is_correct": flag indicating whether the CAPTCHA was solved correctly + (DBC can detect that in rare cases). + + The only argument `cid` is the CAPTCHA numeric ID. + +get_text(cid) + Returns an uploaded CAPTCHA's text (None if not solved). The only argument + `cid` is the CAPTCHA numeric ID. + +report(cid) + Reports an incorrectly solved CAPTCHA. The only argument `cid` is the + CAPTCHA numeric ID. 
Returns True on success, False otherwise. + +upload(captcha) + Uploads a CAPTCHA. The only argument `captcha` can be either file-like + object (any object with `read` method defined, actually, so StringIO + will do), or CAPTCHA image file name. On successful upload you'll get + the CAPTCHA details dict (see get_captcha() method). + + NOTE: AT THIS POINT THE UPLOADED CAPTCHA IS NOT SOLVED YET! You have + to poll for its status periodically using get_captcha() or get_text() + method until the CAPTCHA is solved and you get the text. + +decode(captcha, timeout=DEFAULT_TIMEOUT) + A convenient method that uploads a CAPTCHA and polls for its status + periodically, but no longer than `timeout` (defaults to 60 seconds). + If solved, you'll get the CAPTCHA details dict (see get_captcha() + method for details). See upload() method for details on `captcha` + argument. + +Visit http://www.deathbycaptcha.com/user/api for updates. + +""" + +import base64 +import binascii +import errno +import imghdr +import random +import os +import select +import socket +import sys +import threading +import time +import urllib +import urllib2 +try: + from json import read as json_decode, write as json_encode +except ImportError: + try: + from json import loads as json_decode, dumps as json_encode + except ImportError: + from simplejson import loads as json_decode, dumps as json_encode + + +# API version and unique software ID +API_VERSION = 'DBC/Python v4.6' + +# Default CAPTCHA timeout and decode() polling interval +DEFAULT_TIMEOUT = 60 +DEFAULT_TOKEN_TIMEOUT = 120 +POLLS_INTERVAL = [1, 1, 2, 3, 2, 2, 3, 2, 2] +DFLT_POLL_INTERVAL = 3 + +# Base HTTP API url +HTTP_BASE_URL = 'http://api.dbcapi.me/api' + +# Preferred HTTP API server's response content type, do not change +HTTP_RESPONSE_TYPE = 'application/json' + +# Socket API server's host & ports range +SOCKET_HOST = 'api.dbcapi.me' +SOCKET_PORTS = range(8123, 8131) + + +def _load_image(captcha): + if hasattr(captcha, 'read'): + img = 
captcha.read() + elif type(captcha) == bytearray: + img = captcha + else: + img = '' + try: + captcha_file = open(captcha, 'rb') + except Exception: + raise + else: + img = captcha_file.read() + captcha_file.close() + if not len(img): + raise ValueError('CAPTCHA image is empty') + elif imghdr.what(None, img) is None: + raise TypeError('Unknown CAPTCHA image type') + else: + return img + + +class AccessDeniedException(Exception): + pass + + +class Client(object): + + """Death by Captcha API Client.""" + + def __init__(self, username, password): + self.is_verbose = False + self.userpwd = {'username': username, 'password': password} + + def _log(self, cmd, msg=''): + if self.is_verbose: + print '%d %s %s' % (time.time(), cmd, msg.rstrip()) + return self + + def close(self): + pass + + def connect(self): + pass + + def get_user(self): + """Fetch user details -- ID, balance, rate and banned status.""" + raise NotImplementedError() + + def get_balance(self): + """Fetch user balance (in US cents).""" + return self.get_user().get('balance') + + def get_captcha(self, cid): + """Fetch a CAPTCHA details -- ID, text and correctness flag.""" + raise NotImplementedError() + + def get_text(self, cid): + """Fetch a CAPTCHA text.""" + return self.get_captcha(cid).get('text') or None + + def report(self, cid): + """Report a CAPTCHA as incorrectly solved.""" + raise NotImplementedError() + + def upload(self, captcha): + """Upload a CAPTCHA. + + Accepts file names and file-like objects. Returns CAPTCHA details + dict on success. + + """ + raise NotImplementedError() + + def decode(self, captcha=None, timeout=None, **kwargs): + """ + Try to solve a CAPTCHA. + + See Client.upload() for arguments details. + + Uploads a CAPTCHA, polls for its status periodically with arbitrary + timeout (in seconds), returns CAPTCHA details if (correctly) solved. 
+ """ + if not timeout: + if not captcha: + timeout = DEFAULT_TOKEN_TIMEOUT + else: + timeout = DEFAULT_TIMEOUT + + deadline = time.time() + (max(0, timeout) or DEFAULT_TIMEOUT) + uploaded_captcha = self.upload(captcha, **kwargs) + if uploaded_captcha: + intvl_idx = 0 # POLL_INTERVAL index + while deadline > time.time() and not uploaded_captcha.get('text'): + intvl, intvl_idx = self._get_poll_interval(intvl_idx) + time.sleep(intvl) + pulled = self.get_captcha(uploaded_captcha['captcha']) + if pulled['captcha'] == uploaded_captcha['captcha']: + uploaded_captcha = pulled + if uploaded_captcha.get('text') and \ + uploaded_captcha.get('is_correct'): + return uploaded_captcha + + def _get_poll_interval(self, idx): + """Returns poll interval and next index depending on index provided""" + + if len(POLLS_INTERVAL) > idx: + intvl = POLLS_INTERVAL[idx] + else: + intvl = DFLT_POLL_INTERVAL + idx += 1 + + return intvl, idx + + +class HttpClient(Client): + + """Death by Captcha HTTP API client.""" + + def __init__(self, *args): + Client.__init__(self, *args) + self.opener = urllib2.build_opener(urllib2.HTTPRedirectHandler()) + + def _call(self, cmd, payload=None, headers=None): + if headers is None: + headers = {} + headers['Accept'] = HTTP_RESPONSE_TYPE + headers['User-Agent'] = API_VERSION + if hasattr(payload, 'items'): + payload = urllib.urlencode(payload) + self._log('SEND', '%s %d %s' % (cmd, len(payload), payload)) + else: + self._log('SEND', '%s' % cmd) + if payload is not None: + headers['Content-Length'] = len(payload) + try: + response = self.opener.open(urllib2.Request( + HTTP_BASE_URL + '/' + cmd.strip('/'), + data=payload, + headers=headers + )).read() + except urllib2.HTTPError, err: + if 403 == err.code: + raise AccessDeniedException('Access denied, please check' + ' your credentials and/or balance') + elif 400 == err.code or 413 == err.code: + raise ValueError("CAPTCHA was rejected by the service, check" + " if it's a valid image") + elif 503 == err.code: + 
raise OverflowError("CAPTCHA was rejected due to service" + " overload, try again later") + else: + raise err + else: + self._log('RECV', '%d %s' % (len(response), response)) + try: + return json_decode(response) + except Exception: + raise RuntimeError('Invalid API response') + return {} + + def get_user(self): + return self._call('user', self.userpwd.copy()) or {'user': 0} + + def get_captcha(self, cid): + return self._call('captcha/%d' % cid) or {'captcha': 0} + + def report(self, cid): + return not self._call('captcha/%d/report' % cid, + self.userpwd.copy()).get('is_correct') + + def upload(self, captcha=None, **kwargs): + boundary = binascii.hexlify(os.urandom(16)) + banner = kwargs.get('banner', '') + if banner: + kwargs['banner'] = 'base64:' + base64.b64encode(_load_image(banner)) + body = '\r\n'.join(('\r\n'.join(( + '--%s' % boundary, + 'Content-Disposition: form-data; name="%s"' % k, + 'Content-Type: text/plain', + 'Content-Length: %d' % len(str(v)), + '', + str(v) + ))) for k, v in self.userpwd.items()) + + body += '\r\n'.join(('\r\n'.join(( + '--%s' % boundary, + 'Content-Disposition: form-data; name="%s"' % k, + 'Content-Type: text/plain', + 'Content-Length: %d' % len(str(v)), + '', + str(v) + ))) for k, v in kwargs.items()) + + if captcha: + img = _load_image(captcha) + body += '\r\n'.join(( + '', + '--%s' % boundary, + 'Content-Disposition: form-data; name="captchafile"; ' + 'filename="captcha"', + 'Content-Type: application/octet-stream', + 'Content-Length: %d' % len(img), + '', + img, + '--%s--' % boundary, + '' + )) + + response = self._call('captcha', body, { + 'Content-Type': 'multipart/form-data; boundary="%s"' % boundary + }) or {} + if response.get('captcha'): + return response + + +class SocketClient(Client): + + """Death by Captcha socket API client.""" + + TERMINATOR = '\r\n' + + def __init__(self, *args): + Client.__init__(self, *args) + self.socket_lock = threading.Lock() + self.socket = None + + def close(self): + if self.socket: + 
self._log('CLOSE') + try: + self.socket.shutdown(socket.SHUT_RDWR) + except socket.error: + pass + finally: + self.socket.close() + self.socket = None + + def connect(self): + if not self.socket: + self._log('CONN') + host = (socket.gethostbyname(SOCKET_HOST), + random.choice(SOCKET_PORTS)) + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.socket.settimeout(0) + try: + self.socket.connect(host) + except socket.error, err: + if (err.args[0] not in + (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)): + self.close() + raise err + return self.socket + + def __del__(self): + self.close() + + def _sendrecv(self, sock, buf): + self._log('SEND', buf) + fds = [sock] + buf += self.TERMINATOR + response = '' + intvl_idx = 0 + while True: + intvl, intvl_idx = self._get_poll_interval(intvl_idx) + rds, wrs, exs = select.select((not buf and fds) or [], + (buf and fds) or [], + fds, + intvl) + if exs: + raise IOError('select() failed') + try: + if wrs: + while buf: + buf = buf[wrs[0].send(buf):] + elif rds: + while True: + s = rds[0].recv(256) + if not s: + raise IOError('recv(): connection lost') + else: + response += s + except socket.error, err: + if (err.args[0] not in + (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINPROGRESS)): + raise err + if response.endswith(self.TERMINATOR): + self._log('RECV', response) + return response.rstrip(self.TERMINATOR) + raise IOError('send/recv timed out') + + def _call(self, cmd, data=None): + if data is None: + data = {} + data['cmd'] = cmd + data['version'] = API_VERSION + request = json_encode(data) + + response = None + for _ in range(2): + if not self.socket and cmd != 'login': + self._call('login', self.userpwd.copy()) + self.socket_lock.acquire() + try: + sock = self.connect() + response = self._sendrecv(sock, request) + except IOError, err: + sys.stderr.write(str(err) + "\n") + self.close() + except socket.error, err: + sys.stderr.write(str(err) + "\n") + self.close() + raise IOError('Connection refused') + 
else: + break + finally: + self.socket_lock.release() + + if response is None: + raise IOError('Connection lost or timed out during API request') + + try: + response = json_decode(response) + except Exception: + raise RuntimeError('Invalid API response') + + if not response.get('error'): + return response + + error = response['error'] + if error in ('not-logged-in', 'invalid-credentials'): + raise AccessDeniedException('Access denied, check your credentials') + elif 'banned' == error: + raise AccessDeniedException('Access denied, account is suspended') + elif 'insufficient-funds' == error: + raise AccessDeniedException( + 'CAPTCHA was rejected due to low balance') + elif 'invalid-captcha' == error: + raise ValueError('CAPTCHA is not a valid image') + elif 'service-overload' == error: + raise OverflowError( + 'CAPTCHA was rejected due to service overload, try again later') + else: + self.socket_lock.acquire() + self.close() + self.socket_lock.release() + raise RuntimeError('API server error occured: %s' % error) + + def get_user(self): + return self._call('user') or {'user': 0} + + def get_captcha(self, cid): + return self._call('captcha', {'captcha': cid}) or {'captcha': 0} + + def upload(self, captcha=None, **kwargs): + data = {} + if captcha: + data['captcha'] = base64.b64encode(_load_image(captcha)) + if kwargs: + banner = kwargs.get('banner', '') + if banner: + kwargs['banner'] = base64.b64encode(_load_image(banner)) + data.update(kwargs) + response = self._call('upload', data) + if response.get('captcha'): + uploaded_captcha = dict( + (k, response.get(k)) + for k in ('captcha', 'text', 'is_correct') + ) + if not uploaded_captcha['text']: + uploaded_captcha['text'] = None + return uploaded_captcha + + def report(self, cid): + return not self._call('report', {'captcha': cid}).get('is_correct') + + +if '__main__' == __name__: + # Put your DBC username & password here: + # client = HttpClient(sys.argv[1], sys.argv[2]) + client = SocketClient(sys.argv[1], 
sys.argv[2]) + client.is_verbose = True + + print 'Your balance is %s US cents' % client.get_balance() + + for fn in sys.argv[3:]: + try: + # Put your CAPTCHA image file name or file-like object, and optional + # solving timeout (in seconds) here: + captcha = client.decode(fn, DEFAULT_TIMEOUT) + except Exception, e: + sys.stderr.write('Failed uploading CAPTCHA: %s\n' % (e, )) + captcha = None + + if captcha: + print 'CAPTCHA %d solved: %s' % \ + (captcha['captcha'], captcha['text']) + + # Report as incorrectly solved if needed. Make sure the CAPTCHA was + # in fact incorrectly solved! + # try: + # client.report(captcha['captcha']) + # except Exception, e: + # sys.stderr.write('Failed reporting CAPTCHA: %s\n' % (e, )) diff --git a/libs/python_anticaptcha/__init__.py b/libs/python_anticaptcha/__init__.py new file mode 100644 index 000000000..ac9f0550f --- /dev/null +++ b/libs/python_anticaptcha/__init__.py @@ -0,0 +1,7 @@ +from .base import AnticaptchaClient +from .tasks import NoCaptchaTask, NoCaptchaTaskProxylessTask, ImageToTextTask, FunCaptchaTask +from .proxy import Proxy +from .exceptions import AnticaptchaException +from .fields import SimpleText, Image, WebLink, TextInput, Textarea, Checkbox, Select, Radio, ImageUpload + +AnticatpchaException = AnticaptchaException \ No newline at end of file diff --git a/libs/python_anticaptcha/base.py b/libs/python_anticaptcha/base.py new file mode 100644 index 000000000..fca8cdf53 --- /dev/null +++ b/libs/python_anticaptcha/base.py @@ -0,0 +1,114 @@ +import requests +import time + +from six.moves.urllib_parse import urljoin +from .exceptions import AnticaptchaException + +SLEEP_EVERY_CHECK_FINISHED = 3 +MAXIMUM_JOIN_TIME = 60 * 5 + + +class Job(object): + client = None + task_id = None + _last_result = None + + def __init__(self, client, task_id): + self.client = client + self.task_id = task_id + + def _update(self): + self._last_result = self.client.getTaskResult(self.task_id) + + def check_is_ready(self): + 
self._update() + return self._last_result['status'] == 'ready' + + def get_solution_response(self): # Recaptcha + return self._last_result['solution']['gRecaptchaResponse'] + + def get_token_response(self): # Funcaptcha + return self._last_result['solution']['token'] + + def get_answers(self): + return self._last_result['solution']['answers'] + + def get_captcha_text(self): # Image + return self._last_result['solution']['text'] + + def report_incorrect(self): + return self.client.reportIncorrectImage(self.task_id) + + def join(self, maximum_time=None): + elapsed_time = 0 + maximum_time = maximum_time or MAXIMUM_JOIN_TIME + while not self.check_is_ready(): + time.sleep(SLEEP_EVERY_CHECK_FINISHED) + elapsed_time += SLEEP_EVERY_CHECK_FINISHED + if elapsed_time is not None and elapsed_time > maximum_time: + raise AnticaptchaException(None, 250, + "The execution time exceeded a maximum time of {} seconds. It takes {} seconds.".format( + maximum_time, elapsed_time)) + + +class AnticaptchaClient(object): + client_key = None + CREATE_TASK_URL = "/createTask" + TASK_RESULT_URL = "/getTaskResult" + BALANCE_URL = "/getBalance" + REPORT_IMAGE_URL = "/reportIncorrectImageCaptcha" + SOFT_ID = 847 + language_pool = "en" + + def __init__(self, client_key, language_pool="en", host="api.anti-captcha.com", use_ssl=True): + self.client_key = client_key + self.language_pool = language_pool + self.base_url = "{proto}://{host}/".format(proto="https" if use_ssl else "http", + host=host) + self.session = requests.Session() + + @property + def client_ip(self): + if not hasattr(self, '_client_ip'): + self._client_ip = self.session.get('http://httpbin.org/ip').json()['origin'] + return self._client_ip + + def _check_response(self, response): + if response.get('errorId', False) == 11: + response['errorDescription'] = "{} Your missing IP address is {}.".format(response['errorDescription'], + self.client_ip) + if response.get('errorId', False): + raise AnticaptchaException(response['errorId'], + 
response['errorCode'], + response['errorDescription']) + + def createTask(self, task): + request = {"clientKey": self.client_key, + "task": task.serialize(), + "softId": self.SOFT_ID, + "languagePool": self.language_pool, + } + response = self.session.post(urljoin(self.base_url, self.CREATE_TASK_URL), json=request).json() + self._check_response(response) + return Job(self, response['taskId']) + + def getTaskResult(self, task_id): + request = {"clientKey": self.client_key, + "taskId": task_id} + response = self.session.post(urljoin(self.base_url, self.TASK_RESULT_URL), json=request).json() + self._check_response(response) + return response + + def getBalance(self): + request = {"clientKey": self.client_key} + response = self.session.post(urljoin(self.base_url, self.BALANCE_URL), json=request).json() + self._check_response(response) + return response['balance'] + + def reportIncorrectImage(self, task_id): + request = {"clientKey": self.client_key, + "taskId": task_id + } + response = self.session.post(urljoin(self.base_url, self.REPORT_IMAGE_URL), json=request).json() + self._check_response(response) + return response.get('status', False) != False diff --git a/libs/python_anticaptcha/exceptions.py b/libs/python_anticaptcha/exceptions.py new file mode 100644 index 000000000..f37eb372c --- /dev/null +++ b/libs/python_anticaptcha/exceptions.py @@ -0,0 +1,23 @@ +class AnticaptchaException(Exception): + def __init__(self, error_id, error_code, error_description, *args): + super(AnticaptchaException, self).__init__("[{}:{}]{}".format(error_code, error_id, error_description)) + self.error_description = error_description + self.error_id = error_id + self.error_code = error_code + + +AnticatpchaException = AnticaptchaException + + +class InvalidWidthException(AnticaptchaException): + def __init__(self, width): + self.width = width + msg = 'Invalid width (%s). Can be one of these: 100, 50, 33, 25.' 
% (self.width,) + super(InvalidWidthException, self).__init__("AC-1", 1, msg) + + +class MissingNameException(AnticaptchaException): + def __init__(self, cls): + self.cls = cls + msg = 'Missing name data in {0}. Provide {0}.__init__(name="X") or {0}.serialize(name="X")'.format(str(self.cls)) + super(MissingNameException, self).__init__("AC-2", 2, msg) diff --git a/libs/python_anticaptcha/fields.py b/libs/python_anticaptcha/fields.py new file mode 100644 index 000000000..9e6245946 --- /dev/null +++ b/libs/python_anticaptcha/fields.py @@ -0,0 +1,199 @@ +import six +from python_anticaptcha.exceptions import InvalidWidthException, MissingNameException + + +class BaseField(object): + label = None + labelHint = None + + def serialize(self, name=None): + data = {} + if self.label: + data['label'] = self.label or False + if self.labelHint: + data['labelHint'] = self.labelHint or False + return data + + +class NameBaseField(BaseField): + name = None + + def serialize(self, name=None): + data = super(NameBaseField, self).serialize(name) + if name: + data['name'] = name + elif self.name: + data['name'] = self.name + else: + raise MissingNameException(cls=self.__class__) + return data + + +class SimpleText(BaseField): + contentType = 'text' + + def __init__(self, content, label=None, labelHint=None, width=None): + self.label = label + self.labelHint = labelHint + + self.content = content + self.width = width + + def serialize(self, name=None): + data = super(SimpleText, self).serialize(name) + data['contentType'] = self.contentType + data['content'] = self.content + + if self.width: + if self.width not in [100, 50, 33, 25]: + raise InvalidWidthException(self.width) + data['inputOptions'] = {} + data['width'] = self.width + return data + + +class Image(BaseField): + contentType = 'image' + + def __init__(self, imageUrl, label=None, labelHint=None): + self.label = label + self.labelHint = labelHint + self.imageUrl = imageUrl + + def serialize(self, name=None): + data = 
super(Image, self).serialize(name) + data['contentType'] = self.contentType + data['content'] = self.imageUrl + return data + + +class WebLink(BaseField): + contentType = 'link' + + def __init__(self, linkText, linkUrl, label=None, labelHint=None, width=None): + self.label = label + self.labelHint = labelHint + + self.linkText = linkText + self.linkUrl = linkUrl + + self.width = width + + def serialize(self, name=None): + data = super(WebLink, self).serialize(name) + data['contentType'] = self.contentType + + if self.width: + if self.width not in [100, 50, 33, 25]: + raise InvalidWidthException(self.width) + data['inputOptions'] = {} + data['width'] = self.width + + data.update({'content': {'url': self.linkUrl, + 'text': self.linkText}}) + + return data + + +class TextInput(NameBaseField): + def __init__(self, placeHolder=None, label=None, labelHint=None, width=None): + self.label = label + self.labelHint = labelHint + + self.placeHolder = placeHolder + + self.width = width + + def serialize(self, name=None): + data = super(TextInput, self).serialize(name) + data['inputType'] = 'text' + + data['inputOptions'] = {} + + if self.width: + if self.width not in [100, 50, 33, 25]: + raise InvalidWidthException(self.width) + + data['inputOptions']['width'] = str(self.width) + + if self.placeHolder: + data['inputOptions']['placeHolder'] = self.placeHolder + return data + + +class Textarea(NameBaseField): + def __init__(self, placeHolder=None, rows=None, label=None, width=None, labelHint=None): + self.label = label + self.labelHint = labelHint + + self.placeHolder = placeHolder + self.rows = rows + self.width = width + + def serialize(self, name=None): + data = super(Textarea, self).serialize(name) + data['inputType'] = 'textarea' + data['inputOptions'] = {} + if self.rows: + data['inputOptions']['rows'] = str(self.rows) + if self.placeHolder: + data['inputOptions']['placeHolder'] = self.placeHolder + if self.width: + data['inputOptions']['width'] = str(self.width) + return 
data + + +class Checkbox(NameBaseField): + def __init__(self, text, label=None, labelHint=None): + self.label = label + self.labelHint = labelHint + + self.text = text + + def serialize(self, name=None): + data = super(Checkbox, self).serialize(name) + data['inputType'] = 'checkbox' + data['inputOptions'] = {'label': self.text} + return data + + +class Select(NameBaseField): + type = 'select' + + def __init__(self, label=None, choices=None, labelHint=None): + self.label = label + self.labelHint = labelHint + self.choices = choices or () + + def get_choices(self): + for choice in self.choices: + if isinstance(choice, six.text_type): + yield choice, choice + else: + yield choice + + def serialize(self, name=None): + data = super(Select, self).serialize(name) + data['inputType'] = self.type + + data['inputOptions'] = [] + for value, caption in self.get_choices(): + data['inputOptions'].append({"value": value, + "caption": caption}) + + return data + + +class Radio(Select): + type = 'radio' + + +class ImageUpload(NameBaseField): + def __init__(self, label=None, labelHint=None): + self.label = label + self.labelHint = labelHint + + def serialize(self, name=None): + data = super(ImageUpload, self).serialize(name) + data['inputType'] = 'imageUpload' + return data diff --git a/libs/python_anticaptcha/proxy.py b/libs/python_anticaptcha/proxy.py new file mode 100644 index 000000000..907232f7e --- /dev/null +++ b/libs/python_anticaptcha/proxy.py @@ -0,0 +1,28 @@ +from six.moves.urllib_parse import urlparse + + +class Proxy(object): + def __init__(self, proxy_type, proxy_address, proxy_port, proxy_login, proxy_password): + self.proxyType = proxy_type + self.proxyAddress = proxy_address + self.proxyPort = proxy_port + self.proxyLogin = proxy_login + self.proxyPassword = proxy_password + + def serialize(self): + result = {'proxyType': self.proxyType, + 'proxyAddress': self.proxyAddress, + 'proxyPort': self.proxyPort} + if self.proxyLogin or self.proxyPassword: + 
result['proxyLogin'] = self.proxyLogin + result['proxyPassword'] = self.proxyPassword + return result + + @classmethod + def parse_url(cls, url): + parsed = urlparse(url) + return cls(proxy_type=parsed.scheme, + proxy_address=parsed.hostname, + proxy_port=parsed.port, + proxy_login=parsed.username, + proxy_password=parsed.password) diff --git a/libs/python_anticaptcha/tasks.py b/libs/python_anticaptcha/tasks.py new file mode 100644 index 000000000..57462763f --- /dev/null +++ b/libs/python_anticaptcha/tasks.py @@ -0,0 +1,128 @@ +import base64 +from .fields import BaseField + + +class BaseTask(object): + def serialize(self, **result): + return result + + +class ProxyMixin(BaseTask): + def __init__(self, *args, **kwargs): + self.proxy = kwargs.pop('proxy') + self.userAgent = kwargs.pop('user_agent') + self.cookies = kwargs.pop('cookies', '') + super(ProxyMixin, self).__init__(*args, **kwargs) + + def serialize(self, **result): + result = super(ProxyMixin, self).serialize(**result) + result.update(self.proxy.serialize()) + result['userAgent'] = self.userAgent + if self.cookies: + result['cookies'] = self.cookies + return result + + +class NoCaptchaTaskProxylessTask(BaseTask): + type = "NoCaptchaTaskProxyless" + websiteURL = None + websiteKey = None + websiteSToken = None + + def __init__(self, website_url, website_key, website_s_token=None, is_invisible=None): + self.websiteURL = website_url + self.websiteKey = website_key + self.websiteSToken = website_s_token + self.isInvisible = is_invisible + + def serialize(self): + data = {'type': self.type, + 'websiteURL': self.websiteURL, + 'websiteKey': self.websiteKey} + if self.websiteSToken is not None: + data['websiteSToken'] = self.websiteSToken + if self.isInvisible is not None: + data['isInvisible'] = self.isInvisible + return data + + +class FunCaptchaTask(ProxyMixin): + type = "FunCaptchaTask" + websiteURL = None + websiteKey = None + + def __init__(self, website_url, website_key, *args, **kwargs): + self.websiteURL 
= website_url + self.websiteKey = website_key + super(FunCaptchaTask, self).__init__(*args, **kwargs) + + def serialize(self, **result): + result = super(FunCaptchaTask, self).serialize(**result) + result.update({'type': self.type, + 'websiteURL': self.websiteURL, + 'websitePublicKey': self.websiteKey}) + return result + + +class NoCaptchaTask(ProxyMixin, NoCaptchaTaskProxylessTask): + type = "NoCaptchaTask" + + +class ImageToTextTask(object): + type = "ImageToTextTask" + fp = None + phrase = None + case = None + numeric = None + math = None + minLength = None + maxLength = None + + def __init__(self, fp, phrase=None, case=None, numeric=None, math=None, min_length=None, max_length=None): + self.fp = fp + self.phrase = phrase + self.case = case + self.numeric = numeric + self.math = math + self.minLength = min_length + self.maxLength = max_length + + def serialize(self): + return {'type': self.type, + 'body': base64.b64encode(self.fp.read()).decode('utf-8'), + 'phrase': self.phrase, + 'case': self.case, + 'numeric': self.numeric, + 'math': self.math, + 'minLength': self.minLength, + 'maxLength': self.maxLength} + + +class CustomCaptchaTask(BaseTask): + type = 'CustomCaptchaTask' + imageUrl = None + assignment = None + form = None + + def __init__(self, imageUrl, form=None, assignment=None): + self.imageUrl = imageUrl + self.form = form or {} + self.assignment = assignment + + def serialize(self): + data = super(CustomCaptchaTask, self).serialize() + data.update({'type': self.type, + 'imageUrl': self.imageUrl}) + if self.form: + forms = [] + for name, field in self.form.items(): + if isinstance(field, BaseField): + forms.append(field.serialize(name)) + else: + field = field.copy() + field['name'] = name + forms.append(field) + data['forms'] = forms + if self.assignment: + data['assignment'] = self.assignment + return data diff --git a/libs/subliminal_patch/pitcher.py b/libs/subliminal_patch/pitcher.py new file mode 100644 index 000000000..12be90384 --- /dev/null +++ 
b/libs/subliminal_patch/pitcher.py @@ -0,0 +1,212 @@ +# coding=utf-8 + +import time +import logging +import json +import requests +from python_anticaptcha import AnticaptchaClient, NoCaptchaTaskProxylessTask, NoCaptchaTask, AnticaptchaException,\ + Proxy +from deathbycaptcha import SocketClient as DBCClient, DEFAULT_TOKEN_TIMEOUT + + +logger = logging.getLogger(__name__) + + +class PitcherRegistry(object): + pitchers = {} + + def register(self, cls): + self.pitchers[cls.name] = cls + return cls + + def get_pitcher(self, name): + return self.pitchers[name] + + +registry = pitchers = PitcherRegistry() + + +class Pitcher(object): + name = None + tries = 3 + job = None + client = None + website_url = None + website_key = None + website_name = None + solve_time = None + success = False + + def __init__(self, website_name, website_url, website_key, tries=3, *args, **kwargs): + self.tries = tries + self.website_name = website_name + self.website_key = website_key + self.website_url = website_url + self.success = False + self.solve_time = None + + def get_client(self): + raise NotImplementedError + + def get_job(self): + raise NotImplementedError + + def _throw(self): + self.client = self.get_client() + self.job = self.get_job() + + def throw(self): + t = time.time() + data = self._throw() + if self.success: + self.solve_time = time.time() - t + logger.info("%s: Solving took %ss", self.website_name, int(self.solve_time)) + return data + + +@registry.register +class AntiCaptchaProxyLessPitcher(Pitcher): + name = "AntiCaptchaProxyLess" + host = "api.anti-captcha.com" + language_pool = "en" + client_key = None + use_ssl = True + is_invisible = False + + def __init__(self, website_name, client_key, website_url, website_key, tries=3, host=None, language_pool=None, + use_ssl=True, is_invisible=False, *args, **kwargs): + super(AntiCaptchaProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries, *args, + **kwargs) + self.client_key = client_key + 
self.host = host or self.host + self.language_pool = language_pool or self.language_pool + self.use_ssl = use_ssl + self.is_invisible = is_invisible + + def get_client(self): + return AnticaptchaClient(self.client_key, self.language_pool, self.host, self.use_ssl) + + def get_job(self): + task = NoCaptchaTaskProxylessTask(website_url=self.website_url, website_key=self.website_key, + is_invisible=self.is_invisible) + return self.client.createTask(task) + + def _throw(self): + for i in range(self.tries): + try: + super(AntiCaptchaProxyLessPitcher, self)._throw() + self.job.join() + ret = self.job.get_solution_response() + if ret: + self.success = True + return ret + except AnticaptchaException as e: + if i >= self.tries - 1: + logger.error("%s: Captcha solving finally failed. Exiting", self.website_name) + return + + if e.error_code == 'ERROR_ZERO_BALANCE': + logger.error("%s: No balance left on captcha solving service. Exiting", self.website_name) + return + + elif e.error_code == 'ERROR_NO_SLOT_AVAILABLE': + logger.info("%s: No captcha solving slot available, retrying", self.website_name) + time.sleep(5.0) + continue + + elif e.error_code == 'ERROR_KEY_DOES_NOT_EXIST': + logger.error("%s: Bad AntiCaptcha API key", self.website_name) + return + + elif e.error_id is None and e.error_code == 250: + # timeout + if i < self.tries: + logger.info("%s: Captcha solving timed out, retrying", self.website_name) + time.sleep(1.0) + continue + else: + logger.error("%s: Captcha solving timed out three times; bailing out", self.website_name) + return + raise + + +@registry.register +class AntiCaptchaPitcher(AntiCaptchaProxyLessPitcher): + name = "AntiCaptcha" + proxy = None + user_agent = None + cookies = None + + def __init__(self, *args, **kwargs): + self.proxy = Proxy.parse_url(kwargs.pop("proxy")) + print self.proxy.__dict__ + self.user_agent = kwargs.pop("user_agent") + cookies = kwargs.pop("cookies", {}) + if isinstance(cookies, dict): + self.cookies = ";".join(["%s=%s" % 
(k, v) for k, v in cookies.iteritems()]) + + super(AntiCaptchaPitcher, self).__init__(*args, **kwargs) + + def get_job(self): + task = NoCaptchaTask(website_url=self.website_url, website_key=self.website_key, proxy=self.proxy, + user_agent=self.user_agent, cookies=self.cookies, is_invisible=self.is_invisible) + return self.client.createTask(task) + + +@registry.register +class DBCProxyLessPitcher(Pitcher): + name = "DeathByCaptchaProxyLess" + username = None + password = None + + def __init__(self, website_name, client_key, website_url, website_key, + timeout=DEFAULT_TOKEN_TIMEOUT, tries=3, *args, **kwargs): + super(DBCProxyLessPitcher, self).__init__(website_name, website_url, website_key, tries=tries) + + self.username, self.password = client_key.split(":", 1) + self.timeout = timeout + + def get_client(self): + return DBCClient(self.username, self.password) + + def get_job(self): + pass + + @property + def payload_dict(self): + return { + "googlekey": self.website_key, + "pageurl": self.website_url + } + + def _throw(self): + super(DBCProxyLessPitcher, self)._throw() + payload = json.dumps(self.payload_dict) + try: + #balance = self.client.get_balance() + data = self.client.decode(timeout=self.timeout, type=4, token_params=payload) + if data and data["is_correct"]: + self.success = True + return data["text"] + except: + raise + + +@registry.register +class DBCPitcher(DBCProxyLessPitcher): + proxy = None + proxy_type = "HTTP" + + def __init__(self, *args, **kwargs): + self.proxy = kwargs.pop("proxy") + super(DBCPitcher, self).__init__(*args, **kwargs) + + @property + def payload_dict(self): + payload = super(DBCPitcher, self).payload_dict + payload.update({ + "proxytype": self.proxy_type, + "proxy": self.proxy + }) + return payload +