parent
1428edfb8b
commit
213a04405d
@ -1,19 +1,17 @@
|
||||
cloudscraper-1.2.71.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
cloudscraper-1.2.71.dist-info/LICENSE,sha256=luC9NJPEX0JAQUKWkzWlAOaaE69fNKnW1uIuDKmWERc,1091
|
||||
cloudscraper-1.2.71.dist-info/METADATA,sha256=ywzk5ZCEv-I8Y9gajnVCsiAR3DrdmeiRLam3EGTJ0UA,19942
|
||||
cloudscraper-1.2.71.dist-info/RECORD,,
|
||||
cloudscraper-1.2.71.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
cloudscraper-1.2.71.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
||||
cloudscraper-1.2.71.dist-info/top_level.txt,sha256=OFEsobVl62sa2NzpgNtfHZkIw_qZr_wljhjmlP9oGiM,13
|
||||
cloudscraper/__init__.py,sha256=Eg8AqKak2yYcraKqt7O3LJLNmppC2uL7dvAANiyxh5w,15960
|
||||
cloudscraper/captcha/2captcha.py,sha256=yyDWvL6HVK4pM69aRpOV9mwzbtPC0yGz_mWkQ7-mkmI,10643
|
||||
cloudscraper/captcha/9kw.py,sha256=5EAUyO_vBEuLKsr4sXYa25MSVOm3BXVAdcenF6ZPsgI,7701
|
||||
cloudscraper-1.2.58.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
cloudscraper-1.2.58.dist-info/LICENSE,sha256=luC9NJPEX0JAQUKWkzWlAOaaE69fNKnW1uIuDKmWERc,1091
|
||||
cloudscraper-1.2.58.dist-info/METADATA,sha256=q25vkvMHkAxmuZRwak56i4CLAFUuG5EwEzz1oEXOY3U,19537
|
||||
cloudscraper-1.2.58.dist-info/RECORD,,
|
||||
cloudscraper-1.2.58.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
cloudscraper-1.2.58.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
||||
cloudscraper-1.2.58.dist-info/top_level.txt,sha256=OFEsobVl62sa2NzpgNtfHZkIw_qZr_wljhjmlP9oGiM,13
|
||||
cloudscraper/__init__.py,sha256=gsOMaKAKNfJUR4FkiEefAA2fAHVFuSwkblGgqxClsrw,32790
|
||||
cloudscraper/captcha/2captcha.py,sha256=CWF62VmLqb_KvSH-dqzo1XEwCBOQh1Aee-G18cX_7aw,10371
|
||||
cloudscraper/captcha/9kw.py,sha256=1dfhRHKeCx8yIE1opWyQ1Q7aHJlXDdkv1bV2Bfzbrf8,7387
|
||||
cloudscraper/captcha/__init__.py,sha256=VORxm32xqLrEE-zxFWgEhSbtqfigjCfwodChg1VlQ6c,1511
|
||||
cloudscraper/captcha/anticaptcha.py,sha256=YUsLviq3ZtbjTUnAPq6zVEieHmeSgnmiXKcqXZeO5qA,6152
|
||||
cloudscraper/captcha/capmonster.py,sha256=_9AUr6vHG4c5XLc5XqvnnMqgcvuKnzz1ckJpSySjgKQ,6143
|
||||
cloudscraper/captcha/capsolver.py,sha256=x38fO0m_k2W8nO3IppXADZsfCYl0iyvRgajZ5s5iTSU,6060
|
||||
cloudscraper/captcha/deathbycaptcha.py,sha256=asUX_quUsjAyWVRc7_8o_ryHZFotN-NP60mQiuN-c1U,8673
|
||||
cloudscraper/cloudflare.py,sha256=i1jyJcY-aRy3IQ-7YUly8qGUovO4Nx99M_FKfz4eivQ,19993
|
||||
cloudscraper/captcha/anticaptcha.py,sha256=cK8LON8M-8MN1wx_rSMTTqxrpwbL65Z2svH-LtGiA40,3478
|
||||
cloudscraper/captcha/capmonster.py,sha256=oVXdv2Wrgh2nWFrYttUzbqW9xZU1j6A4cDDcZINIoVg,5695
|
||||
cloudscraper/captcha/deathbycaptcha.py,sha256=UJqkh35gcKVdIhwNqF7N_0ixpIPT2PHiMbT378wEM4w,8073
|
||||
cloudscraper/exceptions.py,sha256=WSMgI8PRvU3g4KDFrjU-42p83lSAVOw8tN2NSqqIUfw,2397
|
||||
cloudscraper/help.py,sha256=fNYNGFQjiCL1d-gCpDoulBk4iHOuzNhLBudi7NrOHSg,2100
|
||||
cloudscraper/interpreters/__init__.py,sha256=mWY8LuzDRYWGGnKz5vYSdrOnoVaeWlixmAtZN8Pq6bY,1734
|
@ -1,188 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import requests
|
||||
|
||||
try:
|
||||
from urlparse import urlparse
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from ..exceptions import (
|
||||
CaptchaServiceUnavailable,
|
||||
CaptchaAPIError,
|
||||
CaptchaTimeout,
|
||||
CaptchaParameter,
|
||||
CaptchaBadJobID
|
||||
)
|
||||
|
||||
try:
|
||||
import polling2
|
||||
except ImportError:
|
||||
raise ImportError("Please install the python module 'polling2' via pip")
|
||||
|
||||
from . import Captcha
|
||||
|
||||
|
||||
class captchaSolver(Captcha):
    """Captcha provider that delegates solving to the CapSolver HTTP API.

    Tasks are created with ``POST {host}/createTask`` and their result is
    polled with ``POST {host}/getTaskResult``.
    """

    def __init__(self):
        super(captchaSolver, self).__init__('capsolver')
        self.host = 'https://api.capsolver.com'
        self.session = requests.Session()
        # Populated by getCaptchaAnswer(); defined here so that the request
        # helpers never hit a missing attribute when called on their own.
        self.api_key = None
        self.proxy = None
        # Maps the generic captcha kind to the CapSolver task type.
        self.captchaType = {
            'reCaptcha': 'ReCaptchaV2Task',
            'hCaptcha': 'HCaptchaTask',
            'turnstile': 'AntiCloudflareTask'
        }

    # ------------------------------------------------------------------------------- #

    @staticmethod
    def checkErrorStatus(response, fnct):
        """Raise when ``response`` reports a server-side or API error.

        Args:
            response: HTTP response returned by the CapSolver API.
            fnct: Name of the calling operation, used in the error message.

        Raises:
            CaptchaServiceUnavailable: The status code is 500 or 502.
            CaptchaAPIError: The JSON body carries an ``errorDescription``
                other than a "Current system busy" notice.
        """
        if response.status_code in [500, 502]:
            raise CaptchaServiceUnavailable(f'CapSolver: Server Side Error {response.status_code}')

        try:
            rPayload = response.json()
        except Exception:
            # Body is not JSON, so there is no error description to inspect.
            return

        # "Current system busy" is deliberately not raised so that the
        # surrounding poll loop can retry the request.
        if rPayload.get('errorDescription', False) and 'Current system busy' not in rPayload['errorDescription']:
            raise CaptchaAPIError(
                f"CapSolver -> {fnct} -> {rPayload.get('errorDescription')}"
            )

    # ------------------------------------------------------------------------------- #

    def requestJob(self, jobID):
        """Poll the task result for ``jobID`` and return the solved token.

        Polls every 5 seconds for up to 180 seconds.

        Raises:
            CaptchaBadJobID: ``jobID`` is empty.
            CaptchaTimeout: A response was obtained but no token could be
                extracted from it.
            polling2.TimeoutException: The task was not ready in time.
        """
        if not jobID:
            raise CaptchaBadJobID("CapSolver: Error bad job id to request task result.")

        def _checkRequest(response):
            self.checkErrorStatus(response, 'requestJob')
            try:
                if response.ok and response.json()['status'] == 'ready':
                    return True
            except Exception:
                # Malformed body: treat as "not ready yet" and keep polling.
                pass
            return None

        response = polling2.poll(
            lambda: self.session.post(
                f'{self.host}/getTaskResult',
                json={
                    'clientKey': self.api_key,
                    'taskId': jobID
                },
                timeout=30
            ),
            check_success=_checkRequest,
            step=5,
            timeout=180
        )

        if response:
            try:
                rPayload = response.json()['solution']
                if 'token' in rPayload:
                    return rPayload['token']
                else:
                    return rPayload['gRecaptchaResponse']
            except Exception:
                # Fall through to the CaptchaTimeout below.
                pass

        raise CaptchaTimeout(
            "CapSolver: Error failed to solve Captcha."
        )

    # ------------------------------------------------------------------------------- #

    def requestSolve(self, captchaType, url, siteKey):
        """Create a solving task and return its task id.

        Args:
            captchaType: Key of ``self.captchaType``.
            url: Page URL the captcha is served on.
            siteKey: Site key of the captcha widget.

        Raises:
            CaptchaBadJobID: The API answered without a ``taskId``.
            polling2.TimeoutException: No successful answer within 180 seconds.
        """

        def _checkRequest(response):
            self.checkErrorStatus(response, 'createTask')
            try:
                rPayload = response.json()
                if response.ok:
                    if rPayload.get("taskId", False):
                        return True
            except Exception:
                # Malformed body: retry on the next poll step.
                pass
            return None

        payload = {
            'clientKey': self.api_key,
            'appId': '9E717405-8C70-49B3-B277-7C2F2196484B',
            'task': {
                'type': self.captchaType[captchaType],
                'websiteURL': url,
                'websiteKey': siteKey
            }
        }

        if captchaType == 'turnstile':
            payload['task']['metadata'] = {'type': 'turnstile'}

        if self.proxy:
            payload['task']['proxy'] = self.proxy
        else:
            # Without a proxy the "...Proxyless" variant of the task is requested.
            payload['task']['type'] = f"{self.captchaType[captchaType]}Proxyless"

        response = polling2.poll(
            lambda: self.session.post(
                f'{self.host}/createTask',
                json=payload,
                allow_redirects=False,
                timeout=30
            ),
            check_success=_checkRequest,
            step=5,
            timeout=180
        )

        if response:
            rPayload = response.json()
            if rPayload.get('taskId'):
                return rPayload['taskId']

        raise CaptchaBadJobID(
            'CapSolver: Error no job id was returned.'
        )

    # ------------------------------------------------------------------------------- #

    def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
        """Solve a captcha and return the answer token.

        Args:
            captchaType: Key of ``self.captchaType``.
            url: Page URL the captcha is served on.
            siteKey: Site key of the captcha widget.
            captchaParams: Provider parameters; ``api_key`` is required,
                ``proxy`` (a mapping with an ``https`` entry) and ``no_proxy``
                are optional.

        Raises:
            CaptchaParameter: ``api_key`` is missing or the proxy URL has no
                scheme or netloc.
            CaptchaTimeout: Creating the task or waiting for it timed out.
        """
        if not captchaParams.get('api_key'):
            raise CaptchaParameter("CapSolver: Missing api_key parameter.")
        self.api_key = captchaParams.get('api_key')

        if captchaParams.get('proxy') and not captchaParams.get('no_proxy'):
            hostParsed = urlparse(captchaParams.get('proxy', {}).get('https'))

            if not hostParsed.scheme:
                raise CaptchaParameter('Cannot parse proxy correctly, bad scheme')

            if not hostParsed.netloc:
                raise CaptchaParameter('Cannot parse proxy correctly, bad netloc')

            self.proxy = captchaParams['proxy']['https']
        else:
            self.proxy = None

        # Bound before the try block: requestSolve() itself can time out, in
        # which case the handler below must still be able to format jobID.
        jobID = None
        try:
            jobID = self.requestSolve(captchaType, url, siteKey)
            return self.requestJob(jobID)
        except polling2.TimeoutException:
            raise CaptchaTimeout(
                f"CapSolver: Captcha solve (task ID: {jobID}) took to long."
            )
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
# Instantiate the solver once at import time; the instance is not kept here.
# NOTE(review): presumably Captcha.__init__ registers the provider under the
# name 'capsolver' — confirm against the Captcha base class.
captchaSolver()
|
@ -1,490 +0,0 @@
|
||||
# Cloudflare V1
|
||||
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
from copy import deepcopy
|
||||
from collections import OrderedDict
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
try:
|
||||
from HTMLParser import HTMLParser
|
||||
except ImportError:
|
||||
if sys.version_info >= (3, 4):
|
||||
import html
|
||||
else:
|
||||
from html.parser import HTMLParser
|
||||
|
||||
try:
|
||||
from urlparse import urlparse, urljoin
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
from .exceptions import (
|
||||
CloudflareCode1020,
|
||||
CloudflareIUAMError,
|
||||
CloudflareSolveError,
|
||||
CloudflareChallengeError,
|
||||
CloudflareCaptchaError,
|
||||
CloudflareCaptchaProvider
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
from .captcha import Captcha
|
||||
from .interpreters import JavaScriptInterpreter
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class Cloudflare():
    """Detect and answer Cloudflare version 1 challenges for a scraper session.

    The handler works through the scraper object it is given: it reads its
    ``debug``, ``delay``, ``doubleDown``, ``captcha``, ``proxies``, ``headers``
    and ``interpreter`` attributes and calls its ``request``,
    ``perform_request``, ``decodeBrotli`` and ``simpleException`` methods.
    """

    def __init__(self, cloudscraper):
        """Keep a reference to the scraper session this handler serves."""
        self.cloudscraper = cloudscraper

    # ------------------------------------------------------------------------------- #
    # Unescape / decode html entities
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def unescape(html_text):
        """Return ``html_text`` with HTML entities decoded."""
        if sys.version_info >= (3, 0):
            if sys.version_info >= (3, 4):
                return html.unescape(html_text)

            return HTMLParser().unescape(html_text)

        return HTMLParser().unescape(html_text)

    # ------------------------------------------------------------------------------- #
    # check if the response contains a valid Cloudflare challenge
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def is_IUAM_Challenge(resp):
        """Return a truthy value when ``resp`` is a Cloudflare IUAM challenge.

        The result is the value of an ``and`` chain (a regex match when every
        test passes), so callers should only rely on its truthiness.
        """
        try:
            return (
                resp.headers.get('Server', '').startswith('cloudflare')
                and resp.status_code in [429, 503]
                and re.search(r'/cdn-cgi/images/trace/jsch/', resp.text, re.M | re.S)
                and re.search(
                    r'''<form .*?="challenge-form" action="/\S+__cf_chl_f_tk=''',
                    resp.text,
                    re.M | re.S
                )
            )
        except AttributeError:
            pass

        return False

    # ------------------------------------------------------------------------------- #
    # check if the response contains new Cloudflare challenge
    # ------------------------------------------------------------------------------- #

    def is_New_IUAM_Challenge(self, resp):
        """Return a truthy value when ``resp`` is a version 2 IUAM challenge."""
        try:
            return (
                self.is_IUAM_Challenge(resp)
                and re.search(
                    r'''cpo.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/jsch/v1''',
                    resp.text,
                    re.M | re.S
                )
            )
        except AttributeError:
            pass

        return False

    # ------------------------------------------------------------------------------- #
    # check if the response contains a v2 hCaptcha Cloudflare challenge
    # ------------------------------------------------------------------------------- #

    def is_New_Captcha_Challenge(self, resp):
        """Return a truthy value when ``resp`` is a version 2 captcha challenge."""
        try:
            return (
                self.is_Captcha_Challenge(resp)
                and re.search(
                    r'''cpo.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/(captcha|managed)/v1''',
                    resp.text,
                    re.M | re.S
                )
            )
        except AttributeError:
            pass

        return False

    # ------------------------------------------------------------------------------- #
    # check if the response contains a Cloudflare hCaptcha challenge
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def is_Captcha_Challenge(resp):
        """Return a truthy value when ``resp`` is a Cloudflare captcha challenge."""
        try:
            return (
                resp.headers.get('Server', '').startswith('cloudflare')
                and resp.status_code == 403
                and re.search(r'/cdn-cgi/images/trace/(captcha|managed)/', resp.text, re.M | re.S)
                and re.search(
                    r'''<form .*?="challenge-form" action="/\S+__cf_chl_f_tk=''',
                    resp.text,
                    re.M | re.S
                )
            )
        except AttributeError:
            pass

        return False

    # ------------------------------------------------------------------------------- #
    # check if the response contains Firewall 1020 Error
    # ------------------------------------------------------------------------------- #

    @staticmethod
    def is_Firewall_Blocked(resp):
        """Return a truthy value when ``resp`` is a Cloudflare error 1020 page."""
        try:
            return (
                resp.headers.get('Server', '').startswith('cloudflare')
                and resp.status_code == 403
                and re.search(
                    r'<span class="cf-error-code">1020</span>',
                    resp.text,
                    re.M | re.DOTALL
                )
            )
        except AttributeError:
            pass

        return False

    # ------------------------------------------------------------------------------- #
    # Wrapper for is_Captcha_Challenge, is_IUAM_Challenge, is_Firewall_Blocked
    # ------------------------------------------------------------------------------- #

    def is_Challenge_Request(self, resp):
        """Return True when ``resp`` is a version 1 challenge that can be handled.

        Firewall blocks and version 2 challenges are reported through
        ``cloudscraper.simpleException`` instead.
        """
        if self.is_Firewall_Blocked(resp):
            self.cloudscraper.simpleException(
                CloudflareCode1020,
                'Cloudflare has blocked this request (Code 1020 Detected).'
            )

        if self.is_New_Captcha_Challenge(resp):
            self.cloudscraper.simpleException(
                CloudflareChallengeError,
                'Detected a Cloudflare version 2 Captcha challenge, This feature is not available in the opensource (free) version.'
            )

        if self.is_New_IUAM_Challenge(resp):
            self.cloudscraper.simpleException(
                CloudflareChallengeError,
                'Detected a Cloudflare version 2 challenge, This feature is not available in the opensource (free) version.'
            )

        if self.is_Captcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
            if self.cloudscraper.debug:
                print('Detected a Cloudflare version 1 challenge.')
            return True

        return False

    # ------------------------------------------------------------------------------- #
    # Try to solve cloudflare javascript challenge.
    # ------------------------------------------------------------------------------- #

    def IUAM_Challenge_Response(self, body, url, interpreter):
        """Build the submission for a JavaScript (IUAM) challenge.

        Args:
            body: HTML of the challenge page.
            url: URL the challenge page was served from.
            interpreter: Name handed to ``JavaScriptInterpreter.dynamicImport``.

        Returns:
            A dict with the ``url`` to post to and the form ``data``.
        """
        try:
            formPayload = re.search(
                r'<form (?P<form>.*?="challenge-form" '
                r'action="(?P<challengeUUID>.*?'
                r'__cf_chl_f_tk=\S+)"(.*?)</form>)',
                body,
                re.M | re.DOTALL
            ).groupdict()

            if not all(key in formPayload for key in ['form', 'challengeUUID']):
                self.cloudscraper.simpleException(
                    CloudflareIUAMError,
                    "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
                )

            payload = OrderedDict()
            for challengeParam in re.findall(r'^\s*<input\s(.*?)/>', formPayload['form'], re.M | re.S):
                inputPayload = dict(re.findall(r'(\S+)="(\S+)"', challengeParam))
                if inputPayload.get('name') in ['r', 'jschl_vc', 'pass']:
                    payload.update({inputPayload['name']: inputPayload['value']})

        except (AttributeError, KeyError):
            # AttributeError: the form regex did not match (search returned None).
            # KeyError: a challenge <input> carried a name but no value.
            self.cloudscraper.simpleException(
                CloudflareIUAMError,
                "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
            )

        hostParsed = urlparse(url)

        try:
            payload['jschl_answer'] = JavaScriptInterpreter.dynamicImport(
                interpreter
            ).solveChallenge(body, hostParsed.netloc)
        except Exception as e:
            self.cloudscraper.simpleException(
                CloudflareIUAMError,
                f"Unable to parse Cloudflare anti-bots page: {getattr(e, 'message', e)}"
            )

        return {
            'url': f"{hostParsed.scheme}://{hostParsed.netloc}{self.unescape(formPayload['challengeUUID'])}",
            'data': payload
        }

    # ------------------------------------------------------------------------------- #
    # Pass proxy parameter to provider to solve captcha.
    # ------------------------------------------------------------------------------- #

    def _sync_captcha_proxy(self):
        """Copy the scraper's proxies into its captcha parameters when they differ."""
        proxies = self.cloudscraper.proxies
        if proxies and proxies != self.cloudscraper.captcha.get('proxy'):
            # The proxies live on the scraper session, not on this handler.
            self.cloudscraper.captcha['proxy'] = proxies

    # ------------------------------------------------------------------------------- #
    # Try to solve the Captcha challenge via 3rd party.
    # ------------------------------------------------------------------------------- #

    def captcha_Challenge_Response(self, provider, provider_params, body, url):
        """Solve a captcha challenge through a third-party provider.

        Args:
            provider: Provider name handed (lower-cased) to ``Captcha.dynamicImport``.
            provider_params: Parameters passed on to the provider's ``solveCaptcha``.
            body: HTML of the challenge page.
            url: URL the challenge page was served from.

        Returns:
            A dict with the ``url`` to post to and the form ``data``.
        """
        try:
            formPayload = re.search(
                r'<form (?P<form>.*?="challenge-form" '
                r'action="(?P<challengeUUID>.*?__cf_chl_captcha_tk__=\S+)"(.*?)</form>)',
                body,
                re.M | re.DOTALL
            ).groupdict()

            if not all(key in formPayload for key in ['form', 'challengeUUID']):
                self.cloudscraper.simpleException(
                    CloudflareCaptchaError,
                    "Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
                )

            payload = OrderedDict(
                re.findall(
                    r'(name="r"\svalue|data-ray|data-sitekey|name="cf_captcha_kind"\svalue)="(.*?)"',
                    formPayload['form']
                )
            )

            captchaType = 'reCaptcha' if payload['name="cf_captcha_kind" value'] == 're' else 'hCaptcha'
            # Read inside the guarded block so that a missing site key is
            # reported as a captcha error rather than a bare KeyError.
            siteKey = payload['data-sitekey']

        except (AttributeError, KeyError):
            self.cloudscraper.simpleException(
                CloudflareCaptchaError,
                "Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
            )

        self._sync_captcha_proxy()

        # ------------------------------------------------------------------------------- #
        # Pass User-Agent if provider supports it to solve captcha.
        # ------------------------------------------------------------------------------- #

        self.cloudscraper.captcha['User-Agent'] = self.cloudscraper.headers['User-Agent']

        # ------------------------------------------------------------------------------- #
        # Submit job to provider to request captcha solve.
        # ------------------------------------------------------------------------------- #

        captchaResponse = Captcha.dynamicImport(
            provider.lower()
        ).solveCaptcha(
            captchaType,
            url,
            siteKey,
            provider_params
        )

        # ------------------------------------------------------------------------------- #
        # Parse and handle the response of solved captcha.
        # ------------------------------------------------------------------------------- #

        dataPayload = OrderedDict([
            ('r', payload.get('name="r" value', '')),
            ('cf_captcha_kind', payload['name="cf_captcha_kind" value']),
            ('id', payload.get('data-ray')),
            ('g-recaptcha-response', captchaResponse)
        ])

        if captchaType == 'hCaptcha':
            dataPayload.update({'h-captcha-response': captchaResponse})

        hostParsed = urlparse(url)

        return {
            'url': f"{hostParsed.scheme}://{hostParsed.netloc}{self.unescape(formPayload['challengeUUID'])}",
            'data': dataPayload
        }

    # ------------------------------------------------------------------------------- #
    # Attempt to handle and send the challenge response back to cloudflare
    # ------------------------------------------------------------------------------- #

    def Challenge_Response(self, resp, **kwargs):
        """Answer the challenge in ``resp`` and return the follow-up response.

        Args:
            resp: The challenge response received from Cloudflare.
            **kwargs: Keyword arguments of the original request; they are
                deep-copied before being adjusted for the submission.
        """
        if self.is_Captcha_Challenge(resp):
            # ------------------------------------------------------------------------------- #
            # double down on the request as some websites are only checking
            # if cfuid is populated before issuing Captcha.
            # ------------------------------------------------------------------------------- #

            if self.cloudscraper.doubleDown:
                resp = self.cloudscraper.decodeBrotli(
                    self.cloudscraper.perform_request(resp.request.method, resp.url, **kwargs)
                )

            if not self.is_Captcha_Challenge(resp):
                return resp

            # ------------------------------------------------------------------------------- #
            # if no captcha provider raise a runtime error.
            # ------------------------------------------------------------------------------- #

            if (
                not self.cloudscraper.captcha
                or not isinstance(self.cloudscraper.captcha, dict)
                or not self.cloudscraper.captcha.get('provider')
            ):
                self.cloudscraper.simpleException(
                    CloudflareCaptchaProvider,
                    "Cloudflare Captcha detected, unfortunately you haven't loaded an anti Captcha provider "
                    "correctly via the 'captcha' parameter."
                )

            # ------------------------------------------------------------------------------- #
            # if provider is return_response, return the response without doing anything.
            # ------------------------------------------------------------------------------- #

            if self.cloudscraper.captcha.get('provider') == 'return_response':
                return resp

            # ------------------------------------------------------------------------------- #
            # Submit request to parser wrapper to solve captcha
            # ------------------------------------------------------------------------------- #

            submit_url = self.captcha_Challenge_Response(
                self.cloudscraper.captcha.get('provider'),
                self.cloudscraper.captcha,
                resp.text,
                resp.url
            )
        else:
            # ------------------------------------------------------------------------------- #
            # Cloudflare requires a delay before solving the challenge
            # ------------------------------------------------------------------------------- #

            if not self.cloudscraper.delay:
                try:
                    # The page states the delay in milliseconds; store seconds.
                    delay = float(
                        re.search(
                            r'submit\(\);\r?\n\s*},\s*([0-9]+)',
                            resp.text
                        ).group(1)
                    ) / float(1000)
                    if isinstance(delay, (int, float)):
                        self.cloudscraper.delay = delay
                except (AttributeError, ValueError):
                    self.cloudscraper.simpleException(
                        CloudflareIUAMError,
                        "Cloudflare IUAM possibility malformed, issue extracing delay value."
                    )

            time.sleep(self.cloudscraper.delay)

            # ------------------------------------------------------------------------------- #

            submit_url = self.IUAM_Challenge_Response(
                resp.text,
                resp.url,
                self.cloudscraper.interpreter
            )

        # ------------------------------------------------------------------------------- #
        # Send the Challenge Response back to Cloudflare
        # ------------------------------------------------------------------------------- #

        if submit_url:

            def updateAttr(obj, name, newValue):
                # Merge newValue into obj[name]; start from an empty dict when
                # the entry is missing or does not support update().
                try:
                    obj[name].update(newValue)
                    return obj[name]
                except (AttributeError, KeyError):
                    obj[name] = {}
                    obj[name].update(newValue)
                    return obj[name]

            cloudflare_kwargs = deepcopy(kwargs)
            cloudflare_kwargs['allow_redirects'] = False
            cloudflare_kwargs['data'] = updateAttr(
                cloudflare_kwargs,
                'data',
                submit_url['data']
            )

            urlParsed = urlparse(resp.url)
            cloudflare_kwargs['headers'] = updateAttr(
                cloudflare_kwargs,
                'headers',
                {
                    'Origin': f'{urlParsed.scheme}://{urlParsed.netloc}',
                    'Referer': resp.url
                }
            )

            challengeSubmitResponse = self.cloudscraper.request(
                'POST',
                submit_url['url'],
                **cloudflare_kwargs
            )

            if challengeSubmitResponse.status_code == 400:
                self.cloudscraper.simpleException(
                    CloudflareSolveError,
                    'Invalid challenge answer detected, Cloudflare broken?'
                )

            # ------------------------------------------------------------------------------- #
            # Return response if Cloudflare is doing content pass through instead of 3xx
            # else request with redirect URL also handle protocol scheme change http -> https
            # ------------------------------------------------------------------------------- #

            if not challengeSubmitResponse.is_redirect:
                return challengeSubmitResponse

            else:
                cloudflare_kwargs = deepcopy(kwargs)
                cloudflare_kwargs['headers'] = updateAttr(
                    cloudflare_kwargs,
                    'headers',
                    {'Referer': challengeSubmitResponse.url}
                )

                if not urlparse(challengeSubmitResponse.headers['Location']).netloc:
                    # Relative redirect: resolve it against the submission URL.
                    redirect_location = urljoin(
                        challengeSubmitResponse.url,
                        challengeSubmitResponse.headers['Location']
                    )
                else:
                    redirect_location = challengeSubmitResponse.headers['Location']

                return self.cloudscraper.request(
                    resp.request.method,
                    redirect_location,
                    **cloudflare_kwargs
                )

        # ------------------------------------------------------------------------------- #
        # We shouldn't be here...
        # Re-request the original query and/or process again....
        # ------------------------------------------------------------------------------- #

        return self.cloudscraper.request(resp.request.method, resp.url, **kwargs)
|
||||
|
||||
# ------------------------------------------------------------------------------- #
|
Loading…
Reference in new issue