|
|
@ -8,12 +8,19 @@ import requests
|
|
|
|
import xmlrpclib
|
|
|
|
import xmlrpclib
|
|
|
|
import dns.resolver
|
|
|
|
import dns.resolver
|
|
|
|
|
|
|
|
|
|
|
|
from requests import Session, exceptions
|
|
|
|
from requests import exceptions
|
|
|
|
from urllib3.util import connection
|
|
|
|
from urllib3.util import connection
|
|
|
|
from retry.api import retry_call
|
|
|
|
from retry.api import retry_call
|
|
|
|
from exceptions import APIThrottled
|
|
|
|
from exceptions import APIThrottled
|
|
|
|
|
|
|
|
from dogpile.cache.api import NO_VALUE
|
|
|
|
|
|
|
|
from subliminal.cache import region
|
|
|
|
from cfscrape import CloudflareScraper
|
|
|
|
from cfscrape import CloudflareScraper
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
from urlparse import urlparse
|
|
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
|
|
from urllib.parse import urlparse
|
|
|
|
|
|
|
|
|
|
|
|
from subzero.lib.io import get_viable_encoding
|
|
|
|
from subzero.lib.io import get_viable_encoding
|
|
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
@ -45,10 +52,37 @@ class CertifiSession(CloudflareScraper):
|
|
|
|
'DNT': '1'
|
|
|
|
'DNT': '1'
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
def request(self, method, url, *args, **kwargs):
    """Send a request, restoring and refreshing cached Cloudflare data.

    When the caller supplied no ``timeout`` (or ``None``), ``self.timeout``
    is used. If the session is missing the ``__cfduid`` or ``cf_clearance``
    cookie for the URL's network location, the value stored in the cache
    ``region`` under ``cf_data_<netloc>`` is unpacked as
    ``(cookies, user_agent)``; the cookies are set on the session for that
    domain and the ``User-Agent`` header is replaced. The request is then
    delegated to the parent class. Afterwards the result of
    ``self.get_live_tokens(domain)`` is written to the cache if it differs
    from what is currently stored.

    :param method: HTTP method, forwarded unchanged to the parent ``request``.
    :param url: target URL; its ``netloc`` is used as the cookie domain and
        as the suffix of the cache key.
    :param args: additional positional arguments forwarded to the parent.
    :param kwargs: additional keyword arguments forwarded to the parent;
        ``timeout`` is filled in when absent or ``None``.
    :return: the value returned by the parent class's ``request``.
    """
    if kwargs.get('timeout') is None:
        kwargs['timeout'] = self.timeout

    domain = urlparse(url).netloc
    cache_key = "cf_data_%s" % domain

    # Short-circuits exactly like the original "not a or not b" test.
    has_cf_cookies = (self.cookies.get("__cfduid", "", domain=domain)
                      and self.cookies.get("cf_clearance", "", domain=domain))

    if not has_cf_cookies:
        cf_data = region.get(cache_key)
        if cf_data is not NO_VALUE:
            cf_cookies, user_agent = cf_data
            logger.debug("Trying to use old cf data for %s: %s", domain, cf_data)
            # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
            for cookie, value in cf_cookies.items():
                self.cookies.set(cookie, value, domain=domain)

            # The User-Agent is stored together with the cookies, so both
            # are restored as a pair.
            self.headers['User-Agent'] = user_agent

    ret = super(CertifiSession, self).request(method, url, *args, **kwargs)

    try:
        cf_data = self.get_live_tokens(domain)
    except Exception:
        # Refreshing the cache is best effort and must never fail the
        # request itself. Unlike a bare "except:", this lets
        # KeyboardInterrupt/SystemExit propagate, and the failure is
        # logged instead of vanishing silently.
        logger.debug("Couldn't get live cf data for %s", domain, exc_info=True)
    else:
        # Only write to the cache when the live data actually changed.
        if cf_data != region.get(cache_key):
            logger.debug("Storing cf data for %s: %s", domain, cf_data)
            region.set(cache_key, cf_data)

    return ret
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RetryingSession(CertifiSession):
|
|
|
|
class RetryingSession(CertifiSession):
|
|
|
|