Merge remote-tracking branch 'origin/development' into development

pull/427/head
Louis Vézina 6 years ago
commit 8ed4385e83

@@ -77,7 +77,8 @@ class CloudflareScraper(Session):
pass pass
def request(self, method, url, *args, **kwargs): def request(self, method, url, *args, **kwargs):
self.headers = ( if not isinstance(self.headers, OrderedDict):
self.headers = \
OrderedDict( OrderedDict(
[ [
('User-Agent', self.headers['User-Agent']), ('User-Agent', self.headers['User-Agent']),
@@ -88,7 +89,6 @@ class CloudflareScraper(Session):
('Upgrade-Insecure-Requests', '1') ('Upgrade-Insecure-Requests', '1')
] ]
) )
)
resp = super(CloudflareScraper, self).request(method, url, *args, **kwargs) resp = super(CloudflareScraper, self).request(method, url, *args, **kwargs)
@@ -127,7 +127,7 @@ class CloudflareScraper(Session):
submit_url = '{}://{}/cdn-cgi/l/chk_jschl'.format(parsed_url.scheme, domain) submit_url = '{}://{}/cdn-cgi/l/chk_jschl'.format(parsed_url.scheme, domain)
cloudflare_kwargs = deepcopy(original_kwargs) cloudflare_kwargs = deepcopy(original_kwargs)
headers = cloudflare_kwargs.setdefault('headers', {'Referer': resp.url}) headers = cloudflare_kwargs.setdefault('headers', OrderedDict({'Referer': resp.url}))
try: try:
params = cloudflare_kwargs.setdefault( params = cloudflare_kwargs.setdefault(

@@ -1,4 +1,5 @@
# coding=utf-8 # coding=utf-8
import certifi import certifi
import ssl import ssl
import os import os
@@ -8,7 +9,9 @@ import requests
import xmlrpclib import xmlrpclib
import dns.resolver import dns.resolver
from collections import OrderedDict
from requests import exceptions from requests import exceptions
from requests.utils import default_user_agent
from urllib3.util import connection from urllib3.util import connection
from retry.api import retry_call from retry.api import retry_call
from exceptions import APIThrottled from exceptions import APIThrottled
@@ -61,13 +64,18 @@ class CertifiSession(TimeoutSession):
class CFSession(CloudflareScraper, CertifiSession, TimeoutSession): class CFSession(CloudflareScraper, CertifiSession, TimeoutSession):
def __init__(self): def __init__(self):
super(CFSession, self).__init__() super(CFSession, self).__init__()
self.headers.update({ self.headers = OrderedDict([
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', ('User-Agent', default_user_agent()),
'Accept-Language': 'en-US,en;q=0.5', ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
'Cache-Control': 'no-cache', ('Accept-Language', 'en-US,en;q=0.5'),
'Pragma': 'no-cache', ('Accept-Encoding', 'gzip, deflate'),
'DNT': '1' ('Connection', 'keep-alive'),
}) ('Pragma', 'no-cache'),
('Cache-Control', 'no-cache'),
('Upgrade-Insecure-Requests', '1'),
('DNT', '1'),
])
self.debug = os.environ.get("CF_DEBUG", False)
def request(self, method, url, *args, **kwargs): def request(self, method, url, *args, **kwargs):
parsed_url = urlparse(url) parsed_url = urlparse(url)

@@ -128,7 +128,6 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
self.session = Session() self.session = Session()
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)] self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
self.session.headers['Referer'] = "https://subscene.com"
def terminate(self): def terminate(self):
logger.info("Closing session") logger.info("Closing session")

Loading…
Cancel
Save