from __future__ import absolute_import
import collections
import functools
import logging
from . _collections import RecentlyUsedContainer
from . connectionpool import HTTPConnectionPool , HTTPSConnectionPool
from . connectionpool import port_by_scheme
from . exceptions import LocationValueError , MaxRetryError , ProxySchemeUnknown
from . packages import six
from . packages . six . moves . urllib . parse import urljoin
from . request import RequestMethods
from . util . url import parse_url
from . util . retry import Retry
# Public API of this module. Every entry must be the exact name of a
# module-level attribute, otherwise ``from <module> import *`` raises
# AttributeError.
__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']

# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Keyword arguments that only make sense for TLS connections.
# ``PoolManager._new_pool`` pops each of these names from the request context
# when the scheme is plain ``http``, so the names must match the real keyword
# argument names exactly (no surrounding whitespace).
SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs',
                'ssl_version', 'ca_cert_dir', 'ssl_context',
                'key_password')
# All known keyword arguments that could be provided to the pool manager, its
# pools, or the underlying connections. This is used to construct a pool key.
#
# Every name is the keyword argument name prefixed with ``key_`` because
# namedtuple fields may not start with an underscore (e.g. ``_proxy`` becomes
# ``key__proxy``). Each entry must be a valid Python identifier, otherwise
# ``collections.namedtuple`` raises ValueError.
_key_fields = (
    'key_scheme',  # str
    'key_host',  # str
    'key_port',  # int
    'key_timeout',  # int or float or Timeout
    'key_retries',  # int or Retry
    'key_strict',  # bool
    'key_block',  # bool
    'key_source_address',  # str
    'key_key_file',  # str
    'key_key_password',  # str
    'key_cert_file',  # str
    'key_cert_reqs',  # str
    'key_ca_certs',  # str
    'key_ssl_version',  # str
    'key_ca_cert_dir',  # str
    'key_ssl_context',  # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext
    'key_maxsize',  # int
    'key_headers',  # dict
    'key__proxy',  # parsed proxy url
    'key__proxy_headers',  # dict
    'key_socket_options',  # list of (level (int), optname (int), value (int or str)) tuples
    'key__socks_options',  # dict
    'key_assert_hostname',  # bool or string
    'key_assert_fingerprint',  # str
    'key_server_hostname',  # str
)

#: The namedtuple class used to construct keys for the connection pool.
#: All custom key schemes should include the fields in this key at a minimum.
PoolKey = collections.namedtuple('PoolKey', _key_fields)
def _default_key_normalizer(key_class, request_context):
    """
    Create a pool key out of a request context dictionary.

    According to RFC 3986, both the scheme and host are case-insensitive.
    Therefore, this function lower-cases both before constructing the pool
    key. If you wish to change this behaviour, provide alternate callables
    to ``key_fn_by_scheme``.

    The input dictionary is not modified; all work happens on a shallow copy.

    :param key_class:
        The class to use when constructing the key. This should be a
        namedtuple whose fields are the context keys prefixed with ``key_``
        (``key_scheme`` and ``key_host`` at a minimum). Fields that have no
        matching entry in the context are filled with ``None``.
    :type  key_class: namedtuple
    :param request_context:
        A dictionary-like object that contains the context for a request.
        It must contain ``scheme`` and ``host``.
    :type  request_context: dict

    :return: A namedtuple that can be used as a connection pool key.
    :rtype:  PoolKey
    :raises KeyError: if ``scheme`` or ``host`` is missing from the context.
    """
    # Since we mutate the dictionary, make a copy first
    context = request_context.copy()
    context['scheme'] = context['scheme'].lower()
    context['host'] = context['host'].lower()

    # These are dictionaries and need to be transformed into frozensets so
    # that the resulting key is hashable.
    for key in ('headers', '_proxy_headers', '_socks_options'):
        if key in context and context[key] is not None:
            context[key] = frozenset(context[key].items())

    # The socket_options key may be a list and needs to be transformed into a
    # tuple.
    socket_opts = context.get('socket_options')
    if socket_opts is not None:
        context['socket_options'] = tuple(socket_opts)

    # Map the kwargs to the names in the namedtuple - this is necessary since
    # namedtuples can't have fields starting with '_'. Iterate over a snapshot
    # of the keys because the dictionary is modified inside the loop.
    for key in list(context.keys()):
        context['key_' + key] = context.pop(key)

    # Default to ``None`` for keys missing from the context
    for field in key_class._fields:
        if field not in context:
            context[field] = None

    return key_class(**context)
#: A dictionary that maps a scheme to a callable that creates a pool key.
#: This can be used to alter the way pool keys are constructed, if desired.
#: Each PoolManager makes a copy of this dictionary so they can be configured
#: globally here, or individually on the instance.
#: The keys are lower-case scheme names; lookups are done with the
#: lower-cased scheme of the request.
key_fn_by_scheme = {
    'http': functools.partial(_default_key_normalizer, PoolKey),
    'https': functools.partial(_default_key_normalizer, PoolKey),
}

#: A dictionary that maps a scheme to the connection pool class used to
#: create new pools for that scheme.
pool_classes_by_scheme = {
    'http': HTTPConnectionPool,
    'https': HTTPSConnectionPool,
}
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # Parsed proxy URL; ``None`` means requests are not proxied. Subclasses
    # such as ProxyManager set this on the instance.
    proxy = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        # Pools evicted from the container are closed via ``dispose_func``.
        self.pools = RecentlyUsedContainer(num_pools,
                                           dispose_func=lambda p: p.close())

        # Locally set the pool classes and keys so other PoolManagers can
        # override them.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(self, scheme, host, port, request_context=None):
        """
        Create a new :class:`ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        Note that a ``request_context`` passed by the caller is modified in
        place: the ``scheme``, ``host`` and ``port`` entries (and, for plain
        HTTP, every name in ``SSL_KEYWORDS``) are removed from it.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw.copy()

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ('scheme', 'host', 'port'):
            request_context.pop(key, None)

        # TLS-only options must not reach a plain HTTP pool.
        if scheme == 'http':
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme='http', pool_kwargs=None):
        """
        Get a :class:`ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: if ``host`` is empty or ``None``.
        """
        if not host:
            raise LocationValueError(" No host specified. ")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        # A falsy scheme (e.g. ``None`` from a scheme-less URL) means HTTP.
        request_context['scheme'] = scheme or 'http'
        if not port:
            # Unknown schemes fall back to port 80.
            port = port_by_scheme.get(request_context['scheme'].lower(), 80)
        request_context['port'] = port
        request_context['host'] = host

        return self.connection_from_context(request_context)

    def connection_from_context(self, request_context):
        """
        Get a :class:`ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.
        """
        scheme = request_context['scheme'].lower()
        pool_key_constructor = self.key_fn_by_scheme[scheme]
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(self, pool_key, request_context=None):
        """
        Get a :class:`ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` is only consulted when no pool exists yet for
        ``pool_key``; in that case it must contain ``scheme``, ``host`` and
        ``port``.
        """
        # Hold the container lock for the whole lookup-or-create sequence.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context['scheme']
            host = request_context['host']
            port = request_context['port']
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url, pool_kwargs=None):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is merged over ``self.connection_pool_kw``. Note that
        if a new pool does not need to be created for the request, the
        provided ``pool_kwargs`` are not used.
        """
        u = parse_url(url)
        return self.connection_from_host(u.host, port=u.port, scheme=u.scheme,
                                         pool_kwargs=pool_kwargs)

    def _merge_pool_kwargs(self, override):
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "drop this setting"; a missing key is fine.
                    base_pool_kwargs.pop(key, None)
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        A ``headers`` mapping supplied by the caller is never modified; when
        headers have to be stripped for a cross-host redirect, the stripping
        is done on a copy.
        """
        u = parse_url(url)
        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are handled here rather than by the pool, since the
        # target may live on a different host.
        kw['assert_same_host'] = False
        kw['redirect'] = False

        if 'headers' not in kw:
            kw['headers'] = self.headers.copy()

        # A proxied plain-HTTP request is sent with the absolute URL; every
        # other request only sends the request-uri.
        if self.proxy is not None and u.scheme == "http":
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 7231, Section 6.4.4
        if response.status == 303:
            method = 'GET'

        retries = kw.get('retries')
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if (retries.remove_headers_on_redirect
                and not conn.is_same_host(redirect_location)):
            # Strip from a copy so the mapping passed in by the caller is
            # left untouched.
            forwarded_headers = kw['headers'].copy()
            for header in list(six.iterkeys(kw['headers'])):
                if header.lower() in retries.remove_headers_on_redirect:
                    forwarded_headers.pop(header, None)
            kw['headers'] = forwarded_headers

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                raise
            # Redirect budget exhausted but the caller opted out of raising:
            # hand back the redirect response itself.
            return response

        kw['retries'] = retries
        kw['redirect'] = redirect

        log.info(" Redirecting %s -> %s ", url, redirect_location)
        return self.urlopen(method, redirect_location, **kw)
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used. An
        :class:`HTTPConnectionPool` instance is also accepted; its scheme,
        host and port are used to build the URL.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(self, proxy_url, num_pools=10, headers=None,
                 proxy_headers=None, **connection_pool_kw):

        if isinstance(proxy_url, HTTPConnectionPool):
            proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host,
                                        proxy_url.port)
        proxy = parse_url(proxy_url)
        if not proxy.port:
            # No explicit port: use the scheme's default, falling back to 80.
            port = port_by_scheme.get(proxy.scheme, 80)
            proxy = proxy._replace(port=port)

        if proxy.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(proxy.scheme)

        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}

        # Hand the proxy settings to every pool this manager creates.
        connection_pool_kw['_proxy'] = self.proxy
        connection_pool_kw['_proxy_headers'] = self.proxy_headers

        super(ProxyManager, self).__init__(
            num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme='http', pool_kwargs=None):
        """
        Get a connection pool for a request to ``host``.

        For ``https`` the pool is looked up for the target host itself. For
        any other scheme the pool is looked up for the proxy's host, port and
        scheme, so all such requests share the proxy's pool.
        """
        if scheme == "https":
            return super(ProxyManager, self).connection_from_host(
                host, port, scheme, pool_kwargs=pool_kwargs)

        return super(ProxyManager, self).connection_from_host(
            self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs)

    def _set_proxy_headers(self, url, headers=None):
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Returns a new dictionary; ``headers`` itself is not modified.
        """
        headers_ = {'Accept': '*/*'}

        netloc = parse_url(url).netloc
        if netloc:
            headers_['Host'] = netloc

        # User-supplied headers win over the defaults above.
        if headers:
            headers_.update(headers)
        return headers_

    def urlopen(self, method, url, redirect=True, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        u = parse_url(url)

        if u.scheme == "http":
            # For proxied HTTPS requests, httplib sets the necessary headers
            # on the CONNECT to the proxy. For HTTP, we'll definitely
            # need to set 'Host' at the very least.
            headers = kw.get('headers', self.headers)
            kw['headers'] = self._set_proxy_headers(url, headers)

        return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
def proxy_from_url(url, **kw):
    """
    Convenience constructor for a :class:`ProxyManager`.

    :param url:
        Passed to :class:`ProxyManager` as its ``proxy_url`` argument.

    :param \\**kw:
        Forwarded unchanged to :class:`ProxyManager` as keyword arguments.

    :return: The newly created :class:`ProxyManager`.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager