You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1187 lines
42 KiB
1187 lines
42 KiB
from __future__ import annotations
|
|
|
|
import errno
|
|
import logging
|
|
import queue
|
|
import sys
|
|
import typing
|
|
import warnings
|
|
import weakref
|
|
from socket import timeout as SocketTimeout
|
|
from types import TracebackType
|
|
|
|
from ._base_connection import _TYPE_BODY
|
|
from ._collections import HTTPHeaderDict
|
|
from ._request_methods import RequestMethods
|
|
from .connection import (
|
|
BaseSSLError,
|
|
BrokenPipeError,
|
|
DummyConnection,
|
|
HTTPConnection,
|
|
HTTPException,
|
|
HTTPSConnection,
|
|
ProxyConfig,
|
|
_wrap_proxy_error,
|
|
)
|
|
from .connection import port_by_scheme as port_by_scheme
|
|
from .exceptions import (
|
|
ClosedPoolError,
|
|
EmptyPoolError,
|
|
FullPoolError,
|
|
HostChangedError,
|
|
InsecureRequestWarning,
|
|
LocationValueError,
|
|
MaxRetryError,
|
|
NewConnectionError,
|
|
ProtocolError,
|
|
ProxyError,
|
|
ReadTimeoutError,
|
|
SSLError,
|
|
TimeoutError,
|
|
)
|
|
from .response import BaseHTTPResponse
|
|
from .util.connection import is_connection_dropped
|
|
from .util.proxy import connection_requires_http_tunnel
|
|
from .util.request import _TYPE_BODY_POSITION, set_file_position
|
|
from .util.retry import Retry
|
|
from .util.ssl_match_hostname import CertificateError
|
|
from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_DEFAULT, Timeout
|
|
from .util.url import Url, _encode_target
|
|
from .util.url import _normalize_host as normalize_host
|
|
from .util.url import parse_url
|
|
from .util.util import to_str
|
|
|
|
if typing.TYPE_CHECKING:
|
|
import ssl
|
|
from typing import Literal
|
|
|
|
from ._base_connection import BaseHTTPConnection, BaseHTTPSConnection
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Every form in which a timeout may be passed around in this module: a
# ``Timeout`` object, a plain number of seconds, the "use the default"
# sentinel type, or ``None``.
_TYPE_TIMEOUT = typing.Union[Timeout, float, _TYPE_DEFAULT, None]

# Type variable used so that ``__enter__`` is typed as returning the very
# same object type it was called on.
_SelfT = typing.TypeVar("_SelfT")
|
|
|
|
|
|
# Pool objects
|
|
class ConnectionPool:
    """
    Base class for all connection pools, such as
    :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.

    .. note::
       ConnectionPool.urlopen() does not normalize or percent-encode target URIs
       which is useful if your target server doesn't support percent-encoded
       target URIs.
    """

    # URL scheme served by the pool; concrete subclasses override this.
    scheme: str | None = None
    # Queue implementation used by subclasses to hold pooled connections.
    QueueCls = queue.LifoQueue

    def __init__(self, host: str, port: int | None = None) -> None:
        """
        Record the normalized host and the port this pool talks to.

        :param host:
            Host name or address of the target. Must be non-empty.

        :param port:
            Port of the target, stored as given (``None`` is kept as ``None``).

        :raises LocationValueError:
            If ``host`` is empty or otherwise falsy.
        """
        if not host:
            raise LocationValueError("No host specified.")

        self.host = _normalize_host(host, scheme=self.scheme)
        self.port = port

        # This property uses 'normalize_host()' (not '_normalize_host()')
        # to avoid removing square braces around IPv6 addresses.
        # This value is sent to `HTTPConnection.set_tunnel()` if called
        # because square braces are required for HTTP CONNECT tunneling.
        self._tunnel_host = normalize_host(host, scheme=self.scheme).lower()

    def __str__(self) -> str:
        """Return ``ClassName(host=..., port=...)`` using the concrete class name."""
        return f"{type(self).__name__}(host={self.host!r}, port={self.port!r})"

    def __enter__(self: _SelfT) -> _SelfT:
        """Enter a ``with`` block; the pool itself is the context value."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> Literal[False]:
        """Close the pool on leaving a ``with`` block; never suppresses exceptions."""
        self.close()
        # Return False to re-raise any potential exceptions
        return False

    def close(self) -> None:
        """
        Close all pooled connections and disable the pool.

        The base implementation does nothing; subclasses that actually hold
        connections override it.
        """
|
|
|
|
|
|
# errno values that `HTTPConnectionPool._raise_timeout` treats as a read
# timeout rather than as a generic socket error.
# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
_blocking_errnos = {errno.EAGAIN, errno.EWOULDBLOCK}
|
|
|
|
|
|
class HTTPConnectionPool(ConnectionPool, RequestMethods):
    """
    Thread-safe connection pool for one host.

    :param host:
        Host used for this HTTP Connection (e.g. "localhost"), passed into
        :class:`http.client.HTTPConnection`.

    :param port:
        Port used for this HTTP Connection (None is equivalent to 80), passed
        into :class:`http.client.HTTPConnection`.

    :param timeout:
        Socket timeout in seconds for each individual connection. This can
        be a float or integer, which sets the timeout for the HTTP request,
        or an instance of :class:`urllib3.util.Timeout` which gives you more
        fine-grained control over request timeouts. After the constructor has
        been parsed, this is always a `urllib3.util.Timeout` object.

    :param maxsize:
        Number of connections to save that can be reused. More than 1 is useful
        in multithreaded situations. If ``block`` is set to False, more
        connections will be created but they will not be saved once they've
        been used.

    :param block:
        If set to True, no more than ``maxsize`` connections will be used at
        a time. When no free connections are available, the call will block
        until a connection has been released. This is a useful side effect for
        particular multithreaded situations where one does not want to use more
        than maxsize connections per host to prevent flooding.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param retries:
        Retry configuration to use by default with requests in this pool.

    :param _proxy:
        Parsed proxy URL, should not be used directly, instead, see
        :class:`urllib3.ProxyManager`

    :param _proxy_headers:
        A dictionary with proxy headers, should not be used directly,
        instead, see :class:`urllib3.ProxyManager`

    :param \\**conn_kw:
        Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`,
        :class:`urllib3.connection.HTTPSConnection` instances.
    """

    scheme = "http"
    # Class instantiated by `_new_conn` for every fresh connection.
    ConnectionCls: (
        type[BaseHTTPConnection] | type[BaseHTTPSConnection]
    ) = HTTPConnection

    def __init__(
        self,
        host: str,
        port: int | None = None,
        timeout: _TYPE_TIMEOUT | None = _DEFAULT_TIMEOUT,
        maxsize: int = 1,
        block: bool = False,
        headers: typing.Mapping[str, str] | None = None,
        retries: Retry | bool | int | None = None,
        _proxy: Url | None = None,
        _proxy_headers: typing.Mapping[str, str] | None = None,
        _proxy_config: ProxyConfig | None = None,
        **conn_kw: typing.Any,
    ) -> None:
        """
        Set up the pool; see the class docstring for the public parameters.

        ``_proxy_config`` is stored as ``self.proxy_config`` and, when a proxy
        is configured, forwarded to each new connection as the
        ``proxy_config`` keyword argument.
        """
        ConnectionPool.__init__(self, host, port)
        RequestMethods.__init__(self, headers)

        # Normalize plain numbers (and other non-Timeout values) into a Timeout.
        if not isinstance(timeout, Timeout):
            timeout = Timeout.from_float(timeout)

        if retries is None:
            retries = Retry.DEFAULT

        self.timeout = timeout
        self.retries = retries

        # Set to None by close(); the other methods check for that.
        self.pool: queue.LifoQueue[typing.Any] | None = self.QueueCls(maxsize)
        self.block = block

        self.proxy = _proxy
        self.proxy_headers = _proxy_headers or {}
        self.proxy_config = _proxy_config

        # Fill the queue up so that doing get() on it will block properly
        for _ in range(maxsize):
            self.pool.put(None)

        # These are mostly for testing and debugging purposes.
        self.num_connections = 0
        self.num_requests = 0
        self.conn_kw = conn_kw

        if self.proxy:
            # Enable Nagle's algorithm for proxies, to avoid packet fragmentation.
            # We cannot know if the user has added default socket options, so we cannot replace the
            # list.
            self.conn_kw.setdefault("socket_options", [])

            self.conn_kw["proxy"] = self.proxy
            self.conn_kw["proxy_config"] = self.proxy_config

        # Do not pass 'self' as callback to 'finalize'.
        # Then the 'finalize' would keep an endless living (leak) to self.
        # By just passing a reference to the pool allows the garbage collector
        # to free self if nobody else has a reference to it.
        pool = self.pool

        # Close all the HTTPConnections in the pool before the
        # HTTPConnectionPool object is garbage collected.
        weakref.finalize(self, _close_pool_connections, pool)

    def _new_conn(self) -> BaseHTTPConnection:
        """
        Return a fresh :class:`HTTPConnection`.

        Increments ``num_connections`` and builds the connection from
        ``ConnectionCls`` using the pool's host, port, connect timeout and
        the stored ``conn_kw``.
        """
        self.num_connections += 1
        log.debug(
            "Starting new HTTP connection (%d): %s:%s",
            self.num_connections,
            self.host,
            self.port or "80",
        )

        conn = self.ConnectionCls(
            host=self.host,
            port=self.port,
            timeout=self.timeout.connect_timeout,
            **self.conn_kw,
        )
        return conn

    def _get_conn(self, timeout: float | None = None) -> BaseHTTPConnection:
        """
        Get a connection. Will return a pooled connection if one is available.

        If no connections are available and :prop:`.block` is ``False``, then a
        fresh connection is returned.

        :param timeout:
            Seconds to wait before giving up and raising
            :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
            :prop:`.block` is ``True``.

        :raises ClosedPoolError:
            If the pool has already been closed.
        """
        conn = None

        if self.pool is None:
            raise ClosedPoolError(self, "Pool is closed.")

        try:
            conn = self.pool.get(block=self.block, timeout=timeout)

        except AttributeError:  # self.pool is None
            raise ClosedPoolError(self, "Pool is closed.") from None  # Defensive:

        except queue.Empty:
            if self.block:
                raise EmptyPoolError(
                    self,
                    "Pool is empty and a new connection can't be opened due to blocking mode.",
                ) from None
            pass  # Oh well, we'll create a new connection then

        # If this is a persistent connection, check if it got disconnected
        if conn and is_connection_dropped(conn):
            log.debug("Resetting dropped connection: %s", self.host)
            conn.close()

        # The queue is pre-filled with None placeholders, so a None here
        # simply means "no reusable connection in this slot".
        return conn or self._new_conn()

    def _put_conn(self, conn: BaseHTTPConnection | None) -> None:
        """
        Put a connection back into the pool.

        :param conn:
            Connection object for the current host and port as returned by
            :meth:`._new_conn` or :meth:`._get_conn`.

        If the pool is already full, the connection is closed and discarded
        because we exceeded maxsize. If connections are discarded frequently,
        then maxsize should be increased.

        If the pool is closed, then the connection will be closed and discarded.

        :raises FullPoolError:
            If the pool is full and :prop:`.block` is ``True``.
        """
        if self.pool is not None:
            try:
                self.pool.put(conn, block=False)
                return  # Everything is dandy, done.
            except AttributeError:
                # self.pool is None.
                pass
            except queue.Full:
                # Connection never got put back into the pool, close it.
                if conn:
                    conn.close()

                if self.block:
                    # This should never happen if you got the conn from self._get_conn
                    raise FullPoolError(
                        self,
                        "Pool reached maximum size and no more connections are allowed.",
                    ) from None

                log.warning(
                    "Connection pool is full, discarding connection: %s. Connection pool size: %s",
                    self.host,
                    self.pool.qsize(),
                )

        # Connection never got put back into the pool, close it.
        # (Also reached after the non-blocking queue.Full path above, where
        # the connection has already been closed once.)
        if conn:
            conn.close()

    def _validate_conn(self, conn: BaseHTTPConnection) -> None:
        """
        Called right before a request is made, after the socket is created.

        No-op in this class; a hook for subclasses.
        """

    def _prepare_proxy(self, conn: BaseHTTPConnection) -> None:
        """Hook for setting up a proxy tunnel on ``conn``; no-op here."""
        # Nothing to do for HTTP connections.
        pass

    def _get_timeout(self, timeout: _TYPE_TIMEOUT) -> Timeout:
        """Helper that always returns a :class:`urllib3.util.Timeout`"""
        # The sentinel means "use the pool-wide timeout". Always hand out a
        # clone so that per-request state never touches the shared object.
        if timeout is _DEFAULT_TIMEOUT:
            return self.timeout.clone()

        if isinstance(timeout, Timeout):
            return timeout.clone()
        else:
            # User passed us an int/float. This is for backwards compatibility,
            # can be removed later
            return Timeout.from_float(timeout)

    def _raise_timeout(
        self,
        err: BaseSSLError | OSError | SocketTimeout,
        url: str,
        timeout_value: _TYPE_TIMEOUT | None,
    ) -> None:
        """
        Is the error actually a timeout? Will raise a ReadTimeout or pass

        :param err: The exception caught by the caller.
        :param url: URL reported in the raised error.
        :param timeout_value: Timeout value reported in the error message.

        :raises ReadTimeoutError:
            If ``err`` is a socket timeout or carries one of the
            ``_blocking_errnos``. Otherwise returns ``None`` and the caller
            is expected to re-raise the original error itself.
        """

        if isinstance(err, SocketTimeout):
            raise ReadTimeoutError(
                self, url, f"Read timed out. (read timeout={timeout_value})"
            ) from err

        # See the above comment about EAGAIN in Python 3.
        if hasattr(err, "errno") and err.errno in _blocking_errnos:
            raise ReadTimeoutError(
                self, url, f"Read timed out. (read timeout={timeout_value})"
            ) from err

    def _make_request(
        self,
        conn: BaseHTTPConnection,
        method: str,
        url: str,
        body: _TYPE_BODY | None = None,
        headers: typing.Mapping[str, str] | None = None,
        retries: Retry | None = None,
        timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
        chunked: bool = False,
        response_conn: BaseHTTPConnection | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        enforce_content_length: bool = True,
    ) -> BaseHTTPResponse:
        """
        Perform a request on a given urllib connection object taken from our
        pool.

        :param conn:
            a connection from one of our connection pools

        :param method:
            HTTP request method (such as GET, POST, PUT, etc.)

        :param url:
            The URL to perform the request on.

        :param body:
            Data to send in the request body, either :class:`str`, :class:`bytes`,
            an iterable of :class:`str`/:class:`bytes`, or a file-like object.

        :param headers:
            Dictionary of custom headers to send, such as User-Agent,
            If-None-Match, etc. Passed to ``conn.request()`` as given.

        :param retries:
            Retry state to attach to the returned response as
            ``response.retries``. This method performs a single attempt and
            does not itself retry; retrying is driven by :meth:`urlopen`.

        :param timeout:
            If specified, overrides the default timeout for this one
            request. It may be a float (in seconds) or an instance of
            :class:`urllib3.util.Timeout`.

        :param chunked:
            If True, urllib3 will send the body using chunked transfer
            encoding. Otherwise, urllib3 will send the body using the standard
            content-length form. Defaults to False.

        :param response_conn:
            Set this to ``None`` if you will handle releasing the connection or
            set the connection to have the response release it.

        :param preload_content:
            If True, the response's body will be preloaded during construction.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param enforce_content_length:
            Enforce content length checking. Body returned by server must match
            value of Content-Length header, if present. Otherwise, raise error.

        :raises ReadTimeoutError:
            If the read timeout is zero, or if validating the connection or
            reading the response fails with a timeout-like error.
        """
        self.num_requests += 1

        timeout_obj = self._get_timeout(timeout)
        timeout_obj.start_connect()
        conn.timeout = Timeout.resolve_default_timeout(timeout_obj.connect_timeout)

        try:
            # Trigger any extra validation we need to do.
            try:
                self._validate_conn(conn)
            except (SocketTimeout, BaseSSLError) as e:
                self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
                raise

        # _validate_conn() starts the connection to an HTTPS proxy
        # so we need to wrap errors with 'ProxyError' here too.
        except (
            OSError,
            NewConnectionError,
            TimeoutError,
            BaseSSLError,
            CertificateError,
            SSLError,
        ) as e:
            new_e: Exception = e
            if isinstance(e, (BaseSSLError, CertificateError)):
                new_e = SSLError(e)
            # If the connection didn't successfully connect to its proxy
            # then the failure is reported as a proxy error instead.
            if isinstance(
                new_e, (OSError, NewConnectionError, TimeoutError, SSLError)
            ) and (conn and conn.proxy and not conn.has_connected_to_proxy):
                new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)
            raise new_e

        # conn.request() calls http.client.*.request, not the method in
        # urllib3.request. It also calls makefile (recv) on the socket.
        try:
            conn.request(
                method,
                url,
                body=body,
                headers=headers,
                chunked=chunked,
                preload_content=preload_content,
                decode_content=decode_content,
                enforce_content_length=enforce_content_length,
            )

        # We are swallowing BrokenPipeError (errno.EPIPE) since the server is
        # legitimately able to close the connection after sending a valid response.
        # With this behaviour, the received response is still readable.
        except BrokenPipeError:
            pass
        except OSError as e:
            # MacOS/Linux
            # EPROTOTYPE and ECONNRESET are needed on macOS
            # https://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/
            # Condition changed later to emit ECONNRESET instead of only EPROTOTYPE.
            if e.errno != errno.EPROTOTYPE and e.errno != errno.ECONNRESET:
                raise

        # Reset the timeout for the recv() on the socket
        read_timeout = timeout_obj.read_timeout

        if not conn.is_closed:
            # In Python 3 socket.py will catch EAGAIN and return None when you
            # try and read into the file pointer created by http.client, which
            # instead raises a BadStatusLine exception. Instead of catching
            # the exception and assuming all BadStatusLine exceptions are read
            # timeouts, check for a zero timeout before making the request.
            if read_timeout == 0:
                raise ReadTimeoutError(
                    self, url, f"Read timed out. (read timeout={read_timeout})"
                )
            conn.timeout = read_timeout

        # Receive the response from the server
        try:
            response = conn.getresponse()
        except (BaseSSLError, OSError) as e:
            self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
            raise

        # Set properties that are used by the pooling layer.
        response.retries = retries
        response._connection = response_conn  # type: ignore[attr-defined]
        response._pool = self  # type: ignore[attr-defined]

        # emscripten connection doesn't have _http_vsn_str
        http_version = getattr(conn, "_http_vsn_str", "HTTP/?")
        log.debug(
            '%s://%s:%s "%s %s %s" %s %s',
            self.scheme,
            self.host,
            self.port,
            method,
            url,
            # HTTP version
            http_version,
            response.status,
            response.length_remaining,
        )

        return response

    def close(self) -> None:
        """
        Close all pooled connections and disable the pool.

        Calling it again on an already closed pool is a no-op.
        """
        if self.pool is None:
            return
        # Disable access to the pool
        old_pool, self.pool = self.pool, None

        # Close all the HTTPConnections in the pool.
        _close_pool_connections(old_pool)

    def is_same_host(self, url: str) -> bool:
        """
        Check if the given ``url`` is a member of the same host as this
        connection pool.

        URLs starting with ``/`` are always considered same-host. Otherwise
        scheme, normalized host and port must all match the pool's.
        """
        if url.startswith("/"):
            return True

        # TODO: Add optional support for socket.gethostbyname checking.
        scheme, _, host, port, *_ = parse_url(url)
        scheme = scheme or "http"
        if host is not None:
            host = _normalize_host(host, scheme=scheme)

        # Use explicit default port for comparison when none is given
        if self.port and not port:
            port = port_by_scheme.get(scheme)
        elif not self.port and port == port_by_scheme.get(scheme):
            port = None

        return (scheme, host, port) == (self.scheme, self.host, self.port)

    def urlopen(  # type: ignore[override]
        self,
        method: str,
        url: str,
        body: _TYPE_BODY | None = None,
        headers: typing.Mapping[str, str] | None = None,
        retries: Retry | bool | int | None = None,
        redirect: bool = True,
        assert_same_host: bool = True,
        timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
        pool_timeout: int | None = None,
        release_conn: bool | None = None,
        chunked: bool = False,
        body_pos: _TYPE_BODY_POSITION | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        **response_kw: typing.Any,
    ) -> BaseHTTPResponse:
        """
        Get a connection from the pool and perform an HTTP request. This is the
        lowest level call for making a request, so you'll need to specify all
        the raw details.

        .. note::

           More commonly, it's appropriate to use a convenience method
           such as :meth:`request`.

        .. note::

           `release_conn` will only behave as expected if
           `preload_content=False` because we want to make
           `preload_content=False` the default behaviour someday soon without
           breaking backwards compatibility.

        :param method:
            HTTP request method (such as GET, POST, PUT, etc.)

        :param url:
            The URL to perform the request on.

        :param body:
            Data to send in the request body, either :class:`str`, :class:`bytes`,
            an iterable of :class:`str`/:class:`bytes`, or a file-like object.

        :param headers:
            Dictionary of custom headers to send, such as User-Agent,
            If-None-Match, etc. If None, pool headers are used. If provided,
            these headers completely replace any pool-specific headers.

        :param retries:
            Configure the number of retries to allow before raising a
            :class:`~urllib3.exceptions.MaxRetryError` exception.

            If ``None`` (default) will retry 3 times, see ``Retry.DEFAULT``. Pass a
            :class:`~urllib3.util.retry.Retry` object for fine-grained control
            over different types of retries.
            Pass an integer number to retry connection errors that many times,
            but no other types of errors. Pass zero to never retry.

            If ``False``, then retries are disabled and any exception is raised
            immediately. Also, instead of raising a MaxRetryError on redirects,
            the redirect response will be returned.

        :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.

        :param redirect:
            If True, automatically handle redirects (status codes 301, 302,
            303, 307, 308). Each redirect counts as a retry. Disabling retries
            will disable redirect, too.

        :param assert_same_host:
            If ``True``, will make sure that the host of the pool requests is
            consistent else will raise HostChangedError. When ``False``, you can
            use the pool on an HTTP proxy and request foreign hosts.

        :param timeout:
            If specified, overrides the default timeout for this one
            request. It may be a float (in seconds) or an instance of
            :class:`urllib3.util.Timeout`.

        :param pool_timeout:
            If set and the pool is set to block=True, then this method will
            block for ``pool_timeout`` seconds and raise EmptyPoolError if no
            connection is available within the time period.

        :param bool preload_content:
            If True, the response's body will be preloaded into memory.

        :param bool decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param release_conn:
            If False, then the urlopen call will not release the connection
            back into the pool once a response is received (but will release if
            you read the entire contents of the response such as when
            `preload_content=True`). This is useful if you're not preloading
            the response's content immediately. You will need to call
            ``r.release_conn()`` on the response ``r`` to return the connection
            back into the pool. If None, it takes the value of ``preload_content``
            which defaults to ``True``.

        :param bool chunked:
            If True, urllib3 will send the body using chunked transfer
            encoding. Otherwise, urllib3 will send the body using the standard
            content-length form. Defaults to False.

        :param int body_pos:
            Position to seek to in file-like body in the event of a retry or
            redirect. Typically this won't need to be set because urllib3 will
            auto-populate the value when needed.
        """
        parsed_url = parse_url(url)
        destination_scheme = parsed_url.scheme

        if headers is None:
            headers = self.headers

        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect, default=self.retries)

        if release_conn is None:
            release_conn = preload_content

        # Check host
        if assert_same_host and not self.is_same_host(url):
            raise HostChangedError(self, url, retries)

        # Ensure that the URL we're connecting to is properly encoded
        if url.startswith("/"):
            url = to_str(_encode_target(url))
        else:
            url = to_str(parsed_url.url)

        conn = None

        # Track whether `conn` needs to be released before
        # returning/raising/recursing. Update this variable if necessary, and
        # leave `release_conn` constant throughout the function. That way, if
        # the function recurses, the original value of `release_conn` will be
        # passed down into the recursive call, and its value will be respected.
        #
        # See issue #651 [1] for details.
        #
        # [1] <https://github.com/urllib3/urllib3/issues/651>
        release_this_conn = release_conn

        http_tunnel_required = connection_requires_http_tunnel(
            self.proxy, self.proxy_config, destination_scheme
        )

        # Merge the proxy headers. Only done when not using HTTP CONNECT. We
        # have to copy the headers dict so we can safely change it without those
        # changes being reflected in anyone else's copy.
        if not http_tunnel_required:
            headers = headers.copy()  # type: ignore[attr-defined]
            headers.update(self.proxy_headers)  # type: ignore[union-attr]

        # Must keep the exception bound to a separate variable or else Python 3
        # complains about UnboundLocalError.
        err = None

        # Keep track of whether we cleanly exited the except block. This
        # ensures we do proper cleanup in finally.
        clean_exit = False

        # Rewind body position, if needed. Record current position
        # for future rewinds in the event of a redirect/retry.
        body_pos = set_file_position(body, body_pos)

        try:
            # Request a connection from the queue.
            timeout_obj = self._get_timeout(timeout)
            conn = self._get_conn(timeout=pool_timeout)

            conn.timeout = timeout_obj.connect_timeout  # type: ignore[assignment]

            # Is this a closed/new connection that requires CONNECT tunnelling?
            if self.proxy is not None and http_tunnel_required and conn.is_closed:
                try:
                    self._prepare_proxy(conn)
                except (BaseSSLError, OSError, SocketTimeout) as e:
                    self._raise_timeout(
                        err=e, url=self.proxy.url, timeout_value=conn.timeout
                    )
                    raise

            # If we're going to release the connection in ``finally:``, then
            # the response doesn't need to know about the connection. Otherwise
            # it will also try to release it and we'll have a double-release
            # mess.
            response_conn = conn if not release_conn else None

            # Make the request on the HTTPConnection object
            response = self._make_request(
                conn,
                method,
                url,
                timeout=timeout_obj,
                body=body,
                headers=headers,
                chunked=chunked,
                retries=retries,
                response_conn=response_conn,
                preload_content=preload_content,
                decode_content=decode_content,
                **response_kw,
            )

            # Everything went great!
            clean_exit = True

        except EmptyPoolError:
            # Didn't get a connection from the pool, no need to clean up
            clean_exit = True
            release_this_conn = False
            raise

        except (
            TimeoutError,
            HTTPException,
            OSError,
            ProtocolError,
            BaseSSLError,
            SSLError,
            CertificateError,
            ProxyError,
        ) as e:
            # Discard the connection for these exceptions. It will be
            # replaced during the next _get_conn() call.
            clean_exit = False
            new_e: Exception = e
            if isinstance(e, (BaseSSLError, CertificateError)):
                new_e = SSLError(e)
            if isinstance(
                new_e,
                (
                    OSError,
                    NewConnectionError,
                    TimeoutError,
                    SSLError,
                    HTTPException,
                ),
            ) and (conn and conn.proxy and not conn.has_connected_to_proxy):
                new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)
            elif isinstance(new_e, (OSError, HTTPException)):
                new_e = ProtocolError("Connection aborted.", new_e)

            retries = retries.increment(
                method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
            )
            retries.sleep()

            # Keep track of the error for the retry warning.
            err = e

        finally:
            if not clean_exit:
                # We hit some kind of exception, handled or otherwise. We need
                # to throw the connection away unless explicitly told not to.
                # Close the connection, set the variable to None, and make sure
                # we put the None back in the pool to avoid leaking it.
                if conn:
                    conn.close()
                    conn = None
                release_this_conn = True

            if release_this_conn:
                # Put the connection back to be reused. If the connection is
                # expired then it will be None, which will get replaced with a
                # fresh connection during _get_conn.
                self._put_conn(conn)

        if not conn:
            # Try again
            log.warning(
                "Retrying (%r) after connection broken by '%r': %s", retries, err, url
            )
            return self.urlopen(
                method,
                url,
                body,
                headers,
                retries,
                redirect,
                assert_same_host,
                timeout=timeout,
                pool_timeout=pool_timeout,
                release_conn=release_conn,
                chunked=chunked,
                body_pos=body_pos,
                preload_content=preload_content,
                decode_content=decode_content,
                **response_kw,
            )

        # Handle redirect?
        redirect_location = redirect and response.get_redirect_location()
        if redirect_location:
            if response.status == 303:
                # Change the method according to RFC 9110, Section 15.4.4.
                method = "GET"
                # And lose the body not to transfer anything sensitive.
                body = None
                headers = HTTPHeaderDict(headers)._prepare_for_method_change()

            try:
                retries = retries.increment(method, url, response=response, _pool=self)
            except MaxRetryError:
                if retries.raise_on_redirect:
                    response.drain_conn()
                    raise
                return response

            response.drain_conn()
            retries.sleep_for_retry(response)
            log.debug("Redirecting %s -> %s", url, redirect_location)
            return self.urlopen(
                method,
                redirect_location,
                body,
                headers,
                retries=retries,
                redirect=redirect,
                assert_same_host=assert_same_host,
                timeout=timeout,
                pool_timeout=pool_timeout,
                release_conn=release_conn,
                chunked=chunked,
                body_pos=body_pos,
                preload_content=preload_content,
                decode_content=decode_content,
                **response_kw,
            )

        # Check if we should retry the HTTP response.
        has_retry_after = bool(response.headers.get("Retry-After"))
        if retries.is_retry(method, response.status, has_retry_after):
            try:
                retries = retries.increment(method, url, response=response, _pool=self)
            except MaxRetryError:
                if retries.raise_on_status:
                    response.drain_conn()
                    raise
                return response

            response.drain_conn()
            retries.sleep(response)
            log.debug("Retry: %s", url)
            return self.urlopen(
                method,
                url,
                body,
                headers,
                retries=retries,
                redirect=redirect,
                assert_same_host=assert_same_host,
                timeout=timeout,
                pool_timeout=pool_timeout,
                release_conn=release_conn,
                chunked=chunked,
                body_pos=body_pos,
                preload_content=preload_content,
                decode_content=decode_content,
                **response_kw,
            )

        return response
|
|
|
|
|
|
class HTTPSConnectionPool(HTTPConnectionPool):
    """
    HTTPS counterpart of :class:`.HTTPConnectionPool`.

    Connections are verified by :class:`.HTTPSConnection` using one of
    ``assert_fingerprint``, ``assert_hostname`` and ``host``, tried in that
    order. When ``assert_hostname`` is False, no verification is done.

    ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``, ``ca_cert_dir``,
    ``ssl_version`` and ``key_password`` only take effect when :mod:`ssl` is
    available; they are passed on to :meth:`urllib3.util.ssl_wrap_socket`,
    which upgrades the connection socket into an SSL socket.
    """

    scheme = "https"
    ConnectionCls: type[BaseHTTPSConnection] = HTTPSConnection

    def __init__(
        self,
        host: str,
        port: int | None = None,
        timeout: _TYPE_TIMEOUT | None = _DEFAULT_TIMEOUT,
        maxsize: int = 1,
        block: bool = False,
        headers: typing.Mapping[str, str] | None = None,
        retries: Retry | bool | int | None = None,
        _proxy: Url | None = None,
        _proxy_headers: typing.Mapping[str, str] | None = None,
        key_file: str | None = None,
        cert_file: str | None = None,
        cert_reqs: int | str | None = None,
        key_password: str | None = None,
        ca_certs: str | None = None,
        ssl_version: int | str | None = None,
        ssl_minimum_version: ssl.TLSVersion | None = None,
        ssl_maximum_version: ssl.TLSVersion | None = None,
        assert_hostname: str | Literal[False] | None = None,
        assert_fingerprint: str | None = None,
        ca_cert_dir: str | None = None,
        **conn_kw: typing.Any,
    ) -> None:
        """
        Set up the generic pool state via the parent class, then remember the
        TLS-specific options so that :meth:`_new_conn` can hand them to every
        connection it creates.
        """
        # Everything that is not TLS-specific is handled by the base pool.
        super().__init__(
            host,
            port,
            timeout,
            maxsize,
            block,
            headers,
            retries,
            _proxy,
            _proxy_headers,
            **conn_kw,
        )

        # Client certificate material.
        self.cert_file = cert_file
        self.key_file = key_file
        self.key_password = key_password

        # Trust store and verification requirements.
        self.cert_reqs = cert_reqs
        self.ca_certs = ca_certs
        self.ca_cert_dir = ca_cert_dir
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint

        # Protocol version constraints.
        self.ssl_version = ssl_version
        self.ssl_minimum_version = ssl_minimum_version
        self.ssl_maximum_version = ssl_maximum_version

    def _prepare_proxy(self, conn: HTTPSConnection) -> None:  # type: ignore[override]
        """Set up an HTTP CONNECT tunnel on ``conn`` and connect it."""
        proxy = self.proxy
        # The tunnel scheme follows the proxy's own scheme; anything other
        # than an "https" proxy (including no proxy) is tunnelled as "http".
        tunnel_scheme = "https" if proxy and proxy.scheme == "https" else "http"

        conn.set_tunnel(
            scheme=tunnel_scheme,
            host=self._tunnel_host,
            port=self.port,
            headers=self.proxy_headers,
        )
        conn.connect()

    def _new_conn(self) -> BaseHTTPSConnection:
        """
        Return a fresh HTTPS connection created from ``ConnectionCls``.

        When a proxy with a host is configured, the connection is addressed
        to the proxy's host and port instead of the pool's own.

        :raises ImportError:
            If ``ConnectionCls`` is unset or is the ``DummyConnection``
            placeholder.
        """
        self.num_connections += 1
        log.debug(
            "Starting new HTTPS connection (%d): %s:%s",
            self.num_connections,
            self.host,
            self.port or "443",
        )

        conn_cls = self.ConnectionCls
        if conn_cls is DummyConnection or not conn_cls:  # type: ignore[comparison-overlap]
            raise ImportError(
                "Can't connect to HTTPS URL because the SSL module is not available."
            )

        proxy = self.proxy
        target_host: str
        if proxy is not None and proxy.host is not None:
            target_host, target_port = proxy.host, proxy.port
        else:
            target_host, target_port = self.host, self.port

        return conn_cls(
            host=target_host,
            port=target_port,
            timeout=self.timeout.connect_timeout,
            cert_file=self.cert_file,
            key_file=self.key_file,
            key_password=self.key_password,
            cert_reqs=self.cert_reqs,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            assert_hostname=self.assert_hostname,
            assert_fingerprint=self.assert_fingerprint,
            ssl_version=self.ssl_version,
            ssl_minimum_version=self.ssl_minimum_version,
            ssl_maximum_version=self.ssl_maximum_version,
            **self.conn_kw,
        )

    def _validate_conn(self, conn: BaseHTTPConnection) -> None:
        """
        Hook invoked right before a request is made, after the socket is created.

        Emits an :class:`InsecureRequestWarning` when neither the connection
        nor its proxy reports itself as verified.
        """
        super()._validate_conn(conn)

        # Connect eagerly so the verification flags below are meaningful.
        if conn.is_closed:
            conn.connect()

        # TODO revise this, see https://github.com/urllib3/urllib3/issues/2791
        if conn.is_verified or conn.proxy_is_verified:
            return

        message = (
            f"Unverified HTTPS request is being made to host '{conn.host}'. "
            "Adding certificate verification is strongly advised. See: "
            "https://urllib3.readthedocs.io/en/latest/advanced-usage.html"
            "#tls-warnings"
        )
        warnings.warn(message, InsecureRequestWarning)
|
|
|
|
|
|
def connection_from_url(url: str, **kw: typing.Any) -> HTTPConnectionPool:
    """
    Build the :class:`.ConnectionPool` that serves the host of ``url``.

    Saves the caller from parsing the scheme, host and port out of the url
    before constructing a :class:`.ConnectionPool` by hand.

    :param url:
        Absolute URL string that must include the scheme. Port is optional.

    :param \\**kw:
        Extra keyword arguments forwarded to the constructor of the chosen
        :class:`.ConnectionPool`, e.g. timeout, maxsize, headers, etc.

    Example::

        >>> conn = connection_from_url('http://google.com/')
        >>> r = conn.request('GET', '/')
    """
    parsed_scheme, _, host, parsed_port, *_ = parse_url(url)

    # A URL without a scheme is treated as plain HTTP.
    scheme = parsed_scheme if parsed_scheme else "http"
    # Prefer an explicit port, else the scheme's default (80 when unknown).
    port = parsed_port if parsed_port else port_by_scheme.get(scheme, 80)

    pool_cls = HTTPSConnectionPool if scheme == "https" else HTTPConnectionPool
    return pool_cls(host, port=port, **kw)  # type: ignore[arg-type]
|
|
|
|
|
|
@typing.overload
def _normalize_host(host: None, scheme: str | None) -> None:
    ...


@typing.overload
def _normalize_host(host: str, scheme: str | None) -> str:
    ...


def _normalize_host(host: str | None, scheme: str | None) -> str | None:
    """
    Normalize a host so it can be compared and handed to sockets.

    The host is first passed through ``normalize_host``; a result wrapped in
    square brackets then has that one pair of brackets removed.
    """
    normalized = normalize_host(host, scheme)

    # ``None`` and the empty string are returned as they are.
    if not normalized:
        return normalized

    # httplib does not cope with bracketed IPv6 addresses: when the brackets
    # are kept and a port is passed as well, it doubles up the square
    # brackets on the Host header. The real remedy is to never pass ``None``
    # as the port, but for backward compatibility reasons that cannot be
    # *asserted* here. See http://bugs.python.org/issue28539
    is_bracketed = normalized.startswith("[") and normalized.endswith("]")
    return normalized[1:-1] if is_bracketed else normalized
|
|
|
|
|
|
def _url_from_pool(
    pool: HTTPConnectionPool | HTTPSConnectionPool, path: str | None = None
) -> str:
    """Build the URL string for a connection pool. This is mainly used for testing and logging."""
    # Combine the pool's scheme/host/port with the optional path.
    pool_url = Url(scheme=pool.scheme, host=pool.host, port=pool.port, path=path)
    return pool_url.url
|
|
|
|
|
|
def _close_pool_connections(pool: queue.LifoQueue[typing.Any]) -> None:
    """Empty a queue of connections, closing every truthy entry taken from it."""
    try:
        # The non-blocking ``get`` raises ``queue.Empty`` once the queue has
        # been drained, which is what terminates this loop.
        while True:
            entry = pool.get(block=False)
            if not entry:
                # Falsy entries (e.g. ``None``) have nothing to close.
                continue
            entry.close()
    except queue.Empty:
        return  # Done.
|