Hi!
I came across an issue recently: our team uses Ansible with a common task that checks whether a deployment was successful by sending HTTP requests to a healthcheck endpoint. Setting up Nginx with the PROXY protocol enabled revealed that common Python libraries have no client-side support for the protocol.
The PROXY protocol requires its header to be sent before any TLS, HTTP, IMAP or other protocol data, so it looks like HTTPConnection should send that header on behalf of the client right after the connection is opened.
The protocol is rather popular: the Nginx, HAProxy and Envoy load balancers, as well as the AWS and GCP managed load balancers, support it. The curl tool offers a corresponding option, --haproxy-protocol.
At the same time, the only Python implementation I found was a server-side one: the proxy-protocol package on PyPI.
It feels like Python could also allow connecting to such servers without reimplementing HTTP libraries from scratch.
As a proof of concept, I created a simple socket wrapper that can be used with any higher-level protocol such as HTTP, HTTPS or IMAP:
diff --git a/Lib/haproxyproto.py b/Lib/haproxyproto.py
new file mode 100644
index 00000000000..dfe422aa164
--- /dev/null
+++ b/Lib/haproxyproto.py
@@ -0,0 +1,30 @@
+"""Client-side implementation of the HAProxy PROXY protocol, version 1.
+
+Specification: https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
+
+Exposes haproxified(), which sends the PROXYv1 header on a connected socket.
+"""
+
+from socket import SocketKind, AddressFamily
+
+
+def haproxified(sock):
+ """Sends PROXYv1 header to the 'sock'"""
+
+ # PROXYv1 protocol only works for TCP streams
+ if sock.type != SocketKind.SOCK_STREAM:
+ raise TypeError('Can only haproxify SOCK_STREAM')
+
+ if sock.family not in (AddressFamily.AF_INET, AddressFamily.AF_INET6):
+ raise TypeError('Can only haproxify AF_INET and AF_INET6')
+
+ laddr = sock.getsockname()
+ raddr = sock.getpeername()
+
+ # Both PROXYv2 and PROXYv1 support v1's human-readable header,
+ # so use it unconditionally.
+ family = 'TCP4' if sock.family == AddressFamily.AF_INET else 'TCP6'
+ header = f'PROXY {family} {laddr[0]} {raddr[0]} {laddr[1]} {raddr[1]}\r\n'
+ sock.sendall(header.encode('utf-8'))
+
+ return sock
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 4b9a61cfc11..350d83dd40e 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -78,6 +78,7 @@
import sys
import collections.abc
from urllib.parse import urlsplit
+from haproxyproto import haproxified
# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
@@ -868,7 +869,8 @@ def _get_content_length(body, method):
return None
def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
- source_address=None, blocksize=8192, *, max_response_headers=None):
+ source_address=None, blocksize=8192, *, max_response_headers=None,
+ haproxify=False):
self.timeout = timeout
self.source_address = source_address
self.blocksize = blocksize
@@ -882,6 +884,7 @@ def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
self._tunnel_headers = {}
self._raw_proxy_headers = None
self.max_response_headers = max_response_headers
+ self.haproxify = haproxify
(self.host, self.port) = self._get_hostport(host, port)
@@ -1013,6 +1016,9 @@ def connect(self):
if e.errno != errno.ENOPROTOOPT:
raise
+ if self.haproxify:
+ self.sock = haproxified(self.sock)
+
if self._tunnel_host:
self._tunnel()
@@ -1465,11 +1471,12 @@ class HTTPSConnection(HTTPConnection):
def __init__(self, host, port=None,
*, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None, context=None, blocksize=8192,
- max_response_headers=None):
+ max_response_headers=None, haproxify=False):
super(HTTPSConnection, self).__init__(host, port, timeout,
source_address,
blocksize=blocksize,
- max_response_headers=max_response_headers)
+ max_response_headers=max_response_headers,
+ haproxify=haproxify)
if context is None:
context = _create_https_context(self._http_vsn)
self._context = context
Looking forward to your thoughts and ideas regarding this little addition :)
Thanks!