From 559caf920537ece2ef058e1de5e36af44756bb19 Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Wed, 6 Dec 2023 15:30:50 +0100 Subject: [PATCH 01/16] pytest: raise on malformed test fixtures and unbreak test depending on backslash escape --- tests/treq.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/treq.py b/tests/treq.py index ffe0691fd..05adb1461 100644 --- a/tests/treq.py +++ b/tests/treq.py @@ -51,7 +51,9 @@ def __init__(self, fname, expect): with open(self.fname, 'rb') as handle: self.data = handle.read() self.data = self.data.replace(b"\n", b"").replace(b"\\r\\n", b"\r\n") - self.data = self.data.replace(b"\\0", b"\000") + self.data = self.data.replace(b"\\0", b"\000").replace(b"\\n", b"\n").replace(b"\\t", b"\t") + if b"\\" in self.data: + raise AssertionError("Unexpected backslash in test data - only handling HTAB, NUL and CRLF") # Functions for sending data to the parser. # These functions mock out reading from a @@ -262,7 +264,8 @@ def same(self, req, sizer, matcher, exp): assert req.trailers == exp.get("trailers", []) -class badrequest(object): +class badrequest: + # FIXME: no good reason why this cannot match what the more extensive mechanism above def __init__(self, fname): self.fname = fname self.name = os.path.basename(fname) @@ -270,7 +273,9 @@ def __init__(self, fname): with open(self.fname) as handle: self.data = handle.read() self.data = self.data.replace("\n", "").replace("\\r\\n", "\r\n") - self.data = self.data.replace("\\0", "\000") + self.data = self.data.replace("\\0", "\000").replace("\\n", "\n").replace("\\t", "\t") + if "\\" in self.data: + raise AssertionError("Unexpected backslash in test data - only handling HTAB, NUL and CRLF") self.data = self.data.encode('latin1') def send(self): @@ -283,4 +288,6 @@ def send(self): def check(self, cfg): p = RequestParser(cfg, self.send(), None) - next(p) + # must fully consume iterator, otherwise EOF errors could go unnoticed + for _ in p: + pass From 2dbe49de99f681923330d98f5b872730c73943d0 Mon Sep 17 00:00:00 2001 From: Ben Kallus Date: Mon, 28 Aug 2023 22:32:36 -0400 Subject: [PATCH 02/16] RFC compliant header field+chunk validation * update HEADER_RE and HEADER_VALUE_RE to match the RFCs * update chunk length parsing to disallow 0x prefix and digit-separating underscores. --- gunicorn/http/body.py | 5 ++--- gunicorn/http/message.py | 2 +- gunicorn/http/wsgi.py | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/gunicorn/http/body.py b/gunicorn/http/body.py index aa1af2cb3..41fe334bc 100644 --- a/gunicorn/http/body.py +++ b/gunicorn/http/body.py @@ -86,10 +86,9 @@ def parse_chunk_size(self, unreader, data=None): line, rest_chunk = data[:idx], data[idx + 2:] chunk_size = line.split(b";", 1)[0].strip() - try: - chunk_size = int(chunk_size, 16) - except ValueError: + if any(n not in b"0123456789abcdefABCDEF" for n in chunk_size): raise InvalidChunkSize(chunk_size) + chunk_size = int(chunk_size, 16) if chunk_size == 0: try: diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 1f93c7145..0006fa61b 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -21,7 +21,7 @@ MAX_HEADERS = 32768 DEFAULT_MAX_HEADERFIELD_SIZE = 8190 -HEADER_RE = re.compile(r"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\"]") +HEADER_RE = re.compile(r"[^!#$%&'*+\-.\^_`|~0-9a-zA-Z]") METH_RE = re.compile(r"[A-Z0-9$-_.]{3,20}") VERSION_RE = re.compile(r"HTTP/(\d+)\.(\d+)") diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 25715eab7..10c5a3dd5 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -18,7 +18,7 @@ # with sending files in blocks over 2GB. BLKSIZE = 0x3FFFFFFF -HEADER_VALUE_RE = re.compile(r'[\x00-\x1F\x7F]') +HEADER_VALUE_RE = re.compile(r'[^ \t\x21-\x7e\x80-\xff]') log = logging.getLogger(__name__) From 735e9e867af23acfbe8eca9ea2e546542d65e8c9 Mon Sep 17 00:00:00 2001 From: Ben Kallus Date: Mon, 4 Dec 2023 17:08:16 -0500 Subject: [PATCH 03/16] Disallow empty header names. --- gunicorn/http/message.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 0006fa61b..11ee4d15c 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -83,7 +83,7 @@ def parse_headers(self, data): # Parse initial header name : value pair. curr = lines.pop(0) header_length = len(curr) - if curr.find(":") < 0: + if curr.find(":") <= 0: raise InvalidHeader(curr.strip()) name, value = curr.split(":", 1) if self.cfg.strip_header_spaces: From 72238fcf8d29db62fdbb9d2481ced801e27fe139 Mon Sep 17 00:00:00 2001 From: Ben Kallus Date: Wed, 6 Dec 2023 17:28:40 -0500 Subject: [PATCH 04/16] RFC compliant request line and header parsing - Unify HEADER_RE and METH_RE - Replace CRLF with SP during obs-fold processing (See RFC 9112 Section 5.2, last paragraph) - Stop stripping header names. - Remove HTAB in OWS in header values that use obs-fold (See RFC 9112 Section 5.2, last paragraph) - Use fullmatch instead of search, which has problems with empty strings. (See GHSA-68xg-gqqm-vgj8) - Split proxy protocol line on space only. (See proxy protocol Section 2.1, bullet 3) - Use fullmatch for method and version (Thank you to Paul Dorn for noticing this.) - Replace calls to str.strip() with str.strip(' \t') - Split request line on SP only. Co-authored-by: Paul Dorn --- gunicorn/http/message.py | 33 +++++++++-------- gunicorn/http/wsgi.py | 23 ++++++------ tests/requests/invalid/003.http | 4 +-- tests/requests/invalid/003.py | 4 +-- tests/requests/valid/016.py | 64 ++++++++++++++++----------------- tests/requests/valid/031.http | 2 ++ tests/requests/valid/031.py | 7 ++++ 7 files changed, 74 insertions(+), 63 deletions(-) create mode 100644 tests/requests/valid/031.http create mode 100644 tests/requests/valid/031.py diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 11ee4d15c..db3d44bb2 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -21,8 +21,7 @@ MAX_HEADERS = 32768 DEFAULT_MAX_HEADERFIELD_SIZE = 8190 -HEADER_RE = re.compile(r"[^!#$%&'*+\-.\^_`|~0-9a-zA-Z]") -METH_RE = re.compile(r"[A-Z0-9$-_.]{3,20}") +TOKEN_RE = re.compile(r"[!#$%&'*+\-.\^_`|~0-9a-zA-Z]+") VERSION_RE = re.compile(r"HTTP/(\d+)\.(\d+)") @@ -63,8 +62,8 @@ def parse_headers(self, data): cfg = self.cfg headers = [] - # Split lines on \r\n keeping the \r\n on each line - lines = [bytes_to_str(line) + "\r\n" for line in data.split(b"\r\n")] + # Split lines on \r\n + lines = [bytes_to_str(line) for line in data.split(b"\r\n")] # handle scheme headers scheme_header = False @@ -80,30 +79,30 @@ def parse_headers(self, data): if len(headers) >= self.limit_request_fields: raise LimitRequestHeaders("limit request headers fields") - # Parse initial header name : value pair. + # Parse initial header name: value pair. curr = lines.pop(0) - header_length = len(curr) + header_length = len(curr) + len("\r\n") if curr.find(":") <= 0: - raise InvalidHeader(curr.strip()) + raise InvalidHeader(curr) name, value = curr.split(":", 1) if self.cfg.strip_header_spaces: name = name.rstrip(" \t").upper() else: name = name.upper() - if HEADER_RE.search(name): + if not TOKEN_RE.fullmatch(name): raise InvalidHeaderName(name) - name, value = name.strip(), [value.lstrip()] + value = [value.lstrip(" \t")] # Consume value continuation lines while lines and lines[0].startswith((" ", "\t")): curr = lines.pop(0) - header_length += len(curr) + header_length += len(curr) + len("\r\n") if header_length > self.limit_request_field_size > 0: raise LimitRequestHeaders("limit request headers " "fields size") - value.append(curr) - value = ''.join(value).rstrip() + value.append(curr.strip("\t ")) + value = " ".join(value) if header_length > self.limit_request_field_size > 0: raise LimitRequestHeaders("limit request headers fields size") @@ -156,7 +155,7 @@ def set_body_reader(self): def should_close(self): for (h, v) in self.headers: if h == "CONNECTION": - v = v.lower().strip() + v = v.lower().strip(" \t") if v == "close": return True elif v == "keep-alive": @@ -283,7 +282,7 @@ def proxy_protocol_access_check(self): raise ForbiddenProxyRequest(self.peer_addr[0]) def parse_proxy_protocol(self, line): - bits = line.split() + bits = line.split(" ") if len(bits) != 6: raise InvalidProxyLine(line) @@ -328,12 +327,12 @@ def parse_proxy_protocol(self, line): } def parse_request_line(self, line_bytes): - bits = [bytes_to_str(bit) for bit in line_bytes.split(None, 2)] + bits = [bytes_to_str(bit) for bit in line_bytes.split(b" ", 2)] if len(bits) != 3: raise InvalidRequestLine(bytes_to_str(line_bytes)) # Method - if not METH_RE.match(bits[0]): + if not TOKEN_RE.fullmatch(bits[0]): raise InvalidRequestMethod(bits[0]) self.method = bits[0].upper() @@ -349,7 +348,7 @@ def parse_request_line(self, line_bytes): self.fragment = parts.fragment or "" # Version - match = VERSION_RE.match(bits[2]) + match = VERSION_RE.fullmatch(bits[2]) if match is None: raise InvalidHTTPVersion(bits[2]) self.version = (int(match.group(1)), int(match.group(2))) diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 10c5a3dd5..7fca61422 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -9,7 +9,7 @@ import re import sys -from gunicorn.http.message import HEADER_RE +from gunicorn.http.message import TOKEN_RE from gunicorn.http.errors import InvalidHeader, InvalidHeaderName from gunicorn import SERVER_SOFTWARE, SERVER from gunicorn import util @@ -18,7 +18,9 @@ # with sending files in blocks over 2GB. BLKSIZE = 0x3FFFFFFF -HEADER_VALUE_RE = re.compile(r'[^ \t\x21-\x7e\x80-\xff]') +# RFC9110 5.5: field-vchar = VCHAR / obs-text +# RFC4234 B.1: VCHAR = 0x21-x07E = printable ASCII +HEADER_VALUE_RE = re.compile(r'[ \t\x21-\x7e\x80-\xff]*') log = logging.getLogger(__name__) @@ -249,31 +251,32 @@ def process_headers(self, headers): if not isinstance(name, str): raise TypeError('%r is not a string' % name) - if HEADER_RE.search(name): + if not TOKEN_RE.fullmatch(name): raise InvalidHeaderName('%r' % name) if not isinstance(value, str): raise TypeError('%r is not a string' % value) - if HEADER_VALUE_RE.search(value): + if not HEADER_VALUE_RE.fullmatch(value): raise InvalidHeader('%r' % value) - value = value.strip() - lname = name.lower().strip() + # RFC9110 5.5 + value = value.strip(" \t") + lname = name.lower() if lname == "content-length": self.response_length = int(value) elif util.is_hoppish(name): if lname == "connection": # handle websocket - if value.lower().strip() == "upgrade": + if value.lower() == "upgrade": self.upgrade = True elif lname == "upgrade": - if value.lower().strip() == "websocket": - self.headers.append((name.strip(), value)) + if value.lower() == "websocket": + self.headers.append((name, value)) # ignore hopbyhop headers continue - self.headers.append((name.strip(), value)) + self.headers.append((name, value)) def is_chunked(self): # Only use chunked responses when the client is diff --git a/tests/requests/invalid/003.http b/tests/requests/invalid/003.http index cd1ab7fcb..5a9eaafcd 100644 --- a/tests/requests/invalid/003.http +++ b/tests/requests/invalid/003.http @@ -1,2 +1,2 @@ --blargh /foo HTTP/1.1\r\n -\r\n \ No newline at end of file +GET\n/\nHTTP/1.1\r\n +\r\n diff --git a/tests/requests/invalid/003.py b/tests/requests/invalid/003.py index 86a0774e5..5a4ca8961 100644 --- a/tests/requests/invalid/003.py +++ b/tests/requests/invalid/003.py @@ -1,2 +1,2 @@ -from gunicorn.http.errors import InvalidRequestMethod -request = InvalidRequestMethod \ No newline at end of file +from gunicorn.http.errors import InvalidRequestLine +request = InvalidRequestLine diff --git a/tests/requests/valid/016.py b/tests/requests/valid/016.py index 139b27009..4e5144f8c 100644 --- a/tests/requests/valid/016.py +++ b/tests/requests/valid/016.py @@ -1,35 +1,35 @@ -certificate = """-----BEGIN CERTIFICATE-----\r\n - MIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n - ETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n - AkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n - dWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n - SzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n - BAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n - BQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n - W51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n - gW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n - 0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n - u2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n - wgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n - 1UdEwEB/wQCMAAwEQYJYIZIAYb4QgHTTPAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n - BglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n - VR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n - loCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n - aWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n - 9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n - IjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n - BgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n - cHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4Qg\r\n - EDBDAWLmh0dHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC\r\n - 5jcmwwPwYDVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n - Y3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n - XCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n - UO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n - hTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n - wTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n - Yhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n - RA==\r\n - -----END CERTIFICATE-----""".replace("\n\n", "\n") +certificate = """-----BEGIN CERTIFICATE----- + MIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx + ETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT + AkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu + dWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV + SzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV + BAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB + BQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF + W51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR + gW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL + 0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP + u2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR + wgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG + 1UdEwEB/wQCMAAwEQYJYIZIAYb4QgHTTPAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs + BglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD + VR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj + loCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj + aWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG + 9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE + IjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO + BgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1 + cHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4Qg + EDBDAWLmh0dHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC + 5jcmwwPwYDVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv + Y3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3 + XCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8 + UO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk + hTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK + wTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu + Yhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3 + RA== + -----END CERTIFICATE-----""".replace("\n", "") request = { "method": "GET", diff --git a/tests/requests/valid/031.http b/tests/requests/valid/031.http new file mode 100644 index 000000000..cd1ab7fcb --- /dev/null +++ b/tests/requests/valid/031.http @@ -0,0 +1,2 @@ +-blargh /foo HTTP/1.1\r\n +\r\n \ No newline at end of file diff --git a/tests/requests/valid/031.py b/tests/requests/valid/031.py new file mode 100644 index 000000000..9691a002b --- /dev/null +++ b/tests/requests/valid/031.py @@ -0,0 +1,7 @@ +request = { + "method": "-BLARGH", + "uri": uri("/foo"), + "version": (1, 1), + "headers": [], + "body": b"" +} From f0c91cca484820d1034f3a5278c0662aed2a23ea Mon Sep 17 00:00:00 2001 From: Tomi Belan Date: Sun, 22 May 2022 00:42:55 +0200 Subject: [PATCH 05/16] Check SCRIPT_NAME is at the request path's beginning --- gunicorn/http/errors.py | 9 +++++++++ gunicorn/http/wsgi.py | 8 ++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/gunicorn/http/errors.py b/gunicorn/http/errors.py index 7839ef05a..05abd1ab0 100644 --- a/gunicorn/http/errors.py +++ b/gunicorn/http/errors.py @@ -22,6 +22,15 @@ def __str__(self): return "No more data after: %r" % self.buf +class ConfigurationProblem(ParseException): + def __init__(self, info): + self.info = info + self.code = 500 + + def __str__(self): + return "Configuration problem: %s" % self.info + + class InvalidRequestLine(ParseException): def __init__(self, req): self.req = req diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index 7fca61422..bafed49eb 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -10,7 +10,7 @@ import sys from gunicorn.http.message import TOKEN_RE -from gunicorn.http.errors import InvalidHeader, InvalidHeaderName +from gunicorn.http.errors import ConfigurationProblem, InvalidHeader, InvalidHeaderName from gunicorn import SERVER_SOFTWARE, SERVER from gunicorn import util @@ -182,7 +182,11 @@ def create(req, sock, client, server, cfg): # set the path and script name path_info = req.path if script_name: - path_info = path_info.split(script_name, 1)[1] + if not path_info.startswith(script_name): + raise ConfigurationProblem( + "Request path %r does not start with SCRIPT_NAME %r" % + (path_info, script_name)) + path_info = path_info[len(script_name):] environ['PATH_INFO'] = util.unquote_to_wsgi_str(path_info) environ['SCRIPT_NAME'] = script_name From 13027ef797edba55967f366ec958a9a03b3d345b Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Wed, 6 Dec 2023 14:22:18 +0000 Subject: [PATCH 06/16] Create SECURITY.md --- SECURITY.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..3d1923540 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,22 @@ +# Security Policy + +## Reporting a Vulnerability + +**Please note that public Github issues are open for everyone to see!** + +If you believe you are found a problem in Gunicorn software, examples or documentation, we encourage you to send your report privately via email, or via Github using the *Report a vulnerability* button in the [Security](https://github.com/benoitc/gunicorn/security) section. + +## Supported Releases + +At this time, **only the latest release** receives any security attention whatsoever. + +| Version | Status | +| ------- | ------------------ | +| latest release | :white_check_mark: | +| 21.2.0 | :x: | +| 20.0.0 | :x: | +| < 20.0 | :x: | + +## Python Versions + +Gunicorn runs on Python 3.7+, we *highly recommend* the latest release of a [supported series](https://devguide.python.org/version/) and will not prioritize issues exclusively affecting in EoL environments. From 42dd4190ac01e5cba017948ab882c71017ea18d6 Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Thu, 7 Dec 2023 18:46:13 +0100 Subject: [PATCH 07/16] test: verify TOKEN_RE against common HTTP Methods --- tests/test_http.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_http.py b/tests/test_http.py index b6ca46b22..0eb694601 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -10,6 +10,17 @@ from gunicorn.http.wsgi import Response from gunicorn.http.unreader import Unreader, IterUnreader, SocketUnreader from gunicorn.http.errors import InvalidHeader, InvalidHeaderName +from gunicorn.http.message import TOKEN_RE + + +def test_method_pattern(): + assert TOKEN_RE.fullmatch("GET") + assert TOKEN_RE.fullmatch("MKCALENDAR") + assert not TOKEN_RE.fullmatch("GET:") + assert not TOKEN_RE.fullmatch("GET;") + RFC9110_5_6_2_TOKEN_DELIM = r'"(),/:;<=>?@[\]{}' + for bad_char in RFC9110_5_6_2_TOKEN_DELIM: + assert not TOKEN_RE.match(bad_char) def assert_readline(payload, size, expected): From b2846783d799bdde24fa209d767bd4ffb54cc1be Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Thu, 7 Dec 2023 18:46:31 +0100 Subject: [PATCH 08/16] strict: header field validation: stop casefolding * refusing lowercase and ASCII 0x23 (#) had been partially enforced before * do not casefold by default, HTTP methods are case sensitive --- gunicorn/config.py | 46 +++++++++++++++++++++++++++- gunicorn/http/message.py | 26 +++++++++++++--- tests/requests/invalid/003b.http | 2 ++ tests/requests/invalid/003b.py | 2 ++ tests/requests/invalid/003c.http | 2 ++ tests/requests/invalid/003c.py | 2 ++ tests/requests/valid/031.http | 4 +-- tests/requests/valid/031compat.http | 2 ++ tests/requests/valid/031compat.py | 13 ++++++++ tests/requests/valid/031compat2.http | 2 ++ tests/requests/valid/031compat2.py | 12 ++++++++ 11 files changed, 105 insertions(+), 8 deletions(-) create mode 100644 tests/requests/invalid/003b.http create mode 100644 tests/requests/invalid/003b.py create mode 100644 tests/requests/invalid/003c.http create mode 100644 tests/requests/invalid/003c.py create mode 100644 tests/requests/valid/031compat.http create mode 100644 tests/requests/valid/031compat.py create mode 100644 tests/requests/valid/031compat2.http create mode 100644 tests/requests/valid/031compat2.py diff --git a/gunicorn/config.py b/gunicorn/config.py index 84e7619e4..808a4b046 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -2254,5 +2254,49 @@ class StripHeaderSpaces(Setting): This is known to induce vulnerabilities and is not compliant with the HTTP/1.1 standard. See https://portswigger.net/research/http-desync-attacks-request-smuggling-reborn. - Use with care and only if necessary. + Use with care and only if necessary. May be removed in a future version. + + .. versionadded:: 20.0.1 + """ + + +class PermitUnconventionalHTTPMethod(Setting): + name = "permit_unconventional_http_method" + section = "Server Mechanics" + cli = ["--permit-unconventional-http-method"] + validator = validate_bool + action = "store_true" + default = False + desc = """\ + Permit HTTP methods not matching conventions, such as IANA registration guidelines + + This permits request methods of length less than 3 or more than 20, + methods with lowercase characters or methods containing the # character. + HTTP methods are case sensitive by definition, and merely uppercase by convention. + + This option is provided to diagnose backwards-incompatible changes. + + Use with care and only if necessary. May be removed in a future version. + + .. versionadded:: 22.0.0 """ + + +class CasefoldHTTPMethod(Setting): + name = "casefold_http_method" + section = "Server Mechanics" + cli = ["--casefold-http-method"] + validator = validate_bool + action = "store_true" + default = False + desc = """\ + Transform received HTTP methods to uppercase + + HTTP methods are case sensitive by definition, and merely uppercase by convention. + + This option is provided because previous versions of gunicorn defaulted to this behaviour. + + Use with care and only if necessary. May be removed in a future version. + + .. versionadded:: 22.0.0 + """ diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index db3d44bb2..2aa021c8d 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -21,7 +21,10 @@ MAX_HEADERS = 32768 DEFAULT_MAX_HEADERFIELD_SIZE = 8190 -TOKEN_RE = re.compile(r"[!#$%&'*+\-.\^_`|~0-9a-zA-Z]+") +# verbosely on purpose, avoid backslash ambiguity +RFC9110_5_6_2_TOKEN_SPECIALS = r"!#$%&'*+-.^_`|~" +TOKEN_RE = re.compile(r"[%s0-9a-zA-Z]+" % (re.escape(RFC9110_5_6_2_TOKEN_SPECIALS))) +METHOD_BADCHAR_RE = re.compile("[a-z#]") VERSION_RE = re.compile(r"HTTP/(\d+)\.(\d+)") @@ -331,10 +334,23 @@ def parse_request_line(self, line_bytes): if len(bits) != 3: raise InvalidRequestLine(bytes_to_str(line_bytes)) - # Method - if not TOKEN_RE.fullmatch(bits[0]): - raise InvalidRequestMethod(bits[0]) - self.method = bits[0].upper() + # Method: RFC9110 Section 9 + self.method = bits[0] + + # nonstandard restriction, suitable for all IANA registered methods + # partially enforced in previous gunicorn versions + if not self.cfg.permit_unconventional_http_method: + if METHOD_BADCHAR_RE.search(self.method): + raise InvalidRequestMethod(self.method) + if not 3 <= len(bits[0]) <= 20: + raise InvalidRequestMethod(self.method) + # standard restriction: RFC9110 token + if not TOKEN_RE.fullmatch(self.method): + raise InvalidRequestMethod(self.method) + # nonstandard and dangerous + # methods are merely uppercase by convention, no case-insensitive treatment is intended + if self.cfg.casefold_http_method: + self.method = self.method.upper() # URI self.uri = bits[1] diff --git a/tests/requests/invalid/003b.http b/tests/requests/invalid/003b.http new file mode 100644 index 000000000..e05fd9897 --- /dev/null +++ b/tests/requests/invalid/003b.http @@ -0,0 +1,2 @@ +bla:rgh /foo HTTP/1.1\r\n +\r\n diff --git a/tests/requests/invalid/003b.py b/tests/requests/invalid/003b.py new file mode 100644 index 000000000..86a0774e5 --- /dev/null +++ b/tests/requests/invalid/003b.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidRequestMethod +request = InvalidRequestMethod \ No newline at end of file diff --git a/tests/requests/invalid/003c.http b/tests/requests/invalid/003c.http new file mode 100644 index 000000000..98bd20bf1 --- /dev/null +++ b/tests/requests/invalid/003c.http @@ -0,0 +1,2 @@ +-bl /foo HTTP/1.1\r\n +\r\n diff --git a/tests/requests/invalid/003c.py b/tests/requests/invalid/003c.py new file mode 100644 index 000000000..1dac27c04 --- /dev/null +++ b/tests/requests/invalid/003c.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidRequestMethod +request = InvalidRequestMethod diff --git a/tests/requests/valid/031.http b/tests/requests/valid/031.http index cd1ab7fcb..ab3529da3 100644 --- a/tests/requests/valid/031.http +++ b/tests/requests/valid/031.http @@ -1,2 +1,2 @@ --blargh /foo HTTP/1.1\r\n -\r\n \ No newline at end of file +-BLARGH /foo HTTP/1.1\r\n +\r\n diff --git a/tests/requests/valid/031compat.http b/tests/requests/valid/031compat.http new file mode 100644 index 000000000..cd1ab7fcb --- /dev/null +++ b/tests/requests/valid/031compat.http @@ -0,0 +1,2 @@ +-blargh /foo HTTP/1.1\r\n +\r\n \ No newline at end of file diff --git a/tests/requests/valid/031compat.py b/tests/requests/valid/031compat.py new file mode 100644 index 000000000..424b7cb47 --- /dev/null +++ b/tests/requests/valid/031compat.py @@ -0,0 +1,13 @@ +from gunicorn.config import Config + +cfg = Config() +cfg.set("permit_unconventional_http_method", True) +cfg.set("casefold_http_method", True) + +request = { + "method": "-BLARGH", + "uri": uri("/foo"), + "version": (1, 1), + "headers": [], + "body": b"" +} diff --git a/tests/requests/valid/031compat2.http b/tests/requests/valid/031compat2.http new file mode 100644 index 000000000..dcbf4f134 --- /dev/null +++ b/tests/requests/valid/031compat2.http @@ -0,0 +1,2 @@ +-blargh /foo HTTP/1.1\r\n +\r\n diff --git a/tests/requests/valid/031compat2.py b/tests/requests/valid/031compat2.py new file mode 100644 index 000000000..594a8b6a8 --- /dev/null +++ b/tests/requests/valid/031compat2.py @@ -0,0 +1,12 @@ +from gunicorn.config import Config + +cfg = Config() +cfg.set("permit_unconventional_http_method", True) + +request = { + "method": "-blargh", + "uri": uri("/foo"), + "version": (1, 1), + "headers": [], + "body": b"" +} From 72b8970dbf2bf3444eb2e8b12aeff1a3d5922a9a Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Thu, 7 Dec 2023 08:11:34 +0100 Subject: [PATCH 09/16] silently drop or refuse header names w/ underscore Ambiguous mappings open a bottomless pit of "what is user input and what is proxy input" confusion. Default to what everyone else has been doing for years now, silently drop. see also https://nginx.org/r/underscores_in_headers --- gunicorn/config.py | 44 ++++++++++++++++++++++++++ gunicorn/http/message.py | 17 ++++++++++ gunicorn/http/wsgi.py | 2 ++ tests/requests/invalid/040.http | 6 ++++ tests/requests/invalid/040.py | 7 ++++ tests/requests/invalid/chunked_07.http | 10 ++++++ tests/requests/invalid/chunked_07.py | 7 ++++ tests/requests/valid/040.http | 6 ++++ tests/requests/valid/040.py | 9 ++++++ tests/requests/valid/040_compat.http | 6 ++++ tests/requests/valid/040_compat.py | 16 ++++++++++ 11 files changed, 130 insertions(+) create mode 100644 tests/requests/invalid/040.http create mode 100644 tests/requests/invalid/040.py create mode 100644 tests/requests/invalid/chunked_07.http create mode 100644 tests/requests/invalid/chunked_07.py create mode 100644 tests/requests/valid/040.http create mode 100644 tests/requests/valid/040.py create mode 100644 tests/requests/valid/040_compat.http create mode 100644 tests/requests/valid/040_compat.py diff --git a/gunicorn/config.py b/gunicorn/config.py index 808a4b046..50baeb616 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -2300,3 +2300,47 @@ class CasefoldHTTPMethod(Setting): .. versionadded:: 22.0.0 """ + + +def validate_header_map_behaviour(val): + # FIXME: refactor all of this subclassing stdlib argparse + + if val is None: + return + + if not isinstance(val, str): + raise TypeError("Invalid type for casting: %s" % val) + if val.lower().strip() == "drop": + return "drop" + elif val.lower().strip() == "refuse": + return "refuse" + elif val.lower().strip() == "dangerous": + return "dangerous" + else: + raise ValueError("Invalid header map behaviour: %s" % val) + + +class HeaderMap(Setting): + name = "header_map" + section = "Server Mechanics" + cli = ["--header-map"] + validator = validate_header_map_behaviour + default = "drop" + desc = """\ + Configure how header field names are mapped into environ + + Headers containing underscores are permitted by RFC9110, + but gunicorn joining headers of different names into + the same environment variable will dangerously confuse applications as to which is which. + + The safe default ``drop`` is to silently drop headers that cannot be unambiguously mapped. + The value ``refuse`` will return an error if a request contains *any* such header. + The value ``dangerous`` matches the previous, not advisabble, behaviour of mapping different + header field names into the same environ name. + + Use with care and only if necessary and after considering if your problem could + instead be solved by specifically renaming or rewriting only the intended headers + on a proxy in front of Gunicorn. + + .. versionadded:: 22.0.0 + """ diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 2aa021c8d..59a780f6c 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -120,6 +120,23 @@ def parse_headers(self, data): scheme_header = True self.scheme = scheme + # ambiguous mapping allows fooling downstream, e.g. merging non-identical headers: + # X-Forwarded-For: 2001:db8::ha:cc:ed + # X_Forwarded_For: 127.0.0.1,::1 + # HTTP_X_FORWARDED_FOR = 2001:db8::ha:cc:ed,127.0.0.1,::1 + # Only modify after fixing *ALL* header transformations; network to wsgi env + if "_" in name: + if self.cfg.header_map == "dangerous": + # as if we did not know we cannot safely map this + pass + elif self.cfg.header_map == "drop": + # almost as if it never had been there + # but still counts against resource limits + continue + else: + # fail-safe fallthrough: refuse + raise InvalidHeaderName(name) + headers.append((name, value)) return headers diff --git a/gunicorn/http/wsgi.py b/gunicorn/http/wsgi.py index bafed49eb..6f3d9b68f 100644 --- a/gunicorn/http/wsgi.py +++ b/gunicorn/http/wsgi.py @@ -135,6 +135,8 @@ def create(req, sock, client, server, cfg): environ['CONTENT_LENGTH'] = hdr_value continue + # do not change lightly, this is a common source of security problems + # RFC9110 Section 17.10 discourages ambiguous or incomplete mappings key = 'HTTP_' + hdr_name.replace('-', '_') if key in environ: hdr_value = "%s,%s" % (environ[key], hdr_value) diff --git a/tests/requests/invalid/040.http b/tests/requests/invalid/040.http new file mode 100644 index 000000000..be03929d5 --- /dev/null +++ b/tests/requests/invalid/040.http @@ -0,0 +1,6 @@ +GET /keep/same/as?invalid/040 HTTP/1.0\r\n +Transfer_Encoding: tricked\r\n +Content-Length: 7\r\n +Content_Length: -1E23\r\n +\r\n +tricked\r\n diff --git a/tests/requests/invalid/040.py b/tests/requests/invalid/040.py new file mode 100644 index 000000000..643289fab --- /dev/null +++ b/tests/requests/invalid/040.py @@ -0,0 +1,7 @@ +from gunicorn.http.errors import InvalidHeaderName +from gunicorn.config import Config + +cfg = Config() +cfg.set("header_map", "refuse") + +request = InvalidHeaderName diff --git a/tests/requests/invalid/chunked_07.http b/tests/requests/invalid/chunked_07.http new file mode 100644 index 000000000..a78db48b7 --- /dev/null +++ b/tests/requests/invalid/chunked_07.http @@ -0,0 +1,10 @@ +POST /chunked_ambiguous_header_mapping HTTP/1.1\r\n +Transfer_Encoding: gzip\r\n +Transfer-Encoding: chunked\r\n +\r\n +5\r\n +hello\r\n +6\r\n + world\r\n +0\r\n +\r\n diff --git a/tests/requests/invalid/chunked_07.py b/tests/requests/invalid/chunked_07.py new file mode 100644 index 000000000..643289fab --- /dev/null +++ b/tests/requests/invalid/chunked_07.py @@ -0,0 +1,7 @@ +from gunicorn.http.errors import InvalidHeaderName +from gunicorn.config import Config + +cfg = Config() +cfg.set("header_map", "refuse") + +request = InvalidHeaderName diff --git a/tests/requests/valid/040.http b/tests/requests/valid/040.http new file mode 100644 index 000000000..be03929d5 --- /dev/null +++ b/tests/requests/valid/040.http @@ -0,0 +1,6 @@ +GET /keep/same/as?invalid/040 HTTP/1.0\r\n +Transfer_Encoding: tricked\r\n +Content-Length: 7\r\n +Content_Length: -1E23\r\n +\r\n +tricked\r\n diff --git a/tests/requests/valid/040.py b/tests/requests/valid/040.py new file mode 100644 index 000000000..7c2243d9f --- /dev/null +++ b/tests/requests/valid/040.py @@ -0,0 +1,9 @@ +request = { + "method": "GET", + "uri": uri("/keep/same/as?invalid/040"), + "version": (1, 0), + "headers": [ + ("CONTENT-LENGTH", "7") + ], + "body": b'tricked' +} diff --git a/tests/requests/valid/040_compat.http b/tests/requests/valid/040_compat.http new file mode 100644 index 000000000..be03929d5 --- /dev/null +++ b/tests/requests/valid/040_compat.http @@ -0,0 +1,6 @@ +GET /keep/same/as?invalid/040 HTTP/1.0\r\n +Transfer_Encoding: tricked\r\n +Content-Length: 7\r\n +Content_Length: -1E23\r\n +\r\n +tricked\r\n diff --git a/tests/requests/valid/040_compat.py b/tests/requests/valid/040_compat.py new file mode 100644 index 000000000..5f13487c4 --- /dev/null +++ b/tests/requests/valid/040_compat.py @@ -0,0 +1,16 @@ +from gunicorn.config import Config + +cfg = Config() +cfg.set("header_map", "dangerous") + +request = { + "method": "GET", + "uri": uri("/keep/same/as?invalid/040"), + "version": (1, 0), + "headers": [ + ("TRANSFER_ENCODING", "tricked"), + ("CONTENT-LENGTH", "7"), + ("CONTENT_LENGTH", "-1E23"), + ], + "body": b'tricked' +} From 0b10cbab1d6368fcab2d5a7b6fe359a6cecc81a7 Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Thu, 7 Dec 2023 08:16:33 +0100 Subject: [PATCH 10/16] unconditionally log request error Somehow exception logging was conditional on successful request uri parsing. Add it back for the other branch. --- gunicorn/workers/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gunicorn/workers/base.py b/gunicorn/workers/base.py index f321dd2d4..f97d923c7 100644 --- a/gunicorn/workers/base.py +++ b/gunicorn/workers/base.py @@ -251,6 +251,8 @@ def handle_error(self, req, client, addr, exc): else: if hasattr(req, "uri"): self.log.exception("Error handling request %s", req.uri) + else: + self.log.exception("Error handling request (no URI read)") status_int = 500 reason = "Internal Server Error" mesg = "" From ac29c9b0a758d21f1e0fb3b3457239e523fa9f1d Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Thu, 7 Dec 2023 09:22:30 +0100 Subject: [PATCH 11/16] fail-safe on unsupported request framing If we promise wsgi.input_terminated, we better get it right - or not at all. * chunked encoding on HTTP <= 1.1 * chunked not last transfer coding * multiple chinked codings * any unknown codings (yes, this too! because we do not detect unusual syntax that is still chunked) * empty coding (plausibly harmless, but not see in real life anyway - refused, for the moment) --- gunicorn/config.py | 18 ++++++++++ gunicorn/http/errors.py | 9 +++++ gunicorn/http/message.py | 45 +++++++++++++++++++++++++ tests/requests/invalid/chunked_01.http | 12 +++++++ tests/requests/invalid/chunked_01.py | 2 ++ tests/requests/invalid/chunked_02.http | 9 +++++ tests/requests/invalid/chunked_02.py | 2 ++ tests/requests/invalid/chunked_03.http | 8 +++++ tests/requests/invalid/chunked_03.py | 2 ++ tests/requests/invalid/chunked_04.http | 11 ++++++ tests/requests/invalid/chunked_04.py | 2 ++ tests/requests/invalid/chunked_05.http | 11 ++++++ tests/requests/invalid/chunked_05.py | 2 ++ tests/requests/invalid/chunked_06.http | 9 +++++ tests/requests/invalid/chunked_06.py | 2 ++ tests/requests/invalid/chunked_08.http | 9 +++++ tests/requests/invalid/chunked_08.py | 2 ++ tests/requests/invalid/nonascii_01.http | 4 +++ tests/requests/invalid/nonascii_01.py | 5 +++ tests/requests/invalid/nonascii_02.http | 4 +++ tests/requests/invalid/nonascii_02.py | 5 +++ tests/requests/invalid/nonascii_04.http | 5 +++ tests/requests/invalid/nonascii_04.py | 5 +++ tests/requests/invalid/prefix_01.http | 2 ++ tests/requests/invalid/prefix_01.py | 2 ++ tests/requests/invalid/prefix_02.http | 2 ++ tests/requests/invalid/prefix_02.py | 2 ++ tests/requests/invalid/prefix_03.http | 4 +++ tests/requests/invalid/prefix_03.py | 5 +++ tests/requests/invalid/prefix_04.http | 5 +++ tests/requests/invalid/prefix_04.py | 5 +++ tests/requests/invalid/prefix_05.http | 4 +++ tests/requests/invalid/prefix_05.py | 5 +++ tests/requests/valid/025.http | 9 +++-- tests/requests/valid/025.py | 6 +++- tests/requests/valid/025compat.http | 18 ++++++++++ tests/requests/valid/025compat.py | 27 +++++++++++++++ tests/requests/valid/029.http | 2 +- tests/requests/valid/029.py | 2 +- tests/treq.py | 4 ++- 40 files changed, 281 insertions(+), 6 deletions(-) create mode 100644 tests/requests/invalid/chunked_01.http create mode 100644 tests/requests/invalid/chunked_01.py create mode 100644 tests/requests/invalid/chunked_02.http create mode 100644 tests/requests/invalid/chunked_02.py create mode 100644 tests/requests/invalid/chunked_03.http create mode 100644 tests/requests/invalid/chunked_03.py create mode 100644 tests/requests/invalid/chunked_04.http create mode 100644 tests/requests/invalid/chunked_04.py create mode 100644 tests/requests/invalid/chunked_05.http create mode 100644 tests/requests/invalid/chunked_05.py create mode 100644 tests/requests/invalid/chunked_06.http create mode 100644 tests/requests/invalid/chunked_06.py create mode 100644 tests/requests/invalid/chunked_08.http create mode 100644 tests/requests/invalid/chunked_08.py create mode 100644 tests/requests/invalid/nonascii_01.http create mode 100644 tests/requests/invalid/nonascii_01.py create mode 100644 tests/requests/invalid/nonascii_02.http create mode 100644 tests/requests/invalid/nonascii_02.py create mode 100644 tests/requests/invalid/nonascii_04.http create mode 100644 tests/requests/invalid/nonascii_04.py create mode 100644 tests/requests/invalid/prefix_01.http create mode 100644 tests/requests/invalid/prefix_01.py create mode 100644 tests/requests/invalid/prefix_02.http create mode 100644 tests/requests/invalid/prefix_02.py create mode 100644 tests/requests/invalid/prefix_03.http create mode 100644 tests/requests/invalid/prefix_03.py create mode 100644 tests/requests/invalid/prefix_04.http create mode 100644 tests/requests/invalid/prefix_04.py create mode 100644 tests/requests/invalid/prefix_05.http create mode 100644 tests/requests/invalid/prefix_05.py create mode 100644 tests/requests/valid/025compat.http create mode 100644 tests/requests/valid/025compat.py diff --git a/gunicorn/config.py b/gunicorn/config.py index 50baeb616..be9bb001c 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -2344,3 +2344,21 @@ class HeaderMap(Setting): .. versionadded:: 22.0.0 """ + + +class TolerateDangerousFraming(Setting): + name = "tolerate_dangerous_framing" + section = "Server Mechanics" + cli = ["--tolerate-dangerous-framing"] + validator = validate_bool + action = "store_true" + default = False + desc = """\ + Process requests with both Transfer-Encoding and Content-Length + + This is known to induce vulnerabilities, but not strictly forbidden by RFC9112. + + Use with care and only if necessary. May be removed in a future version. + + .. versionadded:: 22.0.0 + """ diff --git a/gunicorn/http/errors.py b/gunicorn/http/errors.py index 05abd1ab0..340f0473c 100644 --- a/gunicorn/http/errors.py +++ b/gunicorn/http/errors.py @@ -73,6 +73,15 @@ def __str__(self): return "Invalid HTTP header name: %r" % self.hdr +class UnsupportedTransferCoding(ParseException): + def __init__(self, hdr): + self.hdr = hdr + self.code = 501 + + def __str__(self): + return "Unsupported transfer coding: %r" % self.hdr + + class InvalidChunkSize(IOError): def __init__(self, data): self.data = data diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 59a780f6c..75b36e330 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -12,6 +12,7 @@ InvalidHeader, InvalidHeaderName, NoMoreData, InvalidRequestLine, InvalidRequestMethod, InvalidHTTPVersion, LimitRequestLine, LimitRequestHeaders, + UnsupportedTransferCoding, ) from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest from gunicorn.http.errors import InvalidSchemeHeaders @@ -39,6 +40,7 @@ def __init__(self, cfg, unreader, peer_addr): self.trailers = [] self.body = None self.scheme = "https" if cfg.is_ssl else "http" + self.must_close = False # set headers limits self.limit_request_fields = cfg.limit_request_fields @@ -58,6 +60,9 @@ def __init__(self, cfg, unreader, peer_addr): self.unreader.unread(unused) self.set_body_reader() + def force_close(self): + self.must_close = True + def parse(self, unreader): raise NotImplementedError() @@ -152,9 +157,47 @@ def set_body_reader(self): content_length = value elif name == "TRANSFER-ENCODING": if value.lower() == "chunked": + # DANGER: transer codings stack, and stacked chunking is never intended + if chunked: + raise InvalidHeader("TRANSFER-ENCODING", req=self) chunked = True + elif value.lower() == "identity": + # does not do much, could still plausibly desync from what the proxy does + # safe option: nuke it, its never needed + if chunked: + raise InvalidHeader("TRANSFER-ENCODING", req=self) + elif value.lower() == "": + # lacking security review on this case + # offer the option to restore previous behaviour, but refuse by default, for now + self.force_close() + if not self.cfg.tolerate_dangerous_framing: + raise UnsupportedTransferCoding(value) + # DANGER: do not change lightly; ref: request smuggling + # T-E is a list and we *could* support correctly parsing its elements + # .. but that is only safe after getting all the edge cases right + # .. for which no real-world need exists, so best to NOT open that can of worms + else: + self.force_close() + # even if parser is extended, retain this branch: + # the "chunked not last" case remains to be rejected! + raise UnsupportedTransferCoding(value) if chunked: + # two potentially dangerous cases: + # a) CL + TE (TE overrides CL.. only safe if the recipient sees it that way too) + # b) chunked HTTP/1.0 (always faulty) + if self.version < (1, 1): + # framing wonky, see RFC 9112 Section 6.1 + self.force_close() + if not self.cfg.tolerate_dangerous_framing: + raise InvalidHeader("TRANSFER-ENCODING", req=self) + if content_length is not None: + # we cannot be certain the message framing we understood matches proxy intent + # -> whatever happens next, remaining input must not be trusted + self.force_close() + # either processing or rejecting is permitted in RFC 9112 Section 6.1 + if not self.cfg.tolerate_dangerous_framing: + raise InvalidHeader("CONTENT-LENGTH", req=self) self.body = Body(ChunkedReader(self, self.unreader)) elif content_length is not None: try: @@ -173,6 +216,8 @@ def set_body_reader(self): self.body = Body(EOFReader(self.unreader)) def should_close(self): + if self.must_close: + return True for (h, v) in self.headers: if h == "CONNECTION": v = v.lower().strip(" \t") diff --git a/tests/requests/invalid/chunked_01.http b/tests/requests/invalid/chunked_01.http new file mode 100644 index 000000000..7a8e55d2e --- /dev/null +++ b/tests/requests/invalid/chunked_01.http @@ -0,0 +1,12 @@ +POST /chunked_w_underscore_chunk_size HTTP/1.1\r\n +Transfer-Encoding: chunked\r\n +\r\n +5\r\n +hello\r\n +6_0\r\n + world\r\n +0\r\n +\r\n +POST /after HTTP/1.1\r\n +Transfer-Encoding: identity\r\n +\r\n diff --git a/tests/requests/invalid/chunked_01.py b/tests/requests/invalid/chunked_01.py new file mode 100644 index 000000000..0571e1183 --- /dev/null +++ b/tests/requests/invalid/chunked_01.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidChunkSize +request = InvalidChunkSize diff --git a/tests/requests/invalid/chunked_02.http b/tests/requests/invalid/chunked_02.http new file mode 100644 index 000000000..9ae49e525 --- /dev/null +++ b/tests/requests/invalid/chunked_02.http @@ -0,0 +1,9 @@ +POST /chunked_with_prefixed_value HTTP/1.1\r\n +Content-Length: 12\r\n +Transfer-Encoding: \tchunked\r\n +\r\n +5\r\n +hello\r\n +6\r\n + world\r\n +\r\n diff --git a/tests/requests/invalid/chunked_02.py b/tests/requests/invalid/chunked_02.py new file mode 100644 index 000000000..1541eb70b --- /dev/null +++ b/tests/requests/invalid/chunked_02.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidHeader +request = InvalidHeader diff --git a/tests/requests/invalid/chunked_03.http b/tests/requests/invalid/chunked_03.http new file mode 100644 index 000000000..0bbbfe6ef --- /dev/null +++ b/tests/requests/invalid/chunked_03.http @@ -0,0 +1,8 @@ +POST /double_chunked HTTP/1.1\r\n +Transfer-Encoding: identity, chunked, identity, chunked\r\n +\r\n +5\r\n +hello\r\n +6\r\n + world\r\n +\r\n diff --git a/tests/requests/invalid/chunked_03.py b/tests/requests/invalid/chunked_03.py new file mode 100644 index 000000000..58a34600c --- /dev/null +++ b/tests/requests/invalid/chunked_03.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import UnsupportedTransferCoding +request = UnsupportedTransferCoding diff --git a/tests/requests/invalid/chunked_04.http b/tests/requests/invalid/chunked_04.http new file mode 100644 index 000000000..d47109e31 --- /dev/null +++ b/tests/requests/invalid/chunked_04.http @@ -0,0 +1,11 @@ +POST /chunked_twice HTTP/1.1\r\n +Transfer-Encoding: identity\r\n +Transfer-Encoding: chunked\r\n +Transfer-Encoding: identity\r\n +Transfer-Encoding: chunked\r\n +\r\n +5\r\n +hello\r\n +6\r\n + world\r\n +\r\n diff --git a/tests/requests/invalid/chunked_04.py b/tests/requests/invalid/chunked_04.py new file mode 100644 index 000000000..1541eb70b --- /dev/null +++ b/tests/requests/invalid/chunked_04.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidHeader +request = InvalidHeader diff --git a/tests/requests/invalid/chunked_05.http b/tests/requests/invalid/chunked_05.http new file mode 100644 index 000000000..014e85ac0 --- /dev/null +++ b/tests/requests/invalid/chunked_05.http @@ -0,0 +1,11 @@ +POST /chunked_HTTP_1.0 HTTP/1.0\r\n +Transfer-Encoding: chunked\r\n +\r\n +5\r\n +hello\r\n +6\r\n + world\r\n +0\r\n +Vary: *\r\n +Content-Type: text/plain\r\n +\r\n diff --git a/tests/requests/invalid/chunked_05.py b/tests/requests/invalid/chunked_05.py new file mode 100644 index 000000000..1541eb70b --- /dev/null +++ b/tests/requests/invalid/chunked_05.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidHeader +request = InvalidHeader diff --git a/tests/requests/invalid/chunked_06.http b/tests/requests/invalid/chunked_06.http new file mode 100644 index 000000000..ef70faab9 --- /dev/null +++ b/tests/requests/invalid/chunked_06.http @@ -0,0 +1,9 @@ +POST /chunked_not_last HTTP/1.1\r\n +Transfer-Encoding: chunked\r\n +Transfer-Encoding: gzip\r\n +\r\n +5\r\n +hello\r\n +6\r\n + world\r\n +\r\n diff --git a/tests/requests/invalid/chunked_06.py b/tests/requests/invalid/chunked_06.py new file mode 100644 index 000000000..58a34600c --- /dev/null +++ b/tests/requests/invalid/chunked_06.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import UnsupportedTransferCoding +request = UnsupportedTransferCoding diff --git a/tests/requests/invalid/chunked_08.http b/tests/requests/invalid/chunked_08.http new file mode 100644 index 000000000..8d4aaa6e9 --- /dev/null +++ b/tests/requests/invalid/chunked_08.http @@ -0,0 +1,9 @@ +POST /chunked_not_last HTTP/1.1\r\n +Transfer-Encoding: chunked\r\n +Transfer-Encoding: identity\r\n +\r\n +5\r\n +hello\r\n +6\r\n + world\r\n +\r\n diff --git a/tests/requests/invalid/chunked_08.py b/tests/requests/invalid/chunked_08.py new file mode 100644 index 000000000..1541eb70b --- /dev/null +++ b/tests/requests/invalid/chunked_08.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidHeader +request = InvalidHeader diff --git a/tests/requests/invalid/nonascii_01.http b/tests/requests/invalid/nonascii_01.http new file mode 100644 index 000000000..30d18cd6f --- /dev/null +++ b/tests/requests/invalid/nonascii_01.http @@ -0,0 +1,4 @@ +GETß /germans.. HTTP/1.1\r\n +Content-Length: 3\r\n +\r\n +ÄÄÄ diff --git a/tests/requests/invalid/nonascii_01.py b/tests/requests/invalid/nonascii_01.py new file mode 100644 index 000000000..0da10f426 --- /dev/null +++ b/tests/requests/invalid/nonascii_01.py @@ -0,0 +1,5 @@ +from gunicorn.config import Config +from gunicorn.http.errors import InvalidRequestMethod + +cfg = Config() +request = InvalidRequestMethod diff --git a/tests/requests/invalid/nonascii_02.http b/tests/requests/invalid/nonascii_02.http new file mode 100644 index 000000000..36a617039 --- /dev/null +++ b/tests/requests/invalid/nonascii_02.http @@ -0,0 +1,4 @@ +GETÿ /french.. HTTP/1.1\r\n +Content-Length: 3\r\n +\r\n +ÄÄÄ diff --git a/tests/requests/invalid/nonascii_02.py b/tests/requests/invalid/nonascii_02.py new file mode 100644 index 000000000..0da10f426 --- /dev/null +++ b/tests/requests/invalid/nonascii_02.py @@ -0,0 +1,5 @@ +from gunicorn.config import Config +from gunicorn.http.errors import InvalidRequestMethod + +cfg = Config() +request = InvalidRequestMethod diff --git a/tests/requests/invalid/nonascii_04.http b/tests/requests/invalid/nonascii_04.http new file mode 100644 index 000000000..be0b15661 --- /dev/null +++ b/tests/requests/invalid/nonascii_04.http @@ -0,0 +1,5 @@ +GET /french.. HTTP/1.1\r\n +Content-Lengthÿ: 3\r\n +Content-Length: 3\r\n +\r\n +ÄÄÄ diff --git a/tests/requests/invalid/nonascii_04.py b/tests/requests/invalid/nonascii_04.py new file mode 100644 index 000000000..d336fbc85 --- /dev/null +++ b/tests/requests/invalid/nonascii_04.py @@ -0,0 +1,5 @@ +from gunicorn.config import Config +from gunicorn.http.errors import InvalidHeaderName + +cfg = Config() +request = InvalidHeaderName diff --git a/tests/requests/invalid/prefix_01.http b/tests/requests/invalid/prefix_01.http new file mode 100644 index 000000000..f8bdeb352 --- /dev/null +++ b/tests/requests/invalid/prefix_01.http @@ -0,0 +1,2 @@ +GET\0PROXY /foo HTTP/1.1\r\n +\r\n diff --git a/tests/requests/invalid/prefix_01.py b/tests/requests/invalid/prefix_01.py new file mode 100644 index 000000000..86a0774e5 --- /dev/null +++ b/tests/requests/invalid/prefix_01.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidRequestMethod +request = InvalidRequestMethod \ No newline at end of file diff --git a/tests/requests/invalid/prefix_02.http b/tests/requests/invalid/prefix_02.http new file mode 100644 index 000000000..8a9b155c4 --- /dev/null +++ b/tests/requests/invalid/prefix_02.http @@ -0,0 +1,2 @@ +GET\0 /foo HTTP/1.1\r\n +\r\n diff --git a/tests/requests/invalid/prefix_02.py b/tests/requests/invalid/prefix_02.py new file mode 100644 index 000000000..86a0774e5 --- /dev/null +++ b/tests/requests/invalid/prefix_02.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidRequestMethod +request = InvalidRequestMethod \ No newline at end of file diff --git a/tests/requests/invalid/prefix_03.http b/tests/requests/invalid/prefix_03.http new file mode 100644 index 000000000..7803935cc --- /dev/null +++ b/tests/requests/invalid/prefix_03.http @@ -0,0 +1,4 @@ +GET /stuff/here?foo=bar HTTP/1.1\r\n +Content-Length: 0 1\r\n +\r\n +x diff --git a/tests/requests/invalid/prefix_03.py b/tests/requests/invalid/prefix_03.py new file mode 100644 index 000000000..95b0581ae --- /dev/null +++ b/tests/requests/invalid/prefix_03.py @@ -0,0 +1,5 @@ +from gunicorn.config import Config +from gunicorn.http.errors import InvalidHeader + +cfg = Config() +request = InvalidHeader diff --git a/tests/requests/invalid/prefix_04.http b/tests/requests/invalid/prefix_04.http new file mode 100644 index 000000000..712631c8f --- /dev/null +++ b/tests/requests/invalid/prefix_04.http @@ -0,0 +1,5 @@ +GET /stuff/here?foo=bar HTTP/1.1\r\n +Content-Length: 3 1\r\n +\r\n +xyz +abc123 diff --git a/tests/requests/invalid/prefix_04.py b/tests/requests/invalid/prefix_04.py new file mode 100644 index 000000000..95b0581ae --- /dev/null +++ b/tests/requests/invalid/prefix_04.py @@ -0,0 +1,5 @@ +from gunicorn.config import Config +from gunicorn.http.errors import InvalidHeader + +cfg = Config() +request = InvalidHeader diff --git a/tests/requests/invalid/prefix_05.http b/tests/requests/invalid/prefix_05.http new file mode 100644 index 000000000..120b6577d --- /dev/null +++ b/tests/requests/invalid/prefix_05.http @@ -0,0 +1,4 @@ +GET: /stuff/here?foo=bar HTTP/1.1\r\n +Content-Length: 3\r\n +\r\n +xyz diff --git a/tests/requests/invalid/prefix_05.py b/tests/requests/invalid/prefix_05.py new file mode 100644 index 000000000..0da10f426 --- /dev/null +++ b/tests/requests/invalid/prefix_05.py @@ -0,0 +1,5 @@ +from gunicorn.config import Config +from gunicorn.http.errors import InvalidRequestMethod + +cfg = Config() +request = InvalidRequestMethod diff --git a/tests/requests/valid/025.http b/tests/requests/valid/025.http index 62267add0..f8d7fae2e 100644 --- a/tests/requests/valid/025.http +++ b/tests/requests/valid/025.http @@ -1,5 +1,4 @@ POST /chunked_cont_h_at_first HTTP/1.1\r\n -Content-Length: -1\r\n Transfer-Encoding: chunked\r\n \r\n 5; some; parameters=stuff\r\n @@ -16,4 +15,10 @@ Content-Length: -1\r\n hello\r\n 6; blahblah; blah\r\n world\r\n -0\r\n \ No newline at end of file +0\r\n +\r\n +PUT /ignored_after_dangerous_framing HTTP/1.1\r\n +Content-Length: 3\r\n +\r\n +foo\r\n +\r\n diff --git a/tests/requests/valid/025.py b/tests/requests/valid/025.py index 12ea9ab76..33f5845cb 100644 --- a/tests/requests/valid/025.py +++ b/tests/requests/valid/025.py @@ -1,9 +1,13 @@ +from gunicorn.config import Config + +cfg = Config() +cfg.set("tolerate_dangerous_framing", True) + req1 = { "method": "POST", "uri": uri("/chunked_cont_h_at_first"), "version": (1, 1), "headers": [ - ("CONTENT-LENGTH", "-1"), ("TRANSFER-ENCODING", "chunked") ], "body": b"hello world" diff --git a/tests/requests/valid/025compat.http b/tests/requests/valid/025compat.http new file mode 100644 index 000000000..828f6fb71 --- /dev/null +++ b/tests/requests/valid/025compat.http @@ -0,0 +1,18 @@ +POST /chunked_cont_h_at_first HTTP/1.1\r\n +Transfer-Encoding: chunked\r\n +\r\n +5; some; parameters=stuff\r\n +hello\r\n +6; blahblah; blah\r\n + world\r\n +0\r\n +\r\n +PUT /chunked_cont_h_at_last HTTP/1.1\r\n +Transfer-Encoding: chunked\r\n +Content-Length: -1\r\n +\r\n +5; some; parameters=stuff\r\n +hello\r\n +6; blahblah; blah\r\n + world\r\n +0\r\n diff --git a/tests/requests/valid/025compat.py b/tests/requests/valid/025compat.py new file mode 100644 index 000000000..33f5845cb --- /dev/null +++ b/tests/requests/valid/025compat.py @@ -0,0 +1,27 @@ +from gunicorn.config import Config + +cfg = Config() +cfg.set("tolerate_dangerous_framing", True) + +req1 = { + "method": "POST", + "uri": uri("/chunked_cont_h_at_first"), + "version": (1, 1), + "headers": [ + ("TRANSFER-ENCODING", "chunked") + ], + "body": b"hello world" +} + +req2 = { + "method": "PUT", + "uri": uri("/chunked_cont_h_at_last"), + "version": (1, 1), + "headers": [ + ("TRANSFER-ENCODING", "chunked"), + ("CONTENT-LENGTH", "-1"), + ], + "body": b"hello world" +} + +request = [req1, req2] diff --git a/tests/requests/valid/029.http b/tests/requests/valid/029.http index c8611dbd3..5d029dd91 100644 --- a/tests/requests/valid/029.http +++ b/tests/requests/valid/029.http @@ -1,6 +1,6 @@ GET /stuff/here?foo=bar HTTP/1.1\r\n -Transfer-Encoding: chunked\r\n Transfer-Encoding: identity\r\n +Transfer-Encoding: chunked\r\n \r\n 5\r\n hello\r\n diff --git a/tests/requests/valid/029.py b/tests/requests/valid/029.py index f25449d15..64d026604 100644 --- a/tests/requests/valid/029.py +++ b/tests/requests/valid/029.py @@ -7,8 +7,8 @@ "uri": uri("/stuff/here?foo=bar"), "version": (1, 1), "headers": [ + ('TRANSFER-ENCODING', 'identity'), ('TRANSFER-ENCODING', 'chunked'), - ('TRANSFER-ENCODING', 'identity') ], "body": b"hello" } diff --git a/tests/treq.py b/tests/treq.py index 05adb1461..aeaae151f 100644 --- a/tests/treq.py +++ b/tests/treq.py @@ -248,8 +248,10 @@ def test_req(sn, sz, mt): def check(self, cfg, sender, sizer, matcher): cases = self.expect[:] p = RequestParser(cfg, sender(), None) - for req in p: + parsed_request_idx = -1 + for parsed_request_idx, req in enumerate(p): self.same(req, sizer, matcher, cases.pop(0)) + assert len(self.expect) == parsed_request_idx + 1 assert not cases def same(self, req, sizer, matcher, exp): From fd67112f40f48a9489f1f3132d8485ac11db9fbf Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Thu, 7 Dec 2023 09:31:00 +0100 Subject: [PATCH 12/16] Ignore secure_scheme_headers in Trailer section In common configuration unlikely a big security problem in itself you are just fooling the remote about https. However, it is offers an oracle for otherwise invisible proxy request headers, so it might help exploiting other vulnerabilities. --- gunicorn/http/body.py | 2 +- gunicorn/http/message.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gunicorn/http/body.py b/gunicorn/http/body.py index 41fe334bc..2ae0eb848 100644 --- a/gunicorn/http/body.py +++ b/gunicorn/http/body.py @@ -51,7 +51,7 @@ def parse_trailers(self, unreader, data): if done: unreader.unread(buf.getvalue()[2:]) return b"" - self.req.trailers = self.req.parse_headers(buf.getvalue()[:idx]) + self.req.trailers = self.req.parse_headers(buf.getvalue()[:idx], from_trailer=True) unreader.unread(buf.getvalue()[idx + 4:]) def parse_chunked(self, unreader): diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 75b36e330..67fffd9e8 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -66,7 +66,7 @@ def force_close(self): def parse(self, unreader): raise NotImplementedError() - def parse_headers(self, data): + def parse_headers(self, data, from_trailer=False): cfg = self.cfg headers = [] @@ -76,9 +76,13 @@ def parse_headers(self, data): # handle scheme headers scheme_header = False secure_scheme_headers = {} - if ('*' in cfg.forwarded_allow_ips or - not isinstance(self.peer_addr, tuple) - or self.peer_addr[0] in cfg.forwarded_allow_ips): + if from_trailer: + # nonsense. either a request is https from the beginning + # .. or we are just behind a proxy who does not remove conflicting trailers + pass + elif ('*' in cfg.forwarded_allow_ips or + not isinstance(self.peer_addr, tuple) + or self.peer_addr[0] in cfg.forwarded_allow_ips): secure_scheme_headers = cfg.secure_scheme_headers # Parse headers into key/value pairs paying attention @@ -294,7 +298,7 @@ def parse(self, unreader): self.unreader.unread(data[2:]) return b"" - self.headers = self.parse_headers(data[:idx]) + self.headers = self.parse_headers(data[:idx], from_trailer=False) ret = data[idx + 4:] buf = None From f5501111a21d000f8c383277ef132ed882a4932f Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Thu, 7 Dec 2023 09:41:10 +0100 Subject: [PATCH 13/16] strict HTTP header field name validation Do the validation on the original, not the result from unicode case folding. Background: latin-1 0xDF is traditionally uppercased 0x53+0x53 which puts it back in ASCII --- gunicorn/http/message.py | 10 +++++++--- tests/requests/invalid/nonascii_03.http | 5 +++++ tests/requests/invalid/nonascii_03.py | 5 +++++ 3 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 tests/requests/invalid/nonascii_03.http create mode 100644 tests/requests/invalid/nonascii_03.py diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 67fffd9e8..4e1c2fd55 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -98,12 +98,16 @@ def parse_headers(self, data, from_trailer=False): raise InvalidHeader(curr) name, value = curr.split(":", 1) if self.cfg.strip_header_spaces: - name = name.rstrip(" \t").upper() - else: - name = name.upper() + name = name.rstrip(" \t") if not TOKEN_RE.fullmatch(name): raise InvalidHeaderName(name) + # this is still a dangerous place to do this + # but it is more correct than doing it before the pattern match: + # after we entered Unicode wonderland, 8bits could case-shift into ASCII: + # b"\xDF".decode("latin-1").upper().encode("ascii") == b"SS" + name = name.upper() + value = [value.lstrip(" \t")] # Consume value continuation lines diff --git a/tests/requests/invalid/nonascii_03.http b/tests/requests/invalid/nonascii_03.http new file mode 100644 index 000000000..eda5d010f --- /dev/null +++ b/tests/requests/invalid/nonascii_03.http @@ -0,0 +1,5 @@ +GET /germans.. HTTP/1.1\r\n +Content-Lengthß: 3\r\n +Content-Length: 3\r\n +\r\n +ÄÄÄ diff --git a/tests/requests/invalid/nonascii_03.py b/tests/requests/invalid/nonascii_03.py new file mode 100644 index 000000000..d336fbc85 --- /dev/null +++ b/tests/requests/invalid/nonascii_03.py @@ -0,0 +1,5 @@ +from gunicorn.config import Config +from gunicorn.http.errors import InvalidHeaderName + +cfg = Config() +request = InvalidHeaderName From 7ebe442d089a6fe0c51abb19a598b3d0d6a6d128 Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Thu, 7 Dec 2023 09:56:49 +0100 Subject: [PATCH 14/16] strict HTTP version validation Note: This is unrelated to a reverse proxy potentially talking HTTP/3 to clients. This is about the HTTP protocol version spoken to Gunicorn, which is HTTP/1.0 or HTTP/1.1. Little legitimate need for processing HTTP 1 requests with ambiguous version numbers. Broadly refuse. Co-authored-by: Ben Kallus --- gunicorn/config.py | 20 ++++++++++++++++++++ gunicorn/http/message.py | 7 ++++++- tests/requests/invalid/prefix_06.http | 4 ++++ tests/requests/invalid/prefix_06.py | 5 +++++ tests/requests/invalid/version_01.http | 2 ++ tests/requests/invalid/version_01.py | 2 ++ tests/requests/invalid/version_02.http | 2 ++ tests/requests/invalid/version_02.py | 2 ++ 8 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 tests/requests/invalid/prefix_06.http create mode 100644 tests/requests/invalid/prefix_06.py create mode 100644 tests/requests/invalid/version_01.http create mode 100644 tests/requests/invalid/version_01.py create mode 100644 tests/requests/invalid/version_02.http create mode 100644 tests/requests/invalid/version_02.py diff --git a/gunicorn/config.py b/gunicorn/config.py index be9bb001c..e7e4fac54 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -2282,6 +2282,26 @@ class PermitUnconventionalHTTPMethod(Setting): """ +class PermitUnconventionalHTTPVersion(Setting): + name = "permit_unconventional_http_version" + section = "Server Mechanics" + cli = ["--permit-unconventional-http-version"] + validator = validate_bool + action = "store_true" + default = False + desc = """\ + Permit HTTP version not matching conventions of 2023 + + This disables the refusal of likely malformed request lines. + It is unusual to specify HTTP 1 versions other than 1.0 and 1.1. + + This option is provided to diagnose backwards-incompatible changes. + Use with care and only if necessary. May be removed in a future version. + + .. versionadded:: 22.0.0 + """ + + class CasefoldHTTPMethod(Setting): name = "casefold_http_method" section = "Server Mechanics" diff --git a/gunicorn/http/message.py b/gunicorn/http/message.py index 4e1c2fd55..5e8d2427d 100644 --- a/gunicorn/http/message.py +++ b/gunicorn/http/message.py @@ -26,7 +26,8 @@ RFC9110_5_6_2_TOKEN_SPECIALS = r"!#$%&'*+-.^_`|~" TOKEN_RE = re.compile(r"[%s0-9a-zA-Z]+" % (re.escape(RFC9110_5_6_2_TOKEN_SPECIALS))) METHOD_BADCHAR_RE = re.compile("[a-z#]") -VERSION_RE = re.compile(r"HTTP/(\d+)\.(\d+)") +# usually 1.0 or 1.1 - RFC9112 permits restricting to single-digit versions +VERSION_RE = re.compile(r"HTTP/(\d)\.(\d)") class Message(object): @@ -438,6 +439,10 @@ def parse_request_line(self, line_bytes): if match is None: raise InvalidHTTPVersion(bits[2]) self.version = (int(match.group(1)), int(match.group(2))) + if not (1, 0) <= self.version < (2, 0): + # if ever relaxing this, carefully review Content-Encoding processing + if not self.cfg.permit_unconventional_http_version: + raise InvalidHTTPVersion(self.version) def set_body_reader(self): super().set_body_reader() diff --git a/tests/requests/invalid/prefix_06.http b/tests/requests/invalid/prefix_06.http new file mode 100644 index 000000000..536c586a3 --- /dev/null +++ b/tests/requests/invalid/prefix_06.http @@ -0,0 +1,4 @@ +GET /the/future HTTP/1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111.1\r\n +Content-Length: 7\r\n +\r\n +Old Man diff --git a/tests/requests/invalid/prefix_06.py b/tests/requests/invalid/prefix_06.py new file mode 100644 index 000000000..b2286d428 --- /dev/null +++ b/tests/requests/invalid/prefix_06.py @@ -0,0 +1,5 @@ +from gunicorn.config import Config +from gunicorn.http.errors import InvalidHTTPVersion + +cfg = Config() +request = InvalidHTTPVersion diff --git a/tests/requests/invalid/version_01.http b/tests/requests/invalid/version_01.http new file mode 100644 index 000000000..0a222ce12 --- /dev/null +++ b/tests/requests/invalid/version_01.http @@ -0,0 +1,2 @@ +GET /foo HTTP/0.99\r\n +\r\n diff --git a/tests/requests/invalid/version_01.py b/tests/requests/invalid/version_01.py new file mode 100644 index 000000000..760840b69 --- /dev/null +++ b/tests/requests/invalid/version_01.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidHTTPVersion +request = InvalidHTTPVersion diff --git a/tests/requests/invalid/version_02.http b/tests/requests/invalid/version_02.http new file mode 100644 index 000000000..6d29ac5fe --- /dev/null +++ b/tests/requests/invalid/version_02.http @@ -0,0 +1,2 @@ +GET /foo HTTP/2.0\r\n +\r\n diff --git a/tests/requests/invalid/version_02.py b/tests/requests/invalid/version_02.py new file mode 100644 index 000000000..760840b69 --- /dev/null +++ b/tests/requests/invalid/version_02.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidHTTPVersion +request = InvalidHTTPVersion From b6c7414fd08196e87bee7c6687b4286922304e4e Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Tue, 12 Dec 2023 15:27:47 +0100 Subject: [PATCH 15/16] briefly document security fixes in 2023 news further information to be published in security advisories, published out of tree on Github --- docs/source/2023-news.rst | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/docs/source/2023-news.rst b/docs/source/2023-news.rst index 80f022248..971cfef38 100644 --- a/docs/source/2023-news.rst +++ b/docs/source/2023-news.rst @@ -2,6 +2,28 @@ Changelog - 2023 ================ +22.0.0 - TBDTBDTBD +================== + +- fix numerous security vulnerabilites in HTTP parser (closing some request smuggling vectors) +- parsing additional requests is no longer attempted past unsupported request framing +- on HTTP versions < 1.1 support for chunked transfer is refused (only used in exploits) +- requests conflicting configured or passed SCRIPT_NAME now produce a verbose error +- Trailer fields are no longer inspected for headers indicating secure scheme + +** Breaking changes ** + +- the limitations on valid characters in the HTTP method have been bounded to Internet Standards +- requests specifying unsupported transfer coding (order) are refused by default (rare) +- HTTP methods are no longer casefolded by default (IANA method registry contains none affacted) +- HTTP methods containing the number sign (#) are no longer accepted by default (rare) +- HTTP versions < 1.0 or >= 2.0 are no longer accepted by default (rare, only HTTP/1.1 is supported) +- HTTP versions consisting of multiple digits or containing a prefix/suffix are no longer accepted +- HTTP header field names Gunicorn cannot safely map to variables are silently dropped, as in other software +- HTTP headers with empty field name are refused by default (no legitimate use cases, used in exploits) +- requests with both Transfer-Encoding and Content-Length are refused by default (such a message might indicate an attempt to perform request smuggling) +- empty transfer codings are no longer permitted (reportedly seen with really old & broken proxies) + 21.2.0 - 2023-07-19 =================== From e710393d142edf56294c4490dbe08e5980160b19 Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Sun, 17 Dec 2023 17:11:58 +0100 Subject: [PATCH 16/16] HTTP parser: stricter chunk-ext OBS handling chunk extensions are silently ignored before and after this change; its just the whitespace handling for the case without extensions that matters applying same strip(WS)->rstrip(BWS) replacement as already done in related cases half-way fix: could probably reject all BWS cases, rejecting only misplaced ones --- gunicorn/http/body.py | 5 ++++- tests/requests/invalid/chunked_09.http | 7 +++++++ tests/requests/invalid/chunked_09.py | 2 ++ tests/requests/invalid/chunked_10.http | 7 +++++++ tests/requests/invalid/chunked_10.py | 2 ++ tests/requests/invalid/chunked_11.http | 7 +++++++ tests/requests/invalid/chunked_11.py | 2 ++ tests/requests/valid/025.http | 2 +- 8 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 tests/requests/invalid/chunked_09.http create mode 100644 tests/requests/invalid/chunked_09.py create mode 100644 tests/requests/invalid/chunked_10.http create mode 100644 tests/requests/invalid/chunked_10.py create mode 100644 tests/requests/invalid/chunked_11.http create mode 100644 tests/requests/invalid/chunked_11.py diff --git a/gunicorn/http/body.py b/gunicorn/http/body.py index 2ae0eb848..78f03214a 100644 --- a/gunicorn/http/body.py +++ b/gunicorn/http/body.py @@ -85,7 +85,10 @@ def parse_chunk_size(self, unreader, data=None): data = buf.getvalue() line, rest_chunk = data[:idx], data[idx + 2:] - chunk_size = line.split(b";", 1)[0].strip() + # RFC9112 7.1.1: BWS before chunk-ext - but ONLY then + chunk_size, *chunk_ext = line.split(b";", 1) + if chunk_ext: + chunk_size = chunk_size.rstrip(b" \t") if any(n not in b"0123456789abcdefABCDEF" for n in chunk_size): raise InvalidChunkSize(chunk_size) chunk_size = int(chunk_size, 16) diff --git a/tests/requests/invalid/chunked_09.http b/tests/requests/invalid/chunked_09.http new file mode 100644 index 000000000..6207310cf --- /dev/null +++ b/tests/requests/invalid/chunked_09.http @@ -0,0 +1,7 @@ +POST /chunked_ows_without_ext HTTP/1.1\r\n +Transfer-Encoding: chunked\r\n +\r\n +5\r\n +hello\r\n +0 \r\n +\r\n diff --git a/tests/requests/invalid/chunked_09.py b/tests/requests/invalid/chunked_09.py new file mode 100644 index 000000000..0571e1183 --- /dev/null +++ b/tests/requests/invalid/chunked_09.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidChunkSize +request = InvalidChunkSize diff --git a/tests/requests/invalid/chunked_10.http b/tests/requests/invalid/chunked_10.http new file mode 100644 index 000000000..db1e4c38d --- /dev/null +++ b/tests/requests/invalid/chunked_10.http @@ -0,0 +1,7 @@ +POST /chunked_ows_before HTTP/1.1\r\n +Transfer-Encoding: chunked\r\n +\r\n +5\r\n +hello\r\n + 0\r\n +\r\n diff --git a/tests/requests/invalid/chunked_10.py b/tests/requests/invalid/chunked_10.py new file mode 100644 index 000000000..0571e1183 --- /dev/null +++ b/tests/requests/invalid/chunked_10.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidChunkSize +request = InvalidChunkSize diff --git a/tests/requests/invalid/chunked_11.http b/tests/requests/invalid/chunked_11.http new file mode 100644 index 000000000..da1f72e58 --- /dev/null +++ b/tests/requests/invalid/chunked_11.http @@ -0,0 +1,7 @@ +POST /chunked_ows_before HTTP/1.1\r\n +Transfer-Encoding: chunked\r\n +\r\n +5\n;\r\n +hello\r\n +0\r\n +\r\n diff --git a/tests/requests/invalid/chunked_11.py b/tests/requests/invalid/chunked_11.py new file mode 100644 index 000000000..0571e1183 --- /dev/null +++ b/tests/requests/invalid/chunked_11.py @@ -0,0 +1,2 @@ +from gunicorn.http.errors import InvalidChunkSize +request = InvalidChunkSize diff --git a/tests/requests/valid/025.http b/tests/requests/valid/025.http index f8d7fae2e..214f3094c 100644 --- a/tests/requests/valid/025.http +++ b/tests/requests/valid/025.http @@ -3,7 +3,7 @@ Transfer-Encoding: chunked\r\n \r\n 5; some; parameters=stuff\r\n hello\r\n -6; blahblah; blah\r\n +6 \t;\tblahblah; blah\r\n world\r\n 0\r\n \r\n