Skip to content

Commit 35a1d58

Browse files
committed
gh-150499: http.server: drain unread request body on persistent connections
Follow-up to gh-150499 (paired with #150500 which lands the §3.3.3 rules). Wrap rfile with a small byte-counting reader for the duration of the request and, after the handler returns, drain any unread declared body up to a 1 MiB cap (or close the connection if the remainder is larger). Without this, the next iteration of the keep-alive loop parses the leftover body as a request line, per RFC 7230 section 6.3. The wrapper proxies read, read1, readline, readinto, and readinto1 and falls back to __getattr__ for everything else, so existing handlers (including the ServerHandler chain in wsgiref.simple_server, which passes self.rfile directly to WSGI applications) continue to work unchanged. Add RFC7230BodyDrainTestCase asserting that the leftover body is not parsed as the next request line on a persistent connection. Signed-off-by: tonghuaroot <tonghuaroot@gmail.com>
1 parent 776573c commit 35a1d58

3 files changed

Lines changed: 144 additions & 1 deletion

File tree

Lib/http/server.py

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,46 @@ class ThreadingHTTPSServer(socketserver.ThreadingMixIn, HTTPSServer):
170170
daemon_threads = True
171171

172172

173+
class _ReadCountingReader:
174+
# Proxy around the underlying rfile that tracks the number of bytes
175+
# consumed, so handle_one_request can drain or close per RFC 7230
176+
# section 6.3.
177+
178+
def __init__(self, stream):
179+
self._stream = stream
180+
self.bytes_read = 0
181+
182+
def read(self, *args, **kwargs):
183+
data = self._stream.read(*args, **kwargs)
184+
self.bytes_read += len(data)
185+
return data
186+
187+
def read1(self, *args, **kwargs):
188+
data = self._stream.read1(*args, **kwargs)
189+
self.bytes_read += len(data)
190+
return data
191+
192+
def readline(self, *args, **kwargs):
193+
data = self._stream.readline(*args, **kwargs)
194+
self.bytes_read += len(data)
195+
return data
196+
197+
def readinto(self, b):
198+
n = self._stream.readinto(b)
199+
if n:
200+
self.bytes_read += n
201+
return n
202+
203+
def readinto1(self, b):
204+
n = self._stream.readinto1(b)
205+
if n:
206+
self.bytes_read += n
207+
return n
208+
209+
def __getattr__(self, name):
210+
return getattr(self._stream, name)
211+
212+
173213
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
174214

175215
"""HTTP request handler base class.
@@ -426,6 +466,11 @@ def handle_expect_100(self):
426466
self.end_headers()
427467
return True
428468

469+
# Maximum number of bytes drained from an unread request body after
470+
# dispatch to honour RFC 7230 section 6.3; over this, the connection
471+
# is closed instead.
472+
_MAX_BODY_DRAIN = 1 << 20
473+
429474
def handle_one_request(self):
430475
"""Handle a single HTTP request.
431476
@@ -434,8 +479,10 @@ def handle_one_request(self):
434479
commands such as GET and POST.
435480
436481
"""
482+
raw_rfile = self.rfile
483+
wrapped = False
437484
try:
438-
self.raw_requestline = self.rfile.readline(65537)
485+
self.raw_requestline = raw_rfile.readline(65537)
439486
if len(self.raw_requestline) > 65536:
440487
self.requestline = ''
441488
self.request_version = ''
@@ -445,9 +492,13 @@ def handle_one_request(self):
445492
if not self.raw_requestline:
446493
self.close_connection = True
447494
return
495+
# Wrap rfile to track body consumption for RFC 7230 section 6.3.
496+
self.rfile = _ReadCountingReader(raw_rfile)
497+
wrapped = True
448498
if not self.parse_request():
449499
# An error code has been sent, just exit
450500
return
501+
body_bytes_start = self.rfile.bytes_read
451502
mname = 'do_' + self.command
452503
if not hasattr(self, mname):
453504
self.send_error(
@@ -457,11 +508,40 @@ def handle_one_request(self):
457508
method = getattr(self, mname)
458509
method()
459510
self.wfile.flush() #actually send the response if not already done.
511+
self._drain_request_body(body_bytes_start)
460512
except TimeoutError as e:
461513
#a read or a write timed out. Discard this connection
462514
self.log_error("Request timed out: %r", e)
463515
self.close_connection = True
464516
return
517+
finally:
518+
if wrapped:
519+
self.rfile = raw_rfile
520+
521+
def _drain_request_body(self, body_bytes_start):
522+
# Drain any unread declared request body, or close the connection
523+
# if the remainder exceeds the drain cap. Required by RFC 7230
524+
# section 6.3 for persistent connections.
525+
if self.close_connection:
526+
return
527+
cl = self.headers.get('Content-Length')
528+
if not cl or not cl.isdigit():
529+
return
530+
declared = int(cl)
531+
consumed = self.rfile.bytes_read - body_bytes_start
532+
remaining = declared - consumed
533+
if remaining <= 0:
534+
return
535+
if remaining > self._MAX_BODY_DRAIN:
536+
self.close_connection = True
537+
return
538+
try:
539+
drained = self.rfile.read(remaining)
540+
except OSError:
541+
self.close_connection = True
542+
return
543+
if len(drained) < remaining:
544+
self.close_connection = True
465545

466546
def handle(self):
467547
"""Handle multiple requests if necessary."""

Lib/test/test_httpservers.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,64 @@ def test_head_via_send_error(self):
364364
self.assertEqual(b'', data)
365365

366366

367+
class RFC7230BodyDrainTestCase(BaseTestCase):
368+
"""Exercise the post-dispatch body drain added for RFC 7230 section 6.3."""
369+
370+
class request_handler(NoLogRequestHandler, BaseHTTPRequestHandler):
371+
protocol_version = 'HTTP/1.1'
372+
default_request_version = 'HTTP/1.1'
373+
374+
def do_GET(self):
375+
# Deliberately does not read the body, to exercise the
376+
# post-dispatch body drain.
377+
out = b'GET ok\n'
378+
self.send_response(HTTPStatus.OK)
379+
self.send_header('Content-Type', 'text/plain')
380+
self.send_header('Content-Length', str(len(out)))
381+
self.end_headers()
382+
self.wfile.write(out)
383+
384+
def _send_raw(self, payload, timeout=2):
385+
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
386+
sock.settimeout(timeout)
387+
sock.connect((self.HOST, self.PORT))
388+
try:
389+
sock.sendall(payload)
390+
data = b''
391+
try:
392+
while True:
393+
chunk = sock.recv(4096)
394+
if not chunk:
395+
break
396+
data += chunk
397+
except TimeoutError:
398+
pass
399+
finally:
400+
sock.close()
401+
return data
402+
403+
def test_body_drained_on_persistent_connection(self):
404+
# The leftover body of a GET request must not be parsed as the
405+
# next request line on a keep-alive connection.
406+
body_filler = b'A' * 100
407+
smuggled = b'GET /pwn HTTP/1.1\r\nHost: 127.0.0.1\r\n\r\n'
408+
data = self._send_raw(
409+
b'GET / HTTP/1.1\r\n'
410+
b'Host: 127.0.0.1\r\n'
411+
b'Content-Length: 100\r\n'
412+
b'\r\n'
413+
+ body_filler
414+
+ smuggled
415+
)
416+
# The leftover body must not be parsed as a malformed request
417+
# line; we should never see "Unsupported method ('AAAA...')".
418+
self.assertNotIn(b"Unsupported method ('AAA", data)
419+
# The smuggled GET /pwn is a well-formed request that arrives
420+
# after the body has been drained, so the server should reply
421+
# to it normally as the second request on the connection.
422+
self.assertEqual(data.count(b'HTTP/1.1 200'), 2)
423+
424+
367425
class HTTP09ServerTestCase(BaseTestCase):
368426

369427
class request_handler(NoLogRequestHandler, BaseHTTPRequestHandler):
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
:mod:`http.server` now drains any unread declared request body up to
2+
1 MiB after dispatch on persistent connections, and closes the connection
3+
when the remainder exceeds that cap, per :rfc:`7230#section-6.3`. This
4+
prevents the next iteration of the keep-alive loop from parsing leftover
5+
body bytes as a request line.

0 commit comments

Comments
 (0)