Skip to content

Commit 119a195

Browse files
committed
httputil: Add limits on multipart form data parsing
The new default limits prevent a DoS vulnerability involving requests with many multipart parts. This commit also adds a defense-in-depth limit on the size of the headers of each multipart part, which would have mitigated the vulnerability fixed in 6.5.3. New data structures are added to allow users to configure these limits, and to disable multipart parsing entirely if they choose. However, due to the complexity of the plumbing required to pass these configuration options through the stack, the only configuration mechanism provided in this commit is the ability to set a global default.
1 parent 63d4df4 commit 119a195

2 files changed

Lines changed: 134 additions & 2 deletions

File tree

tornado/httputil.py

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import calendar
2323
import collections.abc
2424
import copy
25+
import dataclasses
2526
import datetime
2627
import email.utils
2728
from functools import lru_cache
@@ -913,12 +914,90 @@ def _int_or_none(val: str) -> Optional[int]:
913914
return int(val)
914915

915916

917+
@dataclasses.dataclass
class ParseMultipartConfig:
    """This class configures the parsing of ``multipart/form-data`` request bodies.

    Its primary purpose is to place limits on the size and complexity of request messages
    to avoid potential denial-of-service attacks.

    :raises ValueError: if ``max_parts`` or ``max_part_header_size`` is negative.

    .. versionadded:: 6.5.5
    """

    enabled: bool = True
    """Set this to false to disable the parsing of ``multipart/form-data`` requests entirely.

    This may be desirable for applications that do not need to handle this format, since
    multipart requests have a history of DoS vulnerabilities in Tornado. Multipart requests
    are used primarily for ``<input type="file">`` in HTML forms, or in APIs that mimic this
    format. File uploads that use the HTTP ``PUT`` method generally do not use the multipart
    format.
    """

    # NOTE(review): parse_multipart_form_data compares this limit against the number of
    # segments produced by splitting the body on the boundary, which also counts the
    # segment before the first boundary. The effective limit therefore appears to be one
    # lower than the configured value -- confirm whether that is intended.
    max_parts: int = 100
    """The maximum number of parts accepted in a multipart request.

    Each ``<input>`` element in an HTML form corresponds to at least one "part".
    Must not be negative.
    """

    max_part_header_size: int = 10 * 1024
    """The maximum size of the headers for each part of a multipart request.

    The header for a part contains the name of the form field and optionally the filename
    and content type of the uploaded file. Must not be negative.
    """

    def __post_init__(self) -> None:
        # Fail fast on nonsensical limits. A negative limit can never be satisfied, so
        # without this check the mistake would only surface at request time as a
        # misleading "too many parts" / "header too large" error.
        for name in ("max_parts", "max_part_header_size"):
            if getattr(self, name) < 0:
                raise ValueError(f"{name} must not be negative")
949+
950+
951+
@dataclasses.dataclass
class ParseBodyConfig:
    """Top-level settings that control how request bodies are parsed.

    Settings are grouped by body format; each group is held in its own
    configuration object.

    .. versionadded:: 6.5.5
    """

    # A factory is used so that every ParseBodyConfig gets its own
    # ParseMultipartConfig instance instead of sharing one mutable default.
    multipart: ParseMultipartConfig = dataclasses.field(
        default_factory=ParseMultipartConfig
    )
    """Settings applied when parsing ``multipart/form-data`` request bodies."""
962+
963+
964+
# Module-wide fallback configuration. parse_body_arguments and
# parse_multipart_form_data read it when they are called without an explicit
# ``config`` argument, and set_parse_body_config rebinds it.
_DEFAULT_PARSE_BODY_CONFIG = ParseBodyConfig()
965+
966+
967+
def set_parse_body_config(config: ParseBodyConfig) -> None:
    r"""Replaces the **global** default configuration for parsing request bodies.

    The given ``config`` is used by `parse_body_arguments` and
    `parse_multipart_form_data` whenever they are called without an explicit
    ``config`` argument.

    This global switch is a stopgap for applications that need to raise the limits
    introduced in Tornado 6.5.5, or that wish to disable the parsing of
    multipart/form-data bodies entirely. Non-global configuration for this
    functionality will be introduced in a future release.

    >>> content_type = "multipart/form-data; boundary=foo"
    >>> multipart_body = b"--foo--\r\n"
    >>> parse_body_arguments(content_type, multipart_body, {}, {})
    >>> multipart_config = ParseMultipartConfig(enabled=False)
    >>> config = ParseBodyConfig(multipart=multipart_config)
    >>> set_parse_body_config(config)
    >>> parse_body_arguments(content_type, multipart_body, {}, {})
    Traceback (most recent call last):
        ...
    tornado.httputil.HTTPInputError: ...: multipart/form-data parsing is disabled
    >>> set_parse_body_config(ParseBodyConfig()) # reset to defaults

    .. versionadded:: 6.5.5
    """
    # Rebind the module-level default; the object is stored as-is, not copied.
    global _DEFAULT_PARSE_BODY_CONFIG
    _DEFAULT_PARSE_BODY_CONFIG = config
991+
992+
916993
def parse_body_arguments(
917994
content_type: str,
918995
body: bytes,
919996
arguments: Dict[str, List[bytes]],
920997
files: Dict[str, List[HTTPFile]],
921998
headers: Optional[HTTPHeaders] = None,
999+
*,
1000+
config: Optional[ParseBodyConfig] = None,
9221001
) -> None:
9231002
"""Parses a form request body.
9241003
@@ -928,6 +1007,8 @@ def parse_body_arguments(
9281007
and ``files`` parameters are dictionaries that will be updated
9291008
with the parsed contents.
9301009
"""
1010+
if config is None:
1011+
config = _DEFAULT_PARSE_BODY_CONFIG
9311012
if content_type.startswith("application/x-www-form-urlencoded"):
9321013
if headers and "Content-Encoding" in headers:
9331014
raise HTTPInputError(
@@ -948,10 +1029,15 @@ def parse_body_arguments(
9481029
)
9491030
try:
9501031
fields = content_type.split(";")
1032+
if fields[0].strip() != "multipart/form-data":
1033+
# This catches "Content-Type: multipart/form-dataxyz"
1034+
raise HTTPInputError("Invalid content type")
9511035
for field in fields:
9521036
k, sep, v = field.strip().partition("=")
9531037
if k == "boundary" and v:
954-
parse_multipart_form_data(utf8(v), body, arguments, files)
1038+
parse_multipart_form_data(
1039+
utf8(v), body, arguments, files, config=config.multipart
1040+
)
9551041
break
9561042
else:
9571043
raise HTTPInputError("multipart boundary not found")
@@ -964,6 +1050,8 @@ def parse_multipart_form_data(
9641050
data: bytes,
9651051
arguments: Dict[str, List[bytes]],
9661052
files: Dict[str, List[HTTPFile]],
1053+
*,
1054+
config: Optional[ParseMultipartConfig] = None,
9671055
) -> None:
9681056
"""Parses a ``multipart/form-data`` body.
9691057
@@ -976,6 +1064,10 @@ def parse_multipart_form_data(
9761064
Now recognizes non-ASCII filenames in RFC 2231/5987
9771065
(``filename*=``) format.
9781066
"""
1067+
if config is None:
1068+
config = _DEFAULT_PARSE_BODY_CONFIG.multipart
1069+
if not config.enabled:
1070+
raise HTTPInputError("multipart/form-data parsing is disabled")
9791071
# The standard allows for the boundary to be quoted in the header,
9801072
# although it's rare (it happens at least for google app engine
9811073
# xmpp). I think we're also supposed to handle backslash-escapes
@@ -987,12 +1079,16 @@ def parse_multipart_form_data(
9871079
if final_boundary_index == -1:
9881080
raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
9891081
parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
1082+
if len(parts) > config.max_parts:
1083+
raise HTTPInputError("multipart/form-data has too many parts")
9901084
for part in parts:
9911085
if not part:
9921086
continue
9931087
eoh = part.find(b"\r\n\r\n")
9941088
if eoh == -1:
9951089
raise HTTPInputError("multipart/form-data missing headers")
1090+
if eoh > config.max_part_header_size:
1091+
raise HTTPInputError("multipart/form-data part header too large")
9961092
headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"), _chars_are_bytes=False)
9971093
disp_header = headers.get("Content-Disposition", "")
9981094
disposition, disp_params = _parse_header(disp_header)
@@ -1200,7 +1296,7 @@ def doctests():
12001296
# type: () -> unittest.TestSuite
12011297
import doctest
12021298

1203-
return doctest.DocTestSuite()
1299+
return doctest.DocTestSuite(optionflags=doctest.ELLIPSIS)
12041300

12051301

12061302
_netloc_re = re.compile(r"^(.+):(\d+)$")

tornado/test/httputil_test.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
qs_to_qsl,
1010
HTTPInputError,
1111
HTTPFile,
12+
ParseMultipartConfig,
1213
)
1314
from tornado.escape import utf8, native_str
1415
from tornado.log import gen_log
@@ -298,10 +299,45 @@ def f(n):
298299
return time.perf_counter() - start
299300

300301
d1 = f(1_000)
302+
# Note that headers larger than this are blocked by the default configuration.
301303
d2 = f(10_000)
302304
if d2 / d1 > 20:
303305
self.fail(f"Disposition param parsing is not linear: {d1=} vs {d2=}")
304306

307+
def test_multipart_config(self):
    """Checks that each ``ParseMultipartConfig`` setting is enforced by the parser."""
    boundary = b"1234"
    body = b"""--1234
Content-Disposition: form-data; name="files"; filename="ab.txt"

--1234--""".replace(
        b"\n", b"\r\n"
    )
    args, files = form_data_args()

    # With the default limits the single file part is parsed normally.
    parse_multipart_form_data(
        boundary, body, args, files, config=ParseMultipartConfig()
    )
    self.assertEqual(files["files"][0]["filename"], "ab.txt")

    # Each restrictive configuration must reject the same body, and the error
    # must identify which setting caused the rejection.
    rejections = [
        (ParseMultipartConfig(max_parts=0), "too many parts"),
        (ParseMultipartConfig(max_part_header_size=10), "header too large"),
        (
            ParseMultipartConfig(enabled=False),
            "multipart/form-data parsing is disabled",
        ),
    ]
    for rejecting_config, expected_message in rejections:
        with self.assertRaises(HTTPInputError) as cm:
            parse_multipart_form_data(
                boundary, body, args, files, config=rejecting_config
            )
        self.assertIn(expected_message, str(cm.exception))
340+
305341

306342
class HTTPHeadersTest(unittest.TestCase):
307343
def test_multi_line(self):

0 commit comments

Comments
 (0)