|
1 | 1 | import copy |
2 | 2 | from io import BytesIO |
3 | 3 | from itertools import chain |
| 4 | +from unittest import mock |
4 | 5 | from urllib.parse import urlencode |
5 | 6 |
|
6 | 7 | from django.core.exceptions import BadRequest, DisallowedHost |
|
15 | 16 | ) |
16 | 17 | from django.http.multipartparser import ( |
17 | 18 | MAX_TOTAL_HEADER_SIZE, |
| 19 | + LazyStream, |
18 | 20 | MultiPartParser, |
19 | 21 | MultiPartParserError, |
20 | 22 | ) |
@@ -917,6 +919,65 @@ def test_multipart_post_field_with_invalid_base64(self): |
917 | 919 | request.body # evaluate |
918 | 920 | self.assertEqual(request.POST, {"name": ["123"]}) |
919 | 921 |
|
def test_multipart_file_upload_base64_whitespace_heavy(self):
    # Simulate a base64-encoded file upload whose content is almost entirely
    # whitespace, so that the whitespace run straddles a chunk boundary.
    part_headers = [
        f"--{BOUNDARY}",
        'Content-Disposition: form-data; name="file"; filename="test.txt"',
        "Content-Type: application/octet-stream",
        "Content-Transfer-Encoding: base64",
        "",
    ]
    payload = FakePayload("\r\n".join(part_headers))
    # The four "A" characters decode to b"\x00\x00\x00". The 70000 bytes of
    # whitespace between the third and fourth "A" cross the default 64KB
    # chunk boundary, which forces the parser into its alignment loop.
    encoded_body = b"AAA" + b" " * 70000 + b"A"
    payload.write(b"\r\n" + encoded_body + b"\r\n")
    payload.write("--" + BOUNDARY + "--\r\n")
    environ = {
        "REQUEST_METHOD": "POST",
        "CONTENT_TYPE": MULTIPART_CONTENT,
        "CONTENT_LENGTH": len(payload),
        "wsgi.input": payload,
    }
    request = WSGIRequest(environ)

    # Record the size argument of every LazyStream.read() call made while
    # the request is parsed, delegating to the real implementation.
    requested_sizes = []
    unpatched_read = LazyStream.read

    def recording_read(stream, size=None):
        requested_sizes.append(size)
        return unpatched_read(stream, size)

    with mock.patch.object(LazyStream, "read", recording_read):
        uploaded = request.FILES

    self.assertEqual(len(uploaded), 1)
    self.assertEqual(uploaded["file"].read(), b"\x00\x00\x00")

    # The alignment loop has to read in chunk-sized units. Reading one byte
    # at a time would trigger a separate read() call, each with a costly
    # internal unget() cycle, for every whitespace byte past the chunk
    # boundary (4488 calls) instead of the single 65536-byte read below.
    expected_sizes = [
        1,  # main_stream: BoundaryIter.__init__ probe, preamble
        1024,  # sub_stream: parse_boundary_stream, preamble headers
        1,  # main_stream: BoundaryIter.__init__ probe, file field
        1024,  # field_stream: parse_boundary_stream, file headers
        # field_stream: base64 alignment loop. One chunk-sized read finds
        # the non-whitespace byte that completes the 4-byte base64 group
        # spanning the chunk boundary.
        65536,
        1,  # main_stream: BoundaryIter.__init__ probe, epilogue
        1024,  # sub_stream: parse_boundary_stream, epilogue headers
        # main_stream: BoundaryIter.__init__ probe on the exhausted stream;
        # returns b"" and stops iteration.
        1,
    ]
    self.assertEqual(requested_sizes, expected_sizes)
920 | 981 | def test_POST_after_body_read_and_stream_read_multipart(self): |
921 | 982 | """ |
922 | 983 | POST should be populated even if body is read first, and then |
|
0 commit comments