diff --git a/python_multipart/multipart.py b/python_multipart/multipart.py index a996379..107857d 100644 --- a/python_multipart/multipart.py +++ b/python_multipart/multipart.py @@ -68,12 +68,14 @@ def finalize(self) -> None: ... def close(self) -> None: ... class FieldProtocol(_FormProtocol, Protocol): - def __init__(self, name: bytes | None) -> None: ... + def __init__(self, name: bytes | None, content_type: str | None = None) -> None: ... def set_none(self) -> None: ... class FileProtocol(_FormProtocol, Protocol): - def __init__(self, file_name: bytes | None, field_name: bytes | None, config: FileConfig) -> None: ... + def __init__( + self, file_name: bytes | None, field_name: bytes | None, config: FileConfig, content_type: str | None = None + ) -> None: ... OnFieldCallback = Callable[[FieldProtocol], None] OnFileCallback = Callable[[FileProtocol], None] @@ -221,11 +223,13 @@ class Field: Args: name: The name of the form field. + content_type: The value of the Content-Type header for this field. """ - def __init__(self, name: bytes | None) -> None: + def __init__(self, name: bytes | None, content_type: str | None = None) -> None: self._name = name self._value: list[bytes] = [] + self._content_type = content_type # We cache the joined version of _value for speed. self._cache = _missing @@ -317,6 +321,11 @@ def value(self) -> bytes | None: assert isinstance(self._cache, bytes) or self._cache is None return self._cache + @property + def content_type(self) -> str | None: + """This property returns the content_type value of the field.""" + return self._content_type + def __eq__(self, other: object) -> bool: if isinstance(other, Field): return self.field_name == other.field_name and self.value == other.value @@ -354,10 +363,17 @@ class File: file_name: The name of the file that this [`File`][python_multipart.File] represents. field_name: The name of the form field that this file was uploaded with. 
This can be None, if, for example, the file was uploaded with Content-Type application/octet-stream. + content_type: The value of the Content-Type header. config: The configuration for this File. See above for valid configuration keys and their corresponding values. """ # noqa: E501 - def __init__(self, file_name: bytes | None, field_name: bytes | None = None, config: FileConfig = {}) -> None: + def __init__( + self, + file_name: bytes | None, + field_name: bytes | None = None, + content_type: str | None = None, + config: FileConfig = {}, + ) -> None: # Save configuration, set other variables default. self.logger = logging.getLogger(__name__) self._config = config @@ -365,9 +381,10 @@ def __init__(self, file_name: bytes | None, field_name: bytes | None = None, con self._bytes_written = 0 self._fileobj: BytesIO | BufferedRandom = BytesIO() - # Save the provided field/file name. + # Save the provided field/file name and content type. self._field_name = field_name self._file_name = file_name + self._content_type = content_type # Our actual file name is None by default, since, depending on our # config, we may not actually use the provided name. @@ -420,6 +437,11 @@ def in_memory(self) -> bool: """ return self._in_memory + @property + def content_type(self) -> str | None: + """The Content-Type value for this part, if it was set.""" + return self._content_type + def flush_to_disk(self) -> None: """If the file is already on-disk, do nothing. Otherwise, copy from the in-memory buffer to a disk file, and then reassign our internal @@ -1237,7 +1259,7 @@ def data_callback(name: CallbackName, end_i: int, remaining: bool = False) -> No elif state == MultipartState.HEADER_VALUE_ALMOST_DONE: # The last character should be a LF. If not, it's an error. 
if c != LF: - msg = "Did not find LF character at end of header " "(found %r)" % (c,) + msg = "Did not find LF character at end of header (found %r)" % (c,) self.logger.warning(msg) e = MultipartParseError(msg) e.offset = i @@ -1656,7 +1678,7 @@ def on_header_value(data: bytes, start: int, end: int) -> None: header_value.append(data[start:end]) def on_header_end() -> None: - headers[b"".join(header_name)] = b"".join(header_value) + headers[b"".join(header_name).lower()] = b"".join(header_value) del header_name[:] del header_value[:] @@ -1666,26 +1688,31 @@ def on_headers_finished() -> None: is_file = False # Parse the content-disposition header. - # TODO: handle mixed case - content_disp = headers.get(b"Content-Disposition") + content_disp = headers.get(b"content-disposition") disp, options = parse_options_header(content_disp) # Get the field and filename. field_name = options.get(b"name") file_name = options.get(b"filename") - # TODO: check for errors + if field_name is None: + raise FormParserError('Field name not found in Content-Disposition: "{!r}"'.format(content_disp)) + # TODO: check for other errors # Create the proper class. + content_type_b = headers.get(b"content-type") + content_type = content_type_b.decode("latin-1") if content_type_b is not None else None if file_name is None: - f_multi = FieldClass(field_name) + f_multi = FieldClass(field_name, content_type=content_type) else: - f_multi = FileClass(file_name, field_name, config=cast("FileConfig", self.config)) + f_multi = FileClass( + file_name, field_name, config=cast("FileConfig", self.config), content_type=content_type + ) is_file = True # Parse the given Content-Transfer-Encoding to determine what # we need to do with the incoming data. # TODO: check that we properly handle 8bit / 7bit encoding. 
- transfer_encoding = headers.get(b"Content-Transfer-Encoding", b"7bit") + transfer_encoding = headers.get(b"content-transfer-encoding", b"7bit") if transfer_encoding in (b"binary", b"8bit", b"7bit"): writer = f_multi diff --git a/tests/test_data/http/almost_match_boundary.yaml b/tests/test_data/http/almost_match_boundary.yaml index 235493e..c114ffe 100644 --- a/tests/test_data/http/almost_match_boundary.yaml +++ b/tests/test_data/http/almost_match_boundary.yaml @@ -3,6 +3,7 @@ expected: - name: file type: file file_name: test.txt + content_type: text/plain data: !!binary | LS1ib3VuZGFyaQ0KLS1ib3VuZGFyeXEtLWJvdW5kYXJ5DXEtLWJvdW5kYXJxDQotLWJvdW5hcnlkLS0NCi0tbm90Ym91bmQtLQ0KLS1taXNtYXRjaA0KLS1taXNtYXRjaC0tDQotLWJvdW5kYXJ5LVENCi0tYm91bmRhcnkNUS0tYm91bmRhcnlR diff --git a/tests/test_data/http/base64_encoding.yaml b/tests/test_data/http/base64_encoding.yaml index 1033150..db227a1 100644 --- a/tests/test_data/http/base64_encoding.yaml +++ b/tests/test_data/http/base64_encoding.yaml @@ -3,5 +3,6 @@ expected: - name: file type: file file_name: test.txt + content_type: text/plain data: !!binary | VGVzdCAxMjM= diff --git a/tests/test_data/http/case_insensitive_headers.http b/tests/test_data/http/case_insensitive_headers.http new file mode 100644 index 0000000..a14cc11 --- /dev/null +++ b/tests/test_data/http/case_insensitive_headers.http @@ -0,0 +1,21 @@ +------WebKitFormBoundarygbACTUR58IyeurVf +Content-Disposition: form-data; name="file1"; filename="test1.txt" +Content-Type: text/plain + +Test file #1 +------WebKitFormBoundarygbACTUR58IyeurVf +CONTENT-DISPOSITION: form-data; name="file2"; filename="test2.txt" +CONTENT-Type: text/plain + +Test file #2 +------WebKitFormBoundarygbACTUR58IyeurVf +content-disposition: form-data; name="file3"; filename="test3.txt" +content-type: text/plain + +Test file #3 +------WebKitFormBoundarygbACTUR58IyeurVf +cOnTenT-DiSpOsItiOn: form-data; name="file4"; filename="test4.txt" +Content-Type: text/plain + +Test file #4 
+------WebKitFormBoundarygbACTUR58IyeurVf-- diff --git a/tests/test_data/http/case_insensitive_headers.yaml b/tests/test_data/http/case_insensitive_headers.yaml new file mode 100644 index 0000000..4c9d365 --- /dev/null +++ b/tests/test_data/http/case_insensitive_headers.yaml @@ -0,0 +1,26 @@ +boundary: ----WebKitFormBoundarygbACTUR58IyeurVf +expected: + - name: file1 + type: file + file_name: test1.txt + content_type: text/plain + data: !!binary | + VGVzdCBmaWxlICMx + - name: file2 + type: file + file_name: test2.txt + content_type: text/plain + data: !!binary | + VGVzdCBmaWxlICMy + - name: file3 + type: file + file_name: test3.txt + content_type: text/plain + data: !!binary | + VGVzdCBmaWxlICMz + - name: file4 + type: file + file_name: test4.txt + content_type: text/plain + data: !!binary | + VGVzdCBmaWxlICM0 diff --git a/tests/test_data/http/header_with_number.yaml b/tests/test_data/http/header_with_number.yaml index 493b783..86b4779 100644 --- a/tests/test_data/http/header_with_number.yaml +++ b/tests/test_data/http/header_with_number.yaml @@ -3,5 +3,6 @@ expected: - name: files type: file file_name: secret.txt + content_type: "text/plain; charset=utf-8" data: !!binary | YWFhYWFh diff --git a/tests/test_data/http/multiple_files.yaml b/tests/test_data/http/multiple_files.yaml index 3bf70e2..b372ab2 100644 --- a/tests/test_data/http/multiple_files.yaml +++ b/tests/test_data/http/multiple_files.yaml @@ -3,11 +3,13 @@ expected: - name: file1 type: file file_name: test1.txt + content_type: 'text/plain' data: !!binary | VGVzdCBmaWxlICMx - name: file2 type: file file_name: test2.txt + content_type: 'text/plain' data: !!binary | VGVzdCBmaWxlICMy diff --git a/tests/test_data/http/quoted_printable_encoding.yaml b/tests/test_data/http/quoted_printable_encoding.yaml index 2c6bbfb..6dcbde3 100644 --- a/tests/test_data/http/quoted_printable_encoding.yaml +++ b/tests/test_data/http/quoted_printable_encoding.yaml @@ -3,5 +3,6 @@ expected: - name: file type: file file_name: 
test.txt + content_type: 'text/plain' data: !!binary | Zm9vPWJhcg== diff --git a/tests/test_data/http/single_field_single_file.yaml b/tests/test_data/http/single_field_single_file.yaml index 47c8d6e..fa7002e 100644 --- a/tests/test_data/http/single_field_single_file.yaml +++ b/tests/test_data/http/single_field_single_file.yaml @@ -2,11 +2,13 @@ boundary: boundary expected: - name: field type: field + content_type: 'text/plain' data: !!binary | dGVzdDE= - name: file type: file file_name: file.txt + content_type: 'text/plain' data: !!binary | dGVzdDI= diff --git a/tests/test_data/http/single_field_with_trailer.http b/tests/test_data/http/single_field_with_trailer.http new file mode 100644 index 0000000..a570340 --- /dev/null +++ b/tests/test_data/http/single_field_with_trailer.http @@ -0,0 +1,7 @@ +------WebKitFormBoundaryTkr3kCBQlBe1nrhc +Content-Disposition: form-data; name="field" + +This is a test. +------WebKitFormBoundaryTkr3kCBQlBe1nrhc-- +this trailer causes a warning +but should be ignored \ No newline at end of file diff --git a/tests/test_data/http/single_field_with_trailer.yaml b/tests/test_data/http/single_field_with_trailer.yaml new file mode 100644 index 0000000..7690f08 --- /dev/null +++ b/tests/test_data/http/single_field_with_trailer.yaml @@ -0,0 +1,6 @@ +boundary: ----WebKitFormBoundaryTkr3kCBQlBe1nrhc +expected: + - name: field + type: field + data: !!binary | + VGhpcyBpcyBhIHRlc3Qu diff --git a/tests/test_data/http/single_file.yaml b/tests/test_data/http/single_file.yaml index 2a8e005..dbdff51 100644 --- a/tests/test_data/http/single_file.yaml +++ b/tests/test_data/http/single_file.yaml @@ -3,6 +3,7 @@ expected: - name: file type: file file_name: test.txt + content_type: 'text/plain' data: !!binary | VGhpcyBpcyBhIHRlc3QgZmlsZS4= diff --git a/tests/test_data/http/utf8_filename.yaml b/tests/test_data/http/utf8_filename.yaml index 507ba2c..25fab67 100644 --- a/tests/test_data/http/utf8_filename.yaml +++ b/tests/test_data/http/utf8_filename.yaml @@ 
-3,6 +3,7 @@ expected: - name: file type: file file_name: ???.txt + content_type: 'text/plain' data: !!binary | 44GT44KM44Gv44OG44K544OI44Gn44GZ44CC diff --git a/tests/test_multipart.py b/tests/test_multipart.py index ce92ff4..f9839a6 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -758,7 +758,7 @@ def assert_file_data(self, f: File, data: bytes) -> None: file_data = o.read() self.assertEqual(file_data, data) - def assert_file(self, field_name: bytes, file_name: bytes, data: bytes) -> None: + def assert_file(self, field_name: bytes, file_name: bytes, content_type: str | None, data: bytes) -> None: # Find this file. found = None for f in self.files: @@ -770,6 +770,8 @@ def assert_file(self, field_name: bytes, file_name: bytes, data: bytes) -> None: self.assertIsNotNone(found) assert found is not None + self.assertEqual(found.content_type, content_type) + try: # Assert about this file. self.assert_file_data(found, data) @@ -839,7 +841,7 @@ def test_http(self, param: TestParams) -> None: self.assert_field(name, e["data"]) elif type == "file": - self.assert_file(name, e["file_name"].encode("latin-1"), e["data"]) + self.assert_file(name, e["file_name"].encode("latin-1"), e["content_type"], e["data"]) else: assert False @@ -870,7 +872,7 @@ def test_random_splitting(self) -> None: # Assert that our file and field are here. 
self.assert_field(b"field", b"test1") - self.assert_file(b"file", b"file.txt", b"test2") + self.assert_file(b"file", b"file.txt", "text/plain", b"test2") @parametrize("param", [t for t in http_tests if t["name"] in single_byte_tests]) def test_feed_single_bytes(self, param: TestParams) -> None: @@ -909,7 +911,8 @@ def test_feed_single_bytes(self, param: TestParams) -> None: self.assert_field(name, e["data"]) elif type == "file": - self.assert_file(name, e["file_name"].encode("latin-1"), e["data"]) + content_type = "text/plain" + self.assert_file(name, e["file_name"].encode("latin-1"), content_type, e["data"]) else: assert False @@ -947,6 +950,48 @@ def test_feed_blocks(self) -> None: # Assert that our field is here. self.assert_field(b"field", b"0123456789ABCDEFGHIJ0123456789ABCDEFGHIJ") + def test_file_content_type_header(self) -> None: + """ + This test checks that the content-type for a file part is passed on. + """ + # Load test data. + test_file = "header_with_number.http" + with open(os.path.join(http_tests_dir, test_file), "rb") as f: + test_data = f.read() + + expected_content_type = "text/plain; charset=utf-8" + + # Create form parser. + self.make(boundary="b8825ae386be4fdc9644d87e392caad3") + self.f.write(test_data) + self.f.finalize() + + # Assert that our file is here. + self.assertEqual(1, len(self.files)) + actual_content_type = self.files[0].content_type + self.assertEqual(actual_content_type, expected_content_type) + + def test_field_content_type_header(self) -> None: + """ + This test checks that the content-type for a field part is read and passed on. + """ + # Load test data. + test_file = "single_field.http" + with open(os.path.join(http_tests_dir, test_file), "rb") as f: + test_data = f.read() + + expected_content_type = None + + # Create form parser. + self.make(boundary="----WebKitFormBoundaryTkr3kCBQlBe1nrhc") + self.f.write(test_data) + self.f.finalize() + + # Assert that our field is here. 
+ self.assertEqual(1, len(self.fields)) + actual_content_type = self.fields[0].content_type + self.assertEqual(actual_content_type, expected_content_type) + def test_request_body_fuzz(self) -> None: """ This test randomly fuzzes the request body to ensure that no strange @@ -1190,6 +1235,21 @@ def on_file(f: FileProtocol) -> None: f.finalize() self.assert_file_data(files[0], b"Test") + def test_bad_content_disposition(self) -> None: + # Field name is required. + data = ( + b"----boundary\r\nContent-Disposition: form-data;\r\nContent-Type: text/plain\r\nTest\r\n----boundary--\r\n" + ) + + on_field = Mock() + on_file = Mock() + + f = FormParser("multipart/form-data", on_field, on_file, boundary="--boundary") + + with self.assertRaises(FormParserError): + f.write(data) + f.finalize() + def test_handles_None_fields(self) -> None: fields: list[Field] = []