diff --git a/pyproject.toml b/pyproject.toml index 7d8bb6cb..ee529035 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,8 @@ exclude = ''' \.git | .tox )/ +| src/webob/multipart.py +| tests/test_multipart/ ''' # This next section only exists for people that have their editors @@ -17,7 +19,7 @@ exclude = ''' profile = "black" multi_line_output = 3 src_paths = ["src", "tests"] -skip_glob = ["docs/*"] +skip_glob = ["docs/*", "tests/test_multipart/*", "src/webob/multipart.py"] include_trailing_comma = true force_grid_wrap = false combine_as_imports = true diff --git a/src/webob/compat.py b/src/webob/compat.py deleted file mode 100644 index 55fbef9e..00000000 --- a/src/webob/compat.py +++ /dev/null @@ -1,117 +0,0 @@ -# flake8: noqa - -import cgi -from cgi import FieldStorage as _cgi_FieldStorage, parse_header -from html import escape -from queue import Empty, Queue -import sys -import tempfile -import types - - -# Various different FieldStorage work-arounds required on Python 3.x -class cgi_FieldStorage(_cgi_FieldStorage): # pragma: no cover - def __repr__(self): - """monkey patch for FieldStorage.__repr__ - - Unbelievably, the default __repr__ on FieldStorage reads - the entire file content instead of being sane about it. 
- This is a simple replacement that doesn't do that - """ - - if self.file: - return f"FieldStorage({self.name!r}, {self.filename!r})" - - return f"FieldStorage({self.name!r}, {self.filename!r}, {self.value!r})" - - # Work around https://bugs.python.org/issue27777 - def make_file(self): - if self._binary_file or self.length >= 0: - return tempfile.TemporaryFile("wb+") - else: - return tempfile.TemporaryFile("w+", encoding=self.encoding, newline="\n") - - # Work around http://bugs.python.org/issue23801 - # This is taken exactly from Python 3.5's cgi.py module - def read_multi(self, environ, keep_blank_values, strict_parsing): - """Internal: read a part that is itself multipart.""" - ib = self.innerboundary - - if not cgi.valid_boundary(ib): - raise ValueError(f"Invalid boundary in multipart form: {ib!r}") - self.list = [] - - if self.qs_on_post: - query = cgi.urllib.parse.parse_qsl( - self.qs_on_post, - self.keep_blank_values, - self.strict_parsing, - encoding=self.encoding, - errors=self.errors, - ) - - for key, value in query: - self.list.append(cgi.MiniFieldStorage(key, value)) - - klass = self.FieldStorageClass or self.__class__ - first_line = self.fp.readline() # bytes - - if not isinstance(first_line, bytes): - raise ValueError( - f"{self.fp} should return bytes, got {type(first_line).__name__}" - ) - self.bytes_read += len(first_line) - - # Ensure that we consume the file until we've hit our innerboundary - - while first_line.strip() != (b"--" + self.innerboundary) and first_line: - first_line = self.fp.readline() - self.bytes_read += len(first_line) - - while True: - parser = cgi.FeedParser() - hdr_text = b"" - - while True: - data = self.fp.readline() - hdr_text += data - - if not data.strip(): - break - - if not hdr_text: - break - # parser takes strings, not bytes - self.bytes_read += len(hdr_text) - parser.feed(hdr_text.decode(self.encoding, self.errors)) - headers = parser.close() - # Some clients add Content-Length for part headers, ignore them - - if 
"content-length" in headers: - filename = None - - if "content-disposition" in self.headers: - cdisp, pdict = parse_header(self.headers["content-disposition"]) - - if "filename" in pdict: - filename = pdict["filename"] - - if filename is None: - del headers["content-length"] - part = klass( - self.fp, - headers, - ib, - environ, - keep_blank_values, - strict_parsing, - self.limit - self.bytes_read, - self.encoding, - self.errors, - ) - self.bytes_read += part.bytes_read - self.list.append(part) - - if part.done or self.bytes_read >= self.length > 0: - break - self.skip_lines() diff --git a/src/webob/multidict.py b/src/webob/multidict.py index e54ea3b0..b21f9b0e 100644 --- a/src/webob/multidict.py +++ b/src/webob/multidict.py @@ -6,9 +6,11 @@ """ import binascii from collections.abc import MutableMapping -from urllib.parse import urlencode as url_encode +from urllib.parse import parse_qsl, urlencode as url_encode import warnings +from .multipart import parse_options_header + __all__ = ["MultiDict", "NestedMultiDict", "NoVars", "GetDict"] @@ -57,6 +59,9 @@ def view_list(cls, lst): def from_fieldstorage(cls, fs): """ Create a multidict from a cgi.FieldStorage instance + + Legacy. 
class MultiDictFile:
    """
    An object representing a file upload in a ``multipart/form-data`` request.

    This object has the same shape as Python's deprecated ``cgi.FieldStorage``
    object, which was previously used by webob to represent file uploads.

    Attributes are stored exactly as passed to the constructor: ``name``,
    ``filename``, ``file``, ``type``, ``type_options``, ``disposition``,
    ``disposition_options`` and ``headers``.
    """

    def __init__(
        self,
        name,
        filename,
        file,
        type,
        type_options,
        disposition,
        disposition_options,
        headers,
    ):
        self.name = name
        self.filename = filename
        self.file = file
        self.type = type
        self.type_options = type_options
        self.disposition = disposition
        self.disposition_options = disposition_options
        self.headers = headers

    @classmethod
    def from_multipart_part(cls, part):
        """Build a ``MultiDictFile`` from a parsed multipart part.

        ``part`` must expose ``name``, ``filename``, ``file``, ``headers``,
        ``content_type`` and ``disposition``. The content type and the
        disposition are each split into a main value and an options dict
        with ``parse_options_header``.
        """
        # The type is taken from ``part.content_type``; the previous extra
        # lookup of the raw "Content-Type" header was a dead store that was
        # overwritten immediately, so it has been removed.
        content_type, options = parse_options_header(part.content_type)
        disposition, disp_options = parse_options_header(part.disposition)
        return cls(
            name=part.name,
            filename=part.filename,
            file=part.file,
            type=content_type,
            type_options=options,
            disposition=disposition,
            disposition_options=disp_options,
            headers=part.headers,
        )

    @property
    def value(self):
        """Return the whole content of ``file``.

        The file is rewound, read to the end, and then repositioned to where
        it was before the call (also when reading fails).
        """
        pos = self.file.tell()
        self.file.seek(0)
        try:
            return self.file.read()
        finally:
            self.file.seek(pos)
encoding, errors): # d = lambda b: b.decode(encoding, errors) diff --git a/src/webob/multipart.py b/src/webob/multipart.py new file mode 100644 index 00000000..a6d364cf --- /dev/null +++ b/src/webob/multipart.py @@ -0,0 +1,902 @@ +# -*- coding: utf-8 -*- +""" +This module provides multiple parsers for RFC-7578 `multipart/form-data`, +both low-level for framework authors and high-level for WSGI application +developers. + +Vendored from multipart v1.1.0 on Oct 16, 2024. +https://pypi.org/project/multipart/1.1.0/ + +https://github.com/defnull/multipart + +Copyright (c) 2010-2024, Marcel Hellkamp + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +""" + + +__author__ = "Marcel Hellkamp" +__version__ = '1.1.0' +__license__ = "MIT" +__all__ = ["MultipartError", "parse_form_data", "MultipartParser", "MultipartPart", "PushMultipartParser", "MultipartSegment"] + + +import re +from io import BytesIO +from typing import Iterator, Union, Optional, Tuple, List +from urllib.parse import parse_qs +from wsgiref.headers import Headers +from collections.abc import MutableMapping as DictMixin +import tempfile +import functools + + +############################################################################## +################################ Helper & Misc ############################### +############################################################################## +# Some of these were copied from bottle: https://bottlepy.org + + +class MultiDict(DictMixin): + """ A dict that stores multiple values per key. Most dict methods return the + last value by default. There are special methods to get all values. + """ + + def __init__(self, *args, **kwargs): + self.dict = {} + for arg in args: + if hasattr(arg, 'items'): + for k, v in arg.items(): + self[k] = v + else: + for k, v in arg: + self[k] = v + for k, v in kwargs.items(): + self[k] = v + + def __len__(self): + return len(self.dict) + + def __iter__(self): + return iter(self.dict) + + def __contains__(self, key): + return key in self.dict + + def __delitem__(self, key): + del self.dict[key] + + def __str__(self): + return str(self.dict) + + def __repr__(self): + return repr(self.dict) + + def keys(self): + return self.dict.keys() + + def __getitem__(self, key): + return self.get(key, KeyError, -1) + + def __setitem__(self, key, value): + self.append(key, value) + + def append(self, key, value): + self.dict.setdefault(key, []).append(value) + + def replace(self, key, value): + self.dict[key] = [value] + + def getall(self, key): + return self.dict.get(key) or [] + + def get(self, key, default=None, index=-1): + if key not in self.dict and default != KeyError: + return 
def to_bytes(data, enc="utf8"):
    """Return *data* encoded with *enc* if it is a ``str``, else unchanged."""
    if isinstance(data, str):
        data = data.encode(enc)

    return data


# -------------
# Header Parser
# -------------


_special = re.escape('()<>@,;:"\\/[]?={} \t')
_re_special = re.compile(r'[%s]' % _special)
_quoted_string = r'"(?:\\.|[^"])*"'  # Quoted string
_value = r'(?:[^%s]+|%s)' % (_special, _quoted_string)  # Safe or quoted string
_option = r'(?:;|^)\s*([^%s]+)\s*=\s*(%s)' % (_special, _value)
_re_option = re.compile(_option)  # key=value part of a Content-Type like header


def header_quote(val):
    """Quote *val* for use as a header option value, if it needs quoting.

    Values without special characters are returned unchanged; otherwise
    backslashes and double quotes are escaped and the result is wrapped in
    double quotes.
    """
    if not _re_special.search(val):
        return val

    return '"' + val.replace("\\", "\\\\").replace('"', '\\"') + '"'


def header_unquote(val, filename=False):
    """Reverse :func:`header_quote` for a single option value.

    Unquoted values (including the empty string) are returned unchanged.
    With ``filename=True``, a quoted value that looks like a full Windows
    or UNC path is reduced to its last path component.
    """
    # Slices instead of indexing, so that an empty value does not raise
    # IndexError.
    if val[:1] == val[-1:] == '"':
        val = val[1:-1]

        # fix ie6 bug: full path --> filename
        if filename and (val[1:3] == ":\\" or val[:2] == "\\\\"):
            val = val.split("\\")[-1]

        return val.replace("\\\\", "\\").replace('\\"', '"')

    return val


def parse_options_header(header, options=None):
    """Split a Content-Type like header into ``(value, options)``.

    The main value is lower-cased and stripped of surrounding whitespace.
    Option keys are lower-cased and option values are unquoted.

    :param header: The raw header value.
    :param options: Optional dict that is updated with the parsed options
        and returned. It is only used if it is non-empty and the header
        contains a ``;``; otherwise a new dict is returned.
    """
    value, sep, tail = header.partition(";")
    if not sep:
        return header.lower().strip(), {}

    options = options or {}
    for match in _re_option.finditer(tail):
        key, val = match.groups()
        key = key.lower()
        options[key] = header_unquote(val, key == "filename")

    # Strip here too, so "text/plain ; charset=x" and "text/plain" agree.
    return value.lower().strip(), options


##############################################################################
################################## SansIO Parser #############################
##############################################################################


class MultipartError(ValueError):
    """Raised for malformed multipart input and for exceeded limits."""


# Parser states as constants
_PREAMBLE = "PREAMBLE"
_HEADER = "HEADER"
_BODY = "BODY"
_COMPLETE = "END"


class PushMultipartParser:
    def __init__(
        self,
        boundary: Union[str, bytes],
        content_length=-1,
        max_header_size=4096 + 128,  # 4KB should be enough for everyone
        max_header_count=8,  # RFC 7578 allows just 3
        max_segment_size=2**64,  # Practically unlimited
        max_segment_count=2**64,  # Practically unlimited
        header_charset="utf8",
        strict=False,
    ):
        """A push-based (incremental, non-blocking) parser for multipart/form-data.

        In `strict` mode, the parser will be less forgiving and bail out
        more quickly, avoiding unnecessary computations caused by broken or
        malicious clients.

        The various limits are meant as safeguards and exceeding any of
        those limits triggers a :exc:`MultipartError`.

        :param boundary: The multipart boundary as found in the Content-Type header.
        :param content_length: Maximum number of bytes to parse, or -1 for no limit.
        :param max_header_size: Maximum size of a single header (name+value).
        :param max_header_count: Maximum number of headers per segment.
        :param max_segment_size: Maximum size of a single segment.
        :param max_segment_count: Maximum number of segments.
        :param header_charset: Charset for header names and values.
        :param strict: Enable more format and sanity checks.
        """
        self.boundary = to_bytes(boundary)
        self.content_length = content_length
        self.header_charset = header_charset
        self.max_header_size = max_header_size
        self.max_header_count = max_header_count
        self.max_segment_size = max_segment_size
        self.max_segment_count = max_segment_count
        self.strict = strict

        self._delimiter = b"--" + self.boundary

        # Internal parser state
        self._parsed = 0
        self._fieldcount = 0
        self._buffer = bytearray()
        self._current = None
        self._state = _PREAMBLE

        #: True if the parser was closed.
        self.closed = False
        #: The last error
        self.error = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Only insist on a complete stream if the body of the ``with``
        # block did not already fail for another reason.
        self.close(check_complete=not exc_type)

    def parse(
        self, chunk: Union[bytes, bytearray]
    ) -> Iterator[Union["MultipartSegment", bytearray, None]]:
        """Parse a chunk of data and yield as many result objects as possible
        with the data given.

        For each multipart segment, the parser will emit a single instance
        of :class:`MultipartSegment` with all headers already present,
        followed by zero or more non-empty `bytearray` instances containing
        parts of the segment body, followed by a single `None` signaling the
        end of the segment.

        The returned iterator will stop if more data is required or if the
        end of the multipart stream was detected. The iterator must be
        fully consumed before parsing the next chunk. End of input can be
        signaled by parsing an empty chunk or closing the parser. This is
        important to verify the multipart message was parsed completely and
        the last segment is actually complete.

        Format errors or exceeded limits will trigger :exc:`MultipartError`.
        """

        assert isinstance(chunk, (bytes, bytearray))

        if not chunk:
            self.close()
            return

        if self.closed:
            raise self._fail("Parser closed")

        if self.content_length > -1 and self.content_length < self._parsed + len(
            self._buffer
        ) + len(chunk):
            raise self._fail("Content-Length limit exceeded")

        if self._state is _COMPLETE:
            if self.strict:
                raise self._fail("Unexpected data after end of multipart stream")
            return

        buffer = self._buffer
        delimiter = self._delimiter
        buffer += chunk  # Copy chunk to existing buffer
        offset = 0
        d_len = len(delimiter)
        bufferlen = len(buffer)

        while True:

            # Scan for first delimiter
            if self._state is _PREAMBLE:
                index = buffer.find(delimiter, offset)

                if (index == -1 or index > offset) and self.strict:
                    # Data before the first delimiter is allowed (RFC 2046,
                    # section 5.1.1) but very uncommon.
                    raise self._fail("Unexpected data in front of first delimiter")

                if index > -1:
                    tail = buffer[index + d_len : index + d_len + 2]

                    # First delimiter found -> Start after it
                    if tail == b"\r\n":
                        self._current = MultipartSegment(self)
                        self._state = _HEADER
                        offset = index + d_len + 2
                        continue

                    # First delimiter is terminator -> Empty multipart stream
                    if tail == b"--":
                        offset = index + d_len + 2
                        self._state = _COMPLETE
                        break  # parsing complete

                    # Bad newline after valid delimiter -> Broken client
                    if tail and tail[0:1] == b"\n":
                        raise self._fail("Invalid line break after delimiter")

                # Delimiter not (completely) found: drop everything except
                # the last few bytes, which may hold the start of a
                # delimiter. Clamp at zero: with a buffer shorter than
                # that, a negative offset would slice from the end, throw
                # away the start of the buffer and decrease ``_parsed``.
                offset = max(0, bufferlen - (d_len + 4))
                break  # wait for more data

            # Parse header section
            elif self._state is _HEADER:
                nl = buffer.find(b"\r\n", offset)

                if nl > offset:  # Non-empty header line
                    self._current._add_headerline(buffer[offset:nl])
                    offset = nl + 2
                    continue
                elif nl == offset:  # Empty header line -> End of header section
                    self._current._close_headers()
                    yield self._current
                    self._state = _BODY
                    offset += 2
                    continue
                else:  # No CRLF found -> Ask for more data
                    if buffer.find(b"\n", offset) != -1:
                        raise self._fail("Invalid line break in segment header")
                    if bufferlen - offset > self.max_header_size:
                        raise self._fail("Maximum segment header length exceeded")
                    break  # wait for more data

            # Parse body until next delimiter is found
            elif self._state is _BODY:
                index = buffer.find(b"\r\n" + delimiter, offset)
                # ``False`` if there is no delimiter, else the (up to) two
                # bytes that follow it.
                tail = index > -1 and buffer[index + d_len + 2 : index + d_len + 4]

                if tail in (b"\r\n", b"--"):  # Delimiter or terminator found
                    if index > offset:
                        self._current._update_size(index - offset)
                        yield buffer[offset:index]
                    offset = index + d_len + 4
                    self._current._mark_complete()
                    yield None

                    if tail == b"--":  # Delimiter was a terminator
                        self._state = _COMPLETE
                        break

                    # Normal delimiter, continue with next segment
                    self._current = MultipartSegment(self)
                    self._state = _HEADER
                    continue

                # No delimiter or terminator found. Hold back enough bytes
                # that a delimiter split across chunks is still detected.
                min_keep = d_len + 3
                chunk = buffer[offset:-min_keep]
                if chunk:
                    self._current._update_size(len(chunk))
                    offset += len(chunk)
                    yield chunk
                break  # wait for more data

            else:  # pragma: no cover
                raise self._fail(f"Unexpected internal state: {self._state}")

        # We ran out of data, or reached the end
        self._parsed += offset
        buffer[:] = buffer[offset:]

    def _fail(self, msg):
        """Record the first error, close the parser and raise
        :exc:`MultipartError`. This method never returns."""
        err = MultipartError(msg)
        if not self.error:
            self.error = err
        self.close(check_complete=False)
        raise err

    def close(self, check_complete=True):
        """
        Close this parser if not already closed.

        :param check_complete: Raise MultipartError if the parser did not
            reach the end of the multipart stream yet.
        """

        self.closed = True
        self._current = None
        del self._buffer[:]

        if check_complete and self._state is not _COMPLETE:
            self._fail("Unexpected end of multipart stream (parser closed)")


class MultipartSegment:

    #: List of headers as name/value pairs with normalized (Title-Case) names.
    headerlist: List[Tuple[str, str]]
    #: The 'name' option of the Content-Disposition header. Always a string,
    #: but may be empty.
    name: str
    #: The optional 'filename' option of the Content-Disposition header.
    filename: Optional[str]
    #: The Content-Type of this segment, if the header was present.
    #: Not the entire header, just the actual content type without options.
    content_type: Optional[str]
    #: The 'charset' option of the Content-Type header, if present.
    charset: Optional[str]

    #: Segment body size (so far). Will be updated during parsing.
    size: int
    #: If true, the last chunk of segment body data was parsed and the size
    #: value is final.
    complete: bool

    def __init__(self, parser: PushMultipartParser):
        """ MultipartSegments are created by the PushMultipartParser and
            represent a single multipart segment, but do not store or buffer any
            of the content. The parser will emit MultipartSegments with a fully
            populated headerlist and derived information (name, filename, ...) can
            be accessed.
        """
        self._parser = parser

        if parser._fieldcount + 1 > parser.max_segment_count:
            parser._fail("Maximum segment count exceeded")
        parser._fieldcount += 1

        self.headerlist = []
        self.size = 0
        self.complete = False

        # ``name`` stays None until the header section was closed; the
        # asserts below use that to tell the header phase from the body.
        self.name = None
        self.filename = None
        self.content_type = None
        self.charset = None
        self._clen = -1
        self._fail = parser._fail
        self._size_limit = parser.max_segment_size

    def _add_headerline(self, line: bytearray):
        """Parse one raw header line (without CRLF) and store the header.

        A line starting with a space or tab continues the previous header,
        which is only accepted in non-strict mode.
        """
        assert line and self.name is None
        parser = self._parser

        if line[0] in b" \t":  # Multi-line header value
            if not self.headerlist or parser.strict:
                raise self._fail("Unexpected segment header continuation")
            prev = ": ".join(self.headerlist.pop())
            line = prev.encode(parser.header_charset) + b" " + line.strip()

        if len(line) > parser.max_header_size:
            raise self._fail("Maximum segment header length exceeded")
        if len(self.headerlist) >= parser.max_header_count:
            raise self._fail("Maximum segment header count exceeded")

        try:
            text = line.decode(parser.header_charset)
        except UnicodeDecodeError:
            raise self._fail("Segment header failed to decode")

        name, col, value = text.partition(":")
        name = name.strip()
        if not col or not name:
            raise self._fail("Malformed segment header")
        if " " in name or not name.isascii() or not name.isprintable():
            raise self._fail("Invalid segment header name")

        self.headerlist.append((name.title(), value.strip()))

    def _close_headers(self):
        """Derive name, filename, content type, charset and the declared
        content length from the collected headers.

        Fails if there is no valid ``Content-Disposition: form-data`` header
        or if a ``Content-Length`` header is not an integer.
        """
        assert self.name is None

        for h, v in self.headerlist:
            if h == "Content-Disposition":
                dtype, args = parse_options_header(v)
                if dtype != "form-data":
                    raise self._fail("Invalid Content-Disposition segment header: Wrong type")
                if "name" not in args and self._parser.strict:
                    raise self._fail("Invalid Content-Disposition segment header: Missing name option")
                self.name = args.get("name", "")
                self.filename = args.get("filename")
            elif h == "Content-Type":
                self.content_type, args = parse_options_header(v)
                self.charset = args.get("charset")
            elif h == "Content-Length":
                try:
                    self._clen = int(self.header("Content-Length", -1))
                except ValueError:
                    # Report garbage like any other format error, so the
                    # parser is closed and ``parser.error`` is set.
                    raise self._fail("Invalid Content-Length segment header")

        if self.name is None:
            raise self._fail("Missing Content-Disposition segment header")

    def _update_size(self, bytecount: int):
        """Account for *bytecount* more body bytes and enforce the limits."""
        assert self.name is not None and not self.complete
        self.size += bytecount
        if self._clen >= 0 and self.size > self._clen:
            raise self._fail("Segment Content-Length exceeded")
        if self.size > self._size_limit:
            raise self._fail("Maximum segment size exceeded")

    def _mark_complete(self):
        """Mark the body as complete; fails if a non-negative declared
        Content-Length does not match the received size."""
        assert self.name is not None and not self.complete
        if self._clen >= 0 and self.size != self._clen:
            raise self._fail("Segment size does not match Content-Length header")
        self.complete = True

    def header(self, name: str, default=None):
        """Return the value of a header if present, or a default value."""
        compare = name.title()
        for header in self.headerlist:
            if header[0] == compare:
                return header[1]
        if default is KeyError:
            raise KeyError(name)
        return default

    def __getitem__(self, name):
        """Return a header value if present, or raise KeyError."""
        return self.header(name, KeyError)
:class:`MultipartPart` instances. + + The parse itself is an iterator and will read and parse data on + demand. results are cached, so once fully parsed, it can be iterated + over again. + + :param stream: A readable byte stream. Must implement ``.read(size)``. + :param boundary: The multipart boundary as found in the Content-Type header. + :param content_length: The maximum number of bytes to read. + :param charset: Default charset for headers and text fields. + :param strict: If true, the parser will reject invalid or strange inputs. + :param buffer_size: Size of chunks read from the source stream + + :param header_limit: Maximum number of headers per segment + :param headersize_limit: Maximum size of a segment header line + :param part_limit: Maximum number of segments to parse + :param partsize_limit: Maximum size of a segment body + :param spool_limit: Segments up to this size are buffered in memory, + larger segments are buffered in temporary files on disk. + :param memory_limit: Maximum size of all memory-buffered segments. + :param disk_limit: Maximum size of all disk-buffered segments + + :param memfile_limit: Deprecated alias for `spool_limit`. + :param mem_limit: Deprecated alias for `memory_limit`. 
+ """ + self.stream = stream + self.boundary = boundary + self.content_length = content_length + self.charset = charset + self.strict = strict + self.buffer_size = buffer_size + self.header_limit = header_limit + self.headersize_limit = headersize_limit + self.part_limit = part_limit + self.partsize_limit = partsize_limit + self.memory_limit = mem_limit or memory_limit + self.spool_limit = min(memfile_limit or spool_limit, self.memory_limit) + self.disk_limit = disk_limit + + self._done = [] + self._part_iter = None + + def __iter__(self): + """Iterate over the parts of the multipart message.""" + if not self._part_iter: + self._part_iter = self._iterparse() + + if self._done: + yield from self._done + + for part in self._part_iter: + self._done.append(part) + yield part + + def parts(self): + """Returns a list with all parts of the multipart message.""" + return list(self) + + def get(self, name, default=None): + """Return the first part with that name or a default value.""" + for part in self: + if name == part.name: + return part + + return default + + def get_all(self, name): + """Return a list of parts with that name.""" + return [p for p in self if p.name == name] + + def _iterparse(self): + read = self.stream.read + bufsize = self.buffer_size + mem_used = disk_used = 0 + readlimit = self.content_length + + part = None + parser = PushMultipartParser( + boundary=self.boundary, + content_length=self.content_length, + max_header_count=self.header_limit, + max_header_size=self.headersize_limit, + max_segment_count=self.part_limit, + max_segment_size=self.partsize_limit, + header_charset=self.charset, + ) + + with parser: + while not parser.closed: + + if readlimit >= 0: + chunk = read(min(bufsize, readlimit)) + readlimit -= len(chunk) + else: + chunk = read(bufsize) + + for event in parser.parse(chunk): + if isinstance(event, MultipartSegment): + part = MultipartPart( + buffer_size=self.buffer_size, + memfile_limit=self.spool_limit, + charset=self.charset, + 
class MultipartPart(object):
    """A single part of a multipart message, with its body buffered in
    ``file``.

    The body starts out in an in-memory ``BytesIO`` and is moved to a
    temporary file once it grows beyond ``memfile_limit`` bytes.
    """

    def __init__(
        self,
        buffer_size=2**16,
        memfile_limit=2**18,
        charset="utf8",
        segment: "MultipartSegment" = None,
    ):
        """
        :param buffer_size: Chunk size used by :meth:`save_as`.
        :param memfile_limit: Bodies larger than this many bytes are moved
            from memory to a temporary file.
        :param charset: Charset used if the segment does not define one.
        :param segment: The segment this part belongs to. Despite the
            default, a segment is required: its attributes are read here.
        """
        self._segment = segment
        #: A file-like object holding the fields content
        self.file = BytesIO()
        self.size = 0
        self.name = segment.name
        self.filename = segment.filename
        #: Charset as defined in the segment header, or the parser default charset
        self.charset = segment.charset or charset
        self.headerlist = segment.headerlist

        self.memfile_limit = memfile_limit
        self.buffer_size = buffer_size

    # functools.cached_property replaces the module's hand-rolled
    # equivalent: computed once per instance, then stored as an ordinary
    # instance attribute.
    @functools.cached_property
    def headers(self) -> Headers:
        """The segment headers wrapped in a ``wsgiref.headers.Headers``."""
        return Headers(self._segment.headerlist)

    @functools.cached_property
    def disposition(self) -> str:
        """The raw Content-Disposition header value of the segment."""
        return self._segment.header("Content-Disposition")

    @functools.cached_property
    def content_type(self) -> str:
        """The segment content type, or a default if it has none:
        ``application/octet-stream`` for parts with a filename,
        ``text/plain`` otherwise."""
        return self._segment.content_type or (
            "application/octet-stream" if self.filename else "text/plain")

    def _write(self, chunk):
        """Append *chunk* to the body, spilling to disk if it gets too big."""
        self.size += len(chunk)
        self.file.write(chunk)
        if self.size > self.memfile_limit:
            old = self.file
            self.file = tempfile.TemporaryFile()
            self.file.write(old.getvalue())
            # The in-memory copy is no longer needed; release it right away.
            old.close()
            # From now on the size check is pointless: shadow this method
            # on the instance with the variant that skips it.
            self._write = self._write_nocheck

    def _write_nocheck(self, chunk):
        """Append *chunk* to the body without checking ``memfile_limit``."""
        self.size += len(chunk)
        self.file.write(chunk)

    def _mark_complete(self):
        """Rewind ``file`` so that readers start at the beginning."""
        self.file.seek(0)

    def is_buffered(self):
        """Return true if the data is fully buffered in memory."""
        return isinstance(self.file, BytesIO)

    @property
    def value(self):
        """Return the entire payload as decoded text.

        Warning, this may consume a lot of memory, check size first.
        """

        return self.raw.decode(self.charset)

    @property
    def raw(self):
        """Return the entire payload as a raw byte string.

        The current position of ``file`` is preserved, also if reading
        fails.

        Warning, this may consume a lot of memory, check size first.
        """
        pos = self.file.tell()
        self.file.seek(0)
        try:
            return self.file.read()
        finally:
            self.file.seek(pos)

    def save_as(self, path):
        """Save a copy of this part to `path` and return its size."""
        with open(path, "wb") as fp:
            pos = self.file.tell()
            try:
                self.file.seek(0)
                size = copy_file(self.file, fp, buffer_size=self.buffer_size)
            finally:
                self.file.seek(pos)
        return size

    def close(self):
        """Close the underlying file, if any, and set ``file`` to ``False``."""
        if self.file:
            self.file.close()
            self.file = False
+ :param **kwargs: Additional keyword arguments are passed to + :class:`MultipartParser` + :raises MultipartError: On parsing errors or exceeded limits. + """ + + forms, files = MultiDict(), MultiDict() + + if strict and 'wsgi.input' not in environ: + raise MultipartError("No 'wsgi.input' in environment.") + + try: + if environ.get("REQUEST_METHOD", "GET").upper() not in ("POST", "PUT"): + raise MultipartError("Request method other than POST or PUT") + try: + content_length = int(environ.get("CONTENT_LENGTH", "-1")) + except ValueError: + raise MultipartError("Invalid Content-Length header") + content_type = environ.get("CONTENT_TYPE", "") + + if not content_type: + raise MultipartError("Missing Content-Type header") + + content_type, options = parse_options_header(content_type) + stream = environ.get("wsgi.input") or BytesIO() + kwargs["charset"] = charset = options.get("charset", charset) + + if content_type == "multipart/form-data": + boundary = options.get("boundary", "") + + if not boundary: + raise MultipartError("No boundary for multipart/form-data.") + + for part in MultipartParser(stream, boundary, content_length, **kwargs): + if part.filename or not part.is_buffered(): + files.append(part.name, part) + else: # TODO: Big form-fields go into the files dict. Really? 
+ forms.append(part.name, part.value) + part.close() + + elif content_type in ( + "application/x-www-form-urlencoded", + "application/x-url-encoded", + ): + mem_limit = kwargs.get("memory_limit", kwargs.get("mem_limit", 1024*64*128)) + if content_length > -1: + if content_length > mem_limit: + raise MultipartError("Memory limit exceeded") + data = stream.read(min(mem_limit, content_length)) + if len(data) < content_length: + raise MultipartError("Unexpected end of data stream") + else: + data = stream.read(mem_limit + 1) + if len(data) > mem_limit: + raise MultipartError("Memory limit exceeded") + + data = data.decode(charset) + data = parse_qs(data, keep_blank_values=True, encoding=charset) + + for key, values in data.items(): + for value in values: + forms.append(key, value) + else: + raise MultipartError("Unsupported Content-Type") + + except MultipartError: + if strict: + for _, part in files.iterallitems(): + if hasattr(part, 'close'): + part.close() + raise + + return forms, files diff --git a/src/webob/request.py b/src/webob/request.py index ee52a7d1..529902f0 100644 --- a/src/webob/request.py +++ b/src/webob/request.py @@ -16,7 +16,6 @@ accept_property, ) from webob.cachecontrol import CacheControl, serialize_cache_control -from webob.compat import cgi_FieldStorage from webob.cookies import RequestCookies from webob.descriptors import ( CHARSET_RE, @@ -40,6 +39,8 @@ from webob.multidict import GetDict, MultiDict, NestedMultiDict, NoVars from webob.util import bytes_, parse_qsl_text, text_, url_unquote +from .multipart import MultipartParser + try: import simplejson as json except ImportError: @@ -168,18 +169,7 @@ def decode(self, charset=None, errors="strict"): elif content_type != "multipart/form-data": return r - fs_environ = self.environ.copy() - fs_environ.setdefault("CONTENT_LENGTH", "0") - fs_environ["QUERY_STRING"] = "" - fs = cgi_FieldStorage( - fp=self.body_file, - environ=fs_environ, - keep_blank_values=True, - encoding=charset, - errors=errors, - 
) - - fout = t.transcode_fs(fs, r._content_type_raw) + fout = t.transcode_multipart(self.body_file, r._content_type_raw) # this order is important, because setting body_file # resets content_length @@ -796,27 +786,22 @@ def POST(self): return NoVars( "Not an HTML form submission (Content-Type: %s)" % content_type ) - self._check_charset() - - self.make_body_seekable() - self.body_file_raw.seek(0) - fs_environ = env.copy() - # FieldStorage assumes a missing CONTENT_LENGTH, but a - # default of 0 is better: - fs_environ.setdefault("CONTENT_LENGTH", "0") - fs_environ["QUERY_STRING"] = "" - fs = cgi_FieldStorage( - fp=self.body_file, - environ=fs_environ, - keep_blank_values=True, - encoding="utf8", - ) - - self.body_file_raw.seek(0) - vars = MultiDict.from_fieldstorage(fs) + self._check_charset() + if content_type == "multipart/form-data": + self.make_body_seekable() + self.body_file_raw.seek(0) + boundary = _get_multipart_boundary(self._content_type_raw) + parser = MultipartParser( + self.body_file, + boundary, + charset="utf8", + ) + vars = MultiDict.from_multipart(parser) + self.body_file_raw.seek(0) + else: + vars = MultiDict.from_qs(self.body) env["webob._parsed_post_vars"] = (vars, self.body_file_raw) - return vars @property @@ -1752,23 +1737,14 @@ def transcode_query(self, q): return url_encode(q) - def transcode_fs(self, fs, content_type): - # transcode FieldStorage - def decode(b): - return b - - data = [] - - for field in fs.list or (): - field.name = decode(field.name) - - if field.filename: - field.filename = decode(field.filename) - data.append((field.name, field)) - else: - data.append((field.name, decode(field.value))) - - # TODO: transcode big requests to temp file - content_type, fout = _encode_multipart(data, content_type, fout=io.BytesIO()) - + def transcode_multipart(self, body, content_type): + # Transcode multipart + boundary = _get_multipart_boundary(content_type) + parser = MultipartParser(body, boundary, charset=self.charset) + data = 
MultiDict.from_multipart(parser) + content_type, fout = _encode_multipart( + data.items(), + content_type, + fout=io.BytesIO(), + ) return fout diff --git a/src/webob/util.py b/src/webob/util.py index d26358e3..d7fb3322 100644 --- a/src/webob/util.py +++ b/src/webob/util.py @@ -1,6 +1,6 @@ +from html import escape import warnings -from webob.compat import escape from webob.headers import _trans_key diff --git a/tests/test_compat.py b/tests/test_compat.py deleted file mode 100644 index 9c9f87ea..00000000 --- a/tests/test_compat.py +++ /dev/null @@ -1,186 +0,0 @@ -from io import BytesIO -import sys - -import pytest - - -class TestText: - def _callFUT(self, *arg, **kw): - from webob.util import text_ - - return text_(*arg, **kw) - - def test_binary(self): - result = self._callFUT(b"123") - assert isinstance(result, str) - assert result == str(b"123", "ascii") - - def test_binary_alternate_decoding(self): - result = self._callFUT(b"La Pe\xc3\xb1a", "utf-8") - assert isinstance(result, str) - assert result == str(b"La Pe\xc3\xb1a", "utf-8") - - def test_binary_decoding_error(self): - pytest.raises(UnicodeDecodeError, self._callFUT, b"\xff", "utf-8") - - def test_text(self): - result = self._callFUT(str(b"123", "ascii")) - assert isinstance(result, str) - assert result == str(b"123", "ascii") - - -class TestBytes: - def _callFUT(self, *arg, **kw): - from webob.util import bytes_ - - return bytes_(*arg, **kw) - - def test_binary(self): - result = self._callFUT(b"123") - assert isinstance(result, bytes) - assert result == b"123" - - def test_text(self): - val = str(b"123", "ascii") - result = self._callFUT(val) - assert isinstance(result, bytes) - assert result == b"123" - - def test_text_alternate_encoding(self): - val = str(b"La Pe\xc3\xb1a", "utf-8") - result = self._callFUT(val, "utf-8") - assert isinstance(result, bytes) - assert result == b"La Pe\xc3\xb1a" - - -class Test_cgi_FieldStorage_Py3_tests: - def test_fieldstorage_not_multipart(self): - from webob.compat 
import cgi_FieldStorage - - POSTDATA = b'{"name": "Bert"}' - - env = { - "REQUEST_METHOD": "POST", - "CONTENT_TYPE": "text/plain", - "CONTENT_LENGTH": str(len(POSTDATA)), - } - fp = BytesIO(POSTDATA) - fs = cgi_FieldStorage(fp, environ=env) - assert fs.list is None - assert fs.value == b'{"name": "Bert"}' - - @pytest.mark.skipif( - sys.version_info < (3, 0), - reason="FieldStorage on Python 2.7 is broken, see " - "https://github.com/Pylons/webob/issues/293", - ) - def test_fieldstorage_part_content_length(self): - from webob.compat import cgi_FieldStorage - - BOUNDARY = "JfISa01" - POSTDATA = """--JfISa01 -Content-Disposition: form-data; name="submit-name" -Content-Length: 5 - -Larry ---JfISa01""" - env = { - "REQUEST_METHOD": "POST", - "CONTENT_TYPE": f"multipart/form-data; boundary={BOUNDARY}", - "CONTENT_LENGTH": str(len(POSTDATA)), - } - fp = BytesIO(POSTDATA.encode("latin-1")) - fs = cgi_FieldStorage(fp, environ=env) - assert len(fs.list) == 1 - assert fs.list[0].name == "submit-name" - assert fs.list[0].value == "Larry" - - def test_my_fieldstorage_part_content_length(self): - from webob.compat import cgi_FieldStorage - - BOUNDARY = "4ddfd368-cb07-4b9e-b003-876010298a6c" - POSTDATA = """--4ddfd368-cb07-4b9e-b003-876010298a6c -Content-Disposition: form-data; name="object"; filename="file.txt" -Content-Type: text/plain -Content-Length: 5 -Content-Transfer-Encoding: 7bit - -ADMIN ---4ddfd368-cb07-4b9e-b003-876010298a6c -Content-Disposition: form-data; name="sign_date" -Content-Type: application/json; charset=UTF-8 -Content-Length: 22 -Content-Transfer-Encoding: 7bit - -"2016-11-23T12:22:41Z" ---4ddfd368-cb07-4b9e-b003-876010298a6c -Content-Disposition: form-data; name="staffId" -Content-Type: text/plain; charset=UTF-8 -Content-Length: 5 -Content-Transfer-Encoding: 7bit - -ADMIN ---4ddfd368-cb07-4b9e-b003-876010298a6c--""" - env = { - "REQUEST_METHOD": "POST", - "CONTENT_TYPE": f"multipart/form-data; boundary={BOUNDARY}", - "CONTENT_LENGTH": str(len(POSTDATA)), - 
} - fp = BytesIO(POSTDATA.encode("latin-1")) - fs = cgi_FieldStorage(fp, environ=env) - assert len(fs.list) == 3 - expect = [ - {"name": "object", "filename": "file.txt", "value": b"ADMIN"}, - {"name": "sign_date", "filename": None, "value": '"2016-11-23T12:22:41Z"'}, - {"name": "staffId", "filename": None, "value": "ADMIN"}, - ] - for x in range(len(fs.list)): - for k, exp in expect[x].items(): - got = getattr(fs.list[x], k) - assert got == exp - - def test_fieldstorage_multipart_leading_whitespace(self): - from webob.compat import cgi_FieldStorage - - BOUNDARY = "---------------------------721837373350705526688164684" - POSTDATA = """-----------------------------721837373350705526688164684 -Content-Disposition: form-data; name="id" - -1234 ------------------------------721837373350705526688164684 -Content-Disposition: form-data; name="title" - - ------------------------------721837373350705526688164684 -Content-Disposition: form-data; name="file"; filename="test.txt" -Content-Type: text/plain - -Testing 123. - ------------------------------721837373350705526688164684 -Content-Disposition: form-data; name="submit" - - Add\x20 ------------------------------721837373350705526688164684-- -""" - - env = { - "REQUEST_METHOD": "POST", - "CONTENT_TYPE": f"multipart/form-data; boundary={BOUNDARY}", - "CONTENT_LENGTH": "560", - } - # Add some leading whitespace to our post data that will cause the - # first line to not be the innerboundary. 
- fp = BytesIO(b"\r\n" + POSTDATA.encode("latin-1")) - fs = cgi_FieldStorage(fp, environ=env) - assert len(fs.list) == 4 - expect = [ - {"name": "id", "filename": None, "value": "1234"}, - {"name": "title", "filename": None, "value": ""}, - {"name": "file", "filename": "test.txt", "value": b"Testing 123.\n"}, - {"name": "submit", "filename": None, "value": " Add "}, - ] - for x in range(len(fs.list)): - for k, exp in expect[x].items(): - got = getattr(fs.list[x], k) - assert got == exp diff --git a/tests/test_in_wsgiref.py b/tests/test_in_wsgiref.py index f8727762..d53d443a 100644 --- a/tests/test_in_wsgiref.py +++ b/tests/test_in_wsgiref.py @@ -1,12 +1,12 @@ import cgi import logging +from queue import Empty, Queue import socket import sys from urllib.request import urlopen as url_open import pytest -from webob.compat import Empty, Queue from webob.request import Request from webob.response import Response from webob.util import bytes_ diff --git a/tests/test_multipart/LICENSE b/tests/test_multipart/LICENSE new file mode 100644 index 00000000..17c3fce1 --- /dev/null +++ b/tests/test_multipart/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2010-2024, Marcel Hellkamp + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/tests/test_multipart/README b/tests/test_multipart/README new file mode 100644 index 00000000..030eaaba --- /dev/null +++ b/tests/test_multipart/README @@ -0,0 +1,4 @@ +These tests were vendored from multipart v1.1.0 on Oct 16, 2024. +https://pypi.org/project/multipart/1.1.0/ + +https://github.com/defnull/multipart diff --git a/tests/test_multipart/__init__.py b/tests/test_multipart/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_multipart/test_header_utils.py b/tests/test_multipart/test_header_utils.py new file mode 100644 index 00000000..fc5b8bf4 --- /dev/null +++ b/tests/test_multipart/test_header_utils.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +import unittest +from webob import multipart + +class TestHeaderParser(unittest.TestCase): + + def test_token_unquote(self): + unquote = multipart.header_unquote + self.assertEqual('foo', unquote('"foo"')) + self.assertEqual('foo"bar', unquote('"foo\\"bar"')) + self.assertEqual('ie.exe', unquote('"\\\\network\\ie.exe"', True)) + self.assertEqual('ie.exe', unquote('"c:\\wondows\\ie.exe"', True)) + + def test_token_quote(self): + quote = multipart.header_quote + self.assertEqual(quote('foo'), 'foo') + self.assertEqual(quote('foo"bar'), '"foo\\"bar"') + + def test_options_parser(self): + parse = multipart.parse_options_header + head = 'form-data; name="Test"; ' + self.assertEqual(parse(head+'filename="Test.txt"')[0], 'form-data') + self.assertEqual(parse(head+'filename="Test.txt"')[1]['name'], 'Test') + self.assertEqual(parse(head+'filename="Test.txt"')[1]['filename'], 'Test.txt') + self.assertEqual(parse(head+'FileName="Te\\"st.txt"')[1]['filename'], 'Te"st.txt') + 
self.assertEqual(parse(head+'filename="C:\\test\\bla.txt"')[1]['filename'], 'bla.txt') + self.assertEqual(parse(head+'filename="\\\\test\\bla.txt"')[1]['filename'], 'bla.txt') diff --git a/tests/test_multipart/test_legacy_parser.py b/tests/test_multipart/test_legacy_parser.py new file mode 100644 index 00000000..49889c6f --- /dev/null +++ b/tests/test_multipart/test_legacy_parser.py @@ -0,0 +1,187 @@ +# -*- coding: utf-8 -*- +from .utils import BaseParserTest + +import unittest +import base64 +import os.path, tempfile + +from io import BytesIO + +from webob import multipart +from webob.multipart import to_bytes + +#TODO: bufsize=10, line=1234567890--boundary\n +#TODO: bufsize < len(boundary) (should not be possible) +#TODO: bufsize = len(boundary)+5 (edge case) +#TODO: At least one test per possible exception (100% coverage) + + +class TestMultipartParser(BaseParserTest): + + def test_copyfile(self): + source = BytesIO(to_bytes('abc')) + target = BytesIO() + self.assertEqual(multipart.copy_file(source, target), 3) + target.seek(0) + self.assertEqual(target.read(), to_bytes('abc')) + + def test_big_file(self): + ''' If the size of an uploaded part exceeds memfile_limit, + it is written to disk. 
''' + test_file = 'abc'*1024 + parser = self.parser( + '--foo\r\n', + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', test_file, '\r\n--foo\r\n', + 'Content-Disposition: form-data; name="file2"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', test_file + 'a', '\r\n--foo\r\n', + 'Content-Disposition: form-data; name="file3"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', test_file*2, '\r\n--foo--', + memfile_limit=len(test_file)) + + self.assertEqual(parser.get('file1').file.read(), to_bytes(test_file)) + self.assertTrue(parser.get('file1').is_buffered()) + self.assertEqual(parser.get('file2').file.read(), to_bytes(test_file + 'a')) + self.assertFalse(parser.get('file2').is_buffered()) + self.assertEqual(parser.get('file3').file.read(), to_bytes(test_file*2)) + self.assertFalse(parser.get('file3').is_buffered()) + + def test_get_all(self): + ''' Test the get() and get_all() methods. ''' + p = self.parser('--foo\r\n', + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', 'abc'*1024, '\r\n--foo\r\n', + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', 'def'*1024, '\r\n--foo--') + self.assertEqual(p.get('file1').file.read(), to_bytes('abc'*1024)) + self.assertEqual(p.get('file2'), None) + self.assertEqual(len(p.get_all('file1')), 2) + self.assertEqual(p.get_all('file1')[1].file.read(), to_bytes('def'*1024)) + self.assertEqual(p.get_all('file1'), p.parts()) + + def test_file_seek(self): + ''' The file object should be readable without a seek(0). 
''' + test_file = 'abc'*1024 + p = self.parser( + '--foo\r\n', + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', + '\r\n', + test_file, + '\r\n--foo--') + self.assertEqual(p.get('file1').file.read(), to_bytes(test_file)) + self.assertEqual(p.get('file1').value, test_file) + + def test_unicode_value(self): + ''' The .value property always returns unicode ''' + test_file = 'abc'*1024 + p = self.parser('--foo\r\n', + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', test_file, '\r\n--foo--') + self.assertEqual(p.get('file1').file.read(), to_bytes(test_file)) + self.assertEqual(p.get('file1').value, test_file) + self.assertTrue(hasattr(p.get('file1').value, 'encode')) + + def test_save_as(self): + ''' save_as stores data in a file keeping the file position. ''' + def tmp_file_name(): + # create a temporary file name (on Python 2.6+ NamedTemporaryFile + # with delete=False could be used) + fd, fname = tempfile.mkstemp() + f = os.fdopen(fd) + f.close() + return fname + test_file = 'abc'*1024 + p = self.parser('--foo\r\n', + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', test_file, '\r\n--foo--') + self.assertEqual(p.get('file1').file.read(1024), to_bytes(test_file)[:1024]) + tfn = tmp_file_name() + p.get('file1').save_as(tfn) + tf = open(tfn, 'rb') + self.assertEqual(tf.read(), to_bytes(test_file)) + tf.close() + self.assertEqual(p.get('file1').file.read(), to_bytes(test_file)[1024:]) + + def test_part_header(self): + ''' A parsed part exposes its headers and derived attributes. 
''' + p = self.parser('--foo\r\n', + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', "xxx", '\r\n--foo--') + part = p.get("file1") + self.assertEqual(part.file.read(), b"xxx") + self.assertEqual(part.size, 3) + self.assertEqual(part.name, "file1") + self.assertEqual(part.filename, "random.png") + self.assertEqual(part.charset, "utf8") + self.assertEqual(part.headerlist, [ + ('Content-Disposition','form-data; name="file1"; filename="random.png"'), + ('Content-Type','image/png') + ]) + self.assertEqual(part.headers["CoNtEnT-TyPe"], "image/png") + self.assertEqual(part.disposition, 'form-data; name="file1"; filename="random.png"') + self.assertEqual(part.content_type, "image/png") + + def test_multiline_header(self): + ''' HTTP allows headers to be multiline. ''' + test_file = to_bytes('abc'*1024) + test_text = u'Test text\n with\r\n ümläuts!' + p = self.parser('--foo\r\n', + 'Content-Disposition: form-data;\r\n', + '\tname="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', test_file, '\r\n--foo\r\n', + 'Content-Disposition: form-data;\r\n', + ' name="text"\r\n', '\r\n', test_text, + '\r\n--foo--') + self.assertEqual(p.get('file1').file.read(), test_file) + self.assertEqual(p.get('file1').filename, 'random.png') + self.assertEqual(p.get('text').value, test_text) + + def test_disk_limit(self): + with self.assertRaises(multipart.MultipartError): + self.write_field("file1", 'x'*1025, filename="foo.bin") + self.write_end() + self.parser(spool_limit=10, disk_limit=1024) + + def test_spool_limit(self): + self.write_field("file1", 'x'*1024, filename="foo.bin") + self.write_field("file2", 'x'*1025, filename="foo.bin") + self.write_end() + p = self.parser(spool_limit=1024) + self.assertTrue(p.get("file1").is_buffered()) + self.assertFalse(p.get("file2").is_buffered()) + + def test_spool_limit_nocheck_write_func(self): + self.write_field("file1", 'x'*10240, filename="foo.bin") + 
self.write_end() + p = self.parser(spool_limit=1024, buffer_size=1024) + # A large upload should trigger the fast _write_nocheck path + self.assertEqual(p.get("file1")._write, p.get("file1")._write_nocheck) + + def test_memory_limit(self): + self.write_field("file1", 'x'*1024, filename="foo.bin") + self.write_end() + p = self.parser(memory_limit=1024) + self.assertTrue(p.get("file1").is_buffered()) + + self.reset() + self.write_field("file1", 'x'*1024, filename="foo.bin") + self.write_field("file2", 'x', filename="foo.bin") + self.write_end() + with self.assertMultipartError("Memory limit reached"): + p = self.parser(memory_limit=1024) + + def test_content_length(self): + self.write_field("file1", 'x'*1024, filename="foo.bin") + self.write_end() + clen = len(self.get_buffer_copy().getvalue()) + + # Correct content length + list(self.parser(content_length=clen)) + + # Short content length + with self.assertMultipartError("Unexpected end of multipart stream"): + list(self.parser(content_length=clen-1)) + + # Large content length (we don't care) + list(self.parser(content_length=clen+1)) diff --git a/tests/test_multipart/test_multdict.py b/tests/test_multipart/test_multdict.py new file mode 100644 index 00000000..72acfa9a --- /dev/null +++ b/tests/test_multipart/test_multdict.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +import unittest +from webob import multipart + + +class TestMultiDict(unittest.TestCase): + + def test_init(self): + md = multipart.MultiDict([("a", "1")], {"a": "2"}, a="3") + self.assertEqual(md.dict, {"a": ["1", "2", "3"]}) + + def test_append(self): + md = multipart.MultiDict() + md["a"] = "1" + md["a"] = "2" + md.append("a", "3") + md.update(a="4") + self.assertEqual(md.dict, {"a": ["1", "2", "3", "4"]}) + + def test_behaves_like_dict(self): + md = multipart.MultiDict([("a", "1"), ("a", "2")]) + self.assertTrue("a" in md) + self.assertFalse("b" in md) + self.assertTrue("a" in md.keys()) + self.assertEqual(list(md), ["a"]) + del md["a"] + 
self.assertTrue("a" not in md) + + def test_access_last(self): + md = multipart.MultiDict([("a", "1"), ("a", "2")]) + self.assertEqual(md["a"], "2") + self.assertEqual(md.get("a"), "2") + self.assertEqual(md.get("b"), None) + + def test_replace(self): + md = multipart.MultiDict([("a", "1"), ("a", "2")]) + md.replace("a", "3") + self.assertEqual(md.dict, {"a": ["3"]}) + + def test_str_repr(self): + md = multipart.MultiDict([("a", "1"), ("a", "2")]) + self.assertEqual(str(md), str(md.dict)) + self.assertEqual(repr(md), repr(md.dict)) + + def test_access_index(self): + md = multipart.MultiDict([("a", "1"), ("a", "2")]) + self.assertEqual(md.get("a", index=0), "1") + + def test_access_all(self): + md = multipart.MultiDict([("a", "1"), ("a", "2")]) + self.assertEqual(md.getall("a"), ["1", "2"]) + self.assertEqual(list(md.iterallitems()), [("a", "1"), ("a", "2")]) diff --git a/tests/test_multipart/test_push_parser.py b/tests/test_multipart/test_push_parser.py new file mode 100644 index 00000000..20ff2596 --- /dev/null +++ b/tests/test_multipart/test_push_parser.py @@ -0,0 +1,771 @@ +# -*- coding: utf-8 -*- + +""" +Tests for the PushMultipartParser all other parsers are based on. 
+""" + +from contextlib import contextmanager +import unittest +from base64 import b64decode +from webob import multipart + +def assertStrict(text): + def decorator(func): + def wrapper(self): + func(self, strict=False) + with self.assertRaisesRegex(multipart.MultipartError, text): + func(self, strict=True) + + return wrapper + + return decorator + +class PushTestBase(unittest.TestCase): + + def setUp(self): + self.parser = None + self.reset() + self.events = [] + + @contextmanager + def assertParseError(self, errortext): + with self.assertRaises(multipart.MultipartError) as r: + yield + fullmsg = " ".join(map(str, r.exception.args)) + self.assertTrue(errortext in fullmsg, f"{errortext!r} not in {fullmsg!r}") + + def reset(self, **ka): + ka.setdefault("boundary", "boundary") + self.parser = multipart.PushMultipartParser(**ka) + self.events = [] + return self + + def parse(self, *chunks): + events = [] + for chunk in chunks: + events += list(self.parser.parse(multipart.to_bytes(chunk))) + self.events += events + return events + + def compact_events(self): + current = None + data = [] + for event in self.events: + if isinstance(event, multipart.MultipartSegment): + current = event + elif event: + data.append(event) + else: + yield current, b''.join(data) + current = None + data = [] + if current: + yield current, b''.join(data) + + def get_segment(self, index_or_name): + for i, (segment, body) in enumerate(self.compact_events()): + if index_or_name == i or index_or_name == segment.name: + return segment, body + self.fail(f"Segment not found: {index_or_name}") + + +class TestPushParser(PushTestBase): + + def test_data_after_terminator(self): + self.parse(b"--boundary--") + self.parse(b"junk") # Fine + + self.reset(strict=True) + self.parse(b"--boundary--") + with self.assertRaises(multipart.MultipartError): + self.parse(b"junk") + + def test_eof_before_clen(self): + self.reset(content_length=100) + self.parse(b"--boundary") + with self.assertParseError("Unexpected end 
of multipart stream (parser closed)"): + self.parse(b"") + + def test_data_after_eof(self): + self.parse(b"--boundary--") + assert self.parser._state == multipart._COMPLETE + assert not self.parser.closed + + self.parse(b"") + assert self.parser.closed + + with self.assertParseError("Parser closed"): + self.parse(b"junk") + + def test_eof_before_terminator(self): + self.parse(b"--boundary") + with self.assertParseError("Unexpected end of multipart stream"): + self.parse(b"") + + def test_data_after_clen(self): + self.reset(content_length=12) + with self.assertParseError("Content-Length limit exceeded"): + self.parse(b"--boundary\r\njunk") + + def test_clen_match(self): + self.reset(content_length=12) + self.parse(b"--boundary--") + assert self.parser._state is multipart._COMPLETE + + @assertStrict("Unexpected data in front of first delimiter") + def test_junk_before(self, strict): + self.reset(strict=strict) + self.parse(b"junk--boundary--") + + @assertStrict("Unexpected data after end of multipart stream") + def test_junk_after(self, strict): + self.reset(strict=strict) + self.parse(b"--boundary--") + self.parse(b"junk") + + def test_close_before_end(self): + self.parse(b"--boundary") + with self.assertParseError("Unexpected end of multipart stream"): + self.parser.close() + + def test_autoclose(self): + with self.parser: + self.parse(b"--boundary--") + + self.reset() + with self.assertParseError("Unexpected end of multipart stream (parser closed)"): + with self.parser: + self.parse(b"--boundary") + + def test_invalid_NL_delimiter(self): + with self.assertParseError("Invalid line break after delimiter"): + self.parse(b"--boundary\n") + + def test_invalid_NL_header(self): + with self.assertParseError("Invalid line break in segment header"): + self.parse(b"--boundary\r\nfoo:bar\nbar:baz") + + def test_header_size_limit(self): + self.reset(max_header_size=1024) + self.parse(b"--boundary\r\n") + with self.assertParseError("Maximum segment header length exceeded"): + 
self.parse(b"Header: " + b"x" * (1024)) + + self.reset(max_header_size=1024, strict=True) + self.parse(b"--boundary\r\n") + with self.assertRaisesRegex( + multipart.MultipartError, "Maximum segment header length exceeded" + ): + self.parse(b"Header: " + b"x" * (1024) + b"\r\n") + + def test_header_count_limit(self): + self.reset(max_header_count=10) + self.parse(b"--boundary\r\n") + for i in range(10): + self.parse(b"Header: value\r\n") + with self.assertParseError("Maximum segment header count exceeded"): + self.parse(b"Header: value\r\n") + + @assertStrict("Unexpected segment header continuation") + def test_header_continuation(self, strict): + self.reset(strict=strict) + self.parse(b"--boundary\r\n") + self.parse(b"Content-Disposition: form-data;\r\n") + self.parse(b'\tname="foo"\r\n') + parts = self.parse(b"\r\ndata\r\n--boundary--") + self.assertEqual( + [("Content-Disposition", 'form-data; name="foo"')], parts[0].headerlist + ) + self.assertEqual(b"data", parts[1]) + + def test_header_continuation_first(self): + self.parse(b"--boundary\r\n") + with self.assertParseError("Unexpected segment header continuation"): + self.parse(b"\tbad: header\r\n\r\ndata\r\n--boundary--") + + def test_header_continuation_long(self): + self.reset(max_header_size=1024) + self.parse(b"--boundary\r\n") + self.parse(b"Header: " + b"v" * 1000 + b"\r\n") + with self.assertParseError("Maximum segment header length exceeded"): + self.parse(b"\tmoooooooooooooooooooooooooore value\r\n") + + def test_header_bad_name(self): + self.reset() + with self.assertParseError("Malformed segment header"): + self.parse(b"--boundary\r\nno-colon\r\n\r\ndata\r\n--boundary--") + self.reset() + with self.assertParseError("Malformed segment header"): + self.parse(b"--boundary\r\n:empty-name\r\n\r\ndata\r\n--boundary--") + for badchar in (b" ", b"\0", b"\r", b"\n", "ö".encode("utf8")): + self.reset() + with self.assertParseError("Invalid segment header name"): + self.parse( + 
b"--boundary\r\ninvalid%sname:value\r\n\r\ndata\r\n--boundary--" + % badchar + ) + self.reset() + with self.assertParseError("Segment header failed to decode"): + self.parse( + b"--boundary\r\ninvalid\xc3\x28:value\r\n\r\ndata\r\n--boundary--" + ) + + def test_header_wrong_segment_subtype(self): + with self.assertParseError("Invalid Content-Disposition segment header: Wrong type"): + self.parse( + b"--boundary\r\nContent-Disposition: mixed\r\n\r\ndata\r\n--boundary--" + ) + + def test_segment_empty_name(self): + self.parse(b"--boundary\r\n") + parts = self.parse(b"Content-Disposition: form-data; name\r\n\r\n") + self.assertEqual(parts[0].name, "") + self.parse(b"\r\n--boundary\r\n") + parts = self.parse(b"Content-Disposition: form-data; name=\r\n\r\n") + self.assertEqual(parts[0].name, "") + self.parse(b"\r\n--boundary\r\n") + parts = self.parse(b'Content-Disposition: form-data; name=""\r\n\r\n') + self.assertEqual(parts[0].name, "") + + @assertStrict("Invalid Content-Disposition segment header: Missing name option") + def test_segment_missing_name(self, strict): + self.reset(strict=strict) + self.parse(b"--boundary\r\n") + parts = self.parse(b"Content-Disposition: form-data;\r\n\r\n") + print(parts) + self.assertEqual(parts[0].name, "") + + def test_segment_count_limit(self): + self.reset(max_segment_count=1) + self.parse(b"--boundary\r\n") + self.parse(b"Content-Disposition: form-data; name=foo\r\n") + self.parse(b"\r\n") + with self.assertParseError("Maximum segment count exceeded"): + self.parse(b"\r\n--boundary\r\n") + + def test_segment_size_limit(self): + self.reset(max_segment_size=5) + self.parse(b"--boundary\r\n") + self.parse(b"Content-Disposition: form-data; name=foo\r\n") + self.parse(b"\r\n") + with self.assertParseError("Maximum segment size exceeded"): + self.parse(b"123456") + self.parse(b"\r\n--boundary\r\n") + + def test_partial_parts(self): + self.reset() + self.assertEqual([], self.parse(b"--boundary\r\n")) + self.assertEqual( + [], 
self.parse(b'Content-Disposition: form-data; name="foo"\r\n') + ) + part = self.parse(b"\r\n")[0] + self.assertEqual( + [("Content-Disposition", 'form-data; name="foo"')], part.headerlist + ) + # Write enough body data to trigger a new part + part = self.parse(b"body" * 10)[0] + # Write partial boundary, should stay incomplete + part = self.parse(b"more\r\n--boundary")[0] + # Turn the incomplete boundary into a terminator + parts = self.parse(b"--") + self.assertIsNone(parts[-1]) + + def test_segment_clen(self): + self.parse(b"--boundary\r\n") + self.parse(b"Content-Disposition: form-data; name=foo\r\n") + self.parse(b"Content-Length: 10\r\n") + self.parse(b"\r\n") + self.parse(b"x" * 10) + self.parse(b"\r\n--boundary--") + + def test_segment_clen_exceeded(self): + self.parse(b"--boundary\r\n") + self.parse(b"Content-Disposition: form-data; name=foo\r\n") + self.parse(b"Content-Length: 10\r\n") + self.parse(b"\r\n") + with self.assertParseError("Segment Content-Length exceeded"): + self.parse(b"x" * 11) + self.parse(b"\r\n--boundary--") + + def test_segment_clen_not_reached(self): + self.parse(b"--boundary\r\n") + self.parse(b"Content-Disposition: form-data; name=foo\r\n") + self.parse(b"Content-Length: 10\r\n") + self.parse(b"\r\n") + with self.assertParseError("Segment size does not match Content-Length header"): + self.parse(b"x" * 9) + self.parse(b"\r\n--boundary--") + + def test_segment_handle_access(self): + self.parse(b"--boundary\r\n") + self.parse(b"Content-Disposition: form-data; name=foo; filename=bar.txt\r\n") + self.parse(b"Content-Type: text/x-foo; charset=ascii\r\n") + part = self.parse(b"\r\n")[0] + self.assertEqual(part.header("Content-Type"), "text/x-foo; charset=ascii") + self.assertEqual(part.header("CONTENT-Type"), "text/x-foo; charset=ascii") + self.assertEqual(part["Content-Type"], "text/x-foo; charset=ascii") + self.assertEqual(part["CONTENT-Type"], "text/x-foo; charset=ascii") + + self.assertEqual(part.name, "foo") + 
self.assertEqual(part.filename, "bar.txt") + + self.assertEqual(part.header("Missing"), None) + self.assertEqual(part.header("Missing", 5), 5) + with self.assertRaises(KeyError): + part["Missing"] + + def test_part_ends_after_header(self): + with self.assertRaises(multipart.MultipartError), self.parser: + self.parse('--boundary\r\n', 'Header: value\r\n', '\r\n--boundary--') + + def test_part_ends_in_header(self): + with self.assertRaises(multipart.MultipartError), self.parser: + self.parse('--boundary\r\n', 'Header: value', '\r\n--boundary--') + + def test_no_terminator(self): + with self.assertRaises(multipart.MultipartError), self.parser: + self.parse('--boundary\r\n', + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', 'abc') + + def test_no_newline_after_content(self): + with self.assertRaises(multipart.MultipartError), self.parser: + self.parse('--boundary\r\n', + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', 'abc', '--boundary--') + + def test_no_newline_after_middle_content(self): + with self.parser: + self.parse( + '--boundary\r\n', + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', 'abc', '--boundary\r\n' + 'Content-Disposition: form-data; name="file2"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', 'abc\r\n', '--boundary--') + segment, body = self.get_segment("file1") + self.assertTrue(body.startswith(b"abc--boundary\r\n")) + self.assertTrue(body.endswith(b"abc")) + + @assertStrict("Unexpected data in front of first delimiter") + def test_ignore_junk_before_start_boundary(self, strict): + self.reset(strict=strict) + self.parse('Preamble\r\n', '--boundary\r\n' + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', 'abc\r\n', '--boundary--') + self.parser.close() + + def 
test_allow_junk_after_end_boundary(self): + self.parse('--boundary--\r\njunk') + self.reset() + self.parse('--boundary\r\n' + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', 'abc\r\n', '--boundary--\r\n', 'junk') + + def test_no_start_boundary(self): + with self.assertRaises(multipart.MultipartError), self.parser: + self.parse('--bar\r\n','--nonsense\r\n' + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', 'abc\r\n', '--nonsense--') + + def test_no_end_boundary(self): + with self.assertRaises(multipart.MultipartError): + self.parse('--boundary\r\n', + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', '\r\n', 'abc\r\n') + self.parser.close() + + def test_empty_part(self): + self.parse('--boundary\r\n', '--boundary--') + with self.assertRaises(multipart.MultipartError): + self.parser.close() + + def test_invalid_header(self): + with self.assertRaises(multipart.MultipartError): + self.parse('--boundary\r\n', + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', + 'Bad header\r\n', '\r\n', 'abc'*1024+'\r\n', '--boundary--') + + def test_content_length_to_small(self): + with self.assertRaises(multipart.MultipartError): + self.parse('--boundary\r\n', + 'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n', + 'Content-Type: image/png\r\n', + 'Content-Length: 111\r\n', '\r\n', 'abc'*1024, '\r\n--boundary--') + + def test_no_disposition_header(self): + with self.assertRaises(multipart.MultipartError): + self.parse('--boundary\r\n', + 'Content-Type: image/png\r\n', '\r\n', 'abc'*1024+'\r\n', '--boundary--') + + + + + + +''' The files used by the following test were taken from the werkzeug library + test suite and are therefore partly copyrighted by the Werkzeug Team + under BSD licence. 
See https://werkzeug.palletsprojects.com/ ''' + +browser_test_cases = {} +browser_test_cases['firefox3-2png1txt'] = {'data': b64decode(b''' +LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0xODY0NTQ2NTE3MTM1MTkzNDE5NTE1ODEwMzAx +MDUNCkNvbnRlbnQtRGlzcG9zaXRpb246IGZvcm0tZGF0YTsgbmFtZT0iZmlsZTEiOyBmaWxlbmFt +ZT0iYW5jaG9yLnBuZyINCkNvbnRlbnQtVHlwZTogaW1hZ2UvcG5nDQoNColQTkcNChoKAAAADUlI +RFIAAAAQAAAAEAgGAAAAH/P/YQAAAARnQU1BAACvyDcFiukAAAAZdEVYdFNvZnR3YXJlAEFkb2Jl +IEltYWdlUmVhZHlxyWU8AAABnUlEQVQ4y6VTMWvCQBS+qwEFB10KGaS1P6FDpw7SrVvzAwRRx04V +Ck4K6iAoDhLXdhFcW9qhZCk4FQoW0gp2U4lQRDAUS4hJmn5Xgg2lsQ198PHu3b3vu5d3L9S2bfIf +47wOer1ewzTNtGEYBP48kUjkfsrb8BIAMb1cLovwRfi07wrYzcCr4/1/Am4FzzhzBGZeefR7E7vd +7j0Iu4wYjUYDBMfD0dBiMUQfstns3toKkHgF6EgmqqruW6bFiHcsxr70awVu63Q6NiOmUinquwfM +dF1f28CVgCRJx0jMAQ1BEFquRn7CbYVCYZVbr9dbnJMohoIh9kViu90WEW9nMpmxu4JyubyF/VEs +FiNcgCPyoyxiu7XhCPBzdU4s652VnUccbDabPLyN2C6VSmwdhFgel5DB84AJb64mEUlvmqadTKcv +40gkUkUsg1DjeZ7iRsrWgByP71T7/afxYrHIYry/eoBD9mxsaK4VRamFw2EBQknMAWGvRClNTpQJ +AfkCxFNgBmiez1ipVA4hdgQcOD/TLfylKIo3vubgL/YBnIw+ioOMLtwAAAAASUVORK5CYIINCi0t +LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tMTg2NDU0NjUxNzEzNTE5MzQxOTUxNTgxMDMwMTA1 +DQpDb250ZW50LURpc3Bvc2l0aW9uOiBmb3JtLWRhdGE7IG5hbWU9ImZpbGUyIjsgZmlsZW5hbWU9 +ImFwcGxpY2F0aW9uX2VkaXQucG5nIg0KQ29udGVudC1UeXBlOiBpbWFnZS9wbmcNCg0KiVBORw0K +GgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdh +cmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAJRSURBVBgZpcHda81xHMDx9+d3fudYzuYw2RaZ5yTW +olEiuZpCSjGJFEktUUr8A6ZxQZGHmDtqdrGUXHgoeZqSp1F2bLFWjtkOB8PZzvmd7+djv5XaBRfL +6yVmxv+QjQeu7l25uuZYJmtxM0AVU8Wpw9RQU8w51AxzDqfKhFjwq6Mjdbj1RN0Zv2ZFzaloUdwr +L2Is4r+y7hRwxs8G5mUzPxmrwcA8hvnmjIZtcxmr3Y09hHwzJZQvOAwwNZyCYqgaThVXMFzBCD7f +Jfv8MpHiKvaV3ePV2f07fMwIiSeIGeYJJoao4HmCiIeIQzPXifY+paJqO4lZi/nWPZ/krabjvlNH +yANMBAQiBiqgakQMCunbxHJviM9bQeZdBzHJUzKhguLJlQnf1BghAmZ4gImAgAjk++8jP56QmL2G +XG8zsfFCz8skA1mQXKbaU3X8ISIgQsgDcun7FL7cJjFnLUMfLyLRr0SLS4hbhiup5Szd19rpFYKA 
+ESKICCERoS95neyHmyTmbmAodQ4vGpAfmEn6YTtTahv4ODiRkGdOCUUAAUSE/uQNfqTaKFu4jvyn +JiIxIzcwg/SjF1RsOk9R+QJMlZCvqvwhQFdbM4XvrynIVHpfn2ZSWYyhzHS+PUtSueUC0cQ0QmpG +yE9197TUnwzq1DnUKbXSxOb6S7xtPkjngzbGVVbzvS/FjaGt9DU8xlRRJdTCMDEzRjuyZ1FwaFe9 +j+d4eecaPd1dPxNTSlfWHm1v5y/EzBitblXp4JLZ5f6yBbOwaK5tsD+9c33jq/f8w2+mRSjOllPh +kAAAAABJRU5ErkJggg0KLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0xODY0NTQ2NTE3MTM1 +MTkzNDE5NTE1ODEwMzAxMDUNCkNvbnRlbnQtRGlzcG9zaXRpb246IGZvcm0tZGF0YTsgbmFtZT0i +dGV4dCINCg0KZXhhbXBsZSB0ZXh0DQotLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLTE4NjQ1 +NDY1MTcxMzUxOTM0MTk1MTU4MTAzMDEwNS0tDQo='''), +'boundary':'---------------------------186454651713519341951581030105', +'files': {'file1': (u'anchor.png', 'image/png', b64decode(b''' +iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0 +U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAGdSURBVDjLpVMxa8JAFL6rAQUHXQoZpLU/ +oUOnDtKtW/MDBFHHThUKTgrqICgOEtd2EVxb2qFkKTgVChbSCnZTiVBEMBRLiEmafleCDaWxDX3w +8e7dve+7l3cv1LZt8h/jvA56vV7DNM20YRgE/jyRSOR+ytvwEgAxvVwui/BF+LTvCtjNwKvj/X8C +bgXPOHMEZl559HsTu93uPQi7jBiNRgMEx8PR0GIxRB+y2eze2gqQeAXoSCaqqu5bpsWIdyzGvvRr +BW7rdDo2I6ZSKeq7B8x0XV/bwJWAJEnHSMwBDUEQWq5GfsJthUJhlVuv11uckyiGgiH2RWK73RYR +b2cymbG7gnK5vIX9USwWI1yAI/KjLGK7teEI8HN1TizrnZWdRxxsNps8vI3YLpVKbB2EWB6XkMHz +gAlvriYRSW+app1Mpy/jSCRSRSyDUON5nuJGytaAHI/vVPv9p/FischivL96gEP2bGxorhVFqYXD +YQFCScwBYa9EKU1OlAkB+QLEU2AGaJ7PWKlUDiF2BBw4P9Mt/KUoije+5uAv9gGcjD6Kg4wu3AAA +AABJRU5ErkJggg==''')), + 'file2': (u'application_edit.png', 'image/png', b64decode(b''' +iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0 +U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAJRSURBVBgZpcHda81xHMDx9+d3fudYzuYw +2RaZ5yTWolEiuZpCSjGJFEktUUr8A6ZxQZGHmDtqdrGUXHgoeZqSp1F2bLFWjtkOB8PZzvmd7+dj +v5XaBRfL6yVmxv+QjQeu7l25uuZYJmtxM0AVU8Wpw9RQU8w51AxzDqfKhFjwq6Mjdbj1RN0Zv2ZF +zaloUdwrL2Is4r+y7hRwxs8G5mUzPxmrwcA8hvnmjIZtcxmr3Y09hHwzJZQvOAwwNZyCYqgaThVX +MFzBCD7fJfv8MpHiKvaV3ePV2f07fMwIiSeIGeYJJoao4HmCiIeIQzPXifY+paJqO4lZi/nWPZ/k 
+rabjvlNHyANMBAQiBiqgakQMCunbxHJviM9bQeZdBzHJUzKhguLJlQnf1BghAmZ4gImAgAjk++8j +P56QmL2GXG8zsfFCz8skA1mQXKbaU3X8ISIgQsgDcun7FL7cJjFnLUMfLyLRr0SLS4hbhiup5Szd +19rpFYKAESKICCERoS95neyHmyTmbmAodQ4vGpAfmEn6YTtTahv4ODiRkGdOCUUAAUSE/uQNfqTa +KFu4jvynJiIxIzcwg/SjF1RsOk9R+QJMlZCvqvwhQFdbM4XvrynIVHpfn2ZSWYyhzHS+PUtSueUC +0cQ0QmpGyE9197TUnwzq1DnUKbXSxOb6S7xtPkjngzbGVVbzvS/FjaGt9DU8xlRRJdTCMDEzRjuy +Z1FwaFe9j+d4eecaPd1dPxNTSlfWHm1v5y/EzBitblXp4JLZ5f6yBbOwaK5tsD+9c33jq/f8w2+m +RSjOllPhkAAAAABJRU5ErkJggg=='''))}, +'forms': {'text': u'example text'}} + +browser_test_cases['firefox3-2pnglongtext'] = {'data': b64decode(b''' +LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0xNDkwNDA0NDczOTc4NzE5MTAzMTc1NDcxMTc0 +OA0KQ29udGVudC1EaXNwb3NpdGlvbjogZm9ybS1kYXRhOyBuYW1lPSJmaWxlMSI7IGZpbGVuYW1l +PSJhY2NlcHQucG5nIg0KQ29udGVudC1UeXBlOiBpbWFnZS9wbmcNCg0KiVBORw0KGgoAAAANSUhE +UgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUg +SW1hZ2VSZWFkeXHJZTwAAAKfSURBVDjLpZPrS1NhHMf9O3bOdmwDCWREIYKEUHsVJBI7mg3FvCxL +09290jZj2EyLMnJexkgpLbPUanNOberU5taUMnHZUULMvelCtWF0sW/n7MVMEiN64AsPD8/n83uu +cQDi/id/DBT4Dolypw/qsz0pTMbj/WHpiDgsdSUyUmeiPt2+V7SrIM+bSss8ySGdR4abQQv6lrui +6VxsRonrGCS9VEjSQ9E7CtiqdOZ4UuTqnBHO1X7YXl6Daa4yGq7vWO1D40wVDtj4kWQbn94myPGk +CDPdSesczE2sCZShwl8CzcwZ6NiUs6n2nYX99T1cnKqA2EKui6+TwphA5k4yqMayopU5mANV3lNQ +TBdCMVUA9VQh3GuDMHiVcLCS3J4jSLhCGmKCjBEx0xlshjXYhApfMZRP5CyYD+UkG08+xt+4wLVQ +ZA1tzxthm2tEfD3JxARH7QkbD1ZuozaggdZbxK5kAIsf5qGaKMTY2lAU/rH5HW3PLsEwUYy+YCcE +RmIjJpDcpzb6l7th9KtQ69fi09ePUej9l7cx2DJbD7UrG3r3afQHOyCo+V3QQzE35pvQvnAZukk5 +zL5qRL59jsKbPzdheXoBZc4saFhBS6AO7V4zqCpiawuptwQG+UAa7Ct3UT0hh9p9EnXT5Vh6t4C2 +2QaUDh6HwnECOmcO7K+6kW49DKqS2DrEZCtfuI+9GrNHg4fMHVSO5kE7nAPVkAxKBxcOzsajpS4Y +h4ohUPPWKTUh3PaQEptIOr6BiJjcZXCwktaAGfrRIpwblqOV3YKdhfXOIvBLeREWpnd8ynsaSJoy +ESFphwTtfjN6X1jRO2+FxWtCWksqBApeiFIR9K6fiTpPiigDoadqCEag5YUFKl6Yrciw0VOlhOiv +v/Ff8wtn0KzlebrUYwAAAABJRU5ErkJggg0KLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0x 
+NDkwNDA0NDczOTc4NzE5MTAzMTc1NDcxMTc0OA0KQ29udGVudC1EaXNwb3NpdGlvbjogZm9ybS1k +YXRhOyBuYW1lPSJmaWxlMiI7IGZpbGVuYW1lPSJhZGQucG5nIg0KQ29udGVudC1UeXBlOiBpbWFn +ZS9wbmcNCg0KiVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK +6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAJvSURBVDjLpZPrS5NhGIf9 +W7YvBYOkhlkoqCklWChv2WyKik7blnNris72bi6dus0DLZ0TDxW1odtopDs4D8MDZuLU0kXq61Ci +jSIIasOvv94VTUfLiB74fXngup7nvrnvJABJ/5PfLnTTdcwOj4RsdYmo5glBWP6iOtzwvIKSWstI +0Wgx80SBblpKtE9KQs/We7EaWoT/8wbWP61gMmCH0lMDvokT4j25TiQU/ITFkek9Ow6+7WH2gwsm +ahCPdwyw75uw9HEO2gUZSkfyI9zBPCJOoJ2SMmg46N61YO/rNoa39Xi41oFuXysMfh36/Fp0b7bA +fWAH6RGi0HglWNCbzYgJaFjRv6zGuy+b9It96N3SQvNKiV9HvSaDfFEIxXItnPs23BzJQd6DDEVM +0OKsoVwBG/1VMzpXVWhbkUM2K4oJBDYuGmbKIJ0qxsAbHfRLzbjcnUbFBIpx/qH3vQv9b3U03IQ/ +HfFkERTzfFj8w8jSpR7GBE123uFEYAzaDRIqX/2JAtJbDat/COkd7CNBva2cMvq0MGxp0PRSCPF8 +BXjWG3FgNHc9XPT71Ojy3sMFdfJRCeKxEsVtKwFHwALZfCUk3tIfNR8XiJwc1LmL4dg141JPKtj3 +WUdNFJqLGFVPC4OkR4BxajTWsChY64wmCnMxsWPCHcutKBxMVp5mxA1S+aMComToaqTRUQknLTH6 +2kHOVEE+VQnjahscNCy0cMBWsSI0TCQcZc5ALkEYckL5A5noWSBhfm2AecMAjbcRWV0pUTh0HE64 +TNf0mczcnnQyu/MilaFJCae1nw2fbz1DnVOxyGTlKeZft/Ff8x1BRssfACjTwQAAAABJRU5ErkJg +gg0KLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0xNDkwNDA0NDczOTc4NzE5MTAzMTc1NDcx +MTc0OA0KQ29udGVudC1EaXNwb3NpdGlvbjogZm9ybS1kYXRhOyBuYW1lPSJ0ZXh0Ig0KDQotLWxv +bmcgdGV4dA0KLS13aXRoIGJvdW5kYXJ5DQotLWxvb2thbGlrZXMtLQ0KLS0tLS0tLS0tLS0tLS0t +LS0tLS0tLS0tLS0tLS0xNDkwNDA0NDczOTc4NzE5MTAzMTc1NDcxMTc0OC0tDQo='''), +'boundary':'---------------------------14904044739787191031754711748', +'files': {'file1': (u'accept.png', 'image/png', b64decode(b''' +iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0 +U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAKfSURBVDjLpZPrS1NhHMf9O3bOdmwDCWRE +IYKEUHsVJBI7mg3FvCxL09290jZj2EyLMnJexkgpLbPUanNOberU5taUMnHZUULMvelCtWF0sW/n +7MVMEiN64AsPD8/n83uucQDi/id/DBT4Dolypw/qsz0pTMbj/WHpiDgsdSUyUmeiPt2+V7SrIM+b 
+Sss8ySGdR4abQQv6lrui6VxsRonrGCS9VEjSQ9E7CtiqdOZ4UuTqnBHO1X7YXl6Daa4yGq7vWO1D +40wVDtj4kWQbn94myPGkCDPdSesczE2sCZShwl8CzcwZ6NiUs6n2nYX99T1cnKqA2EKui6+TwphA +5k4yqMayopU5mANV3lNQTBdCMVUA9VQh3GuDMHiVcLCS3J4jSLhCGmKCjBEx0xlshjXYhApfMZRP +5CyYD+UkG08+xt+4wLVQZA1tzxthm2tEfD3JxARH7QkbD1ZuozaggdZbxK5kAIsf5qGaKMTY2lAU +/rH5HW3PLsEwUYy+YCcERmIjJpDcpzb6l7th9KtQ69fi09ePUej9l7cx2DJbD7UrG3r3afQHOyCo ++V3QQzE35pvQvnAZukk5zL5qRL59jsKbPzdheXoBZc4saFhBS6AO7V4zqCpiawuptwQG+UAa7Ct3 +UT0hh9p9EnXT5Vh6t4C22QaUDh6HwnECOmcO7K+6kW49DKqS2DrEZCtfuI+9GrNHg4fMHVSO5kE7 +nAPVkAxKBxcOzsajpS4Yh4ohUPPWKTUh3PaQEptIOr6BiJjcZXCwktaAGfrRIpwblqOV3YKdhfXO +IvBLeREWpnd8ynsaSJoyESFphwTtfjN6X1jRO2+FxWtCWksqBApeiFIR9K6fiTpPiigDoadqCEag +5YUFKl6Yrciw0VOlhOivv/Ff8wtn0KzlebrUYwAAAABJRU5ErkJggg==''')), + 'file2': (u'add.png', 'image/png', b64decode(b''' +iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0 +U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAJvSURBVDjLpZPrS5NhGIf9W7YvBYOkhlko +qCklWChv2WyKik7blnNris72bi6dus0DLZ0TDxW1odtopDs4D8MDZuLU0kXq61CijSIIasOvv94V +TUfLiB74fXngup7nvrnvJABJ/5PfLnTTdcwOj4RsdYmo5glBWP6iOtzwvIKSWstI0Wgx80SBblpK +tE9KQs/We7EaWoT/8wbWP61gMmCH0lMDvokT4j25TiQU/ITFkek9Ow6+7WH2gwsmahCPdwyw75uw +9HEO2gUZSkfyI9zBPCJOoJ2SMmg46N61YO/rNoa39Xi41oFuXysMfh36/Fp0b7bAfWAH6RGi0Hgl +WNCbzYgJaFjRv6zGuy+b9It96N3SQvNKiV9HvSaDfFEIxXItnPs23BzJQd6DDEVM0OKsoVwBG/1V +MzpXVWhbkUM2K4oJBDYuGmbKIJ0qxsAbHfRLzbjcnUbFBIpx/qH3vQv9b3U03IQ/HfFkERTzfFj8 +w8jSpR7GBE123uFEYAzaDRIqX/2JAtJbDat/COkd7CNBva2cMvq0MGxp0PRSCPF8BXjWG3FgNHc9 +XPT71Ojy3sMFdfJRCeKxEsVtKwFHwALZfCUk3tIfNR8XiJwc1LmL4dg141JPKtj3WUdNFJqLGFVP +C4OkR4BxajTWsChY64wmCnMxsWPCHcutKBxMVp5mxA1S+aMComToaqTRUQknLTH62kHOVEE+VQnj +ahscNCy0cMBWsSI0TCQcZc5ALkEYckL5A5noWSBhfm2AecMAjbcRWV0pUTh0HE64TNf0mczcnnQy +u/MilaFJCae1nw2fbz1DnVOxyGTlKeZft/Ff8x1BRssfACjTwQAAAABJRU5ErkJggg=='''))}, +'forms': {'text': u'--long text\r\n--with boundary\r\n--lookalikes--'}} + +browser_test_cases['opera8-2png1txt'] = {'data': b64decode(b''' 
+LS0tLS0tLS0tLS0tekVPOWpRS21MYzJDcTg4YzIzRHgxOQ0KQ29udGVudC1EaXNwb3NpdGlvbjog +Zm9ybS1kYXRhOyBuYW1lPSJmaWxlMSI7IGZpbGVuYW1lPSJhcnJvd19icmFuY2gucG5nIg0KQ29u +dGVudC1UeXBlOiBpbWFnZS9wbmcNCg0KiVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9h +AAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAHY +SURBVDjLlVLPS1RxHJynpVu7KEn0Vt+2l6IO5qGCIsIwCPwD6hTUaSk6REoUHeoQ0qVAMrp0COpY +0SUIPVRgSl7ScCUTst6zIoqg0y7lvpnPt8MWKuuu29w+hxnmx8dzzmE5+l7mxk1u/a3Dd/ejDjSs +II/m3vjJ9MF0yt93ZuTkdD0CnnMO/WOnmsxsJp3yd2zfvA3mHOa+zuHTjy/zojrvHX1YqunAZE9M +lpUcZAaZQBNIZUg9XdPBP5wePuEO7eyGQXg29QL3jz3y1oqwbvkhCuYEOQMp/HeJohCbICMUVwr0 +DvZcOnK9u7GmQNmBQLJCgORxkneqRmAs0BFmDi0bW9E72PPda/BikwWi0OEHkNR14MrewsTAZF+l +AAWZEH6LUCwUkUlntrS1tiG5IYlEc6LcjYjSYuncngtdhakbM5dXlhgTNEMYLqB9q49MKgsPjTBX +ntVgkDNIgmI1VY2Q7QzgJ9rx++ci3ofziBYiiELQEUAyhB/D29M3Zy+uIkDIhGYvgeKvIkbHxz6T +evzq6ut+ANh9fldetMn80OzZVVdgLFjBQ0tpEz68jcB4ifx3pQeictVXIEETnBPCKMLEwBIZAPJD +767V/ETGwsjzYYiC6vzEP9asLo3SGuQvAAAAAElFTkSuQmCCDQotLS0tLS0tLS0tLS16RU85alFL +bUxjMkNxODhjMjNEeDE5DQpDb250ZW50LURpc3Bvc2l0aW9uOiBmb3JtLWRhdGE7IG5hbWU9ImZp +bGUyIjsgZmlsZW5hbWU9ImF3YXJkX3N0YXJfYnJvbnplXzEucG5nIg0KQ29udGVudC1UeXBlOiBp +bWFnZS9wbmcNCg0KiVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/I +NwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAJvSURBVDjLhZNNSFRR +FIC/N++9eWMzhkl/ZJqFMQMRFvTvImkXSdKiVRAURBRRW1eZA9EqaNOiFlZEtQxKyrJwUS0K+qEQ +zaTE/AtLHR3HmffuvafFNINDWGdz7z2c7+Nyzr2WiFAIffaMBDW1+B0diAgYgxiDiCDG4DU1QfcL +os+fWAXGYUGIUsXiAliUFER+sBAhVCIIVB7QGtEat1oTbcwVz2LMfwR+gPg+oY0bEa3x6sHdUoVd +niMUj0M2i/j+PwVJa2QUu7YWp34D7mqNWdNApD6Ks24dpvcL4gfJRQXevbutjI4lGRzCS9iYukPo +5dvxVqWQvn6k/2uyoudd60LGEhG43VBGyI4j2ADZ7vDJ8DZ9Img4hw4cvO/3UZ1vH3p7lrWRLwGV +neD4y6G84NaOYSoTVYIFIiAGvXI3OWctJv0TW03jZb5gZSfzl9YBpMcIzUwdzQsuVR9EyR3TeCqm +6w5jZiZQMz8xsxOYzDTi50AMVngJNgrnUweRbwMPiLpHrOJDOl9Vh6HD7GyO52qa0VPj6MwUJpNC +5mYQS/DUJLH3zzRp1cqN8YulTUyODBBzt4X6Ou870z2I8ZHsHJLLYNQ8jusQ6+2exJf9BfivKdAy 
+mKZiaVdodhBRAagAjIbgzxp20lwb6Vp0jADYkQO6IpHfuoqInSJUVoE2HrpyRQ1tic2LC9p3lSHW +Ph2rJfL1MeVP2weWvHp8s3ziNZ49i1q6HrR1YHGBNnt1dG2Z++gC4TdvrqNkK1eHj7ljQ/ujHx6N +yPw8BFIiKPmNpKar7P7xb/zyT9P+o7OYvzzYSUt8U+TzxytodixEfgN3CFlQMNAcMgAAAABJRU5E +rkJggg0KLS0tLS0tLS0tLS0tekVPOWpRS21MYzJDcTg4YzIzRHgxOQ0KQ29udGVudC1EaXNwb3Np +dGlvbjogZm9ybS1kYXRhOyBuYW1lPSJ0ZXh0Ig0KDQpibGFmYXNlbCDDtsOkw7wNCi0tLS0tLS0t +LS0tLXpFTzlqUUttTGMyQ3E4OGMyM0R4MTktLQ0K'''), +'boundary':'----------zEO9jQKmLc2Cq88c23Dx19', +'files': {'file1': (u'arrow_branch.png', 'image/png', b64decode(b''' +iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0 +U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAHYSURBVDjLlVLPS1RxHJynpVu7KEn0Vt+2 +l6IO5qGCIsIwCPwD6hTUaSk6REoUHeoQ0qVAMrp0COpY0SUIPVRgSl7ScCUTst6zIoqg0y7lvpnP +t8MWKuuu29w+hxnmx8dzzmE5+l7mxk1u/a3Dd/ejDjSsII/m3vjJ9MF0yt93ZuTkdD0CnnMO/WOn +msxsJp3yd2zfvA3mHOa+zuHTjy/zojrvHX1YqunAZE9MlpUcZAaZQBNIZUg9XdPBP5wePuEO7eyG +QXg29QL3jz3y1oqwbvkhCuYEOQMp/HeJohCbICMUVwr0DvZcOnK9u7GmQNmBQLJCgORxkneqRmAs +0BFmDi0bW9E72PPda/BikwWi0OEHkNR14MrewsTAZF+lAAWZEH6LUCwUkUlntrS1tiG5IYlEc6Lc +jYjSYuncngtdhakbM5dXlhgTNEMYLqB9q49MKgsPjTBXntVgkDNIgmI1VY2Q7QzgJ9rx++ci3ofz +iBYiiELQEUAyhB/D29M3Zy+uIkDIhGYvgeKvIkbHxz6Tevzq6ut+ANh9fldetMn80OzZVVdgLFjB +Q0tpEz68jcB4ifx3pQeictVXIEETnBPCKMLEwBIZAPJD767V/ETGwsjzYYiC6vzEP9asLo3SGuQv +AAAAAElFTkSuQmCC''')), + 'file2': (u'award_star_bronze_1.png', 'image/png', b64decode(b''' +iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0 +U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAJvSURBVDjLhZNNSFRRFIC/N++9eWMzhkl/ +ZJqFMQMRFvTvImkXSdKiVRAURBRRW1eZA9EqaNOiFlZEtQxKyrJwUS0K+qEQzaTE/AtLHR3Hmffu +vafFNINDWGdz7z2c7+Nyzr2WiFAIffaMBDW1+B0diAgYgxiDiCDG4DU1QfcLos+fWAXGYUGIUsXi +AliUFER+sBAhVCIIVB7QGtEat1oTbcwVz2LMfwR+gPg+oY0bEa3x6sHdUoVdniMUj0M2i/j+PwVJ +a2QUu7YWp34D7mqNWdNApD6Ks24dpvcL4gfJRQXevbutjI4lGRzCS9iYukPo5dvxVqWQvn6k/2uy +oudd60LGEhG43VBGyI4j2ADZ7vDJ8DZ9Img4hw4cvO/3UZ1vH3p7lrWRLwGVneD4y6G84NaOYSoT 
+VYIFIiAGvXI3OWctJv0TW03jZb5gZSfzl9YBpMcIzUwdzQsuVR9EyR3TeCqm6w5jZiZQMz8xsxOY +zDTi50AMVngJNgrnUweRbwMPiLpHrOJDOl9Vh6HD7GyO52qa0VPj6MwUJpNC5mYQS/DUJLH3zzRp +1cqN8YulTUyODBBzt4X6Ou870z2I8ZHsHJLLYNQ8jusQ6+2exJf9BfivKdAymKZiaVdodhBRAagA +jIbgzxp20lwb6Vp0jADYkQO6IpHfuoqInSJUVoE2HrpyRQ1tic2LC9p3lSHWPh2rJfL1MeVP2weW +vHp8s3ziNZ49i1q6HrR1YHGBNnt1dG2Z++gC4TdvrqNkK1eHj7ljQ/ujHx6NyPw8BFIiKPmNpKar +7P7xb/zyT9P+o7OYvzzYSUt8U+TzxytodixEfgN3CFlQMNAcMgAAAABJRU5ErkJggg=='''))}, +'forms': {'text': u'blafasel öäü'}} + +browser_test_cases['webkit3-2png1txt'] = {'data': b64decode(b''' +LS0tLS0tV2ViS2l0Rm9ybUJvdW5kYXJ5amRTRmhjQVJrOGZ5R055Ng0KQ29udGVudC1EaXNwb3Np +dGlvbjogZm9ybS1kYXRhOyBuYW1lPSJmaWxlMSI7IGZpbGVuYW1lPSJndGstYXBwbHkucG5nIg0K +Q29udGVudC1UeXBlOiBpbWFnZS9wbmcNCg0KiVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACN +iR0NAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAN1wAADdcBQiibeAAAABl0RVh0U29mdHdhcmUA +d3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANnSURBVDiNldJ9aJVVHAfw7znPuS/PvW4405WbLWfbsBuN +bramq5Tp7mLqIFPXINlwpAitaCAPjWKgBdXzR2TBpEZoadAyCVGndttCFNxqLXORK7x3y704NlzX +zfs8d89znuf0R/fKk03xHvjCOZxzPpzzO4cIIZBuC6nsGYmRrwFMWVw0hxV+PDVH0gVDKvNSRgZf +rm5+QCISOi58pY1MXhm1uHg+rPDfabqnoxJpKQ2snf/gwgKY3ut4pfodX/lTGwokRt4AgLTAkMoK +3cz7enVJg/fyTCdGE/3gwsTo+LBu2+J82qDE6IEXyrd7YvYwbpgjyPOtQHTikvhz+NKgsNGWFhhS +WU3uwqWPBx9aRwfjPTCFgXx5JY50tumWKbaFFS7uGQypLINKZH/tukb/kN6DSSOCFfO3oqu/3biZ +iH0ZVvjF1Np7AiVG31sdXO/P8GfhqtaLbE8BqOlBZ++xuMXFbudaljxBDnNJHbZlFwF407bFh6kr +hFRW7Jcztlc9Uee5HD+DaWsCTy/YgbaOvZpl2Y1hhU87QVLxvpQpMfpzfeXuZfmLA/Rw1wdaZOS3 +Pm7aNQDGJUZ/qatqKs5etIj03TiKQv8aaFOWOHRm30+nm4zS229DmVs6Ulm6OW/50iD9G1Hsqnrb +t2lNwyoXYwMAPnk4N1D4aO4qEtW6wagHeZ4SfNP1mW6Zdt1c5WEE8Lll5qKCQbdiGIh/h+JlK6Wi +xcHM4z2fb9tUtkOO6hdw3Yzi2axdON33xaxuzLSGFf7HXCA1Dav+5Nn2Kyd7DyYK5bXw0QWIJM4j +7rqGmvKd8gwZw5D+I3K8jyGhmzj366lpi4uWOz0gEUIgpDKPxGjr/VlLanZubJknXLMYiH8Pjccw +K26C27Oouu8tfHysWbs6HnkxrPATdwVTLaSyzW63+8BLzzX6H1lSSrtjBzFpRPBkZi0mrk3Z7Z2t +P5xqMiruhP0PTKL5EqMnSgKr87eUvSqPGf3Ipsux53CDpie0QFjhf90NhBDiVlJ1LaqmcqXq2l/7 
+aU7826E94rWjQb3iXbYXgAzAC8ADwI1//zF1OkQIAUIIBSAlc6tfpkjr52XTj4SFi937eP3MmDAB +2I5YyaT63AmyuVDHmAAQt0FOzARg/aeGhBCS3EjnCBygMwKAnXL+AdDkiZ/xYgR3AAAAAElFTkSu +QmCCDQotLS0tLS1XZWJLaXRGb3JtQm91bmRhcnlqZFNGaGNBUms4ZnlHTnk2DQpDb250ZW50LURp +c3Bvc2l0aW9uOiBmb3JtLWRhdGE7IG5hbWU9ImZpbGUyIjsgZmlsZW5hbWU9Imd0ay1uby5wbmci +DQpDb250ZW50LVR5cGU6IGltYWdlL3BuZw0KDQqJUE5HDQoaCgAAAA1JSERSAAAAFAAAABQIBgAA +AI2JHQ0AAAAEc0JJVAgICAh8CGSIAAAACXBIWXMAAA3XAAAN1wFCKJt4AAAAGXRFWHRTb2Z0d2Fy +ZQB3d3cuaW5rc2NhcGUub3Jnm+48GgAAAzVJREFUOI2tlM9rG0cUxz8zu7OzsqhtyTIONDG2g9ue +UnIwFEqCwYUeTC+99u5T/4FAKKUEeuh/4FPvOZXiWw3GpRRcGjW0h1KwLLe4juOspJUlS95frwft +CkdJbh347o95bz+8mfedVSLC/zncNwUeKnVfw4YD6yncBXCgnsJeBruPRPZf952arPCBUhUL216p +tLm0vGxmq1X3rbk5AC6CgE67nTQbjTgaDHauYOtrkfYbgV8o9SHw/crKytR7d+5YDXhzc2hjEBGy +OCZutciU4s+nT68ajcYl8MlXIj+9AnygVMXA4draWqVWqaBLJcz09ChLBBGBXHEYImlK0G5zcHDQ +juF2UakuyBa2l27dmqqWywxOTpAkIWq1iILgFWVxzOXREZVymaXFxSkL2wVHFw0w1m6urq7asF7H +sZa01SINAiQIyIp7q0XaapEEAcp1CZ884Z3VVWus3Xyo1P1xlzVsvL2wYJLTUwhDdBiiHAedL1EV ++yxCJoJkGTpJkDAkOj3l5o0b5vD4eAPYd3M7rM+WSq7qdLCAOjtD+z46y1DXgJkIZNmIHUWj3E6H +melp14H1cYUZ3J31fZyTE1zA7fVw+n0cERSg8v2RUS5pPqeArNtlZmGBwqtjY+skwYig80lXBCff +5OvANFeSxzIRojge5+j8Uu9dXOD5Pt6o41jAz1W69uznMQ8wgOf79LpdNNTHwBT22r1ebDwPt0h8 +DbQAFTADGGvp9PtxCntjYAa7zW43wVpca3HyZZsJaAF0C/k+4vs0wzDJYHcMfCSyHyfJzq/n50NT +raKVwhl1H3cCpAsphVut8tvz58M4SXaKn8X4pFzB1lG/P2gOBuhaDYxBJhqR5e8Yg56f53gwoNHr +Da9gq+CMz7JSauoz+HgFvr1trX+vXPZKUYSbJCMTA+K6xMYw8Dx+7Pfjw+Fw+Dt8/h38ALwQkeg6 +cAaoLcLyp/BlVam1dz3PWdDaqbkjdwVpymmaZn9FUXouUn8M3zyDJvAC+PclYA6dBmpA5SO4dxM+ +mIf3fVgCGMLfz+CPf+CXPfgZCIFz4ExEkpeWfH0opZzcKYUsI38nIy5D4BK4kgnAfwLblOaQdQsS +AAAAAElFTkSuQmCCDQotLS0tLS1XZWJLaXRGb3JtQm91bmRhcnlqZFNGaGNBUms4ZnlHTnk2DQpD +b250ZW50LURpc3Bvc2l0aW9uOiBmb3JtLWRhdGE7IG5hbWU9InRleHQiDQoNCnRoaXMgaXMgYW5v +dGhlciB0ZXh0IHdpdGggw7xtbMOkw7x0cw0KLS0tLS0tV2ViS2l0Rm9ybUJvdW5kYXJ5amRTRmhj +QVJrOGZ5R055Ni0tDQo='''), +'boundary':'----WebKitFormBoundaryjdSFhcARk8fyGNy6', +'files': {'file1': (u'gtk-apply.png', 
'image/png', b64decode(b''' +iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz +AAAN1wAADdcBQiibeAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANnSURB +VDiNldJ9aJVVHAfw7znPuS/PvW4405WbLWfbsBuNbramq5Tp7mLqIFPXINlwpAitaCAPjWKgBdXz +R2TBpEZoadAyCVGndttCFNxqLXORK7x3y704NlzXzfs8d89znuf0R/fKk03xHvjCOZxzPpzzO4cI +IZBuC6nsGYmRrwFMWVw0hxV+PDVH0gVDKvNSRgZfrm5+QCISOi58pY1MXhm1uHg+rPDfabqnoxJp +KQ2snf/gwgKY3ut4pfodX/lTGwokRt4AgLTAkMoK3cz7enVJg/fyTCdGE/3gwsTo+LBu2+J82qDE +6IEXyrd7YvYwbpgjyPOtQHTikvhz+NKgsNGWFhhSWU3uwqWPBx9aRwfjPTCFgXx5JY50tumWKbaF +FS7uGQypLINKZH/tukb/kN6DSSOCFfO3oqu/3biZiH0ZVvjF1Np7AiVG31sdXO/P8GfhqtaLbE8B +qOlBZ++xuMXFbudaljxBDnNJHbZlFwF407bFh6krhFRW7Jcztlc9Uee5HD+DaWsCTy/YgbaOvZpl +2Y1hhU87QVLxvpQpMfpzfeXuZfmLA/Rw1wdaZOS3Pm7aNQDGJUZ/qatqKs5etIj03TiKQv8aaFOW +OHRm30+nm4zS229DmVs6Ulm6OW/50iD9G1Hsqnrbt2lNwyoXYwMAPnk4N1D4aO4qEtW6wagHeZ4S +fNP1mW6Zdt1c5WEE8Lll5qKCQbdiGIh/h+JlK6WixcHM4z2fb9tUtkOO6hdw3Yzi2axdON33xaxu +zLSGFf7HXCA1Dav+5Nn2Kyd7DyYK5bXw0QWIJM4j7rqGmvKd8gwZw5D+I3K8jyGhmzj366lpi4uW +Oz0gEUIgpDKPxGjr/VlLanZubJknXLMYiH8PjccwK26C27Oouu8tfHysWbs6HnkxrPATdwVTLaSy +zW63+8BLzzX6H1lSSrtjBzFpRPBkZi0mrk3Z7Z2tP5xqMiruhP0PTKL5EqMnSgKr87eUvSqPGf3I +psux53CDpie0QFjhf90NhBDiVlJ1LaqmcqXq2l/7aU7826E94rWjQb3iXbYXgAzAC8ADwI1//zF1 +OkQIAUIIBSAlc6tfpkjr52XTj4SFi937eP3MmDAB2I5YyaT63AmyuVDHmAAQt0FOzARg/aeGhBCS +3EjnCBygMwKAnXL+AdDkiZ/xYgR3AAAAAElFTkSuQmCC''')), + 'file2': (u'gtk-no.png', 'image/png', b64decode(b''' +iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz +AAAN1wAADdcBQiibeAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAAM1SURB +VDiNrZTPaxtHFMc/M7uzs7KobckyDjQxtoPbnlJyMBRKgsGFHkwvvfbuU/+BQCilBHrof+BT7zmV +4lsNxqUUXBo1tIdSsCy3uI7jrKSVJUveX68H7QpHSW4d+O6PeW8/vJn3nVUiwv853DcFHip1X8OG +A+sp3AVwoJ7CXga7j0T2X/edmqzwgVIVC9teqbS5tLxsZqtV9625OQAugoBOu500G404Ggx2rmDr +a5H2G4FfKPUh8P3KysrUe3fuWA14c3NoYxARsjgmbrXIlOLPp0+vGo3GJfDJVyI/vQJ8oFTFwOHa 
+2lqlVqmgSyXM9PQoSwQRgVxxGCJpStBuc3Bw0I7hdlGpLsgWtpdu3ZqqlssMTk6QJCFqtYiC4BVl +cczl0RGVcpmlxcUpC9sFRxcNMNZurq6u2rBex7GWtNUiDQIkCMiKe6tF2mqRBAHKdQmfPOGd1VVr +rN18qNT9cZc1bLy9sGCS01MIQ3QYohwHnS9RFfssQiaCZBk6SZAwJDo95eaNG+bw+HgD2HdzO6zP +lkqu6nSwgDo7Q/s+OstQ14CZCGTZiB1Fo9xOh5npadeB9XGFGdyd9X2ckxNcwO31cPp9HBEUoPL9 +kVEuaT6ngKzbZWZhgcKrY2PrJMGIoPNJVwQn3+TrwDRXkscyEaI4Hufo/FLvXVzg+T7eqONYwM9V +uvbs5zEPMIDn+/S6XTTUx8AU9tq9Xmw8D7dIfA20ABUwAxhr6fT7cQp7Y2AGu81uN8FaXGtx8mWb +CWgBdAv5PuL7NMMwyWB3DHwksh8nyc6v5+dDU62ilcIZdR93AqQLKYVbrfLb8+fDOEl2ip/F+KRc +wdZRvz9oDgboWg2MQSYakeXvGIOen+d4MKDR6w2vYKvgjM+yUmrqM/h4Bb69ba1/r1z2SlGEmyQj +EwPiusTGMPA8fuz348PhcPg7fP4d/AC8EJHoOnAGqC3C8qfwZVWptXc9z1nQ2qm5I3cFacppmmZ/ +RVF6LlJ/DN88gybwAvj3JWAOnQZqQOUjuHcTPpiH931YAhjC38/gj3/glz34GQiBc+BMRJKXlnx9 +KKWc3CmFLCN/JyMuQ+ASuJIJwH8C25TmkHULEgAAAABJRU5ErkJggg=='''))}, +'forms': {'text': u'this is another text with ümläüts'}} + +browser_test_cases['ie6-2png1txt'] = {'data': b64decode(b''' +LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS03ZDkxYjAzYTIwMTI4DQpDb250ZW50LURpc3Bv +c2l0aW9uOiBmb3JtLWRhdGE7IG5hbWU9ImZpbGUxIjsgZmlsZW5hbWU9IkM6XFB5dGhvbjI1XHd6 +dGVzdFx3ZXJremV1Zy1tYWluXHRlc3RzXG11bHRpcGFydFxmaXJlZm94My0ycG5nMXR4dFxmaWxl +MS5wbmciDQpDb250ZW50LVR5cGU6IGltYWdlL3gtcG5nDQoNColQTkcNChoKAAAADUlIRFIAAAAQ +AAAAEAgGAAAAH/P/YQAAAARnQU1BAACvyDcFiukAAAAZdEVYdFNvZnR3YXJlAEFkb2JlIEltYWdl +UmVhZHlxyWU8AAABnUlEQVQ4y6VTMWvCQBS+qwEFB10KGaS1P6FDpw7SrVvzAwRRx04VCk4K6iAo +DhLXdhFcW9qhZCk4FQoW0gp2U4lQRDAUS4hJmn5Xgg2lsQ198PHu3b3vu5d3L9S2bfIf47wOer1e +wzTNtGEYBP48kUjkfsrb8BIAMb1cLovwRfi07wrYzcCr4/1/Am4FzzhzBGZeefR7E7vd7j0Iu4wY +jUYDBMfD0dBiMUQfstns3toKkHgF6EgmqqruW6bFiHcsxr70awVu63Q6NiOmUinquwfMdF1f28CV +gCRJx0jMAQ1BEFquRn7CbYVCYZVbr9dbnJMohoIh9kViu90WEW9nMpmxu4JyubyF/VEsFiNcgCPy +oyxiu7XhCPBzdU4s652VnUccbDabPLyN2C6VSmwdhFgel5DB84AJb64mEUlvmqadTKcv40gkUkUs +g1DjeZ7iRsrWgByP71T7/afxYrHIYry/eoBD9mxsaK4VRamFw2EBQknMAWGvRClNTpQJAfkCxFNg +Bmiez1ipVA4hdgQcOD/TLfylKIo3vubgL/YBnIw+ioOMLtwAAAAASUVORK5CYIINCi0tLS0tLS0t 
+LS0tLS0tLS0tLS0tLS0tLS0tLS0tN2Q5MWIwM2EyMDEyOA0KQ29udGVudC1EaXNwb3NpdGlvbjog +Zm9ybS1kYXRhOyBuYW1lPSJmaWxlMiI7IGZpbGVuYW1lPSJDOlxQeXRob24yNVx3enRlc3Rcd2Vy +a3pldWctbWFpblx0ZXN0c1xtdWx0aXBhcnRcZmlyZWZveDMtMnBuZzF0eHRcZmlsZTIucG5nIg0K +Q29udGVudC1UeXBlOiBpbWFnZS94LXBuZw0KDQqJUE5HDQoaCgAAAA1JSERSAAAAEAAAABAIBgAA +AB/z/2EAAAAEZ0FNQQAAr8g3BYrpAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccll +PAAAAlFJREFUGBmlwd1rzXEcwPH353d+51jO5jDZFpnnJNaiUSK5mkJKMYkUSS1RSvwDpnFBkYeY +O2p2sZRceCh5mpKnUXZssVaO2Q4Hw9nO+Z3v52O/ldoFF8vrJWbG/5CNB67uXbm65lgma3EzQBVT +xanD1FBTzDnUDHMOp8qEWPCroyN1uPVE3Rm/ZkXNqWhR3CsvYiziv7LuFHDGzwbmZTM/GavBwDyG ++eaMhm1zGavdjT2EfDMllC84DDA1nIJiqBpOFVcwXMEIPt8l+/wykeIq9pXd49XZ/Tt8zAiJJ4gZ +5gkmhqjgeYKIh4hDM9eJ9j6lomo7iVmL+dY9n+StpuO+U0fIA0wEBCIGKqBqRAwK6dvEcm+Iz1tB +5l0HMclTMqGC4smVCd/UGCECZniAiYCACOT77yM/npCYvYZcbzOx8ULPyyQDWZBcptpTdfwhIiBC +yANy6fsUvtwmMWctQx8vItGvRItLiFuGK6nlLN3X2ukVgoARIogIIRGhL3md7IebJOZuYCh1Di8a +kB+YSfphO1NqG/g4OJGQZ04JRQABRIT+5A1+pNooW7iO/KcmIjEjNzCD9KMXVGw6T1H5AkyVkK+q +/CFAV1szhe+vKchUel+fZlJZjKHMdL49S1K55QLRxDRCakbIT3X3tNSfDOrUOdQptdLE5vpLvG0+ +SOeDNsZVVvO9L8WNoa30NTzGVFEl1MIwMTNGO7JnUXBoV72P53h55xo93V0/E1NKV9YebW/nL8TM +GK1uVengktnl/rIFs7Borm2wP71zfeOr9/zDb6ZFKM6WU+GQAAAAAElFTkSuQmCCDQotLS0tLS0t +LS0tLS0tLS0tLS0tLS0tLS0tLS0tLTdkOTFiMDNhMjAxMjgNCkNvbnRlbnQtRGlzcG9zaXRpb246 +IGZvcm0tZGF0YTsgbmFtZT0idGV4dCINCg0KaWU2IHN1Y2tzIDotLw0KLS0tLS0tLS0tLS0tLS0t +LS0tLS0tLS0tLS0tLS03ZDkxYjAzYTIwMTI4LS0NCg=='''), +'boundary':'---------------------------7d91b03a20128', +'files': {'file1': (u'file1.png', 'image/x-png', b64decode(b''' +iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0 +U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAGdSURBVDjLpVMxa8JAFL6rAQUHXQoZpLU/ +oUOnDtKtW/MDBFHHThUKTgrqICgOEtd2EVxb2qFkKTgVChbSCnZTiVBEMBRLiEmafleCDaWxDX3w +8e7dve+7l3cv1LZt8h/jvA56vV7DNM20YRgE/jyRSOR+ytvwEgAxvVwui/BF+LTvCtjNwKvj/X8C +bgXPOHMEZl559HsTu93uPQi7jBiNRgMEx8PR0GIxRB+y2eze2gqQeAXoSCaqqu5bpsWIdyzGvvRr 
+BW7rdDo2I6ZSKeq7B8x0XV/bwJWAJEnHSMwBDUEQWq5GfsJthUJhlVuv11uckyiGgiH2RWK73RYR +b2cymbG7gnK5vIX9USwWI1yAI/KjLGK7teEI8HN1TizrnZWdRxxsNps8vI3YLpVKbB2EWB6XkMHz +gAlvriYRSW+app1Mpy/jSCRSRSyDUON5nuJGytaAHI/vVPv9p/FischivL96gEP2bGxorhVFqYXD +YQFCScwBYa9EKU1OlAkB+QLEU2AGaJ7PWKlUDiF2BBw4P9Mt/KUoije+5uAv9gGcjD6Kg4wu3AAA +AABJRU5ErkJggg==''')), + 'file2': (u'file2.png', 'image/x-png', b64decode(b''' +iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0 +U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAJRSURBVBgZpcHda81xHMDx9+d3fudYzuYw +2RaZ5yTWolEiuZpCSjGJFEktUUr8A6ZxQZGHmDtqdrGUXHgoeZqSp1F2bLFWjtkOB8PZzvmd7+dj +v5XaBRfL6yVmxv+QjQeu7l25uuZYJmtxM0AVU8Wpw9RQU8w51AxzDqfKhFjwq6Mjdbj1RN0Zv2ZF +zaloUdwrL2Is4r+y7hRwxs8G5mUzPxmrwcA8hvnmjIZtcxmr3Y09hHwzJZQvOAwwNZyCYqgaThVX +MFzBCD7fJfv8MpHiKvaV3ePV2f07fMwIiSeIGeYJJoao4HmCiIeIQzPXifY+paJqO4lZi/nWPZ/k +rabjvlNHyANMBAQiBiqgakQMCunbxHJviM9bQeZdBzHJUzKhguLJlQnf1BghAmZ4gImAgAjk++8j +P56QmL2GXG8zsfFCz8skA1mQXKbaU3X8ISIgQsgDcun7FL7cJjFnLUMfLyLRr0SLS4hbhiup5Szd +19rpFYKAESKICCERoS95neyHmyTmbmAodQ4vGpAfmEn6YTtTahv4ODiRkGdOCUUAAUSE/uQNfqTa +KFu4jvynJiIxIzcwg/SjF1RsOk9R+QJMlZCvqvwhQFdbM4XvrynIVHpfn2ZSWYyhzHS+PUtSueUC +0cQ0QmpGyE9197TUnwzq1DnUKbXSxOb6S7xtPkjngzbGVVbzvS/FjaGt9DU8xlRRJdTCMDEzRjuy +Z1FwaFe9j+d4eecaPd1dPxNTSlfWHm1v5y/EzBitblXp4JLZ5f6yBbOwaK5tsD+9c33jq/f8w2+m +RSjOllPhkAAAAABJRU5ErkJggg=='''))}, +'forms': {'text': u'ie6 sucks :-/'}} + +class TestWerkzeugExamples(PushTestBase): + def test_werkzeug_examples(self): + """Tests multipart parsing against data collected from webbrowsers""" + for name in browser_test_cases: + self.reset( + boundary=browser_test_cases[name]['boundary'], + strict=True, + header_charset='utf8' + ) + files = browser_test_cases[name]['files'] + forms = browser_test_cases[name]['forms'] + self.parse(browser_test_cases[name]['data']) + + for field in files: + segment, body = self.get_segment(field) + self.assertTrue(segment.complete) + self.assertEqual(segment.name, field) + 
self.assertEqual(segment.filename, files[field][0]) + self.assertEqual(segment.content_type, files[field][1]) + self.assertEqual(body, files[field][2]) + for field in forms: + segment, body = self.get_segment(field) + self.assertEqual(segment.name, field) + self.assertEqual(segment.filename, None) + self.assertEqual(segment.content_type, None) + self.assertEqual(body.decode(segment.charset or 'utf8'), forms[field]) diff --git a/tests/test_multipart/test_wsgi_parser.py b/tests/test_multipart/test_wsgi_parser.py new file mode 100644 index 00000000..2d2d800f --- /dev/null +++ b/tests/test_multipart/test_wsgi_parser.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +from .utils import BaseParserTest + +from webob import multipart + +class TestFormParser(BaseParserTest): + + def test_multipart(self): + self.write_field("file1", "abc", filename="random.png", content_type="image/png") + self.write_field("text1", "abc",) + self.write_end() + forms, files = self.parse_form_data() + + self.assertEqual(forms['text1'], 'abc') + self.assertEqual(files['file1'].file.read(), b'abc') + self.assertEqual(files['file1'].filename, 'random.png') + self.assertEqual(files['file1'].name, 'file1') + self.assertEqual(files['file1'].content_type, 'image/png') + + def test_empty(self): + self.write_end() + forms, files = self.parse_form_data() + self.assertEqual(0, len(forms)) + self.assertEqual(0, len(files)) + + def test_urlencoded(self): + for ctype in ('application/x-www-form-urlencoded', 'application/x-url-encoded'): + self.reset().write('a=b&c=d') + self.environ['CONTENT_TYPE'] = ctype + forms, files = self.parse_form_data() + self.assertEqual(forms['a'], 'b') + self.assertEqual(forms['c'], 'd') + + def test_urlencoded_latin1(self): + for ctype in ('application/x-www-form-urlencoded', 'application/x-url-encoded'): + self.reset().write(b'a=\xe0\xe1&e=%E8%E9') + self.environ['CONTENT_TYPE'] = ctype + forms, files = self.parse_form_data(charset='iso-8859-1') + self.assertEqual(forms['a'], 
'àá') + self.assertEqual(forms['e'], 'èé') + + def test_urlencoded_utf8(self): + for ctype in ('application/x-www-form-urlencoded', 'application/x-url-encoded'): + self.reset().write(b'a=\xc6\x80\xe2\x99\xad&e=%E1%B8%9F%E2%99%AE') + self.environ['CONTENT_TYPE'] = ctype + forms, files = self.parse_form_data() + self.assertEqual(forms['a'], 'ƀ♭') + self.assertEqual(forms['e'], 'ḟ♮') + + def test_empty(self): + with self.assertRaises(multipart.MultipartError): + self.parse_form_data(strict=True) + + def test_wrong_method(self): + self.environ['REQUEST_METHOD'] = 'GET' + with self.assertRaises(multipart.MultipartError): + self.parse_form_data(strict=True) + + def test_missing_content_type(self): + self.environ['CONTENT_TYPE'] = None + with self.assertRaises(multipart.MultipartError): + self.parse_form_data(strict=True) + + def test_unsupported_content_type(self): + self.environ['CONTENT_TYPE'] = 'multipart/fantasy' + with self.assertRaises(multipart.MultipartError): + self.parse_form_data(strict=True) + + def test_missing_boundary(self): + self.environ['CONTENT_TYPE'] = 'multipart/form-data' + with self.assertRaises(multipart.MultipartError): + self.parse_form_data(strict=True) + + def test_invalid_content_length(self): + self.environ['CONTENT_LENGTH'] = '' + with self.assertRaises(multipart.MultipartError): + self.parse_form_data(strict=True) + self.environ['CONTENT_LENGTH'] = 'notanumber' + with self.assertRaises(multipart.MultipartError): + self.parse_form_data(strict=True) + + def test_invalid_environ(self): + self.environ['wsgi.input'] = None + with self.assertRaises(multipart.MultipartError): + self.parse_form_data(strict=True) + + def test_big_urlencoded_detect_early(self): + self.environ['CONTENT_TYPE'] = 'application/x-www-form-urlencoded' + self.environ['CONTENT_LENGTH'] = 1024+1 + self.write('a=b') + with self.assertRaises(multipart.MultipartError): + self.parse_form_data(mem_limit=1024, strict=True) + + def test_big_urlencoded_detect_late(self): + 
self.environ['CONTENT_TYPE'] = 'application/x-www-form-urlencoded' + self.write('a='+'b'*1024) + with self.assertRaises(multipart.MultipartError): + self.parse_form_data(mem_limit=1024, strict=True) + + def test_content_length(self): + self.write('a=b&c=ddd') + self.environ['CONTENT_TYPE'] = 'application/x-www-form-urlencoded' + self.environ['CONTENT_LENGTH'] = '7' + + # Obey Content-Length, do not overread + forms, files = self.parse_form_data() + self.assertEqual(forms["c"], "d") + + # Detect short inputs + with self.assertMultipartError("Unexpected end of data stream"): + self.environ['CONTENT_LENGTH'] = '10' + self.parse_form_data(strict=True) + + def test_close_on_error(self): + self.write_field("file1", 'x'*1024, filename="foo.bin") + self.write_field("file2", 'x'*1025, filename="foo.bin") + # self.write_end() <-- bad multipart + # In case of an error, all parts parsed up until then should be closed + # Can't really be tested here, but will show up in coverage + with self.assertMultipartError("Unexpected end of multipart stream"): + self.parse_form_data(strict=True) diff --git a/tests/test_multipart/utils.py b/tests/test_multipart/utils.py new file mode 100644 index 00000000..b79da1e1 --- /dev/null +++ b/tests/test_multipart/utils.py @@ -0,0 +1,100 @@ +from contextlib import contextmanager +import unittest + +from io import BytesIO + +from webob import multipart +from webob.multipart import to_bytes + +class BaseParserTest(unittest.TestCase): + def setUp(self): + self.data = BytesIO() + self.boundary = 'foo' + self.environ = { + 'REQUEST_METHOD':'POST', + 'CONTENT_TYPE':'multipart/form-data; boundary=%s' % self.boundary + } + self.to_close = [] + + def tearDown(self): + for part in self.to_close: + if hasattr(part, 'close'): + part.close() + + def reset(self): + self.data.seek(0) + self.data.truncate() + return self + + def write(self, *chunks): + for chunk in chunks: + self.data.write(to_bytes(chunk)) + return self + + def write_boundary(self): + if 
self.data.tell() > 0: + self.write(b'\r\n') + self.write(b'--', to_bytes(self.boundary), b'\r\n') + + def write_end(self, force=False): + end = b'--' + to_bytes(self.boundary) + b'--' + if not force and self.data.getvalue().endswith(end): + return + if self.data.tell() > 0: + self.write(b'\r\n') + self.write(end) + + def write_header(self, header, value, **opts): + line = to_bytes(header) + b': ' + to_bytes(value) + for opt, val in opts.items(): + if val is not None: + line += b"; " + to_bytes(opt) + b'=' + to_bytes(multipart.header_quote(val)) + self.write(line + b'\r\n') + + def write_field(self, name, data, filename=None, content_type=None): + self.write_boundary() + self.write_header("Content-Disposition", "form-data", name=name, filename=filename) + if content_type: + self.write_header("Content-Type", content_type) + self.write(b"\r\n") + self.write(data) + + def get_buffer_copy(self): + return BytesIO(self.data.getvalue()) + + def parser(self, *lines, **kwargs): + if lines: + self.reset() + self.write(*lines) + self.data.seek(0) + + kwargs.setdefault("boundary", self.boundary) + p = multipart.MultipartParser(self.data, **kwargs) + for part in p: + self.to_close.append(part) + return p + + def parse_form_data(self, *lines, **kwargs): + if lines: + self.reset() + self.write(*lines) + + environ = kwargs.setdefault('environ', self.environ.copy()) + environ.setdefault('wsgi.input', self.get_buffer_copy()) + for key, value in list(environ.items()): + if value is None: + del environ[key] + + forms, files = multipart.parse_form_data(**kwargs) + self.to_close.extend(part for _, part in files.iterallitems()) + return forms, files + + def assertParserFails(self, *a, **ka): + self.assertRaises(multipart.MultipartError, self.parser, *a, **ka) + + @contextmanager + def assertMultipartError(self, message: str = None): + with self.assertRaises(multipart.MultipartError) as ex: + yield + if message: + self.assertIn(message, str(ex.exception)) diff --git a/tests/test_request.py 
b/tests/test_request.py index 86fbdfbd..2711cafe 100644 --- a/tests/test_request.py +++ b/tests/test_request.py @@ -579,17 +579,17 @@ def test_POST_urlencoded(self, method): @pytest.mark.parametrize("method", ["POST", "PUT", "PATCH", "DELETE"]) def test_POST_multipart(self, method): data = ( - b"------------------------------deb95b63e42a\n" - b'Content-Disposition: form-data; name="foo"\n' - b"\n" - b"foo\n" - b"------------------------------deb95b63e42a\n" - b'Content-Disposition: form-data; name="bar"; filename="bar.txt"\n' - b"Content-type: application/octet-stream\n" - b"\n" - b'these are the contents of the file "bar.txt"\n' - b"\n" - b"------------------------------deb95b63e42a--\n" + b"------------------------------deb95b63e42a\r\n" + b'Content-Disposition: form-data; name="foo"\r\n' + b"\r\n" + b"foo\r\n" + b"------------------------------deb95b63e42a\r\n" + b'Content-Disposition: form-data; name="bar"; filename="bar.txt"\r\n' + b"Content-type: application/octet-stream\r\n" + b"\r\n" + b'these are the contents of the file "bar.txt"\r\n' + b"\r\n" + b"------------------------------deb95b63e42a--\r\n" ) wsgi_input = BytesIO(data) environ = { @@ -606,7 +606,7 @@ def test_POST_multipart(self, method): bar = result["bar"] assert bar.name == "bar" assert bar.filename == "bar.txt" - assert bar.file.read() == b'these are the contents of the file "bar.txt"\n' + assert bar.file.read() == b'these are the contents of the file "bar.txt"\r\n' @pytest.mark.parametrize("method", ["POST", "PUT", "PATCH", "DELETE"]) def test_POST_preserves_body_file(self, method): @@ -1060,9 +1060,7 @@ def test_blank__post_multipart(self): assert request.content_length == 139 def test_blank__post_files(self): - import cgi - - from webob.multidict import MultiDict + from webob.multidict import MultiDict, MultiDictFile from webob.request import _get_multipart_boundary POST = MultiDict() @@ -1090,8 +1088,9 @@ def test_blank__post_files(self): ) assert body_norm == expected assert 
request.content_length == 294 - assert isinstance(request.POST["first"], cgi.FieldStorage) - assert isinstance(request.POST["second"], cgi.FieldStorage) + # TODO: Backwards incompatible changes + assert isinstance(request.POST["first"], MultiDictFile) + assert isinstance(request.POST["second"], MultiDictFile) assert request.POST["first"].value == b"1" assert request.POST["second"].value == b"2" assert request.POST["third"] == "3" @@ -2120,21 +2119,6 @@ def test_already_consumed_stream(self): req2 = req2.decode("latin-1") assert body == req2.body - def test_none_field_name(self): - from webob.request import Request - - body = b"--FOO\r\nContent-Disposition: form-data\r\n\r\n123\r\n--FOO--" - content_type = "multipart/form-data; boundary=FOO" - environ = { - "wsgi.input": BytesIO(body), - "CONTENT_TYPE": content_type, - "CONTENT_LENGTH": len(body), - "REQUEST_METHOD": "POST", - } - req = Request(environ) - req = req.decode("latin-1") - assert body == req.body - def test_broken_seek(self): # copy() should work even when the input has a broken seek method req = self._blankOne( @@ -2440,7 +2424,7 @@ def test_from_bytes(self): # A valid request without a Content-Length header should still read # the full body. # Also test parity between as_string and from_bytes / from_file. 
- import cgi + from webob.multidict import MultiDictFile cls = self._getTargetClass() req = cls.from_bytes(_test_req) @@ -2455,7 +2439,7 @@ def test_from_bytes(self): assert bar_contents in req.body assert req.params["foo"] == "foo" bar = req.params["bar"] - assert isinstance(bar, cgi.FieldStorage) + assert isinstance(bar, MultiDictFile) assert bar.type == "application/octet-stream" bar.file.seek(0) assert bar.file.read() == bar_contents @@ -2473,7 +2457,7 @@ def test_from_bytes(self): cls.from_bytes(_test_req2 + b"xx") def test_from_text(self): - import cgi + from webob.multidict import MultiDictFile cls = self._getTargetClass() req = cls.from_text(text_(_test_req, "utf-8")) @@ -2488,7 +2472,7 @@ def test_from_text(self): assert bar_contents in req.body assert req.params["foo"] == "foo" bar = req.params["bar"] - assert isinstance(bar, cgi.FieldStorage) + assert isinstance(bar, MultiDictFile) assert bar.type == "application/octet-stream" bar.file.seek(0) assert bar.file.read() == bar_contents @@ -2574,16 +2558,6 @@ def test_body_file_noseek(self): lst = [req.body_file.read(1) for i in range(3)] assert lst == [b"a", b"b", b"c"] - def test_cgi_escaping_fix(self): - req = self._blankOne( - "/", - content_type="multipart/form-data; boundary=boundary", - POST=_cgi_escaping_body, - ) - assert list(req.POST.keys()) == ['%20%22"'] - req.body_file.read() - assert list(req.POST.keys()) == ['%20%22"'] - def test_content_type_none(self): r = self._blankOne("/", content_type="text/html") assert r.content_type == "text/html" @@ -2922,35 +2896,6 @@ def equal_req(self, req, inp): assert req_body == req2_body -class Test_cgi_FieldStorage__repr__patch: - def _callFUT(self, fake): - from webob.compat import cgi_FieldStorage - - return cgi_FieldStorage.__repr__(fake) - - def test_with_file(self): - class Fake: - name = "name" - file = "file" - filename = "filename" - value = "value" - - fake = Fake() - result = self._callFUT(fake) - assert result, "FieldStorage('name' == 'filename')" 
- - def test_without_file(self): - class Fake: - name = "name" - file = None - filename = "filename" - value = "value" - - fake = Fake() - result = self._callFUT(fake) - assert result, "FieldStorage('name', 'filename' == 'value')" - - class TestLimitedLengthFile: def _makeOne(self, file, maxlen): from webob.request import LimitedLengthFile @@ -3132,11 +3077,13 @@ def simpleapp(environ, start_response): ] -_cgi_escaping_body = """--boundary -Content-Disposition: form-data; name="%20%22"" - - ---boundary--""" +_cgi_escaping_body = ( + b"--boundary\r\n" + b'Content-Disposition: form-data; name="%20%22""\r\n' + b"\r\n" + b"\r\n" + b"--boundary--\r\n" +) def _norm_req(s):