A streaming multipart parser for Python that enables efficient handling of file uploads and form data in web applications
npx @tessl/cli install tessl/pypi-python-multipart@0.0.0

A streaming multipart parser for Python that provides comprehensive parsing capabilities for multipart/form-data, application/x-www-form-urlencoded, and application/octet-stream content types. It enables efficient handling of file uploads and form data in web applications without loading entire payloads into memory.
pip install python-multipart

import python_multipart

Common imports for specific parser classes:
from python_multipart import (
FormParser,
MultipartParser,
QuerystringParser,
OctetStreamParser,
parse_form,
create_form_parser
)

Legacy import (deprecated but still supported):

import multipart  # Shows deprecation warning

import python_multipart
def simple_wsgi_app(environ, start_response):
    """Parse a submitted form from a WSGI request and reply with plain text.

    Args:
        environ: WSGI environment dict. ``CONTENT_TYPE`` and ``wsgi.input``
            are read, plus ``CONTENT_LENGTH`` when the server supplies it.
        start_response: WSGI ``start_response`` callable.

    Returns:
        A one-element list holding the response body as bytes.
    """

    # Simple form parsing with callbacks
    def on_field(field):
        print(f"Field: {field.field_name} = {field.value}")

    def on_file(file):
        print(f"File: {file.field_name}, size: {file.size}")
        file.close()

    # Parse form data from WSGI environ
    headers = {'Content-Type': environ['CONTENT_TYPE']}
    # Forward the declared body length: a WSGI server is not required to
    # signal EOF on wsgi.input, so reading past CONTENT_LENGTH may block.
    # The key may be absent or an empty string, hence the truthiness check.
    if environ.get('CONTENT_LENGTH'):
        headers['Content-Length'] = environ['CONTENT_LENGTH']
    python_multipart.parse_form(
        headers,
        environ['wsgi.input'],
        on_field,
        on_file,
    )

    start_response('200 OK', [('Content-Type', 'text/plain')])
    return [b'Form parsed successfully']
# Direct parser usage for streaming large files
from python_multipart import MultipartParser
def handle_upload(boundary, input_stream):
def on_part_data(data, start, end):
# Process data chunk without loading entire file
chunk = data[start:end]
process_chunk(chunk)
callbacks = {'on_part_data': on_part_data}
parser = MultipartParser(boundary, callbacks)
# Stream data in chunks
while True:
chunk = input_stream.read(8192)
if not chunk:
break
parser.write(chunk)
parser.finalize()Python-multipart uses a streaming, callback-based architecture that enables memory-efficient processing:
This design allows processing arbitrarily large uploads without memory constraints while providing both low-level control and high-level convenience.
Complete form parsing solution that automatically detects content types and creates appropriate parsers. Handles multipart/form-data, application/x-www-form-urlencoded, and application/octet-stream with Field and File object creation.
def parse_form(
    headers: dict[str, bytes],
    input_stream,
    on_field,
    on_file,
    chunk_size: int = 1048576
) -> None:
    """Signature outline of the high-level helper that parses a form body.

    Args:
        headers: Request headers keyed by header name.
        input_stream: Source the request body is read from.
        on_field: Callback for parsed form fields.
        on_file: Callback for parsed file uploads.
        chunk_size: Defaults to 1048576; presumably the size of each read
            from ``input_stream`` -- confirm against the library docs.
    """
    ...
def create_form_parser(
    headers: dict[str, bytes],
    on_field,
    on_file,
    trust_x_headers: bool = False,
    config: dict = {}
) -> FormParser:
    """Signature outline of the factory that returns a ``FormParser``.

    Args:
        headers: Request headers keyed by header name.
        on_field: Callback for parsed form fields.
        on_file: Callback for parsed file uploads.
        trust_x_headers: Off by default; presumably controls whether
            ``X-``-prefixed request headers are honoured -- confirm.
        config: Parser configuration mapping.

    Returns:
        A ``FormParser`` instance.
    """
    ...
class FormParser:
def __init__(
self,
content_type: str,
on_field: OnFieldCallback | None,
on_file: OnFileCallback | None,
on_end: Callable[[], None] | None = None,
boundary: bytes | str | None = None,
file_name: bytes | None = None,
FileClass: type[FileProtocol] = File,
FieldClass: type[FieldProtocol] = Field,
config: dict = {}
): ...
def write(self, data: bytes) -> int: ...
def finalize(self) -> None: ...
def close(self) -> None: ...Base class and low-level streaming parsers for specific content types with callback-based processing. BaseParser provides common functionality, while specialized parsers provide fine-grained control over parsing behavior.
class BaseParser:
def __init__(self): ...
def callback(
self,
name: str,
data: bytes | None = None,
start: int | None = None,
end: int | None = None
) -> None: ...
def set_callback(self, name: str, new_func) -> None: ...
def close(self) -> None: ...
def finalize(self) -> None: ...
class MultipartParser(BaseParser):
    """Signature outline of the streaming parser for multipart bodies."""

    def __init__(
        self,
        boundary: bytes | str,
        callbacks: dict = {},
        max_size: float = float("inf")
    ): ...

    def write(self, data: bytes) -> int: ...
class QuerystringParser(BaseParser):
    """Signature outline of the streaming parser for querystring bodies."""

    def __init__(
        self,
        callbacks: dict = {},
        strict_parsing: bool = False,
        max_size: float = float("inf")
    ): ...
class OctetStreamParser(BaseParser):
def __init__(
self,
callbacks: dict = {},
max_size: float = float("inf")
): ...Base Parser and Streaming Parsers
Field and File objects for handling parsed form data with configurable storage options. Files support automatic memory-to-disk spillover based on size thresholds.
class Field:
def __init__(self, name: bytes | None): ...
@classmethod
def from_value(cls, name: bytes, value: bytes | None) -> Field: ...
field_name: bytes | None
value: bytes | None
class File:
def __init__(
self,
file_name: bytes | None,
field_name: bytes | None = None,
config: dict = {}
): ...
field_name: bytes | None
file_name: bytes | None
actual_file_name: bytes | None
file_object: BytesIO | BufferedRandom
size: int
in_memory: boolStreaming decoders for Base64 and quoted-printable encoded content with automatic caching for incomplete chunks.
class Base64Decoder:
    """Signature outline of the streaming Base64 decoder."""

    # NOTE(review): ``underlying`` is presumably the object decoded bytes are
    # forwarded to -- confirm against the library docs.
    def __init__(self, underlying): ...

    def write(self, data: bytes) -> int: ...

    def finalize(self) -> None: ...
class QuotedPrintableDecoder:
def __init__(self, underlying): ...
def write(self, data: bytes) -> int: ...
def finalize(self) -> None: ...Comprehensive exception hierarchy for robust error handling across all parsing operations.
class FormParserError(ValueError):
    """Root of the exception hierarchy; also a ``ValueError``."""


class ParseError(FormParserError):
    """Common base of the parse-error classes listed after it."""

    # Defaults to -1; presumably "offset not known" -- confirm against the
    # library docs.
    offset: int = -1


class MultipartParseError(ParseError):
    """``ParseError`` subclass named for multipart parsing."""


class QuerystringParseError(ParseError):
    """``ParseError`` subclass named for querystring parsing."""


class DecodeError(ParseError):
    """``ParseError`` subclass named for decoding failures."""
class FileError(FormParserError, OSError): ...

def parse_options_header(value: str | bytes | None) -> tuple[bytes, dict[bytes, bytes]]: ...

Parses Content-Type headers into (content_type, parameters) format for boundary extraction and content type detection.
Import:
from python_multipart.multipart import parse_options_header

# State enums for parser tracking
class QuerystringState(IntEnum):
    """Parser-tracking states for querystring parsing."""

    BEFORE_FIELD = 0
    FIELD_NAME = 1
    FIELD_DATA = 2
class MultipartState(IntEnum):
    """Parser-tracking states for multipart parsing, numbered 0 through 12."""

    START = 0
    START_BOUNDARY = 1
    HEADER_FIELD_START = 2
    HEADER_FIELD = 3
    HEADER_VALUE_START = 4
    HEADER_VALUE = 5
    HEADER_VALUE_ALMOST_DONE = 6
    HEADERS_ALMOST_DONE = 7
    PART_DATA_START = 8
    PART_DATA = 9
    PART_DATA_END = 10
    END_BOUNDARY = 11
    END = 12
# Configuration types
class FormParserConfig(TypedDict):
UPLOAD_DIR: str | None
UPLOAD_KEEP_FILENAME: bool
UPLOAD_KEEP_EXTENSIONS: bool
UPLOAD_ERROR_ON_BAD_CTE: bool
MAX_MEMORY_FILE_SIZE: int
MAX_BODY_SIZE: float
class FileConfig(TypedDict, total=False):
UPLOAD_DIR: str | bytes | None
UPLOAD_DELETE_TMP: bool
UPLOAD_KEEP_FILENAME: bool
UPLOAD_KEEP_EXTENSIONS: bool
MAX_MEMORY_FILE_SIZE: int
class QuerystringCallbacks(TypedDict, total=False):
    """Optional callbacks for querystring parsing.

    The name and data callbacks take ``(bytes, int, int)``; the others take
    no arguments.
    """

    on_field_start: Callable[[], None]
    on_field_name: Callable[[bytes, int, int], None]
    on_field_data: Callable[[bytes, int, int], None]
    on_field_end: Callable[[], None]
    on_end: Callable[[], None]
class OctetStreamCallbacks(TypedDict, total=False):
    """Optional callbacks for octet-stream parsing.

    ``on_data`` takes ``(bytes, int, int)``; the others take no arguments.
    """

    on_start: Callable[[], None]
    on_data: Callable[[bytes, int, int], None]
    on_end: Callable[[], None]
class MultipartCallbacks(TypedDict, total=False):
    """Optional callbacks for multipart parsing.

    The part-data, header-field and header-value callbacks take
    ``(bytes, int, int)``; the others take no arguments.
    """

    on_part_begin: Callable[[], None]
    on_part_data: Callable[[bytes, int, int], None]
    on_part_end: Callable[[], None]
    on_header_begin: Callable[[], None]
    on_header_field: Callable[[bytes, int, int], None]
    on_header_value: Callable[[bytes, int, int], None]
    on_header_end: Callable[[], None]
    on_headers_finished: Callable[[], None]
    on_end: Callable[[], None]
# Protocol types
class SupportsRead(Protocol):
    """Structural type for objects exposing ``read(n) -> bytes``."""

    def read(self, __n: int) -> bytes: ...
class SupportsWrite(Protocol):
    """Structural type for objects exposing ``write(b)``."""

    def write(self, __b: bytes) -> object: ...
class _FormProtocol(Protocol):
    """Structural type for objects with ``write``, ``finalize`` and ``close``."""

    def write(self, data: bytes) -> int: ...

    def finalize(self) -> None: ...

    def close(self) -> None: ...
class FieldProtocol(Protocol):
def __init__(self, name: bytes | None) -> None: ...
def write(self, data: bytes) -> int: ...
def finalize(self) -> None: ...
def close(self) -> None: ...
def set_none(self) -> None: ...
class FileProtocol(Protocol):
def __init__(self, file_name: bytes | None, field_name: bytes | None, config: dict) -> None: ...
def write(self, data: bytes) -> int: ...
def finalize(self) -> None: ...
def close(self) -> None: ...
# Callback type aliases
# Callable that receives one FieldProtocol object and returns None.
OnFieldCallback = Callable[[FieldProtocol], None]
# Callable that receives one FileProtocol object and returns None.
OnFileCallback = Callable[[FileProtocol], None]
# Every accepted callback name, written without the "on_" prefix that the
# callback TypedDict keys carry.
CallbackName = Literal[
    # Names matching the OctetStreamCallbacks keys ("end" is shared by all).
    "start",
    "data",
    "end",
    # Names matching the QuerystringCallbacks keys.
    "field_start",
    "field_name",
    "field_data",
    "field_end",
    # Names matching the MultipartCallbacks keys.
    "part_begin",
    "part_data",
    "part_end",
    "header_begin",
    "header_field",
    "header_value",
    "header_end",
    "headers_finished",
]