A streaming multipart parser for Python that enables efficient handling of file uploads and form data in web applications.
Field and File objects for handling parsed form data with configurable storage options and lifecycle management. These objects provide the data layer for storing and accessing parsed form fields and uploaded files with automatic memory management.
Represents a parsed form field with name and value, providing simple storage for text-based form data.
class Field:
"""
Represents a form field with name and value.
"""
def __init__(self, name: bytes | None):
"""
Initialize Field object.
Parameters:
- name: Field name as bytes
"""
@classmethod
def from_value(cls, name: bytes, value: bytes | None) -> 'Field':
"""
Create Field from name and value.
Parameters:
- name: Field name as bytes
- value: Field value as bytes or None
Returns:
Field instance
"""
def write(self, data: bytes) -> int:
"""
Write data to field value.
Parameters:
- data: Bytes to append to field value
Returns:
Number of bytes written
"""
def on_data(self, data: bytes) -> int:
"""Handle data callback."""
def on_end(self) -> None:
"""Handle end callback."""
def finalize(self) -> None:
"""Finalize field processing."""
def close(self) -> None:
"""Close field and clean up resources."""
def set_none(self) -> None:
"""Set field value to None."""
def __eq__(self, other: object) -> bool:
"""Compare fields for equality."""
def __repr__(self) -> str:
"""String representation of field."""
# Properties
@property
def field_name(self) -> bytes | None:
"""This property returns the name of the field."""
@property
def value(self) -> bytes | None:
"""This property returns the value of the form field."""Usage Example:
from python_multipart import Field

# Create a field directly.
field = Field(b'username')
field.write(b'john_doe')
field.finalize()
print(f"Field: {field.field_name.decode('utf-8')} = {field.value.decode('utf-8')}")

# Create a field from a value.
email_field = Field.from_value(b'email', b'john@example.com')
print(f"Email: {email_field.value.decode('utf-8')}")

# Handle an empty field.
empty_field = Field(b'optional_field')
empty_field.set_none()
print(f"Empty field value: {empty_field.value}")  # None

# Handles writing file data to memory or disk with configurable thresholds
# and automatic spillover for large files.
class File:
"""
Handles file uploads with configurable memory/disk storage.
"""
def __init__(
self,
file_name: bytes | None,
field_name: bytes | None = None,
config: FileConfig = {}
):
"""
Initialize File object.
Parameters:
- file_name: Original filename as bytes
- field_name: Form field name as bytes
- config: Configuration dict for file handling
"""
def flush_to_disk(self) -> None:
"""If the file is already on-disk, do nothing. Otherwise, copy from
the in-memory buffer to a disk file, and then reassign our internal
file object to this new disk file.
Note that if you attempt to flush a file that is already on-disk, a
warning will be logged to this module's logger."""
def _get_disk_file(self) -> BufferedRandom:
"""This function is responsible for getting a file object on-disk for us.
Creates either a named temporary file or uses configured upload directory
based on configuration options.
Returns:
BufferedRandom file object opened for writing
Raises:
FileError: If unable to create or open disk file
"""
def write(self, data: bytes) -> int:
"""
Write data to file.
Parameters:
- data: Bytes to write
Returns:
Number of bytes written
"""
def on_data(self, data: bytes) -> int:
"""This method is a callback that will be called whenever data is
written to the File.
Parameters:
- data: The data to write to the file
Returns:
The number of bytes written"""
def on_end(self) -> None:
"""This method is called whenever the File is finalized."""
def finalize(self) -> None:
"""Finalize the form file. This will not close the underlying file,
but simply signal that we are finished writing to the File."""
def close(self) -> None:
"""Close the File object. This will actually close the underlying
file object (whether it's a io.BytesIO or an actual file object)."""
def __repr__(self) -> str:
"""Return string representation: File(file_name=..., field_name=...)."""
# Properties
@property
def field_name(self) -> bytes | None:
"""The form field associated with this file. May be None if there isn't
one, for example when we have an application/octet-stream upload."""
@property
def file_name(self) -> bytes | None:
"""The file name given in the upload request."""
@property
def actual_file_name(self) -> bytes | None:
"""The file name that this file is saved as. Will be None if it's not
currently saved on disk."""
@property
def file_object(self) -> BytesIO | BufferedRandom:
"""The file object that we're currently writing to. Note that this
will either be an instance of a io.BytesIO, or a regular file object."""
@property
def size(self) -> int:
"""The total size of this file, counted as the number of bytes that
currently have been written to the file."""
@property
def in_memory(self) -> bool:
"""A boolean representing whether or not this file object is currently
stored in-memory or on-disk."""Configuration Options:
FileConfig = {
'UPLOAD_DIR': str | bytes | None, # Directory for file uploads
'UPLOAD_DELETE_TMP': bool, # Delete temp files automatically
'UPLOAD_KEEP_FILENAME': bool, # Keep original filename
'UPLOAD_KEEP_EXTENSIONS': bool, # Keep file extensions
'MAX_MEMORY_FILE_SIZE': int # Max size before writing to disk
}Usage Example:
from python_multipart import File
import os
import tempfile

# Configure file handling.
config = {
    'UPLOAD_DIR': '/tmp/uploads',
    'UPLOAD_KEEP_FILENAME': True,
    'UPLOAD_KEEP_EXTENSIONS': True,
    'MAX_MEMORY_FILE_SIZE': 1024 * 1024,  # 1MB
    'UPLOAD_DELETE_TMP': False,
}

# Create the upload directory if it doesn't exist.
os.makedirs(config['UPLOAD_DIR'], exist_ok=True)

# Create the file object.
uploaded_file = File(
    file_name=b'document.pdf',
    field_name=b'file_upload',
    config=config,
)

# Simulate writing file content.
file_content = b'PDF content would go here...' * 1000
uploaded_file.write(file_content)
uploaded_file.finalize()

print(f"File: {uploaded_file.file_name.decode('utf-8')}")
print(f"Field: {uploaded_file.field_name.decode('utf-8')}")
print(f"Size: {uploaded_file.size} bytes")
print(f"In memory: {uploaded_file.in_memory}")

if not uploaded_file.in_memory:
    print(f"Saved to: {uploaded_file.actual_file_name.decode('utf-8')}")

# Always close when done.
uploaded_file.close()

# Memory Management Example:
from python_multipart import File
from io import BytesIO


def handle_file_upload(file_data, filename, field_name):
    """Demonstrate automatic memory-to-disk spillover.

    Writes the same payload to one File configured with a large in-memory
    threshold (stays in memory) and one with a tiny threshold (spills to
    disk), then reports where each copy ended up.
    """
    # Small files stay in memory.
    small_config = {'MAX_MEMORY_FILE_SIZE': 10 * 1024 * 1024}  # 10MB
    small_file = File(filename.encode(), field_name.encode(), small_config)

    # Large files go to disk.
    large_config = {'MAX_MEMORY_FILE_SIZE': 1024}  # 1KB
    large_file = File(filename.encode(), field_name.encode(), large_config)

    # Write the same data to both.
    for file_obj in [small_file, large_file]:
        file_obj.write(file_data)
        file_obj.finalize()

        print(f"File size: {file_obj.size}")
        print(f"In memory: {file_obj.in_memory}")

        if file_obj.in_memory:
            # Access data directly from memory.
            data = file_obj.file_object.getvalue()
            print(f"Memory data length: {len(data)}")
        else:
            # File is on disk.
            print(f"Disk file: {file_obj.actual_file_name}")
            # Read it back from the disk file.
            with open(file_obj.actual_file_name, 'rb') as f:
                disk_data = f.read()
            print(f"Disk data length: {len(disk_data)}")

        file_obj.close()


# Test with sample data
test_data = b'x' * 2048  # 2KB of data
handle_file_upload(test_data, 'test.txt', 'upload')

# File Lifecycle Management:
from python_multipart import File
import tempfile
import os


def process_upload_with_cleanup(file_content, filename):
    """Demonstrate proper file lifecycle management.

    Writes the content, processes it from memory or disk as appropriate,
    and always closes the File (which deletes temp files when
    UPLOAD_DELETE_TMP is configured).
    """
    config = {
        'UPLOAD_DIR': tempfile.gettempdir(),
        'UPLOAD_DELETE_TMP': True,  # Auto-delete temp files
        'MAX_MEMORY_FILE_SIZE': 1024,
    }
    file_obj = File(filename.encode(), b'upload', config)
    try:
        # Write the file content.
        file_obj.write(file_content)
        file_obj.finalize()

        # Process the file.
        if file_obj.in_memory:
            # Work with the in-memory buffer.
            content = file_obj.file_object.getvalue()
            return f"Processed {len(content)} bytes in memory"
        else:
            # Work with the on-disk file.
            temp_path = file_obj.actual_file_name.decode()
            with open(temp_path, 'rb') as f:
                content = f.read()
            return f"Processed {len(content)} bytes from disk"
    finally:
        # Always clean up; this deletes temp files if configured.
        file_obj.close()


# Usage
result = process_upload_with_cleanup(b'Large file content...', 'data.bin')
print(result)

# Field and File objects are automatically created by FormParser and
# passed to callbacks:
from python_multipart import FormParser


def handle_form_data(content_type, input_stream):
    """Parse a multipart stream, collecting fields and files via callbacks.

    Parameters:
    - content_type: The request's Content-Type header value
    - input_stream: A binary stream containing the request body

    Returns:
    Dict with 'fields' and 'files' lists describing everything parsed.
    """
    processed_fields = []
    processed_files = []

    def on_field(field):
        # Field object is automatically created and populated by the parser.
        processed_fields.append({
            'name': field.field_name.decode('utf-8'),
            'value': field.value.decode('utf-8') if field.value else None
        })
        field.close()  # Clean up

    def on_file(file):
        # File object is automatically created and populated by the parser.
        file_info = {
            'field_name': file.field_name.decode('utf-8'),
            'filename': file.file_name.decode('utf-8') if file.file_name else None,
            'size': file.size,
            'in_memory': file.in_memory
        }
        if file.in_memory:
            # Small file - data is in memory.
            file_info['content'] = file.file_object.getvalue()
        else:
            # Large file - saved to disk.
            file_info['temp_path'] = file.actual_file_name.decode('utf-8')
        processed_files.append(file_info)
        file.close()  # Important: clean up temp files

    # The parser creates Field/File objects automatically.
    parser = FormParser(content_type, on_field, on_file)

    # Feed the input stream to the parser in fixed-size chunks.
    while chunk := input_stream.read(8192):
        parser.write(chunk)
    parser.finalize()
    parser.close()

    return {
        'fields': processed_fields,
        'files': processed_files,
    }


# Install with Tessl CLI:
npx tessl i tessl/pypi-python-multipart