A streaming multipart parser for Python that enables efficient handling of file uploads and form data in web applications
—
Streaming decoders for Base64 and quoted-printable encoded content with automatic caching for incomplete chunks. These decoders enable processing of encoded form data without loading entire payloads into memory.
from typing import Protocol, TypeVar

# Type accepted by ``write``. Contravariant because it only ever appears in a
# parameter (input) position.
_T_contra = TypeVar("_T_contra", contravariant=True)


class SupportsWrite(Protocol[_T_contra]):
    """
    Structural type for any sink object that exposes ``write(data)``.

    The protocol is generic in the written type so that annotations such as
    ``SupportsWrite[bytes]`` are valid; a non-generic Protocol cannot be
    subscripted and would raise TypeError when the annotation is evaluated.
    """

    def write(self, __b: _T_contra) -> object: ...


# Provides an interface to decode a stream of Base64 data with automatic
# caching for arbitrary-sized writes and proper handling of incomplete chunks.
class Base64Decoder:
    """
    Streaming Base64 decoder with chunk caching.

    Base64 input is supplied through write() in pieces of any size. Input
    that does not yet form a complete chunk is kept in `cache`, and decoded
    bytes are written to `underlying`.

    This listing is an interface summary: only signatures and documentation
    are shown, the method bodies are omitted.
    """

    def __init__(self, underlying: SupportsWrite[bytes]) -> None:
        """
        Initialize Base64Decoder.

        Parameters:
        - underlying: Object with write() method to receive decoded data
        """

    def write(self, data: bytes) -> int:
        """
        Decode Base64 data and write to underlying object.

        Parameters:
        - data: Base64 encoded bytes to decode

        Returns:
        Number of input bytes processed

        Raises:
        - DecodeError: If invalid Base64 data is encountered
        """

    def close(self) -> None:
        """
        Close decoder and underlying object if it has close() method.

        Because the underlying object is closed as well, read any result
        out of it before calling close().
        """

    def finalize(self) -> None:
        """
        Finalize decoder, writing any remaining cached data.

        Call once after the last write().

        Raises:
        - DecodeError: If data remains in cache (incomplete Base64)
        """

    # Properties
    cache: bytearray  # Cache for incomplete Base64 chunks
    underlying: SupportsWrite[bytes]  # Underlying object to write decoded data

# Usage Example:
from python_multipart.decoders import Base64Decoder
from python_multipart.exceptions import DecodeError
import base64
import io
def decode_base64_stream(encoded_stream, output_file):
    """
    Stream-decode Base64 from a readable object into a binary file.

    Parameters:
    - encoded_stream: Object whose read(size) returns Base64 encoded bytes
    - output_file: Path of the file that receives the decoded bytes

    Raises:
    - DecodeError: Reported on stdout, then re-raised to the caller
    """
    with open(output_file, 'wb') as sink:
        decoder = Base64Decoder(sink)
        try:
            # Pull 1024-byte pieces until read() returns something empty.
            while piece := encoded_stream.read(1024):
                decoder.write(piece)
            # End of input: finalize so an incomplete trailing chunk is
            # reported instead of being silently dropped.
            decoder.finalize()
        except DecodeError as err:
            print(f"Base64 decode error: {err}")
            raise
        finally:
            decoder.close()
# Example with in-memory decoding
def decode_base64_to_memory(base64_data):
    """
    Decode Base64 data into an in-memory buffer, feeding it in slices.

    Parameters:
    - base64_data: Base64 encoded bytes

    Returns:
    The decoded bytes, or None when a DecodeError was raised (the error is
    printed rather than propagated)
    """
    sink = io.BytesIO()
    decoder = Base64Decoder(sink)
    piece_len = 100
    try:
        # Feed the input slice by slice instead of in a single write().
        for start in range(0, len(base64_data), piece_len):
            decoder.write(base64_data[start:start + piece_len])
        decoder.finalize()
        # The return value is computed before the finally clause closes the
        # decoder (and with it the buffer).
        return sink.getvalue()
    except DecodeError as err:
        print(f"Decode error: {err}")
        return None
    finally:
        decoder.close()
# Round-trip a sample message through the in-memory decoder
original_data = b"Hello, World! This is a test message."
encoded_data = base64.b64encode(original_data)
print(f"Original: {original_data}", f"Encoded: {encoded_data}", sep="\n")
decoded_data = decode_base64_to_memory(encoded_data)
print(f"Decoded: {decoded_data}")
print(f"Match: {original_data == decoded_data}")

Handling Incomplete Data:
from python_multipart.decoders import Base64Decoder
from python_multipart.exceptions import DecodeError
import io
def demonstrate_chunk_handling():
    """
    Show how decoder handles incomplete Base64 chunks.

    Writes one Base64 message in four unaligned pieces and prints the
    decoder's cache after each write, then prints the decoded result.
    """
    # "Hello World!" encoded. The data itself is 16 bytes long; it is the
    # pieces below that do not line up with the 4-character Base64 groups.
    base64_data = b"SGVsbG8gV29ybGQh"
    sink = io.BytesIO()
    decoder = Base64Decoder(sink)
    # Cut points producing b"SGV", b"sbG8", b"gV2" and b"9ybGQh". Every cut
    # except the final one falls inside a 4-character group.
    boundaries = (0, 3, 7, 10, len(base64_data))
    pieces = [base64_data[lo:hi] for lo, hi in zip(boundaries, boundaries[1:])]
    try:
        for index, piece in enumerate(pieces):
            print(f"Writing chunk {index}: {piece}")
            decoder.write(piece)
            print(f"Cache after chunk {index}: {decoder.cache}")
        decoder.finalize()
        # Read the result before the finally clause closes the decoder.
        print(f"Decoded result: {sink.getvalue()}")
    except DecodeError as err:
        print(f"Error: {err}")
    finally:
        decoder.close()
demonstrate_chunk_handling()

Provides an interface to decode a stream of quoted-printable data with caching for incomplete escape sequences.
class QuotedPrintableDecoder:
    """
    Streaming quoted-printable decoder with chunk caching.

    Quoted-printable input is supplied through write() in pieces of any
    size. An escape sequence that is still incomplete at the end of a write
    is kept in `cache`, and decoded bytes are written to `underlying`.

    This listing is an interface summary: only signatures and documentation
    are shown, the method bodies are omitted.
    """

    def __init__(self, underlying: SupportsWrite[bytes]) -> None:
        """
        Initialize QuotedPrintableDecoder.

        Parameters:
        - underlying: Object with write() method to receive decoded data
        """

    def write(self, data: bytes) -> int:
        """
        Decode quoted-printable data and write to underlying object.

        Parameters:
        - data: Quoted-printable encoded bytes to decode

        Returns:
        Number of input bytes processed
        """

    def close(self) -> None:
        """
        Close decoder and underlying object if it has close() method.

        Because the underlying object is closed as well, read any result
        out of it before calling close().
        """

    def finalize(self) -> None:
        """
        Finalize decoder, writing any remaining cached data.

        Call once after the last write().
        Does not raise exceptions for incomplete data.
        """

    # Properties
    cache: bytes  # Cache for incomplete quoted-printable chunks
    underlying: SupportsWrite[bytes]  # Underlying object to write decoded data

# Usage Example:
from python_multipart.decoders import QuotedPrintableDecoder
import binascii
import io
def decode_quoted_printable_stream(encoded_stream, output_file):
    """
    Stream-decode quoted-printable data from a readable object into a file.

    Parameters:
    - encoded_stream: Object whose read(size) returns quoted-printable bytes
    - output_file: Path of the file that receives the decoded bytes
    """
    with open(output_file, 'wb') as sink:
        decoder = QuotedPrintableDecoder(sink)
        try:
            # Pull 1024-byte pieces until read() returns something empty.
            while piece := encoded_stream.read(1024):
                decoder.write(piece)
            decoder.finalize()
        finally:
            decoder.close()
# Example with email-style quoted-printable content
def decode_email_content():
    """
    Decode typical email quoted-printable content.

    Returns:
    The decoded bytes of the built-in sample message
    """
    # Sample input; decodes to "Hello World!\r\nThis is a test."
    qp_data = b"Hello=20World!=0D=0AThis=20is=20a=20test."
    sink = io.BytesIO()
    decoder = QuotedPrintableDecoder(sink)
    # Five-byte slices are small enough to cut through "=XX" escapes, which
    # exercises the decoder's cache.
    step = 5
    offset = 0
    while offset < len(qp_data):
        piece = qp_data[offset:offset + step]
        print(f"Processing chunk: {piece}")
        decoder.write(piece)
        offset += step
    decoder.finalize()
    decoded = sink.getvalue()
    print(f"Decoded: {decoded}")
    return decoded
decode_email_content()

Handling Escape Sequences:
from python_multipart.decoders import QuotedPrintableDecoder
import io
def demonstrate_escape_handling():
    """
    Show how decoder handles incomplete escape sequences.

    Writes one quoted-printable message in three pieces whose boundaries
    fall inside "=XX" escapes, printing the decoder's cache after each write
    and the decoded result at the end.
    """
    # Quoted-printable with escape sequences split across chunks
    qp_data = b"Hello=3DWorld=21=0AEnd"
    # Should decode to: "Hello=World!\nEnd"
    output = io.BytesIO()
    decoder = QuotedPrintableDecoder(output)
    # Slice the chunks out of qp_data rather than retyping them, so the
    # sample and the pieces fed to the decoder cannot drift apart.
    chunks = [
        qp_data[:7],    # b"Hello=3"  - ends inside the "=3D" escape
        qp_data[7:15],  # b"DWorld=2" - completes "=3D", ends inside "=21"
        qp_data[15:],   # b"1=0AEnd"  - completes "=21", then a whole "=0A"
    ]
    for i, chunk in enumerate(chunks):
        print(f"Chunk {i}: {chunk}")
        decoder.write(chunk)
        print(f"Cache after chunk {i}: {decoder.cache}")
    decoder.finalize()
    output.seek(0)
    result = output.read()
    print(f"Final result: {result}")
    print(f"As string: {result.decode('utf-8')}")
demonstrate_escape_handling()

Decoders are typically used internally by the parsing system when Content-Transfer-Encoding headers specify encoded content:
from python_multipart import FormParser
from python_multipart.decoders import Base64Decoder, QuotedPrintableDecoder
import io
def handle_encoded_form_data():
    """
    Example of how decoders integrate with form parsing.

    Decodes one Base64 sample and one quoted-printable sample and prints
    both results.
    """
    # This is typically handled automatically by FormParser
    # but shown here for illustration

    def create_decoder_for_encoding(encoding, output):
        """
        Create appropriate decoder based on encoding type.

        Parameters:
        - encoding: Content-Transfer-Encoding value, compared
          case-insensitively
        - output: Object with write() method to receive decoded data

        Returns:
        A decoder wrapping `output`, or `output` itself when the encoding
        needs no decoding
        """
        normalized = encoding.lower()
        if normalized == 'base64':
            return Base64Decoder(output)
        if normalized == 'quoted-printable':
            return QuotedPrintableDecoder(output)
        return output  # No decoding needed

    def process_encoded_part(content_transfer_encoding, data):
        """
        Process a form part with content encoding.

        Parameters:
        - content_transfer_encoding: Encoding name, or a falsy value when
          the part is not encoded
        - data: Raw bytes of the part

        Returns:
        The decoded bytes
        """
        output = io.BytesIO()
        # With no (or an unrecognised) encoding the buffer is its own sink.
        decoder = output
        if content_transfer_encoding:
            decoder = create_decoder_for_encoding(content_transfer_encoding, output)
        try:
            decoder.write(data)
            # Only real decoders have finalize(); a plain BytesIO does not.
            if decoder is not output:
                decoder.finalize()
            # Take the result BEFORE closing: closing a decoder also closes
            # the underlying buffer, after which it can no longer be read.
            return output.getvalue()
        finally:
            if hasattr(decoder, 'close'):
                decoder.close()

    # Example usage
    base64_data = b"SGVsbG8gV29ybGQh"  # "Hello World!" in Base64
    qp_data = b"Hello=20World=21"  # "Hello World!" in quoted-printable
    decoded_b64 = process_encoded_part('base64', base64_data)
    decoded_qp = process_encoded_part('quoted-printable', qp_data)
    print(f"Base64 decoded: {decoded_b64}")
    print(f"QP decoded: {decoded_qp}")
handle_encoded_form_data()

Decoders can be used independently for any streaming decode operation:
from python_multipart.decoders import Base64Decoder
import io
class DataProcessor:
    """
    Custom data processor that uses Base64Decoder.

    Decoded bytes accumulate in `processed_data`; call get_result() once,
    after the last chunk, to retrieve them.
    """

    def __init__(self):
        buffer = io.BytesIO()
        self.processed_data = buffer
        self.decoder = Base64Decoder(buffer)

    def process_chunk(self, encoded_chunk):
        """
        Process a chunk of Base64 encoded data.

        Returns:
        Whatever the decoder's write() returns for this chunk
        """
        consumed = self.decoder.write(encoded_chunk)
        return consumed

    def get_result(self):
        """
        Get the final decoded result.

        Finalizes the decoder, reads the whole buffer, then closes the
        decoder.
        """
        decoder, buffer = self.decoder, self.processed_data
        decoder.finalize()
        # Rewind and read before close(), which also closes the buffer.
        buffer.seek(0)
        decoded = buffer.read()
        decoder.close()
        return decoded
# Usage: feed the two halves of the encoded message, then collect the result
processor = DataProcessor()
for encoded_piece in (b"SGVsbG8g", b"V29ybGQh"):
    processor.process_chunk(encoded_piece)
result = processor.get_result()
print(f"Processed result: {result}") # b"Hello World!"

Install with Tessl CLI
npx tessl i tessl/pypi-python-multipart