CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-python-multipart

A streaming multipart parser for Python that enables efficient handling of file uploads and form data in web applications

Pending
Overview
Eval results
Files

docs/decoders.md

Content Decoders

Streaming decoders for Base64 and quoted-printable encoded content with automatic caching for incomplete chunks. These decoders enable processing of encoded form data without loading entire payloads into memory.

Protocol Types

from typing import Protocol, TypeVar

# Contravariant: a writer that accepts a broader payload type can stand in
# wherever a writer of a narrower type is expected.
_T_contra = TypeVar("_T_contra", contravariant=True)


class SupportsWrite(Protocol[_T_contra]):
    """Structural type for any object that exposes a ``write()`` method.

    The protocol is generic over the payload type accepted by ``write()``,
    which is what makes the ``SupportsWrite[bytes]`` annotations in the
    decoder signatures valid; a non-generic protocol cannot be subscripted.
    """

    def write(self, __b: _T_contra) -> object: ...

Capabilities

Base64Decoder

Provides an interface for decoding a stream of Base64 data, with automatic caching so that writes of arbitrary size and incomplete chunks are handled correctly.

class Base64Decoder:
    """
    Streaming Base64 decoder with chunk caching.

    Encoded input may be written in pieces of any size. Bytes that do not
    yet form a complete Base64 chunk are kept in ``cache`` until a later
    write completes them; decoded output is forwarded to ``underlying``.
    """

    def __init__(self, underlying: SupportsWrite[bytes]) -> None:
        """
        Initialize Base64Decoder.

        Parameters:
        - underlying: Object with write() method to receive decoded data
        """

    def write(self, data: bytes) -> int:
        """
        Decode Base64 data and write to underlying object.

        Parameters:
        - data: Base64 encoded bytes to decode; does not need to end on a
          complete Base64 chunk

        Returns:
        Number of input bytes processed

        Raises:
        - DecodeError: If invalid Base64 data is encountered
        """

    def close(self) -> None:
        """
        Close decoder and underlying object if it has close() method.

        Because the underlying object is closed as well, read any decoded
        output out of it (for example from an io.BytesIO) before calling
        close().
        """

    def finalize(self) -> None:
        """
        Finalize decoder; call once after the last write().

        Raises:
        - DecodeError: If data remains in cache (incomplete Base64)
        """
        # NOTE(review): the previous summary also said finalize() writes any
        # remaining cached data, which conflicts with the Raises clause above.
        # Confirm against the implementation before restoring that claim.

    # Attributes
    # Cache for incomplete Base64 chunks
    cache: bytearray
    # Underlying object to write decoded data
    underlying: SupportsWrite[bytes]

Usage Example:

from python_multipart.decoders import Base64Decoder
from python_multipart.exceptions import DecodeError
import base64
import io

def decode_base64_stream(encoded_stream, output_file):
    """Decode a Base64 stream into a file.

    Reads *encoded_stream* in 1024-byte pieces, feeds each piece through a
    Base64Decoder and lets the decoder write the decoded bytes to the file
    opened at *output_file*.

    A DecodeError is reported on stdout and then re-raised. The decoder is
    always closed, whether decoding succeeded or not.
    """
    with open(output_file, 'wb') as sink:
        decoder = Base64Decoder(sink)

        try:
            # An empty read signals end of input.
            while chunk := encoded_stream.read(1024):
                decoder.write(chunk)

            # No more input is coming; finalize() raises if an incomplete
            # Base64 chunk is still cached.
            decoder.finalize()
        except DecodeError as e:
            print(f"Base64 decode error: {e}")
            raise
        finally:
            decoder.close()

# Example with in-memory decoding
def decode_base64_to_memory(base64_data):
    """Decode Base64 bytes into memory and return the decoded payload.

    The input is fed to the decoder 100 bytes at a time, so individual
    writes may end in the middle of a Base64 chunk.

    Returns the decoded bytes, or None (after printing the error) when the
    decoder raises DecodeError.
    """
    sink = io.BytesIO()
    decoder = Base64Decoder(sink)
    step = 100

    try:
        for start in range(0, len(base64_data), step):
            decoder.write(base64_data[start:start + step])

        decoder.finalize()

        # Snapshot the buffer here, inside the try: the finally clause
        # closes the decoder, which closes the buffer as well.
        return sink.getvalue()
    except DecodeError as e:
        print(f"Decode error: {e}")
        return None
    finally:
        decoder.close()

# Round-trip check: encode a known payload with the standard library, push it
# through decode_base64_to_memory(), and compare the result with the original.
original_data = b"Hello, World! This is a test message."
encoded_data = base64.b64encode(original_data)

print(f"Original: {original_data}")
print(f"Encoded: {encoded_data}")

# decode_base64_to_memory() returns None on a DecodeError, in which case the
# "Match" line below prints False.
decoded_data = decode_base64_to_memory(encoded_data)
print(f"Decoded: {decoded_data}")
print(f"Match: {original_data == decoded_data}")

Handling Incomplete Data:

from python_multipart.decoders import Base64Decoder
from python_multipart.exceptions import DecodeError
import io

def demonstrate_chunk_handling():
    """Show how the decoder copes with writes that split Base64 chunks.

    The encoded payload itself is well-formed; it is only cut at awkward
    offsets before being written, so the decoder has to cache leftovers
    between writes. The cache is printed after every write.
    """
    # "Hello World!" encoded: 16 Base64 characters.
    base64_data = b"SGVsbG8gV29ybGQh"

    sink = io.BytesIO()
    decoder = Base64Decoder(sink)

    # Cut points that do not fall on multiples of four, so writes arrive
    # unaligned: b"SGV", b"sbG8", b"gV2", b"9ybGQh".
    cuts = (0, 3, 7, 10, len(base64_data))
    chunks = [base64_data[lo:hi] for lo, hi in zip(cuts, cuts[1:])]

    try:
        for i, chunk in enumerate(chunks):
            print(f"Writing chunk {i}: {chunk}")
            decoder.write(chunk)
            print(f"Cache after chunk {i}: {decoder.cache}")

        decoder.finalize()

        # Read the output before the finally clause closes the decoder.
        result = sink.getvalue()
        print(f"Decoded result: {result}")
    except DecodeError as e:
        print(f"Error: {e}")
    finally:
        decoder.close()


demonstrate_chunk_handling()

QuotedPrintableDecoder

Provides an interface for decoding a stream of quoted-printable data, with caching for incomplete escape sequences.

class QuotedPrintableDecoder:
    """
    Streaming quoted-printable decoder with chunk caching.

    Encoded input may be written in pieces of any size. An escape sequence
    that is cut off at the end of a write is kept in ``cache`` until a later
    write completes it; decoded output is forwarded to ``underlying``.
    """

    def __init__(self, underlying: SupportsWrite[bytes]) -> None:
        """
        Initialize QuotedPrintableDecoder.

        Parameters:
        - underlying: Object with write() method to receive decoded data
        """

    def write(self, data: bytes) -> int:
        """
        Decode quoted-printable data and write to underlying object.

        Parameters:
        - data: Quoted-printable encoded bytes to decode; may end in the
          middle of an escape sequence

        Returns:
        Number of input bytes processed
        """

    def close(self) -> None:
        """
        Close decoder and underlying object if it has close() method.

        Because the underlying object is closed as well, read any decoded
        output out of it (for example from an io.BytesIO) before calling
        close().
        """

    def finalize(self) -> None:
        """
        Finalize decoder, writing any remaining cached data.
        Does not raise exceptions for incomplete data.

        Call once after the last write().
        """

    # Attributes
    # Cache for incomplete quoted-printable chunks
    cache: bytes
    # Underlying object to write decoded data
    underlying: SupportsWrite[bytes]

Usage Example:

from python_multipart.decoders import QuotedPrintableDecoder
import binascii
import io

def decode_quoted_printable_stream(encoded_stream, output_file):
    """Decode a quoted-printable stream into a file.

    Reads *encoded_stream* in 1024-byte pieces, feeds each piece through a
    QuotedPrintableDecoder and lets the decoder write the decoded bytes to
    the file opened at *output_file*. The decoder is always closed.
    """
    with open(output_file, 'wb') as sink:
        decoder = QuotedPrintableDecoder(sink)

        try:
            # An empty read signals end of input.
            while chunk := encoded_stream.read(1024):
                decoder.write(chunk)

            # Flush whatever the decoder still holds in its cache.
            decoder.finalize()
        finally:
            decoder.close()

# Example with email-style quoted-printable content
def decode_email_content():
    """Decode a typical email-style quoted-printable payload.

    The payload is written five bytes at a time, so several writes end in
    the middle of an escape sequence and exercise the decoder's cache.
    Prints each chunk and the decoded result, then returns the decoded
    bytes.
    """
    # Encodes "Hello World!", a CRLF line break, then "This is a test."
    qp_data = b"Hello=20World!=0D=0AThis=20is=20a=20test."

    sink = io.BytesIO()
    decoder = QuotedPrintableDecoder(sink)

    # Deliberately tiny writes to exercise the cache.
    chunk_size = 5
    offset = 0
    while offset < len(qp_data):
        chunk = qp_data[offset:offset + chunk_size]
        print(f"Processing chunk: {chunk}")
        decoder.write(chunk)
        offset += chunk_size

    decoder.finalize()

    result = sink.getvalue()
    print(f"Decoded: {result}")
    return result


decode_email_content()

Handling Escape Sequences:

from python_multipart.decoders import QuotedPrintableDecoder
import io

def demonstrate_escape_handling():
    """Show how the decoder handles escape sequences split across writes.

    The payload is cut inside two of its escape sequences. After each write
    the decoder's cache is printed, so the held-back partial escape is
    visible. Finally the decoded bytes are printed raw and as UTF-8 text.
    """
    # Decodes to "Hello=World!", a newline, then "End".
    qp_data = b"Hello=3DWorld=21=0AEnd"

    output = io.BytesIO()
    decoder = QuotedPrintableDecoder(output)

    # Slice the chunks out of qp_data, rather than repeating the payload as
    # separate literals, so the chunks can never drift from the payload.
    chunks = [
        qp_data[:7],    # b"Hello=3"  - ends inside the "=3D" escape
        qp_data[7:15],  # b"DWorld=2" - completes "=3D", ends inside "=21"
        qp_data[15:],   # b"1=0AEnd"  - completes "=21", then a whole "=0A"
    ]

    for i, chunk in enumerate(chunks):
        print(f"Chunk {i}: {chunk}")
        decoder.write(chunk)
        print(f"Cache after chunk {i}: {decoder.cache}")

    decoder.finalize()

    result = output.getvalue()
    print(f"Final result: {result}")
    print(f"As string: {result.decode('utf-8')}")


demonstrate_escape_handling()

Integration with Form Parsing

Decoders are typically used internally by the parsing system when Content-Transfer-Encoding headers specify encoded content:

from python_multipart import FormParser
from python_multipart.decoders import Base64Decoder, QuotedPrintableDecoder
import io

def handle_encoded_form_data():
    """Example of how decoders integrate with form parsing.

    FormParser normally does this automatically; the steps are spelled out
    here for illustration only.
    """

    def create_decoder_for_encoding(encoding, output):
        """Return a decoder wrapping *output* for the given encoding.

        For any encoding other than base64 or quoted-printable, *output*
        itself is returned so that data is written through undecoded.
        """
        normalized = encoding.lower()
        if normalized == 'base64':
            return Base64Decoder(output)
        if normalized == 'quoted-printable':
            return QuotedPrintableDecoder(output)
        return output  # No decoding needed

    def process_encoded_part(content_transfer_encoding, data):
        """Decode one form part and return its decoded bytes.

        Parameters:
        - content_transfer_encoding: Encoding name, or a falsy value when
          the part is not encoded
        - data: Raw bytes of the part
        """
        output = io.BytesIO()

        if not content_transfer_encoding:
            # No encoding - write directly
            output.write(data)
            return output.getvalue()

        decoder = create_decoder_for_encoding(content_transfer_encoding, output)
        try:
            # Write encoded data through decoder
            decoder.write(data)

            # For pass-through encodings the "decoder" is the plain buffer,
            # which has no finalize() method.
            if decoder is not output:
                decoder.finalize()

            # Take the result BEFORE closing: a decoder's close() also
            # closes the underlying buffer, and a closed BytesIO cannot be
            # read.
            return output.getvalue()
        finally:
            decoder.close()

    # Example usage
    base64_data = b"SGVsbG8gV29ybGQh"  # "Hello World!" in Base64
    qp_data = b"Hello=20World=21"       # "Hello World!" in quoted-printable

    decoded_b64 = process_encoded_part('base64', base64_data)
    decoded_qp = process_encoded_part('quoted-printable', qp_data)

    print(f"Base64 decoded: {decoded_b64}")
    print(f"QP decoded: {decoded_qp}")


handle_encoded_form_data()

Custom Decoder Usage

Decoders can be used independently for any streaming decode operation:

from python_multipart.decoders import Base64Decoder
import io

class DataProcessor:
    """Accumulate Base64-encoded chunks and expose the decoded bytes.

    Decoded output is collected in an in-memory buffer (``processed_data``)
    through a Base64Decoder (``decoder``).
    """

    def __init__(self):
        sink = io.BytesIO()
        self.processed_data = sink
        self.decoder = Base64Decoder(sink)

    def process_chunk(self, encoded_chunk):
        """Feed one Base64 chunk to the decoder and return the decoder's write() result."""
        consumed = self.decoder.write(encoded_chunk)
        return consumed

    def get_result(self):
        """Finalize decoding and return everything decoded so far.

        The decoder is closed afterwards, which closes the buffer too, so
        this is meant to be called once, after the last chunk.
        """
        self.decoder.finalize()
        decoded = self.processed_data.getvalue()
        self.decoder.close()
        return decoded

# Usage: feed the payload in two chunks, then collect the decoded bytes.
processor = DataProcessor()
processor.process_chunk(b"SGVsbG8g")
processor.process_chunk(b"V29ybGQh")
# get_result() finalizes and closes the decoder, so call it last.
result = processor.get_result()
print(f"Processed result: {result}")  # b"Hello World!"

Install with Tessl CLI

npx tessl i tessl/pypi-python-multipart

docs

data-objects.md

decoders.md

exceptions.md

form-parsing.md

index.md

streaming-parsers.md

tile.json