CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-betterproto

A better Protobuf / gRPC generator & library

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/serialization.md

Serialization and Wire Format

Low-level serialization utilities including varint encoding/decoding, wire type handling, and binary format parsing compatible with standard protobuf implementations.

Capabilities

Varint Encoding and Decoding

Functions for encoding and decoding variable-length integers used in the protobuf wire format.

def encode_varint(value: int) -> bytes:
    """
    Encode a single integer as a protobuf varint for serialization.

    Args:
        value: Integer value to encode. Negative values are accepted and
            are encoded using two's complement, so -1 becomes the 10-byte
            sequence ``ffffffffffffffffff01``.

    Returns:
        The value encoded in varint format; for example, 150 encodes to
        the two bytes ``9601`` and 16384 to ``808001``.
    """

def decode_varint(buffer: bytes, pos: int, signed: bool = False) -> Tuple[int, int]:
    """
    Decode a single varint value from a byte buffer.

    Args:
        buffer: Byte buffer containing varint data
        pos: Index in ``buffer`` at which the varint starts
        signed: Whether to interpret the decoded value as signed

    Returns:
        Tuple of (decoded_value, new_position), where new_position is the
        index just past the decoded varint. Decoding ``9601`` from
        position 0 gives ``(150, 2)``.

    Raises:
        ValueError: If too many bytes are encountered when decoding
    """

Field Parsing

Functions for parsing protobuf binary data into structured field information.

def parse_fields(value: bytes) -> Generator[ParsedField, None, None]:
    """
    Parse binary protobuf data into a stream of fields.

    Args:
        value: Binary protobuf data, e.g. the result of ``bytes(message)``

    Yields:
        ParsedField instances, each carrying the field number, wire type,
        decoded value and raw bytes of one field in ``value``
    """

@dataclass(frozen=True)
class ParsedField:
    """
    Represents a single field parsed from binary protobuf data.

    Instances are immutable because the dataclass is frozen.
    """

    number: int      # Field number from proto definition
    wire_type: int   # Wire type: 0 varint, 1 fixed64, 2 length-delimited, 5 fixed32
    value: Any       # Decoded field value (e.g. b'Alice' for a string field, 30 for an int32)
    raw: bytes       # Raw bytes for this field, including its leading key byte(s)

Wire Format Utilities

Functions for working with the protobuf wire format encoding. The related wire-type constants are listed in the Constants section below.

def serialized_on_wire(message: Message) -> bool:
    """
    Check if this message was or should be serialized on the wire.

    Used to detect presence (e.g. of an optional wrapper message) and
    internally during parsing/serialization. A nested message that has
    never been assigned to reports False; once one of its fields has been
    set it reports True, and a freshly constructed message reports False
    again.

    Args:
        message: Message instance to check

    Returns:
        True if the message was or should be serialized, False otherwise
    """

Usage Examples

Manual Varint Encoding

import betterproto

# Encode various integer values
# 150 does not fit in 7 bits, so it takes two bytes
small_value = betterproto.encode_varint(150)
print(small_value.hex())  # '9601'

# 16384 (2**14) does not fit in 14 bits, so it takes three bytes
large_value = betterproto.encode_varint(16384)
print(large_value.hex())  # '808001'

# Handle negative values (uses two's complement, so -1 takes 10 bytes)
negative_value = betterproto.encode_varint(-1)
print(negative_value.hex())  # 'ffffffffffffffffff01'

Manual Varint Decoding

# Decode varint from bytes
# (uses the `betterproto` module imported in the encoding example)
buffer = bytes.fromhex('9601')  # 150 encoded
# decode_varint returns the value and the position just past the varint
value, new_pos = betterproto.decode_varint(buffer, 0)
print(f"Decoded: {value}, next position: {new_pos}")  # Decoded: 150, next position: 2

# Handle multi-byte varints
buffer = bytes.fromhex('808001')  # 16384 encoded
value, new_pos = betterproto.decode_varint(buffer, 0)
print(f"Decoded: {value}")  # Decoded: 16384

# Handle signed interpretation
# NOTE(review): confirm against the installed betterproto version that
# signed=True really yields -1 here rather than the unsigned 2**64 - 1.
buffer = bytes.fromhex('ffffffffffffffffff01')  # -1 encoded
value, new_pos = betterproto.decode_varint(buffer, 0, signed=True)
print(f"Signed: {value}")  # Signed: -1

Parsing Raw Protobuf Data

from dataclasses import dataclass

# NOTE: `betterproto` itself must already be imported (`import betterproto`),
# as done in the varint encoding example.

@dataclass
class Person(betterproto.Message):
    """Two-field message used to demonstrate manual field parsing."""

    name: str = betterproto.string_field(1)
    age: int = betterproto.int32_field(2)

# Create and serialize a message
person = Person(name="Alice", age=30)
binary_data = bytes(person)

# Parse fields manually
for field in betterproto.parse_fields(binary_data):
    print(f"Field {field.number}: wire_type={field.wire_type}, "
          f"value={field.value}, raw={field.raw.hex()}")

# Example output:
# (the string field is length-delimited, wire type 2, so its value is the
#  bytes payload; the int32 field is a varint, wire type 0)
# Field 1: wire_type=2, value=b'Alice', raw=0a05416c696365
# Field 2: wire_type=0, value=30, raw=101e

Checking Message Serialization State

from dataclasses import dataclass

# NOTE: relies on `betterproto` being imported and on the `Person` message
# class defined in the "Parsing Raw Protobuf Data" example.

@dataclass
class Container(betterproto.Message):
    """Wrapper message holding a single nested Person."""

    item: Person = betterproto.message_field(1)

# Create container with unset message
container = Container()
print(betterproto.serialized_on_wire(container.item))  # False

# Set a field in the nested message
container.item.name = "Bob"
print(betterproto.serialized_on_wire(container.item))  # True

# Even setting to default value marks as serialized
container.item.age = 0  # Default value
print(betterproto.serialized_on_wire(container.item))  # Still True

# Create new nested message explicitly
# (a freshly constructed Person has had no fields set, so it reports False)
container.item = Person()
print(betterproto.serialized_on_wire(container.item))  # False

Custom Serialization Logic

# Understanding how fields are processed during serialization
# (assumes `betterproto` and `dataclass` are already imported, as in the
#  earlier examples)
@dataclass
class CustomMessage(betterproto.Message):
    """Example message with a helper that prints per-field serialization info."""

    value: int = betterproto.int32_field(1)
    optional_text: str = betterproto.string_field(2)

    def debug_serialization(self):
        """Debug helper to show serialization details.

        Prints, for every dataclass field, its proto field number, proto
        type, current value, default value, and whether the current value
        differs from that default.
        """
        import dataclasses

        for field in dataclasses.fields(self):
            # Proto-specific metadata (number, proto_type) attached to the field
            meta = betterproto.FieldMetadata.get(field)
            value = getattr(self, field.name)

            print(f"Field {field.name}:")
            print(f"  Number: {meta.number}")
            print(f"  Type: {meta.proto_type}")
            print(f"  Value: {value}")
            # NOTE(review): _get_field_default is a private betterproto
            # helper; confirm its signature for the installed version.
            print(f"  Default: {self._get_field_default(field, meta)}")
            # Heuristic: presumes a field is written only when it differs
            # from its default — TODO confirm against the real serializer.
            print(f"  Will serialize: {value != self._get_field_default(field, meta)}")

# Use the debug helper
msg = CustomMessage(value=42, optional_text="")
msg.debug_serialization()

Working with Unknown Fields

# (assumes `betterproto` and `dataclass` are already imported, as in the
#  earlier examples)
@dataclass
class KnownMessage(betterproto.Message):
    """Message that declares only field 1; any other field is unknown to it."""

    known_field: str = betterproto.string_field(1)

# Create message with extra data (simulating newer version)
original_data = bytes([
    0x0a, 0x05, 0x48, 0x65, 0x6c, 0x6c, 0x6f,  # key 0x0a = field 1, wire type 2; length 5; "Hello"
    0x12, 0x05, 0x57, 0x6f, 0x72, 0x6c, 0x64,  # key 0x12 = field 2, wire type 2; length 5; "World" (unknown)
])

# Parse with known message - unknown fields preserved
msg = KnownMessage().parse(original_data)
print(f"Known field: {msg.known_field}")  # Known field: Hello
# _unknown_fields holds the raw bytes of field 2, key and length included
print(f"Unknown fields: {msg._unknown_fields.hex()}")  # Unknown fields: 1205576f726c64

# Re-serialize includes unknown fields
serialized = bytes(msg)
print(f"Includes unknown: {serialized == original_data}")  # True

Constants

# Wire type constants
# (numeric wire type identifiers; this is the value reported in
#  ParsedField.wire_type)
WIRE_VARINT: int = 0      # Variable-length integers
WIRE_FIXED_64: int = 1    # 64-bit fixed-length
WIRE_LEN_DELIM: int = 2   # Length-delimited (strings, messages, etc.)
WIRE_FIXED_32: int = 5    # 32-bit fixed-length

# Type collections for wire format mapping
# Each list names the proto types that use the matching wire type above.
WIRE_VARINT_TYPES: List[str] = [
    "enum", "bool", "int32", "int64", "uint32", "uint64", "sint32", "sint64"
]

WIRE_FIXED_32_TYPES: List[str] = ["float", "fixed32", "sfixed32"]

WIRE_FIXED_64_TYPES: List[str] = ["double", "fixed64", "sfixed64"]

WIRE_LEN_DELIM_TYPES: List[str] = ["string", "bytes", "message", "map"]

# Type collections for special handling
# FIXED_TYPES holds every type from WIRE_FIXED_32_TYPES and WIRE_FIXED_64_TYPES.
FIXED_TYPES: List[str] = [
    "float", "double", "fixed32", "sfixed32", "fixed64", "sfixed64"
]

# 64-bit integer types, whatever their wire type (varint or fixed64).
INT_64_TYPES: List[str] = ["int64", "uint64", "sint64", "fixed64", "sfixed64"]

# PACKED_TYPES holds every varint type followed by every fixed-width type.
PACKED_TYPES: List[str] = [
    "enum", "bool", "int32", "int64", "uint32", "uint64", "sint32", "sint64",
    "float", "double", "fixed32", "sfixed32", "fixed64", "sfixed64"
]

Install with Tessl CLI

npx tessl i tessl/pypi-betterproto

docs

code-generation.md

enumerations.md

grpc-services.md

index.md

message-fields.md

serialization.md

utilities.md

tile.json