A better Protobuf / gRPC generator & library
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Low-level serialization utilities including varint encoding/decoding, wire type handling, and binary format parsing compatible with standard protobuf implementations.
Functions for encoding and decoding variable-length integers used in the protobuf wire format.
def encode_varint(value: int) -> bytes:
    """
    Encodes a single varint value for serialization.

    Args:
        value: Integer value to encode (handles negative values). The usage
            example on this page shows -1 encoding to the ten-byte
            two's-complement form ``ffffffffffffffffff01``.

    Returns:
        Encoded bytes using varint format. In the usage example on this
        page, 150 encodes to ``9601`` and 16384 to ``808001``.
    """
def decode_varint(buffer: bytes, pos: int, signed: bool = False) -> Tuple[int, int]:
    """
    Decode a single varint value from a byte buffer.

    Args:
        buffer: Byte buffer containing varint data
        pos: Starting position in the buffer
        signed: Whether to interpret as signed value

    Returns:
        Tuple of (decoded_value, new_position). In the usage example on this
        page, decoding the two-byte varint ``9601`` from position 0 gives
        ``(150, 2)``, i.e. new_position is the offset just past the varint.

    Raises:
        ValueError: If too many bytes encountered when decoding
    """

Functions for parsing protobuf binary data into structured field information.
def parse_fields(value: bytes) -> Generator[ParsedField, None, None]:
    """
    Parse protobuf fields from binary data.

    Args:
        value: Binary protobuf data

    Yields:
        ParsedField instances containing field information: the field
        number, the wire type, the decoded value, and the raw bytes of
        the field.
    """
@dataclass(frozen=True)
class ParsedField:
    """Represents a parsed protobuf field."""

    number: int  # Field number from proto definition
    wire_type: int  # Wire type (varint, fixed32, length-delimited, etc.)
    # Decoded field value. In the parse_fields example on this page a
    # length-delimited string surfaces as bytes (b'Alice') and a varint as int.
    value: Any
    # Raw bytes for this field. In the parse_fields example on this page the
    # raw bytes include the key and length prefix (0a05) ahead of the payload.
    raw: bytes

# Functions and constants for handling the protobuf wire format encoding.
def serialized_on_wire(message: Message) -> bool:
    """
    Check if this message was or should be serialized on the wire.

    Used to detect presence (e.g. optional wrapper message) and
    internally during parsing/serialization.

    The usage example on this page shows False for an unset nested message
    and True once one of its fields has been assigned, even when the
    assigned value equals the field's default.

    Args:
        message: Message instance to check

    Returns:
        True if message was/should be serialized
    """

import betterproto
# Encode various integer values
# 150 needs two 7-bit groups: 0x96 (low seven bits plus continuation bit), 0x01.
small_value = betterproto.encode_varint(150)
print(small_value.hex())  # '9601'

# 16384 is 1 << 14, so the two low 7-bit groups are empty (0x80 0x80).
large_value = betterproto.encode_varint(16384)
print(large_value.hex())  # '808001'

# Handle negative values (uses two's complement)
negative_value = betterproto.encode_varint(-1)
print(negative_value.hex())  # 'ffffffffffffffffff01'

# Decode varint from bytes
# Decode the single-varint buffers produced by the encode example; each call
# returns the decoded value together with the position just past the varint.
buffer = bytes.fromhex('9601')  # 150 encoded
value, new_pos = betterproto.decode_varint(buffer, 0)
print(f"Decoded: {value}, next position: {new_pos}")  # Decoded: 150, next position: 2

# Handle multi-byte varints
buffer = bytes.fromhex('808001')  # 16384 encoded
value, new_pos = betterproto.decode_varint(buffer, 0)
print(f"Decoded: {value}")  # Decoded: 16384

# Handle signed interpretation
# NOTE(review): confirm against the installed betterproto release that
# `signed=True` is honoured; if the flag is accepted but ignored, this prints
# 18446744073709551615 (2**64 - 1) rather than -1.
buffer = bytes.fromhex('ffffffffffffffffff01')  # -1 encoded
value, new_pos = betterproto.decode_varint(buffer, 0, signed=True)
print(f"Signed: {value}")  # Signed: -1

# Import needed by the @dataclass message definition that follows; it was
# previously fused into the trailing comment above and therefore never ran.
from dataclasses import dataclass
@dataclass
class Person(betterproto.Message):
    """Two-field example message used to demonstrate manual field parsing."""

    name: str = betterproto.string_field(1)
    age: int = betterproto.int32_field(2)


# Create and serialize a message
person = Person(name="Alice", age=30)
binary_data = bytes(person)

# Parse fields manually
for field in betterproto.parse_fields(binary_data):
    print(f"Field {field.number}: wire_type={field.wire_type}, "
          f"value={field.value}, raw={field.raw.hex()}")

# Example output:
# Field 1: wire_type=2, value=b'Alice', raw=0a05416c696365
# Field 2: wire_type=0, value=30, raw=101e

# Import needed by the @dataclass message definition that follows; it was
# previously fused into the "Field 2" output comment above and never ran.
from dataclasses import dataclass
@dataclass
class Container(betterproto.Message):
    # Person is the message class defined in the previous example.
    item: Person = betterproto.message_field(1)


# Create container with unset message
container = Container()
print(betterproto.serialized_on_wire(container.item))  # False

# Set a field in the nested message
container.item.name = "Bob"
print(betterproto.serialized_on_wire(container.item))  # True

# Even setting to default value marks as serialized
container.item.age = 0  # Default value
print(betterproto.serialized_on_wire(container.item))  # Still True

# Create new nested message explicitly
container.item = Person()
print(betterproto.serialized_on_wire(container.item))  # False

# Understanding how fields are processed during serialization
@dataclass
class CustomMessage(betterproto.Message):
    value: int = betterproto.int32_field(1)
    optional_text: str = betterproto.string_field(2)

    def debug_serialization(self) -> None:
        """Debug helper to show serialization details.

        Prints, for every dataclass field of this message, its field number,
        proto type, current value, default value, and whether the current
        value differs from that default.
        """
        import dataclasses

        for field in dataclasses.fields(self):
            meta = betterproto.FieldMetadata.get(field)
            value = getattr(self, field.name)
            print(f"Field {field.name}:")
            print(f" Number: {meta.number}")
            print(f" Type: {meta.proto_type}")
            print(f" Value: {value}")
            # NOTE(review): _get_field_default is underscore-prefixed, so it is
            # presumably a private helper - confirm it is stable to rely on.
            print(f" Default: {self._get_field_default(field, meta)}")
            # A field whose value equals its default is reported as False here.
            print(f" Will serialize: {value != self._get_field_default(field, meta)}")


# Use the debug helper
msg = CustomMessage(value=42, optional_text="")
msg.debug_serialization()


@dataclass
class KnownMessage(betterproto.Message):
    known_field: str = betterproto.string_field(1)


# Create message with extra data (simulating newer version)
# Each entry starts with a key byte of (field_number << 3) | wire_type:
# 0x0a is field 1 and 0x12 is field 2, both with wire type 2, then length 0x05.
original_data = bytes([
    0x0a, 0x05, 0x48, 0x65, 0x6c, 0x6c, 0x6f,  # field 1: "Hello"
    0x12, 0x05, 0x57, 0x6f, 0x72, 0x6c, 0x64,  # field 2: "World" (unknown)
])

# Parse with known message - unknown fields preserved
msg = KnownMessage().parse(original_data)
print(f"Known field: {msg.known_field}")  # Known field: Hello
print(f"Unknown fields: {msg._unknown_fields.hex()}")  # Unknown fields: 1205576f726c64

# Re-serialize includes unknown fields
serialized = bytes(msg)
print(f"Includes unknown: {serialized == original_data}")  # True

# Wire type constants
# Numeric wire-type identifiers.
WIRE_VARINT: int = 0  # Variable-length integers
WIRE_FIXED_64: int = 1  # 64-bit fixed-length
WIRE_LEN_DELIM: int = 2  # Length-delimited (strings, messages, etc.)
WIRE_FIXED_32: int = 5  # 32-bit fixed-length

# Type collections for wire format mapping
# Each list names the proto types carried by the wire type of the same name.
WIRE_VARINT_TYPES: List[str] = [
    "enum", "bool", "int32", "int64", "uint32", "uint64", "sint32", "sint64"
]
WIRE_FIXED_32_TYPES: List[str] = ["float", "fixed32", "sfixed32"]
WIRE_FIXED_64_TYPES: List[str] = ["double", "fixed64", "sfixed64"]
WIRE_LEN_DELIM_TYPES: List[str] = ["string", "bytes", "message", "map"]

# Type collections for special handling
# FIXED_TYPES holds the members of both fixed-width lists above.
FIXED_TYPES: List[str] = [
    "float", "double", "fixed32", "sfixed32", "fixed64", "sfixed64"
]
INT_64_TYPES: List[str] = ["int64", "uint64", "sint64", "fixed64", "sfixed64"]
# PACKED_TYPES lists the WIRE_VARINT_TYPES entries followed by the FIXED_TYPES
# entries, in the same order.
PACKED_TYPES: List[str] = [
    "enum", "bool", "int32", "int64", "uint32", "uint64", "sint32", "sint64",
    "float", "double", "fixed32", "sfixed32", "fixed64", "sfixed64"
]

Install with Tessl CLI
npx tessl i tessl/pypi-betterproto