0
# Serialization and Wire Format
1
2
Low-level serialization utilities: varint encoding and decoding, wire type handling, and binary format parsing, all compatible with standard protobuf implementations.
3
4
## Capabilities
5
6
### Varint Encoding and Decoding
7
8
Functions for encoding and decoding variable-length integers used in the protobuf wire format.
9
10
```python { .api }
11
def encode_varint(value: int) -> bytes:
12
"""
13
Encode a single varint value for serialization.
14
15
Args:
16
value: Integer value to encode (negative values are encoded using two's complement)
17
18
Returns:
19
Encoded bytes using varint format
20
"""
21
22
def decode_varint(buffer: bytes, pos: int, signed: bool = False) -> Tuple[int, int]:
23
"""
24
Decode a single varint value from a byte buffer.
25
26
Args:
27
buffer: Byte buffer containing varint data
28
pos: Starting position in the buffer
29
signed: Whether to interpret the decoded value as a signed integer
30
31
Returns:
32
Tuple of (decoded_value, new_position)
33
34
Raises:
35
ValueError: If too many bytes are encountered while decoding the varint
36
"""
37
```
38
39
### Field Parsing
40
41
Functions for parsing protobuf binary data into structured field information.
42
43
```python { .api }
44
def parse_fields(value: bytes) -> Generator[ParsedField, None, None]:
45
"""
46
Parse protobuf fields from binary data.
47
48
Args:
49
value: Binary protobuf data
50
51
Yields:
52
ParsedField instances containing field information
53
"""
54
55
@dataclass(frozen=True)
56
class ParsedField:
57
"""Represents a parsed protobuf field."""
58
59
number: int # Field number from proto definition
60
wire_type: int # Wire type (varint, fixed32, length-delimited, etc.)
61
value: Any # Decoded field value
62
raw: bytes # Raw bytes for this field
63
```
64
65
### Wire Format Utilities
66
67
Functions for handling the protobuf wire format encoding; the related wire type constants are listed in the Constants section.
68
69
```python { .api }
70
def serialized_on_wire(message: Message) -> bool:
71
"""
72
Check whether this message was, or should be, serialized on the wire.
73
74
Used to detect presence (e.g. optional wrapper message) and
75
internally during parsing/serialization.
76
77
Args:
78
message: Message instance to check
79
80
Returns:
81
True if message was/should be serialized
82
"""
83
```
84
85
## Usage Examples
86
87
### Manual Varint Encoding
88
89
```python
90
import betterproto
91
92
# Encode various integer values
93
small_value = betterproto.encode_varint(150)
94
print(small_value.hex()) # '9601'
95
96
large_value = betterproto.encode_varint(16384)
97
print(large_value.hex()) # '808001'
98
99
# Handle negative values (uses two's complement)
100
negative_value = betterproto.encode_varint(-1)
101
print(negative_value.hex()) # 'ffffffffffffffffff01'
102
```
103
104
### Manual Varint Decoding
105
106
```python
107
# Decode varint from bytes
108
buffer = bytes.fromhex('9601') # 150 encoded
109
value, new_pos = betterproto.decode_varint(buffer, 0)
110
print(f"Decoded: {value}, next position: {new_pos}") # Decoded: 150, next position: 2
111
112
# Handle multi-byte varints
113
buffer = bytes.fromhex('808001') # 16384 encoded
114
value, new_pos = betterproto.decode_varint(buffer, 0)
115
print(f"Decoded: {value}") # Decoded: 16384
116
117
# Handle signed interpretation
118
buffer = bytes.fromhex('ffffffffffffffffff01') # -1 encoded
119
value, new_pos = betterproto.decode_varint(buffer, 0, signed=True)
120
print(f"Signed: {value}") # Signed: -1
121
```
122
123
### Parsing Raw Protobuf Data
124
125
```python
126
from dataclasses import dataclass
127
128
@dataclass
129
class Person(betterproto.Message):
130
name: str = betterproto.string_field(1)
131
age: int = betterproto.int32_field(2)
132
133
# Create and serialize a message
134
person = Person(name="Alice", age=30)
135
binary_data = bytes(person)
136
137
# Parse fields manually
138
for field in betterproto.parse_fields(binary_data):
139
print(f"Field {field.number}: wire_type={field.wire_type}, "
140
f"value={field.value}, raw={field.raw.hex()}")
141
142
# Example output:
143
# Field 1: wire_type=2, value=b'Alice', raw=0a05416c696365
144
# Field 2: wire_type=0, value=30, raw=101e
145
```
146
147
### Checking Message Serialization State
148
149
```python
150
from dataclasses import dataclass
151
152
@dataclass
153
class Container(betterproto.Message):
154
item: Person = betterproto.message_field(1)
155
156
# Create container with unset message
157
container = Container()
158
print(betterproto.serialized_on_wire(container.item)) # False
159
160
# Set a field in the nested message
161
container.item.name = "Bob"
162
print(betterproto.serialized_on_wire(container.item)) # True
163
164
# Even assigning a field its default value marks the message as serialized
165
container.item.age = 0 # Default value
166
print(betterproto.serialized_on_wire(container.item)) # Still True
167
168
# Create new nested message explicitly
169
container.item = Person()
170
print(betterproto.serialized_on_wire(container.item)) # False
171
```
172
173
### Custom Serialization Logic
174
175
```python
176
# Understanding how fields are processed during serialization
177
@dataclass
178
class CustomMessage(betterproto.Message):
179
value: int = betterproto.int32_field(1)
180
optional_text: str = betterproto.string_field(2)
181
182
def debug_serialization(self):
183
"""Debug helper to show serialization details."""
184
import dataclasses
185
186
for field in dataclasses.fields(self):
187
meta = betterproto.FieldMetadata.get(field)
188
value = getattr(self, field.name)
189
190
print(f"Field {field.name}:")
191
print(f" Number: {meta.number}")
192
print(f" Type: {meta.proto_type}")
193
print(f" Value: {value}")
194
print(f" Default: {self._get_field_default(field, meta)}")
195
print(f" Will serialize: {value != self._get_field_default(field, meta)}")
196
197
# Use the debug helper
198
msg = CustomMessage(value=42, optional_text="")
199
msg.debug_serialization()
200
```
201
202
### Working with Unknown Fields
203
204
```python
205
@dataclass
206
class KnownMessage(betterproto.Message):
207
known_field: str = betterproto.string_field(1)
208
209
# Create message with extra data (simulating newer version)
210
original_data = bytes([
211
0x0a, 0x05, 0x48, 0x65, 0x6c, 0x6c, 0x6f, # field 1: "Hello"
212
0x12, 0x05, 0x57, 0x6f, 0x72, 0x6c, 0x64, # field 2: "World" (unknown)
213
])
214
215
# Parse with known message - unknown fields preserved
216
msg = KnownMessage().parse(original_data)
217
print(f"Known field: {msg.known_field}") # Known field: Hello
218
print(f"Unknown fields: {msg._unknown_fields.hex()}") # Unknown fields: 1205576f726c64
219
220
# Re-serializing includes the unknown fields
221
serialized = bytes(msg)
222
print(f"Includes unknown: {serialized == original_data}") # True
223
```
224
225
## Constants
226
227
```python { .api }
228
# Wire type constants
229
WIRE_VARINT: int = 0 # Variable-length integers
230
WIRE_FIXED_64: int = 1 # 64-bit fixed-length
231
WIRE_LEN_DELIM: int = 2 # Length-delimited (strings, messages, etc.)
232
WIRE_FIXED_32: int = 5 # 32-bit fixed-length
233
234
# Type collections for wire format mapping
235
WIRE_VARINT_TYPES: List[str] = [
236
"enum", "bool", "int32", "int64", "uint32", "uint64", "sint32", "sint64"
237
]
238
239
WIRE_FIXED_32_TYPES: List[str] = ["float", "fixed32", "sfixed32"]
240
241
WIRE_FIXED_64_TYPES: List[str] = ["double", "fixed64", "sfixed64"]
242
243
WIRE_LEN_DELIM_TYPES: List[str] = ["string", "bytes", "message", "map"]
244
245
# Type collections for special handling
246
FIXED_TYPES: List[str] = [
247
"float", "double", "fixed32", "sfixed32", "fixed64", "sfixed64"
248
]
249
250
INT_64_TYPES: List[str] = ["int64", "uint64", "sint64", "fixed64", "sfixed64"]
251
252
PACKED_TYPES: List[str] = [
253
"enum", "bool", "int32", "int64", "uint32", "uint64", "sint32", "sint64",
254
"float", "double", "fixed32", "sfixed32", "fixed64", "sfixed64"
255
]
256
```