or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

code-generation.md enumerations.md grpc-services.md index.md message-fields.md serialization.md utilities.md

docs/serialization.md

0

# Serialization and Wire Format

1

2

Low-level serialization utilities including varint encoding/decoding, wire type handling, and binary format parsing compatible with standard protobuf implementations.

3

4

## Capabilities

5

6

### Varint Encoding and Decoding

7

8

Functions for encoding and decoding variable-length integers used in the protobuf wire format.

9

10

```python { .api }

11

def encode_varint(value: int) -> bytes:

12

"""

13

Encode a single varint value for serialization.

14

15

Args:

16

value: Integer value to encode (negative values are encoded as 64-bit two's complement)

17

18

Returns:

19

Encoded bytes using varint format

20

"""

21

22

def decode_varint(buffer: bytes, pos: int, signed: bool = False) -> Tuple[int, int]:

23

"""

24

Decode a single varint value from a byte buffer.

25

26

Args:

27

buffer: Byte buffer containing varint data

28

pos: Starting position in the buffer

29

signed: Whether to interpret as signed value

30

31

Returns:

32

Tuple of (decoded_value, new_position)

33

34

Raises:

35

ValueError: If too many bytes are encountered while decoding the varint

36

"""

37

```

38

39

### Field Parsing

40

41

Functions for parsing protobuf binary data into structured field information.

42

43

```python { .api }

44

def parse_fields(value: bytes) -> Generator[ParsedField, None, None]:

45

"""

46

Parse protobuf fields from binary data.

47

48

Args:

49

value: Binary protobuf data

50

51

Yields:

52

ParsedField instances containing field information

53

"""

54

55

@dataclass(frozen=True)

56

class ParsedField:

57

"""Represents a parsed protobuf field."""

58

59

number: int # Field number from proto definition

60

wire_type: int # Wire type (varint, fixed32, length-delimited, etc.)

61

value: Any # Decoded field value

62

raw: bytes # Raw bytes for this field

63

```

64

65

### Wire Format Utilities

66

67

Functions and constants for handling the protobuf wire format encoding.

68

69

```python { .api }

70

def serialized_on_wire(message: Message) -> bool:

71

"""

72

Check if this message was or should be serialized on the wire.

73

74

Used to detect presence (e.g. optional wrapper message) and

75

internally during parsing/serialization.

76

77

Args:

78

message: Message instance to check

79

80

Returns:

81

True if message was/should be serialized

82

"""

83

```

84

85

## Usage Examples

86

87

### Manual Varint Encoding

88

89

```python

90

import betterproto

91

92

# Encode various integer values

93

small_value = betterproto.encode_varint(150)

94

print(small_value.hex()) # '9601'

95

96

large_value = betterproto.encode_varint(16384)

97

print(large_value.hex()) # '808001'

98

99

# Handle negative values (uses two's complement)

100

negative_value = betterproto.encode_varint(-1)

101

print(negative_value.hex()) # 'ffffffffffffffffff01'

102

```

103

104

### Manual Varint Decoding

105

106

```python

107

# Decode varint from bytes

108

buffer = bytes.fromhex('9601') # 150 encoded

109

value, new_pos = betterproto.decode_varint(buffer, 0)

110

print(f"Decoded: {value}, next position: {new_pos}") # Decoded: 150, next position: 2

111

112

# Handle multi-byte varints

113

buffer = bytes.fromhex('808001') # 16384 encoded

114

value, new_pos = betterproto.decode_varint(buffer, 0)

115

print(f"Decoded: {value}") # Decoded: 16384

116

117

# Handle signed interpretation

118

buffer = bytes.fromhex('ffffffffffffffffff01') # -1 encoded

119

value, new_pos = betterproto.decode_varint(buffer, 0, signed=True)

120

print(f"Signed: {value}") # Signed: -1

121

```

122

123

### Parsing Raw Protobuf Data

124

125

```python

126

from dataclasses import dataclass

127

128

@dataclass

129

class Person(betterproto.Message):

130

name: str = betterproto.string_field(1)

131

age: int = betterproto.int32_field(2)

132

133

# Create and serialize a message

134

person = Person(name="Alice", age=30)

135

binary_data = bytes(person)

136

137

# Parse fields manually

138

for field in betterproto.parse_fields(binary_data):

139

print(f"Field {field.number}: wire_type={field.wire_type}, "

140

f"value={field.value}, raw={field.raw.hex()}")

141

142

# Example output:

143

# Field 1: wire_type=2, value=b'Alice', raw=0a05416c696365

144

# Field 2: wire_type=0, value=30, raw=101e

145

```

146

147

### Checking Message Serialization State

148

149

```python

150

from dataclasses import dataclass

151

152

@dataclass

153

class Container(betterproto.Message):

154

item: Person = betterproto.message_field(1)

155

156

# Create container with unset message

157

container = Container()

158

print(betterproto.serialized_on_wire(container.item)) # False

159

160

# Set a field in the nested message

161

container.item.name = "Bob"

162

print(betterproto.serialized_on_wire(container.item)) # True

163

164

# Even setting a field to its default value marks the message as serialized

165

container.item.age = 0 # Default value

166

print(betterproto.serialized_on_wire(container.item)) # Still True

167

168

# Create new nested message explicitly

169

container.item = Person()

170

print(betterproto.serialized_on_wire(container.item)) # False

171

```

172

173

### Custom Serialization Logic

174

175

```python

176

# Understanding how fields are processed during serialization

177

@dataclass

178

class CustomMessage(betterproto.Message):

179

value: int = betterproto.int32_field(1)

180

optional_text: str = betterproto.string_field(2)

181

182

def debug_serialization(self):

183

"""Debug helper to show serialization details."""

184

import dataclasses

185

186

for field in dataclasses.fields(self):

187

meta = betterproto.FieldMetadata.get(field)

188

value = getattr(self, field.name)

189

190

print(f"Field {field.name}:")

191

print(f" Number: {meta.number}")

192

print(f" Type: {meta.proto_type}")

193

print(f" Value: {value}")

194

print(f" Default: {self._get_field_default(field, meta)}")

195

print(f" Will serialize: {value != self._get_field_default(field, meta)}")

196

197

# Use the debug helper

198

msg = CustomMessage(value=42, optional_text="")

199

msg.debug_serialization()

200

```

201

202

### Working with Unknown Fields

203

204

```python

205

@dataclass

206

class KnownMessage(betterproto.Message):

207

known_field: str = betterproto.string_field(1)

208

209

# Create message with extra data (simulating newer version)

210

original_data = bytes([

211

0x0a, 0x05, 0x48, 0x65, 0x6c, 0x6c, 0x6f, # field 1: "Hello"

212

0x12, 0x05, 0x57, 0x6f, 0x72, 0x6c, 0x64, # field 2: "World" (unknown)

213

])

214

215

# Parse with known message - unknown fields preserved

216

msg = KnownMessage().parse(original_data)

217

print(f"Known field: {msg.known_field}") # Known field: Hello

218

print(f"Unknown fields: {msg._unknown_fields.hex()}") # Unknown fields: 1205576f726c64

219

220

# Re-serializing the message includes the unknown fields

221

serialized = bytes(msg)

222

print(f"Includes unknown: {serialized == original_data}") # True

223

```

224

225

## Constants

226

227

```python { .api }

228

# Wire type constants

229

WIRE_VARINT: int = 0 # Variable-length integers

230

WIRE_FIXED_64: int = 1 # 64-bit fixed-length

231

WIRE_LEN_DELIM: int = 2 # Length-delimited (strings, messages, etc.)

232

WIRE_FIXED_32: int = 5 # 32-bit fixed-length

233

234

# Type collections for wire format mapping

235

WIRE_VARINT_TYPES: List[str] = [

236

"enum", "bool", "int32", "int64", "uint32", "uint64", "sint32", "sint64"

237

]

238

239

WIRE_FIXED_32_TYPES: List[str] = ["float", "fixed32", "sfixed32"]

240

241

WIRE_FIXED_64_TYPES: List[str] = ["double", "fixed64", "sfixed64"]

242

243

WIRE_LEN_DELIM_TYPES: List[str] = ["string", "bytes", "message", "map"]

244

245

# Type collections for special handling

246

FIXED_TYPES: List[str] = [

247

"float", "double", "fixed32", "sfixed32", "fixed64", "sfixed64"

248

]

249

250

INT_64_TYPES: List[str] = ["int64", "uint64", "sint64", "fixed64", "sfixed64"]

251

252

PACKED_TYPES: List[str] = [

253

"enum", "bool", "int32", "int64", "uint32", "uint64", "sint32", "sint64",

254

"float", "double", "fixed32", "sfixed32", "fixed64", "sfixed64"

255

]

256

```