or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

callbacks-handles.md core-ffi.md data-conversion.md error-handling.md index.md memory-management.md source-generation.md type-system.md

docs/data-conversion.md

0

# Data Conversion

1

2

Converting between Python and C data representations. These functions handle string conversion, array unpacking, buffer operations, and memory transfers.

3

4

## Capabilities

5

6

### String Conversion

7

8

Converts C data to Python strings: `char` data becomes `bytes`, `wchar_t` data becomes `str`, and enum values become their enumerator names. An optional length limit is supported.

9

10

```python { .api }

11

def string(self, cdata, maxlen=-1):

12

"""

13

Convert C data to Python string.

14

15

Parameters:

16

- cdata: C data object (char*, char[], wchar_t*, single char, enum)

17

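- maxlen (int): Maximum number of characters to read; with the default of -1, reading stops at the first null character (or at the end of the array, whichever comes first)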

18

19

Returns:

20

str|bytes: Python string (bytes for char*, str for wchar_t*)

21

"""

22

```

23

24

**Usage Examples:**

25

26

```python

27

# Null-terminated strings

28

c_str = ffi.new("char[]", b"Hello, World!")

29

py_str = ffi.string(c_str) # b"Hello, World!"

30

31

# String with length limit

32

long_str = ffi.new("char[]", b"This is a very long string")

33

short_str = ffi.string(long_str, 10) # b"This is a "

34

35

# Unicode strings (wchar_t)

36

ffi.cdef("typedef wchar_t WCHAR;")

37

wide_str = ffi.new("WCHAR[]", u"Hello, 世界!")

38

unicode_str = ffi.string(wide_str) # u"Hello, 世界!"

39

40

# Single characters

41

char_val = ffi.cast("char", ord('A'))

42

char_str = ffi.string(char_val) # b"A"

43

44

# Enum values as strings

45

ffi.cdef("enum status { OK, ERROR, PENDING };")

46

status = ffi.cast("enum status", 1) # ERROR

47

status_str = ffi.string(status) # "ERROR" or "1" if out of range

48

```

49

50

### Array Unpacking

51

52

Unpacks C arrays into Python lists or strings without stopping at null terminators.

53

54

```python { .api }

55

def unpack(self, cdata, length):

56

"""

57

Unpack C array data to Python collection.

58

59

Parameters:

60

- cdata: C data pointer or array

61

- length (int): Number of elements to unpack

62

63

Returns:

64

bytes|str|list: Python collection of unpacked data

65

"""

66

```

67

68

**Usage Examples:**

69

70

```python

71

# Unpack char array to bytes

72

char_array = ffi.new("char[6]", b"Hi\x00lo!") # Contains null byte

73

data = ffi.unpack(char_array, 6) # b"Hi\x00lo!" (preserves null)

74

75

# Unpack integer array to list

76

int_array = ffi.new("int[]", [1, 2, 3, 4, 5])

77

py_list = ffi.unpack(int_array, 5) # [1, 2, 3, 4, 5]

78

79

# Unpack wide character array

80

wide_array = ffi.new("wchar_t[]", u"Hello")

81

unicode_data = ffi.unpack(wide_array, 5) # u"Hello"

82

83

# Unpack structure array

84

ffi.cdef("struct point { int x, y; };")

85

points = ffi.new("struct point[3]")

86

points[0].x, points[0].y = 1, 2

87

points[1].x, points[1].y = 3, 4

88

points[2].x, points[2].y = 5, 6

89

90

point_list = ffi.unpack(points, 3) # List of struct point objects

91

```

92

93

### Buffer Interface

94

95

Provides raw memory access through Python's buffer protocol for efficient data operations.

96

97

```python { .api }

98

buffer: callable # Buffer property for accessing raw C data

99

```

100

101

**Usage Examples:**

102

103

```python

104

# Create buffer from C array

105

data = ffi.new("char[1024]")

106

buf = ffi.buffer(data)

107

108

# Buffer operations

109

buf[0:5] = b"Hello"

110

content = buf[:] # Get entire buffer as bytes

111

print(len(buf)) # 1024

112

113

# Partial buffer access

114

partial = buf[10:20] # Copy of bytes 10-19 (slicing returns bytes, not a view)

115

buf[10:16] = b"World " # Write through the buffer itself; lengths must match

116

117

# Buffer with specific size

118

limited_buf = ffi.buffer(data, 100) # Only first 100 bytes

119

```

120

121

### Buffer Creation from Python Objects

122

123

Creates C data from existing Python buffer objects like bytearray, array.array, or numpy arrays.

124

125

```python { .api }

126

def from_buffer(self, cdecl, python_buffer=None, require_writable=False):

127

"""

128

Create C data from Python buffer object.

129

130

Parameters:

131

- cdecl (str): C type for the returned data (defaults to 'char[]')

132

- python_buffer: Python object supporting buffer protocol

133

- require_writable (bool): Require writable buffer

134

135

Returns:

136

CData object pointing to buffer data

137

"""

138

```

139

140

**Usage Examples:**

141

142

```python

143

# From bytearray

144

source = bytearray(b"Hello, World!")

145

c_data = ffi.from_buffer(source) # char[] pointing to bytearray

146

c_data[0] = b'h' # Modifies original bytearray (char items take length-1 bytes)

147

148

# From array.array

149

import array

150

int_array = array.array('i', [1, 2, 3, 4, 5])

151

c_ints = ffi.from_buffer("int[]", int_array)

152

153

# From bytes (read-only)

154

byte_data = b"Read only data"

155

c_readonly = ffi.from_buffer(byte_data)

156

# c_readonly[0] = b'r' # Not checked by cffi: would silently write into the immutable bytes object

157

158

# Require writable buffer

159

try:

160

c_writable = ffi.from_buffer(byte_data, require_writable=True)

161

except (TypeError, BufferError): # Exact exception type varies by object and Python version

162

print("Buffer is not writable")

163

164

# With specific C type

165

float_array = array.array('f', [1.0, 2.5, 3.7])

166

c_floats = ffi.from_buffer("float[]", float_array)

167

```

168

169

### Memory Transfer

170

171

Copies memory between C data objects and Python buffers with overlap handling.

172

173

```python { .api }

174

def memmove(self, dest, src, n):

175

"""

176

Copy n bytes of memory from src to dest.

177

178

Parameters:

179

- dest: Destination C data or writable Python buffer

180

- src: Source C data or Python buffer

181

- n (int): Number of bytes to copy

182

183

Returns:

184

None

185

"""

186

```

187

188

**Usage Examples:**

189

190

```python

191

# Copy between C arrays

192

src = ffi.new("char[]", b"Hello, World!")

193

dest = ffi.new("char[20]")

194

ffi.memmove(dest, src, 13)

195

print(ffi.string(dest)) # b"Hello, World!"

196

197

# Copy from Python buffer to C data

198

python_data = bytearray(b"Python data")

199

c_buffer = ffi.new("char[50]")

200

ffi.memmove(c_buffer, python_data, len(python_data))

201

202

# Copy from C data to Python buffer

203

result = bytearray(20)

204

ffi.memmove(result, c_buffer, 11)

205

print(result[:11]) # bytearray(b'Python data')

206

207

# Overlapping memory (safe with memmove)

208

data = ffi.new("char[]", b"Hello, World!")

209

ffi.memmove(data + 2, data, 5) # Shift "Hello" 2 positions right

210

print(ffi.string(data)) # b"HeHelloWorld!"

211

```

212

213

## Advanced Buffer Operations

214

215

### Zero-Copy Data Sharing

216

217

```python

218

# Share data between Python and C without copying

219

class SharedBuffer:

220

def __init__(self, size):

221

self.python_buffer = bytearray(size)

222

self.c_view = ffi.from_buffer(self.python_buffer)

223

224

def write_from_python(self, data):

225

self.python_buffer[:len(data)] = data

226

227

def read_from_c_side(self):

228

return bytes(self.python_buffer)

229

230

# Usage

231

shared = SharedBuffer(1024)

232

shared.write_from_python(b"Data from Python")

233

234

# C side can directly access shared.c_view

235

# Changes are immediately visible to Python side

236

```

237

238

### Efficient Array Processing

239

240

```python

241

def process_large_array(py_array):

242

"""Process large Python array through C without copying"""

243

# Create C view of Python data

244

c_array = ffi.from_buffer("double[]", py_array)

245

246

# Process data through C (example: in-place operations)

247

ffi.cdef("void process_doubles(double* arr, size_t count);")

248

lib = ffi.dlopen("./processing_lib.so")

249

250

lib.process_doubles(c_array, len(py_array))

251

252

# py_array is now modified in-place

253

return py_array

254

```

255

256

### String Builder Pattern

257

258

```python

259

class CStringBuilder:

260

def __init__(self, initial_size=1024):

261

self.buffer = ffi.new("char[]", initial_size)

262

self.size = initial_size

263

self.length = 0

264

265

def append(self, text):

266

text_bytes = text.encode('utf-8') if isinstance(text, str) else text

267

needed = self.length + len(text_bytes)

268

269

if needed >= self.size:

270

# Resize buffer

271

new_size = max(needed * 2, self.size * 2)

272

new_buffer = ffi.new("char[]", new_size)

273

ffi.memmove(new_buffer, self.buffer, self.length)

274

self.buffer = new_buffer

275

self.size = new_size

276

277

ffi.memmove(self.buffer + self.length, text_bytes, len(text_bytes))

278

self.length += len(text_bytes)

279

280

def to_string(self):

281

return ffi.string(self.buffer, self.length)

282

283

# Usage

284

builder = CStringBuilder()

285

builder.append("Hello, ")

286

builder.append("World!")

287

result = builder.to_string() # b"Hello, World!"

288

```

289

290

### Binary Data Processing

291

292

```python

293

def parse_binary_protocol(data):

294

"""Parse binary protocol data efficiently"""

295

c_data = ffi.from_buffer("unsigned char[]", data)

296

297

# Define protocol structure

298

ffi.cdef("""

299

struct header {

300

unsigned int magic;

301

unsigned short version;

302

unsigned short length;

303

};

304

305

struct message {

306

struct header hdr;

307

unsigned char payload[];

308

};

309

""")

310

311

# Cast to structure

312

msg = ffi.cast("struct message *", c_data)

313

314

# Access fields directly

315

if msg.hdr.magic == 0xDEADBEEF:

316

payload_len = msg.hdr.length - ffi.sizeof("struct header")

317

payload = ffi.unpack(msg.payload, payload_len)

318

return {

319

'version': msg.hdr.version,

320

'payload': payload

321

}

322

323

raise ValueError("Invalid magic number")

324

```

325

326

## Performance Considerations

327

328

### Buffer vs String vs Unpack

329

330

```python

331

# For large data, buffer operations are most efficient

332

large_array = ffi.new("char[10000]")

333

334

# Fastest: direct buffer access

335

buf = ffi.buffer(large_array)

336

data = buf[:] # Single copy

337

338

# Slower: string conversion (char data only; stops at the first null byte, so b"" for this zero-filled array)

339

str_data = ffi.string(large_array, 10000)

340

341

# Slowest: unpack (creates Python list for non-char data)

342

int_array = ffi.cast("int *", large_array)

343

list_data = ffi.unpack(int_array, 2500) # 10000 / 4

344

```

345

346

### Memory Alignment

347

348

```python

349

def create_aligned_buffer(size, alignment=16):

350

"""Create memory-aligned buffer for SIMD operations"""

351

# Allocate extra space for alignment

352

raw_size = size + alignment - 1

353

raw_buffer = ffi.new("char[]", raw_size)

354

355

# Calculate aligned address

356

addr = int(ffi.cast("uintptr_t", raw_buffer)) # int() needed: integer cdata does not support + or &

357

aligned_addr = (addr + alignment - 1) & ~(alignment - 1)

358

359

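# Return aligned pointer. WARNING: ffi.cast() does not keep raw_buffer alive, so raw_buffer must also be returned or stored, or this pointer dangles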

360

return ffi.cast("char *", aligned_addr)

361

```