or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

column-selection.md configuration.md core-data-structures.md data-conversion.md data-types.md error-handling.md functions-expressions.md index.md io-operations.md sql-interface.md

docs/data-conversion.md

0

# Data Conversion

1

2

Seamless integration with pandas, NumPy, PyArrow, and PyTorch through conversion functions supporting bidirectional data exchange with automatic schema mapping and optimized memory transfer.

3

4

## Capabilities

5

6

### From External Libraries

7

8

Convert data from popular Python data libraries into Polars DataFrames.

9

10

```python { .api }

11

def from_pandas(df, *, schema_overrides=None, rechunk=True, nan_to_null=True, include_index=False) -> DataFrame:

12

"""

13

Convert pandas DataFrame to Polars DataFrame.

14

15

Parameters:

16

- df: pandas DataFrame

17

- schema_overrides: Override column types

18

- rechunk: Rechunk to contiguous memory

19

- nan_to_null: Convert NaN to null values

20

- include_index: Include pandas index as column

21

22

Returns:

23

Polars DataFrame

24

"""

25

26

def from_numpy(data, schema=None, *, orient=None) -> DataFrame:

27

"""

28

Convert NumPy array to Polars DataFrame.

29

30

Parameters:

31

- data: NumPy array (1D or 2D)

32

- schema: Column names and types

33

- orient: Data orientation ('col' or 'row')

34

35

Returns:

36

Polars DataFrame

37

"""

38

39

def from_arrow(data, *, schema_overrides=None, rechunk=True) -> DataFrame:

40

"""

41

Convert PyArrow Table or RecordBatch to Polars DataFrame.

42

43

Parameters:

44

- data: PyArrow Table or RecordBatch

45

- schema_overrides: Override column types

46

- rechunk: Rechunk to contiguous memory

47

48

Returns:

49

Polars DataFrame

50

"""

51

52

def from_torch(tensor, *, schema=None) -> DataFrame:

53

"""

54

Convert PyTorch tensor to Polars DataFrame.

55

56

Parameters:

57

- tensor: PyTorch tensor

58

- schema: Column names and types

59

60

Returns:

61

Polars DataFrame

62

"""

63

```

64

65

### From Python Data Structures

66

67

Convert native Python data structures into Polars DataFrames.

68

69

```python { .api }

70

def from_dict(data, schema=None, *, schema_overrides=None, strict=True, nan_to_null=False) -> DataFrame:

71

"""

72

Convert dictionary to Polars DataFrame.

73

74

Parameters:

75

- data: Dictionary mapping column names to values

76

- schema: Column schema

77

- schema_overrides: Override specific column types

78

- strict: Raise an error if a value does not match its column's data type; if False, mismatched values are cast or set to null

79

- nan_to_null: Convert NaN to null values

80

81

Returns:

82

Polars DataFrame

83

"""

84

85

def from_dicts(dicts, schema=None, *, schema_overrides=None, strict=True, infer_schema_length=100) -> DataFrame:

86

"""

87

Convert list of dictionaries to Polars DataFrame.

88

89

Parameters:

90

- dicts: List of dictionaries (records)

91

- schema: Column schema

92

- schema_overrides: Override specific column types

93

- strict: Raise an error if a value does not match its column's data type; if False, mismatched values are cast or set to null

94

- infer_schema_length: Rows to scan for schema inference

95

96

Returns:

97

Polars DataFrame

98

"""

99

100

def from_records(records, schema=None, *, schema_overrides=None, orient=None, infer_schema_length=100) -> DataFrame:

101

"""

102

Convert records (list of tuples/lists) to Polars DataFrame.

103

104

Parameters:

105

- records: List of records (tuples or lists)

106

- schema: Column schema

107

- schema_overrides: Override specific column types

108

- orient: Data orientation ('col' or 'row')

109

- infer_schema_length: Rows to scan for schema inference

110

111

Returns:

112

Polars DataFrame

113

"""

114

115

def from_repr(text: str) -> DataFrame:

116

"""

117

Parse DataFrame from string representation.

118

119

Parameters:

120

- text: String representation of DataFrame

121

122

Returns:

123

Polars DataFrame

124

"""

125

```

126

127

### From Generic DataFrame Types

128

129

Convert from other DataFrame implementations with automatic protocol detection.

130

131

```python { .api }

132

def from_dataframe(df, *, allow_copy=True) -> DataFrame:

133

"""

134

Convert DataFrame interchange object to Polars DataFrame.

135

136

Parameters:

137

- df: DataFrame implementing interchange protocol

138

- allow_copy: Allow copying data if necessary

139

140

Returns:

141

Polars DataFrame

142

"""

143

```

144

145

### JSON Normalization

146

147

Flatten nested JSON data into tabular format.

148

149

```python { .api }

150

def json_normalize(data, *, separator=".", max_level=None) -> DataFrame:

151

"""

152

Normalize nested JSON data into flat DataFrame.

153

154

Parameters:

155

- data: Deserialized JSON data (dict or list of dicts); parse JSON strings with json.loads first

156

- separator: Separator for nested field names

157

- max_level: Maximum nesting level to flatten

158

159

Returns:

160

Normalized DataFrame

161

"""

162

```

163

164

## Usage Examples

165

166

### From Pandas

167

168

```python

169

import polars as pl

170

import pandas as pd

171

import numpy as np

172

173

# Convert pandas DataFrame

174

pdf = pd.DataFrame({

175

'A': [1, 2, 3, np.nan],

176

'B': ['a', 'b', 'c', 'd'],

177

'C': pd.date_range('2023-01-01', periods=4)

178

})

179

180

# Basic conversion

181

df = pl.from_pandas(pdf)

182

183

# Conversion with options

184

df = pl.from_pandas(

185

pdf,

186

schema_overrides={'A': pl.Int32},

187

include_index=True,

188

nan_to_null=True

189

)

190

```

191

192

### From NumPy

193

194

```python

195

# 2D array to DataFrame

196

arr = np.random.rand(5, 3)

197

df = pl.from_numpy(

198

arr,

199

schema=['col1', 'col2', 'col3'],

200

orient='row'

201

)

202

203

# 1D array to single-column DataFrame

204

arr_1d = np.array([1, 2, 3, 4, 5])

205

df = pl.from_numpy(arr_1d, schema=['values'])

206

```

207

208

### From Python Data Structures

209

210

```python

211

# Dictionary with lists

212

data = {

213

'name': ['Alice', 'Bob', 'Charlie'],

214

'age': [25, 30, 35],

215

'city': ['NYC', 'LA', 'Chicago']

216

}

217

df = pl.from_dict(data)

218

219

# List of dictionaries (records)

220

records = [

221

{'name': 'Alice', 'age': 25, 'city': 'NYC'},

222

{'name': 'Bob', 'age': 30, 'city': 'LA'},

223

{'name': 'Charlie', 'age': 35, 'city': 'Chicago'}

224

]

225

df = pl.from_dicts(records)

226

227

# List of tuples/lists

228

tuples = [

229

('Alice', 25, 'NYC'),

230

('Bob', 30, 'LA'),

231

('Charlie', 35, 'Chicago')

232

]

233

df = pl.from_records(

234

tuples,

235

schema=['name', 'age', 'city']

236

)

237

```

238

239

### From PyArrow

240

241

```python

242

import pyarrow as pa

243

244

# Create PyArrow table

245

arrow_table = pa.table({

246

'integers': [1, 2, 3, 4],

247

'floats': [1.1, 2.2, 3.3, 4.4],

248

'strings': ['a', 'b', 'c', 'd']

249

})

250

251

# Convert to Polars

252

df = pl.from_arrow(arrow_table)

253

254

# With schema overrides

255

df = pl.from_arrow(

256

arrow_table,

257

schema_overrides={'integers': pl.Int32}

258

)

259

```

260

261

### JSON Normalization

262

263

```python

264

# Nested JSON data

265

json_data = [

266

{

267

'name': 'Alice',

268

'address': {

269

'street': '123 Main St',

270

'city': 'NYC',

271

'coordinates': {'lat': 40.7, 'lon': -74.0}

272

},

273

'hobbies': ['reading', 'swimming']

274

},

275

{

276

'name': 'Bob',

277

'address': {

278

'street': '456 Oak Ave',

279

'city': 'LA',

280

'coordinates': {'lat': 34.0, 'lon': -118.2}

281

},

282

'hobbies': ['cycling', 'cooking', 'gaming']

283

}

284

]

285

286

# Normalize nested structure

287

df = pl.json_normalize(

288

json_data,

289

separator='_',

290

max_level=2

291

)

292

```

293

294

### Integration with ML Libraries

295

296

```python

297

# From PyTorch tensor

298

import torch

299

300

tensor = torch.randn(100, 5)

301

df = pl.from_torch(

302

tensor,

303

schema=['feature_1', 'feature_2', 'feature_3', 'feature_4', 'feature_5']

304

)

305

306

# Convert back to tensor for ML

307

tensor_back = torch.from_numpy(df.to_numpy())

308

```

309

310

### Bidirectional Conversion

311

312

```python

313

# Polars -> Pandas -> Polars

314

original_df = pl.DataFrame({

315

'a': [1, 2, 3],

316

'b': ['x', 'y', 'z']

317

})

318

319

# Convert to pandas

320

pandas_df = original_df.to_pandas()

321

322

# Convert back to polars

323

restored_df = pl.from_pandas(pandas_df)

324

325

# Polars -> Arrow -> Polars

326

arrow_table = original_df.to_arrow()

327

restored_df = pl.from_arrow(arrow_table)

328

```

329

330

### Complex Schema Handling

331

332

```python

333

# Mixed data types with schema overrides

334

mixed_data = {

335

'ids': [1, 2, 3, 4],

336

'values': [1.1, 2.2, 3.3, 4.4],

337

'categories': ['A', 'B', 'A', 'C'],

338

'timestamps': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04']

339

}

340

341

df = pl.from_dict(

342

mixed_data,

343

schema_overrides={

344

'ids': pl.Int32,

345

'categories': pl.Categorical,

346

'timestamps': pl.Datetime

347

}

348

)

349

```