0
# Data Conversion
1
2
Polars integrates with pandas, NumPy, PyArrow, and PyTorch through conversion functions that support bidirectional data exchange, with automatic schema mapping and optimized memory transfer.
3
4
## Capabilities
5
6
### From External Libraries
7
8
Convert data from popular Python data libraries into Polars DataFrames.
9
10
```python { .api }
11
def from_pandas(df, *, schema_overrides=None, rechunk=True, nan_to_null=True, include_index=False) -> DataFrame:
12
"""
13
Convert pandas DataFrame to Polars DataFrame.
14
15
Parameters:
16
- df: pandas DataFrame
17
- schema_overrides: Override column types
18
- rechunk: Rechunk to contiguous memory
19
- nan_to_null: Convert NaN to null values
20
- include_index: Include pandas index as column
21
22
Returns:
23
Polars DataFrame
24
"""
25
26
def from_numpy(data, schema=None, *, orient=None) -> DataFrame:
27
"""
28
Convert NumPy array to Polars DataFrame.
29
30
Parameters:
31
- data: NumPy array (1D or 2D)
32
- schema: Column names and types
33
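- orient: Whether to interpret 2D data as columns ('col') or rows ('row'); when None, the orientation is inferred from the data and schema dimensions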
34
35
Returns:
36
Polars DataFrame
37
"""
38
39
def from_arrow(data, *, schema_overrides=None, rechunk=True) -> DataFrame:
40
"""
41
Convert PyArrow Table or RecordBatch to Polars DataFrame.
42
43
Parameters:
44
- data: PyArrow Table or RecordBatch
45
- schema_overrides: Override column types
46
- rechunk: Rechunk to contiguous memory
47
48
Returns:
49
Polars DataFrame
50
"""
51
52
def from_torch(tensor, *, schema=None) -> DataFrame:
53
"""
54
Convert PyTorch tensor to Polars DataFrame.
55
56
Parameters:
57
- tensor: PyTorch tensor
58
- schema: Column names and types
59
60
Returns:
61
Polars DataFrame
62
"""
63
```
64
65
### From Python Data Structures
66
67
Convert native Python data structures into Polars DataFrames.
68
69
```python { .api }
70
def from_dict(data, schema=None, *, schema_overrides=None, strict=True, nan_to_null=False) -> DataFrame:
71
"""
72
Convert dictionary to Polars DataFrame.
73
74
Parameters:
75
- data: Dictionary mapping column names to values
76
- schema: Column schema
77
- schema_overrides: Override specific column types
78
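- strict: Raise an error if a value does not match its column's data type; if False, mismatched values are cast, or set to null when casting is not possible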
79
- nan_to_null: Convert NaN to null values
80
81
Returns:
82
Polars DataFrame
83
"""
84
85
def from_dicts(dicts, schema=None, *, schema_overrides=None, strict=True, infer_schema_length=100) -> DataFrame:
86
"""
87
Convert list of dictionaries to Polars DataFrame.
88
89
Parameters:
90
- dicts: List of dictionaries (records)
91
- schema: Column schema
92
- schema_overrides: Override specific column types
93
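- strict: Raise an error if a value does not match its column's data type; if False, mismatched values are cast, or set to null when casting is not possible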
94
- infer_schema_length: Maximum number of rows to scan for schema inference (None scans all rows)
95
96
Returns:
97
Polars DataFrame
98
"""
99
100
def from_records(records, schema=None, *, schema_overrides=None, orient=None, infer_schema_length=100) -> DataFrame:
101
"""
102
Convert records (list of tuples/lists) to Polars DataFrame.
103
104
Parameters:
105
- records: List of records (tuples or lists)
106
- schema: Column schema
107
- schema_overrides: Override specific column types
108
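- orient: Whether to interpret the records as columns ('col') or rows ('row'); when None, the orientation is inferred from the data and schema dimensions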
109
- infer_schema_length: Maximum number of rows to scan for schema inference (None scans all rows)
110
111
Returns:
112
Polars DataFrame
113
"""
114
115
def from_repr(text: str) -> DataFrame:
116
"""
117
Parse DataFrame from its printed string representation (for example, a table copied from console output).
118
119
Parameters:
120
- text: String representation of DataFrame
121
122
Returns:
123
Polars DataFrame
124
"""
125
```
126
127
### From Generic DataFrame Types
128
129
Convert from other DataFrame implementations that support the DataFrame interchange protocol (objects exposing a `__dataframe__` method).
130
131
```python { .api }
132
def from_dataframe(df, *, allow_copy=True) -> DataFrame:
133
"""
134
Convert DataFrame interchange object to Polars DataFrame.
135
136
Parameters:
137
- df: DataFrame implementing interchange protocol
138
- allow_copy: Allow memory to be copied when a zero-copy conversion is not possible
139
140
Returns:
141
Polars DataFrame
142
"""
143
```
144
145
### JSON Normalization
146
147
Flatten nested JSON data into tabular format.
148
149
```python { .api }
150
def json_normalize(data, *, separator=".", max_level=None) -> DataFrame:
151
"""
152
Normalize nested JSON data into flat DataFrame.
153
154
Parameters:
155
- data: Deserialized JSON data (dict or list of dicts); parse raw JSON strings first, e.g. with json.loads
156
- separator: Separator for nested field names
157
- max_level: Maximum nesting level to flatten
158
159
Returns:
160
Normalized DataFrame
161
"""
162
```
163
164
## Usage Examples
165
166
### From Pandas
167
168
```python
169
import polars as pl
170
import pandas as pd
171
import numpy as np
172
173
# Convert pandas DataFrame
174
pdf = pd.DataFrame({
175
'A': [1, 2, 3, np.nan],
176
'B': ['a', 'b', 'c', 'd'],
177
'C': pd.date_range('2023-01-01', periods=4)
178
})
179
180
# Basic conversion
181
df = pl.from_pandas(pdf)
182
183
# Conversion with options
184
df = pl.from_pandas(
185
pdf,
186
schema_overrides={'A': pl.Int32},
187
include_index=True,
188
nan_to_null=True
189
)
190
```
191
192
### From NumPy
193
194
```python
195
# 2D array to DataFrame
196
arr = np.random.rand(5, 3)
197
df = pl.from_numpy(
198
arr,
199
schema=['col1', 'col2', 'col3'],
200
orient='row'
201
)
202
203
# 1D array to single-column DataFrame
204
arr_1d = np.array([1, 2, 3, 4, 5])
205
df = pl.from_numpy(arr_1d, schema=['values'])
206
```
207
208
### From Python Dictionaries
209
210
```python
211
# Dictionary with lists
212
data = {
213
'name': ['Alice', 'Bob', 'Charlie'],
214
'age': [25, 30, 35],
215
'city': ['NYC', 'LA', 'Chicago']
216
}
217
df = pl.from_dict(data)
218
219
# List of dictionaries (records)
220
records = [
221
{'name': 'Alice', 'age': 25, 'city': 'NYC'},
222
{'name': 'Bob', 'age': 30, 'city': 'LA'},
223
{'name': 'Charlie', 'age': 35, 'city': 'Chicago'}
224
]
225
df = pl.from_dicts(records)
226
227
# List of tuples/lists
228
tuples = [
229
('Alice', 25, 'NYC'),
230
('Bob', 30, 'LA'),
231
('Charlie', 35, 'Chicago')
232
]
233
df = pl.from_records(
234
tuples,
235
schema=['name', 'age', 'city']
236
)
237
```
238
239
### From PyArrow
240
241
```python
242
import pyarrow as pa
243
244
# Create PyArrow table
245
arrow_table = pa.table({
246
'integers': [1, 2, 3, 4],
247
'floats': [1.1, 2.2, 3.3, 4.4],
248
'strings': ['a', 'b', 'c', 'd']
249
})
250
251
# Convert to Polars
252
df = pl.from_arrow(arrow_table)
253
254
# With schema overrides
255
df = pl.from_arrow(
256
arrow_table,
257
schema_overrides={'integers': pl.Int32}
258
)
259
```
260
261
### JSON Normalization
262
263
```python
264
# Nested JSON data
265
json_data = [
266
{
267
'name': 'Alice',
268
'address': {
269
'street': '123 Main St',
270
'city': 'NYC',
271
'coordinates': {'lat': 40.7, 'lon': -74.0}
272
},
273
'hobbies': ['reading', 'swimming']
274
},
275
{
276
'name': 'Bob',
277
'address': {
278
'street': '456 Oak Ave',
279
'city': 'LA',
280
'coordinates': {'lat': 34.0, 'lon': -118.2}
281
},
282
'hobbies': ['cycling', 'cooking', 'gaming']
283
}
284
]
285
286
# Normalize nested structure
287
df = pl.json_normalize(
288
json_data,
289
separator='_',
290
max_level=2
291
)
292
```
293
294
### Integration with ML Libraries
295
296
```python
297
# From PyTorch tensor
298
import torch
299
300
tensor = torch.randn(100, 5)
301
df = pl.from_torch(
302
tensor,
303
schema=['feature_1', 'feature_2', 'feature_3', 'feature_4', 'feature_5']
304
)
305
306
# Convert back to tensor for ML
307
tensor_back = torch.from_numpy(df.to_numpy())
308
```
309
310
### Bidirectional Conversion
311
312
```python
313
# Polars -> Pandas -> Polars
314
original_df = pl.DataFrame({
315
'a': [1, 2, 3],
316
'b': ['x', 'y', 'z']
317
})
318
319
# Convert to pandas
320
pandas_df = original_df.to_pandas()
321
322
# Convert back to polars
323
restored_df = pl.from_pandas(pandas_df)
324
325
# Polars -> Arrow -> Polars
326
arrow_table = original_df.to_arrow()
327
restored_df = pl.from_arrow(arrow_table)
328
```
329
330
### Complex Schema Handling
331
332
```python
333
# Mixed data types with schema overrides
334
mixed_data = {
335
'ids': [1, 2, 3, 4],
336
'values': [1.1, 2.2, 3.3, 4.4],
337
'categories': ['A', 'B', 'A', 'C'],
338
'timestamps': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04']
339
}
340
341
df = pl.from_dict(
342
mixed_data,
343
schema_overrides={
344
'ids': pl.Int32,
345
'categories': pl.Categorical,
346
'timestamps': pl.Datetime
347
}
348
)
349
```