Tessl Tile for pypi/polars@1.33.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

column-selection.md configuration.md core-data-structures.md data-conversion.md data-types.md error-handling.md functions-expressions.md index.md io-operations.md sql-interface.md

core-data-structures.md docs/

0
# Core Data Structures
1

2
The fundamental data structures that form the foundation of Polars: DataFrame for eager evaluation, LazyFrame for lazy evaluation with query optimization, Series for one-dimensional data, and Expr for building complex column operations and transformations.
3

4
## Capabilities
5

6
### DataFrame
7

8
Primary data structure for eager evaluation providing immediate computation with comprehensive data manipulation methods including filtering, selection, aggregation, joining, and reshaping operations.
9

10
```python { .api }  
11
class DataFrame:
12
    def __init__(
13
        self,
14
        data=None,
15
        schema=None,
16
        *,
17
        schema_overrides=None,
18
        strict=True,
19
        orient=None,
20
        infer_schema_length=None,
21
        nan_to_null=False
22
    ): 
23
        """
24
        Create a DataFrame from various data sources.
25
        
26
        Parameters:
27
        - data: Data source (dict, list, arrow table, pandas df, etc.)
28
        - schema: Column names and types
29
        - schema_overrides: Override specific column types
30
        - strict: Strict schema validation
31
        - orient: Data orientation ('row' or 'col')
32
        - infer_schema_length: Rows to scan for type inference
33
        - nan_to_null: Convert NaN to null values
34
        """
35

36
    # Selection and Projection
37
    def select(self, *exprs, **named_exprs) -> DataFrame: ...
38
    def with_columns(self, *exprs, **named_exprs) -> DataFrame: ...
39
    def drop(self, *columns) -> DataFrame: ...
40
    def rename(self, mapping) -> DataFrame: ...
41

42
    # Filtering and Sorting
43
    def filter(self, *predicates) -> DataFrame: ...
44
    def sort(self, by, *, descending=False, nulls_last=False) -> DataFrame: ...
45
    def unique(self, subset=None, *, keep="any", maintain_order=False) -> DataFrame: ...
46
    def sample(self, n=None, *, fraction=None, with_replacement=False, shuffle=False, seed=None) -> DataFrame: ...
47

48
    # Aggregation and Grouping
49
    def group_by(self, *by, maintain_order=False) -> GroupBy: ...
50
    def sum(self) -> DataFrame: ...
51
    def mean(self) -> DataFrame: ...
52
    def max(self) -> DataFrame: ...
53
    def min(self) -> DataFrame: ...
54
    def std(self, ddof=1) -> DataFrame: ...
55
    def var(self, ddof=1) -> DataFrame: ...
56

57
    # Reshaping and Transformation
58
    def pivot(self, *, on, index=None, values=None, aggregate_function="first", sort_columns=False) -> DataFrame: ...
59
    def unpivot(self, *, on=None, index=None, variable_name=None, value_name=None) -> DataFrame: ...
60
    def transpose(self, *, include_header=False, header_name="column", column_names=None) -> DataFrame: ...
61
    def explode(self, columns, *, schema_overrides=None) -> DataFrame: ...
62

63
    # Joining Operations  
64
    def join(self, other, *, on=None, how="inner", left_on=None, right_on=None, suffix="_right", validate=None, nulls_equal=False) -> DataFrame: ...
65
    def join_asof(self, other, *, left_on=None, right_on=None, on=None, by_left=None, by_right=None, by=None, strategy="backward") -> DataFrame: ...
66

67
    # Window Operations
68
    def with_row_index(self, name="index", offset=0) -> DataFrame: ...
69
    def rolling(self, index_column, *, period, offset=None, closed="right", by=None, check_sorted=True) -> RollingGroupBy: ...
70

71
    # I/O Operations
72
    def write_csv(self, file=None, **kwargs) -> str | None: ...
73
    def write_parquet(self, file, **kwargs) -> None: ...
74
    def write_json(self, file=None, **kwargs) -> str | None: ...
75
    def write_excel(self, workbook=None, worksheet=None, **kwargs): ...
76
    def write_database(self, table_name, connection, **kwargs) -> int: ...
77

78
    # Conversion Methods
79
    def to_pandas(self, **kwargs): ...
80
    def to_numpy(self, structured=False, **kwargs): ...
81
    def to_arrow(self) -> pa.Table: ...
82
    def to_dict(self, as_series=True) -> dict: ...
83
    def to_dicts(self) -> list[dict]: ...
84

85
    # Utility Methods
86
    def head(self, n=5) -> DataFrame: ...
87
    def tail(self, n=5) -> DataFrame: ...
88
    def slice(self, offset, length=None) -> DataFrame: ...
89
    def glimpse(self, *, max_items_per_column=10, max_colname_length=50, return_as_string=False) -> str | None: ...
90
    def describe(self, *, percentiles=None, interpolation="nearest") -> DataFrame: ...
91
    def is_empty(self) -> bool: ...
92
    def lazy(self) -> LazyFrame: ...
93

94
    # Properties
95
    @property
96
    def columns(self) -> list[str]: ...
97
    @property 
98
    def dtypes(self) -> list[DataType]: ...
99
    @property
100
    def schema(self) -> Schema: ...
101
    @property
102
    def shape(self) -> tuple[int, int]: ...
103
    @property
104
    def height(self) -> int: ...
105
    @property
106
    def width(self) -> int: ...
107
    @property
108
    def flags(self) -> dict[str, dict[str, bool]]: ...
109
```
110

111
### LazyFrame
112

113
Lazy evaluation data structure that builds a computation graph for query optimization, predicate pushdown, and efficient memory usage with automatic query planning.
114

115
```python { .api }
116
class LazyFrame:
117
    # Selection and Projection
118
    def select(self, *exprs, **named_exprs) -> LazyFrame: ...
119
    def with_columns(self, *exprs, **named_exprs) -> LazyFrame: ...
120
    def drop(self, *columns) -> LazyFrame: ...
121
    def rename(self, mapping) -> LazyFrame: ...
122

123
    # Filtering and Sorting  
124
    def filter(self, *predicates) -> LazyFrame: ...
125
    def sort(self, by, *, descending=False, nulls_last=False, multithreaded=True, maintain_order=False) -> LazyFrame: ...
126
    def unique(self, subset=None, *, keep="any", maintain_order=False) -> LazyFrame: ...
127
    def sample(self, n=None, *, fraction=None, with_replacement=False, shuffle=False, seed=None) -> LazyFrame: ...
128

129
    # Aggregation and Grouping
130
    def group_by(self, *by, maintain_order=False) -> LazyGroupBy: ...
131
    def sum(self) -> LazyFrame: ...
132
    def mean(self) -> LazyFrame: ...  
133
    def max(self) -> LazyFrame: ...
134
    def min(self) -> LazyFrame: ...
135
    def std(self, ddof=1) -> LazyFrame: ...
136
    def var(self, ddof=1) -> LazyFrame: ...
137

138
    # Reshaping and Transformation
139
    def pivot(self, *, on, index=None, values=None, aggregate_function="first", sort_columns=False) -> LazyFrame: ...
140
    def unpivot(self, *, on=None, index=None, variable_name=None, value_name=None) -> LazyFrame: ...
141
    def explode(self, columns, *, schema_overrides=None) -> LazyFrame: ...
142

143
    # Joining Operations
144
    def join(self, other, *, on=None, how="inner", left_on=None, right_on=None, suffix="_right", validate=None, nulls_equal=False) -> LazyFrame: ...
145
    def join_asof(self, other, *, left_on=None, right_on=None, on=None, by_left=None, by_right=None, by=None, strategy="backward") -> LazyFrame: ...
146

147
    # Window Operations
148
    def with_row_index(self, name="index", offset=0) -> LazyFrame: ...
149
    def rolling(self, index_column, *, period, offset=None, closed="right", by=None) -> RollingGroupBy: ...
150

151
    # Execution and Optimization  
152
    def collect(self, *, type_coercion=True, predicate_pushdown=True, projection_pushdown=True, simplify_expression=True, slice_pushdown=True, comm_subplan_elim=True, comm_subexpr_elim=True, cluster_with_columns=True, streaming=False, background=False, _eager=True) -> DataFrame: ...
153
    def explain(self, *, optimized=True, type_coercion=True, predicate_pushdown=True, projection_pushdown=True, simplify_expression=True, slice_pushdown=True, comm_subplan_elim=True, comm_subexpr_elim=True, cluster_with_columns=True, format="plain") -> str: ...
154
    def show_graph(self, *, optimized=True, show=True, output_path=None, raw_output=False, figsize=(16, 12), type_coercion=True, predicate_pushdown=True, projection_pushdown=True, simplify_expression=True, slice_pushdown=True, comm_subplan_elim=True, comm_subexpr_elim=True, cluster_with_columns=True) -> str | None: ...
155

156
    # Utility Methods
157
    def head(self, n=5) -> LazyFrame: ...
158
    def tail(self, n=5) -> LazyFrame: ...
159
    def slice(self, offset, length=None) -> LazyFrame: ...
160
    def first(self) -> LazyFrame: ...
161
    def last(self) -> LazyFrame: ...
162
    def cache(self) -> LazyFrame: ...
163

164
    # Properties
165
    @property
166
    def columns(self) -> list[str]: ...
167
    @property
168
    def dtypes(self) -> list[DataType]: ...
169
    @property
170
    def schema(self) -> Schema: ...
171
    @property
172
    def width(self) -> int: ...
173
```
174

175
### Series
176

177
One-dimensional data structure with vectorized operations, supporting element-wise transformations, aggregations, and integration with DataFrame operations.
178

179
```python { .api }
180
class Series:
181
    def __init__(self, name=None, values=None, dtype=None, strict=True, nan_to_null=False): 
182
        """
183
        Create a Series from values.
184
        
185
        Parameters:
186
        - name: Series name
187
        - values: Data values (list, array, etc.)
188
        - dtype: Data type
189
        - strict: Strict type checking
190
        - nan_to_null: Convert NaN to null
191
        """
192

193
    # Element Access and Slicing
194
    def __getitem__(self, item): ...
195
    def get(self, index, *, default=None): ...
196
    def slice(self, offset, length=None) -> Series: ...
197
    def head(self, n=5) -> Series: ...
198
    def tail(self, n=5) -> Series: ...
199
    def take(self, indices) -> Series: ...
200
    def gather(self, indices) -> Series: ...
201

202
    # Filtering and Selection
203
    def filter(self, predicate) -> Series: ...
204
    def unique(self, *, maintain_order=False) -> Series: ...
205
    def sample(self, n=None, *, fraction=None, with_replacement=False, shuffle=False, seed=None) -> Series: ...
206
    def sort(self, *, descending=False, nulls_last=False) -> Series: ...
207

208
    # Transformations
209
    def map_elements(self, function, return_dtype=None, *, skip_nulls=True) -> Series: ...
210
    def cast(self, dtype, *, strict=True) -> Series: ...
211
    def alias(self, name) -> Series: ...
212
    def rename(self, name) -> Series: ...
213

214
    # Aggregations
215
    def sum(self) -> int | float: ...
216
    def mean(self) -> float | None: ...
217
    def median(self) -> float | None: ...
218
    def max(self) -> Any: ...
219
    def min(self) -> Any: ...
220
    def std(self, ddof=1) -> float | None: ...
221
    def var(self, ddof=1) -> float | None: ...
222
    def count(self) -> int: ...
223
    def len(self) -> int: ...
224

225
    # String Operations (when dtype is String)
226
    @property
227
    def str(self) -> StringNameSpace: ...
228

229
    # Datetime Operations (when dtype is temporal)
230
    @property  
231
    def dt(self) -> DateTimeNameSpace: ...
232

233
    # List Operations (when dtype is List)
234
    @property
235
    def list(self) -> ListNameSpace: ...
236

237
    # Array Operations (when dtype is Array)
238
    @property
239
    def arr(self) -> ArrayNameSpace: ...
240

241
    # Struct Operations (when dtype is Struct)
242
    @property
243
    def struct(self) -> StructNameSpace: ...
244

245
    # Categorical Operations (when dtype is Categorical)
246
    @property
247
    def cat(self) -> CategoricalNameSpace: ...
248

249
    # Binary Operations (when dtype is Binary)  
250
    @property
251
    def bin(self) -> BinaryNameSpace: ...
252

253
    # Conversion Methods
254
    def to_list(self) -> list: ...
255
    def to_numpy(self, *, zero_copy_only=False, writable=False) -> np.ndarray: ...
256
    def to_arrow(self) -> pa.Array: ...
257
    def to_pandas(self, **kwargs): ...
258
    def to_frame(self, name=None) -> DataFrame: ...
259

260
    # Utility Methods
261
    def is_null(self) -> Series: ...
262
    def is_not_null(self) -> Series: ...
263
    def is_finite(self) -> Series: ...
264
    def is_infinite(self) -> Series: ...
265
    def is_nan(self) -> Series: ...
266
    def is_not_nan(self) -> Series: ...
267
    def is_empty(self) -> bool: ...
268
    def describe(self, *, percentiles=None, interpolation="nearest") -> DataFrame: ...
269

270
    # Properties
271
    @property
272
    def name(self) -> str: ...
273
    @property  
274
    def dtype(self) -> DataType: ...
275
    @property
276
    def shape(self) -> tuple[int]: ...
277
    @property
278
    def flags(self) -> dict[str, bool]: ...
279
```
280

281
### Expr
282

283
Expression builder for column operations, transformations, and aggregations that can be used across DataFrame, LazyFrame, and various contexts for building complex data processing pipelines.
284

285
```python { .api }
286
class Expr:
287
    # Aliasing and Naming
288
    def alias(self, name: str) -> Expr: ...
289
    def name(self) -> ExprNameNameSpace: ...
290

291
    # Filtering and Selection
292
    def filter(self, predicate) -> Expr: ...
293
    def sort(self, *, descending=False, nulls_last=False) -> Expr: ...
294
    def sort_by(self, by, *, descending=False, nulls_last=False) -> Expr: ...
295
    def unique(self, *, maintain_order=False) -> Expr: ...
296
    def slice(self, offset, length=None) -> Expr: ...
297
    def head(self, n=5) -> Expr: ...  
298
    def tail(self, n=5) -> Expr: ...
299
    def first(self) -> Expr: ...
300
    def last(self) -> Expr: ...
301
    def take(self, indices) -> Expr: ...
302
    def gather(self, indices) -> Expr: ...
303

304
    # Aggregations
305
    def sum(self) -> Expr: ...
306
    def mean(self) -> Expr: ...
307
    def median(self) -> Expr: ...
308
    def max(self) -> Expr: ...
309
    def min(self) -> Expr: ... 
310
    def std(self, ddof=1) -> Expr: ...
311
    def var(self, ddof=1) -> Expr: ...
312
    def count(self) -> Expr: ...
313
    def len(self) -> Expr: ...
314
    def n_unique(self) -> Expr: ...
315
    def null_count(self) -> Expr: ...
316
    def quantile(self, quantile, interpolation="nearest") -> Expr: ...
317

318
    # Window Functions
319
    def over(self, partition_by=None, *, order_by=None, mapping_strategy="group_to_rows") -> Expr: ...
320
    def rank(self, method="average", *, descending=False, seed=None) -> Expr: ...
321
    def cum_sum(self, *, reverse=False) -> Expr: ...
322
    def cum_count(self, *, reverse=False) -> Expr: ...
323
    def cum_max(self, *, reverse=False) -> Expr: ...
324
    def cum_min(self, *, reverse=False) -> Expr: ...
325

326
    # Mathematical Operations  
327
    def abs(self) -> Expr: ...
328
    def sqrt(self) -> Expr: ...
329
    def log(self, base=None) -> Expr: ...
330
    def log10(self) -> Expr: ...
331
    def exp(self) -> Expr: ...
332
    def pow(self, exponent) -> Expr: ...
333
    def round(self, decimals=0) -> Expr: ...
334
    def floor(self) -> Expr: ...
335
    def ceil(self) -> Expr: ...
336

337
    # Type Operations  
338
    def cast(self, dtype, *, strict=True) -> Expr: ...
339
    def is_null(self) -> Expr: ...
340
    def is_not_null(self) -> Expr: ...
341
    def is_finite(self) -> Expr: ...
342
    def is_infinite(self) -> Expr: ...
343
    def is_nan(self) -> Expr: ...
344
    def is_not_nan(self) -> Expr: ...
345
    def is_duplicated(self) -> Expr: ...
346
    def is_unique(self) -> Expr: ...
347
    def is_first_distinct(self) -> Expr: ...
348
    def is_last_distinct(self) -> Expr: ...
349

350
    # Conditional Operations
351
    def is_between(self, lower_bound, upper_bound, closed="both") -> Expr: ...
352
    def is_in(self, other) -> Expr: ...
353
    def when(self, condition) -> When: ...
354

355
    # String Operations (when expression evaluates to String)
356
    @property
357
    def str(self) -> ExprStringNameSpace: ...
358

359
    # Datetime Operations (when expression evaluates to temporal type)
360
    @property
361
    def dt(self) -> ExprDateTimeNameSpace: ...
362

363
    # List Operations (when expression evaluates to List)  
364
    @property
365
    def list(self) -> ExprListNameSpace: ...
366

367
    # Array Operations (when expression evaluates to Array)
368
    @property
369
    def arr(self) -> ExprArrayNameSpace: ...
370

371
    # Struct Operations (when expression evaluates to Struct)
372
    @property
373
    def struct(self) -> ExprStructNameSpace: ...
374

375
    # Categorical Operations (when expression evaluates to Categorical)
376
    @property
377
    def cat(self) -> ExprCategoricalNameSpace: ...
378

379
    # Binary Operations (when expression evaluates to Binary)
380
    @property
381
    def bin(self) -> ExprBinaryNameSpace: ...
382

383
    # Meta Operations
384
    @property
385
    def meta(self) -> ExprMetaNameSpace: ...
386
```
387

388
## Usage Examples
389

390
### Basic DataFrame Operations
391

392
```python
393
import polars as pl
394

395
# Create DataFrame
396
df = pl.DataFrame({
397
    "product": ["A", "B", "C", "A", "B"],
398
    "sales": [100, 200, 150, 80, 250],
399
    "region": ["North", "South", "North", "South", "North"]
400
})
401

402
# Chain operations
403
result = (
404
    df
405
    .filter(pl.col("sales") > 100) 
406
    .with_columns(
407
        pl.col("sales").mul(1.1).alias("sales_with_tax"),
408
        pl.col("product").str.to_lowercase().alias("product_lower")
409
    )
410
    .group_by("region")
411
    .agg([
412
        pl.col("sales").sum().alias("total_sales"),
413
        pl.col("product").count().alias("product_count")
414
    ])
415
)
416
```
417

418
### Lazy Evaluation with Query Optimization
419

420
```python
421
# Build lazy computation
422
lazy_query = (
423
    pl.scan_csv("large_dataset.csv")
424
    .filter(pl.col("amount") > 1000)
425
    .with_columns(
426
        pl.col("date").str.to_date().alias("parsed_date"),
427
        pl.col("category").str.to_uppercase()  
428
    )
429
    .group_by(["category", pl.col("parsed_date").dt.month()])
430
    .agg([
431
        pl.col("amount").sum().alias("monthly_total"),
432
        pl.col("transaction_id").count().alias("transaction_count")
433
    ])
434
    .sort("monthly_total", descending=True)
435
)
436

437
# Execute optimized query
438
result = lazy_query.collect()
439

440
# View query plan
441
print(lazy_query.explain(optimized=True))
442
```
443

444
### Advanced Expressions
445

446
```python
447
# Complex expression building
448
complex_expr = (
449
    pl.when(pl.col("score") >= 90)
450
    .then(pl.lit("A"))
451
    .when(pl.col("score") >= 80)
452
    .then(pl.lit("B"))
453
    .when(pl.col("score") >= 70)
454
    .then(pl.lit("C"))
455
    .otherwise(pl.lit("F"))
456
    .alias("grade")
457
)
458

459
df = df.with_columns(complex_expr)
460

461
# Window functions
462
df = df.with_columns([
463
    pl.col("sales").rank().over("region").alias("sales_rank"),
464
    pl.col("sales").cum_sum().over("region").alias("running_total")
465
])
466
```

Version

Tile

Files

core-data-structures.md docs/

core-data-structures.md docs/