# Core Classes

The fundamental data structures that form the foundation of polars-lts-cpu's data manipulation capabilities. These classes provide different approaches to working with tabular data, from eager evaluation to lazy optimization.

## Capabilities

### DataFrame

Two-dimensional data structure representing tabular data with rows and columns, providing eager evaluation for immediate operations.

```python { .api }
class DataFrame:
    def __init__(
        self,
        data: Any = None,
        schema: Optional[SchemaDict] = None,
        schema_overrides: Optional[SchemaDict] = None,
        orient: Optional[str] = None,
        infer_schema_length: Optional[int] = 100,
        nan_to_null: bool = False
    ):
        """
        Create a DataFrame from various data sources.

        Parameters:
        - data: Data source (dict, list, numpy array, pandas DataFrame, etc.)
        - schema: Schema specification as {column: dtype} dict
        - schema_overrides: Override inferred types for specific columns
        - orient: Data orientation ('col' or 'row')
        - infer_schema_length: Number of rows to scan for schema inference
        - nan_to_null: Convert NaN values to null
        """

    # Properties
    @property
    def shape(self) -> tuple[int, int]:
        """Returns (height, width) tuple."""

    @property
    def height(self) -> int:
        """Number of rows."""

    @property
    def width(self) -> int:
        """Number of columns."""

    @property
    def columns(self) -> list[str]:
        """Column names."""

    @property
    def dtypes(self) -> list[type]:
        """Column data types."""

    @property
    def schema(self) -> dict[str, type]:
        """Schema as {column: dtype} dict."""

    # Data Selection and Filtering
    def select(self, *exprs: IntoExpr) -> DataFrame:
        """Select columns using expressions."""

    def filter(self, predicate: IntoExpr) -> DataFrame:
        """Filter rows based on predicate."""

    def with_columns(self, *exprs: IntoExpr, **named_exprs: IntoExpr) -> DataFrame:
        """Add or modify columns."""

    def drop(self, *columns: str) -> DataFrame:
        """Drop columns."""

    def rename(self, mapping: dict[str, str]) -> DataFrame:
        """Rename columns."""

    # Data Access
    def get_column(self, name: str) -> Series:
        """Get column as Series."""

    def get_columns(self) -> list[Series]:
        """Get all columns as list of Series."""

    def row(self, index: int, *, named: bool = False) -> tuple | dict:
        """Get single row."""

    def rows(self, *, named: bool = False) -> list[tuple] | list[dict]:
        """Get all rows."""

    def item(self, row: int | None = None, column: str | int | None = None) -> Any:
        """Get single item."""

    # Transformations
    def sort(
        self,
        by: ColumnNameOrSelector | list[ColumnNameOrSelector],
        *,
        descending: bool | list[bool] = False,
        nulls_last: bool = False
    ) -> DataFrame:
        """Sort DataFrame."""

    def reverse(self) -> DataFrame:
        """Reverse row order."""

    def transpose(
        self,
        *,
        include_header: bool = False,
        header_name: str = "column",
        column_names: str | list[str] | None = None
    ) -> DataFrame:
        """Transpose DataFrame."""

    def cast(self, dtypes: dict[str, type] | type, *, strict: bool = True) -> DataFrame:
        """Cast column types."""

    # Aggregations
    def sum(self, *, axis: int = 0) -> DataFrame | Series:
        """Sum values."""

    def mean(self, *, axis: int = 0) -> DataFrame | Series:
        """Mean of values."""

    def max(self, *, axis: int = 0) -> DataFrame | Series:
        """Maximum values."""

    def min(self, *, axis: int = 0) -> DataFrame | Series:
        """Minimum values."""

    def std(self, *, ddof: int = 1, axis: int = 0) -> DataFrame | Series:
        """Standard deviation."""

    def var(self, *, ddof: int = 1, axis: int = 0) -> DataFrame | Series:
        """Variance."""

    def median(self, *, axis: int = 0) -> DataFrame | Series:
        """Median values."""

    def quantile(self, quantile: float, *, interpolation: str = "nearest", axis: int = 0) -> DataFrame | Series:
        """Quantile values."""

    # Horizontal Operations
    def sum_horizontal(self, *exprs: IntoExpr) -> DataFrame:
        """Sum values horizontally across columns."""

    def mean_horizontal(self, *exprs: IntoExpr) -> DataFrame:
        """Mean values horizontally across columns."""

    def max_horizontal(self, *exprs: IntoExpr) -> DataFrame:
        """Maximum values horizontally across columns."""

    def min_horizontal(self, *exprs: IntoExpr) -> DataFrame:
        """Minimum values horizontally across columns."""

    # Grouping Operations
    def group_by(
        self,
        *by: IntoExpr,
        maintain_order: bool = False,
        **named_by: IntoExpr
    ) -> GroupBy:
        """Group DataFrame by expressions."""

    def rolling(
        self,
        index_column: str,
        *,
        period: str | timedelta,
        offset: str | timedelta | None = None,
        closed: str = "right",
        by: str | list[str] | None = None,
        check_sorted: bool = True
    ) -> RollingGroupBy:
        """Create rolling window groupby."""

    # Joins
    def join(
        self,
        other: DataFrame,
        on: str | list[str] | None = None,
        how: str = "inner",
        *,
        left_on: str | list[str] | None = None,
        right_on: str | list[str] | None = None,
        suffix: str = "_right",
        validate: str = "m:m",
        join_nulls: bool = False
    ) -> DataFrame:
        """Join with another DataFrame."""

    def join_asof(
        self,
        other: DataFrame,
        *,
        left_on: str | None = None,
        right_on: str | None = None,
        on: str | None = None,
        by_left: str | list[str] | None = None,
        by_right: str | list[str] | None = None,
        by: str | list[str] | None = None,
        strategy: str = "backward",
        suffix: str = "_right",
        tolerance: str | int | float | None = None,
        allow_exact_matches: bool = True
    ) -> DataFrame:
        """Perform asof join."""

    # Reshaping
    def pivot(
        self,
        *,
        on: ColumnNameOrSelector,
        index: ColumnNameOrSelector | None = None,
        values: ColumnNameOrSelector | None = None,
        aggregate_function: str | Expr | None = None,
        maintain_order: bool = True,
        sort_columns: bool = False,
        separator: str = "_"
    ) -> DataFrame:
        """Pivot DataFrame."""

    def unpivot(
        self,
        on: ColumnNameOrSelector | None = None,
        *,
        index: ColumnNameOrSelector | None = None,
        variable_name: str | None = None,
        value_name: str | None = None
    ) -> DataFrame:
        """Unpivot DataFrame."""

    def melt(
        self,
        id_vars: ColumnNameOrSelector | None = None,
        value_vars: ColumnNameOrSelector | None = None,
        *,
        variable_name: str | None = None,
        value_name: str | None = None
    ) -> DataFrame:
        """Melt DataFrame from wide to long format."""

    # Utilities
    def head(self, n: int = 5) -> DataFrame:
        """Get first n rows."""

    def tail(self, n: int = 5) -> DataFrame:
        """Get last n rows."""

    def slice(self, offset: int, length: int | None = None) -> DataFrame:
        """Slice DataFrame."""

    def limit(self, n: int) -> DataFrame:
        """Limit to n rows."""

    def sample(
        self,
        n: int | None = None,
        *,
        fraction: float | None = None,
        with_replacement: bool = False,
        shuffle: bool = False,
        seed: int | None = None
    ) -> DataFrame:
        """Sample rows."""

    def unique(
        self,
        subset: ColumnNameOrSelector | None = None,
        *,
        keep: str = "any",
        maintain_order: bool = False
    ) -> DataFrame:
        """Get unique rows."""

    def drop_nulls(self, subset: ColumnNameOrSelector | None = None) -> DataFrame:
        """Drop rows with null values."""

    def fill_null(
        self,
        value: Any = None,
        strategy: str | None = None,
        limit: int | None = None,
        *,
        matches_supertype: bool = True
    ) -> DataFrame:
        """Fill null values."""

    # Conversion
    def lazy(self) -> LazyFrame:
        """Convert to LazyFrame."""

    def to_series(self, index: int = 0) -> Series:
        """Convert to Series."""

    def to_dict(self, *, as_series: bool = True) -> dict:
        """Convert to dictionary."""

    def to_dicts(self) -> list[dict]:
        """Convert to list of dictionaries."""

    def to_numpy(self, *, structured: bool = False, order: str = "c") -> np.ndarray:
        """Convert to numpy array."""

    def to_pandas(self, **kwargs) -> pd.DataFrame:
        """Convert to pandas DataFrame."""

    def to_arrow(self) -> pa.Table:
        """Convert to PyArrow table."""

    # I/O Operations
    def write_csv(
        self,
        file: str | Path | BytesIO,
        *,
        include_bom: bool = False,
        include_header: bool = True,
        separator: str = ",",
        line_terminator: str = "\n",
        quote_char: str = '"',
        batch_size: int = 1024,
        datetime_format: str | None = None,
        date_format: str | None = None,
        time_format: str | None = None,
        float_scientific: bool | None = None,
        float_precision: int | None = None,
        null_value: str = ""
    ) -> None:
        """Write to CSV file."""

    def write_parquet(
        self,
        file: str | Path | BytesIO,
        *,
        compression: str = "zstd",
        compression_level: int | None = None,
        statistics: bool | dict[str, bool] = True,
        row_group_size: int | None = None,
        data_page_size: int | None = None,
        maintain_order: bool = True
    ) -> None:
        """Write to Parquet file."""

    def write_json(self, file: str | Path | BytesIO, *, pretty: bool = False, row_oriented: bool = False) -> None:
        """Write to JSON file."""

    # Analysis
    def describe(self, *, percentiles: Sequence[float] = (0.25, 0.5, 0.75)) -> DataFrame:
        """Generate descriptive statistics."""

    def equals(self, other: DataFrame, *, null_equal: bool = True) -> bool:
        """Check equality with another DataFrame."""

    def is_duplicated(self) -> Series:
        """Check for duplicated rows."""

    def is_unique(self) -> Series:
        """Check for unique rows."""
```

### LazyFrame

Lazy evaluation version of DataFrame that builds a computation graph for optimized query execution.

```python { .api }
class LazyFrame:
    # Properties
    @property
    def columns(self) -> list[str]:
        """Column names."""

    @property
    def dtypes(self) -> list[type]:
        """Column data types."""

    @property
    def schema(self) -> dict[str, type]:
        """Schema as {column: dtype} dict."""

    @property
    def width(self) -> int:
        """Number of columns."""

    # Query Execution
    def collect(
        self,
        *,
        predicate_pushdown: bool = True,
        projection_pushdown: bool = True,
        simplify_expression: bool = True,
        slice_pushdown: bool = True,
        comm_subplan_elim: bool = True,
        comm_subexpr_elim: bool = True,
        cluster_with_columns: bool = True,
        streaming: bool = False
    ) -> DataFrame:
        """Execute lazy query and return DataFrame."""

    def collect_async(self, *, gevent: bool = False) -> Awaitable[DataFrame]:
        """Execute lazy query asynchronously."""

    def fetch(
        self,
        n_rows: int = 500,
        *,
        type_coercion: bool = True,
        predicate_pushdown: bool = True,
        projection_pushdown: bool = True,
        simplify_expression: bool = True,
        slice_pushdown: bool = True,
        comm_subplan_elim: bool = True,
        comm_subexpr_elim: bool = True,
        streaming: bool = False
    ) -> DataFrame:
        """Execute lazy query for first n rows."""

    def explain(
        self,
        *,
        format: str = "plain",
        optimized: bool = True,
        type_coercion: bool = True,
        predicate_pushdown: bool = True,
        projection_pushdown: bool = True,
        simplify_expression: bool = True,
        slice_pushdown: bool = True,
        comm_subplan_elim: bool = True,
        comm_subexpr_elim: bool = True,
        streaming: bool = False,
        tree_format: bool | None = None
    ) -> str:
        """Show query execution plan."""

    # Transformations (same interface as DataFrame but lazy)
    def select(self, *exprs: IntoExpr) -> LazyFrame: ...
    def filter(self, predicate: IntoExpr) -> LazyFrame: ...
    def with_columns(self, *exprs: IntoExpr, **named_exprs: IntoExpr) -> LazyFrame: ...
    def drop(self, *columns: str) -> LazyFrame: ...
    def rename(self, mapping: dict[str, str]) -> LazyFrame: ...
    def sort(self, by: ColumnNameOrSelector, *, descending: bool = False) -> LazyFrame: ...
    def reverse(self) -> LazyFrame: ...
    def cast(self, dtypes: dict[str, type] | type, *, strict: bool = True) -> LazyFrame: ...

    # Grouping Operations
    def group_by(self, *by: IntoExpr, maintain_order: bool = False) -> LazyGroupBy: ...
    def rolling(self, index_column: str, *, period: str) -> RollingGroupBy: ...

    # Joins
    def join(self, other: LazyFrame, on: str | list[str], how: str = "inner", **kwargs) -> LazyFrame: ...
    def join_asof(self, other: LazyFrame, **kwargs) -> LazyFrame: ...

    # Utilities
    def head(self, n: int = 5) -> LazyFrame: ...
    def tail(self, n: int = 5) -> LazyFrame: ...
    def slice(self, offset: int, length: int | None = None) -> LazyFrame: ...
    def limit(self, n: int) -> LazyFrame: ...

    # Streaming Sinks
    def sink_parquet(
        self,
        path: str | Path,
        *,
        compression: str = "zstd",
        maintain_order: bool = True,
        **kwargs
    ) -> DataFrame:
        """Write to Parquet file using streaming engine."""

    def sink_csv(self, path: str | Path, **kwargs) -> DataFrame:
        """Write to CSV file using streaming engine."""
```

### Series

One-dimensional data structure representing a single column of data.

```python { .api }
class Series:
    def __init__(
        self,
        name: str | None = None,
        values: Sequence[Any] | None = None,
        dtype: type | None = None,
        *,
        strict: bool = True,
        nan_to_null: bool = False
    ):
        """
        Create a Series.

        Parameters:
        - name: Series name
        - values: Data values
        - dtype: Data type
        - strict: Strict type checking
        - nan_to_null: Convert NaN to null
        """

    # Properties
    @property
    def dtype(self) -> type:
        """Data type."""

    @property
    def name(self) -> str:
        """Series name."""

    @property
    def shape(self) -> tuple[int]:
        """Shape as (length,) tuple."""

    # Arithmetic Operations
    def __add__(self, other: Any) -> Series: ...
    def __sub__(self, other: Any) -> Series: ...
    def __mul__(self, other: Any) -> Series: ...
    def __truediv__(self, other: Any) -> Series: ...
    def __floordiv__(self, other: Any) -> Series: ...
    def __mod__(self, other: Any) -> Series: ...
    def __pow__(self, other: Any) -> Series: ...

    # Comparison Operations
    def eq(self, other: Any) -> Series:
        """Element-wise equality."""

    def ne(self, other: Any) -> Series:
        """Element-wise inequality."""

    def lt(self, other: Any) -> Series:
        """Element-wise less than."""

    def le(self, other: Any) -> Series:
        """Element-wise less than or equal."""

    def gt(self, other: Any) -> Series:
        """Element-wise greater than."""

    def ge(self, other: Any) -> Series:
        """Element-wise greater than or equal."""

    # Aggregations
    def sum(self) -> Any:
        """Sum of values."""

    def mean(self) -> float | None:
        """Mean of values."""

    def max(self) -> Any:
        """Maximum value."""

    def min(self) -> Any:
        """Minimum value."""

    def std(self, ddof: int = 1) -> float | None:
        """Standard deviation."""

    def var(self, ddof: int = 1) -> float | None:
        """Variance."""

    def median(self) -> float | None:
        """Median value."""

    def quantile(self, quantile: float, interpolation: str = "nearest") -> float | None:
        """Quantile value."""

    # Data Access
    def get(self, index: int) -> Any:
        """Get value by index."""

    def item(self, index: int | None = None) -> Any:
        """Get single item."""

    def gather(self, indices: list[int] | Series) -> Series:
        """Gather values by indices."""

    # Transformations
    def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Series:
        """Sort Series."""

    def reverse(self) -> Series:
        """Reverse Series."""

    def cast(self, dtype: type, *, strict: bool = True) -> Series:
        """Cast to different type."""

    def rename(self, name: str) -> Series:
        """Rename Series."""

    # Utilities
    def drop_nulls(self) -> Series:
        """Drop null values."""

    def fill_null(self, value: Any = None, strategy: str | None = None) -> Series:
        """Fill null values."""

    def unique(self, *, maintain_order: bool = False) -> Series:
        """Get unique values."""

    def value_counts(self, *, sort: bool = False, parallel: bool = False) -> DataFrame:
        """Count unique values."""

    # Conversion
    def to_frame(self, name: str | None = None) -> DataFrame:
        """Convert to DataFrame."""

    def to_list(self) -> list[Any]:
        """Convert to Python list."""

    def to_numpy(self, *, zero_copy_only: bool = False, writable: bool = False) -> np.ndarray:
        """Convert to numpy array."""

    def to_pandas(self, **kwargs) -> pd.Series:
        """Convert to pandas Series."""

    # Namespaces
    @property
    def str(self) -> StringNameSpace:
        """String operations namespace."""

    @property
    def dt(self) -> DateTimeNameSpace:
        """DateTime operations namespace."""

    @property
    def list(self) -> ListNameSpace:
        """List operations namespace."""

    @property
    def struct(self) -> StructNameSpace:
        """Struct operations namespace."""
```
628
629
### Expr
630
631
Expression object for building complex lazy computations and transformations.
632
633
```python { .api }
634
class Expr:
635
# Arithmetic Operations
636
def __add__(self, other: Any) -> Expr: ...
637
def __sub__(self, other: Any) -> Expr: ...
638
def __mul__(self, other: Any) -> Expr: ...
639
def __truediv__(self, other: Any) -> Expr: ...
640
641
# Aggregations
642
def sum(self) -> Expr:
643
"""Sum aggregation."""
644
645
def mean(self) -> Expr:
646
"""Mean aggregation."""
647
648
def max(self) -> Expr:
649
"""Maximum aggregation."""
650
651
def min(self) -> Expr:
652
"""Minimum aggregation."""
653
654
def count(self) -> Expr:
655
"""Count aggregation."""
656
657
def std(self, ddof: int = 1) -> Expr:
658
"""Standard deviation."""
659
660
def var(self, ddof: int = 1) -> Expr:
661
"""Variance."""
662
663
# Window Functions
664
def over(self, *partition_by: IntoExpr, order_by: IntoExpr | None = None) -> Expr:
665
"""Window function over partitions."""
666
667
def rolling_sum(self, window_size: int | str, weights: list[float] | None = None) -> Expr:
668
"""Rolling sum."""
669
670
def rolling_mean(self, window_size: int | str, weights: list[float] | None = None) -> Expr:
671
"""Rolling mean."""
672
673
# Conditional Logic
674
def when(self, predicate: Expr) -> ExprWhenThen:
675
"""Start conditional expression."""
676
677
def then(self, statement: IntoExpr) -> ExprWhenThen:
678
"""Then clause in conditional."""
679
680
def otherwise(self, statement: IntoExpr) -> Expr:
681
"""Else clause in conditional."""
682
683
# Transformations
684
def cast(self, dtype: type, *, strict: bool = True) -> Expr:
685
"""Cast to different type."""
686
687
def alias(self, name: str) -> Expr:
688
"""Alias expression."""
689
690
def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Expr:
691
"""Sort expression."""
692
693
def reverse(self) -> Expr:
694
"""Reverse expression."""
695
696
# Utilities
697
def is_null(self) -> Expr:
698
"""Check for null values."""
699
700
def is_not_null(self) -> Expr:
701
"""Check for non-null values."""
702
703
def fill_null(self, value: Any = None, strategy: str | None = None) -> Expr:
704
"""Fill null values."""
705
706
def drop_nulls(self) -> Expr:
707
"""Drop null values."""
708
709
# Namespaces
710
@property
711
def str(self) -> ExprStringNameSpace:
712
"""String operations namespace."""
713
714
@property
715
def dt(self) -> ExprDateTimeNameSpace:
716
"""DateTime operations namespace."""
717
718
@property
719
def list(self) -> ExprListNameSpace:
720
"""List operations namespace."""
721
722
@property
723
def arr(self) -> ExprArrayNameSpace:
724
"""Array operations namespace."""
725
726
@property
727
def struct(self) -> ExprStructNameSpace:
728
"""Struct operations namespace."""
729
730
@property
731
def cat(self) -> ExprCategoricalNameSpace:
732
"""Categorical operations namespace."""
733
734
@property
735
def bin(self) -> ExprBinaryNameSpace:
736
"""Binary operations namespace."""
737
738
@property
739
def name(self) -> ExprNameNameSpace:
740
"""Name operations namespace."""
741
742
@property
743
def meta(self) -> ExprMetaNameSpace:
744
"""Meta operations namespace."""
745
```

## Usage Examples

### DataFrame Operations

```python
import polars as pl

# Create DataFrame
df = pl.DataFrame({
    "id": [1, 2, 3, 4],
    "name": ["Alice", "Bob", "Charlie", "Diana"],
    "age": [25, 30, 35, 28],
    "salary": [50000, 60000, 70000, 55000]
})

# Select and transform columns
result = df.select([
    pl.col("name"),
    pl.col("age"),
    (pl.col("salary") * 1.1).alias("new_salary")
]).filter(pl.col("age") > 25)

print(result)
```

### LazyFrame Operations

```python
# Create LazyFrame and build query
lazy_df = (
    pl.scan_csv("large_file.csv")
    .filter(pl.col("date") >= "2023-01-01")
    .group_by("category")
    .agg([
        pl.col("amount").sum().alias("total_amount"),
        pl.col("id").count().alias("count")
    ])
    .sort("total_amount", descending=True)
)

# Execute query
result = lazy_df.collect()
```

### Series Operations

```python
# Create Series
s = pl.Series("values", [1, 2, 3, 4, 5])

# Perform operations
doubled = s * 2
mean_val = s.mean()
unique_vals = s.unique()

# String operations
text_series = pl.Series("text", ["hello", "world", "polars"])
upper_text = text_series.str.to_uppercase()
```

### Expression Building

```python
# Complex expressions
expr = (
    pl.when(pl.col("age") < 30)
    .then(pl.col("salary") * 0.8)
    .when(pl.col("age") < 40)
    .then(pl.col("salary") * 0.9)
    .otherwise(pl.col("salary"))
    .alias("adjusted_salary")
)

# Use in DataFrame
df_with_adjustment = df.with_columns(expr)
```

### QueryOptFlags

Configuration class for controlling query optimization behavior in LazyFrame operations, allowing fine-grained control over performance optimizations.

```python { .api }
class QueryOptFlags:
    def __init__(
        self,
        *,
        predicate_pushdown: Optional[bool] = None,
        projection_pushdown: Optional[bool] = None,
        simplify_expression: Optional[bool] = None,
        slice_pushdown: Optional[bool] = None,
        comm_subplan_elim: Optional[bool] = None,
        comm_subexpr_elim: Optional[bool] = None,
        cluster_with_columns: Optional[bool] = None,
        collapse_joins: Optional[bool] = None,
        check_order_observe: Optional[bool] = None,
        fast_projection: Optional[bool] = None,
    ):
        """
        Configure query optimization flags.

        Parameters:
        - predicate_pushdown: Push predicates down in the query tree
        - projection_pushdown: Push projections down in the query tree
        - simplify_expression: Simplify expressions during optimization
        - slice_pushdown: Push slice operations down in the query tree
        - comm_subplan_elim: Eliminate common subplans
        - comm_subexpr_elim: Eliminate common subexpressions
        - cluster_with_columns: Cluster with_columns operations
        - collapse_joins: Collapse consecutive joins
        - check_order_observe: Check if ordering is observed
        - fast_projection: Use fast projection when possible
        """

    @staticmethod
    def none(**kwargs) -> QueryOptFlags:
        """Create QueryOptFlags with all optimizations disabled."""

    def update(self, **kwargs) -> QueryOptFlags:
        """Update optimization flags."""
```

### GPUEngine

Configuration class for GPU-accelerated processing in LazyFrame operations.

```python { .api }
class GPUEngine:
    def __init__(self):
        """
        Configure GPU engine for accelerated processing.

        Note: GPU processing requires compatible hardware and drivers.
        """
```