or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md, core-classes.md, data-types.md, expressions.md, functions.md, index.md, io-operations.md, sql-functionality.md

docs/core-classes.md

# Core Classes

The fundamental data structures that form the foundation of polars-lts-cpu's data manipulation capabilities. These classes provide different approaches to working with tabular data, from eager evaluation to lazy optimization.

## Capabilities

### DataFrame

Two-dimensional data structure representing tabular data with rows and columns, providing eager evaluation for immediate operations.

```python { .api }
class DataFrame:
    def __init__(
        self,
        data: Any = None,
        schema: Optional[SchemaDict] = None,
        schema_overrides: Optional[SchemaDict] = None,
        orient: Optional[str] = None,
        infer_schema_length: Optional[int] = 100,
        nan_to_null: bool = False
    ):
        """
        Create a DataFrame from various data sources.

        Parameters:
        - data: Data source (dict, list, numpy array, pandas DataFrame, etc.)
        - schema: Schema specification as {column: dtype} dict
        - schema_overrides: Override inferred types for specific columns
        - orient: Data orientation ('col' or 'row')
        - infer_schema_length: Number of rows to scan for schema inference
        - nan_to_null: Convert NaN values to null
        """

32

33

    # Properties
    @property
    def shape(self) -> tuple[int, int]:
        """Returns (height, width) tuple."""

    @property
    def height(self) -> int:
        """Number of rows."""

    @property
    def width(self) -> int:
        """Number of columns."""

    @property
    def columns(self) -> list[str]:
        """Column names."""

    @property
    def dtypes(self) -> list[type]:
        """Column data types."""

    @property
    def schema(self) -> dict[str, type]:
        """Schema as {column: dtype} dict."""

57

58

    # Data Selection and Filtering
    def select(self, *exprs: IntoExpr) -> DataFrame:
        """Select columns using expressions."""

    def filter(self, predicate: IntoExpr) -> DataFrame:
        """Filter rows based on predicate."""

    def with_columns(self, *exprs: IntoExpr, **named_exprs: IntoExpr) -> DataFrame:
        """Add or modify columns."""

    def drop(self, *columns: str) -> DataFrame:
        """Drop columns."""

    def rename(self, mapping: dict[str, str]) -> DataFrame:
        """Rename columns."""

73

74

# Data Access

75

def get_column(self, name: str) -> Series:

76

"""Get column as Series."""

77

78

def get_columns(self) -> list[Series]:

79

"""Get all columns as list of Series."""

80

81

def row(self, index: int, *, named: bool = False) -> tuple | dict:

82

"""Get single row."""

83

84

def rows(self, *, named: bool = False) -> list[tuple] | list[dict]:

85

"""Get all rows."""

86

87

def item(self, row: int = None, column: str | int = None) -> Any:

88

"""Get single item."""

89

90

# Transformations

91

def sort(

92

self,

93

by: ColumnNameOrSelector | list[ColumnNameOrSelector],

94

*,

95

descending: bool | list[bool] = False,

96

nulls_last: bool = False

97

) -> DataFrame:

98

"""Sort DataFrame."""

99

100

def reverse(self) -> DataFrame:

101

"""Reverse row order."""

102

103

def transpose(

104

self,

105

*,

106

include_header: bool = False,

107

header_name: str = "column",

108

column_names: str | list[str] | None = None

109

) -> DataFrame:

110

"""Transpose DataFrame."""

111

112

def cast(self, dtypes: dict[str, type] | type, *, strict: bool = True) -> DataFrame:

113

"""Cast column types."""

114

115

# Aggregations

116

def sum(self, *, axis: int = 0) -> DataFrame | Series:

117

"""Sum values."""

118

119

def mean(self, *, axis: int = 0) -> DataFrame | Series:

120

"""Mean of values."""

121

122

def max(self, *, axis: int = 0) -> DataFrame | Series:

123

"""Maximum values."""

124

125

def min(self, *, axis: int = 0) -> DataFrame | Series:

126

"""Minimum values."""

127

128

def std(self, *, ddof: int = 1, axis: int = 0) -> DataFrame | Series:

129

"""Standard deviation."""

130

131

def var(self, *, ddof: int = 1, axis: int = 0) -> DataFrame | Series:

132

"""Variance."""

133

134

def median(self, *, axis: int = 0) -> DataFrame | Series:

135

"""Median values."""

136

137

def quantile(self, quantile: float, *, interpolation: str = "nearest", axis: int = 0) -> DataFrame | Series:

138

"""Quantile values."""

139

140

# Horizontal Operations

141

def sum_horizontal(self, *exprs: IntoExpr) -> DataFrame:

142

"""Sum values horizontally across columns."""

143

144

def mean_horizontal(self, *exprs: IntoExpr) -> DataFrame:

145

"""Mean values horizontally across columns."""

146

147

def max_horizontal(self, *exprs: IntoExpr) -> DataFrame:

148

"""Maximum values horizontally across columns."""

149

150

def min_horizontal(self, *exprs: IntoExpr) -> DataFrame:

151

"""Minimum values horizontally across columns."""

152

153

# Grouping Operations

154

def group_by(

155

self,

156

*by: IntoExpr,

157

maintain_order: bool = False,

158

**named_by: IntoExpr

159

) -> GroupBy:

160

"""Group DataFrame by expressions."""

161

162

def rolling(

163

self,

164

index_column: str,

165

*,

166

period: str | timedelta,

167

offset: str | timedelta | None = None,

168

closed: str = "right",

169

by: str | list[str] | None = None,

170

check_sorted: bool = True

171

) -> RollingGroupBy:

172

"""Create rolling window groupby."""

173

174

# Joins

175

def join(

176

self,

177

other: DataFrame,

178

on: str | list[str] | None = None,

179

how: str = "inner",

180

*,

181

left_on: str | list[str] | None = None,

182

right_on: str | list[str] | None = None,

183

suffix: str = "_right",

184

validate: str = "m:m",

185

join_nulls: bool = False

186

) -> DataFrame:

187

"""Join with another DataFrame."""

188

189

def join_asof(

190

self,

191

other: DataFrame,

192

*,

193

left_on: str | None = None,

194

right_on: str | None = None,

195

on: str | None = None,

196

by_left: str | list[str] | None = None,

197

by_right: str | list[str] | None = None,

198

by: str | list[str] | None = None,

199

strategy: str = "backward",

200

suffix: str = "_right",

201

tolerance: str | int | float | None = None,

202

allow_exact_matches: bool = True

203

) -> DataFrame:

204

"""Perform asof join."""

205

206

# Reshaping

207

def pivot(

208

self,

209

*,

210

on: ColumnNameOrSelector,

211

index: ColumnNameOrSelector | None = None,

212

values: ColumnNameOrSelector | None = None,

213

aggregate_function: str | Expr | None = None,

214

maintain_order: bool = True,

215

sort_columns: bool = False,

216

separator: str = "_"

217

) -> DataFrame:

218

"""Pivot DataFrame."""

219

220

def unpivot(

221

self,

222

on: ColumnNameOrSelector | None = None,

223

*,

224

index: ColumnNameOrSelector | None = None,

225

variable_name: str | None = None,

226

value_name: str | None = None

227

) -> DataFrame:

228

"""Unpivot DataFrame."""

229

230

def melt(

231

self,

232

id_vars: ColumnNameOrSelector | None = None,

233

value_vars: ColumnNameOrSelector | None = None,

234

*,

235

variable_name: str | None = None,

236

value_name: str | None = None

237

) -> DataFrame:

238

"""Melt DataFrame from wide to long format."""

239

240

# Utilities

241

def head(self, n: int = 5) -> DataFrame:

242

"""Get first n rows."""

243

244

def tail(self, n: int = 5) -> DataFrame:

245

"""Get last n rows."""

246

247

def slice(self, offset: int, length: int | None = None) -> DataFrame:

248

"""Slice DataFrame."""

249

250

def limit(self, n: int) -> DataFrame:

251

"""Limit to n rows."""

252

253

def sample(

254

self,

255

n: int | None = None,

256

*,

257

fraction: float | None = None,

258

with_replacement: bool = False,

259

shuffle: bool = False,

260

seed: int | None = None

261

) -> DataFrame:

262

"""Sample rows."""

263

264

def unique(

265

self,

266

subset: ColumnNameOrSelector | None = None,

267

*,

268

keep: str = "any",

269

maintain_order: bool = False

270

) -> DataFrame:

271

"""Get unique rows."""

272

273

def drop_nulls(self, subset: ColumnNameOrSelector | None = None) -> DataFrame:

274

"""Drop rows with null values."""

275

276

def fill_null(

277

self,

278

value: Any = None,

279

strategy: str | None = None,

280

limit: int | None = None,

281

*,

282

matches_supertype: bool = True

283

) -> DataFrame:

284

"""Fill null values."""

285

286

# Conversion

287

def lazy(self) -> LazyFrame:

288

"""Convert to LazyFrame."""

289

290

def to_series(self, index: int = 0) -> Series:

291

"""Convert to Series."""

292

293

def to_dict(self, *, as_series: bool = True) -> dict:

294

"""Convert to dictionary."""

295

296

def to_dicts(self) -> list[dict]:

297

"""Convert to list of dictionaries."""

298

299

def to_numpy(self, *, structured: bool = False, order: str = "c") -> np.ndarray:

300

"""Convert to numpy array."""

301

302

def to_pandas(self, **kwargs) -> pd.DataFrame:

303

"""Convert to pandas DataFrame."""

304

305

def to_arrow(self) -> pa.Table:

306

"""Convert to PyArrow table."""

307

308

# I/O Operations

309

def write_csv(

310

self,

311

file: str | Path | BytesIO,

312

*,

313

include_bom: bool = False,

314

include_header: bool = True,

315

separator: str = ",",

316

line_terminator: str = "\n",

317

quote_char: str = '"',

318

batch_size: int = 1024,

319

datetime_format: str | None = None,

320

date_format: str | None = None,

321

time_format: str | None = None,

322

float_scientific: bool | None = None,

323

float_precision: int | None = None,

324

null_value: str = ""

325

) -> None:

326

"""Write to CSV file."""

327

328

def write_parquet(

329

self,

330

file: str | Path | BytesIO,

331

*,

332

compression: str = "zstd",

333

compression_level: int | None = None,

334

statistics: bool | dict[str, bool] = True,

335

row_group_size: int | None = None,

336

data_page_size: int | None = None,

337

maintain_order: bool = True

338

) -> None:

339

"""Write to Parquet file."""

340

341

def write_json(self, file: str | Path | BytesIO, *, pretty: bool = False, row_oriented: bool = False) -> None:

342

"""Write to JSON file."""

343

344

# Analysis

345

def describe(self, *, percentiles: Sequence[float] = (0.25, 0.5, 0.75)) -> DataFrame:

346

"""Generate descriptive statistics."""

347

348

def equals(self, other: DataFrame, *, null_equal: bool = True) -> bool:

349

"""Check equality with another DataFrame."""

350

351

def is_duplicated(self) -> Series:

352

"""Check for duplicated rows."""

353

354

def is_unique(self) -> Series:

355

"""Check for unique rows."""

356

```

357

358

### LazyFrame

359

360

Lazy evaluation version of DataFrame that builds a computation graph for optimized query execution.

361

362

```python { .api }

363

class LazyFrame:

364

# Properties

365

@property

366

def columns(self) -> list[str]:

367

"""Column names."""

368

369

@property

370

def dtypes(self) -> list[type]:

371

"""Column data types."""

372

373

@property

374

def schema(self) -> dict[str, type]:

375

"""Schema as {column: dtype} dict."""

376

377

@property

378

def width(self) -> int:

379

"""Number of columns."""

380

381

# Query Execution

382

def collect(

383

self,

384

*,

385

predicate_pushdown: bool = True,

386

projection_pushdown: bool = True,

387

simplify_expression: bool = True,

388

slice_pushdown: bool = True,

389

comm_subplan_elim: bool = True,

390

comm_subexpr_elim: bool = True,

391

cluster_with_columns: bool = True,

392

streaming: bool = False

393

) -> DataFrame:

394

"""Execute lazy query and return DataFrame."""

395

396

def collect_async(self, *, gevent: bool = False) -> Awaitable[DataFrame]:

397

"""Execute lazy query asynchronously."""

398

399

def fetch(

400

self,

401

n_rows: int = 500,

402

*,

403

type_coercion: bool = True,

404

predicate_pushdown: bool = True,

405

projection_pushdown: bool = True,

406

simplify_expression: bool = True,

407

slice_pushdown: bool = True,

408

comm_subplan_elim: bool = True,

409

comm_subexpr_elim: bool = True,

410

streaming: bool = False

411

) -> DataFrame:

412

"""Execute lazy query for first n rows."""

413

414

def explain(

415

self,

416

*,

417

format: str = "plain",

418

optimized: bool = True,

419

type_coercion: bool = True,

420

predicate_pushdown: bool = True,

421

projection_pushdown: bool = True,

422

simplify_expression: bool = True,

423

slice_pushdown: bool = True,

424

comm_subplan_elim: bool = True,

425

comm_subexpr_elim: bool = True,

426

streaming: bool = False,

427

tree_format: bool | None = None

428

) -> str:

429

"""Show query execution plan."""

430

431

# Transformations (same interface as DataFrame but lazy)

432

def select(self, *exprs: IntoExpr) -> LazyFrame: ...

433

def filter(self, predicate: IntoExpr) -> LazyFrame: ...

434

def with_columns(self, *exprs: IntoExpr, **named_exprs: IntoExpr) -> LazyFrame: ...

435

def drop(self, *columns: str) -> LazyFrame: ...

436

def rename(self, mapping: dict[str, str]) -> LazyFrame: ...

437

def sort(self, by: ColumnNameOrSelector, *, descending: bool = False) -> LazyFrame: ...

438

def reverse(self) -> LazyFrame: ...

439

def cast(self, dtypes: dict[str, type] | type, *, strict: bool = True) -> LazyFrame: ...

440

441

# Grouping Operations

442

def group_by(self, *by: IntoExpr, maintain_order: bool = False) -> LazyGroupBy: ...

443

def rolling(self, index_column: str, *, period: str) -> RollingGroupBy: ...

444

445

# Joins

446

def join(self, other: LazyFrame, on: str | list[str], how: str = "inner", **kwargs) -> LazyFrame: ...

447

def join_asof(self, other: LazyFrame, **kwargs) -> LazyFrame: ...

448

449

# Utilities

450

def head(self, n: int = 5) -> LazyFrame: ...

451

def tail(self, n: int = 5) -> LazyFrame: ...

452

def slice(self, offset: int, length: int | None = None) -> LazyFrame: ...

453

def limit(self, n: int) -> LazyFrame: ...

454

455

    # Streaming Sinks
    def sink_parquet(
        self,
        path: str | Path,
        *,
        compression: str = "zstd",
        maintain_order: bool = True,
        **kwargs
    ) -> None:
        """Write to Parquet file using streaming engine."""

    def sink_csv(self, path: str | Path, **kwargs) -> None:
        """Write to CSV file using streaming engine."""
```

469

470

### Series

471

472

One-dimensional data structure representing a single column of data.

473

474

```python { .api }

475

class Series:

476

def __init__(

477

self,

478

name: str | None = None,

479

values: Sequence[Any] | None = None,

480

dtype: type | None = None,

481

*,

482

strict: bool = True,

483

nan_to_null: bool = False

484

):

485

"""

486

Create a Series.

487

488

Parameters:

489

- name: Series name

490

- values: Data values

491

- dtype: Data type

492

- strict: Strict type checking

493

- nan_to_null: Convert NaN to null

494

"""

495

496

# Properties

497

@property

498

def dtype(self) -> type:

499

"""Data type."""

500

501

@property

502

def name(self) -> str:

503

"""Series name."""

504

505

@property

506

def shape(self) -> tuple[int]:

507

"""Shape as (length,) tuple."""

508

509

# Arithmetic Operations

510

def __add__(self, other: Any) -> Series: ...

511

def __sub__(self, other: Any) -> Series: ...

512

def __mul__(self, other: Any) -> Series: ...

513

def __truediv__(self, other: Any) -> Series: ...

514

def __floordiv__(self, other: Any) -> Series: ...

515

def __mod__(self, other: Any) -> Series: ...

516

def __pow__(self, other: Any) -> Series: ...

517

518

# Comparison Operations

519

def eq(self, other: Any) -> Series:

520

"""Element-wise equality."""

521

522

def ne(self, other: Any) -> Series:

523

"""Element-wise inequality."""

524

525

def lt(self, other: Any) -> Series:

526

"""Element-wise less than."""

527

528

def le(self, other: Any) -> Series:

529

"""Element-wise less than or equal."""

530

531

def gt(self, other: Any) -> Series:

532

"""Element-wise greater than."""

533

534

def ge(self, other: Any) -> Series:

535

"""Element-wise greater than or equal."""

536

537

# Aggregations

538

def sum(self) -> Any:

539

"""Sum of values."""

540

541

def mean(self) -> float | None:

542

"""Mean of values."""

543

544

def max(self) -> Any:

545

"""Maximum value."""

546

547

def min(self) -> Any:

548

"""Minimum value."""

549

550

def std(self, ddof: int = 1) -> float | None:

551

"""Standard deviation."""

552

553

def var(self, ddof: int = 1) -> float | None:

554

"""Variance."""

555

556

def median(self) -> float | None:

557

"""Median value."""

558

559

def quantile(self, quantile: float, interpolation: str = "nearest") -> float | None:

560

"""Quantile value."""

561

562

# Data Access

563

def get(self, index: int) -> Any:

564

"""Get value by index."""

565

566

def item(self, index: int | None = None) -> Any:

567

"""Get single item."""

568

569

def gather(self, indices: list[int] | Series) -> Series:

570

"""Gather values by indices."""

571

572

# Transformations

573

def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Series:

574

"""Sort Series."""

575

576

def reverse(self) -> Series:

577

"""Reverse Series."""

578

579

def cast(self, dtype: type, *, strict: bool = True) -> Series:

580

"""Cast to different type."""

581

582

def rename(self, name: str) -> Series:

583

"""Rename Series."""

584

585

# Utilities

586

def drop_nulls(self) -> Series:

587

"""Drop null values."""

588

589

def fill_null(self, value: Any = None, strategy: str | None = None) -> Series:

590

"""Fill null values."""

591

592

def unique(self, *, maintain_order: bool = False) -> Series:

593

"""Get unique values."""

594

595

def value_counts(self, *, sort: bool = False, parallel: bool = False) -> DataFrame:

596

"""Count unique values."""

597

598

# Conversion

599

def to_frame(self, name: str | None = None) -> DataFrame:

600

"""Convert to DataFrame."""

601

602

def to_list(self) -> list[Any]:

603

"""Convert to Python list."""

604

605

def to_numpy(self, *, zero_copy_only: bool = False, writable: bool = False) -> np.ndarray:

606

"""Convert to numpy array."""

607

608

def to_pandas(self, **kwargs) -> pd.Series:

609

"""Convert to pandas Series."""

610

611

# Namespaces

612

@property

613

def str(self) -> StringNameSpace:

614

"""String operations namespace."""

615

616

@property

617

def dt(self) -> DateTimeNameSpace:

618

"""DateTime operations namespace."""

619

620

@property

621

def list(self) -> ListNameSpace:

622

"""List operations namespace."""

623

624

@property

625

def struct(self) -> StructNameSpace:

626

"""Struct operations namespace."""

627

```

628

629

### Expr

630

631

Expression object for building complex lazy computations and transformations.

632

633

```python { .api }

634

class Expr:

635

# Arithmetic Operations

636

def __add__(self, other: Any) -> Expr: ...

637

def __sub__(self, other: Any) -> Expr: ...

638

def __mul__(self, other: Any) -> Expr: ...

639

def __truediv__(self, other: Any) -> Expr: ...

640

641

# Aggregations

642

def sum(self) -> Expr:

643

"""Sum aggregation."""

644

645

def mean(self) -> Expr:

646

"""Mean aggregation."""

647

648

def max(self) -> Expr:

649

"""Maximum aggregation."""

650

651

def min(self) -> Expr:

652

"""Minimum aggregation."""

653

654

def count(self) -> Expr:

655

"""Count aggregation."""

656

657

def std(self, ddof: int = 1) -> Expr:

658

"""Standard deviation."""

659

660

def var(self, ddof: int = 1) -> Expr:

661

"""Variance."""

662

663

# Window Functions

664

def over(self, *partition_by: IntoExpr, order_by: IntoExpr | None = None) -> Expr:

665

"""Window function over partitions."""

666

667

def rolling_sum(self, window_size: int | str, weights: list[float] | None = None) -> Expr:

668

"""Rolling sum."""

669

670

def rolling_mean(self, window_size: int | str, weights: list[float] | None = None) -> Expr:

671

"""Rolling mean."""

672

673

# Conditional Logic

674

def when(self, predicate: Expr) -> ExprWhenThen:

675

"""Start conditional expression."""

676

677

def then(self, statement: IntoExpr) -> ExprWhenThen:

678

"""Then clause in conditional."""

679

680

def otherwise(self, statement: IntoExpr) -> Expr:

681

"""Else clause in conditional."""

682

683

# Transformations

684

def cast(self, dtype: type, *, strict: bool = True) -> Expr:

685

"""Cast to different type."""

686

687

def alias(self, name: str) -> Expr:

688

"""Alias expression."""

689

690

def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Expr:

691

"""Sort expression."""

692

693

def reverse(self) -> Expr:

694

"""Reverse expression."""

695

696

# Utilities

697

def is_null(self) -> Expr:

698

"""Check for null values."""

699

700

def is_not_null(self) -> Expr:

701

"""Check for non-null values."""

702

703

def fill_null(self, value: Any = None, strategy: str | None = None) -> Expr:

704

"""Fill null values."""

705

706

def drop_nulls(self) -> Expr:

707

"""Drop null values."""

708

709

# Namespaces

710

@property

711

def str(self) -> ExprStringNameSpace:

712

"""String operations namespace."""

713

714

@property

715

def dt(self) -> ExprDateTimeNameSpace:

716

"""DateTime operations namespace."""

717

718

@property

719

def list(self) -> ExprListNameSpace:

720

"""List operations namespace."""

721

722

@property

723

def arr(self) -> ExprArrayNameSpace:

724

"""Array operations namespace."""

725

726

@property

727

def struct(self) -> ExprStructNameSpace:

728

"""Struct operations namespace."""

729

730

@property

731

def cat(self) -> ExprCategoricalNameSpace:

732

"""Categorical operations namespace."""

733

734

@property

735

def bin(self) -> ExprBinaryNameSpace:

736

"""Binary operations namespace."""

737

738

@property

739

def name(self) -> ExprNameNameSpace:

740

"""Name operations namespace."""

741

742

@property

743

def meta(self) -> ExprMetaNameSpace:

744

"""Meta operations namespace."""

745

```

746

747

## Usage Examples

748

749

### DataFrame Operations

750

751

```python
import polars as pl

# Create DataFrame
df = pl.DataFrame({
    "id": [1, 2, 3, 4],
    "name": ["Alice", "Bob", "Charlie", "Diana"],
    "age": [25, 30, 35, 28],
    "salary": [50000, 60000, 70000, 55000]
})

# Select and transform columns
result = df.select([
    pl.col("name"),
    pl.col("age"),
    (pl.col("salary") * 1.1).alias("new_salary")
]).filter(pl.col("age") > 25)

print(result)
```

771

772

### LazyFrame Operations

773

774

```python
# Create LazyFrame and build query
lazy_df = (
    pl.scan_csv("large_file.csv")
    .filter(pl.col("date") >= "2023-01-01")
    .group_by("category")
    .agg([
        pl.col("amount").sum().alias("total_amount"),
        pl.col("id").count().alias("count")
    ])
    .sort("total_amount", descending=True)
)

# Execute query
result = lazy_df.collect()
```

790

791

### Series Operations

792

793

```python
# Create Series
s = pl.Series("values", [1, 2, 3, 4, 5])

# Perform operations
doubled = s * 2
mean_val = s.mean()
unique_vals = s.unique()

# String operations
text_series = pl.Series("text", ["hello", "world", "polars"])
upper_text = text_series.str.to_uppercase()
```

806

807

### Expression Building

808

809

```python
# Complex expressions
expr = (
    pl.when(pl.col("age") < 30)
    .then(pl.col("salary") * 0.8)
    .when(pl.col("age") < 40)
    .then(pl.col("salary") * 0.9)
    .otherwise(pl.col("salary"))
    .alias("adjusted_salary")
)

# Use in DataFrame
df_with_adjustment = df.with_columns(expr)
```

823

824

### QueryOptFlags

825

826

Configuration class for controlling query optimization behavior in LazyFrame operations, allowing fine-grained control over performance optimizations.

827

828

```python { .api }

829

class QueryOptFlags:

830

def __init__(

831

self,

832

*,

833

predicate_pushdown: Optional[bool] = None,

834

projection_pushdown: Optional[bool] = None,

835

simplify_expression: Optional[bool] = None,

836

slice_pushdown: Optional[bool] = None,

837

comm_subplan_elim: Optional[bool] = None,

838

comm_subexpr_elim: Optional[bool] = None,

839

cluster_with_columns: Optional[bool] = None,

840

collapse_joins: Optional[bool] = None,

841

check_order_observe: Optional[bool] = None,

842

fast_projection: Optional[bool] = None,

843

):

844

"""

845

Configure query optimization flags.

846

847

Parameters:

848

- predicate_pushdown: Push predicates down in the query tree

849

- projection_pushdown: Push projections down in the query tree

850

- simplify_expression: Simplify expressions during optimization

851

- slice_pushdown: Push slice operations down in the query tree

852

- comm_subplan_elim: Eliminate common subplans

853

- comm_subexpr_elim: Eliminate common subexpressions

854

- cluster_with_columns: Cluster with_columns operations

855

- collapse_joins: Collapse consecutive joins

856

- check_order_observe: Check if ordering is observed

857

- fast_projection: Use fast projection when possible

858

"""

859

860

@staticmethod

861

def none(**kwargs) -> QueryOptFlags:

862

"""Create QueryOptFlags with all optimizations disabled."""

863

864

def update(self, **kwargs) -> QueryOptFlags:

865

"""Update optimization flags."""

866

```

867

868

### GPUEngine

869

870

Configuration class for GPU-accelerated processing in LazyFrame operations.

871

872

```python { .api }

873

class GPUEngine:

874

def __init__(self):

875

"""

876

Configure GPU engine for accelerated processing.

877

878

Note: GPU processing requires compatible hardware and drivers.

879

"""

880

```