or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

config-utilities.md core-data-structures.md data-types.md expressions.md functions.md index.md io-operations.md selectors.md sql-interface.md

docs/functions.md

0

# Functions and Utilities

1

2

Built-in functions for aggregation, transformations, date/time operations, string manipulation, and utility functions. These functions work with expressions and can be used in DataFrame operations, LazyFrame queries, and standalone computations.

3

4

## Capabilities

5

6

### Aggregation Functions

7

8

Statistical aggregation functions that operate on columns or expressions.

9

10

```python { .api }

11

def sum(*exprs) -> Expr:

12

"""

13

Sum all values in the given columns (column-wise aggregation; use sum_horizontal for row-wise sums).

14

15

Parameters:

16

- exprs: Expressions to sum

17

18

Returns:

19

Sum expression

20

"""

21

22

def mean(*exprs) -> Expr:

23

"""

24

Calculate the mean of all values in the given columns (column-wise aggregation; use mean_horizontal for row-wise means).

25

26

Parameters:

27

- exprs: Expressions to average

28

29

Returns:

30

Mean expression

31

"""

32

33

def max(*exprs) -> Expr:

34

"""

35

Get the maximum value in the given columns (column-wise aggregation; use max_horizontal for row-wise maxima).

36

37

Parameters:

38

- exprs: Expressions to compare

39

40

Returns:

41

Maximum expression

42

"""

43

44

def min(*exprs) -> Expr:

45

"""

46

Get the minimum value in the given columns (column-wise aggregation; use min_horizontal for row-wise minima).

47

48

Parameters:

49

- exprs: Expressions to compare

50

51

Returns:

52

Minimum expression

53

"""

54

55

def count(*exprs) -> Expr:

56

"""

57

Count non-null values.

58

59

Parameters:

60

- exprs: Expressions to count (optional)

61

62

Returns:

63

Count expression

64

"""

65

66

def all(*exprs) -> Expr:

67

"""

68

Check if all values are true.

69

70

Parameters:

71

- exprs: Boolean expressions

72

73

Returns:

74

Boolean expression (all true)

75

"""

76

77

def any(*exprs) -> Expr:

78

"""

79

Check if any values are true.

80

81

Parameters:

82

- exprs: Boolean expressions

83

84

Returns:

85

Boolean expression (any true)

86

"""

87

88

# Horizontal operations

89

def sum_horizontal(*exprs) -> Expr:

90

"""Sum across columns horizontally."""

91

92

def mean_horizontal(*exprs) -> Expr:

93

"""Mean across columns horizontally."""

94

95

def max_horizontal(*exprs) -> Expr:

96

"""Maximum across columns horizontally."""

97

98

def min_horizontal(*exprs) -> Expr:

99

"""Minimum across columns horizontally."""

100

101

def all_horizontal(*exprs) -> Expr:

102

"""All true across columns horizontally."""

103

104

def any_horizontal(*exprs) -> Expr:

105

"""Any true across columns horizontally."""

106

107

# Cumulative functions

108

def cum_sum(*exprs) -> Expr:

109

"""Cumulative sum."""

110

111

def cum_sum_horizontal(*exprs) -> Expr:

112

"""Cumulative sum horizontally."""

113

114

def cum_count(*exprs) -> Expr:

115

"""Cumulative count."""

116

117

def cum_fold(acc: Expr, function: Callable[[Expr, Expr], Expr], *exprs: Expr, include_init: bool = False) -> Expr:

118

"""

119

Cumulative fold operation.

120

121

Parameters:

122

- acc: Initial accumulator value

123

- function: Fold function

124

- exprs: Expressions to fold

125

- include_init: Include initial value

126

127

Returns:

128

Cumulative fold expression

129

"""

130

131

def cum_reduce(function: Callable[[Expr, Expr], Expr], *exprs: Expr) -> Expr:

132

"""

133

Cumulative reduce operation.

134

135

Parameters:

136

- function: Reduce function

137

- exprs: Expressions to reduce

138

139

Returns:

140

Cumulative reduce expression

141

"""

142

```

143

144

### Date and Time Functions

145

146

Functions for creating and manipulating temporal data.

147

148

```python { .api }

149

def date(year: int | Expr, month: int | Expr, day: int | Expr) -> Expr:

150

"""

151

Create date from year, month, day.

152

153

Parameters:

154

- year: Year value

155

- month: Month value (1-12)

156

- day: Day value (1-31)

157

158

Returns:

159

Date expression

160

"""

161

162

def datetime(

163

year: int | Expr,

164

month: int | Expr,

165

day: int | Expr,

166

hour: int | Expr = 0,

167

minute: int | Expr = 0,

168

second: int | Expr = 0,

169

microsecond: int | Expr = 0,

170

*,

171

time_unit: TimeUnit = "us",

172

time_zone: str | None = None

173

) -> Expr:

174

"""

175

Create datetime from components.

176

177

Parameters:

178

- year: Year value

179

- month: Month value (1-12)

180

- day: Day value (1-31)

181

- hour: Hour value (0-23)

182

- minute: Minute value (0-59)

183

- second: Second value (0-59)

184

- microsecond: Microsecond value

185

- time_unit: Time precision

186

- time_zone: Timezone

187

188

Returns:

189

Datetime expression

190

"""

191

192

def time(hour: int | Expr, minute: int | Expr, second: int | Expr, microsecond: int | Expr = 0) -> Expr:

193

"""

194

Create time from components.

195

196

Parameters:

197

- hour: Hour value (0-23)

198

- minute: Minute value (0-59)

199

- second: Second value (0-59)

200

- microsecond: Microsecond value

201

202

Returns:

203

Time expression

204

"""

205

206

def duration(

207

*,

208

weeks: int | Expr | None = None,

209

days: int | Expr | None = None,

210

hours: int | Expr | None = None,

211

minutes: int | Expr | None = None,

212

seconds: int | Expr | None = None,

213

milliseconds: int | Expr | None = None,

214

microseconds: int | Expr | None = None,

215

nanoseconds: int | Expr | None = None,

216

time_unit: TimeUnit = "us"

217

) -> Expr:

218

"""

219

Create duration from components.

220

221

Parameters:

222

- weeks: Number of weeks

223

- days: Number of days

224

- hours: Number of hours

225

- minutes: Number of minutes

226

- seconds: Number of seconds

227

- milliseconds: Number of milliseconds

228

- microseconds: Number of microseconds

229

- nanoseconds: Number of nanoseconds

230

- time_unit: Time unit for result

231

232

Returns:

233

Duration expression

234

"""

235

236

def from_epoch(column: str | Expr, time_unit: TimeUnit = "s") -> Expr:

237

"""

238

Convert epoch timestamp to datetime.

239

240

Parameters:

241

- column: Column with epoch values

242

- time_unit: Unit of epoch values

243

244

Returns:

245

Datetime expression

246

"""

247

```

248

249

### Range Functions

250

251

Functions for generating sequences and ranges of values.

252

253

```python { .api }

254

def arange(start: int | Expr, end: int | Expr, step: int = 1, *, eager: bool = False) -> Expr | Series:

255

"""

256

Generate range of integers.

257

258

Parameters:

259

- start: Start value (inclusive)

260

- end: End value (exclusive)

261

- step: Step size

262

- eager: Return Series instead of Expr

263

264

Returns:

265

Range expression or Series

266

"""

267

268

def date_range(

269

start: date | datetime | IntoExpr,

270

end: date | datetime | IntoExpr,

271

interval: str | timedelta = "1d",

272

*,

273

closed: ClosedInterval = "both",

274

time_unit: TimeUnit | None = None,

275

time_zone: str | None = None,

276

eager: bool = False

277

) -> Expr | Series:

278

"""

279

Generate date range.

280

281

Parameters:

282

- start: Start date

283

- end: End date

284

- interval: Time interval ("1d", "1h", etc.)

285

- closed: Include endpoints ("both", "left", "right", "none")

286

- time_unit: Time precision

287

- time_zone: Timezone

288

- eager: Return Series instead of Expr

289

290

Returns:

291

Date range expression or Series

292

"""

293

294

def date_ranges(

295

start: IntoExpr,

296

end: IntoExpr,

297

interval: str | timedelta = "1d",

298

*,

299

closed: ClosedInterval = "both",

300

time_unit: TimeUnit | None = None,

301

time_zone: str | None = None,

302

eager: bool = False

303

) -> Expr | Series:

304

"""Generate multiple date ranges."""

305

306

def datetime_range(

307

start: datetime | IntoExpr,

308

end: datetime | IntoExpr,

309

interval: str | timedelta = "1d",

310

*,

311

closed: ClosedInterval = "both",

312

time_unit: TimeUnit = "us",

313

time_zone: str | None = None,

314

eager: bool = False

315

) -> Expr | Series:

316

"""Generate datetime range."""

317

318

def datetime_ranges(

319

start: IntoExpr,

320

end: IntoExpr,

321

interval: str | timedelta = "1d",

322

**kwargs

323

) -> Expr | Series:

324

"""Generate multiple datetime ranges."""

325

326

def time_range(

327

start: time | IntoExpr | None = None,

328

end: time | IntoExpr | None = None,

329

interval: str | timedelta = "1h",

330

*,

331

closed: ClosedInterval = "both",

332

eager: bool = False

333

) -> Expr | Series:

334

"""Generate time range."""

335

336

def time_ranges(

337

start: IntoExpr,

338

end: IntoExpr,

339

interval: str | timedelta = "1h",

340

**kwargs

341

) -> Expr | Series:

342

"""Generate multiple time ranges."""

343

344

def int_range(start: int | Expr, end: int | Expr, step: int = 1, *, eager: bool = False) -> Expr | Series:

345

"""Generate integer range."""

346

347

def int_ranges(start: IntoExpr, end: IntoExpr, step: int | IntoExpr = 1, *, eager: bool = False) -> Expr | Series:

348

"""Generate multiple integer ranges."""

349

350

def linear_space(start: float | Expr, end: float | Expr, n: int, *, endpoint: bool = True, eager: bool = False) -> Expr | Series:

351

"""

352

Generate linearly spaced values.

353

354

Parameters:

355

- start: Start value

356

- end: End value

357

- n: Number of values

358

- endpoint: Include endpoint

359

- eager: Return Series instead of Expr

360

361

Returns:

362

Linear space expression or Series

363

"""

364

365

def linear_spaces(start: IntoExpr, end: IntoExpr, n: int | IntoExpr, **kwargs) -> Expr | Series:

366

"""Generate multiple linear spaces."""

367

```

368

369

### String Functions

370

371

Functions for string manipulation and processing.

372

373

```python { .api }

374

def concat_str(exprs: IntoExpr, *, separator: str = "", ignore_nulls: bool = False) -> Expr:

375

"""

376

Concatenate strings horizontally.

377

378

Parameters:

379

- exprs: String expressions to concatenate

380

- separator: Separator between strings

381

- ignore_nulls: Skip null values

382

383

Returns:

384

Concatenated string expression

385

"""

386

387

def format(format_str: str, *args: IntoExpr) -> Expr:

388

"""

389

Format string with placeholders.

390

391

Parameters:

392

- format_str: Format string with {} placeholders

393

- args: Values to substitute

394

395

Returns:

396

Formatted string expression

397

"""

398

399

def escape_regex(pattern: str | Expr) -> Expr:

400

"""

401

Escape regex special characters.

402

403

Parameters:

404

- pattern: Pattern to escape

405

406

Returns:

407

Escaped pattern expression

408

"""

409

```

410

411

### List and Array Functions

412

413

Functions for working with list and array data types.

414

415

```python { .api }

416

def concat_list(exprs: IntoExpr, *, ignore_nulls: bool = False) -> Expr:

417

"""

418

Concatenate lists horizontally.

419

420

Parameters:

421

- exprs: List expressions to concatenate

422

- ignore_nulls: Skip null values

423

424

Returns:

425

Concatenated list expression

426

"""

427

428

def concat_arr(exprs: IntoExpr, *, ignore_nulls: bool = False) -> Expr:

429

"""

430

Concatenate arrays horizontally.

431

432

Parameters:

433

- exprs: Array expressions to concatenate

434

- ignore_nulls: Skip null values

435

436

Returns:

437

Concatenated array expression

438

"""

439

```

440

441

### Statistical Functions

442

443

Advanced statistical and mathematical functions.

444

445

```python { .api }

446

def std(*exprs) -> Expr:

447

"""Calculate standard deviation."""

448

449

def var(*exprs) -> Expr:

450

"""Calculate variance."""

451

452

def median(*exprs) -> Expr:

453

"""Calculate median."""

454

455

def quantile(*exprs, quantile: float, interpolation: str = "nearest") -> Expr:

456

"""

457

Calculate quantile.

458

459

Parameters:

460

- exprs: Expressions to analyze

461

- quantile: Quantile value (0.0 to 1.0)

462

- interpolation: Interpolation method

463

464

Returns:

465

Quantile expression

466

"""

467

468

def n_unique(*exprs) -> Expr:

469

"""Count unique values."""

470

471

def approx_n_unique(*exprs) -> Expr:

472

"""Approximate unique count (faster for large data)."""

473

474

def corr(a: IntoExpr, b: IntoExpr, *, method: CorrelationMethod = "pearson", ddof: int = 1) -> Expr:

475

"""

476

Calculate correlation coefficient.

477

478

Parameters:

479

- a: First expression

480

- b: Second expression

481

- method: Correlation method ("pearson", "spearman")

482

- ddof: Delta degrees of freedom

483

484

Returns:

485

Correlation expression

486

"""

487

488

def cov(a: IntoExpr, b: IntoExpr) -> Expr:

489

"""

490

Calculate covariance.

491

492

Parameters:

493

- a: First expression

494

- b: Second expression

495

496

Returns:

497

Covariance expression

498

"""

499

500

def rolling_corr(a: IntoExpr, b: IntoExpr, window_size: int, *, min_periods: int | None = None) -> Expr:

501

"""

502

Calculate rolling correlation.

503

504

Parameters:

505

- a: First expression

506

- b: Second expression

507

- window_size: Rolling window size

508

- min_periods: Minimum periods for calculation

509

510

Returns:

511

Rolling correlation expression

512

"""

513

514

def rolling_cov(a: IntoExpr, b: IntoExpr, window_size: int, *, min_periods: int | None = None) -> Expr:

515

"""

516

Calculate rolling covariance.

517

518

Parameters:

519

- a: First expression

520

- b: Second expression

521

- window_size: Rolling window size

522

- min_periods: Minimum periods for calculation

523

524

Returns:

525

Rolling covariance expression

526

"""

527

```

528

529

### Trigonometric Functions

530

531

Mathematical trigonometric operations.

532

533

```python { .api }

534

def arctan2(y: IntoExpr, x: IntoExpr) -> Expr:

535

"""

536

Calculate arctangent of y/x in radians.

537

538

Parameters:

539

- y: Y coordinate expression

540

- x: X coordinate expression

541

542

Returns:

543

Arctangent expression

544

"""

545

546

def arctan2d(y: IntoExpr, x: IntoExpr) -> Expr:

547

"""

548

Calculate arctangent of y/x in degrees.

549

550

Parameters:

551

- y: Y coordinate expression

552

- x: X coordinate expression

553

554

Returns:

555

Arctangent expression in degrees

556

"""

557

```

558

559

### Transform and Utility Functions

560

561

General utility and transformation functions.

562

563

```python { .api }

564

def map_batches(exprs: IntoExpr, function: Callable[[DataFrame], DataFrame], return_dtype: DataType | None = None, *, inference_size: int = 256) -> Expr:

565

"""

566

Apply function to batches of data.

567

568

Parameters:

569

- exprs: Input expressions

570

- function: Function to apply to DataFrame batches

571

- return_dtype: Expected return data type

572

- inference_size: Size for type inference

573

574

Returns:

575

Mapped expression

576

"""

577

578

def map_groups(exprs: IntoExpr, function: Callable[[DataFrame], DataFrame], return_dtype: DataType | None = None) -> Expr:

579

"""

580

Apply function to groups.

581

582

Parameters:

583

- exprs: Input expressions

584

- function: Function to apply to each group

585

- return_dtype: Expected return data type

586

587

Returns:

588

Mapped expression

589

"""

590

591

def fold(acc: IntoExpr, function: Callable[[Expr, Expr], Expr], exprs: Sequence[IntoExpr] | Expr) -> Expr:

592

"""

593

Fold operation with accumulator.

594

595

Parameters:

596

- acc: Initial accumulator value

597

- function: Fold function

598

- exprs: Expressions to fold

599

600

Returns:

601

Folded expression

602

"""

603

604

def reduce(function: Callable[[Expr, Expr], Expr], exprs: Sequence[IntoExpr] | Expr) -> Expr:

605

"""

606

Reduce operation.

607

608

Parameters:

609

- function: Reduce function

610

- exprs: Expressions to reduce

611

612

Returns:

613

Reduced expression

614

"""

615

616

def coalesce(*exprs: IntoExpr) -> Expr:

617

"""

618

Return first non-null value.

619

620

Parameters:

621

- exprs: Expressions to check

622

623

Returns:

624

Coalesced expression

625

"""

626

627

def element() -> Expr:

628

"""Get element at current index in context."""

629

630

def first(*exprs: IntoExpr) -> Expr:

631

"""Get first value."""

632

633

def last(*exprs: IntoExpr) -> Expr:

634

"""Get last value."""

635

636

def head(*exprs: IntoExpr, n: int = 10) -> Expr:

637

"""Get first n values."""

638

639

def tail(*exprs: IntoExpr, n: int = 10) -> Expr:

640

"""Get last n values."""

641

642

def nth(n: int, *exprs: IntoExpr) -> Expr:

643

"""

644

Get nth value.

645

646

Parameters:

647

- n: Index to retrieve

648

- exprs: Input expressions

649

650

Returns:

651

Nth value expression

652

"""

653

654

def len() -> Expr:

655

"""Get length/count."""

656

657

def implode(*exprs: IntoExpr) -> Expr:

658

"""Combine values into list."""

659

660

def explode(*exprs: IntoExpr) -> Expr:

661

"""Explode list elements to separate rows."""

662

663

def repeat(value: IntoExpr, n: int | IntoExpr, *, eager: bool = False) -> Expr | Series:

664

"""

665

Repeat value n times.

666

667

Parameters:

668

- value: Value to repeat

669

- n: Number of repetitions

670

- eager: Return Series instead of Expr

671

672

Returns:

673

Repeated values expression or Series

674

"""

675

676

def ones(n: int | IntoExpr, *, eager: bool = False) -> Expr | Series:

677

"""Create array of ones."""

678

679

def zeros(n: int | IntoExpr, *, eager: bool = False) -> Expr | Series:

680

"""Create array of zeros."""

681

682

def exclude(*columns: str | DataType) -> Expr:

683

"""

684

Exclude columns from selection.

685

686

Parameters:

687

- columns: Column names or types to exclude

688

689

Returns:

690

Exclusion expression

691

"""

692

693

def groups() -> Expr:

694

"""Get group indices in group-by context."""

695

696

def field(*names: str) -> Expr:

697

"""

698

Access struct field(s).

699

700

Parameters:

701

- names: Field name(s) to access

702

703

Returns:

704

Field access expression

705

"""

706

707

def arg_sort_by(*exprs: IntoExpr, descending: bool | Sequence[bool] = False, nulls_last: bool = False) -> Expr:

708

"""

709

Get indices that would sort by given expressions.

710

711

Parameters:

712

- exprs: Sort key expressions

713

- descending: Sort in descending order

714

- nulls_last: Place nulls at end

715

716

Returns:

717

Sorting indices expression

718

"""

719

720

def arg_where(condition: IntoExpr) -> Expr:

721

"""

722

Get indices where condition is true.

723

724

Parameters:

725

- condition: Boolean condition

726

727

Returns:

728

Indices expression

729

"""

730

731

def row_index(name: str = "row_nr", offset: int = 0) -> Expr:

732

"""

733

Add row index column.

734

735

Parameters:

736

- name: Column name for row index

737

- offset: Starting value

738

739

Returns:

740

Row index expression

741

"""

742

743

def business_day_count(start: IntoExpr, end: IntoExpr) -> Expr:

744

"""

745

Count business days between dates.

746

747

Parameters:

748

- start: Start date expression

749

- end: End date expression

750

751

Returns:

752

Business day count expression

753

"""

754

755

def dtype_of(*exprs: IntoExpr) -> Expr:

756

"""Get data type of expression."""

757

758

def self_dtype() -> Expr:

759

"""Get data type of current context."""

760

761

def set_random_seed(seed: int) -> None:

762

"""

763

Set random seed for reproducible results.

764

765

Parameters:

766

- seed: Random seed value

767

"""

768

```

769

770

### DataFrame Operations

771

772

Functions that operate on entire DataFrames.

773

774

```python { .api }

775

def concat(items: Iterable[DataFrame | LazyFrame], *, how: UnionStrategy = "vertical", rechunk: bool = False, parallel: bool = True) -> DataFrame | LazyFrame:

776

"""

777

Concatenate DataFrames or LazyFrames.

778

779

Parameters:

780

- items: DataFrames/LazyFrames to concatenate

781

- how: Concatenation strategy ("vertical", "horizontal", "diagonal")

782

- rechunk: Rechunk after concatenation

783

- parallel: Use parallel processing

784

785

Returns:

786

Concatenated DataFrame or LazyFrame

787

"""

788

789

def align_frames(*frames: DataFrame, on: str | Expr | None = None, select: str | Expr | list[str | Expr] | None = None, reverse: bool | list[bool] = False) -> list[DataFrame]:

790

"""

791

Align DataFrames by common column values.

792

793

Parameters:

794

- frames: DataFrames to align

795

- on: Column(s) to align on

796

- select: Columns to select after alignment

797

- reverse: Reverse sort order

798

799

Returns:

800

List of aligned DataFrames

801

"""

802

803

def collect_all(lazy_frames: Sequence[LazyFrame], *, type_coercion: bool = True, predicate_pushdown: bool = True, projection_pushdown: bool = True, simplify_expression: bool = True, slice_pushdown: bool = True, comm_subplan_elim: bool = True, comm_subexpr_elim: bool = True, cluster_with_columns: bool = True, no_optimization: bool = False, streaming: bool = False) -> list[DataFrame]:

804

"""

805

Collect multiple LazyFrames in parallel.

806

807

Parameters:

808

- lazy_frames: LazyFrames to collect

809

- Various optimization flags: Same as LazyFrame.collect()

810

811

Returns:

812

List of collected DataFrames

813

"""

814

815

def collect_all_async(lazy_frames: Sequence[LazyFrame], **kwargs) -> Awaitable[list[DataFrame]]:

816

"""Collect multiple LazyFrames asynchronously."""

817

818

def explain_all(lazy_frames: Sequence[LazyFrame], **kwargs) -> str:

819

"""Get execution plans for multiple LazyFrames."""

820

821

def select(*exprs: IntoExpr, **named_exprs: IntoExpr) -> Expr:

822

"""Create selection expression."""

823

824

def struct(*exprs: IntoExpr, **named_exprs: IntoExpr) -> Expr:

825

"""Create struct from expressions."""

826

827

def struct_with_fields(fields: Sequence[str], *exprs: IntoExpr) -> Expr:

828

"""

829

Create struct with named fields.

830

831

Parameters:

832

- fields: Field names

833

- exprs: Field value expressions

834

835

Returns:

836

Struct expression

837

"""

838

```

839

840

## Usage Examples

841

842

### Aggregation Operations

843

844

```python

845

import polars as pl

846

847

df = pl.DataFrame({

848

"group": ["A", "A", "B", "B", "C"],

849

"value1": [1, 2, 3, 4, 5],

850

"value2": [10, 20, 30, 40, 50]

851

})

852

853

# Basic aggregations

854

result = df.group_by("group").agg([

855

pl.sum("value1"),

856

pl.mean("value2"),

857

pl.max_horizontal("value1", "value2").max().alias("max_of_both")

858

])

859

860

# Horizontal aggregations

861

result = df.with_columns([

862

pl.sum_horizontal("value1", "value2").alias("total"),

863

pl.mean_horizontal("value1", "value2").alias("average")

864

])

865

```

866

867

### Date and Time Operations

868

869

```python

870

from datetime import date

# Create date ranges

871

dates = pl.date_range(

872

start=date(2023, 1, 1),

873

end=date(2023, 12, 31),

874

interval="1d",

875

eager=True

876

)

877

878

# Create datetime with components

879

df = pl.DataFrame({

880

"year": [2023, 2023, 2023],

881

"month": [1, 2, 3],

882

"day": [15, 20, 25]

883

}).with_columns([

884

pl.date("year", "month", "day").alias("date"),

885

pl.datetime("year", "month", "day", 12, 30, 0).alias("datetime")

886

])

887

888

# Duration calculations

889

df = df.with_columns([

890

pl.duration(days=30).alias("thirty_days"),

891

pl.duration(hours=2, minutes=30).alias("two_thirty")

892

])

893

```

894

895

### String Operations

896

897

```python

898

df = pl.DataFrame({

899

"first": ["John", "Jane", "Bob"],

900

"last": ["Doe", "Smith", "Johnson"],

901

"title": ["Mr", "Ms", "Dr"]

902

})

903

904

# String concatenation

905

result = df.with_columns([

906

pl.concat_str([

907

pl.col("title"),

908

pl.lit(" "),

909

pl.col("first"),

910

pl.lit(" "),

911

pl.col("last")

912

]).alias("full_name"),

913

914

# Format strings

915

pl.format("Hello, {} {}!", pl.col("first"), pl.col("last")).alias("greeting")

916

])

917

```

918

919

### Mathematical Operations

920

921

```python

922

df = pl.DataFrame({

923

"x": [1.0, 2.0, 3.0, 4.0],

924

"y": [2.0, 3.0, 4.0, 5.0],

925

"values": [10, 20, 30, 40]

926

})

927

928

# Trigonometric functions

929

result = df.with_columns([

930

pl.arctan2("y", "x").alias("angle_rad"),

931

pl.arctan2d("y", "x").alias("angle_deg")

932

])

933

934

# Statistical functions

935

result = df.select([

936

pl.std("values").alias("std_dev"),

937

pl.var("values").alias("variance"),

938

pl.median("values").alias("median"),

939

pl.quantile("values", quantile=0.75).alias("q75")

940

])

941

```

942

943

### Advanced Transformations

944

945

```python

946

# Fold operation (row-wise sum across columns, using a custom accumulator function)

947

result = df.with_columns([

948

pl.fold(

949

acc=pl.lit(0),

950

function=lambda acc, x: acc + x,

951

exprs=["value1", "value2"]

952

).alias("cumulative_sum")

953

])

954

955

# Coalesce (first non-null value)

956

df_with_nulls = pl.DataFrame({

957

"a": [1, None, 3],

958

"b": [None, 2, None],

959

"c": [10, 20, 30]

960

})

961

962

result = df_with_nulls.with_columns([

963

pl.coalesce("a", "b", "c").alias("first_non_null")

964

])

965

966

# Map operations for complex transformations

967

def custom_transform(batch: pl.DataFrame) -> pl.DataFrame:

968

return batch.with_columns([

969

(pl.col("value") * 2 + 1).alias("transformed")

970

])

971

972

result = df.with_columns([

973

pl.map_batches("value1", custom_transform, return_dtype=pl.Int64)

974

])

975

```

976

977

### Window Functions and Rankings

978

979

```python

980

df = pl.DataFrame({

981

"group": ["A", "A", "A", "B", "B", "B"],

982

"value": [10, 20, 30, 15, 25, 35]

983

})

984

985

# Window functions with partitioning

986

result = df.with_columns([

987

pl.col("value").sum().over("group").alias("group_total"),

988

pl.col("value").rank().over("group").alias("rank_in_group"),

989

pl.col("value").shift(1).over("group").alias("previous_value")

990

])

991

```

992

993

### Working with Lists and Arrays

994

995

```python

996

df = pl.DataFrame({

997

"lists": [[1, 2, 3], [4, 5], [6, 7, 8]]

998

})

999

1000

# List operations

1001

result = df.with_columns([

1002

pl.col("lists").list.len().alias("list_length"),

1003

pl.col("lists").list.sum().alias("list_sum"),

1004

pl.col("lists").list.get(0).alias("first_element")

1005

])

1006

1007

# Concatenate lists

1008

df2 = pl.DataFrame({

1009

"list1": [[1, 2], [3, 4]],

1010

"list2": [[5, 6], [7, 8]]

1011

})

1012

1013

result = df2.with_columns([

1014

pl.concat_list("list1", "list2").alias("combined")

1015

])

1016

```