or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md, core-classes.md, data-types.md, expressions.md, functions.md, index.md, io-operations.md, sql-functionality.md

docs/functions.md

0

# Functions

1

2

Polars provides a rich collection of functions for data manipulation, including aggregation, lazy operations, range generation, mathematical operations, and general-purpose utilities. These functions are the building blocks for complex data transformations and computations.

3

4

## Capabilities

5

6

### Column Selection and Manipulation

7

8

Core functions for selecting and manipulating columns in DataFrames and LazyFrames.

9

10

```python { .api }

11

def col(name: str | list[str]) -> Expr:

12

"""

13

Select column(s) by name.

14

15

Parameters:

16

- name: Column name(s) to select

17

18

Returns:

19

- Expr: Column selection expression

20

"""

21

22

def lit(value: Any) -> Expr:

23

"""

24

Create literal value expression.

25

26

Parameters:

27

- value: Literal value (int, float, str, bool, etc.)

28

29

Returns:

30

- Expr: Literal expression

31

"""

32

33

def when(predicate: IntoExpr) -> ExprWhenThen:

34

"""

35

Start conditional expression chain.

36

37

Parameters:

38

- predicate: Boolean condition

39

40

Returns:

41

- ExprWhenThen: Conditional expression builder

42

"""

43

44

def exclude(*columns: str | list[str]) -> Expr:

45

"""

46

Exclude specified columns from selection.

47

48

Parameters:

49

- columns: Column names to exclude

50

51

Returns:

52

- Expr: Column exclusion expression

53

"""

54

55

def select(*exprs: IntoExpr) -> Expr:

56

"""

57

Select expressions for DataFrame operations.

58

59

Parameters:

60

- exprs: Expressions to select

61

62

Returns:

63

- Expr: Selection expression

64

"""

65

```

66

67

### Aggregation Functions

68

69

Functions for computing aggregations across rows or columns.

70

71

```python { .api }

72

def sum(*args: IntoExpr) -> Expr:

73

"""

74

Sum values.

75

76

Parameters:

77

- args: Expressions to sum

78

79

Returns:

80

- Expr: Sum expression

81

"""

82

83

def mean(*args: IntoExpr) -> Expr:

84

"""

85

Compute mean of values.

86

87

Parameters:

88

- args: Expressions to average

89

90

Returns:

91

- Expr: Mean expression

92

"""

93

94

def max(*args: IntoExpr) -> Expr:

95

"""

96

Find maximum values.

97

98

Parameters:

99

- args: Expressions to find max of

100

101

Returns:

102

- Expr: Maximum expression

103

"""

104

105

def min(*args: IntoExpr) -> Expr:

106

"""

107

Find minimum values.

108

109

Parameters:

110

- args: Expressions to find min of

111

112

Returns:

113

- Expr: Minimum expression

114

"""

115

116

def count(*args: IntoExpr) -> Expr:

117

"""

118

Count values.

119

120

Parameters:

121

- args: Expressions to count

122

123

Returns:

124

- Expr: Count expression

125

"""

126

127

def median(*args: IntoExpr) -> Expr:

128

"""

129

Compute median of values.

130

131

Parameters:

132

- args: Expressions to find median of

133

134

Returns:

135

- Expr: Median expression

136

"""

137

138

def std(*args: IntoExpr, ddof: int = 1) -> Expr:

139

"""

140

Compute standard deviation.

141

142

Parameters:

143

- args: Expressions to compute std of

144

- ddof: Delta degrees of freedom; the divisor used is N - ddof, where N is the number of elements (default 1)

145

146

Returns:

147

- Expr: Standard deviation expression

148

"""

149

150

def var(*args: IntoExpr, ddof: int = 1) -> Expr:

151

"""

152

Compute variance.

153

154

Parameters:

155

- args: Expressions to compute variance of

156

- ddof: Delta degrees of freedom; the divisor used is N - ddof, where N is the number of elements (default 1)

157

158

Returns:

159

- Expr: Variance expression

160

"""

161

162

def quantile(*args: IntoExpr, quantile: float, interpolation: str = "nearest") -> Expr:

163

"""

164

Compute quantile.

165

166

Parameters:

167

- args: Expressions to compute quantile of

168

- quantile: Quantile value (0.0 to 1.0)

169

- interpolation: Interpolation method (e.g., 'nearest', 'higher', 'lower', 'midpoint', 'linear')

170

171

Returns:

172

- Expr: Quantile expression

173

"""

174

```

175

176

### Horizontal Aggregations

177

178

Functions for computing aggregations across columns horizontally.

179

180

```python { .api }

181

def sum_horizontal(*exprs: IntoExpr) -> Expr:

182

"""

183

Sum values horizontally across columns.

184

185

Parameters:

186

- exprs: Column expressions to sum

187

188

Returns:

189

- Expr: Horizontal sum expression

190

"""

191

192

def mean_horizontal(*exprs: IntoExpr) -> Expr:

193

"""

194

Compute mean horizontally across columns.

195

196

Parameters:

197

- exprs: Column expressions to average

198

199

Returns:

200

- Expr: Horizontal mean expression

201

"""

202

203

def max_horizontal(*exprs: IntoExpr) -> Expr:

204

"""

205

Find maximum horizontally across columns.

206

207

Parameters:

208

- exprs: Column expressions to find max of

209

210

Returns:

211

- Expr: Horizontal maximum expression

212

"""

213

214

def min_horizontal(*exprs: IntoExpr) -> Expr:

215

"""

216

Find minimum horizontally across columns.

217

218

Parameters:

219

- exprs: Column expressions to find min of

220

221

Returns:

222

- Expr: Horizontal minimum expression

223

"""

224

225

def all_horizontal(*exprs: IntoExpr) -> Expr:

226

"""

227

Logical AND horizontally across columns.

228

229

Parameters:

230

- exprs: Boolean column expressions

231

232

Returns:

233

- Expr: Horizontal all expression

234

"""

235

236

def any_horizontal(*exprs: IntoExpr) -> Expr:

237

"""

238

Logical OR horizontally across columns.

239

240

Parameters:

241

- exprs: Boolean column expressions

242

243

Returns:

244

- Expr: Horizontal any expression

245

"""

246

```

247

248

### Boolean Logic Functions

249

250

Functions for boolean operations across columns and rows.

251

252

```python { .api }

253

def all(*args: IntoExpr) -> Expr:

254

"""

255

Check if all values are true.

256

257

Parameters:

258

- args: Boolean expressions

259

260

Returns:

261

- Expr: All expression

262

"""

263

264

def any(*args: IntoExpr) -> Expr:

265

"""

266

Check if any values are true.

267

268

Parameters:

269

- args: Boolean expressions

270

271

Returns:

272

- Expr: Any expression

273

"""

274

```

275

276

### Cumulative Functions

277

278

Functions for cumulative operations and reductions.

279

280

```python { .api }

281

def cum_sum(*args: IntoExpr, reverse: bool = False) -> Expr:

282

"""

283

Compute cumulative sum.

284

285

Parameters:

286

- args: Expressions to compute cumulative sum of

287

- reverse: Compute in reverse order

288

289

Returns:

290

- Expr: Cumulative sum expression

291

"""

292

293

def cum_sum_horizontal(*exprs: IntoExpr) -> Expr:

294

"""

295

Compute cumulative sum horizontally across columns.

296

297

Parameters:

298

- exprs: Column expressions

299

300

Returns:

301

- Expr: Horizontal cumulative sum expression

302

"""

303

304

def cum_count(*args: IntoExpr, reverse: bool = False) -> Expr:

305

"""

306

Compute cumulative count.

307

308

Parameters:

309

- args: Expressions to count

310

- reverse: Compute in reverse order

311

312

Returns:

313

- Expr: Cumulative count expression

314

"""

315

316

def cum_fold(

317

acc: IntoExpr,

318

function: Callable[[Expr, Expr], Expr],

319

*exprs: IntoExpr,

320

include_init: bool = False

321

) -> Expr:

322

"""

323

Cumulatively fold expressions with a function.

324

325

Parameters:

326

- acc: Initial accumulator value

327

- function: Folding function

328

- exprs: Expressions to fold

329

- include_init: Include initial value in result

330

331

Returns:

332

- Expr: Cumulative fold expression

333

"""

334

335

def cum_reduce(expression: Expr) -> Expr:

336

"""

337

Cumulatively reduce expression.

338

339

Parameters:

340

- expression: Expression to reduce

341

342

Returns:

343

- Expr: Cumulative reduce expression

344

"""

345

```

346

347

### Range Functions

348

349

Functions for generating ranges and sequences of values.

350

351

```python { .api }

352

def arange(

353

start: int | IntoExpr,

354

end: int | IntoExpr,

355

step: int = 1,

356

*,

357

eager: bool = False

358

) -> Expr | Series:

359

"""

360

Create range of integers.

361

362

Parameters:

363

- start: Start value (inclusive)

364

- end: End value (exclusive)

365

- step: Step size

366

- eager: Return Series instead of Expr

367

368

Returns:

369

- Expr | Series: Integer range

370

"""

371

372

def int_range(

373

start: int | IntoExpr,

374

end: int | IntoExpr | None = None,

375

step: int = 1,

376

*,

377

eager: bool = False

378

) -> Expr | Series:

379

"""

380

Create range of integers.

381

382

Parameters:

383

- start: Start value (inclusive); if end is None, this value is used as the end and the range starts at 0

384

- end: End value (exclusive)

385

- step: Step size

386

- eager: Return Series instead of Expr

387

388

Returns:

389

- Expr | Series: Integer range

390

"""

391

392

def int_ranges(

393

start: int | IntoExpr,

394

end: int | IntoExpr,

395

step: int = 1,

396

*,

397

eager: bool = False

398

) -> Expr | Series:

399

"""

400

Create multiple integer ranges.

401

402

Parameters:

403

- start: Start values

404

- end: End values

405

- step: Step size

406

- eager: Return Series instead of Expr

407

408

Returns:

409

- Expr | Series: List of integer ranges

410

"""

411

412

def date_range(

413

start: date | datetime | IntoExpr,

414

end: date | datetime | IntoExpr,

415

interval: str | timedelta = "1d",

416

*,

417

closed: str = "both",

418

eager: bool = False

419

) -> Expr | Series:

420

"""

421

Create range of dates.

422

423

Parameters:

424

- start: Start date

425

- end: End date

426

- interval: Date interval (e.g., '1d', '1w', '1mo')

427

- closed: Which endpoints of the range are inclusive ('both', 'left', 'right', 'none')

428

- eager: Return Series instead of Expr

429

430

Returns:

431

- Expr | Series: Date range

432

"""

433

434

def date_ranges(

435

start: date | datetime | IntoExpr,

436

end: date | datetime | IntoExpr,

437

interval: str | timedelta = "1d",

438

*,

439

closed: str = "both",

440

eager: bool = False

441

) -> Expr | Series:

442

"""

443

Create multiple date ranges, one per pair of start/end values. Parameters are the same as for date_range.

444

445

Returns:

446

- Expr | Series: List of date ranges

447

"""

448

449

def datetime_range(

450

start: datetime | IntoExpr,

451

end: datetime | IntoExpr,

452

interval: str | timedelta = "1d",

453

*,

454

closed: str = "both",

455

time_unit: str | None = None,

456

time_zone: str | None = None,

457

eager: bool = False

458

) -> Expr | Series:

459

"""

460

Create range of datetimes.

461

462

Parameters:

463

- start: Start datetime

464

- end: End datetime

465

- interval: Datetime interval

466

- closed: Which endpoints of the range are inclusive ('both', 'left', 'right', 'none')

467

- time_unit: Time precision ('ns', 'us', 'ms')

468

- time_zone: Timezone

469

- eager: Return Series instead of Expr

470

471

Returns:

472

- Expr | Series: Datetime range

473

"""

474

475

def datetime_ranges(

476

start: datetime | IntoExpr,

477

end: datetime | IntoExpr,

478

interval: str | timedelta = "1d",

479

*,

480

closed: str = "both",

481

time_unit: str | None = None,

482

time_zone: str | None = None,

483

eager: bool = False

484

) -> Expr | Series:

485

"""

486

Create multiple datetime ranges, one per pair of start/end values. Parameters are the same as for datetime_range.

487

488

Returns:

489

- Expr | Series: List of datetime ranges

490

"""

491

492

def time_range(

493

start: time | IntoExpr | None = None,

494

end: time | IntoExpr | None = None,

495

interval: str | timedelta = "1h",

496

*,

497

closed: str = "both",

498

eager: bool = False

499

) -> Expr | Series:

500

"""

501

Create range of times.

502

503

Parameters:

504

- start: Start time

505

- end: End time

506

- interval: Time interval

507

- closed: Which endpoints of the range are inclusive ('both', 'left', 'right', 'none')

508

- eager: Return Series instead of Expr

509

510

Returns:

511

- Expr | Series: Time range

512

"""

513

514

def time_ranges(

515

start: time | IntoExpr,

516

end: time | IntoExpr,

517

interval: str | timedelta = "1h",

518

*,

519

closed: str = "both",

520

eager: bool = False

521

) -> Expr | Series:

522

"""

523

Create multiple time ranges, one per pair of start/end values. Parameters are the same as for time_range.

524

525

Returns:

526

- Expr | Series: List of time ranges

527

"""

528

```

529

530

### Linear Space Functions

531

532

Functions for generating linearly spaced values.

533

534

```python { .api }

535

def linear_space(

536

start: int | float | IntoExpr,

537

end: int | float | IntoExpr,

538

num: int,

539

*,

540

endpoint: bool = True,

541

dtype: type = Float64,

542

eager: bool = False

543

) -> Expr | Series:

544

"""

545

Create linearly spaced values.

546

547

Parameters:

548

- start: Start value

549

- end: End value

550

- num: Number of values

551

- endpoint: Include endpoint

552

- dtype: Data type of result

553

- eager: Return Series instead of Expr

554

555

Returns:

556

- Expr | Series: Linearly spaced values

557

"""

558

559

def linear_spaces(

560

start: int | float | IntoExpr,

561

end: int | float | IntoExpr,

562

num: int,

563

*,

564

endpoint: bool = True,

565

dtype: type = Float64,

566

eager: bool = False

567

) -> Expr | Series:

568

"""

569

Create multiple linear spaces, one per pair of start/end values. Parameters are the same as for linear_space.

570

571

Returns:

572

- Expr | Series: List of linearly spaced values

573

"""

574

```

575

576

### Data Type Constructor Functions

577

578

Functions for creating typed literal values and structures.

579

580

```python { .api }

581

def date(year: int, month: int, day: int) -> date:

582

"""

583

Create date value.

584

585

Parameters:

586

- year: Year

587

- month: Month (1-12)

588

- day: Day of month

589

590

Returns:

591

- date: Date object

592

"""

593

594

def datetime(

595

year: int,

596

month: int,

597

day: int,

598

hour: int = 0,

599

minute: int = 0,

600

second: int = 0,

601

microsecond: int = 0,

602

*,

603

time_unit: str = "us",

604

time_zone: str | None = None

605

) -> datetime:

606

"""

607

Create datetime value.

608

609

Parameters:

610

- year: Year

611

- month: Month

612

- day: Day

613

- hour: Hour

614

- minute: Minute

615

- second: Second

616

- microsecond: Microsecond

617

- time_unit: Time precision ('ns', 'us', 'ms')

618

- time_zone: Timezone

619

620

Returns:

621

- datetime: Datetime object

622

"""

623

624

def time(

625

hour: int = 0,

626

minute: int = 0,

627

second: int = 0,

628

microsecond: int = 0

629

) -> time:

630

"""

631

Create time value.

632

633

Parameters:

634

- hour: Hour (0-23)

635

- minute: Minute (0-59)

636

- second: Second (0-59)

637

- microsecond: Microsecond

638

639

Returns:

640

- time: Time object

641

"""

642

643

def duration(

644

*,

645

weeks: int | IntoExpr | None = None,

646

days: int | IntoExpr | None = None,

647

hours: int | IntoExpr | None = None,

648

minutes: int | IntoExpr | None = None,

649

seconds: int | IntoExpr | None = None,

650

milliseconds: int | IntoExpr | None = None,

651

microseconds: int | IntoExpr | None = None,

652

nanoseconds: int | IntoExpr | None = None,

653

time_unit: str = "us"

654

) -> Expr:

655

"""

656

Create duration expression.

657

658

Parameters:

659

- weeks: Number of weeks

660

- days: Number of days

661

- hours: Number of hours

662

- minutes: Number of minutes

663

- seconds: Number of seconds

664

- milliseconds: Number of milliseconds

665

- microseconds: Number of microseconds

666

- nanoseconds: Number of nanoseconds

667

- time_unit: Time precision ('ns', 'us', 'ms')

668

669

Returns:

670

- Expr: Duration expression

671

"""

672

673

def struct(*exprs: IntoExpr, schema: list[str] | None = None, **named_exprs: IntoExpr) -> Expr:

674

"""

675

Create struct expression from fields.

676

677

Parameters:

678

- exprs: Field expressions

679

- schema: Field names

680

- named_exprs: Named field expressions

681

682

Returns:

683

- Expr: Struct expression

684

"""

685

686

def struct_with_fields(fields: Sequence[Expr]) -> Expr:

687

"""

688

Create struct expression with explicit fields.

689

690

Parameters:

691

- fields: Field expressions

692

693

Returns:

694

- Expr: Struct expression

695

"""

696

```

697

698

### String and Concatenation Functions

699

700

Functions for string operations and concatenation.

701

702

```python { .api }

703

def concat_str(*exprs: IntoExpr, separator: str = "", ignore_nulls: bool = False) -> Expr:

704

"""

705

Concatenate string expressions.

706

707

Parameters:

708

- exprs: String expressions to concatenate

709

- separator: Separator between strings

710

- ignore_nulls: If True, skip null values; if False (default), nulls propagate, so any null input makes the result null

711

712

Returns:

713

- Expr: Concatenated string expression

714

"""

715

716

def concat_list(*exprs: IntoExpr) -> Expr:

717

"""

718

Concatenate expressions into list.

719

720

Parameters:

721

- exprs: Expressions to concatenate

722

723

Returns:

724

- Expr: List expression

725

"""

726

727

def concat_arr(*exprs: IntoExpr) -> Expr:

728

"""

729

Concatenate expressions into array.

730

731

Parameters:

732

- exprs: Expressions to concatenate

733

734

Returns:

735

- Expr: Array expression

736

"""

737

738

def format(format_str: str, *args: IntoExpr) -> Expr:

739

"""

740

Format string with expressions.

741

742

Parameters:

743

- format_str: Format string with {} placeholders

744

- args: Expressions to format

745

746

Returns:

747

- Expr: Formatted string expression

748

"""

749

750

def escape_regex(value: str) -> str:

751

"""

752

Escape regex special characters in string.

753

754

Parameters:

755

- value: String to escape

756

757

Returns:

758

- str: Escaped string

759

"""

760

```

761

762

### Mathematical Functions

763

764

Functions for mathematical operations.

765

766

```python { .api }

767

def arctan2(y: str | Expr, x: str | Expr) -> Expr:

768

"""

769

Compute element-wise arc tangent of y/x in radians.

770

771

Parameters:

772

- y: Y coordinates

773

- x: X coordinates

774

775

Returns:

776

- Expr: Arc tangent expression

777

"""

778

779

def arctan2d(y: str | Expr, x: str | Expr) -> Expr:

780

"""

781

Compute element-wise arc tangent of y/x in degrees.

782

783

Parameters:

784

- y: Y coordinates

785

- x: X coordinates

786

787

Returns:

788

- Expr: Arc tangent expression in degrees

789

"""

790

```

791

792

### Statistical Functions

793

794

Functions for correlation and covariance.

795

796

```python { .api }

797

def corr(a: IntoExpr, b: IntoExpr, *, method: str = "pearson", ddof: int = 1) -> Expr:

798

"""

799

Compute correlation between two expressions.

800

801

Parameters:

802

- a: First expression

803

- b: Second expression

804

- method: Correlation method ('pearson', 'spearman')

805

- ddof: Delta degrees of freedom

806

807

Returns:

808

- Expr: Correlation expression

809

"""

810

811

def cov(a: IntoExpr, b: IntoExpr, *, ddof: int = 1) -> Expr:

812

"""

813

Compute covariance between two expressions.

814

815

Parameters:

816

- a: First expression

817

- b: Second expression

818

- ddof: Delta degrees of freedom

819

820

Returns:

821

- Expr: Covariance expression

822

"""

823

824

def rolling_corr(

825

a: IntoExpr,

826

b: IntoExpr,

827

window_size: int,

828

*,

829

ddof: int = 1

830

) -> Expr:

831

"""

832

Compute rolling correlation.

833

834

Parameters:

835

- a: First expression

836

- b: Second expression

837

- window_size: Rolling window size

838

- ddof: Delta degrees of freedom

839

840

Returns:

841

- Expr: Rolling correlation expression

842

"""

843

844

def rolling_cov(

845

a: IntoExpr,

846

b: IntoExpr,

847

window_size: int,

848

*,

849

ddof: int = 1

850

) -> Expr:

851

"""

852

Compute rolling covariance.

853

854

Parameters:

855

- a: First expression

856

- b: Second expression

857

- window_size: Rolling window size

858

- ddof: Delta degrees of freedom

859

860

Returns:

861

- Expr: Rolling covariance expression

862

"""

863

```

864

865

### Utility Functions

866

867

Miscellaneous utility functions for data manipulation.

868

869

```python { .api }

870

def coalesce(*exprs: IntoExpr) -> Expr:

871

"""

872

Return first non-null value from expressions.

873

874

Parameters:

875

- exprs: Expressions to check

876

877

Returns:

878

- Expr: Coalesced expression

879

"""

880

881

def from_epoch(column: IntoExpr, time_unit: str = "s") -> Expr:

882

"""

883

Convert epoch timestamp to datetime.

884

885

Parameters:

886

- column: Epoch timestamp expression

887

- time_unit: Time unit of input ('s', 'ms', 'us', 'ns')

888

889

Returns:

890

- Expr: Datetime expression

891

"""

892

893

def approx_n_unique(column: IntoExpr) -> Expr:

894

"""

895

Approximate number of unique values.

896

897

Parameters:

898

- column: Column expression

899

900

Returns:

901

- Expr: Approximate unique count expression

902

"""

903

904

def n_unique(column: IntoExpr) -> Expr:

905

"""

906

Count unique values.

907

908

Parameters:

909

- column: Column expression

910

911

Returns:

912

- Expr: Unique count expression

913

"""

914

915

def dtype_of(column: IntoExpr) -> Expr:

916

"""

917

Get data type of expression.

918

919

Parameters:

920

- column: Expression to check

921

922

Returns:

923

- Expr: Data type expression

924

"""

925

926

def self_dtype() -> Expr:

927

"""

928

Get data type of current column context.

929

930

Returns:

931

- Expr: Self data type expression

932

"""

933

```

934

935

### Array Creation Functions

936

937

Functions for creating arrays with specific patterns.

938

939

```python { .api }

940

def ones(shape: int | tuple[int, ...], *, dtype: type = Float64, eager: bool = False) -> Expr | Series:

941

"""

942

Create array filled with ones.

943

944

Parameters:

945

- shape: Array shape

946

- dtype: Data type

947

- eager: Return Series instead of Expr

948

949

Returns:

950

- Expr | Series: Array of ones

951

"""

952

953

def zeros(shape: int | tuple[int, ...], *, dtype: type = Float64, eager: bool = False) -> Expr | Series:

954

"""

955

Create array filled with zeros.

956

957

Parameters:

958

- shape: Array shape

959

- dtype: Data type

960

- eager: Return Series instead of Expr

961

962

Returns:

963

- Expr | Series: Array of zeros

964

"""

965

966

def repeat(

967

value: IntoExpr,

968

n: int | IntoExpr,

969

*,

970

eager: bool = False

971

) -> Expr | Series:

972

"""

973

Repeat value n times.

974

975

Parameters:

976

- value: Value to repeat

977

- n: Number of repetitions

978

- eager: Return Series instead of Expr

979

980

Returns:

981

- Expr | Series: Repeated values

982

"""

983

```

984

985

### Collection Functions

986

987

Functions for working with multiple DataFrames and LazyFrames.

988

989

```python { .api }

990

def collect_all(

991

lazy_frames: list[LazyFrame],

992

*,

993

type_coercion: bool = True,

994

predicate_pushdown: bool = True,

995

projection_pushdown: bool = True,

996

simplify_expression: bool = True,

997

slice_pushdown: bool = True,

998

comm_subplan_elim: bool = True,

999

comm_subexpr_elim: bool = True,

1000

streaming: bool = False

1001

) -> list[DataFrame]:

1002

"""

1003

Collect multiple LazyFrames with shared optimization.

1004

1005

Parameters:

1006

- lazy_frames: List of LazyFrames to collect

1007

- type_coercion: Enable type coercion optimization

1008

- predicate_pushdown: Enable predicate pushdown

1009

- projection_pushdown: Enable projection pushdown

1010

- simplify_expression: Enable expression simplification

1011

- slice_pushdown: Enable slice pushdown

1012

- comm_subplan_elim: Enable common subplan elimination

1013

- comm_subexpr_elim: Enable common subexpression elimination

1014

- streaming: Enable streaming execution

1015

1016

Returns:

1017

- list[DataFrame]: Collected DataFrames

1018

"""

1019

1020

def collect_all_async(

1021

lazy_frames: list[LazyFrame],

1022

*,

1023

gevent: bool = False,

1024

**kwargs

1025

) -> Awaitable[list[DataFrame]]:

1026

"""

1027

Collect multiple LazyFrames asynchronously.

1028

1029

Parameters:

1030

- lazy_frames: List of LazyFrames to collect

1031

- gevent: Use gevent for async execution

1032

- **kwargs: Same optimization parameters as collect_all

1033

1034

Returns:

1035

- Awaitable[list[DataFrame]]: Async collected DataFrames

1036

"""

1037

1038

def concat(

1039

items: Iterable[DataFrame | LazyFrame | Series],

1040

*,

1041

rechunk: bool = False,

1042

how: str = "vertical",

1043

parallel: bool = True

1044

) -> DataFrame | LazyFrame | Series:

1045

"""

1046

Concatenate DataFrames, LazyFrames, or Series.

1047

1048

Parameters:

1049

- items: Items to concatenate

1050

- rechunk: Rechunk result for better memory layout

1051

- how: Concatenation strategy (e.g., 'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal', 'align')

1052

- parallel: Use parallel concatenation

1053

1054

Returns:

1055

- DataFrame | LazyFrame | Series: Concatenated result

1056

"""

1057

```

1058

1059

### Other Utility Functions

1060

1061

Additional utility functions for various operations.

1062

1063

```python { .api }

1064

def first(*args: IntoExpr) -> Expr:

1065

"""Get first value."""

1066

1067

def last(*args: IntoExpr) -> Expr:

1068

"""Get last value."""

1069

1070

def nth(column: IntoExpr, n: int | IntoExpr) -> Expr:

1071

"""Get nth value."""

1072

1073

def head(*args: IntoExpr, n: int = 10) -> Expr:

1074

"""Get first n values."""

1075

1076

def tail(*args: IntoExpr, n: int = 10) -> Expr:

1077

"""Get last n values."""

1078

1079

def groups() -> Expr:

1080

"""Get group indices."""

1081

1082

def implode(column: IntoExpr) -> Expr:

1083

"""Collect values into list."""

1084

1085

def len() -> Expr:

1086

"""Get length."""

1087

1088

def element() -> Expr:

1089

"""Alias for the element being evaluated in an `eval` expression (e.g. `list.eval`)."""

1090

1091

def arg_sort_by(*by: IntoExpr, descending: bool = False) -> Expr:

1092

"""Get indices that would sort by expressions."""

1093

1094

def arg_where(condition: IntoExpr) -> Expr:

1095

"""Get indices where condition is true."""

1096

1097

def business_day_count(

1098

start: IntoExpr,

1099

end: IntoExpr,

1100

*,

1101

week_mask: list[bool] = [True, True, True, True, True, False, False],

1102

holidays: list[date] | None = None

1103

) -> Expr:

1104

"""Count business days between dates."""

1105

1106

def set_random_seed(seed: int) -> None:

1107

"""Set random seed for reproducible operations."""

1108

1109

def field(name: str) -> Expr:

1110

"""Select struct field."""

1111

1112

def fold(

1113

acc: IntoExpr,

1114

function: Callable[[Expr, Expr], Expr],

1115

exprs: Sequence[Expr | str] | Expr

1116

) -> Expr:

1117

"""Fold expressions with function."""

1118

1119

def reduce(function: Callable[[Expr, Expr], Expr], *exprs: IntoExpr) -> Expr:

1120

"""Reduce expressions with function."""

1121

1122

def map_batches(

1123

function: Callable[[DataFrame], DataFrame],

1124

*exprs: IntoExpr,

1125

returns_scalar: bool = False,

1126

agg_list: bool = False

1127

) -> Expr:

1128

"""Apply function to DataFrame batches."""

1129

1130

def map_groups(

1131

function: Callable[[DataFrame], DataFrame],

1132

*exprs: IntoExpr,

1133

returns_scalar: bool = False

1134

) -> Expr:

1135

"""Apply function to grouped DataFrames."""

1136

1137

def align_frames(

1138

*frames: DataFrame | LazyFrame,

1139

on: str | Expr | list[str | Expr],

1140

select: str | Expr | list[str | Expr] | None = None,

1141

reverse: bool | list[bool] = False

1142

) -> list[DataFrame | LazyFrame]:

1143

"""Align frames on common values."""

1144

1145

def row_index() -> Expr:

1146

"""Add row index column."""

1147

1148

def explain_all(*lazy_frames: LazyFrame, **kwargs) -> None:

1149

"""Print query plans for multiple LazyFrames."""

1150

1151

def sql_expr(sql: str) -> Expr:

1152

"""Create expression from SQL fragment."""

1153

```

1154

1155

## Usage Examples

1156

1157

### Basic Function Usage

1158

1159

```python

1160

import polars as pl

1161

1162

df = pl.DataFrame({

1163

"a": [1, 2, 3, 4],

1164

"b": [10, 20, 30, 40],

1165

"c": [100, 200, 300, 400]

1166

})

1167

1168

# Column selection and manipulation

1169

result = df.select([

1170

pl.col("a"),

1171

pl.lit(42).alias("literal"),

1172

pl.when(pl.col("a") > 2).then(pl.col("b")).otherwise(0).alias("conditional")

1173

])

1174

1175

# Aggregations

1176

agg_result = df.select([

1177

pl.sum("a").alias("sum_a"),

1178

pl.mean("b").alias("mean_b"),

1179

pl.max("c").alias("max_c"),

1180

pl.len().alias("count")  # row count; argument-less pl.count() is deprecated in favor of pl.len()

1181

])

1182

```

1183

1184

### Horizontal Operations

1185

1186

```python

1187

# Horizontal aggregations

1188

result = df.with_columns([

1189

pl.sum_horizontal("a", "b", "c").alias("row_sum"),

1190

pl.max_horizontal("a", "b", "c").alias("row_max"),

1191

pl.mean_horizontal("a", "b", "c").alias("row_mean")

1192

])

1193

```

1194

1195

### Range Functions

1196

1197

```python

1198

# Create ranges

1199

ranges_df = pl.DataFrame({

1200

"int_range": pl.arange(0, 10, eager=True),

1201

"date_range": pl.date_range(

1202

pl.date(2023, 1, 1),

1203

pl.date(2023, 1, 10),

1204

"1d",

1205

eager=True

1206

)

1207

})

1208

1209

# Linear space

1210

linear_vals = pl.linear_space(0, 100, 11, eager=True)

1211

```

1212

1213

### String Operations

1214

1215

```python

1216

text_df = pl.DataFrame({

1217

"first": ["hello", "world"],

1218

"second": ["polars", "rocks"]

1219

})

1220

1221

result = text_df.select([

1222

pl.concat_str("first", "second", separator=" ").alias("combined"),

1223

pl.format("{} is {}", pl.col("first"), pl.col("second")).alias("formatted")

1224

])

1225

```

1226

1227

### Mathematical Functions

1228

1229

```python

1230

coords_df = pl.DataFrame({

1231

"x": [1.0, 2.0, 3.0],

1232

"y": [1.0, 2.0, 3.0]

1233

})

1234

1235

result = coords_df.with_columns([

1236

pl.arctan2("y", "x").alias("angle_rad"),

1237

pl.arctan2d("y", "x").alias("angle_deg")

1238

])

1239

```

1240

1241

### Statistical Functions

1242

1243

```python

1244

stats_df = pl.DataFrame({

1245

"x": [1, 2, 3, 4, 5],

1246

"y": [2, 4, 6, 8, 10]

1247

})

1248

1249

result = stats_df.select([

1250

pl.corr("x", "y").alias("correlation"),

1251

pl.cov("x", "y").alias("covariance")

1252

])

1253

```

1254

1255

### Collection Operations

1256

1257

```python

1258

# Multiple LazyFrames

1259

lazy1 = pl.scan_csv("file1.csv")

1260

lazy2 = pl.scan_csv("file2.csv")

1261

lazy3 = pl.scan_csv("file3.csv")

1262

1263

# Collect all with shared optimization

1264

results = pl.collect_all([lazy1, lazy2, lazy3])

1265

1266

# Concatenate DataFrames

1267

combined = pl.concat([results[0], results[1], results[2]], how="vertical")

1268

```

1269

1270

### Advanced Function Usage

1271

1272

```python

1273

# Cumulative operations

1274

cumulative_df = df.select([

1275

pl.col("a"),

1276

pl.cum_sum("a").alias("cumsum_a"),

1277

pl.cum_count("a").alias("cumcount_a")

1278

])

1279

1280

# Complex folding

1281

folded = df.select([

1282

pl.fold(

1283

acc=pl.lit(0),

1284

function=lambda acc, x: acc + x,

1285

exprs=["a", "b", "c"]

1286

).alias("total")

1287

])

1288

1289

# Utility functions

1290

utility_result = df.select([

1291

pl.coalesce("a", pl.lit(0)).alias("coalesced"),

1292

pl.n_unique("a").alias("unique_count"),

1293

pl.dtype_of("a").alias("data_type")

1294

])

1295

```