or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md core-classes.md data-types.md expressions.md functions.md index.md io-operations.md sql-functionality.md

docs/expressions.md

0

# Expression Namespaces

1

2

Specialized namespaces for working with different data types including string operations (.str), datetime operations (.dt), list operations (.list), array operations (.arr), struct operations (.struct), categorical operations (.cat), binary operations (.bin), name operations (.name), and metadata operations (.meta).

3

4

## Capabilities

5

6

### String Namespace (.str)

7

8

String operations available on both Expr and Series for text processing and manipulation.

9

10

```python { .api }

11

# Available as expr.str.method() and series.str.method()

12

13

def contains(

14

pattern: str | Expr,

15

*,

16

literal: bool = False,

17

strict: bool = True

18

) -> Expr:

19

"""Check if string contains pattern."""

20

21

def ends_with(suffix: str | Expr) -> Expr:

22

"""Check if string ends with suffix."""

23

24

def starts_with(prefix: str | Expr) -> Expr:

25

"""Check if string starts with prefix."""

26

27

def extract(pattern: str, group_index: int = 1) -> Expr:

28

"""Extract regex capture group."""

29

30

def extract_all(pattern: str) -> Expr:

31

"""Extract all regex matches."""

32

33

def find(pattern: str, *, literal: bool = False) -> Expr:

34

"""Find first occurrence of pattern."""

35

36

def replace(pattern: str, value: str, *, literal: bool = False, n: int = 1) -> Expr:

37

"""Replace pattern with value."""

38

39

def replace_all(pattern: str, value: str, *, literal: bool = False) -> Expr:

40

"""Replace all occurrences of pattern."""

41

42

def slice(offset: int, length: int | None = None) -> Expr:

43

"""Extract substring by position."""

44

45

def head(n: int) -> Expr:

46

"""Get first n characters."""

47

48

def tail(n: int) -> Expr:

49

"""Get last n characters."""

50

51

def to_lowercase() -> Expr:

52

"""Convert to lowercase."""

53

54

def to_uppercase() -> Expr:

55

"""Convert to uppercase."""

56

57

def to_titlecase() -> Expr:

58

"""Convert to title case."""

59

60

def strip_chars(characters: str | None = None) -> Expr:

61

"""Remove characters from both ends."""

62

63

def strip_chars_start(characters: str | None = None) -> Expr:

64

"""Remove characters from start."""

65

66

def strip_chars_end(characters: str | None = None) -> Expr:

67

"""Remove characters from end."""

68

69

def zfill(width: int) -> Expr:

70

"""Pad with zeros to specified width."""

71

72

def pad_start(width: int, fillchar: str = " ") -> Expr:

73

"""Pad string to width from start."""

74

75

def pad_end(width: int, fillchar: str = " ") -> Expr:

76

"""Pad string to width from end."""

77

78

def len_bytes() -> Expr:

79

"""Get byte length of strings."""

80

81

def len_chars() -> Expr:

82

"""Get character length of strings."""

83

84

def n_chars() -> Expr:

85

"""Alias for len_chars."""

86

87

def concat(delimiter: str = "") -> Expr:

88

"""Vertically concatenate all string values in the column into a single string."""

89

90

def explode() -> Expr:

91

"""Split string into characters."""

92

93

def split(by: str, *, inclusive: bool = False) -> Expr:

94

"""Split string by delimiter."""

95

96

def split_exact(by: str, n: int, *, inclusive: bool = False) -> Expr:

97

"""Split string into exactly n parts."""

98

99

def splitn(by: str, n: int) -> Expr:

100

"""Split string into at most n parts."""

101

102

def json_decode(dtype: type | None = None, *, infer_schema_length: int | None = None) -> Expr:

103

"""Parse JSON strings."""

104

105

def json_path_match(json_path: str) -> Expr:

106

"""Extract JSON values using JSONPath."""

107

108

def encode(encoding: str) -> Expr:

109

"""Encode strings as hex or base64 text (encoding is "hex" or "base64")."""

110

111

def decode(encoding: str, *, strict: bool = True) -> Expr:

112

"""Decode hex- or base64-encoded strings into binary values (encoding is "hex" or "base64")."""

113

114

def to_integer(*, base: int = 10, strict: bool = True) -> Expr:

115

"""Parse strings as integers."""

116

117

def to_decimal(*, infer_length: int = 10) -> Expr:

118

"""Parse strings as decimals."""

119

120

def strftime(format: str) -> Expr:

121

"""Format datetime as string."""

122

123

def strptime(

124

dtype: type,

125

format: str | None = None,

126

*,

127

strict: bool = True,

128

exact: bool = True,

129

cache: bool = True

130

) -> Expr:

131

"""Parse strings as datetime."""

132

```

133

134

### DateTime Namespace (.dt)

135

136

DateTime operations for temporal data manipulation and extraction.

137

138

```python { .api }

139

# Available as expr.dt.method() and series.dt.method()

140

141

def year() -> Expr:

142

"""Extract year."""

143

144

def month() -> Expr:

145

"""Extract month."""

146

147

def day() -> Expr:

148

"""Extract day."""

149

150

def hour() -> Expr:

151

"""Extract hour."""

152

153

def minute() -> Expr:

154

"""Extract minute."""

155

156

def second() -> Expr:

157

"""Extract second."""

158

159

def microsecond() -> Expr:

160

"""Extract microsecond."""

161

162

def nanosecond() -> Expr:

163

"""Extract nanosecond."""

164

165

def weekday() -> Expr:

166

"""Get weekday (Monday=1, Sunday=7)."""

167

168

def week() -> Expr:

169

"""Get ISO week number."""

170

171

def ordinal_day() -> Expr:

172

"""Get day of year (1-366)."""

173

174

def quarter() -> Expr:

175

"""Get quarter (1-4)."""

176

177

def date() -> Expr:

178

"""Extract date part."""

179

180

def time() -> Expr:

181

"""Extract time part."""

182

183

def epoch(time_unit: str = "us") -> Expr:

184

"""Convert to epoch timestamp."""

185

186

def timestamp(time_unit: str = "us") -> Expr:

187

"""Get timestamp."""

188

189

def with_time_unit(time_unit: str) -> Expr:

190

"""Change time unit."""

191

192

def cast_time_unit(time_unit: str) -> Expr:

193

"""Cast to different time unit."""

194

195

def convert_time_zone(time_zone: str) -> Expr:

196

"""Convert to different timezone."""

197

198

def replace_time_zone(

199

time_zone: str | None,

200

*,

201

ambiguous: str = "raise",

202

non_existent: str = "raise"

203

) -> Expr:

204

"""Replace timezone without conversion."""

205

206

def truncate(every: str) -> Expr:

207

"""Truncate to time interval."""

208

209

def round(every: str) -> Expr:

210

"""Round to nearest time interval."""

211

212

def strftime(format: str) -> Expr:

213

"""Format as string."""

214

215

def to_string(format: str) -> Expr:

216

"""Convert to string with format."""

217

218

def days() -> Expr:

219

"""Extract days from duration."""

220

221

def hours() -> Expr:

222

"""Extract hours from duration."""

223

224

def minutes() -> Expr:

225

"""Extract minutes from duration."""

226

227

def seconds() -> Expr:

228

"""Extract seconds from duration."""

229

230

def milliseconds() -> Expr:

231

"""Extract milliseconds from duration."""

232

233

def microseconds() -> Expr:

234

"""Extract microseconds from duration."""

235

236

def nanoseconds() -> Expr:

237

"""Extract nanoseconds from duration."""

238

239

def total_days() -> Expr:

240

"""Get total days in duration."""

241

242

def total_hours() -> Expr:

243

"""Get total hours in duration."""

244

245

def total_minutes() -> Expr:

246

"""Get total minutes in duration."""

247

248

def total_seconds() -> Expr:

249

"""Get total seconds in duration."""

250

251

def total_milliseconds() -> Expr:

252

"""Get total milliseconds in duration."""

253

254

def total_microseconds() -> Expr:

255

"""Get total microseconds in duration."""

256

257

def total_nanoseconds() -> Expr:

258

"""Get total nanoseconds in duration."""

259

260

def offset_by(by: str) -> Expr:

261

"""Offset datetime by interval."""

262

263

def is_between(

264

start: datetime | date | str | Expr,

265

end: datetime | date | str | Expr,

266

closed: str = "both"

267

) -> Expr:

268

"""Check if datetime is in range."""

269

```

270

271

### List Namespace (.list)

272

273

Operations for working with list-type columns.

274

275

```python { .api }

276

# Available as expr.list.method() and series.list.method()

277

278

def len() -> Expr:

279

"""Get length of lists."""

280

281

def sum() -> Expr:

282

"""Sum elements in each list."""

283

284

def max() -> Expr:

285

"""Get maximum element in each list."""

286

287

def min() -> Expr:

288

"""Get minimum element in each list."""

289

290

def mean() -> Expr:

291

"""Get mean of elements in each list."""

292

293

def sort(*, descending: bool = False, nulls_last: bool = False) -> Expr:

294

"""Sort elements in each list."""

295

296

def reverse() -> Expr:

297

"""Reverse order of elements in each list."""

298

299

def unique(*, maintain_order: bool = False) -> Expr:

300

"""Get unique elements in each list."""

301

302

def n_unique() -> Expr:

303

"""Count unique elements in each list."""

304

305

def get(index: int | Expr, *, null_on_oob: bool = True) -> Expr:

306

"""Get element at index."""

307

308

def first() -> Expr:

309

"""Get first element."""

310

311

def last() -> Expr:

312

"""Get last element."""

313

314

def head(n: int = 5) -> Expr:

315

"""Get first n elements."""

316

317

def tail(n: int = 5) -> Expr:

318

"""Get last n elements."""

319

320

def slice(offset: int, length: int | None = None) -> Expr:

321

"""Slice lists."""

322

323

def explode() -> Expr:

324

"""Explode list elements to separate rows."""

325

326

def contains(item: Any) -> Expr:

327

"""Check if lists contain item."""

328

329

def join(separator: str, *, ignore_nulls: bool = True) -> Expr:

330

"""Join list elements into string."""

331

332

def arg_min() -> Expr:

333

"""Get index of minimum element."""

334

335

def arg_max() -> Expr:

336

"""Get index of maximum element."""

337

338

def diff(n: int = 1, null_behavior: str = "ignore") -> Expr:

339

"""Calculate differences between consecutive elements."""

340

341

def shift(n: int = 1, *, fill_value: Any = None) -> Expr:

342

"""Shift elements by n positions."""

343

344

def drop_nulls() -> Expr:

345

"""Remove null values from lists."""

346

347

def sample(

348

n: int | None = None,

349

*,

350

fraction: float | None = None,

351

with_replacement: bool = False,

352

shuffle: bool = False,

353

seed: int | None = None

354

) -> Expr:

355

"""Sample elements from lists."""

356

357

def count_matches(element: Any, *, parallel: bool = False) -> Expr:

358

"""Count occurrences of element."""

359

360

def to_array(width: int) -> Expr:

361

"""Convert to array with fixed width."""

362

363

def to_struct(

364

n_field_strategy: str = "first_non_null",

365

fields: Callable[[int], str] | Sequence[str] | None = None

366

) -> Expr:

367

"""Convert to struct."""

368

369

def eval(expr: Expr, *, parallel: bool = False) -> Expr:

370

"""Evaluate expression on list elements."""

371

372

def all() -> Expr:

373

"""Check if all elements are true."""

374

375

def any() -> Expr:

376

"""Check if any elements are true."""

377

```

378

379

### Array Namespace (.arr)

380

381

Operations for working with fixed-size array columns.

382

383

```python { .api }

384

# Available as expr.arr.method() and series.arr.method()

385

386

def min() -> Expr:

387

"""Get minimum element in each array."""

388

389

def max() -> Expr:

390

"""Get maximum element in each array."""

391

392

def sum() -> Expr:

393

"""Sum elements in each array."""

394

395

def unique(*, maintain_order: bool = False) -> Expr:

396

"""Get unique elements in each array."""

397

398

def to_list() -> Expr:

399

"""Convert to list type."""

400

401

def get(index: int | Expr, *, null_on_oob: bool = True) -> Expr:

402

"""Get element at index."""

403

404

def first() -> Expr:

405

"""Get first element."""

406

407

def last() -> Expr:

408

"""Get last element."""

409

410

def join(separator: str, *, ignore_nulls: bool = True) -> Expr:

411

"""Join array elements into string."""

412

413

def contains(item: Any) -> Expr:

414

"""Check if arrays contain item."""

415

416

def count_matches(element: Any) -> Expr:

417

"""Count occurrences of element."""

418

419

def reverse() -> Expr:

420

"""Reverse order of elements."""

421

422

def shift(n: int = 1, *, fill_value: Any = None) -> Expr:

423

"""Shift elements by n positions."""

424

425

def slice(offset: int, length: int | None = None) -> Expr:

426

"""Slice arrays."""

427

428

def explode() -> Expr:

429

"""Explode array elements to separate rows."""

430

431

def all() -> Expr:

432

"""Check if all elements are true."""

433

434

def any() -> Expr:

435

"""Check if any elements are true."""

436

437

def sort(*, descending: bool = False, nulls_last: bool = False) -> Expr:

438

"""Sort elements in each array."""

439

440

def arg_min() -> Expr:

441

"""Get index of minimum element."""

442

443

def arg_max() -> Expr:

444

"""Get index of maximum element."""

445

446

def eval(expr: Expr, *, parallel: bool = False) -> Expr:

447

"""Evaluate expression on array elements."""

448

```

449

450

### Struct Namespace (.struct)

451

452

Operations for working with structured/nested data.

453

454

```python { .api }

455

# Available as expr.struct.method() and series.struct.method()

456

457

def field(name: str) -> Expr:

458

"""Extract field by name."""

459

460

def rename_fields(names: list[str]) -> Expr:

461

"""Rename struct fields."""

462

463

def json_encode() -> Expr:

464

"""Encode struct as JSON string."""

465

466

def with_fields(*exprs: Expr) -> Expr:

467

"""Add or update struct fields."""

468

469

def n_fields() -> int:

470

"""Get number of fields."""

471

472

def fields() -> list[str]:

473

"""Get field names."""

474

475

def schema() -> dict[str, type]:

476

"""Get struct schema."""

477

478

def to_frame() -> DataFrame:

479

"""Convert struct Series to DataFrame."""

480

```

481

482

### Categorical Namespace (.cat)

483

484

Operations for categorical data types.

485

486

```python { .api }

487

# Available as expr.cat.method() and series.cat.method()

488

489

def get_categories() -> Expr:

490

"""Get categorical categories."""

491

492

def len_bytes() -> Expr:

493

"""Get byte length of category strings."""

494

495

def len_chars() -> Expr:

496

"""Get character length of category strings."""

497

498

def set_ordering(ordering: str) -> Expr:

499

"""Set categorical ordering ('physical' or 'lexical')."""

500

501

def get_ordering() -> str:

502

"""Get current categorical ordering."""

503

504

def to_local() -> Expr:

505

"""Convert to local categorical."""

506

```

507

508

### Binary Namespace (.bin)

509

510

Operations for binary data types.

511

512

```python { .api }

513

# Available as expr.bin.method() and series.bin.method()

514

515

def contains(literal: bytes) -> Expr:

516

"""Check if binary contains literal bytes."""

517

518

def ends_with(suffix: bytes) -> Expr:

519

"""Check if binary ends with suffix."""

520

521

def starts_with(prefix: bytes) -> Expr:

522

"""Check if binary starts with prefix."""

523

524

def decode(encoding: str, *, strict: bool = True) -> Expr:

525

"""Decode hex- or base64-encoded binary values (encoding is "hex" or "base64")."""

526

527

def encode(encoding: str) -> Expr:

528

"""Encode binary values as hex or base64 strings (encoding is "hex" or "base64")."""

529

530

def size() -> Expr:

531

"""Get size of binary data in bytes."""

532

```

533

534

### Name Namespace (.name)

535

536

Operations for working with expression and column names.

537

538

```python { .api }

539

# Available as expr.name.method()

540

541

def keep() -> Expr:

542

"""Keep original column name."""

543

544

def map(function: Callable[[str], str]) -> Expr:

545

"""Apply function to column name."""

546

547

def prefix(prefix: str) -> Expr:

548

"""Add prefix to column name."""

549

550

def suffix(suffix: str) -> Expr:

551

"""Add suffix to column name."""

552

553

def to_lowercase() -> Expr:

554

"""Convert column name to lowercase."""

555

556

def to_uppercase() -> Expr:

557

"""Convert column name to uppercase."""

558

```

559

560

### Meta Namespace (.meta)

561

562

Metadata operations for expressions.

563

564

```python { .api }

565

# Available as expr.meta.method()

566

567

def eq(other: Expr) -> bool:

568

"""Check expression equality."""

569

570

def ne(other: Expr) -> bool:

571

"""Check expression inequality."""

572

573

def has_multiple_outputs() -> bool:

574

"""Check if expression produces multiple columns."""

575

576

def is_column() -> bool:

577

"""Check if expression is a column reference."""

578

579

def is_regex_projection() -> bool:

580

"""Check if expression is a regex column selection."""

581

582

def output_name() -> str | None:

583

"""Get output column name if determinable."""

584

585

def pop() -> list[Expr]:

586

"""Pop and return child expressions."""

587

588

def root_names() -> list[str]:

589

"""Get root column names used by expression."""

590

591

def tree_format(*, return_as_string: bool = False) -> str | None:

592

"""Display expression tree structure."""

593

594

def undo_aliases() -> Expr:

595

"""Remove aliases from expression."""

596

597

def write_json(file: IOBase) -> None:

598

"""Write expression as JSON."""

599

```

600

601

## Usage Examples

602

603

### String Operations

604

605

```python

606

import polars as pl

607

608

df = pl.DataFrame({

609

"text": ["Hello World", "POLARS rocks", " data science "],

610

"emails": ["user@example.com", "admin@test.org", "info@company.net"]

611

})

612

613

result = df.select([

614

pl.col("text").str.to_lowercase().alias("lower"),

615

pl.col("text").str.len_chars().alias("length"),

616

pl.col("text").str.strip_chars().alias("stripped"),

617

pl.col("emails").str.extract(r"@(.+)").alias("domain"),

618

pl.col("text").str.contains("data").alias("has_data")

619

])

620

621

# Advanced string operations

622

processed = df.select([

623

pl.col("text").str.split(" ").alias("words"),

624

pl.col("text").str.replace("World", "Universe").alias("replaced"),

625

pl.col("emails").str.starts_with("admin").alias("is_admin")

626

])

627

```

628

629

### DateTime Operations

630

631

```python

632

df_dates = pl.DataFrame({

633

"timestamp": pl.datetime_range(

634

pl.datetime(2023, 1, 1),

635

pl.datetime(2023, 12, 31),

636

"1mo",

637

eager=True

638

)

639

})

640

641

result = df_dates.select([

642

pl.col("timestamp"),

643

pl.col("timestamp").dt.year().alias("year"),

644

pl.col("timestamp").dt.month().alias("month"),

645

pl.col("timestamp").dt.quarter().alias("quarter"),

646

pl.col("timestamp").dt.weekday().alias("weekday"),

647

pl.col("timestamp").dt.strftime("%Y-%m-%d").alias("formatted"),

648

pl.col("timestamp").dt.truncate("1w").alias("week_start")

649

])

650

651

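# Duration operations (pl.datetime() builds an expression, so literal column data uses Python datetimes)

from datetime import datetime

652

df_duration = pl.DataFrame({

653

"start": [datetime(2023, 1, 1), datetime(2023, 6, 1)],

654

"end": [datetime(2023, 1, 15), datetime(2023, 6, 30)]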

655

})

656

657

duration_result = df_duration.select([

658

(pl.col("end") - pl.col("start")).alias("duration"),

659

(pl.col("end") - pl.col("start")).dt.total_days().alias("total_days")

660

])

661

```

662

663

### List Operations

664

665

```python

666

df_lists = pl.DataFrame({

667

"numbers": [[1, 2, 3], [4, 5], [6, 7, 8, 9]],

668

"words": [["hello", "world"], ["polars", "rocks"], ["data", "science"]]

669

})

670

671

result = df_lists.select([

672

pl.col("numbers").list.len().alias("count"),

673

pl.col("numbers").list.sum().alias("sum"),

674

pl.col("numbers").list.max().alias("max"),

675

pl.col("numbers").list.get(0).alias("first"),

676

pl.col("words").list.join(" ").alias("joined"),

677

pl.col("numbers").list.contains(5).alias("has_five")

678

])

679

680

# List transformations

681

transformed = df_lists.select([

682

pl.col("numbers").list.sort().alias("sorted"),

683

pl.col("numbers").list.reverse().alias("reversed"),

684

pl.col("numbers").list.unique().alias("unique"),

685

pl.col("numbers").list.slice(1, 2).alias("middle")

686

])

687

```

688

689

### Struct Operations

690

691

```python

692

df_struct = pl.DataFrame({

693

"person": [

694

{"name": "Alice", "age": 25, "city": "NYC"},

695

{"name": "Bob", "age": 30, "city": "LA"},

696

{"name": "Charlie", "age": 35, "city": "Chicago"}

697

]

698

})

699

700

result = df_struct.select([

701

pl.col("person").struct.field("name").alias("name"),

702

pl.col("person").struct.field("age").alias("age"),

703

pl.col("person").struct.field("city").alias("city")

704

])

705

706

# Struct modifications

707

modified = df_struct.select([

708

pl.col("person").struct.with_fields([

709

pl.col("person").struct.field("age").add(1).alias("age")

710

]).alias("person_older")

711

])

712

```

713

714

### Categorical Operations

715

716

```python

717

df_cat = pl.DataFrame({

718

"category": ["A", "B", "A", "C", "B", "A"]

719

}).with_columns(

720

pl.col("category").cast(pl.Categorical).alias("category")

721

)

722

723

result = df_cat.select([

724

pl.col("category"),

725

pl.col("category").cat.get_categories().alias("categories"),

726

pl.col("category").cat.len_chars().alias("category_length")

727

])

728

```

729

730

### Binary Operations

731

732

```python

733

df_binary = pl.DataFrame({

734

"data": [b"hello", b"world", b"polars"]

735

})

736

737

result = df_binary.select([

738

pl.col("data"),

739

pl.col("data").bin.size().alias("size"),

740

pl.col("data").cast(pl.String).alias("decoded"),

741

pl.col("data").bin.starts_with(b"hel").alias("starts_with_hel")

742

])

743

```

744

745

### Expression Metadata

746

747

```python

748

# Create complex expression

749

expr = pl.col("value").filter(pl.col("category") == "A").sum().over("group")

750

751

# Examine expression metadata

752

print(f"Output name: {expr.meta.output_name()}")

753

print(f"Root names: {expr.meta.root_names()}")

754

print(f"Has multiple outputs: {expr.meta.has_multiple_outputs()}")

755

print(f"Is column: {expr.meta.is_column()}")

756

757

# Display expression tree

758

print(expr.meta.tree_format(return_as_string=True))

759

```

760

761

### Advanced Namespace Combinations

762

763

```python

764

# Complex text processing with multiple namespaces

765

text_df = pl.DataFrame({

766

"logs": [

767

'{"timestamp": "2023-01-01T10:00:00", "level": "INFO", "message": "System started"}',

768

'{"timestamp": "2023-01-01T10:05:00", "level": "ERROR", "message": "Connection failed"}',

769

'{"timestamp": "2023-01-01T10:10:00", "level": "INFO", "message": "System recovered"}'

770

]

771

})

772

773

processed_logs = text_df.select([

774

pl.col("logs").str.json_path_match("$.timestamp").alias("timestamp_str"),

775

pl.col("logs").str.json_path_match("$.level").alias("level"),

776

pl.col("logs").str.json_path_match("$.message").alias("message")

777

]).with_columns([

778

pl.col("timestamp_str").str.strptime(pl.Datetime, "%Y-%m-%dT%H:%M:%S").alias("timestamp")

779

]).select([

780

pl.col("timestamp").dt.hour().alias("hour"),

781

pl.col("level"),

782

pl.col("message"),

783

pl.col("message").str.len_chars().alias("message_length")

784

])

785

786

# Multi-level list and struct operations

787

nested_df = pl.DataFrame({

788

"data": [

789

[{"values": [1, 2, 3], "label": "A"}, {"values": [4, 5], "label": "B"}],

790

[{"values": [6, 7, 8, 9], "label": "C"}]

791

]

792

})

793

794

result = nested_df.select([

795

pl.col("data").list.len().alias("num_items"),

796

pl.col("data").list.eval(

797

pl.element().struct.field("values").list.sum()

798

).alias("sums_per_item"),

799

pl.col("data").list.eval(

800

pl.element().struct.field("label")

801

).alias("labels")

802

])

803

```