or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md core-classes.md data-types.md expressions.md functions.md index.md io-operations.md sql-functionality.md

configuration.mddocs/

0

# Configuration and Utilities

1

2

Configuration options, selectors for column operations, string caching for categorical data, meta information utilities, and testing utilities for DataFrame comparisons. These components provide essential support for customizing Polars behavior and working efficiently with data.

3

4

## Capabilities

5

6

### Configuration

7

8

Customize Polars display options, performance settings, and behavior through the Config class.

9

10

```python { .api }

11

class Config:

12

def __init__(self):

13

"""Global configuration manager for Polars."""

14

15

# Table Display Configuration

16

def set_tbl_cols(self, n: int) -> Config:

17

"""

18

Set maximum number of columns to display.

19

20

Parameters:

21

- n: Maximum columns (-1 for unlimited)

22

23

Returns:

24

- Config: Self for method chaining

25

"""

26

27

def set_tbl_rows(self, n: int) -> Config:

28

"""

29

Set maximum number of rows to display.

30

31

Parameters:

32

- n: Maximum rows (-1 for unlimited)

33

34

Returns:

35

- Config: Self for method chaining

36

"""

37

38

def set_tbl_width_chars(self, width: int) -> Config:

39

"""

40

Set maximum table width in characters.

41

42

Parameters:

43

- width: Maximum width in characters

44

45

Returns:

46

- Config: Self for method chaining

47

"""

48

49

def set_tbl_column_data_type_inline(self, active: bool = True) -> Config:

50

"""

51

Show column data types inline with headers.

52

53

Parameters:

54

- active: Enable inline data types

55

56

Returns:

57

- Config: Self for method chaining

58

"""

59

60

def set_tbl_dataframe_shape_below(self, active: bool = True) -> Config:

61

"""

62

Display DataFrame shape below the table.

63

64

Parameters:

65

- active: Show shape below table

66

67

Returns:

68

- Config: Self for method chaining

69

"""

70

71

def set_tbl_formatting(

72

self,

73

format: str = "UTF8_FULL_CONDENSED",

74

rounded_corners: bool = False

75

) -> Config:

76

"""

77

Set table formatting style.

78

79

Parameters:

80

- format: Table format style

81

- rounded_corners: Use rounded table corners

82

83

Returns:

84

- Config: Self for method chaining

85

"""

86

87

def set_tbl_hide_column_data_types(self, active: bool = True) -> Config:

88

"""

89

Hide column data types from display.

90

91

Parameters:

92

- active: Hide data types

93

94

Returns:

95

- Config: Self for method chaining

96

"""

97

98

def set_tbl_hide_column_names(self, active: bool = True) -> Config:

99

"""

100

Hide column names from display.

101

102

Parameters:

103

- active: Hide column names

104

105

Returns:

106

- Config: Self for method chaining

107

"""

108

109

def set_tbl_hide_dtype_separator(self, active: bool = True) -> Config:

110

"""

111

Hide separator between column names and types.

112

113

Parameters:

114

- active: Hide dtype separator

115

116

Returns:

117

- Config: Self for method chaining

118

"""

119

120

# Performance and Behavior Configuration

121

def set_verbose(self, active: bool = True) -> Config:

122

"""

123

Enable verbose output for debugging.

124

125

Parameters:

126

- active: Enable verbose mode

127

128

Returns:

129

- Config: Self for method chaining

130

"""

131

132

def set_streaming_chunk_size(self, size: int) -> Config:

133

"""

134

Set chunk size for streaming operations.

135

136

Parameters:

137

- size: Chunk size in rows

138

139

Returns:

140

- Config: Self for method chaining

141

"""

142

143

def set_auto_structify(self, active: bool = True) -> Config:

144

"""

145

Allow multi-output expressions to be automatically turned into Structs.

146

147

Parameters:

148

- active: Enable auto structification

149

150

Returns:

151

- Config: Self for method chaining

152

"""

153

154

# Context Manager Support

155

def __enter__(self) -> Config:

156

"""Enter configuration context."""

157

158

def __exit__(self, exc_type, exc_val, exc_tb) -> None:

159

"""Exit configuration context, restoring previous settings."""

160

161

# Function Decorator Support

162

def __call__(self, func: Callable) -> Callable:

163

"""Use as function decorator to apply config temporarily."""

164

```

165

166

### String Cache

167

168

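Share a single global string-to-category mapping across Categorical columns, so that categoricals created in different places can be compared, joined, and concatenated without re-encoding. The string cache only affects Categorical data; plain string columns are not changed by it.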

169

170

```python { .api }

171

class StringCache:

172

def __init__(self):

173

"""Context manager for string cache operations."""

174

175

def __enter__(self) -> StringCache:

176

"""Enable string cache."""

177

178

def __exit__(self, exc_type, exc_val, exc_tb) -> None:

179

"""Disable string cache and clean up."""

180

181

def enable_string_cache() -> None:

182

"""

183

Enable global string cache for categorical operations.

184

Strings are interned for memory efficiency and faster comparisons.

185

"""

186

187

def disable_string_cache() -> None:

188

"""

189

Disable global string cache.

190

Clean up interned strings and return to normal string handling.

191

"""

192

193

def using_string_cache() -> bool:

194

"""

195

Check if string cache is currently enabled.

196

197

Returns:

198

- bool: True if string cache is active

199

"""

200

```

201

202

### Meta Information

203

204

Access build information, version details, and system configuration.

205

206

```python { .api }

207

def build_info() -> dict[str, str]:

208

"""

209

Get Polars build information.

210

211

Returns:

212

- dict[str, str]: Build details including version, features, target

213

"""

214

215

def show_versions() -> None:

216

"""

217

Display version information for Polars and key dependencies.

218

Prints version details to stdout for debugging and support.

219

"""

220

221

def thread_pool_size() -> int:

222

"""

223

Get current thread pool size for parallel operations.

224

225

Returns:

226

- int: Number of threads in the pool

227

"""

228

229

def threadpool_size() -> int:

230

"""

231

Deprecated alias for thread_pool_size(); prefer thread_pool_size() in new code.

232

233

Returns:

234

- int: Number of threads in the pool

235

"""

236

237

def get_index_type() -> type:

238

"""

239

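Get the data type Polars uses for row indexing (Polars DataFrames have no index column; this is the integer type used for row positions and counts).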

240

241

Returns:

242

- type: Index data type (typically UInt32 or UInt64)

243

"""

244

```

245

246

### Selectors System

247

248

Powerful column selection system for flexible DataFrame operations.

249

250

```python { .api }

251

class Selector:

252

"""Base class for column selectors."""

253

254

# Type-based Selectors

255

def by_dtype(*dtypes: type) -> Selector:

256

"""Select columns by data type."""

257

258

def numeric() -> Selector:

259

"""Select numeric columns (int, float, decimal)."""

260

261

def integer() -> Selector:

262

"""Select integer columns."""

263

264

def signed_integer() -> Selector:

265

"""Select signed integer columns."""

266

267

def unsigned_integer() -> Selector:

268

"""Select unsigned integer columns."""

269

270

def float() -> Selector:

271

"""Select floating-point columns."""

272

273

def string() -> Selector:

274

"""Select string/text columns."""

275

276

def boolean() -> Selector:

277

"""Select boolean columns."""

278

279

def temporal() -> Selector:

280

"""Select temporal columns (date, datetime, time, duration)."""

281

282

def date() -> Selector:

283

"""Select date columns."""

284

285

def datetime(time_unit: str | None = None, time_zone: str | None = None) -> Selector:

286

"""Select datetime columns with optional unit/timezone filtering."""

287

288

def time() -> Selector:

289

"""Select time columns."""

290

291

def duration(time_unit: str | None = None) -> Selector:

292

"""Select duration columns with optional unit filtering."""

293

294

def categorical() -> Selector:

295

"""Select categorical columns."""

296

297

def enum() -> Selector:

298

"""Select enum columns."""

299

300

def binary() -> Selector:

301

"""Select binary data columns."""

302

303

def decimal() -> Selector:

304

"""Select decimal columns."""

305

306

# Complex Type Selectors

307

def list() -> Selector:

308

"""Select list columns."""

309

310

def array() -> Selector:

311

"""Select array columns."""

312

313

def struct() -> Selector:

314

"""Select struct columns."""

315

316

def nested() -> Selector:

317

"""Select nested columns (list, array, struct)."""

318

319

# Position-based Selectors

320

def first() -> Selector:

321

"""Select first column."""

322

323

def last() -> Selector:

324

"""Select last column."""

325

326

def by_index(*indices: int) -> Selector:

327

"""Select columns by index positions."""

328

329

# Name-based Selectors

330

def by_name(*names: str | list[str]) -> Selector:

331

"""Select columns by exact names."""

332

333

def matches(pattern: str, *, flags: int = 0) -> Selector:

334

"""Select columns matching regex pattern."""

335

336

def contains(substring: str) -> Selector:

337

"""Select columns containing substring."""

338

339

def starts_with(prefix: str) -> Selector:

340

"""Select columns starting with prefix."""

341

342

def ends_with(suffix: str) -> Selector:

343

"""Select columns ending with suffix."""

344

345

# Character Class Selectors

346

def alpha() -> Selector:

347

"""Select columns with alphabetic names."""

348

349

def alphanumeric() -> Selector:

350

"""Select columns with alphanumeric names."""

351

352

def digit() -> Selector:

353

"""Select columns whose names consist only of digit characters."""

354

355

# Utility Selectors

356

def all() -> Selector:

357

"""Select all columns."""

358

359

def exclude(*selectors: Selector | str) -> Selector:

360

"""Exclude specified selectors or column names."""

361

362

# Selector Operations

363

def expand_selector(

364

frame: DataFrame | LazyFrame,

365

*selectors: Selector | str

366

) -> list[str]:

367

"""

368

Expand selectors to column names for given frame.

369

370

Parameters:

371

- frame: DataFrame or LazyFrame to expand selectors against

372

- selectors: Selectors to expand

373

374

Returns:

375

- list[str]: Column names matching selectors

376

"""

377

378

def is_selector(obj: Any) -> bool:

379

"""

380

Check if object is a selector.

381

382

Parameters:

383

- obj: Object to check

384

385

Returns:

386

- bool: True if object is a selector

387

"""

388

```

389

390

### Testing Utilities

391

392

Assertion functions for comparing DataFrames and Series in tests.

393

394

```python { .api }

395

def assert_frame_equal(

396

left: DataFrame | LazyFrame,

397

right: DataFrame | LazyFrame,

398

*,

399

check_dtype: bool = True,

400

check_exact: bool = False,

401

rtol: float = 1e-5,

402

atol: float = 1e-8,

403

categorical_as_str: bool = False,

404

check_column_order: bool = True,

405

check_row_order: bool = True

406

) -> None:

407

"""

408

Assert that two DataFrames are equal.

409

410

Parameters:

411

- left: First DataFrame

412

- right: Second DataFrame

413

- check_dtype: Check column data types

414

- check_exact: Check exact floating-point equality

415

- rtol: Relative tolerance for floating-point comparison

416

- atol: Absolute tolerance for floating-point comparison

417

- categorical_as_str: Compare categoricals as strings

418

- check_column_order: Check column order

419

- check_row_order: Check row order

420

421

Raises:

422

- AssertionError: If DataFrames are not equal

423

"""

424

425

def assert_frame_not_equal(

426

left: DataFrame | LazyFrame,

427

right: DataFrame | LazyFrame,

428

**kwargs

429

) -> None:

430

"""

431

Assert that two DataFrames are not equal.

432

433

Parameters:

434

- left: First DataFrame

435

- right: Second DataFrame

436

- **kwargs: Same parameters as assert_frame_equal

437

438

Raises:

439

- AssertionError: If DataFrames are equal

440

"""

441

442

def assert_series_equal(

443

left: Series,

444

right: Series,

445

*,

446

check_dtype: bool = True,

447

check_exact: bool = False,

448

rtol: float = 1e-5,

449

atol: float = 1e-8,

450

categorical_as_str: bool = False,

451

check_names: bool = True

452

) -> None:

453

"""

454

Assert that two Series are equal.

455

456

Parameters:

457

- left: First Series

458

- right: Second Series

459

- check_dtype: Check data types

460

- check_exact: Check exact floating-point equality

461

- rtol: Relative tolerance for floating-point comparison

462

- atol: Absolute tolerance for floating-point comparison

463

- categorical_as_str: Compare categoricals as strings

464

- check_names: Check Series names

465

466

Raises:

467

- AssertionError: If Series are not equal

468

"""

469

470

def assert_series_not_equal(

471

left: Series,

472

right: Series,

473

**kwargs

474

) -> None:

475

"""

476

Assert that two Series are not equal.

477

478

Parameters:

479

- left: First Series

480

- right: Second Series

481

- **kwargs: Same parameters as assert_series_equal

482

483

Raises:

484

- AssertionError: If Series are equal

485

"""

486

```

487

488

## Usage Examples

489

490

### Configuration Usage

491

492

```python

493

import polars as pl

494

495

# Global configuration changes

496

pl.Config.set_tbl_rows(10)

497

pl.Config.set_tbl_cols(8)

498

pl.Config.set_verbose(True)

499

500

# Context manager for temporary config

501

with pl.Config() as cfg:

502

cfg.set_tbl_rows(20)

503

cfg.set_tbl_cols(12)

504

# Configuration active only within this block

505

print(large_df) # Uses temporary settings

506

507

# Function decorator for config

508

@pl.Config(set_tbl_rows=5, set_verbose=False)

509

def analyze_data(df):

510

return df.describe()

511

512

# Streaming configuration

513

pl.Config.set_streaming_chunk_size(50000)

514

```

515

516

### String Cache Usage

517

518

```python

519

# Context manager approach

520

with pl.StringCache():

521

# Categorical columns created within this block share one string cache

522

df1 = pl.DataFrame({"category": ["A", "B", "A", "C", "B"]}, schema={"category": pl.Categorical})

523

df2 = pl.DataFrame({"category": ["A", "B", "C"]}, schema={"category": pl.Categorical})

524

525

# Categoricals built under the same cache can be joined without re-encoding

526

result = df1.join(df2, on="category")

527

528

# Global enable/disable

529

pl.enable_string_cache()

530

531

# Check if enabled

532

if pl.using_string_cache():

533

print("String cache is active")

534

535

# Categorical operations benefit from string cache

536

df_cat = df.with_columns(pl.col("category").cast(pl.Categorical))

537

538

pl.disable_string_cache()

539

```

540

541

### Meta Information

542

543

```python

544

# Get build information

545

build_info = pl.build_info()

546

print(f"Polars version: {build_info['version']}")

547

print(f"Build features: {build_info['features']}")

548

549

# Show all version information

550

pl.show_versions()

551

552

# Thread pool information

553

thread_count = pl.thread_pool_size()

554

print(f"Using {thread_count} threads")

555

556

# Index type information

557

index_type = pl.get_index_type()

558

print(f"Index type: {index_type}")

559

```

560

561

### Selectors Usage

562

563

```python

564

import polars.selectors as cs

565

566

df = pl.DataFrame({

567

"id": [1, 2, 3],

568

"name": ["Alice", "Bob", "Charlie"],

569

"age": [25, 30, 35],

570

"salary": [50000.0, 60000.0, 70000.0],

571

"active": [True, False, True],

572

"start_date": pl.Series(["2020-01-01", "2019-05-15", "2021-03-10"]).str.to_date()

573

})

574

575

# Type-based selection

576

numeric_cols = df.select(cs.numeric())

577

string_cols = df.select(cs.string())

578

temporal_cols = df.select(cs.temporal())

579

580

# Name-based selection

581

name_pattern_cols = df.select(cs.matches(r".*a.*")) # Contains 'a'

582

prefix_cols = df.select(cs.starts_with("s")) # Starts with 's'

583

584

# Combined selectors

585

analysis_cols = df.select(cs.numeric() | cs.temporal())

586

non_id_cols = df.select(cs.all() & ~cs.by_name("id"))

587

588

# Complex selector operations

589

selected_cols = df.select(

590

cs.numeric() & ~cs.by_name("id"), # Numeric except id

591

cs.string(), # All strings

592

cs.exclude(cs.numeric(), cs.string()) # Everything that is neither numeric nor string (avoids selecting a column twice)

593

)

594

595

# Expand selectors to column names

596

expanded = cs.expand_selector(df, cs.numeric(), cs.string())

597

print(f"Selected columns: {expanded}")

598

```

599

600

### Testing Utilities

601

602

```python

603

import polars.testing as plt

604

605

# Create test DataFrames

606

df1 = pl.DataFrame({

607

"a": [1, 2, 3],

608

"b": [4.0, 5.0, 6.0],

609

"c": ["x", "y", "z"]

610

})

611

612

df2 = pl.DataFrame({

613

"a": [1, 2, 3],

614

"b": [4.0, 5.0, 6.0],

615

"c": ["x", "y", "z"]

616

})

617

618

# Assert DataFrames are equal

619

plt.assert_frame_equal(df1, df2)

620

621

# Assert with tolerance for floating-point

622

df3 = pl.DataFrame({

623

"a": [1, 2, 3],

624

"b": [4.0001, 5.0001, 6.0001],

625

"c": ["x", "y", "z"]

626

})

627

628

plt.assert_frame_equal(df1, df3, rtol=1e-3)

629

630

# Assert Series equality

631

s1 = pl.Series("values", [1, 2, 3])

632

s2 = pl.Series("values", [1, 2, 3])

633

plt.assert_series_equal(s1, s2)

634

635

# Assert inequality

636

df_different = pl.DataFrame({"a": [1, 2, 4]}) # Different values

637

plt.assert_frame_not_equal(df1, df_different)

638

639

# Testing in unit tests

640

def test_data_processing():

641

input_df = pl.DataFrame({"x": [1, 2, 3]})

642

expected_df = pl.DataFrame({"x": [2, 4, 6]})

643

644

result_df = input_df.select(pl.col("x") * 2)

645

646

plt.assert_frame_equal(result_df, expected_df)

647

```

648

649

### Advanced Configuration Patterns

650

651

```python

652

# Chained configuration

653

config_result = (

654

pl.Config()

655

.set_tbl_rows(15)

656

.set_tbl_cols(10)

657

.set_verbose(True)

658

.set_streaming_chunk_size(25000)

659

)

660

661

# Configuration for different environments

662

def setup_dev_config():

663

return (

664

pl.Config()

665

.set_verbose(True)

666

.set_tbl_rows(-1) # Show all rows

667

.set_tbl_cols(-1) # Show all columns

668

)

669

670

def setup_prod_config():

671

return (

672

pl.Config()

673

.set_verbose(False)

674

.set_tbl_rows(10)

675

.set_streaming_chunk_size(100000)

676

)

677

678

# Environment-specific setup (requires `import os` at the top of the script)

679

if os.getenv("ENV") == "development":

680

setup_dev_config()

681

else:

682

setup_prod_config()

683

```

684

685

### String Cache Performance Benefits

686

687

```python

688

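# Timing comparison example. NOTE(review): the string cache only affects Categorical columns; the plain string "cat" column below is not affected by it, so cast it to pl.Categorical to measure a meaningful difference.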

689

import time

690

691

# Without string cache

692

start_time = time.time()

693

for _ in range(1000):

694

df = pl.DataFrame({"cat": ["A", "B", "C"] * 1000})

695

result = df.filter(pl.col("cat") == "A")

696

no_cache_time = time.time() - start_time

697

698

# With string cache

699

pl.enable_string_cache()

700

start_time = time.time()

701

for _ in range(1000):

702

df = pl.DataFrame({"cat": ["A", "B", "C"] * 1000})

703

result = df.filter(pl.col("cat") == "A")

704

cache_time = time.time() - start_time

705

pl.disable_string_cache()

706

707

print(f"Without cache: {no_cache_time:.3f}s")

708

print(f"With cache: {cache_time:.3f}s")

709

print(f"Speedup: {no_cache_time/cache_time:.2f}x")

710

```

711

712

### CompatLevel

713

714

Controls which data-structure compatibility level Polars uses when exchanging data with external systems, for example when converting to or from Arrow.

715

716

```python { .api }

717

class CompatLevel:

718

"""

719

Data structure compatibility level for interchange protocols.

720

721

Used to control compatibility when converting to/from external formats

722

like Arrow, ensuring data structures are compatible with different

723

system requirements.

724

"""

725

726

@staticmethod

727

def newest() -> CompatLevel:

728

"""

729

Get the highest supported compatibility level.

730

731

Warning: Highest compatibility level is considered unstable

732

and may change without notice.

733

"""

734

735

@staticmethod

736

def oldest() -> CompatLevel:

737

"""Get the oldest compatibility level, which is the most widely supported by external consumers."""

738

```