Tessl Tile for pypi/polars-lts-cpu@1.33.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

configuration.md core-classes.md data-types.md expressions.md functions.md index.md io-operations.md sql-functionality.md

docs/configuration.md

0
# Configuration and Utilities
1

2
This page covers configuration options, selectors for column operations, string caching for categorical data, meta-information utilities, and testing utilities for comparing DataFrames and Series. Together, these components let you customize Polars behavior and work efficiently with data.
3

4
## Capabilities
5

6
### Configuration
7

8
Customize Polars display options, performance settings, and behavior through the Config class.
9

10
```python { .api }
11
class Config:
12
    def __init__(self):
13
        """Global configuration manager for Polars."""
14

15
    # Table Display Configuration
16
    def set_tbl_cols(self, n: int) -> Config:
17
        """
18
        Set maximum number of columns to display.
19
        
20
        Parameters:
21
        - n: Maximum columns (-1 for unlimited)
22
        
23
        Returns:
24
        - Config: Self for method chaining
25
        """
26

27
    def set_tbl_rows(self, n: int) -> Config:
28
        """
29
        Set maximum number of rows to display.
30
        
31
        Parameters:
32
        - n: Maximum rows (-1 for unlimited)
33
        
34
        Returns:
35
        - Config: Self for method chaining
36
        """
37

38
    def set_tbl_width_chars(self, width: int) -> Config:
39
        """
40
        Set maximum table width in characters.
41
        
42
        Parameters:
43
        - width: Maximum width in characters
44
        
45
        Returns:
46
        - Config: Self for method chaining
47
        """
48

49
    def set_tbl_column_data_type_inline(self, active: bool = True) -> Config:
50
        """
51
        Show column data types inline with headers.
52
        
53
        Parameters:
54
        - active: Enable inline data types
55
        
56
        Returns:
57
        - Config: Self for method chaining
58
        """
59

60
    def set_tbl_dataframe_shape_below(self, active: bool = True) -> Config:
61
        """
62
        Display DataFrame shape below the table.
63
        
64
        Parameters:
65
        - active: Show shape below table
66
        
67
        Returns:
68
        - Config: Self for method chaining
69
        """
70

71
    def set_tbl_formatting(
72
        self,
73
        format: str = "UTF8_FULL_CONDENSED",
74
        rounded_corners: bool = False
75
    ) -> Config:
76
        """
77
        Set table formatting style.
78
        
79
        Parameters:
80
        - format: Table format style
81
        - rounded_corners: Use rounded table corners
82
        
83
        Returns:
84
        - Config: Self for method chaining
85
        """
86

87
    def set_tbl_hide_column_data_types(self, active: bool = True) -> Config:
88
        """
89
        Hide column data types from display.
90
        
91
        Parameters:
92
        - active: Hide data types
93
        
94
        Returns:
95
        - Config: Self for method chaining
96
        """
97

98
    def set_tbl_hide_column_names(self, active: bool = True) -> Config:
99
        """
100
        Hide column names from display.
101
        
102
        Parameters:
103
        - active: Hide column names
104
        
105
        Returns:
106
        - Config: Self for method chaining
107
        """
108

109
    def set_tbl_hide_dtype_separator(self, active: bool = True) -> Config:
110
        """
111
        Hide separator between column names and types.
112
        
113
        Parameters:
114
        - active: Hide dtype separator
115
        
116
        Returns:
117
        - Config: Self for method chaining
118
        """
119

120
    # Performance and Behavior Configuration
121
    def set_verbose(self, active: bool = True) -> Config:
122
        """
123
        Enable verbose output for debugging.
124
        
125
        Parameters:
126
        - active: Enable verbose mode
127
        
128
        Returns:
129
        - Config: Self for method chaining
130
        """
131

132
    def set_streaming_chunk_size(self, size: int) -> Config:
133
        """
134
        Set chunk size for streaming operations.
135
        
136
        Parameters:
137
        - size: Chunk size in rows
138
        
139
        Returns:
140
        - Config: Self for method chaining
141
        """
142

143
    def set_auto_structify(self, active: bool = True) -> Config:
144
        """
145
        Allow multi-output expressions to be automatically turned into Structs.
146
        
147
        Parameters:
148
        - active: Enable auto structification
149
        
150
        Returns:
151
        - Config: Self for method chaining
152
        """
153

154
    # Context Manager Support
155
    def __enter__(self) -> Config:
156
        """Enter configuration context."""
157

158
    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
159
        """Exit configuration context, restoring previous settings."""
160

161
    # Function Decorator Support
162
    def __call__(self, func: Callable) -> Callable:
163
        """Use as function decorator to apply config temporarily."""
164
```
165

166
### String Cache
167

168
Optimize memory usage and performance for categorical-like string data through string interning.
169

170
```python { .api }
171
class StringCache:
172
    def __init__(self):
173
        """Context manager for string cache operations."""
174

175
    def __enter__(self) -> StringCache:
176
        """Enable string cache."""
177

178
    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
179
        """Disable string cache and clean up."""
180

181
def enable_string_cache() -> None:
182
    """
183
    Enable global string cache for categorical operations.
184
    Strings are interned for memory efficiency and faster comparisons.
185
    """
186

187
def disable_string_cache() -> None:
188
    """
189
    Disable global string cache.
190
    Clean up interned strings and return to normal string handling.
191
    """
192

193
def using_string_cache() -> bool:
194
    """
195
    Check if string cache is currently enabled.
196
    
197
    Returns:
198
    - bool: True if string cache is active
199
    """
200
```
201

202
### Meta Information
203

204
Access build information, version details, and system configuration.
205

206
```python { .api }
207
def build_info() -> dict[str, str]:
208
    """
209
    Get Polars build information.
210
    
211
    Returns:
212
    - dict[str, str]: Build details including version, features, target
213
    """
214

215
def show_versions() -> None:
216
    """
217
    Display version information for Polars and key dependencies.
218
    Prints version details to stdout for debugging and support.
219
    """
220

221
def thread_pool_size() -> int:
222
    """
223
    Get current thread pool size for parallel operations.
224
    
225
    Returns:
226
    - int: Number of threads in the pool
227
    """
228

229
def threadpool_size() -> int:
230
    """
231
    Alias for thread_pool_size().
232
    
233
    Returns:
234
    - int: Number of threads in the pool
235
    """
236

237
def get_index_type() -> type:
238
    """
239
    Get the data type used for DataFrame indices.
240
    
241
    Returns:
242
    - type: Index data type (typically UInt32 or UInt64)
243
    """
244
```
245

246
### Selectors System
247

248
Powerful column selection system for flexible DataFrame operations.
249

250
```python { .api }
251
class Selector:
252
    """Base class for column selectors."""
253

254
# Type-based Selectors
255
def by_dtype(*dtypes: type) -> Selector:
256
    """Select columns by data type."""
257

258
def numeric() -> Selector:
259
    """Select numeric columns (int, float, decimal)."""
260

261
def integer() -> Selector:
262
    """Select integer columns."""
263

264
def signed_integer() -> Selector:
265
    """Select signed integer columns."""
266

267
def unsigned_integer() -> Selector:
268
    """Select unsigned integer columns."""
269

270
def float() -> Selector:
271
    """Select floating-point columns."""
272

273
def string() -> Selector:
274
    """Select string/text columns."""
275

276
def boolean() -> Selector:
277
    """Select boolean columns."""
278

279
def temporal() -> Selector:
280
    """Select temporal columns (date, datetime, time, duration)."""
281

282
def date() -> Selector:
283
    """Select date columns."""
284

285
def datetime(time_unit: str | None = None, time_zone: str | None = None) -> Selector:
286
    """Select datetime columns with optional unit/timezone filtering."""
287

288
def time() -> Selector:
289
    """Select time columns."""
290

291
def duration(time_unit: str | None = None) -> Selector:
292
    """Select duration columns with optional unit filtering."""
293

294
def categorical() -> Selector:
295
    """Select categorical columns."""
296

297
def enum() -> Selector:
298
    """Select enum columns."""
299

300
def binary() -> Selector:
301
    """Select binary data columns."""
302

303
def decimal() -> Selector:
304
    """Select decimal columns."""
305

306
# Complex Type Selectors
307
def list() -> Selector:
308
    """Select list columns."""
309

310
def array() -> Selector:
311
    """Select array columns."""
312

313
def struct() -> Selector:
314
    """Select struct columns."""
315

316
def nested() -> Selector:
317
    """Select nested columns (list, array, struct)."""
318

319
# Position-based Selectors
320
def first() -> Selector:
321
    """Select first column."""
322

323
def last() -> Selector:
324
    """Select last column."""
325

326
def by_index(*indices: int) -> Selector:
327
    """Select columns by index positions."""
328

329
# Name-based Selectors
330
def by_name(*names: str | list[str]) -> Selector:
331
    """Select columns by exact names."""
332

333
def matches(pattern: str, *, flags: int = 0) -> Selector:
334
    """Select columns matching regex pattern."""
335

336
def contains(substring: str) -> Selector:
337
    """Select columns containing substring."""
338

339
def starts_with(prefix: str) -> Selector:
340
    """Select columns starting with prefix."""
341

342
def ends_with(suffix: str) -> Selector:
343
    """Select columns ending with suffix."""
344

345
# Character Class Selectors
346
def alpha() -> Selector:
347
    """Select columns with alphabetic names."""
348

349
def alphanumeric() -> Selector:
350
    """Select columns with alphanumeric names."""
351

352
def digit() -> Selector:
353
    """Select columns whose names consist only of digits."""
354

355
# Utility Selectors
356
def all() -> Selector:
357
    """Select all columns."""
358

359
def exclude(*selectors: Selector | str) -> Selector:
360
    """Exclude specified selectors or column names."""
361

362
# Selector Operations
363
def expand_selector(
364
    frame: DataFrame | LazyFrame,
365
    *selectors: Selector | str
366
) -> list[str]:
367
    """
368
    Expand selectors to column names for given frame.
369
    
370
    Parameters:
371
    - frame: DataFrame or LazyFrame to expand selectors against
372
    - selectors: Selectors to expand
373
    
374
    Returns:
375
    - list[str]: Column names matching selectors
376
    """
377

378
def is_selector(obj: Any) -> bool:
379
    """
380
    Check if object is a selector.
381
    
382
    Parameters:
383
    - obj: Object to check
384
    
385
    Returns:
386
    - bool: True if object is a selector
387
    """
388
```
389

390
### Testing Utilities
391

392
Assertion functions for comparing DataFrames and Series in tests.
393

394
```python { .api }
395
def assert_frame_equal(
396
    left: DataFrame | LazyFrame,
397
    right: DataFrame | LazyFrame,
398
    *,
399
    check_dtype: bool = True,
400
    check_exact: bool = False,
401
    rtol: float = 1e-5,
402
    atol: float = 1e-8,
403
    categorical_as_str: bool = False,
404
    check_column_order: bool = True,
405
    check_row_order: bool = True
406
) -> None:
407
    """
408
    Assert that two DataFrames are equal.
409
    
410
    Parameters:
411
    - left: First DataFrame
412
    - right: Second DataFrame
413
    - check_dtype: Check column data types
414
    - check_exact: Check exact floating-point equality
415
    - rtol: Relative tolerance for floating-point comparison
416
    - atol: Absolute tolerance for floating-point comparison
417
    - categorical_as_str: Compare categoricals as strings
418
    - check_column_order: Check column order
419
    - check_row_order: Check row order
420
    
421
    Raises:
422
    - AssertionError: If DataFrames are not equal
423
    """
424

425
def assert_frame_not_equal(
426
    left: DataFrame | LazyFrame,
427
    right: DataFrame | LazyFrame,
428
    **kwargs
429
) -> None:
430
    """
431
    Assert that two DataFrames are not equal.
432
    
433
    Parameters:
434
    - left: First DataFrame
435
    - right: Second DataFrame
436
    - **kwargs: Same parameters as assert_frame_equal
437
    
438
    Raises:
439
    - AssertionError: If DataFrames are equal
440
    """
441

442
def assert_series_equal(
443
    left: Series,
444
    right: Series,
445
    *,
446
    check_dtype: bool = True,
447
    check_exact: bool = False,
448
    rtol: float = 1e-5,
449
    atol: float = 1e-8,
450
    categorical_as_str: bool = False,
451
    check_names: bool = True
452
) -> None:
453
    """
454
    Assert that two Series are equal.
455
    
456
    Parameters:
457
    - left: First Series
458
    - right: Second Series
459
    - check_dtype: Check data types
460
    - check_exact: Check exact floating-point equality
461
    - rtol: Relative tolerance for floating-point comparison
462
    - atol: Absolute tolerance for floating-point comparison
463
    - categorical_as_str: Compare categoricals as strings
464
    - check_names: Check Series names
465
    
466
    Raises:
467
    - AssertionError: If Series are not equal
468
    """
469

470
def assert_series_not_equal(
471
    left: Series,
472
    right: Series,
473
    **kwargs
474
) -> None:
475
    """
476
    Assert that two Series are not equal.
477
    
478
    Parameters:
479
    - left: First Series
480
    - right: Second Series
481
    - **kwargs: Same parameters as assert_series_equal
482
    
483
    Raises:
484
    - AssertionError: If Series are equal
485
    """
486
```
487

488
## Usage Examples
489

490
### Configuration Usage
491

492
```python
493
import polars as pl
494

495
# Global configuration changes
496
pl.Config.set_tbl_rows(10)
497
pl.Config.set_tbl_cols(8)
498
pl.Config.set_verbose(True)
499

500
# Context manager for temporary config
501
with pl.Config() as cfg:
502
    cfg.set_tbl_rows(20)
503
    cfg.set_tbl_cols(12)
504
    # Configuration active only within this block
505
    print(large_df)  # Uses temporary settings
506

507
# Function decorator for config
508
@pl.Config(set_tbl_rows=5, set_verbose=False)
509
def analyze_data(df):
510
    return df.describe()
511

512
# Streaming configuration
513
pl.Config.set_streaming_chunk_size(50000)
514
```
515

516
### String Cache Usage
517

518
```python
519
# Context manager approach
520
with pl.StringCache():
521
    # String operations are optimized within this block
522
    df1 = pl.DataFrame({"category": ["A", "B", "A", "C", "B"]})
523
    df2 = pl.DataFrame({"category": ["A", "B", "C"]})
524
    
525
    # Joins and categorical operations are faster
526
    result = df1.join(df2, on="category")
527

528
# Global enable/disable
529
pl.enable_string_cache()
530

531
# Check if enabled
532
if pl.using_string_cache():
533
    print("String cache is active")
534

535
# Categorical operations benefit from string cache
536
df_cat = df.with_columns(pl.col("category").cast(pl.Categorical))
537

538
pl.disable_string_cache()
539
```
540

541
### Meta Information
542

543
```python
544
# Get build information
545
build_info = pl.build_info()
546
print(f"Polars version: {build_info['version']}")
547
print(f"Build features: {build_info['features']}")
548

549
# Show all version information
550
pl.show_versions()
551

552
# Thread pool information
553
thread_count = pl.thread_pool_size()
554
print(f"Using {thread_count} threads")
555

556
# Index type information
557
index_type = pl.get_index_type()
558
print(f"Index type: {index_type}")
559
```
560

561
### Selectors Usage
562

563
```python
564
import polars.selectors as cs
565

566
df = pl.DataFrame({
567
    "id": [1, 2, 3],
568
    "name": ["Alice", "Bob", "Charlie"],
569
    "age": [25, 30, 35],
570
    "salary": [50000.0, 60000.0, 70000.0],
571
    "active": [True, False, True],
572
    "start_date": [pl.date(2020, 1, 1), pl.date(2019, 5, 15), pl.date(2021, 3, 10)]
573
})
574

575
# Type-based selection
576
numeric_cols = df.select(cs.numeric())
577
string_cols = df.select(cs.string())
578
temporal_cols = df.select(cs.temporal())
579

580
# Name-based selection
581
name_pattern_cols = df.select(cs.matches(r".*a.*"))  # Contains 'a'
582
prefix_cols = df.select(cs.starts_with("s"))  # Starts with 's'
583

584
# Combined selectors
585
analysis_cols = df.select(cs.numeric() | cs.temporal())
586
non_id_cols = df.select(cs.all() & ~cs.by_name("id"))
587

588
# Complex selector operations
589
selected_cols = df.select(
590
    cs.numeric() & ~cs.by_name("id"),  # Numeric except id
591
    cs.string(),                       # All strings
592
    cs.exclude(cs.boolean())          # Everything except boolean
593
)
594

595
# Expand selectors to column names
596
expanded = cs.expand_selector(df, cs.numeric(), cs.string())
597
print(f"Selected columns: {expanded}")
598
```
599

600
### Testing Utilities
601

602
```python
603
import polars.testing as plt
604

605
# Create test DataFrames
606
df1 = pl.DataFrame({
607
    "a": [1, 2, 3],
608
    "b": [4.0, 5.0, 6.0],
609
    "c": ["x", "y", "z"]
610
})
611

612
df2 = pl.DataFrame({
613
    "a": [1, 2, 3],
614
    "b": [4.0, 5.0, 6.0],
615
    "c": ["x", "y", "z"]
616
})
617

618
# Assert DataFrames are equal
619
plt.assert_frame_equal(df1, df2)
620

621
# Assert with tolerance for floating-point
622
df3 = pl.DataFrame({
623
    "a": [1, 2, 3],
624
    "b": [4.0001, 5.0001, 6.0001],
625
    "c": ["x", "y", "z"]
626
})
627

628
plt.assert_frame_equal(df1, df3, rtol=1e-3)
629

630
# Assert Series equality
631
s1 = pl.Series("values", [1, 2, 3])
632
s2 = pl.Series("values", [1, 2, 3])
633
plt.assert_series_equal(s1, s2)
634

635
# Assert inequality
636
df_different = pl.DataFrame({"a": [1, 2, 4]})  # Different values
637
plt.assert_frame_not_equal(df1, df_different)
638

639
# Testing in unit tests
640
def test_data_processing():
641
    input_df = pl.DataFrame({"x": [1, 2, 3]})
642
    expected_df = pl.DataFrame({"x": [2, 4, 6]})
643
    
644
    result_df = input_df.select(pl.col("x") * 2)
645
    
646
    plt.assert_frame_equal(result_df, expected_df)
647
```
648

649
### Advanced Configuration Patterns
650

651
```python
652
# Chained configuration
653
config_result = (
654
    pl.Config()
655
    .set_tbl_rows(15)
656
    .set_tbl_cols(10)
657
    .set_verbose(True)
658
    .set_streaming_chunk_size(25000)
659
)
660

661
# Configuration for different environments
662
def setup_dev_config():
663
    return (
664
        pl.Config()
665
        .set_verbose(True)
666
        .set_tbl_rows(-1)  # Show all rows
667
        .set_tbl_cols(-1)  # Show all columns
668
    )
669

670
def setup_prod_config():
671
    return (
672
        pl.Config()
673
        .set_verbose(False)
674
        .set_tbl_rows(10)
675
        .set_streaming_chunk_size(100000)
676
    )
677

678
# Environment-specific setup
679
if os.getenv("ENV") == "development":
680
    setup_dev_config()
681
else:
682
    setup_prod_config()
683
```
684

685
### String Cache Performance Benefits
686

687
```python
688
# Performance comparison example
689
import time
690

691
# Without string cache
692
start_time = time.time()
693
for _ in range(1000):
694
    df = pl.DataFrame({"cat": ["A", "B", "C"] * 1000})
695
    result = df.filter(pl.col("cat") == "A")
696
no_cache_time = time.time() - start_time
697

698
# With string cache
699
pl.enable_string_cache()
700
start_time = time.time()
701
for _ in range(1000):
702
    df = pl.DataFrame({"cat": ["A", "B", "C"] * 1000})
703
    result = df.filter(pl.col("cat") == "A")
704
cache_time = time.time() - start_time
705
pl.disable_string_cache()
706

707
print(f"Without cache: {no_cache_time:.3f}s")
708
print(f"With cache: {cache_time:.3f}s")
709
print(f"Speedup: {no_cache_time/cache_time:.2f}x")
710
```
711

712
### CompatLevel
713

714
`CompatLevel` controls the data-structure compatibility level used when exchanging data with external systems, for example when converting to or from Arrow.
715

716
```python { .api }
717
class CompatLevel:
718
    """
719
    Data structure compatibility level for interchange protocols.
720
    
721
    Used to control compatibility when converting to/from external formats
722
    like Arrow, ensuring data structures are compatible with different
723
    system requirements.
724
    """
725
    
726
    @staticmethod
727
    def newest() -> CompatLevel:
728
        """
729
        Get the highest supported compatibility level.
730
        
731
        Warning: Highest compatibility level is considered unstable
732
        and may change without notice.
733
        """
734
    
735
    @staticmethod 
736
    def oldest() -> CompatLevel:
737
        """Get the oldest supported compatibility level, which offers the widest compatibility with external systems."""
738
```

Version

Tile

Files

docs/configuration.md

docs/configuration.md