Blazingly fast DataFrame library with 64-bit index support for handling datasets with more than 4.2 billion rows
—
System configuration, metadata information, string cache management, API extensions, and plugin support for customizing Polars behavior and extending functionality.
Global configuration system for controlling display formatting, performance settings, and runtime behavior.
class Config:
    """
    Global configuration manager for Polars settings.

    Controls display formatting, performance parameters, and debugging options.
    The setters are classmethods: call them directly on the class, for
    example ``Config.set_tbl_rows(10)``. Each is documented to return the
    ``Config`` class so that calls can be chained.
    """

    @classmethod
    def set_fmt_float(cls, fmt: FloatFmt = "mixed") -> type[Config]:
        """
        Set float formatting option.

        Parameters:
        - fmt: Float format, either "mixed" or "full"

        Returns:
        Config class for method chaining
        """

    @classmethod
    def set_fmt_str_lengths(cls, n: int) -> type[Config]:
        """
        Set maximum string length for display.

        Parameters:
        - n: Maximum string length

        Returns:
        Config class for method chaining
        """

    @classmethod
    def set_tbl_cols(cls, n: int) -> type[Config]:
        """
        Set maximum number of columns to display.

        Parameters:
        - n: Maximum number of columns

        Returns:
        Config class for method chaining
        """

    @classmethod
    def set_tbl_rows(cls, n: int) -> type[Config]:
        """
        Set maximum number of rows to display.

        Parameters:
        - n: Maximum number of rows

        Returns:
        Config class for method chaining
        """

    @classmethod
    def set_tbl_width_chars(cls, width: int) -> type[Config]:
        """
        Set table width in characters.

        Parameters:
        - width: Table width in characters

        Returns:
        Config class for method chaining
        """

    @classmethod
    def set_tbl_formatting(cls, formatting: TableFormatNames) -> type[Config]:
        """
        Set table formatting style.

        Parameters:
        - formatting: Table format style (for example "ASCII_MARKDOWN")

        Returns:
        Config class for method chaining
        """

    @classmethod
    def set_streaming_chunk_size(cls, size: int) -> type[Config]:
        """
        Set streaming chunk size for processing.

        Parameters:
        - size: Number of rows per chunk

        Returns:
        Config class for method chaining
        """

    @classmethod
    def set_verbose(cls, active: bool = True) -> type[Config]:
        """
        Set verbose logging mode.

        Parameters:
        - active: Enable verbose logging

        Returns:
        Config class for method chaining
        """

    @classmethod
    def load(cls, file: str | Path) -> type[Config]:
        """
        Load configuration from JSON file.

        NOTE(review): recent Polars releases expose ``load`` (takes a JSON
        string) and ``load_from_file`` (takes a path) as separate methods;
        confirm this signature against the 1.33.1 API.

        Parameters:
        - file: Path to configuration file

        Returns:
        Config class for method chaining
        """

    @classmethod
    def save(cls, file: str | Path, *, set_as_default: bool = False) -> type[Config]:
        """
        Save current configuration to JSON file.

        NOTE(review): recent Polars releases expose ``save`` (returns a JSON
        string) and ``save_to_file`` (takes a path) as separate methods;
        confirm this signature against the 1.33.1 API.

        Parameters:
        - file: Path to save configuration
        - set_as_default: Set as default configuration

        Returns:
        Config class for method chaining
        """

    @classmethod
    def restore_defaults(cls) -> type[Config]:
        """
        Restore default configuration settings.

        Returns:
        Config class for method chaining
        """

    @contextlib.contextmanager
    def __call__(self, **kwargs: Any) -> Config:
        """
        Context manager for temporary configuration changes.

        NOTE(review): ``with pl.Config(...)`` goes through the constructor,
        not ``__call__``; confirm how the context-manager protocol is
        implemented upstream.

        Parameters:
        - kwargs: Configuration parameters to set temporarily

        Returns:
        Config context manager
        """

# Functions for retrieving information about the Polars installation and runtime environment.
def build_info() -> dict[str, Any]:
    """
    Return build information for the installed Polars package.

    Returns:
    Dictionary of build details (version, commit, features, etc.)
    """
def show_versions() -> None:
    """
    Print the versions of Polars, Python, and installed optional packages.

    Returns:
    None; the version report is printed rather than returned
    """
def get_index_type() -> str:
    """
    Report which index type this Polars build uses.

    NOTE(review): upstream Polars appears to return the dtype itself
    (``pl.UInt32`` / ``pl.UInt64``) rather than a string; confirm.

    Returns:
    Index type ("UInt32" for standard, "UInt64" for u64-idx variant)
    """
def thread_pool_size() -> int:
    """
    Return how many threads the global thread pool holds.

    Returns:
    Number of threads in the global thread pool
    """
def threadpool_size() -> int:
    """
    Return how many threads the global thread pool holds.

    Alias for thread_pool_size.
    NOTE(review): upstream appears to mark this spelling as deprecated in
    favour of thread_pool_size; confirm before recommending it.

    Returns:
    Number of threads in the global thread pool
    """

# Global string cache for optimizing categorical data operations across DataFrames.
class StringCache:
    """
    Context manager that turns the global string cache on and off.

    Categorical columns built while the same global string cache is active
    share one physical value for equal strings, which is what allows them
    to be concatenated and joined.
    """

    def __enter__(self) -> StringCache:
        """Switch the global string cache on."""

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Switch the global string cache off and clear it."""
def enable_string_cache() -> None:
    """
    Turn the global string cache on.

    With the cache enabled, categorical columns holding the same string
    values can be concatenated or joined.
    """
def disable_string_cache() -> None:
    """
    Turn the global string cache off and clear it.

    The memory held by the cache is released, but categorical operations
    across separately created DataFrames are no longer possible.
    """
def using_string_cache() -> bool:
    """
    Report whether the global string cache is currently switched on.

    Returns:
    True if the global string cache is enabled
    """

# Functions for registering custom namespaces to extend Polars classes with user-defined functionality.
def register_dataframe_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
    """
    Attach a user-defined namespace to DataFrame objects.

    Parameters:
    - name: Namespace name (reachable afterwards as df.name)

    Returns:
    Decorator to apply to the namespace class
    """
def register_lazyframe_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
    """
    Attach a user-defined namespace to LazyFrame objects.

    Parameters:
    - name: Namespace name (reachable afterwards as lf.name)

    Returns:
    Decorator to apply to the namespace class
    """
def register_series_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
    """
    Attach a user-defined namespace to Series objects.

    Parameters:
    - name: Namespace name (reachable afterwards as s.name)

    Returns:
    Decorator to apply to the namespace class
    """
def register_expr_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
    """
    Attach a user-defined namespace to Expr objects.

    Parameters:
    - name: Namespace name (reachable afterwards as expr.name)

    Returns:
    Decorator to apply to the namespace class
    """

# System for registering and using external plugin functions written in Rust.
def register_plugin_function(
    *,
    plugin_path: Path | str,
    function_name: str,
    args: IntoExpr | Iterable[IntoExpr],
    kwargs: dict[str, Any] | None = None,
    is_elementwise: bool = False,
    changes_length: bool = False,
    returns_scalar: bool = False,
    cast_to_supertype: bool = False,
    input_wildcard_expansion: bool = False,
    pass_name_to_apply: bool = False,
    use_abs_path: bool = False,
) -> Expr:
    """
    Register a function exported by a plugin dynamic library.

    All arguments are keyword-only.

    Parameters:
    - plugin_path: Location of the plugin dynamic library, or of its directory
    - function_name: Name of the Rust function to register
    - args: Expression argument(s) handed to the function
    - kwargs: Keyword arguments handed to the function
    - is_elementwise: The function operates element-wise
    - changes_length: The function may change the output length
    - returns_scalar: The function returns a scalar value
    - cast_to_supertype: Cast the inputs to their common supertype
    - input_wildcard_expansion: Expand wildcard expressions
    - pass_name_to_apply: Pass the column name to the function
    - use_abs_path: Resolve the plugin through an absolute path

    Returns:
    Expression that calls the plugin function
    """

import polars as pl
# Temporary configuration changes (`df` is any existing DataFrame)
with pl.Config(set_tbl_cols=5, set_tbl_rows=10):
    print(df)  # Limited to 5 columns, 10 rows

# Permanent configuration changes
pl.Config.set_fmt_float("full")
pl.Config.set_tbl_formatting("ASCII_MARKDOWN")

# Save and load configuration.
# The *_file variants take a path; plain save()/load() work on a JSON string.
pl.Config.save_to_file("my_config.json")
pl.Config.load_from_file("my_config.json")

import polars as pl
# Context manager approach
with pl.StringCache():
    # One cast expression, reused for both frames
    to_categorical = pl.col("cat").cast(pl.Categorical)
    df1 = pl.DataFrame({"cat": ["A", "B", "C"]}).with_columns(to_categorical)
    df2 = pl.DataFrame({"cat": ["A", "B", "D"]}).with_columns(to_categorical)
    # These can now be concatenated
    result = pl.concat([df1, df2])

# Function approach
pl.enable_string_cache()
# ... create categorical columns ...
pl.disable_string_cache()

import polars as pl
# The namespace decorators are exposed through the `pl.api` module.
@pl.api.register_dataframe_namespace("business")
class BusinessAccessor:
    """Custom `business` namespace made available on every DataFrame."""

    def __init__(self, df: pl.DataFrame) -> None:
        # Polars passes in the DataFrame the namespace is accessed on.
        self._df = df

    def calculate_revenue(self) -> pl.DataFrame:
        """Return the frame with an added `revenue` column (price * quantity)."""
        return self._df.with_columns(
            (pl.col("price") * pl.col("quantity")).alias("revenue")
        )


# Usage
df = pl.DataFrame({"price": [10, 20], "quantity": [5, 3]})
revenue_df = df.business.calculate_revenue()

import polars as pl
# Register and use a custom plugin function.
# `register_plugin_function` lives in the `polars.plugins` module.
expr = pl.plugins.register_plugin_function(
    plugin_path="./my_plugin.so",
    function_name="custom_transform",
    args=[pl.col("data")],
    is_elementwise=True,
)
result = df.with_columns(expr.alias("transformed"))

Install with Tessl CLI
npx tessl i tessl/pypi-polars-u64-idx@1.33.1