# polars-lts-cpu

Blazingly fast DataFrame library for legacy CPUs without AVX2 support.

The fundamental data structures that form the foundation of polars-lts-cpu's data manipulation capabilities. These classes provide different approaches to working with tabular data, from eager evaluation to lazy optimization.

## DataFrame

Two-dimensional data structure representing tabular data with rows and columns, providing eager evaluation for immediate operations.
class DataFrame:
    """Two-dimensional, eagerly evaluated tabular data structure (rows x columns)."""

    def __init__(
        self,
        data: Any = None,
        schema: SchemaDict | None = None,
        schema_overrides: SchemaDict | None = None,
        orient: str | None = None,
        infer_schema_length: int | None = 100,
        nan_to_null: bool = False
    ):
        """
        Create a DataFrame from various data sources.

        Parameters:
        - data: Data source (dict, list, numpy array, pandas DataFrame, etc.)
        - schema: Schema specification as {column: dtype} dict
        - schema_overrides: Override inferred types for specific columns
        - orient: Data orientation ('col' or 'row')
        - infer_schema_length: Number of rows to scan for schema inference
        - nan_to_null: Convert NaN values to null
        """

    # Properties
    @property
    def shape(self) -> tuple[int, int]:
        """Returns (height, width) tuple."""

    @property
    def height(self) -> int:
        """Number of rows."""

    @property
    def width(self) -> int:
        """Number of columns."""

    @property
    def columns(self) -> list[str]:
        """Column names."""

    @property
    def dtypes(self) -> list[type]:
        """Column data types."""

    @property
    def schema(self) -> dict[str, type]:
        """Schema as {column: dtype} dict."""

    # Data Selection and Filtering
    def select(self, *exprs: IntoExpr) -> DataFrame:
        """Select columns using expressions."""

    def filter(self, predicate: IntoExpr) -> DataFrame:
        """Filter rows based on predicate."""

    def with_columns(self, *exprs: IntoExpr, **named_exprs: IntoExpr) -> DataFrame:
        """Add or modify columns."""

    def drop(self, *columns: str) -> DataFrame:
        """Drop columns."""

    def rename(self, mapping: dict[str, str]) -> DataFrame:
        """Rename columns."""

    # Data Access
    def get_column(self, name: str) -> Series:
        """Get column as Series."""

    def get_columns(self) -> list[Series]:
        """Get all columns as list of Series."""

    def row(self, index: int, *, named: bool = False) -> tuple | dict:
        """Get single row."""

    def rows(self, *, named: bool = False) -> list[tuple] | list[dict]:
        """Get all rows."""

    def item(self, row: int | None = None, column: str | int | None = None) -> Any:
        """Get single item."""

    # Transformations
    def sort(
        self,
        by: ColumnNameOrSelector | list[ColumnNameOrSelector],
        *,
        descending: bool | list[bool] = False,
        nulls_last: bool = False
    ) -> DataFrame:
        """Sort DataFrame."""

    def reverse(self) -> DataFrame:
        """Reverse row order."""

    def transpose(
        self,
        *,
        include_header: bool = False,
        header_name: str = "column",
        column_names: str | list[str] | None = None
    ) -> DataFrame:
        """Transpose DataFrame."""

    def cast(self, dtypes: dict[str, type] | type, *, strict: bool = True) -> DataFrame:
        """Cast column types."""

    # Aggregations
    def sum(self, *, axis: int = 0) -> DataFrame | Series:
        """Sum values."""

    def mean(self, *, axis: int = 0) -> DataFrame | Series:
        """Mean of values."""

    def max(self, *, axis: int = 0) -> DataFrame | Series:
        """Maximum values."""

    def min(self, *, axis: int = 0) -> DataFrame | Series:
        """Minimum values."""

    def std(self, *, ddof: int = 1, axis: int = 0) -> DataFrame | Series:
        """Standard deviation."""

    def var(self, *, ddof: int = 1, axis: int = 0) -> DataFrame | Series:
        """Variance."""

    def median(self, *, axis: int = 0) -> DataFrame | Series:
        """Median values."""

    def quantile(self, quantile: float, *, interpolation: str = "nearest", axis: int = 0) -> DataFrame | Series:
        """Quantile values."""

    # Horizontal Operations
    def sum_horizontal(self, *exprs: IntoExpr) -> DataFrame:
        """Sum values horizontally across columns."""

    def mean_horizontal(self, *exprs: IntoExpr) -> DataFrame:
        """Mean values horizontally across columns."""

    def max_horizontal(self, *exprs: IntoExpr) -> DataFrame:
        """Maximum values horizontally across columns."""

    def min_horizontal(self, *exprs: IntoExpr) -> DataFrame:
        """Minimum values horizontally across columns."""

    # Grouping Operations
    def group_by(
        self,
        *by: IntoExpr,
        maintain_order: bool = False,
        **named_by: IntoExpr
    ) -> GroupBy:
        """Group DataFrame by expressions."""

    def rolling(
        self,
        index_column: str,
        *,
        period: str | timedelta,
        offset: str | timedelta | None = None,
        closed: str = "right",
        by: str | list[str] | None = None,
        check_sorted: bool = True
    ) -> RollingGroupBy:
        """Create rolling window groupby."""

    # Joins
    def join(
        self,
        other: DataFrame,
        on: str | list[str] | None = None,
        how: str = "inner",
        *,
        left_on: str | list[str] | None = None,
        right_on: str | list[str] | None = None,
        suffix: str = "_right",
        validate: str = "m:m",
        join_nulls: bool = False
    ) -> DataFrame:
        """Join with another DataFrame."""

    def join_asof(
        self,
        other: DataFrame,
        *,
        left_on: str | None = None,
        right_on: str | None = None,
        on: str | None = None,
        by_left: str | list[str] | None = None,
        by_right: str | list[str] | None = None,
        by: str | list[str] | None = None,
        strategy: str = "backward",
        suffix: str = "_right",
        tolerance: str | int | float | None = None,
        allow_exact_matches: bool = True
    ) -> DataFrame:
        """Perform asof join."""

    # Reshaping
    def pivot(
        self,
        *,
        on: ColumnNameOrSelector,
        index: ColumnNameOrSelector | None = None,
        values: ColumnNameOrSelector | None = None,
        aggregate_function: str | Expr | None = None,
        maintain_order: bool = True,
        sort_columns: bool = False,
        separator: str = "_"
    ) -> DataFrame:
        """Pivot DataFrame."""

    def unpivot(
        self,
        on: ColumnNameOrSelector | None = None,
        *,
        index: ColumnNameOrSelector | None = None,
        variable_name: str | None = None,
        value_name: str | None = None
    ) -> DataFrame:
        """Unpivot DataFrame."""

    def melt(
        self,
        id_vars: ColumnNameOrSelector | None = None,
        value_vars: ColumnNameOrSelector | None = None,
        *,
        variable_name: str | None = None,
        value_name: str | None = None
    ) -> DataFrame:
        """Melt DataFrame from wide to long format."""

    # Utilities
    def head(self, n: int = 5) -> DataFrame:
        """Get first n rows."""

    def tail(self, n: int = 5) -> DataFrame:
        """Get last n rows."""

    def slice(self, offset: int, length: int | None = None) -> DataFrame:
        """Slice DataFrame."""

    def limit(self, n: int) -> DataFrame:
        """Limit to n rows."""

    def sample(
        self,
        n: int | None = None,
        *,
        fraction: float | None = None,
        with_replacement: bool = False,
        shuffle: bool = False,
        seed: int | None = None
    ) -> DataFrame:
        """Sample rows."""

    def unique(
        self,
        subset: ColumnNameOrSelector | None = None,
        *,
        keep: str = "any",
        maintain_order: bool = False
    ) -> DataFrame:
        """Get unique rows."""

    def drop_nulls(self, subset: ColumnNameOrSelector | None = None) -> DataFrame:
        """Drop rows with null values."""

    def fill_null(
        self,
        value: Any = None,
        strategy: str | None = None,
        limit: int | None = None,
        *,
        matches_supertype: bool = True
    ) -> DataFrame:
        """Fill null values."""

    # Conversion
    def lazy(self) -> LazyFrame:
        """Convert to LazyFrame."""

    def to_series(self, index: int = 0) -> Series:
        """Convert to Series."""

    def to_dict(self, *, as_series: bool = True) -> dict:
        """Convert to dictionary."""

    def to_dicts(self) -> list[dict]:
        """Convert to list of dictionaries."""

    def to_numpy(self, *, structured: bool = False, order: str = "c") -> np.ndarray:
        """Convert to numpy array."""

    def to_pandas(self, **kwargs) -> pd.DataFrame:
        """Convert to pandas DataFrame."""

    def to_arrow(self) -> pa.Table:
        """Convert to PyArrow table."""

    # I/O Operations
    def write_csv(
        self,
        file: str | Path | BytesIO,
        *,
        include_bom: bool = False,
        include_header: bool = True,
        separator: str = ",",
        line_terminator: str = "\n",
        quote_char: str = '"',
        batch_size: int = 1024,
        datetime_format: str | None = None,
        date_format: str | None = None,
        time_format: str | None = None,
        float_scientific: bool | None = None,
        float_precision: int | None = None,
        null_value: str = ""
    ) -> None:
        """Write to CSV file."""

    def write_parquet(
        self,
        file: str | Path | BytesIO,
        *,
        compression: str = "zstd",
        compression_level: int | None = None,
        statistics: bool | dict[str, bool] = True,
        row_group_size: int | None = None,
        data_page_size: int | None = None,
        maintain_order: bool = True
    ) -> None:
        """Write to Parquet file."""

    def write_json(self, file: str | Path | BytesIO, *, pretty: bool = False, row_oriented: bool = False) -> None:
        """Write to JSON file."""

    # Analysis
    def describe(self, *, percentiles: Sequence[float] = (0.25, 0.5, 0.75)) -> DataFrame:
        """Generate descriptive statistics."""

    def equals(self, other: DataFrame, *, null_equal: bool = True) -> bool:
        """Check equality with another DataFrame."""

    def is_duplicated(self) -> Series:
        """Check for duplicated rows."""
    def is_unique(self) -> Series:
        """Check for unique rows."""

## LazyFrame

Lazy evaluation version of DataFrame that builds a computation graph for optimized query execution.
class LazyFrame:
    """Lazily evaluated frame: operations build a query plan executed by collect()/fetch()."""

    # Properties
    @property
    def columns(self) -> list[str]:
        """Column names."""

    @property
    def dtypes(self) -> list[type]:
        """Column data types."""

    @property
    def schema(self) -> dict[str, type]:
        """Schema as {column: dtype} dict."""

    @property
    def width(self) -> int:
        """Number of columns."""

    # Query Execution
    def collect(
        self,
        *,
        predicate_pushdown: bool = True,
        projection_pushdown: bool = True,
        simplify_expression: bool = True,
        slice_pushdown: bool = True,
        comm_subplan_elim: bool = True,
        comm_subexpr_elim: bool = True,
        cluster_with_columns: bool = True,
        streaming: bool = False
    ) -> DataFrame:
        """Execute lazy query and return DataFrame."""

    def collect_async(self, *, gevent: bool = False) -> Awaitable[DataFrame]:
        """Execute lazy query asynchronously."""

    def fetch(
        self,
        n_rows: int = 500,
        *,
        type_coercion: bool = True,
        predicate_pushdown: bool = True,
        projection_pushdown: bool = True,
        simplify_expression: bool = True,
        slice_pushdown: bool = True,
        comm_subplan_elim: bool = True,
        comm_subexpr_elim: bool = True,
        streaming: bool = False
    ) -> DataFrame:
        """Execute lazy query for first n rows."""

    def explain(
        self,
        *,
        format: str = "plain",
        optimized: bool = True,
        type_coercion: bool = True,
        predicate_pushdown: bool = True,
        projection_pushdown: bool = True,
        simplify_expression: bool = True,
        slice_pushdown: bool = True,
        comm_subplan_elim: bool = True,
        comm_subexpr_elim: bool = True,
        streaming: bool = False,
        tree_format: bool | None = None
    ) -> str:
        """Show query execution plan."""

    # Transformations (same interface as DataFrame but lazy)
    def select(self, *exprs: IntoExpr) -> LazyFrame: ...
    def filter(self, predicate: IntoExpr) -> LazyFrame: ...
    def with_columns(self, *exprs: IntoExpr, **named_exprs: IntoExpr) -> LazyFrame: ...
    def drop(self, *columns: str) -> LazyFrame: ...
    def rename(self, mapping: dict[str, str]) -> LazyFrame: ...
    def sort(self, by: ColumnNameOrSelector, *, descending: bool = False) -> LazyFrame: ...
    def reverse(self) -> LazyFrame: ...
    def cast(self, dtypes: dict[str, type] | type, *, strict: bool = True) -> LazyFrame: ...

    # Grouping Operations
    def group_by(self, *by: IntoExpr, maintain_order: bool = False) -> LazyGroupBy: ...
    def rolling(self, index_column: str, *, period: str) -> RollingGroupBy: ...

    # Joins
    def join(self, other: LazyFrame, on: str | list[str], how: str = "inner", **kwargs) -> LazyFrame: ...
    def join_asof(self, other: LazyFrame, **kwargs) -> LazyFrame: ...

    # Utilities
    def head(self, n: int = 5) -> LazyFrame: ...
    def tail(self, n: int = 5) -> LazyFrame: ...
    def slice(self, offset: int, length: int | None = None) -> LazyFrame: ...
    def limit(self, n: int) -> LazyFrame: ...

    # Streaming Sinks
    def sink_parquet(
        self,
        path: str | Path,
        *,
        compression: str = "zstd",
        maintain_order: bool = True,
        **kwargs
    ) -> DataFrame:
        """Write to Parquet file using streaming engine."""
    def sink_csv(self, path: str | Path, **kwargs) -> DataFrame:
        """Write to CSV file using streaming engine."""

## Series

One-dimensional data structure representing a single column of data.
class Series:
    """One-dimensional, named, typed column of values."""

    def __init__(
        self,
        name: str | None = None,
        values: Sequence[Any] | None = None,
        dtype: type | None = None,
        *,
        strict: bool = True,
        nan_to_null: bool = False
    ):
        """
        Create a Series.

        Parameters:
        - name: Series name
        - values: Data values
        - dtype: Data type
        - strict: Strict type checking
        - nan_to_null: Convert NaN to null
        """

    # Properties
    @property
    def dtype(self) -> type:
        """Data type."""

    @property
    def name(self) -> str:
        """Series name."""

    @property
    def shape(self) -> tuple[int]:
        """Shape as (length,) tuple."""

    # Arithmetic Operations
    def __add__(self, other: Any) -> Series: ...
    def __sub__(self, other: Any) -> Series: ...
    def __mul__(self, other: Any) -> Series: ...
    def __truediv__(self, other: Any) -> Series: ...
    def __floordiv__(self, other: Any) -> Series: ...
    def __mod__(self, other: Any) -> Series: ...
    def __pow__(self, other: Any) -> Series: ...

    # Comparison Operations
    def eq(self, other: Any) -> Series:
        """Element-wise equality."""

    def ne(self, other: Any) -> Series:
        """Element-wise inequality."""

    def lt(self, other: Any) -> Series:
        """Element-wise less than."""

    def le(self, other: Any) -> Series:
        """Element-wise less than or equal."""

    def gt(self, other: Any) -> Series:
        """Element-wise greater than."""

    def ge(self, other: Any) -> Series:
        """Element-wise greater than or equal."""

    # Aggregations
    def sum(self) -> Any:
        """Sum of values."""

    def mean(self) -> float | None:
        """Mean of values."""

    def max(self) -> Any:
        """Maximum value."""

    def min(self) -> Any:
        """Minimum value."""

    def std(self, ddof: int = 1) -> float | None:
        """Standard deviation."""

    def var(self, ddof: int = 1) -> float | None:
        """Variance."""

    def median(self) -> float | None:
        """Median value."""

    def quantile(self, quantile: float, interpolation: str = "nearest") -> float | None:
        """Quantile value."""

    # Data Access
    def get(self, index: int) -> Any:
        """Get value by index."""

    def item(self, index: int | None = None) -> Any:
        """Get single item."""

    def gather(self, indices: list[int] | Series) -> Series:
        """Gather values by indices."""

    # Transformations
    def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Series:
        """Sort Series."""

    def reverse(self) -> Series:
        """Reverse Series."""

    def cast(self, dtype: type, *, strict: bool = True) -> Series:
        """Cast to different type."""

    def rename(self, name: str) -> Series:
        """Rename Series."""

    # Utilities
    def drop_nulls(self) -> Series:
        """Drop null values."""

    def fill_null(self, value: Any = None, strategy: str | None = None) -> Series:
        """Fill null values."""

    def unique(self, *, maintain_order: bool = False) -> Series:
        """Get unique values."""

    def value_counts(self, *, sort: bool = False, parallel: bool = False) -> DataFrame:
        """Count unique values."""

    # Conversion
    def to_frame(self, name: str | None = None) -> DataFrame:
        """Convert to DataFrame."""

    def to_list(self) -> list[Any]:
        """Convert to Python list."""

    def to_numpy(self, *, zero_copy_only: bool = False, writable: bool = False) -> np.ndarray:
        """Convert to numpy array."""

    def to_pandas(self, **kwargs) -> pd.Series:
        """Convert to pandas Series."""

    # Namespaces
    @property
    def str(self) -> StringNameSpace:
        """String operations namespace."""

    @property
    def dt(self) -> DateTimeNameSpace:
        """DateTime operations namespace."""

    @property
    def list(self) -> ListNameSpace:
        """List operations namespace."""
    @property
    def struct(self) -> StructNameSpace:
        """Struct operations namespace."""

## Expr

Expression object for building complex lazy computations and transformations.
class Expr:
    """Composable expression describing a (lazy) column computation."""

    # Arithmetic Operations
    def __add__(self, other: Any) -> Expr: ...
    def __sub__(self, other: Any) -> Expr: ...
    def __mul__(self, other: Any) -> Expr: ...
    def __truediv__(self, other: Any) -> Expr: ...

    # Aggregations
    def sum(self) -> Expr:
        """Sum aggregation."""

    def mean(self) -> Expr:
        """Mean aggregation."""

    def max(self) -> Expr:
        """Maximum aggregation."""

    def min(self) -> Expr:
        """Minimum aggregation."""

    def count(self) -> Expr:
        """Count aggregation."""

    def std(self, ddof: int = 1) -> Expr:
        """Standard deviation."""

    def var(self, ddof: int = 1) -> Expr:
        """Variance."""

    # Window Functions
    def over(self, *partition_by: IntoExpr, order_by: IntoExpr | None = None) -> Expr:
        """Window function over partitions."""

    def rolling_sum(self, window_size: int | str, weights: list[float] | None = None) -> Expr:
        """Rolling sum."""

    def rolling_mean(self, window_size: int | str, weights: list[float] | None = None) -> Expr:
        """Rolling mean."""

    # Conditional Logic
    def when(self, predicate: Expr) -> ExprWhenThen:
        """Start conditional expression."""

    def then(self, statement: IntoExpr) -> ExprWhenThen:
        """Then clause in conditional."""

    def otherwise(self, statement: IntoExpr) -> Expr:
        """Else clause in conditional."""

    # Transformations
    def cast(self, dtype: type, *, strict: bool = True) -> Expr:
        """Cast to different type."""

    def alias(self, name: str) -> Expr:
        """Alias expression."""

    def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Expr:
        """Sort expression."""

    def reverse(self) -> Expr:
        """Reverse expression."""

    # Utilities
    def is_null(self) -> Expr:
        """Check for null values."""

    def is_not_null(self) -> Expr:
        """Check for non-null values."""

    def fill_null(self, value: Any = None, strategy: str | None = None) -> Expr:
        """Fill null values."""

    def drop_nulls(self) -> Expr:
        """Drop null values."""

    # Namespaces
    @property
    def str(self) -> ExprStringNameSpace:
        """String operations namespace."""

    @property
    def dt(self) -> ExprDateTimeNameSpace:
        """DateTime operations namespace."""

    @property
    def list(self) -> ExprListNameSpace:
        """List operations namespace."""

    @property
    def arr(self) -> ExprArrayNameSpace:
        """Array operations namespace."""

    @property
    def struct(self) -> ExprStructNameSpace:
        """Struct operations namespace."""

    @property
    def cat(self) -> ExprCategoricalNameSpace:
        """Categorical operations namespace."""

    @property
    def bin(self) -> ExprBinaryNameSpace:
        """Binary operations namespace."""

    @property
    def name(self) -> ExprNameNameSpace:
        """Name operations namespace."""
    @property
    def meta(self) -> ExprMetaNameSpace:
        """Meta operations namespace."""

## Usage examples

import polars as pl
# Create DataFrame
df = pl.DataFrame({
    "id": [1, 2, 3, 4],
    "name": ["Alice", "Bob", "Charlie", "Diana"],
    "age": [25, 30, 35, 28],
    "salary": [50000, 60000, 70000, 55000]
})

# Select and transform columns, then keep only rows with age > 25
result = df.select([
    pl.col("name"),
    pl.col("age"),
    (pl.col("salary") * 1.1).alias("new_salary")
]).filter(pl.col("age") > 25)
print(result)

# Create LazyFrame and build query
lazy_df = (
    pl.scan_csv("large_file.csv")
    .filter(pl.col("date") >= "2023-01-01")
    .group_by("category")
    .agg([
        pl.col("amount").sum().alias("total_amount"),
        pl.col("id").count().alias("count")
    ])
    .sort("total_amount", descending=True)
)
# Execute query (nothing runs until collect)
result = lazy_df.collect()

# Create Series
s = pl.Series("values", [1, 2, 3, 4, 5])
# Perform operations
doubled = s * 2
mean_val = s.mean()
unique_vals = s.unique()
# String operations
text_series = pl.Series("text", ["hello", "world", "polars"])
upper_text = text_series.str.upper()

# Complex expressions: chained when/then with a final otherwise fallback
expr = (
    pl.when(pl.col("age") < 30)
    .then(pl.col("salary") * 0.8)
    .when(pl.col("age") < 40)
    .then(pl.col("salary") * 0.9)
    .otherwise(pl.col("salary"))
    .alias("adjusted_salary")
)
# Use in DataFrame
df_with_adjustment = df.with_columns(expr)

## QueryOptFlags

Configuration class for controlling query optimization behavior in LazyFrame operations, allowing fine-grained control over performance optimizations.
class QueryOptFlags:
    """Bundle of boolean flags controlling LazyFrame query-plan optimizations."""

    def __init__(
        self,
        *,
        predicate_pushdown: bool | None = None,
        projection_pushdown: bool | None = None,
        simplify_expression: bool | None = None,
        slice_pushdown: bool | None = None,
        comm_subplan_elim: bool | None = None,
        comm_subexpr_elim: bool | None = None,
        cluster_with_columns: bool | None = None,
        collapse_joins: bool | None = None,
        check_order_observe: bool | None = None,
        fast_projection: bool | None = None,
    ):
        """
        Configure query optimization flags.

        Parameters:
        - predicate_pushdown: Push predicates down in the query tree
        - projection_pushdown: Push projections down in the query tree
        - simplify_expression: Simplify expressions during optimization
        - slice_pushdown: Push slice operations down in the query tree
        - comm_subplan_elim: Eliminate common subplans
        - comm_subexpr_elim: Eliminate common subexpressions
        - cluster_with_columns: Cluster with_columns operations
        - collapse_joins: Collapse consecutive joins
        - check_order_observe: Check if ordering is observed
        - fast_projection: Use fast projection when possible
        """

    @staticmethod
    def none(**kwargs) -> QueryOptFlags:
        """Create QueryOptFlags with all optimizations disabled."""
    def update(self, **kwargs) -> QueryOptFlags:
        """Update optimization flags."""

## GPUEngine

Configuration class for GPU-accelerated processing in LazyFrame operations.
class GPUEngine:
    def __init__(self):
        """
        Configure GPU engine for accelerated processing.

        Note: GPU processing requires compatible hardware and drivers.
        """

## Installation

Install with the Tessl CLI:

    npx tessl i tessl/pypi-polars-lts-cpu