Blazingly fast DataFrame library for legacy CPUs without AVX2 support
npx @tessl/cli install tessl/pypi-polars-lts-cpu@1.33.0

A blazingly fast DataFrame library for data manipulation and analysis, specifically compiled for legacy CPUs without AVX2 support. Built in Rust with Python bindings, polars-lts-cpu provides comprehensive DataFrame operations, lazy evaluation capabilities, SQL integration, and extensive I/O support while maintaining compatibility with older hardware that lacks modern SIMD instruction sets.
pip install polars-lts-cpu

import polars as pl

Common patterns for specific functionality:
# DataFrame and LazyFrame
from polars import DataFrame, LazyFrame
# Core functions
from polars import col, lit, when
# I/O operations
from polars import read_csv, read_parquet, scan_csv
# Data types
from polars import Int64, Float64, String, Boolean, Date, Datetime

import polars as pl
# Read data from various sources
df = pl.read_csv("data.csv")
# Or create DataFrame from Python data
df = pl.DataFrame({
"name": ["Alice", "Bob", "Charlie"],
"age": [25, 30, 35],
"salary": [50000, 60000, 70000]
})
# Basic operations
result = (
df
.filter(pl.col("age") > 25)
.with_columns([
(pl.col("salary") * 1.1).alias("new_salary"),
pl.col("name").str.upper().alias("upper_name")
])
.sort("age", descending=True)
)
print(result)
# Lazy evaluation for better performance
lazy_result = (
df.lazy()
.group_by("age")
.agg([
pl.col("salary").mean().alias("avg_salary"),
pl.len().alias("count")
])
.collect()
)
print(lazy_result)

Polars follows a columnar memory layout with lazy evaluation:
The design enables memory-efficient processing, streaming capabilities for large datasets, and seamless interoperability with pandas, NumPy, and PyArrow ecosystems.
The fundamental data structures for working with tabular data, including DataFrame for eager operations, LazyFrame for optimized query execution, Series for one-dimensional data, Expr for building complex transformations, plus configuration classes for query optimization and engine selection.
class DataFrame:
def __init__(self, data=None, schema=None, **kwargs): ...
def select(self, *exprs): ...
def filter(self, predicate): ...
def group_by(self, *by): ...
def sort(self, by, *, descending=False): ...
class LazyFrame:
def select(self, *exprs): ...
def filter(self, predicate): ...
def collect(self, **kwargs): ...
def explain(self, **kwargs): ...
class Series:
def __init__(self, name=None, values=None, dtype=None): ...
def sum(self): ...
def mean(self): ...
def to_frame(self): ...
class Expr:
def alias(self, name): ...
def over(self, *partition_by): ...
def sum(self): ...
def mean(self): ...
class QueryOptFlags:
def __init__(self, *, predicate_pushdown=None, **kwargs): ...
@staticmethod
def none(**kwargs): ...
class GPUEngine:
def __init__(self): ...

Comprehensive type system supporting numeric, text, temporal, and complex nested data types with full type safety and memory efficiency.
# Numeric types
Int8, Int16, Int32, Int64, Int128
UInt8, UInt16, UInt32, UInt64
Float32, Float64, Decimal
# Text types
String, Utf8, Binary
# Temporal types
Date, Datetime, Time, Duration
# Complex types
List, Array, Struct, Categorical, Enum

Extensive support for reading and writing data in various formats including CSV, Parquet, JSON, Arrow IPC, databases, Excel, and cloud storage with streaming capabilities.
def read_csv(source, **kwargs): ...
def read_parquet(source, **kwargs): ...
def read_json(source, **kwargs): ...
def scan_csv(source, **kwargs): ...
def scan_parquet(source, **kwargs): ...
# Database operations
def read_database(query, connection, **kwargs): ...
def read_database_uri(query, uri, **kwargs): ...

Rich collection of functions for data manipulation including aggregation, lazy operations, range generation, mathematical operations, and utility functions.
# Column selection and manipulation
def col(name): ...
def lit(value): ...
def when(predicate): ...
# Aggregation functions
def sum(*args): ...
def mean(*args): ...
def count(*args): ...
def max(*args): ...
# Range functions
def arange(start, end, step=1): ...
def date_range(start, end, interval): ...

Native SQL support allowing you to query DataFrames and LazyFrames using familiar SQL syntax with full integration into the polars ecosystem.
class SQLContext:
def register(self, name, frame): ...
def execute(self, query): ...
def sql(query, **kwargs): ...

Configuration options, selectors for column operations, string caching for categorical data, and testing utilities for DataFrame comparisons.
class Config:
def set_tbl_rows(self, n): ...
def set_tbl_cols(self, n): ...
# Selectors
def by_dtype(*dtypes): ...
def numeric(): ...
def string(): ...
# String cache
def enable_string_cache(): ...
class CompatLevel:
@staticmethod
def newest(): ...
@staticmethod
def oldest(): ...

Specialized namespaces for working with different data types including string operations (.str), datetime operations (.dt), list operations (.list), and more.
# String operations
expr.str.contains(pattern)
expr.str.replace(pattern, replacement)
# DateTime operations
expr.dt.year()
expr.dt.strftime(format)
# List operations
expr.list.len()
expr.list.explode()

# Core type definitions used across the API
from typing import Any, Dict, List, Optional, Union, Sequence
from datetime import date, datetime, time, timedelta
from pathlib import Path
# Common type aliases
ColumnNameOrSelector = Union[str, Expr, Sequence[str]]
IntoExpr = Union[Expr, str, int, float, bool, Sequence[Any]]
SchemaDict = Dict[str, type]
TemporalLiteral = Union[date, datetime, time, timedelta, str]