Blazingly fast DataFrame library for legacy CPUs without AVX2 support
—
Comprehensive type system supporting numeric, text, temporal, and complex nested data types with full type safety and memory efficiency. Polars provides a rich set of data types that map efficiently to Arrow's columnar format.
Integer and floating-point types with various precision levels for optimal memory usage and performance.
# Signed integers
class Int8:
"""8-bit signed integer (-128 to 127)"""
class Int16:
"""16-bit signed integer (-32,768 to 32,767)"""
class Int32:
"""32-bit signed integer (-2^31 to 2^31-1)"""
class Int64:
"""64-bit signed integer (-2^63 to 2^63-1)"""
class Int128:
"""128-bit signed integer"""
# Unsigned integers
class UInt8:
"""8-bit unsigned integer (0 to 255)"""
class UInt16:
"""16-bit unsigned integer (0 to 65,535)"""
class UInt32:
"""32-bit unsigned integer (0 to 2^32-1)"""
class UInt64:
"""64-bit unsigned integer (0 to 2^64-1)"""
# Floating point
class Float32:
"""32-bit floating point number"""
class Float64:
"""64-bit floating point number"""
# Decimal
class Decimal:
def __init__(self, precision: int, scale: int = 0):
"""
Fixed-point decimal type.
Parameters:
- precision: Total number of digits
- scale: Number of digits after decimal point
"""

String and binary data types with full Unicode support and efficient storage.
class String:
"""UTF-8 encoded string type"""
class Utf8:
"""Alias for String type"""
class Binary:
"""Binary data type for storing raw bytes"""

Date, time, and duration types with timezone support and flexible precision.
class Date:
"""Date type (year, month, day)"""
class Datetime:
def __init__(self, time_unit: str = "us", time_zone: str | None = None):
"""
Datetime type with optional timezone.
Parameters:
- time_unit: Precision ('ns', 'us', 'ms')
- time_zone: Timezone name (e.g., 'UTC', 'America/New_York')
"""
class Time:
"""Time type (hour, minute, second, microsecond)"""
class Duration:
def __init__(self, time_unit: str = "us"):
"""
Duration type for time intervals.
Parameters:
- time_unit: Precision ('ns', 'us', 'ms')
"""

Logical and null value types.
class Boolean:
"""Boolean type (True/False/null)"""
class Null:
"""Null type containing only null values"""
class Unknown:
"""Unknown type placeholder for type inference"""

Nested and structured data types for handling complex data structures.
class List:
def __init__(self, inner: type):
"""
Variable-length list type.
Parameters:
- inner: Type of list elements
"""
class Array:
def __init__(self, inner: type, shape: int | tuple[int, ...]):
"""
Fixed-length array type.
Parameters:
- inner: Type of array elements
- shape: Array dimensions
"""
class Struct:
def __init__(self, fields: list[Field] | dict[str, type]):
"""
Structured type with named fields.
Parameters:
- fields: List of Field objects or dict of {name: type}
"""
class Field:
def __init__(self, name: str, dtype: type):
"""
Schema field definition.
Parameters:
- name: Field name
- dtype: Field data type
"""

Types for handling categorical data with efficient storage and operations.
class Categorical:
def __init__(self, ordering: str = "physical"):
"""
Categorical type for string categories.
Parameters:
- ordering: Ordering method ('physical' or 'lexical')
"""
class Enum:
def __init__(self, categories: list[str]):
"""
Enumerated type with fixed categories.
Parameters:
- categories: List of allowed category values
"""
class Categories:
"""Categorical metadata container"""

Additional types for Python object storage and type system utilities.
class Object:
"""Python object type for arbitrary Python objects"""
class DataType:
"""Base class for all data types"""

def is_polars_dtype(dtype: Any) -> bool:
"""
Check if object is a Polars data type.
Parameters:
- dtype: Object to check
Returns:
- bool: True if dtype is a Polars type
"""
def dtype_to_py_type(dtype: type) -> type:
"""
Convert Polars data type to Python type.
Parameters:
- dtype: Polars data type
Returns:
- type: Corresponding Python type
"""
def parse_into_dtype(dtype: str | type) -> type:
"""
Parse string or type into Polars data type.
Parameters:
- dtype: String representation or type object
Returns:
- type: Polars data type
"""

class Schema:
def __init__(self, schema: dict[str, type] | list[tuple[str, type]] | None = None):
"""
Schema definition for DataFrames.
Parameters:
- schema: Column definitions as dict or list of (name, type) tuples
"""
def __getitem__(self, key: str) -> type:
"""Get column type by name."""
def __contains__(self, key: str) -> bool:
"""Check if column exists in schema."""
def names(self) -> list[str]:
"""Get column names."""
def dtypes(self) -> list[type]:
"""Get column types."""
def to_python(self) -> dict[str, type]:
"""Convert to Python dict."""

import polars as pl
# Create DataFrame with explicit types
df = pl.DataFrame({
"id": pl.Series([1, 2, 3], dtype=pl.Int32),
"name": pl.Series(["Alice", "Bob", "Charlie"], dtype=pl.String),
"score": pl.Series([95.5, 87.2, 92.1], dtype=pl.Float64),
"active": pl.Series([True, False, True], dtype=pl.Boolean),
"created": pl.Series(["2023-01-01", "2023-01-02", "2023-01-03"], dtype=pl.Date)
})
print(df.dtypes)
# [Int32, String, Float64, Boolean, Date]

# List type
df_with_lists = pl.DataFrame({
"id": [1, 2, 3],
"scores": [[95, 87, 92], [88, 91], [85, 89, 93, 87]]
}, schema={"id": pl.Int32, "scores": pl.List(pl.Int32)})
# Struct type
df_with_struct = pl.DataFrame({
"person": [
{"name": "Alice", "age": 25},
{"name": "Bob", "age": 30},
{"name": "Charlie", "age": 35}
]
}, schema={"person": pl.Struct([
pl.Field("name", pl.String),
pl.Field("age", pl.Int32)
])})
# Array type (fixed length)
df_with_arrays = pl.DataFrame({
"coordinates": [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
}, schema={"coordinates": pl.Array(pl.Float64, 3)})

# Datetime with timezone
df_with_tz = pl.DataFrame({
"timestamp": ["2023-01-01 12:00:00", "2023-01-01 15:30:00"],
}, schema={"timestamp": pl.Datetime("us", "UTC")})
# Duration type
df_with_duration = pl.DataFrame({
"elapsed": ["1h 30m", "2h 15m", "45m"]
}, schema={"elapsed": pl.Duration("us")})

# Categorical type
df_categorical = pl.DataFrame({
"category": ["A", "B", "A", "C", "B"]
}, schema={"category": pl.Categorical()})
# Enum type with fixed categories
df_enum = pl.DataFrame({
"grade": ["A", "B", "A", "C"]
}, schema={"grade": pl.Enum(["A", "B", "C", "D", "F"])})

# Decimal type for precise arithmetic
df_decimal = pl.DataFrame({
"price": ["19.99", "25.50", "12.75"]
}, schema={"price": pl.Decimal(precision=10, scale=2)})

# Cast between types
df_cast = df.select([
pl.col("id").cast(pl.Int64),
pl.col("score").cast(pl.Float32),
pl.col("created").cast(pl.Datetime("us"))
])
# Cast with error handling
df_safe_cast = df.select([
pl.col("score").cast(pl.Int32, strict=False) # Returns null on cast failure
])

# Define schema explicitly
schema = pl.Schema({
"id": pl.Int64,
"name": pl.String,
"timestamp": pl.Datetime("us", "UTC"),
"values": pl.List(pl.Float64)
})
# Use schema when reading data
df_with_schema = pl.read_csv("data.csv", schema=schema)
# Schema overrides for specific columns
df_override = pl.read_csv("data.csv", schema_overrides={
"id": pl.Int32, # Override inferred type
"date": pl.Date # Override inferred type
})

# Check if value is a Polars type
assert pl.is_polars_dtype(pl.Int64)
assert not pl.is_polars_dtype(int)
# Convert to Python type
py_type = pl.dtype_to_py_type(pl.Float64) # Returns float
# Parse string to type
parsed_type = pl.parse_into_dtype("int64") # Returns pl.Int64

DataType
├── Int8, Int16, Int32, Int64, Int128
├── UInt8, UInt16, UInt32, UInt64
├── Float32, Float64
└── Decimal

DataType
├── Date
├── Datetime
├── Time
└── Duration

DataType
├── List
├── Array
├── Struct
├── Categorical
├── Enum
└── Object

Polars types are designed for optimal memory usage:
All Polars types map directly to Apache Arrow types for zero-copy interoperability:
# Convert to Arrow
arrow_table = df.to_arrow()
# Convert from Arrow
df_from_arrow = pl.from_arrow(arrow_table)

Polars types convert to pandas types with appropriate handling:
# Convert to pandas
pandas_df = df.to_pandas()
# Convert from pandas
df_from_pandas = pl.from_pandas(pandas_df)

Numeric types integrate seamlessly with NumPy:
# Convert to numpy
numpy_array = df.select(pl.col("score")).to_numpy()
# Convert from numpy
df_from_numpy = pl.from_numpy(numpy_array, schema=["values"])

Install with Tessl CLI
npx tessl i tessl/pypi-polars-lts-cpu