The portable Python dataframe library that provides a unified API for data analysis across 20+ different backends
npx @tessl/cli install tessl/pypi-ibis-framework@10.8.0

The portable Python dataframe library that provides a unified API for data analysis across 20+ different backends including DuckDB, PostgreSQL, BigQuery, Snowflake, Spark, and many others. Ibis enables developers to write dataframe expressions once and execute them on any supported backend, facilitating seamless transitions from local development to production deployment.
pip install ibis-framework

import ibis

Common patterns for working with specific backends:
import ibis
# DuckDB backend (default)
con = ibis.duckdb.connect()
# PostgreSQL backend
con = ibis.postgres.connect(user="postgres", password="password", host="localhost", database="mydb")
# BigQuery backend
con = ibis.bigquery.connect(project_id="my-project")

import ibis
import pandas as pd
# Connect to DuckDB (default backend)
con = ibis.duckdb.connect()
# Create a table from pandas DataFrame
df = pd.DataFrame({
'name': ['Alice', 'Bob', 'Charlie'],
'age': [25, 30, 35],
'salary': [50000, 60000, 70000]
})
employees = con.create_table('employees', df)
# Build expressions using the unified API
result = (
employees
.filter(employees.age > 25)
.select(employees.name, employees.salary)
.order_by(employees.salary.desc())
)
# Execute and get results
print(result.to_pandas())
# Switch to a different backend with same expressions
pg_con = ibis.postgres.connect(...)
pg_employees = pg_con.table('employees')
same_result = (
pg_employees
.filter(pg_employees.age > 25)
.select(pg_employees.name, pg_employees.salary)
.order_by(pg_employees.salary.desc())
)

Ibis uses a lazy evaluation system with backend abstraction:
The unified API allows writing portable data analysis code that can run on local engines (DuckDB, Polars), traditional databases (PostgreSQL, MySQL), cloud data warehouses (BigQuery, Snowflake), and distributed systems (Spark, Trino).
Core functions for creating table expressions from various data sources including in-memory data, files (CSV, Parquet, JSON), and database connections.
def table(schema=None, name=None, catalog=None, database=None): ...
def memtable(data, /, *, columns=None, schema=None, name=None): ...
def read_csv(path, **kwargs): ...
def read_parquet(path, **kwargs): ...
def read_json(path, **kwargs): ...
def connect(backend, **kwargs): ...

Fundamental expression building blocks for creating scalar values, arrays, structs, and complex computations with support for parameters and deferred expressions.
def literal(value, type=None): ...
def null(type=None): ...
def array(values, type=None): ...
def struct(mapping): ...
def param(type): ...
def case(): ...
def ifelse(condition, true_expr, false_expr): ...

Comprehensive table operations including filtering, selection, aggregation, joins, sorting, and window functions for data transformation and analysis.
# Table methods
table.select(*exprs): ...
table.filter(predicates): ...
table.group_by(*exprs): ...
table.aggregate(**kwargs): ...
table.join(other, predicates): ...
table.order_by(*exprs): ...
table.limit(n): ...

Extensive date, time, and timestamp functionality including construction, arithmetic, formatting, and timezone handling for temporal data analysis.
def date(year, month, day): ...
def time(hour, minute, second): ...
def timestamp(year, month, day, hour, minute, second): ...
def now(): ...
def today(): ...
def interval(**kwargs): ...

Statistical aggregation functions and window operations including ranking, cumulative calculations, and frame-based computations for advanced analytics.
def sum(arg): ...
def mean(arg): ...
def count(arg): ...
def row_number(): ...
def rank(): ...
def dense_rank(): ...
def window(**kwargs): ...

Flexible column selection system with pattern matching, type-based selection, and logical combinations for working with wide datasets.
selectors.all(): ...
selectors.numeric(): ...
selectors.matches(pattern): ...
selectors.startswith(prefix): ...
selectors.of_type(*types): ...

Backend connection, configuration, and management functions for working with different data processing engines and databases.
def get_backend(table): ...
def set_backend(backend): ...
def list_backends(): ...
backend.connect(**kwargs): ...
backend.compile(expr): ...

Comprehensive UDF system supporting scalar, aggregate, and analytic functions with type safety and backend compatibility.
@ibis.udf.scalar(signature)
def my_function(arg): ...
@ibis.udf.aggregate(signature)
def my_aggregate(arg): ...

Bidirectional SQL integration allowing parsing SQL into expressions and compiling expressions to SQL with backend-specific optimizations.
def parse_sql(sql, dialect=None): ...
def to_sql(expr, dialect=None): ...
def decompile(expr): ...

Global and backend-specific configuration system for controlling behavior, output formatting, and performance optimizations.
ibis.options.sql.default_limit = 10000
ibis.options.interactive.mode = True
ibis.options.repr.interactive.max_rows = 20

Core data types and type system components.
# Type constructors
dtype(type_spec): DataType
infer_dtype(value): DataType
infer_schema(data): Schema
# Common types
int64(): DataType
float64(): DataType
string(): DataType
boolean(): DataType
timestamp(): DataType
array(value_type): DataType
struct(fields): DataType

Fundamental classes for working with Ibis expressions and data structures.
class DataType:
"""Base class for all Ibis data types"""
class Expr:
"""Base class for all Ibis expressions"""
class Value(Expr):
"""Base class for value expressions (scalars, arrays, structs, etc.)"""
class Scalar(Value):
"""Scalar value expressions (single values)"""
class Column(Value):
"""Column expressions referencing table columns"""class IbisError(Exception):
"""Base exception for all Ibis errors"""
class IbisInputError(IbisError):
"""Raised for invalid input arguments"""
class IbisTypeError(IbisError):
"""Raised for type-related errors"""