Ibis is a portable Python dataframe library that provides a unified API for data analysis across 20+ different backends.
Core functions for creating table expressions from various data sources including in-memory data, files, and database connections.
Create table expressions from schema definitions for building queries before data materialization.
def table(schema=None, name=None, catalog=None, database=None):
"""
Create a table expression from a schema.
Parameters:
- schema: dict, Schema, or list of (name, type) tuples defining table structure (optional)
- name: str, optional name for the table
- catalog: str, optional catalog name
- database: str, optional database name
Returns:
Table: Table expression
"""
Usage Example:
import ibis
# From dictionary
schema = {'name': 'string', 'age': 'int64', 'salary': 'float64'}
t = ibis.table(schema, name='employees')
# From Schema object
schema = ibis.schema([('name', 'string'), ('age', 'int64')])
t = ibis.table(schema)
Create table expressions from in-memory data structures like pandas DataFrames, dictionaries, or lists.
def memtable(data, /, *, columns=None, schema=None, name=None):
"""
Create a table expression from in-memory data.
Parameters:
- data: pandas.DataFrame, dict, list of dicts, or pyarrow.Table (positional-only)
- columns: Iterable[str], optional column names (keyword-only)
- schema: Schema or schema-like, optional schema specification (keyword-only)
- name: str, optional name for the table (keyword-only)
Returns:
Table: Table expression backed by in-memory data
"""
Usage Example:
import pandas as pd
import ibis
# From pandas DataFrame
df = pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
t = ibis.memtable(df)
# From dictionary
data = {'x': [1, 2, 3], 'y': ['a', 'b', 'c']}
t = ibis.memtable(data, name='my_table')
Read CSV files into table expressions with configurable parsing options.
def read_csv(paths, /, *, table_name=None, **kwargs):
"""
Read a CSV file or set of CSV files into a table expression.
Parameters:
- paths: str, Path, or sequence of str/Path (positional-only), file path(s) or URL(s)
- table_name: str, optional name for resulting table (keyword-only)
- **kwargs: additional backend-specific options
Returns:
Table expression
"""
Usage Example:
import ibis
# Basic CSV reading
t = ibis.read_csv('data.csv')
# With options
t = ibis.read_csv('data.csv', table_name='sales_data')
Read Parquet files into table expressions with support for partitioned datasets.
def read_parquet(paths, /, *, table_name=None, **kwargs):
"""
Read a Parquet file or dataset into a table expression.
Parameters:
- paths: str, Path, or sequence of str/Path (positional-only), file path(s), directory, or URL(s)
- table_name: str, optional name for resulting table (keyword-only)
- **kwargs: additional backend-specific options
Returns:
Table expression
"""
Usage Example:
import ibis
# Single file
t = ibis.read_parquet('data.parquet')
# Partitioned dataset
t = ibis.read_parquet('partitioned_data/')
Read JSON files into table expressions with flexible schema inference.
def read_json(paths, /, *, table_name=None, **kwargs):
"""
Read a JSON file into a table expression.
Parameters:
- paths: str, Path, or sequence of str/Path (positional-only), file path(s) or URL(s)
- table_name: str, optional name for resulting table (keyword-only)
- **kwargs: additional backend-specific options
Returns:
Table expression
"""
Usage Example:
import ibis
# Read JSON file
t = ibis.read_json('data.json')
# JSONL (newline-delimited JSON)
t = ibis.read_json('data.jsonl')
Read Delta Lake tables into table expressions.
def read_delta(path, /, *, table_name=None, **kwargs):
"""
Read a Delta Lake table into a table expression.
Parameters:
- path: str or Path (positional-only), path to Delta table
- table_name: str, optional name for resulting table (keyword-only)
- **kwargs: additional backend-specific options
Returns:
Table expression
"""
Connect to various backends and access existing tables.
def connect(backend=None, **kwargs):
"""
Connect to a backend.
Parameters:
- backend: str, backend name (duckdb, postgres, etc.)
- **kwargs: backend-specific connection parameters
Returns:
Backend connection object
"""
Usage Examples:
import ibis
# DuckDB (default backend, in-memory database)
con = ibis.duckdb.connect()
# DuckDB with file
con = ibis.duckdb.connect('my_database.db')
# PostgreSQL
con = ibis.postgres.connect(
user='user',
password='password',
host='localhost',
database='mydb'
)
# BigQuery
con = ibis.bigquery.connect(project_id='my-project')
# Access existing table
table = con.table('existing_table')
def schema(mapping_or_list):
"""
Create a schema from a mapping or list of field definitions.
Parameters:
- mapping_or_list: dict or list of (name, type) tuples
Returns:
Schema object
"""
def infer_schema(data):
"""
Infer schema from data.
Parameters:
- data: pandas.DataFrame, dict, or other data structure
Returns:
Schema object with inferred types
"""
Usage Example:
import pandas as pd
import ibis
df = pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
schema = ibis.infer_schema(df)
print(schema) # Schema({'x': int64, 'y': string})
Install with Tessl CLI
npx tessl i tessl/pypi-ibis-framework