The portable Python dataframe library that provides a unified API for data analysis across 20+ different backends
—
Comprehensive table operations including filtering, selection, aggregation, joins, sorting, and transformations for data analysis.
Select specific columns or computed expressions from tables.
def select(*exprs):
"""
Select columns or computed expressions.
Parameters:
- *exprs: column names, expressions, or keyword expressions
Returns:
Table with selected columns
"""Usage Examples:
# Select specific columns
result = table.select('name', 'age')
# Select with expressions
result = table.select(
'name',
age_next_year=table.age + 1,
is_adult=table.age >= 18
)
# Select all columns
result = table.select('*')Filter rows based on boolean predicates.
def filter(*predicates):
"""
Filter rows based on predicates.
Parameters:
- *predicates: boolean expressions
Returns:
Table with filtered rows
"""Usage Examples:
# Single condition
result = table.filter(table.age > 25)
# Multiple conditions (AND)
result = table.filter(table.age > 25, table.salary > 50000)
# Complex conditions
result = table.filter(
(table.age > 25) & (table.department == 'Engineering')
)Group rows and compute aggregate statistics.
def group_by(*exprs):
"""
Group table by expressions.
Parameters:
- *exprs: column names or expressions to group by
Returns:
GroupedTable for aggregation
"""
def aggregate(**kwargs):
"""
Aggregate entire table or grouped table.
Parameters:
- **kwargs: name=expression pairs for aggregations
Returns:
Table with aggregated results
"""Usage Examples:
# Group by single column
result = table.group_by('department').aggregate(
avg_salary=table.salary.mean(),
count=table.count()
)
# Group by multiple columns
result = table.group_by('department', 'level').aggregate(
total_salary=table.salary.sum(),
employee_count=table.count()
)
# Aggregate without grouping
result = table.aggregate(
total_employees=table.count(),
avg_age=table.age.mean()
)Sort table rows by one or more expressions.
def order_by(*exprs):
"""
Sort table by expressions.
Parameters:
- *exprs: column names, expressions, or sort keys
Returns:
Sorted table
"""
def asc(expr):
"""Create ascending sort key."""
def desc(expr):
"""Create descending sort key."""Usage Examples:
# Sort by single column
result = table.order_by('name')
# Sort by multiple columns
result = table.order_by('department', 'salary')
# Explicit sort direction
result = table.order_by(
ibis.asc('department'),
ibis.desc('salary')
)Limit the number of rows returned.
def limit(n, offset=0):
"""
Limit number of rows.
Parameters:
- n: int, maximum number of rows
- offset: int, number of rows to skip
Returns:
Table with limited rows
"""
def head(n=5):
"""Return first n rows."""
def tail(n=5):
"""Return last n rows (requires ordering)."""Usage Examples:
# Top 10 rows
result = table.limit(10)
# Skip first 20, take next 10
result = table.limit(10, offset=20)
# Convenient methods
top_5 = table.head(5)
bottom_5 = table.order_by('salary').tail(5)Join tables using various join types and conditions.
def join(other, predicates=None, how='inner'):
"""
Join with another table.
Parameters:
- other: Table to join with
- predicates: join conditions or None for cross join
- how: join type ('inner', 'left', 'right', 'outer', 'semi', 'anti')
Returns:
Joined table
"""
def cross_join(other):
"""Cross join (Cartesian product)."""
def asof_join(other, predicates, tolerance=None):
"""As-of join for time-series data."""Usage Examples:
# Inner join
result = employees.join(
departments,
employees.dept_id == departments.id
)
# Left join with multiple conditions
result = employees.join(
departments,
[employees.dept_id == departments.id,
employees.location == departments.location],
how='left'
)
# Cross join
result = table1.cross_join(table2)Combine tables using set operations.
def union(table, /, *rest, distinct=False):
"""
Compute multiset (or set) union of multiple tables.
Parameters:
- table: Table to union with (positional-only)
- *rest: Additional tables to union
- distinct: bool, use set union (True) or multiset union (False, default)
Returns:
Union of all tables
"""
def intersect(other):
"""Intersection with another table."""
def difference(other):
"""Difference from another table."""Usage Examples:
# Union tables (multiset - keeps duplicates by default)
combined = table1.union(table2)
# Union with distinct values only
combined = table1.union(table2, distinct=True)
# Set operations
common = table1.intersect(table2)
unique_to_first = table1.difference(table2)Add, drop, and rename columns.
def mutate(**kwargs):
"""
Add or modify columns.
Parameters:
- **kwargs: name=expression pairs
Returns:
Table with new/modified columns
"""
def drop(*columns):
"""
Drop columns.
Parameters:
- *columns: column names to drop
Returns:
Table without specified columns
"""
def rename(**kwargs):
"""
Rename columns.
Parameters:
- **kwargs: old_name=new_name pairs
Returns:
Table with renamed columns
"""Usage Examples:
# Add computed columns
result = table.mutate(
age_next_year=table.age + 1,
full_name=table.first_name + ' ' + table.last_name
)
# Drop columns
result = table.drop('temp_column', 'unused_field')
# Rename columns
result = table.rename(old_name='new_name', id='employee_id')Remove duplicate rows or get unique values.
def distinct(*exprs):
"""
Get distinct rows or distinct values for expressions.
Parameters:
- *exprs: expressions to consider for distinctness (all columns if empty)
Returns:
Table with distinct rows
"""Usage Examples:
# Distinct rows
unique_rows = table.distinct()
# Distinct values for specific columns
unique_combinations = table.distinct('department', 'level')Sample random rows from the table.
def sample(fraction=None, method='row'):
"""
Sample rows from table.
Parameters:
- fraction: float, fraction of rows to sample (0.0 to 1.0)
- method: str, sampling method
Returns:
Sampled table
"""Usage Example:
# Sample 10% of rows
sample = table.sample(0.1)Install with Tessl CLI
npx tessl i tessl/pypi-ibis-framework