CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-ibis-framework

The portable Python dataframe library that provides a unified API for data analysis across 20+ different backends

Pending
Overview
Eval results
Files

table-operations.mddocs/

Table Operations and Transformations

Comprehensive table operations including filtering, selection, aggregation, joins, sorting, and transformations for data analysis.

Capabilities

Column Selection

Select specific columns or computed expressions from tables.

def select(*exprs):
    """
    Select columns or computed expressions.
    
    Parameters:
    - *exprs: column names, expressions, or keyword expressions
    
    Returns:
    Table with selected columns
    """

Usage Examples:

# Select specific columns
result = table.select('name', 'age')

# Select with expressions
result = table.select(
    'name',
    age_next_year=table.age + 1,
    is_adult=table.age >= 18
)

# Select all columns
result = table.select('*')

Row Filtering

Filter rows based on boolean predicates.

def filter(*predicates):
    """
    Filter rows based on predicates.
    
    Parameters:
    - *predicates: boolean expressions
    
    Returns:
    Table with filtered rows
    """

Usage Examples:

# Single condition
result = table.filter(table.age > 25)

# Multiple conditions (AND)
result = table.filter(table.age > 25, table.salary > 50000)

# Complex conditions
result = table.filter(
    (table.age > 25) & (table.department == 'Engineering')
)

Grouping and Aggregation

Group rows and compute aggregate statistics.

def group_by(*exprs):
    """
    Group table by expressions.
    
    Parameters:
    - *exprs: column names or expressions to group by
    
    Returns:
    GroupedTable for aggregation
    """

def aggregate(**kwargs):
    """
    Aggregate entire table or grouped table.
    
    Parameters:
    - **kwargs: name=expression pairs for aggregations
    
    Returns:
    Table with aggregated results
    """

Usage Examples:

# Group by single column
result = table.group_by('department').aggregate(
    avg_salary=table.salary.mean(),
    count=table.count()
)

# Group by multiple columns
result = table.group_by('department', 'level').aggregate(
    total_salary=table.salary.sum(),
    employee_count=table.count()
)

# Aggregate without grouping
result = table.aggregate(
    total_employees=table.count(),
    avg_age=table.age.mean()
)

Sorting

Sort table rows by one or more expressions.

def order_by(*exprs):
    """
    Sort table by expressions.
    
    Parameters:
    - *exprs: column names, expressions, or sort keys
    
    Returns:
    Sorted table
    """

def asc(expr):
    """Create ascending sort key."""
    
def desc(expr):
    """Create descending sort key."""

Usage Examples:

# Sort by single column
result = table.order_by('name')

# Sort by multiple columns
result = table.order_by('department', 'salary')

# Explicit sort direction
result = table.order_by(
    ibis.asc('department'),
    ibis.desc('salary')
)

Row Limiting

Limit the number of rows returned.

def limit(n, offset=0):
    """
    Limit number of rows.
    
    Parameters:
    - n: int, maximum number of rows
    - offset: int, number of rows to skip
    
    Returns:
    Table with limited rows
    """

def head(n=5):
    """Return first n rows."""

def tail(n=5):
    """Return last n rows (requires ordering)."""

Usage Examples:

# Top 10 rows
result = table.limit(10)

# Skip first 20, take next 10
result = table.limit(10, offset=20)

# Convenient methods
top_5 = table.head(5)
bottom_5 = table.order_by('salary').tail(5)

Joins

Join tables using various join types and conditions.

def join(other, predicates=None, how='inner'):
    """
    Join with another table.
    
    Parameters:
    - other: Table to join with
    - predicates: join conditions or None for cross join
    - how: join type ('inner', 'left', 'right', 'outer', 'semi', 'anti')
    
    Returns:
    Joined table
    """

def cross_join(other):
    """Cross join (Cartesian product)."""

def asof_join(other, predicates, tolerance=None):
    """As-of join for time-series data."""

Usage Examples:

# Inner join
result = employees.join(
    departments,
    employees.dept_id == departments.id
)

# Left join with multiple conditions
result = employees.join(
    departments,
    [employees.dept_id == departments.id,
     employees.location == departments.location],
    how='left'
)

# Cross join
result = table1.cross_join(table2)

Set Operations

Combine tables using set operations.

def union(table, /, *rest, distinct=False):
    """
    Compute multiset (or set) union of multiple tables.
    
    Parameters:
    - table: Table to union with (positional-only)
    - *rest: Additional tables to union
    - distinct: bool, use set union (True) or multiset union (False, default)
    
    Returns:
    Union of all tables
    """

def intersect(other):
    """Intersection with another table."""

def difference(other):
    """Difference from another table."""

Usage Examples:

# Union tables (multiset - keeps duplicates by default)
combined = table1.union(table2)

# Union with distinct values only
combined = table1.union(table2, distinct=True)

# Set operations
common = table1.intersect(table2)
unique_to_first = table1.difference(table2)

Column Operations

Add, drop, and rename columns.

def mutate(**kwargs):
    """
    Add or modify columns.
    
    Parameters:
    - **kwargs: name=expression pairs
    
    Returns:
    Table with new/modified columns
    """

def drop(*columns):
    """
    Drop columns.
    
    Parameters:
    - *columns: column names to drop
    
    Returns:
    Table without specified columns
    """

def rename(**kwargs):
    """
    Rename columns.
    
    Parameters:
    - **kwargs: old_name=new_name pairs
    
    Returns:
    Table with renamed columns
    """

Usage Examples:

# Add computed columns
result = table.mutate(
    age_next_year=table.age + 1,
    full_name=table.first_name + ' ' + table.last_name
)

# Drop columns
result = table.drop('temp_column', 'unused_field')

# Rename columns
result = table.rename(old_name='new_name', id='employee_id')

Distinct Values

Remove duplicate rows or get unique values.

def distinct(*exprs):
    """
    Get distinct rows or distinct values for expressions.
    
    Parameters:
    - *exprs: expressions to consider for distinctness (all columns if empty)
    
    Returns:
    Table with distinct rows
    """

Usage Examples:

# Distinct rows
unique_rows = table.distinct()

# Distinct values for specific columns
unique_combinations = table.distinct('department', 'level')

Sampling

Sample random rows from the table.

def sample(fraction=None, method='row'):
    """
    Sample rows from table.
    
    Parameters:
    - fraction: float, fraction of rows to sample (0.0 to 1.0)
    - method: str, sampling method
    
    Returns:
    Sampled table
    """

Usage Example:

# Sample 10% of rows
sample = table.sample(0.1)

Install with Tessl CLI

npx tessl i tessl/pypi-ibis-framework

docs

aggregation-windows.md

backends.md

configuration.md

expressions.md

index.md

selectors.md

sql-integration.md

table-construction.md

table-operations.md

temporal.md

udfs.md

tile.json