tessl/pypi-ibis-framework

The portable Python dataframe library that provides a unified API for data analysis across 20+ different backends

—

Pending

Overview

Eval results

Files

Table Operations and Transformations

Name: tessl/pypi-ibis-framework
Author: tessl

Comprehensive table operations including filtering, selection, aggregation, joins, sorting, and transformations for data analysis.

Capabilities

Column Selection

Select specific columns or computed expressions from tables.

def select(*exprs):
    """
    Select columns or computed expressions.
    
    Parameters:
    - *exprs: column names, expressions, or keyword expressions
    
    Returns:
    Table with selected columns
    """

Usage Examples:

# Select specific columns
result = table.select('name', 'age')

# Select with expressions
result = table.select(
    'name',
    age_next_year=table.age + 1,
    is_adult=table.age >= 18
)

# Select all columns
result = table.select('*')

Row Filtering

Filter rows based on boolean predicates.

def filter(*predicates):
    """
    Filter rows based on predicates.
    
    Parameters:
    - *predicates: boolean expressions
    
    Returns:
    Table with filtered rows
    """

Usage Examples:

# Single condition
result = table.filter(table.age > 25)

# Multiple conditions (AND)
result = table.filter(table.age > 25, table.salary > 50000)

# Complex conditions
result = table.filter(
    (table.age > 25) & (table.department == 'Engineering')
)

Grouping and Aggregation

Group rows and compute aggregate statistics.

def group_by(*exprs):
    """
    Group table by expressions.
    
    Parameters:
    - *exprs: column names or expressions to group by
    
    Returns:
    GroupedTable for aggregation
    """

def aggregate(**kwargs):
    """
    Aggregate entire table or grouped table.
    
    Parameters:
    - **kwargs: name=expression pairs for aggregations
    
    Returns:
    Table with aggregated results
    """

Usage Examples:

# Group by single column
result = table.group_by('department').aggregate(
    avg_salary=table.salary.mean(),
    count=table.count()
)

# Group by multiple columns
result = table.group_by('department', 'level').aggregate(
    total_salary=table.salary.sum(),
    employee_count=table.count()
)

# Aggregate without grouping
result = table.aggregate(
    total_employees=table.count(),
    avg_age=table.age.mean()
)

Sorting

Sort table rows by one or more expressions.

def order_by(*exprs):
    """
    Sort table by expressions.
    
    Parameters:
    - *exprs: column names, expressions, or sort keys
    
    Returns:
    Sorted table
    """

def asc(expr):
    """Create ascending sort key."""
    
def desc(expr):
    """Create descending sort key."""

Usage Examples:

# Sort by single column
result = table.order_by('name')

# Sort by multiple columns
result = table.order_by('department', 'salary')

# Explicit sort direction
result = table.order_by(
    ibis.asc('department'),
    ibis.desc('salary')
)

Row Limiting

Limit the number of rows returned.

def limit(n, offset=0):
    """
    Limit number of rows.
    
    Parameters:
    - n: int, maximum number of rows
    - offset: int, number of rows to skip
    
    Returns:
    Table with limited rows
    """

def head(n=5):
    """Return first n rows."""

def tail(n=5):
    """Return last n rows (requires ordering)."""

Usage Examples:

# Top 10 rows
result = table.limit(10)

# Skip first 20, take next 10
result = table.limit(10, offset=20)

# Convenient methods
top_5 = table.head(5)
bottom_5 = table.order_by('salary').tail(5)

Joins

Join tables using various join types and conditions.

def join(other, predicates=None, how='inner'):
    """
    Join with another table.
    
    Parameters:
    - other: Table to join with
    - predicates: join conditions or None for cross join
    - how: join type ('inner', 'left', 'right', 'outer', 'semi', 'anti')
    
    Returns:
    Joined table
    """

def cross_join(other):
    """Cross join (Cartesian product)."""

def asof_join(other, predicates, tolerance=None):
    """As-of join for time-series data."""

Usage Examples:

# Inner join
result = employees.join(
    departments,
    employees.dept_id == departments.id
)

# Left join with multiple conditions
result = employees.join(
    departments,
    [employees.dept_id == departments.id,
     employees.location == departments.location],
    how='left'
)

# Cross join
result = table1.cross_join(table2)

Set Operations

Combine tables using set operations.

def union(table, /, *rest, distinct=False):
    """
    Compute multiset (or set) union of multiple tables.
    
    Parameters:
    - table: Table to union with (positional-only)
    - *rest: Additional tables to union
    - distinct: bool, use set union (True) or multiset union (False, default)
    
    Returns:
    Union of all tables
    """

def intersect(other):
    """Intersection with another table."""

def difference(other):
    """Difference from another table."""

Usage Examples:

# Union tables (multiset - keeps duplicates by default)
combined = table1.union(table2)

# Union with distinct values only
combined = table1.union(table2, distinct=True)

# Set operations
common = table1.intersect(table2)
unique_to_first = table1.difference(table2)

Column Operations

Add, drop, and rename columns.

def mutate(**kwargs):
    """
    Add or modify columns.
    
    Parameters:
    - **kwargs: name=expression pairs
    
    Returns:
    Table with new/modified columns
    """

def drop(*columns):
    """
    Drop columns.
    
    Parameters:
    - *columns: column names to drop
    
    Returns:
    Table without specified columns
    """

def rename(**kwargs):
    """
    Rename columns.
    
    Parameters:
    - **kwargs: old_name=new_name pairs
    
    Returns:
    Table with renamed columns
    """

Usage Examples:

# Add computed columns
result = table.mutate(
    age_next_year=table.age + 1,
    full_name=table.first_name + ' ' + table.last_name
)

# Drop columns
result = table.drop('temp_column', 'unused_field')

# Rename columns
result = table.rename(old_name='new_name', id='employee_id')

Distinct Values

Remove duplicate rows or get unique values.

def distinct(*exprs):
    """
    Get distinct rows or distinct values for expressions.
    
    Parameters:
    - *exprs: expressions to consider for distinctness (all columns if empty)
    
    Returns:
    Table with distinct rows
    """

Usage Examples:

# Distinct rows
unique_rows = table.distinct()

# Distinct values for specific columns
unique_combinations = table.distinct('department', 'level')

Sampling

Sample random rows from the table.

def sample(fraction=None, method='row'):
    """
    Sample rows from table.
    
    Parameters:
    - fraction: float, fraction of rows to sample (0.0 to 1.0)
    - method: str, sampling method
    
    Returns:
    Sampled table
    """

Usage Example:

# Sample 10% of rows
sample = table.sample(0.1)

Install with Tessl CLI

npx tessl i tessl/pypi-ibis-framework

docs

aggregation-windows.md

table-construction.md

tessl/pypi-ibis-framework

table-operations.md.css-3qkkll{font-size:var(--chakra-font-sizes-sm);font-weight:var(--chakra-font-weights-normal);color:var(--chakra-colors-gray-300);}docs/

Table Operations and Transformations

Capabilities

Column Selection

Row Filtering

Grouping and Aggregation

Sorting

Row Limiting

Joins

Set Operations

Column Operations

Distinct Values

Sampling

table-operations.mddocs/