Ibis is a portable Python dataframe library that provides a unified API for data analysis across 20+ different backends.
—
Flexible column selection system with pattern matching, type-based selection, and logical combinations for working with wide datasets.
Select all columns or specific column patterns.
def all():
"""
Select all columns.
Returns:
Selector for all columns
"""
def c(*names):
"""
Select columns by name.
Parameters:
- *names: column names to select
Returns:
Selector for named columns
"""
def cols(*names):
"""
Alias for c() - select columns by name.
Parameters:
- *names: column names to select
Returns:
Selector for named columns
"""
Usage Examples:
import ibis
from ibis import selectors as s
# Select all columns
table.select(s.all())
# Select specific columns
table.select(s.c('name', 'age', 'salary'))
# Same as above
table.select(s.cols('name', 'age', 'salary'))
Select columns based on name patterns.
def matches(pattern):
"""
Select columns matching regex pattern.
Parameters:
- pattern: str, regular expression pattern
Returns:
Selector for columns matching pattern
"""
def startswith(prefix):
"""
Select columns starting with prefix.
Parameters:
- prefix: str, prefix to match
Returns:
Selector for columns with matching prefix
"""
def endswith(suffix):
"""
Select columns ending with suffix.
Parameters:
- suffix: str, suffix to match
Returns:
Selector for columns with matching suffix
"""
def contains(substring):
"""
Select columns containing substring.
Parameters:
- substring: str, substring to match
Returns:
Selector for columns containing substring
"""
Usage Examples:
from ibis import selectors as s
# Regex pattern matching
table.select(s.matches(r'.*_id$')) # Columns ending with '_id'
# Prefix matching
table.select(s.startswith('sales_')) # sales_2021, sales_2022, etc.
# Suffix matching
table.select(s.endswith('_total')) # revenue_total, cost_total, etc.
# Substring matching
table.select(s.contains('temp')) # temperature, temporary, template, etc.
Select columns based on their data types.
def numeric():
"""
Select numeric columns (integers and floats).
Returns:
Selector for numeric columns
"""
def of_type(*types):
"""
Select columns of specific types.
Parameters:
- *types: DataType objects or type strings
Returns:
Selector for columns of specified types
"""
def categorical():
"""
Select categorical/string columns.
Returns:
Selector for categorical columns
"""
def temporal():
"""
Select temporal columns (date, time, timestamp).
Returns:
Selector for temporal columns
"""
Usage Examples:
from ibis import selectors as s
# Select all numeric columns
table.select(s.numeric())
# Select specific types
table.select(s.of_type('string', 'int64'))
# Select string columns
table.select(s.categorical())
# Select date/time columns
table.select(s.temporal())
Select columns based on predicates or conditions.
def where(predicate):
"""
Select columns matching a predicate function.
Parameters:
- predicate: callable that takes column and returns bool
Returns:
Selector for columns matching predicate
"""
Usage Examples:
from ibis import selectors as s
# Select columns based on custom predicate
def has_nulls(col):
    return col.isnull().any()
table.select(s.where(has_nulls))
# Complex predicate
def numeric_with_high_variance(col):
    return col.type().is_numeric() and col.std() > 100
table.select(s.where(numeric_with_high_variance))
Combine selectors using logical operations.
# Logical operations on selectors
selector1 & selector2 # AND - columns matching both selectors
selector1 | selector2 # OR - columns matching either selector
~selector # NOT - columns not matching selector
Usage Examples:
from ibis import selectors as s
# Combine selectors with AND
numeric_sales = s.numeric() & s.startswith('sales_')
table.select(numeric_sales)
# Combine with OR
id_or_name = s.endswith('_id') | s.contains('name')
table.select(id_or_name)
# Negate selector
non_numeric = ~s.numeric()
table.select(non_numeric)
# Complex combinations
important_cols = (
s.c('id', 'name') | # Always include id and name
(s.numeric() & ~s.contains('temp')) # Numeric, excluding names containing 'temp'
)
table.select(important_cols)
Apply functions across selected columns.
def across(selector, func, names=None):
"""
Apply function across selected columns.
Parameters:
- selector: column selector
- func: function to apply to each selected column
- names: naming pattern for result columns
Returns:
Dict of name -> expression for selected columns
"""
Usage Examples:
from ibis import selectors as s
# Apply function to multiple columns
table.select(
s.across(s.numeric(), lambda x: x.mean(), names='{}_avg')
)
# Multiple transformations
table.select(
'id', 'name', # Keep identifier columns
**s.across(s.numeric(), lambda x: x.fillna(0)), # Fill nulls in numeric
**s.across(s.categorical(), lambda x: x.upper()) # Uppercase strings
)
Conditional selection based on column values.
def if_any(*selectors):
"""
Select if any of the selectors match.
Parameters:
- *selectors: selectors to check
Returns:
Selector matching if any selector matches
"""
def if_all(*selectors):
"""
Select if all selectors match.
Parameters:
- *selectors: selectors to check
Returns:
Selector matching if all selectors match
"""
Usage Examples:
from ibis import selectors as s
# Select if any condition matches
flexible = s.if_any(
s.contains('revenue'),
s.contains('profit'),
s.contains('income')
)
# Select if all conditions match
strict = s.if_all(
s.numeric(),
s.startswith('sales_'),
s.endswith('_2023')
)
table.select(flexible)
table.select(strict)
Frequently used selector combinations.
Usage Examples:
from ibis import selectors as s
# Financial columns
financial = s.matches(r'.*(revenue|profit|cost|price).*')
# ID columns
ids = s.endswith('_id') | s.endswith('_key') | s.matches(r'^id$')
# Metrics (numeric, not IDs)
metrics = s.numeric() & ~ids
# Clean dataset selection
clean_data = (
ids | # Always keep identifiers
s.where(lambda col: ~col.isnull().all()) # Keep columns that are not entirely null
)
# Time series columns
time_series = s.temporal() | s.matches(r'.*_(date|time|timestamp).*')
table.select(clean_data)
Install with Tessl CLI
npx tessl i tessl/pypi-ibis-framework