A Python package for extracting, transforming and loading tables of data.
—
Core transformation operations for reshaping, filtering, and manipulating table data. This module provides essential functions for field operations, row selection, data conversion, and structural transformations that form the foundation of data processing workflows.
Operations for selecting, removing, and rearranging table fields.
def cut(table, *args, **kwargs) -> Table:
"""
Select specific fields from the table.
Parameters:
- table: Input table
- args: Field names, indices, or slice objects
- kwargs: Additional options
Returns:
Table with selected fields only
"""
def cutout(table, *args, **kwargs) -> Table:
"""
Remove specific fields from the table.
Parameters:
- table: Input table
- args: Field names or indices to remove
- kwargs: Additional options
Returns:
Table with specified fields removed
"""
def movefield(table, field, index) -> Table:
"""
Move a field to a different position in the table.
Parameters:
- table: Input table
- field: Field name to move
- index: Target position (0-based)
Returns:
Table with field moved to new position
"""
def rename(table, *args, **kwargs) -> Table:
"""
Rename table fields.
Parameters:
- table: Input table
- args: Rename specifications (old_name, new_name pairs or dict)
- kwargs: Additional options
Returns:
Table with renamed fields
"""Add new fields to tables with various value sources.
def addfield(table, field, value=None, index=None, missing=None) -> Table:
"""
Add a new field to the table.
Parameters:
- table: Input table
- field: Name of new field
- value: Value, function, or iterable for field values
- index: Position to insert field (default: end)
- missing: Value for missing data
Returns:
Table with new field added
"""
def addfields(table, field_defs, missing=None) -> Table:
"""
Add multiple fields to the table.
Parameters:
- table: Input table
- field_defs: Dictionary or list of (field, value) specifications
- missing: Value for missing data
Returns:
Table with new fields added
"""
def addfieldusingcontext(table, field, callable, **kwargs) -> Table:
"""
Add field using context-aware function.
Parameters:
- table: Input table
- field: Name of new field
- callable: Function that receives row context
- kwargs: Additional arguments passed to callable
Returns:
Table with new field added
"""
def addrownumbers(table, field='row', start=1) -> Table:
"""
Add row numbers as a new field.
Parameters:
- table: Input table
- field: Name for row number field
- start: Starting row number
Returns:
Table with row number field added
"""
def addcolumn(table, field, column) -> Table:
"""
Add a column of values as a new field.
Parameters:
- table: Input table
- field: Name of new field
- column: Iterable of values for the column
Returns:
Table with new column added
"""Filter and select rows based on various criteria.
def select(table, *args, **kwargs) -> Table:
"""
Select rows based on criteria.
Parameters:
- table: Input table
- args: Selection criteria (field, function/value pairs)
- kwargs: Additional options (complement, etc.)
Returns:
Table with selected rows
"""
def selecteq(table, field, value, complement=False) -> Table:
"""Select rows where field equals value."""
def selectne(table, field, value, complement=False) -> Table:
"""Select rows where field does not equal value."""
def selectlt(table, field, value, complement=False) -> Table:
"""Select rows where field is less than value."""
def selectle(table, field, value, complement=False) -> Table:
"""Select rows where field is less than or equal to value."""
def selectgt(table, field, value, complement=False) -> Table:
"""Select rows where field is greater than value."""
def selectge(table, field, value, complement=False) -> Table:
"""Select rows where field is greater than or equal to value."""
def selectin(table, field, test, complement=False) -> Table:
"""
Select rows where field value is in test collection.
Parameters:
- table: Input table
- field: Field name to test
- test: Collection of values to test membership
- complement: If True, select rows NOT in test
Returns:
Table with matching rows
"""
def selectnotin(table, field, test, complement=False) -> Table:
"""Select rows where field value is not in test collection."""
def selectnone(table, field, complement=False) -> Table:
"""Select rows where field is None."""
def selectnotnone(table, field, complement=False) -> Table:
"""Select rows where field is not None."""
def selectcontains(table, field, value, complement=False) -> Table:
"""Select rows where field contains value."""
def selectrangeclosed(table, field, minv, maxv, complement=False) -> Table:
"""Select rows where field is in closed range [minv, maxv]."""
def rowlenselect(table, n, complement=False) -> Table:
"""
Select rows with specified length.
Parameters:
- table: Input table
- n: Required row length
- complement: If True, select rows with different length
Returns:
Table with rows of specified length
"""Extract subsets of rows from tables.
def rowslice(table, *sliceargs) -> Table:
"""
Extract a slice of rows from the table.
Parameters:
- table: Input table
- sliceargs: Slice arguments (start, stop, step)
Returns:
Table with sliced rows
"""
def head(table, n=5) -> Table:
"""
Get the first n rows of the table.
Parameters:
- table: Input table
- n: Number of rows to return
Returns:
Table with first n rows
"""
def tail(table, n=5) -> Table:
"""
Get the last n rows of the table.
Parameters:
- table: Input table
- n: Number of rows to return
Returns:
Table with last n rows
"""
def skip(table, n) -> Table:
"""
Skip the first n data rows.
Parameters:
- table: Input table
- n: Number of rows to skip
Returns:
Table with first n rows skipped
"""
def skipcomments(table, prefix='#') -> Table:
"""
Skip rows that begin with comment prefix.
Parameters:
- table: Input table
- prefix: Comment prefix to identify rows to skip
Returns:
Table with comment rows removed
"""Convert and transform field values.
def convert(table, *args, **kwargs) -> Table:
"""
Apply conversion functions to specified fields.
Parameters:
- table: Input table
- args: Field/function pairs for conversion
- kwargs: Additional options (where, failonerror, etc.)
Returns:
Table with converted field values
"""
def convertall(table, *args, **kwargs) -> Table:
"""
Apply conversion function to all fields.
Parameters:
- table: Input table
- args: Conversion function(s)
- kwargs: Additional options
Returns:
Table with all fields converted
"""
def convertnumbers(table, strict=False, **kwargs) -> Table:
"""
Attempt to convert string fields to numeric types.
Parameters:
- table: Input table
- strict: If True, raise error on conversion failure
- kwargs: Additional options
Returns:
Table with numeric conversions applied
"""
def replace(table, field, a, b, **kwargs) -> Table:
"""
Replace occurrences of value a with b in specified field.
Parameters:
- table: Input table
- field: Field name to modify
- a: Value to replace
- b: Replacement value
- kwargs: Additional options
Returns:
Table with replacements applied
"""
def replaceall(table, a, b, **kwargs) -> Table:
"""Replace occurrences of value a with b in all fields."""
def update(table, field, value, **kwargs) -> Table:
"""
Update field values using a function or mapping.
Parameters:
- table: Input table
- field: Field name to update
- value: Update function, value, or mapping
- kwargs: Additional options (where, etc.)
Returns:
Table with updated field values
"""Format and manipulate string field values.
def format(table, field, fmt, **kwargs) -> Table:
"""
Format field values using format string.
Parameters:
- table: Input table
- field: Field name to format
- fmt: Format string (e.g., '{:.2f}', '{:>10}')
- kwargs: Additional options
Returns:
Table with formatted field values
"""
def formatall(table, fmt, **kwargs) -> Table:
"""Format all field values using format string."""
def interpolate(table, field, fmt, **kwargs) -> Table:
"""
Interpolate values into format string.
Parameters:
- table: Input table
- field: Field name for output
- fmt: Format string with field references
- kwargs: Additional options
Returns:
Table with interpolated values
"""
def interpolateall(table, fmt, **kwargs) -> Table:
"""Interpolate all field values into format string."""Combine multiple tables vertically or horizontally.
def cat(*tables, **kwargs) -> Table:
"""
Concatenate tables vertically.
Parameters:
- tables: Tables to concatenate
- kwargs: Additional options (missing, header, etc.)
Returns:
Single table with all rows combined
"""
def stack(*tables, **kwargs) -> Table:
"""
Stack tables with different structures.
Parameters:
- tables: Tables to stack
- kwargs: Additional options
Returns:
Table with unified structure containing all data
"""
def annex(*tables, **kwargs) -> Table:
"""
Join tables horizontally by row position.
Parameters:
- tables: Tables to join horizontally
- kwargs: Additional options
Returns:
Table with fields from all input tables
"""Modify table headers and structure.
def setheader(table, header) -> Table:
"""
Replace the table header.
Parameters:
- table: Input table
- header: New header row (list/tuple of field names)
Returns:
Table with new header
"""
def extendheader(table, fields) -> Table:
"""
Extend the header with additional fields.
Parameters:
- table: Input table
- fields: Additional field names to append
Returns:
Table with extended header
"""
def pushheader(table, header, *args) -> Table:
"""
Push the current header down as the first data row.
Parameters:
- table: Input table
- header: New header to use
- args: Additional arguments
Returns:
Table with old header as first data row
"""
def prefixheader(table, prefix) -> Table:
"""
Add prefix to all field names.
Parameters:
- table: Input table
- prefix: String prefix to add
Returns:
Table with prefixed field names
"""
def suffixheader(table, suffix) -> Table:
"""Add suffix to all field names."""
def sortheader(table) -> Table:
"""Sort header fields alphabetically."""Fill missing values and clean data inconsistencies.
def filldown(table, *fields, **kwargs) -> Table:
"""
Fill missing values by propagating non-missing values downward.
Parameters:
- table: Input table
- fields: Field names to fill (all fields if none specified)
- kwargs: Additional options (missing value specification)
Returns:
Table with missing values filled downward
"""
def fillright(table, *fields, **kwargs) -> Table:
"""
Fill missing values by propagating values from left to right.
Parameters:
- table: Input table
- fields: Field names to fill
- kwargs: Additional options
Returns:
Table with missing values filled rightward
"""
def fillleft(table, *fields, **kwargs) -> Table:
"""
Fill missing values by propagating values from right to left.
Parameters:
- table: Input table
- fields: Field names to fill
- kwargs: Additional options
Returns:
Table with missing values filled leftward
"""import petl as etl
table = etl.fromcsv('data.csv') # name, age, city, country, salary
# Select specific fields
selected = etl.cut(table, 'name', 'age', 'salary')
# Remove unwanted fields
cleaned = etl.cutout(table, 'country')
# Rename fields
renamed = etl.rename(table, 'salary', 'income')
# Move field to different position
reordered = etl.movefield(table, 'salary', 1) # salary becomes second fieldimport petl as etl
table = etl.fromcsv('employees.csv')
# Add constant field
with_status = etl.addfield(table, 'status', 'active')
# Add computed field
with_tax = etl.addfield(table, 'tax', lambda row: row.salary * 0.25)
# Add row numbers
numbered = etl.addrownumbers(table, 'id', start=1001)
# Add multiple fields
enhanced = etl.addfields(table, {
'department': 'Engineering',
'bonus': lambda rec: rec.salary * 0.1,
'year': 2023
})import petl as etl
table = etl.fromcsv('people.csv')
# Simple equality filter
adults = etl.selecteq(table, 'age', 18)
# Numeric comparisons
seniors = etl.selectgt(table, 'age', 65)
young_adults = etl.selectrangeclosed(table, 'age', 18, 30)
# Set membership
cities = etl.selectin(table, 'city', ['New York', 'London', 'Tokyo'])
# Complex filtering with functions
high_earners = etl.select(table, 'salary', lambda x: x > 100000)
# Multiple conditions
filtered = etl.select(table,
'age', lambda age: age > 25,
'city', lambda city: city.startswith('S'))import petl as etl
table = etl.fromcsv('data.csv')
# Convert specific field
clean_ages = etl.convert(table, 'age', int)
# Multiple conversions
converted = etl.convert(table,
'age', int,
'salary', float,
'name', str.title)
# Conditional conversion
cleaned = etl.convert(table, 'phone',
lambda x: x.replace('-', '') if x else None,
where=lambda row: row.phone is not None)
# Replace values
fixed = etl.replace(table, 'status', 'N/A', None)
# Auto-convert numbers
numeric = etl.convertnumbers(table)import petl as etl
table = etl.fromcsv('products.csv')
# Format currency
formatted = etl.format(table, 'price', '${:.2f}')
# Interpolate multiple fields
summary = etl.interpolate(table, 'description',
'{name} - ${price:.2f} ({category})')
# Format all numeric fields
all_formatted = etl.formatall(table, '{:.2f}')Install with Tessl CLI
npx tessl i tessl/pypi-petl