tessl/pypi-petl

A Python package for extracting, transforming and loading tables of data.

—

Pending

Overview

Eval results

Files

Basic Transformations

Name: tessl/pypi-petl
Author: tessl

Core transformation operations for reshaping, filtering, and manipulating table data. This module provides essential functions for field operations, row selection, data conversion, and structural transformations that form the foundation of data processing workflows.

Capabilities

Field Selection and Manipulation

Operations for selecting, removing, and rearranging table fields.

def cut(table, *args, **kwargs) -> Table:
    """
    Select specific fields from the table.
    
    Parameters:
    - table: Input table
    - args: Field names, indices, or slice objects
    - kwargs: Additional options
    
    Returns:
    Table with selected fields only
    """

def cutout(table, *args, **kwargs) -> Table:
    """
    Remove specific fields from the table.
    
    Parameters:
    - table: Input table
    - args: Field names or indices to remove
    - kwargs: Additional options
    
    Returns:
    Table with specified fields removed
    """

def movefield(table, field, index) -> Table:
    """
    Move a field to a different position in the table.
    
    Parameters:
    - table: Input table
    - field: Field name to move
    - index: Target position (0-based)
    
    Returns:
    Table with field moved to new position
    """

def rename(table, *args, **kwargs) -> Table:
    """
    Rename table fields.
    
    Parameters:
    - table: Input table
    - args: Rename specifications (old_name, new_name pairs or dict)
    - kwargs: Additional options
    
    Returns:
    Table with renamed fields
    """

Field Addition

Add new fields to tables with various value sources.

def addfield(table, field, value=None, index=None, missing=None) -> Table:
    """
    Add a new field to the table.
    
    Parameters:
    - table: Input table
    - field: Name of new field
    - value: Value, function, or iterable for field values
    - index: Position to insert field (default: end)
    - missing: Value for missing data
    
    Returns:
    Table with new field added
    """

def addfields(table, field_defs, missing=None) -> Table:
    """
    Add multiple fields to the table.
    
    Parameters:
    - table: Input table
    - field_defs: Dictionary or list of (field, value) specifications
    - missing: Value for missing data
    
    Returns:
    Table with new fields added
    """

def addfieldusingcontext(table, field, callable, **kwargs) -> Table:
    """
    Add field using context-aware function.
    
    Parameters:
    - table: Input table
    - field: Name of new field
    - callable: Function that receives row context
    - kwargs: Additional arguments passed to callable
    
    Returns:
    Table with new field added
    """

def addrownumbers(table, field='row', start=1) -> Table:
    """
    Add row numbers as a new field.
    
    Parameters:
    - table: Input table
    - field: Name for row number field
    - start: Starting row number
    
    Returns:
    Table with row number field added
    """

def addcolumn(table, field, column) -> Table:
    """
    Add a column of values as a new field.
    
    Parameters:
    - table: Input table
    - field: Name of new field
    - column: Iterable of values for the column
    
    Returns:
    Table with new column added
    """

Row Selection and Filtering

Filter and select rows based on various criteria.

def select(table, *args, **kwargs) -> Table:
    """
    Select rows based on criteria.
    
    Parameters:
    - table: Input table
    - args: Selection criteria (field, function/value pairs)
    - kwargs: Additional options (complement, etc.)
    
    Returns:
    Table with selected rows
    """

def selecteq(table, field, value, complement=False) -> Table:
    """Select rows where field equals value."""

def selectne(table, field, value, complement=False) -> Table:
    """Select rows where field does not equal value."""

def selectlt(table, field, value, complement=False) -> Table:
    """Select rows where field is less than value."""

def selectle(table, field, value, complement=False) -> Table:
    """Select rows where field is less than or equal to value."""

def selectgt(table, field, value, complement=False) -> Table:
    """Select rows where field is greater than value."""

def selectge(table, field, value, complement=False) -> Table:
    """Select rows where field is greater than or equal to value."""

def selectin(table, field, test, complement=False) -> Table:
    """
    Select rows where field value is in test collection.
    
    Parameters:
    - table: Input table
    - field: Field name to test
    - test: Collection of values to test membership
    - complement: If True, select rows NOT in test
    
    Returns:
    Table with matching rows
    """

def selectnotin(table, field, test, complement=False) -> Table:
    """Select rows where field value is not in test collection."""

def selectnone(table, field, complement=False) -> Table:
    """Select rows where field is None."""

def selectnotnone(table, field, complement=False) -> Table:
    """Select rows where field is not None."""

def selectcontains(table, field, value, complement=False) -> Table:
    """Select rows where field contains value."""

def selectrangeclosed(table, field, minv, maxv, complement=False) -> Table:
    """Select rows where field is in closed range [minv, maxv]."""

def rowlenselect(table, n, complement=False) -> Table:
    """
    Select rows with specified length.
    
    Parameters:
    - table: Input table
    - n: Required row length
    - complement: If True, select rows with different length
    
    Returns:
    Table with rows of specified length
    """

Row Slicing and Sampling

Extract subsets of rows from tables.

def rowslice(table, *sliceargs) -> Table:
    """
    Extract a slice of rows from the table.
    
    Parameters:
    - table: Input table
    - sliceargs: Slice arguments (start, stop, step)
    
    Returns:
    Table with sliced rows
    """

def head(table, n=5) -> Table:
    """
    Get the first n rows of the table.
    
    Parameters:
    - table: Input table
    - n: Number of rows to return
    
    Returns:
    Table with first n rows
    """

def tail(table, n=5) -> Table:
    """
    Get the last n rows of the table.
    
    Parameters:
    - table: Input table
    - n: Number of rows to return
    
    Returns:
    Table with last n rows
    """

def skip(table, n) -> Table:
    """
    Skip the first n data rows.
    
    Parameters:
    - table: Input table
    - n: Number of rows to skip
    
    Returns:
    Table with first n rows skipped
    """

def skipcomments(table, prefix='#') -> Table:
    """
    Skip rows that begin with comment prefix.
    
    Parameters:
    - table: Input table
    - prefix: Comment prefix to identify rows to skip
    
    Returns:
    Table with comment rows removed
    """

Data Type Conversion

Convert and transform field values.

def convert(table, *args, **kwargs) -> Table:
    """
    Apply conversion functions to specified fields.
    
    Parameters:
    - table: Input table
    - args: Field/function pairs for conversion
    - kwargs: Additional options (where, failonerror, etc.)
    
    Returns:
    Table with converted field values
    """

def convertall(table, *args, **kwargs) -> Table:
    """
    Apply conversion function to all fields.
    
    Parameters:
    - table: Input table
    - args: Conversion function(s)
    - kwargs: Additional options
    
    Returns:
    Table with all fields converted
    """

def convertnumbers(table, strict=False, **kwargs) -> Table:
    """
    Attempt to convert string fields to numeric types.
    
    Parameters:
    - table: Input table
    - strict: If True, raise error on conversion failure
    - kwargs: Additional options
    
    Returns:
    Table with numeric conversions applied
    """

def replace(table, field, a, b, **kwargs) -> Table:
    """
    Replace occurrences of value a with b in specified field.
    
    Parameters:
    - table: Input table
    - field: Field name to modify
    - a: Value to replace
    - b: Replacement value
    - kwargs: Additional options
    
    Returns:
    Table with replacements applied
    """

def replaceall(table, a, b, **kwargs) -> Table:
    """Replace occurrences of value a with b in all fields."""

def update(table, field, value, **kwargs) -> Table:
    """
    Update field values using a function or mapping.
    
    Parameters:
    - table: Input table
    - field: Field name to update
    - value: Update function, value, or mapping
    - kwargs: Additional options (where, etc.)
    
    Returns:
    Table with updated field values
    """

String Formatting

Format and manipulate string field values.

def format(table, field, fmt, **kwargs) -> Table:
    """
    Format field values using format string.
    
    Parameters:
    - table: Input table
    - field: Field name to format
    - fmt: Format string (e.g., '{:.2f}', '{:>10}')
    - kwargs: Additional options
    
    Returns:
    Table with formatted field values
    """

def formatall(table, fmt, **kwargs) -> Table:
    """Format all field values using format string."""

def interpolate(table, field, fmt, **kwargs) -> Table:
    """
    Interpolate values into format string.
    
    Parameters:
    - table: Input table
    - field: Field name for output
    - fmt: Format string with field references
    - kwargs: Additional options
    
    Returns:
    Table with interpolated values
    """

def interpolateall(table, fmt, **kwargs) -> Table:
    """Interpolate all field values into format string."""

Table Combination

Combine multiple tables vertically or horizontally.

def cat(*tables, **kwargs) -> Table:
    """
    Concatenate tables vertically.
    
    Parameters:
    - tables: Tables to concatenate
    - kwargs: Additional options (missing, header, etc.)
    
    Returns:
    Single table with all rows combined
    """

def stack(*tables, **kwargs) -> Table:
    """
    Stack tables with different structures.
    
    Parameters:
    - tables: Tables to stack
    - kwargs: Additional options
    
    Returns:
    Table with unified structure containing all data
    """

def annex(*tables, **kwargs) -> Table:
    """
    Join tables horizontally by row position.
    
    Parameters:
    - tables: Tables to join horizontally
    - kwargs: Additional options
    
    Returns:
    Table with fields from all input tables
    """

Header Manipulation

Modify table headers and structure.

def setheader(table, header) -> Table:
    """
    Replace the table header.
    
    Parameters:
    - table: Input table
    - header: New header row (list/tuple of field names)
    
    Returns:
    Table with new header
    """

def extendheader(table, fields) -> Table:
    """
    Extend the header with additional fields.
    
    Parameters:
    - table: Input table
    - fields: Additional field names to append
    
    Returns:
    Table with extended header
    """

def pushheader(table, header, *args) -> Table:
    """
    Push the current header down as the first data row.
    
    Parameters:
    - table: Input table
    - header: New header to use
    - args: Additional arguments
    
    Returns:
    Table with old header as first data row
    """

def prefixheader(table, prefix) -> Table:
    """
    Add prefix to all field names.
    
    Parameters:
    - table: Input table
    - prefix: String prefix to add
    
    Returns:
    Table with prefixed field names
    """

def suffixheader(table, suffix) -> Table:
    """Add suffix to all field names."""

def sortheader(table) -> Table:
    """Sort header fields alphabetically."""

Data Cleaning and Fill Operations

Fill missing values and clean data inconsistencies.

def filldown(table, *fields, **kwargs) -> Table:
    """
    Fill missing values by propagating non-missing values downward.
    
    Parameters:
    - table: Input table
    - fields: Field names to fill (all fields if none specified)
    - kwargs: Additional options (missing value specification)
    
    Returns:
    Table with missing values filled downward
    """

def fillright(table, *fields, **kwargs) -> Table:
    """
    Fill missing values by propagating values from left to right.
    
    Parameters:
    - table: Input table
    - fields: Field names to fill
    - kwargs: Additional options
    
    Returns:
    Table with missing values filled rightward
    """

def fillleft(table, *fields, **kwargs) -> Table:
    """
    Fill missing values by propagating values from right to left.
    
    Parameters:
    - table: Input table
    - fields: Field names to fill
    - kwargs: Additional options
    
    Returns:
    Table with missing values filled leftward
    """

Usage Examples

Field Selection and Manipulation

import petl as etl

table = etl.fromcsv('data.csv')  # name, age, city, country, salary

# Select specific fields
selected = etl.cut(table, 'name', 'age', 'salary')

# Remove unwanted fields
cleaned = etl.cutout(table, 'country')

# Rename fields
renamed = etl.rename(table, 'salary', 'income')

# Move field to different position
reordered = etl.movefield(table, 'salary', 1)  # salary becomes second field

Adding Fields

import petl as etl

table = etl.fromcsv('employees.csv')

# Add constant field
with_status = etl.addfield(table, 'status', 'active')

# Add computed field
with_tax = etl.addfield(table, 'tax', lambda row: row.salary * 0.25)

# Add row numbers
numbered = etl.addrownumbers(table, 'id', start=1001)

# Add multiple fields
enhanced = etl.addfields(table, {
    'department': 'Engineering',
    'bonus': lambda rec: rec.salary * 0.1,
    'year': 2023
})

Row Filtering

import petl as etl

table = etl.fromcsv('people.csv')

# Simple equality filter
adults = etl.selecteq(table, 'age', 18)

# Numeric comparisons
seniors = etl.selectgt(table, 'age', 65)
young_adults = etl.selectrangeclosed(table, 'age', 18, 30)

# Set membership
cities = etl.selectin(table, 'city', ['New York', 'London', 'Tokyo'])

# Complex filtering with functions
high_earners = etl.select(table, 'salary', lambda x: x > 100000)

# Multiple conditions
filtered = etl.select(table, 
                     'age', lambda age: age > 25,
                     'city', lambda city: city.startswith('S'))

Data Conversion

import petl as etl

table = etl.fromcsv('data.csv')

# Convert specific field
clean_ages = etl.convert(table, 'age', int)

# Multiple conversions
converted = etl.convert(table, 
                       'age', int,
                       'salary', float,
                       'name', str.title)

# Conditional conversion
cleaned = etl.convert(table, 'phone', 
                     lambda x: x.replace('-', '') if x else None,
                     where=lambda row: row.phone is not None)

# Replace values
fixed = etl.replace(table, 'status', 'N/A', None)

# Auto-convert numbers
numeric = etl.convertnumbers(table)

String Formatting

import petl as etl

table = etl.fromcsv('products.csv')

# Format currency
formatted = etl.format(table, 'price', '${:.2f}')

# Interpolate multiple fields
summary = etl.interpolate(table, 'description', 
                         '{name} - ${price:.2f} ({category})')

# Format all numeric fields
all_formatted = etl.formatall(table, '{:.2f}')

Install with Tessl CLI