A Python package for extracting, transforming and loading tables of data.
—
Core functionality for creating, accessing, and manipulating table structures. This module provides the fundamental operations for working with tabular data in PETL, including table creation, data access patterns, and conversion utilities.
Create tables from various data structures and empty tables for building data programmatically.
def empty() -> Table:
"""Create an empty table with no fields or data."""
def wrap(table) -> Table:
"""
Wrap an iterable as a Table object.
Parameters:
- table: Any iterable that yields rows (lists, tuples, etc.)
Returns:
Table object
"""
def fromcolumns(cols, header=None, missing=None) -> Table:
"""
Construct a table from a sequence of column arrays.
Parameters:
- cols: Sequence of column arrays/lists
- header: Optional list of field names
- missing: Value to use for missing data
Returns:
Table object
"""

Access table structure and data using various patterns and formats.
def header(table):
"""
Get the header row of a table.
Parameters:
- table: Input table
Returns:
Tuple of field names
"""
def fieldnames(table):
"""
Get field names from table header.
Parameters:
- table: Input table
Returns:
Tuple of field names
"""
def data(table, *sliceargs):
"""
Return data rows from table (excluding header).
Parameters:
- table: Input table
- sliceargs: Optional slice arguments for row selection
Returns:
Iterator over data rows
"""
def records(table, *sliceargs, **kwargs):
"""
Return table data as Record objects with named field access.
Parameters:
- table: Input table
- sliceargs: Optional slice arguments
- missing: Value for missing fields
Returns:
Iterator over Record objects
"""
def dicts(table, *sliceargs, **kwargs):
"""
Return table data as dictionaries.
Parameters:
- table: Input table
- sliceargs: Optional slice arguments
- missing: Value for missing fields
Returns:
Iterator over dictionaries
"""
def namedtuples(table, *sliceargs, **kwargs):
"""
Return table data as named tuples.
Parameters:
- table: Input table
- sliceargs: Optional slice arguments
- missing: Value for missing fields
Returns:
Iterator over named tuples
"""
def values(table, *field, **kwargs):
"""
Return a container supporting iteration over values in specified field(s).
Parameters:
- table: Input table
- field: Field name(s) to extract values from
- missing: Value for missing data
Returns:
Container with field values
"""

Convert tables to standard Python data structures for further processing.
def listoflists(table):
"""
Materialize table as a list of lists.
Parameters:
- table: Input table
Returns:
List of lists (including header)
"""
def listoftuples(table):
"""
Materialize table as a list of tuples.
Parameters:
- table: Input table
Returns:
List of tuples (including header)
"""
def tupleoftuples(table):
"""
Materialize table as a tuple of tuples.
Parameters:
- table: Input table
Returns:
Tuple of tuples (including header)
"""
def tupleoflists(table):
"""
Materialize table as a tuple of lists.
Parameters:
- table: Input table
Returns:
Tuple of lists (including header)
"""
def columns(table, missing=None):
"""
Extract table columns as a dictionary of lists.
Parameters:
- table: Input table
- missing: Value for missing data
Returns:
Dictionary mapping field names to column lists
"""
def facetcolumns(table, key, missing=None):
"""
Extract columns grouped by key values.
Parameters:
- table: Input table
- key: Field name or function to group by
- missing: Value for missing data
Returns:
Dictionary mapping key values to column dictionaries
"""

Create lookup dictionaries and indexes from table data for fast data access patterns.
def lookup(table, key, value=None, dictionary=None):
"""
Load a dictionary with data from the table, allowing multiple values per key.
Parameters:
- table: Input table
- key: Field name for dictionary keys
- value: Field name for values (defaults to remaining fields)
- dictionary: Existing dictionary to extend
Returns:
Dictionary mapping keys to lists of values
"""
def lookupone(table, key, value=None, dictionary=None, strict=False):
"""
Load a dictionary expecting one value per key.
Parameters:
- table: Input table
- key: Field name for dictionary keys
- value: Field name for values
- dictionary: Existing dictionary to extend
- strict: If True, raise error on duplicate keys
Returns:
Dictionary mapping keys to single values
"""
def dictlookup(table, key, dictionary=None):
"""
Load a dictionary where values are complete rows as dictionaries.
Parameters:
- table: Input table
- key: Field name for dictionary keys
- dictionary: Existing dictionary to extend
Returns:
Dictionary mapping keys to lists of row dictionaries
"""
def dictlookupone(table, key, dictionary=None, strict=False):
"""
Load a dictionary where each value is a single row dictionary.
Parameters:
- table: Input table
- key: Field name for dictionary keys
- dictionary: Existing dictionary to extend
- strict: If True, raise error on duplicate keys
Returns:
Dictionary mapping keys to row dictionaries
"""
def recordlookup(table, key, dictionary=None):
"""
Load a dictionary where values are Record objects.
Parameters:
- table: Input table
- key: Field name for dictionary keys
- dictionary: Existing dictionary to extend
Returns:
Dictionary mapping keys to lists of Record objects
"""
def recordlookupone(table, key, dictionary=None, strict=False):
"""
Load a dictionary where each value is a single Record object.
Parameters:
- table: Input table
- key: Field name for dictionary keys
- dictionary: Existing dictionary to extend
- strict: If True, raise error on duplicate keys
Returns:
Dictionary mapping keys to Record objects
"""

Specialized functions for complex data access patterns and expression evaluation.
def expr(expression_text, trusted=True):
"""
Create a function from a Python expression string for field operations.
Parameters:
- expression_text: Python expression as string
- trusted: If True, allow all Python expressions
Returns:
Function that can be used in field operations
"""
def rowgroupby(table, key, value=None):
"""
Group table rows by key and return grouped data.
Parameters:
- table: Input table
- key: Field name or function for grouping
- value: Field name for values
Returns:
Iterator over (key, group) pairs
"""
def nrows(table):
"""
Count the number of data rows in the table.
Parameters:
- table: Input table
Returns:
Integer count of rows (excluding header)
"""

import petl as etl
# Create table from lists
data = [['name', 'age', 'city'],
['Alice', 25, 'New York'],
['Bob', 30, 'London']]
table = etl.wrap(data)
# Access header and data
print(etl.header(table)) # ('name', 'age', 'city')
print(list(etl.data(table))) # [['Alice', 25, 'New York'], ['Bob', 30, 'London']]
# Convert to different formats
records = list(etl.records(table))
print(records[0].name) # 'Alice'
dicts = list(etl.dicts(table))
print(dicts[0]['age']) # 25

import petl as etl
table = etl.fromcsv('people.csv') # name, age, city, country
# Create lookups for fast data access
age_lookup = etl.lookupone(table, 'name', 'age')
print(age_lookup['Alice']) # 25
# Multiple values per key
city_lookup = etl.lookup(table, 'country', 'city')
print(city_lookup['USA']) # ['New York', 'Los Angeles', 'Chicago']
# Full record lookup
record_lookup = etl.recordlookupone(table, 'name')
print(record_lookup['Alice'].city) # 'New York'

Install with Tessl CLI
npx tessl i tessl/pypi-petl