List processing tools and functional utilities
Comprehensive sequence processing functions for filtering, grouping, partitioning, and transforming iterables. These functions work with any iterable and form the backbone of functional data processing pipelines.
Functions for selecting elements from sequences based on position or predicate.
def remove(predicate, seq):
"""
Return items for which predicate(item) is False.
Parameters:
- predicate: function that returns True/False
- seq: iterable sequence
Returns:
Iterator of items where predicate is False
"""
def unique(seq, key=None):
"""
Return only unique elements of sequence.
Parameters:
- seq: iterable sequence
- key: function for computing uniqueness key (optional)
Returns:
Iterator of unique elements in order first seen
"""
def take(n, seq):
"""
First n elements of sequence.
Parameters:
- n: number of elements to take
- seq: iterable sequence
Returns:
Iterator of first n elements
"""
def drop(n, seq):
"""
Sequence following first n elements.
Parameters:
- n: number of elements to drop
- seq: iterable sequence
Returns:
Iterator starting after first n elements
"""
def take_nth(n, seq):
"""
Every nth item in sequence.
Parameters:
- n: step size (take every nth element)
- seq: iterable sequence
Returns:
Iterator of every nth element
"""
def tail(n, seq):
"""
Last n elements of sequence.
Parameters:
- n: number of elements from end
- seq: iterable sequence
Returns:
Last n elements (a slice of seq when it supports slicing, otherwise a tuple)
"""

Functions for accessing individual elements from sequences.
def first(seq):
"""
First element in sequence.
Parameters:
- seq: iterable sequence
Returns:
First element, or raises StopIteration if empty
"""
def second(seq):
"""
Second element in sequence.
Parameters:
- seq: iterable sequence
Returns:
Second element, or raises StopIteration if insufficient elements
"""
def nth(n, seq):
"""
nth element in sequence (0-indexed).
Parameters:
- n: index of element to retrieve
- seq: iterable sequence
Returns:
Element at index n; when n is out of range, raises IndexError for sequences (list, tuple, ...) and StopIteration for other iterables
"""
def last(seq):
"""
Last element in sequence.
Parameters:
- seq: iterable sequence
Returns:
Last element, or raises IndexError if empty
"""
def get(ind, seq, default=no_default):
"""
Get element from sequence or dict with optional default.
Parameters:
- ind: index/key to retrieve
- seq: sequence or mapping
- default: value to return if key/index not found
Returns:
Element at ind, or default if not found
"""
def peek(seq):
"""
Retrieve next element of sequence without consuming it.
Parameters:
- seq: iterable sequence
Returns:
Tuple of (next_element, iterator_with_element_restored)
"""
def peekn(n, seq):
"""
Retrieve next n elements of sequence without consuming them.
Parameters:
- n: number of elements to peek
- seq: iterable sequence
Returns:
Tuple of (tuple_of_n_elements, iterator_with_elements_restored)
"""

Functions for combining multiple sequences in various ways.
def concat(seqs):
"""
Concatenate zero or more iterables into single iterator.
Parameters:
- seqs: iterable of iterables to concatenate
Returns:
Iterator of all elements from all input iterables
"""
def concatv(*seqs):
"""
Variadic version of concat - concatenate sequences.
Parameters:
- *seqs: variable number of iterables
Returns:
Iterator of all elements from all input iterables
"""
def interleave(seqs):
"""
Interleave elements from multiple sequences.
Parameters:
- seqs: iterable of iterables
Returns:
Iterator alternating elements from each input sequence
"""
def merge_sorted(*seqs, **kwargs):
"""
Merge sorted sequences into single sorted sequence.
Parameters:
- *seqs: sorted iterables to merge
- key: function for sort key (optional)
- (key is the only supported keyword argument; there is no reverse option)
Returns:
Iterator of merged sorted elements
"""
def join(leftkey, leftseq, rightkey, rightseq,
left_default=no_default, right_default=no_default):
"""
Join two sequences on common attributes like SQL join.
Parameters:
- leftkey: function to compute join key from left items
- leftseq: left sequence to join
- rightkey: function to compute join key from right items
- rightseq: right sequence to join
- left_default: default for missing left items (optional)
- right_default: default for missing right items (optional)
Returns:
Iterator of (left_item, right_item) tuples
"""

Functions for organizing sequences into groups or partitions.
def groupby(key, seq):
"""
Group collection by key function.
Parameters:
- key: function to compute grouping key, or a non-callable index/key used for item lookup (e.g. 'name' groups by item['name'])
- seq: iterable sequence to group
Returns:
Dictionary mapping keys to lists of grouped items
"""
def partition(n, seq, pad=no_pad):
"""
Partition sequence into tuples of length n.
Parameters:
- n: length of each partition tuple
- seq: iterable sequence to partition
- pad: value to pad final tuple if needed (optional)
Returns:
Iterator of tuples of length n
"""
def partition_all(n, seq):
"""
Partition sequence into tuples of length at most n.
Parameters:
- n: maximum length of each partition tuple
- seq: iterable sequence to partition
Returns:
Iterator of tuples of length up to n
"""
def sliding_window(n, seq):
"""
Sequence of overlapping subsequences of length n.
Parameters:
- n: window size
- seq: iterable sequence
Returns:
Iterator of overlapping tuples of length n
"""

Functions for analyzing sequence properties and contents.
def frequencies(seq):
"""
Count occurrences of each value in sequence.
Parameters:
- seq: iterable sequence
Returns:
Dictionary mapping values to occurrence counts
"""
def isiterable(x):
"""
Check if object is iterable (strings count as iterable).
Parameters:
- x: object to test
Returns:
True if iter(x) succeeds (including for strings), False otherwise
"""
def isdistinct(seq):
"""
Check if all values in sequence are distinct.
Parameters:
- seq: iterable sequence
Returns:
True if all elements are unique
"""
def count(seq):
"""
Count number of items in sequence.
Parameters:
- seq: iterable sequence
Returns:
Integer count of elements
"""
def diff(*seqs, **kwargs):
"""
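Return the items that differ between sequences, compared position by position.
Parameters:
- *seqs: sequences to compare
- default: value substituted for missing items when sequences differ in length (optional)
- key: function applied to items before they are compared (optional)
Returns:
Iterator of tuples, one per position where the sequences disagree, holding the item from each sequence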
"""
def topk(k, seq, key=None):
"""
Find k largest elements of sequence.
Parameters:
- k: number of elements to return
- seq: iterable sequence
- key: function for comparison key (optional)
Returns:
Tuple of k largest elements
"""

Complex operations combining multiple sequence manipulations.
def accumulate(binop, seq, initial=no_default):
"""
Repeatedly apply binary function to sequence, accumulating results.
Parameters:
- binop: binary function (takes two args, returns one)
- seq: iterable sequence
- initial: starting value (optional)
Returns:
Iterator of accumulated results
"""
def reduceby(key, binop, seq, init=no_default):
"""
Simultaneously group by key and reduce each group.
Parameters:
- key: function to compute grouping key
- binop: binary function for reduction
- seq: iterable sequence
- init: initial value for reduction (optional)
Returns:
Dictionary mapping keys to reduced values
"""
def mapcat(func, seqs):
"""
Apply function to sequences and concatenate results.
Parameters:
- func: function that returns iterable
- seqs: iterable of sequences
Returns:
Iterator of concatenated function results
"""
def iterate(func, x):
"""
Repeatedly apply function to create infinite sequence.
Parameters:
- func: function to apply repeatedly
- x: initial value
Returns:
Iterator of x, func(x), func(func(x)), ...
"""
def pluck(ind, seqs, default=no_default):
"""
Pluck element(s) from each item in sequence.
Parameters:
- ind: index/key or list of indices/keys to pluck
- seqs: sequence of sequences/mappings
- default: value for missing indices (optional)
Returns:
Iterator of plucked elements
"""
def random_sample(prob, seq, random_state=None):
"""
Return elements from sequence with given probability.
Parameters:
- prob: probability (0-1) of including each element
- seq: iterable sequence
- random_state: int seed or random.Random instance (optional)
Returns:
Iterator of randomly sampled elements
"""
def getter(index):
"""
Create function that gets item from its operand.
Equivalent to lambda x: x[index] but optimized. A list of indices fetches
several items at once and returns them as a tuple; it does not perform
nested access (use get_in for nested lookups).
Parameters:
- index: index/key, or list of indices/keys to fetch together
Returns:
Function that extracts specified item(s) from its argument
"""

Functions for building new sequences from existing ones.
def cons(el, seq):
"""
Add element to beginning of sequence.
Parameters:
- el: element to prepend
- seq: iterable sequence
Returns:
Iterator with el followed by all elements of seq
"""
def interpose(el, seq):
"""
Introduce element between each pair of elements in sequence.
Parameters:
- el: element to interpose
- seq: iterable sequence
Returns:
Iterator with el between each adjacent pair
"""

from toolz import pipe, unique, take, groupby
# Sample transaction records used by the pipeline below
transactions = [
    {'id': 1, 'amount': 100, 'category': 'food'},
    {'id': 2, 'amount': 50, 'category': 'transport'},
    {'id': 3, 'amount': 100, 'category': 'food'},  # duplicate amount
    {'id': 4, 'amount': 200, 'category': 'entertainment'},
]
# Pull out the amounts, keep the first occurrence of each, then bucket them
# into 'high' (100 and above) and 'low' (below 100)
result = pipe(
    transactions,
    lambda records: (record['amount'] for record in records),  # extract amounts
    unique,  # unique amounts only
    lambda amounts: groupby(
        lambda amount: 'low' if amount < 100 else 'high',
        amounts,
    ),
)
# {'high': [100, 200], 'low': [50]}

from toolz import partition, sliding_window, take
data = range(10)  # yields 0, 1, 2, ..., 9
# Split into consecutive triples
groups = [*partition(3, data)]
# [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
# First five overlapping windows of width 3
windows = [*take(5, sliding_window(3, data))]
# [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]

from toolz import groupby, frequencies, reduceby
from operator import add
words = ['apple', 'banana', 'apricot', 'cherry', 'blueberry']
# Group by first letter
by_letter = groupby(lambda w: w[0], words)
# {'a': ['apple', 'apricot'], 'b': ['banana', 'blueberry'], 'c': ['cherry']}
# Count word lengths (the lengths are 5, 6, 7, 6, 9)
length_counts = frequencies(map(len, words))
# {5: 1, 6: 2, 7: 1, 9: 1}
# Sum lengths by first letter.
# The key function must see the words themselves (not their lengths), so the
# words are passed as the sequence and each one adds its length to a running
# total that starts at 0.
letter_lengths = reduceby(
    lambda w: w[0],
    lambda total, w: total + len(w),
    words,
    0,
)
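# {'a': 12, 'b': 15, 'c': 6}

# getter is not re-exported at the toolz top level; import it from toolz.itertoolz
from toolz import pluck
from toolz.itertoolz import getter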
from functools import partial

from toolz import get_in

# Create getter functions
get_first = getter(0)
get_name = getter('name')
# A list passed to getter fetches several top-level items as a tuple; it does
# not descend into nested containers, so the nested lookup goes through get_in.
get_nested = partial(get_in, ['person', 'name'])
# Use with sequences
data = [(1, 'a'), (2, 'b'), (3, 'c')]
first_items = list(map(get_first, data))
# [1, 2, 3]
# Use with dictionaries
people = [{'name': 'Alice', 'age': 30}, {'name': 'Bob', 'age': 25}]
names = list(map(get_name, people))
# ['Alice', 'Bob']
# Nested access
nested_data = [
    {'person': {'name': 'Alice', 'age': 30}},
    {'person': {'name': 'Bob', 'age': 25}},
]
nested_names = list(map(get_nested, nested_data))
# ['Alice', 'Bob']
# pluck is similar but works on sequences of items
names_plucked = list(pluck('name', people))
# ['Alice', 'Bob']

Install with Tessl CLI
npx tessl i tessl/pypi-toolz