List processing tools and functional utilities

Experimental and specialized utility functions for advanced use cases. These functions provide additional functionality beyond the core toolz capabilities.

from toolz.sandbox import EqualityHashKey, unzip, fold

Utilities for creating hash keys from normally unhashable types.
class EqualityHashKey:
"""
Create a hash key that uses equality comparisons between items.
This may be used to create hash keys for otherwise unhashable types.
Adding N EqualityHashKey items to a hash container may require O(N**2) operations.
A suitable key function such as tuple or frozenset is usually preferred if possible.
"""
def __init__(self, key, item):
"""
Create equality-based hash key.
Parameters:
- key: function or index that returns a hashable object to distinguish unequal items
- item: the item to create a hash key for
"""Additional sequence manipulation functions.
def unzip(seq):
"""
Inverse of zip. Unpack a sequence of tuples into separate sequences.
Parameters:
- seq: sequence of tuples to unpack
Returns:
Tuple of sequences, one for each position in the input tuples
"""Functions for parallel and distributed computation.
def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None):
"""
Reduce without guarantee of ordered reduction.
Enables parallel reduction by chunking the sequence and distributing work
across multiple map operations.
Parameters:
- binop: associative binary operator for reduction
- seq: sequence to be aggregated
- default: identity element (optional)
- map: map implementation to use (can be parallel)
- chunksize: number of elements per chunk
- combine: binary operator to combine intermediate results (optional)
Returns:
Result of folding binop across the sequence
"""from toolz.sandbox import EqualityHashKey
from toolz import unique, curry

# Partially apply EqualityHashKey so it can be used as a key= callable.
hash_by_equality = curry(EqualityHashKey, None)
hash_by_len = curry(EqualityHashKey, len)

# unique() requires hashable keys; EqualityHashKey wraps unhashable items
# (here: lists) so they can live in a hash-based container.
samples = [[], (), [1], [1], [2]]
distinct = list(unique(samples, key=hash_by_equality))
# [[], (), [1], [2]]

# Supplying a real key function such as len avoids the O(N**2)
# equality scans of the pure-equality key.
distinct_by_len = list(unique(samples, key=hash_by_len))
# [[], [1], [2]] (keeps first of each length)

from toolz.sandbox import unzip
# unzip inverts zip: a sequence of tuples becomes one sequence per position.
letter_pairs = [(1, 'a'), (2, 'b'), (3, 'c')]
ids, labels = unzip(letter_pairs)
# ids: (1, 2, 3)
# labels: ('a', 'b', 'c')

# Arity is preserved: three-element tuples unpack into three sequences.
records = [(1, 'a', True), (2, 'b', False), (3, 'c', True)]
nums, chars, flags = unzip(records)
# nums: (1, 2, 3)
# chars: ('a', 'b', 'c')
# flags: (True, False, True)

from toolz.sandbox import fold
from operator import add
import multiprocessing

# Sequential fold: equivalent to reduce(add, values, 0).
values = list(range(1000))
sequential_sum = fold(add, values, 0)
# 499500

# Passing Pool.map as the map= argument distributes the chunked
# partial reductions across worker processes.
worker_pool = multiprocessing.Pool()
pooled_sum = fold(add, values, 0, map=worker_pool.map, chunksize=50)
worker_pool.close()
worker_pool.join()
# 499500 (same result, computed in parallel)

# Any associative binary operator works; merging dicts folds a
# sequence of mappings into a single one.
from toolz import merge
dicts = [{'a': 1}, {'b': 2}, {'a': 3, 'c': 4}]
combined = fold(merge, dicts, {})
# {'a': 3, 'b': 2, 'c': 4}

Install with the Tessl CLI:

npx tessl i tessl/pypi-toolz