List processing tools and functional utilities

Experimental and specialized utility functions for advanced use cases. These functions provide additional functionality beyond the core toolz capabilities.

from toolz.sandbox import EqualityHashKey, unzip, fold

Utilities for creating hash keys from normally unhashable types.
class EqualityHashKey:
"""
Create a hash key that uses equality comparisons between items.
This may be used to create hash keys for otherwise unhashable types.
Adding N EqualityHashKey items to a hash container may require O(N**2) operations.
A suitable key function such as tuple or frozenset is usually preferred if possible.
"""
def __init__(self, key, item):
"""
Create equality-based hash key.
Parameters:
- key: function or index that returns a hashable object to distinguish unequal items
- item: the item to create a hash key for
"""Additional sequence manipulation functions.
def unzip(seq):
"""
Inverse of zip. Unpack a sequence of tuples into separate sequences.
Parameters:
- seq: sequence of tuples to unpack
Returns:
Tuple of sequences, one for each position in the input tuples
"""Functions for parallel and distributed computation.
def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None):
"""
Reduce without guarantee of ordered reduction.
Enables parallel reduction by chunking the sequence and distributing work
across multiple map operations.
Parameters:
- binop: associative binary operator for reduction
- seq: sequence to be aggregated
- default: identity element (optional)
- map: map implementation to use (can be parallel)
- chunksize: number of elements per chunk
- combine: binary operator to combine intermediate results (optional)
Returns:
Result of folding binop across the sequence
"""from toolz.sandbox import EqualityHashKey
from toolz import unique, curry

# Partially apply EqualityHashKey so it can be used as a key= callable.
hash_by_equality = curry(EqualityHashKey, None)
hash_by_len = curry(EqualityHashKey, len)

# unique() requires hashable keys; EqualityHashKey wraps unhashable items
# (here: lists) so they can live in a hash-based container.
samples = [[], (), [1], [1], [2]]
distinct = list(unique(samples, key=hash_by_equality))
# [[], (), [1], [2]]

# Supplying a real key function such as len avoids the O(N**2)
# equality scans of the pure-equality key.
distinct_by_len = list(unique(samples, key=hash_by_len))
# [[], [1], [2]] (keeps first of each length)

from toolz.sandbox import unzip
# unzip inverts zip: a sequence of tuples becomes one sequence per position.
letter_pairs = [(1, 'a'), (2, 'b'), (3, 'c')]
ids, labels = unzip(letter_pairs)
# ids: (1, 2, 3)
# labels: ('a', 'b', 'c')

# Arity is preserved: three-element tuples unpack into three sequences.
records = [(1, 'a', True), (2, 'b', False), (3, 'c', True)]
nums, chars, flags = unzip(records)
# nums: (1, 2, 3)
# chars: ('a', 'b', 'c')
# flags: (True, False, True)

from toolz.sandbox import fold
from operator import add
import multiprocessing

# Sequential fold: equivalent to reduce(add, values, 0).
values = list(range(1000))
sequential_sum = fold(add, values, 0)
# 499500

# Passing Pool.map as the map= argument distributes the chunked
# partial reductions across worker processes.
worker_pool = multiprocessing.Pool()
pooled_sum = fold(add, values, 0, map=worker_pool.map, chunksize=50)
worker_pool.close()
worker_pool.join()
# 499500 (same result, computed in parallel)

# Any associative binary operator works; merging dicts folds a
# sequence of mappings into a single one.
from toolz import merge
dicts = [{'a': 1}, {'b': 2}, {'a': 3, 'c': 4}]
combined = fold(merge, dicts, {})
# {'a': 3, 'b': 2, 'c': 4}

Install with the Tessl CLI:

npx tessl i tessl/pypi-toolz