tessl/pypi-ubelt

A Python utility belt containing simple tools, a stdlib-like feel, and extra batteries

Overview

Eval results

Files

List and Sequence Operations

Name: tessl/pypi-ubelt
Author: tessl

Comprehensive sequence manipulation including chunking, filtering, sorting, and uniqueness operations for working with iterables and sequences.

Capabilities

Sequence Analysis

Functions for analyzing and finding patterns in sequences.

def allsame(iterable, eq=operator.eq):
    """
    Determine whether every item in a sequence compares equal.

    Args:
        iterable: Sequence to check
        eq: Two-argument equality function used for the comparison
            (default: operator.eq)

    Returns:
        bool: True if all items are equal

    Note:
        NOTE(review): the result for an empty input is not stated here;
        upstream ubelt appears to return True in that case - confirm.
    """

def unique(items, key=None):
    """
    Get the unique items of a sequence, preserving first-seen order.

    Args:
        items: Input sequence
        key: Optional function mapping each item to the value used for the
            uniqueness comparison

    Returns:
        Unique items in original order

    Note:
        NOTE(review): upstream ubelt documents ``unique`` as a generator
        rather than a list; wrap the result in ``list(...)`` before printing
        or indexing it, and confirm against the installed version.
    """

def unique_flags(items, key=None):
    """
    Compute one boolean flag per item marking first occurrences.

    Args:
        items: Input sequence
        key: Optional function mapping each item to the value used for the
            uniqueness comparison

    Returns:
        list[bool]: True for the first occurrence of each unique item,
        False for every later repeat
    """

def find_duplicates(items, k=2):
    """
    Find items occurring k or more times.

    Args:
        items: Input sequence
        k (int): Minimum occurrence count (default: 2)

    Returns:
        Items with k or more occurrences

    Note:
        NOTE(review): upstream ubelt appears to return a dict mapping each
        duplicated item to the list of indices where it occurs (not a plain
        list), and to accept an optional ``key`` argument - confirm against
        the installed version.
    """

Sequence Indexing and Sorting

Functions for finding indices and sorting sequences.

def argmax(sequence, key=None):
    """
    Return the index of the maximum value.

    Args:
        sequence: Input sequence
        key: Optional function applied to each value before comparison

    Returns:
        int: Index of the maximum element
    """

def argmin(sequence, key=None):
    """
    Return the index of the minimum value.

    Args:
        sequence: Input sequence
        key: Optional function applied to each value before comparison

    Returns:
        int: Index of the minimum element
    """

def argsort(sequence, key=None, reverse=False):
    """
    Return the indices that would sort the sequence.

    Args:
        sequence: Input sequence
        key: Optional function applied to each value before sorting
        reverse (bool): Sort in descending order (default: False)

    Returns:
        list[int]: Indices into ``sequence`` listed in sorted order, so that
        ``[sequence[i] for i in result]`` is the sorted sequence
    """

def argunique(items, key=None):
    """
    Return the indices of the unique items.

    Args:
        items: Input sequence
        key: Optional function mapping each item to the value used for the
            uniqueness comparison

    Returns:
        Indices of unique items

    Note:
        NOTE(review): upstream ubelt appears to yield these indices lazily
        (first occurrence of each item) instead of returning a list -
        confirm before indexing or printing the result directly.
    """

Sequence Filtering and Selection

Functions for filtering and selecting elements from sequences.

def compress(sequence, selectors):
    """
    Filter a sequence by boolean selectors.

    Args:
        sequence: Input sequence
        selectors: Boolean sequence, aligned with ``sequence``, choosing
            which items to keep

    Returns:
        Filtered items where the selector is True

    Note:
        NOTE(review): upstream ubelt appears to return a lazy iterator
        (like ``itertools.compress``) rather than a list - wrap in
        ``list(...)`` if a concrete list is needed, and confirm.
    """

def boolmask(sequence, mask):
    """
    Apply boolean mask to sequence.

    Args:
        sequence: Input sequence
        mask: Boolean mask

    Returns:
        list: Items where mask is True

    Note:
        NOTE(review): this description may not match the library. Upstream
        ubelt appears to define ``boolmask(indices, maxval=None)``, which
        BUILDS a list of booleans that is True at the given indices rather
        than applying an existing mask. Verify the signature before relying
        on this stub; ``compress`` is the documented mask-filtering helper.
    """

def take(items, indices):
    """
    Select the items found at the specified indices.

    Args:
        items: Input sequence
        indices: Indices to select, in the order the results are wanted

    Returns:
        Selected items

    Note:
        NOTE(review): upstream ubelt appears to yield the selected items
        lazily and to accept an optional ``default`` for missing indices -
        confirm; wrap in ``list(...)`` when a concrete list is needed.
    """

def peek(iterable, default=NoParam):
    """
    Look at the first item of an iterable.

    Args:
        iterable: Input iterable
        default: Value to return if the iterable is empty; when omitted,
            an empty iterable raises instead

    Returns:
        tuple: (first_item, new_iterator)

    Raises:
        StopIteration: If iterable is empty and no default

    Note:
        NOTE(review): the documented return value looks wrong. Upstream
        ubelt appears to return only the first item (no new iterator), and
        peeking at a one-shot iterator consumes that item - confirm against
        the installed version before unpacking the result.
    """

Sequence Transformation

Functions for transforming and restructuring sequences.

class chunks:
    """
    Generate successive n-sized chunks from an iterable.

    Specify the split either by ``chunksize`` (items per chunk) or by
    ``nchunks`` (number of chunks); the two cannot be combined.

    Args:
        items: Input iterable to iterate over
        chunksize (int, optional): Size of each chunk yielded
        nchunks (int, optional): Number of chunks to create (cannot be used with chunksize)
        total (int, optional): Hints about the length of the input
        bordermode (str): How to handle the last chunk if the length is not
            divisible by chunksize. Options:
            'none' (smaller last chunk),
            'cycle' (fill with values from the beginning),
            'replicate' (fill by replicating the last value).
            Default: 'none'
        legacy (bool): Use old behavior, defaults to False

    Yields:
        list: Successive non-overlapping chunks of the input items

    Attributes:
        remainder (int): Number of leftover items that don't divide cleanly
    """
    def __init__(self, items, chunksize=None, nchunks=None, total=None, bordermode='none', legacy=False): ...

def flatten(nested_list, isinstance=isinstance):
    """
    Flatten one level of nesting.

    Args:
        nested_list: Nested sequence (a sequence of sequences)
        isinstance: Type checking function

    Returns:
        generator: Flattened items; wrap in ``list(...)`` for a list

    Note:
        NOTE(review): upstream ubelt appears to define ``flatten(nested)``
        with a single argument; the ``isinstance`` parameter shown here may
        not exist - confirm before passing it.
    """

def iter_window(iterable, size=2, step=1, wrap=False):
    """
    Iterate over a sequence with a sliding window.

    Args:
        iterable: Input sequence
        size (int): Window size (default: 2)
        step (int): Step size between the starts of consecutive windows
            (default: 1)
        wrap (bool): Wrap around at end (default: False)

    Returns:
        generator: Generator yielding windows

    Yields:
        tuple: Window of ``size`` items
    """

Sequence Utilities

Helper functions for working with sequences and iterables.

def iterable(obj):
    """
    Check if an object is iterable, treating strings as non-iterable.

    Args:
        obj: Object to check

    Returns:
        bool: True if iterable and not string

    Note:
        NOTE(review): upstream ubelt appears to expose a ``strok`` flag
        (default False) that lets strings count as iterable - confirm.
    """

Usage Examples

Sequence Analysis

import ubelt as ub

# Check if all items are the same
numbers = [5, 5, 5, 5]
print(ub.allsame(numbers))  # True

mixed = [1, 2, 1, 2]
print(ub.allsame(mixed))   # False

# Find unique items (preserving order).
# ub.unique is lazy, so materialize it before printing; otherwise the
# generator object itself is printed instead of its contents.
items = ['a', 'b', 'a', 'c', 'b', 'd']
unique_items = list(ub.unique(items))
print(unique_items)  # ['a', 'b', 'c', 'd']

# Get flags for unique items
flags = ub.unique_flags(items)
print(flags)  # [True, True, False, True, False, True]

# Find duplicates: the result maps each duplicated item to the indices
# at which it occurs
data = [1, 2, 3, 2, 4, 1, 2]
duplicates = ub.find_duplicates(data)
print(duplicates)  # {1: [0, 5], 2: [1, 3, 6]} (items appearing 2+ times)

# Find items appearing 3+ times
frequent = ub.find_duplicates(data, k=3)
print(frequent)  # {2: [1, 3, 6]} (only 2 appears 3+ times)

Indexing and Sorting

import ubelt as ub

# Locate the extreme values by position
values = [10, 5, 8, 3, 12, 7]
max_idx, min_idx = ub.argmax(values), ub.argmin(values)
print(f"Max at index {max_idx}: {values[max_idx]}")  # Max at index 4: 12
print(f"Min at index {min_idx}: {values[min_idx]}")  # Min at index 3: 3

# Alphabetical order, expressed as positions into the original list
words = ['banana', 'apple', 'cherry', 'date']
sort_indices = ub.argsort(words)
sorted_words = [words[pos] for pos in sort_indices]
print(sorted_words)  # ['apple', 'banana', 'cherry', 'date']

# Same idea, but ordered by a custom key (word length)
length_indices = ub.argsort(words, key=len)
by_length = [words[pos] for pos in length_indices]
print(by_length)  # ['date', 'apple', 'banana', 'cherry']

# Positions of the first occurrence of each distinct item
items = ['x', 'y', 'x', 'z', 'y']
unique_indices = ub.argunique(items)
unique_values = [items[pos] for pos in unique_indices]
print(unique_values)  # ['x', 'y', 'z']

Filtering and Selection

import ubelt as ub

# Filter with boolean mask.
# ub.compress is lazy, so materialize the result before printing.
data = [1, 2, 3, 4, 5, 6]
mask = [True, False, True, False, True, False]
filtered = list(ub.compress(data, mask))
print(filtered)  # [1, 3, 5]

# Build a boolean mask from the positions that should be True.
# ub.boolmask(indices, maxval) constructs a mask; it does not apply one.
result = ub.boolmask([0, 2, 4], maxval=len(data))
print(result)  # [True, False, True, False, True, False] (same as mask)

# Take specific indices (ub.take is lazy as well)
indices = [0, 2, 4]
selected = list(ub.take(data, indices))
print(selected)  # [1, 3, 5]

# Peek at the first item of a sequence without consuming it.
# ub.peek returns just the first item, not an (item, iterator) pair.
numbers = [10, 20, 30, 40]
first = ub.peek(numbers)
print(f"First item: {first}")  # First item: 10
remaining = list(numbers)  # [10, 20, 30, 40] (first item included)

# Peeking at a one-shot iterator pops its first item
new_iter = iter(numbers)
first = ub.peek(new_iter)  # 10
remaining = list(new_iter)  # [20, 30, 40] (first item consumed)

Sequence Transformation

import ubelt as ub

# Fixed-size chunks; the last one is shorter when the length doesn't divide
data = list(range(10))
for part in ub.chunks(data, chunksize=3):
    print(part)
# Output: [0, 1, 2], [3, 4, 5], [6, 7, 8], [9]

# Fixed number of chunks instead of a fixed chunk size
for part in ub.chunks(data, nchunks=3):
    print(part)
# Output: [0, 1, 2, 3], [4, 5, 6], [7, 8, 9]

# 'cycle' pads an incomplete final chunk with values from the beginning
items = [1, 2, 3, 4, 5, 6, 7]
for part in ub.chunks(items, chunksize=3, bordermode='cycle'):
    print(part)
# Output: [1, 2, 3], [4, 5, 6], [7, 1, 2]

# Remove one level of nesting
nested = [[1, 2], [3, 4, 5], [6]]
flattened = list(ub.flatten(nested))
print(flattened)  # [1, 2, 3, 4, 5, 6]

# Overlapping windows of three consecutive items
sequence = [1, 2, 3, 4, 5]
for triple in ub.iter_window(sequence, size=3):
    print(triple)
# Output: (1, 2, 3), (2, 3, 4), (3, 4, 5)

# Non-overlapping pairs: advance two positions per window
for pair in ub.iter_window(sequence, size=2, step=2):
    print(pair)
# Output: (1, 2), (3, 4)

Advanced Patterns

import ubelt as ub

# Sum a large dataset chunk-by-chunk while reporting progress
large_dataset = list(range(1000))
chunk_iter = ub.chunks(large_dataset, chunksize=50)

# One result per chunk; ProgIter wraps the chunk iterator to show progress
results = [
    sum(chunk)  # Example processing
    for chunk in ub.ProgIter(chunk_iter, desc='Processing chunks')
]

# Find patterns in sequences
def find_runs(sequence):
    """Find consecutive runs of identical items.

    Args:
        sequence: Any iterable (list, string, iterator, ...). A falsy
            argument such as an empty list or None produces no runs.

    Returns:
        list[tuple]: One ``(value, indices)`` pair per run, in order of
        appearance, where ``indices`` lists the positions the run covers.
    """
    runs = []
    if not sequence:
        return runs

    # Enumerating the input directly (instead of indexing and slicing it)
    # avoids copying the sequence and lets one-shot iterators work too.
    for index, item in enumerate(sequence):
        if runs and item == runs[-1][0]:
            # Same value as the run in progress: extend it
            runs[-1][1].append(index)
        else:
            # Value changed (or first item): open a new run
            runs.append((item, [index]))
    return runs

# Example usage
sequence = [1, 1, 1, 2, 2, 3, 3, 3, 3]
runs = find_runs(sequence)
for value, indices in runs:
    print(f"Value {value} at indices: {indices}")

# Combine with ubelt utilities.
# ub.unique is lazy, so materialize it; formatting the bare result would
# print a generator object instead of the values.
unique_values = list(ub.unique([value for value, _ in runs]))
max_run_length = max(len(indices) for _, indices in runs)
print(f"Unique values: {unique_values}")
print(f"Longest run: {max_run_length}")

Sequence Validation

import ubelt as ub

# Check if object is iterable
test_objects = [
    [1, 2, 3],      # list - iterable
    'hello',        # string - excluded: ub.iterable is documented as "iterable and not string"
    42,             # int - not iterable
    (1, 2),         # tuple - iterable
    {1, 2, 3},      # set - iterable
]

# Print the verdict ub.iterable gives for each sample object
for obj in test_objects:
    is_iter = ub.iterable(obj)
    print(f"{obj!r} is iterable: {is_iter}")

# Safe iteration over potentially non-iterable objects
def safe_process(obj):
    """Return the unique items of *obj* as a list, or ``[obj]`` if it is not iterable."""
    if not ub.iterable(obj):
        return [obj]  # Wrap single item
    return list(ub.unique(obj))

# Run safe_process over list, string, scalar, and tuple inputs.
# Per the documented "iterable and not string" rule, 'abc' and 42 are
# presumably wrapped as single items rather than de-duplicated - confirm.
examples = [[1, 2, 1, 3], 'abc', 42, (1, 2, 1)]
for example in examples:
    result = safe_process(example)
    print(f"{example} -> {result}")

Install with Tessl CLI