A Python utility belt containing simple tools, a stdlib like feel, and extra batteries
Comprehensive sequence manipulation including chunking, filtering, sorting, and uniqueness operations for working with iterables and sequences.
Functions for analyzing and finding patterns in sequences.
def allsame(iterable, eq=operator.eq):
"""
Check if all items in sequence are the same.
Args:
iterable: Sequence to check
eq: Equality function (default: operator.eq)
Returns:
bool: True if all items are equal
"""
def unique(items, key=None):
"""
Get unique items preserving order.
Args:
items: Input sequence
key: Key function for uniqueness comparison
Returns:
generator: Unique items in original order (wrap in list() to materialize)
"""
def unique_flags(items, key=None):
"""
Boolean flags indicating unique items.
Args:
items: Input sequence
key: Key function for uniqueness comparison
Returns:
list[bool]: True for first occurrence of each unique item
"""
def find_duplicates(items, k=2):
"""
Find items occurring k+ times.
Args:
items: Input sequence
k (int): Minimum occurrence count
Returns:
dict: Maps each item with k+ occurrences to the list of indices where it occurs
"""
Functions for finding indices and sorting sequences.
def argmax(sequence, key=None):
"""
Index of maximum value.
Args:
sequence: Input sequence
key: Key function for comparison
Returns:
int: Index of maximum element
"""
def argmin(sequence, key=None):
"""
Index of minimum value.
Args:
sequence: Input sequence
key: Key function for comparison
Returns:
int: Index of minimum element
"""
def argsort(sequence, key=None, reverse=False):
"""
Indices that would sort the sequence.
Args:
sequence: Input sequence
key: Key function for sorting
reverse (bool): Sort in descending order
Returns:
list[int]: Indices for sorted order
"""
def argunique(items, key=None):
"""
Indices of unique items.
Args:
items: Input sequence
key: Key function for uniqueness
Returns:
list[int]: Indices of unique items
"""
Functions for filtering and selecting elements from sequences.
def compress(sequence, selectors):
"""
Filter sequence by boolean selectors.
Args:
sequence: Input sequence
selectors: Boolean sequence for filtering
Returns:
iterator: Filtered items where selector is True (wrap in list() to materialize)
"""
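def boolmask(indices, maxval=None):
"""
Build a boolean mask that is True at the given indices.
Args:
indices: Integer positions to mark as True
maxval (int, optional): Length of the mask (default: max(indices) + 1)
Returns:
list[bool]: Mask that is True at each given index and False elsewhere
"""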
def take(items, indices):
"""
Take items at specified indices.
Args:
items: Input sequence
indices: Indices to select
Returns:
generator: Selected items (wrap in list() to materialize)
"""
def peek(iterable, default=NoParam):
"""
Look at the first item of an iterable.
If the input is an iterator, the returned item is consumed from it.
Args:
iterable: Input iterable
default: Default if iterable is empty
Returns:
object: The first item (or default if the iterable is empty)
Raises:
StopIteration: If iterable is empty and no default
"""
Functions for transforming and restructuring sequences.
class chunks:
"""
Generate successive n-sized chunks from an iterable.
Args:
items: Input iterable to iterate over
chunksize (int, optional): Size of each chunk yielded
nchunks (int, optional): Number of chunks to create (cannot be used with chunksize)
total (int, optional): Hints about the length of the input
bordermode (str): How to handle last chunk if length not divisible by chunksize.
Options: 'none' (smaller last chunk), 'cycle' (fill with values from beginning),
'replicate' (fill by replicating last value). Default: 'none'
legacy (bool): Use old behavior, defaults to False
Yields:
list: Successive non-overlapping chunks of the input items
Attributes:
remainder (int): Number of leftover items that don't divide cleanly
"""
def __init__(self, items, chunksize=None, nchunks=None, total=None, bordermode='none', legacy=False): ...
def flatten(nested_list, isinstance=isinstance):
"""
Flatten one level of nesting.
Args:
nested_list: Nested sequence
isinstance: Type checking function
Returns:
generator: Flattened items
"""
def iter_window(iterable, size=2, step=1, wrap=False):
"""
Sliding window iterator.
Args:
iterable: Input sequence
size (int): Window size
step (int): Step size between windows
wrap (bool): Wrap around at end
Returns:
generator: Generator yielding windows
Yields:
tuple: Window of items
"""
Helper functions for working with sequences and iterables.
def iterable(obj):
"""
Check if object is iterable (but not string).
Args:
obj: Object to check
Returns:
bool: True if iterable and not string
"""
import ubelt as ub
# Check if all items are the same
numbers = [5, 5, 5, 5]
print(ub.allsame(numbers)) # True
mixed = [1, 2, 1, 2]
print(ub.allsame(mixed)) # False
# Find unique items (preserving order)
items = ['a', 'b', 'a', 'c', 'b', 'd']
unique_items = list(ub.unique(items))
print(unique_items) # ['a', 'b', 'c', 'd']
# Get flags for unique items
flags = ub.unique_flags(items)
print(flags) # [True, True, False, True, False, True]
# Find duplicates
data = [1, 2, 3, 2, 4, 1, 2]
duplicates = ub.find_duplicates(data)
print(duplicates) # {1: [0, 5], 2: [1, 3, 6]} (items appearing 2+ times, mapped to their indices)
# Find items appearing 3+ times
frequent = ub.find_duplicates(data, k=3)
print(frequent) # {2: [1, 3, 6]} (only 2 appears 3+ times)
import ubelt as ub
# Find indices of min/max
values = [10, 5, 8, 3, 12, 7]
max_idx = ub.argmax(values)
min_idx = ub.argmin(values)
print(f"Max at index {max_idx}: {values[max_idx]}") # Max at index 4: 12
print(f"Min at index {min_idx}: {values[min_idx]}") # Min at index 3: 3
# Get sort indices
words = ['banana', 'apple', 'cherry', 'date']
sort_indices = ub.argsort(words)
sorted_words = [words[i] for i in sort_indices]
print(sorted_words) # ['apple', 'banana', 'cherry', 'date']
# Sort by custom key (word length)
length_indices = ub.argsort(words, key=len)
by_length = [words[i] for i in length_indices]
print(by_length) # ['date', 'apple', 'banana', 'cherry']
# Indices of unique items
items = ['x', 'y', 'x', 'z', 'y']
unique_indices = ub.argunique(items)
unique_values = [items[i] for i in unique_indices]
print(unique_values) # ['x', 'y', 'z']
import ubelt as ub
# Filter with boolean mask
data = [1, 2, 3, 4, 5, 6]
mask = [True, False, True, False, True, False]
filtered = list(ub.compress(data, mask))
print(filtered) # [1, 3, 5]
# Build a boolean mask from a list of indices
result = ub.boolmask([0, 2, 4], maxval=len(data))
print(result) # [True, False, True, False, True, False] (same as mask)
# Take specific indices
indices = [0, 2, 4]
selected = list(ub.take(data, indices))
print(selected) # [1, 3, 5]
# Peek at the first item (peeking at an iterator consumes that item)
numbers = iter([10, 20, 30, 40])
first = ub.peek(numbers)
print(f"First item: {first}") # First item: 10
remaining = list(numbers) # [20, 30, 40] (first item was consumed by peek)
import ubelt as ub
# Split into chunks by size
data = list(range(10))
for chunk in ub.chunks(data, chunksize=3):
print(chunk)
# Output: [0, 1, 2], [3, 4, 5], [6, 7, 8], [9]
# Split into a specific number of chunks
for chunk in ub.chunks(data, nchunks=3):
print(chunk)
# Output: [0, 1, 2, 3], [4, 5, 6], [7, 8, 9]
# Different border modes for incomplete chunks
items = [1, 2, 3, 4, 5, 6, 7]
for chunk in ub.chunks(items, chunksize=3, bordermode='cycle'):
print(chunk)
# Output: [1, 2, 3], [4, 5, 6], [7, 1, 2]
# Flatten nested structure
nested = [[1, 2], [3, 4, 5], [6]]
flattened = list(ub.flatten(nested))
print(flattened) # [1, 2, 3, 4, 5, 6]
# Sliding window
sequence = [1, 2, 3, 4, 5]
for window in ub.iter_window(sequence, size=3):
print(window)
# Output: (1, 2, 3), (2, 3, 4), (3, 4, 5)
# Custom step size
for window in ub.iter_window(sequence, size=2, step=2):
print(window)
# Output: (1, 2), (3, 4)
import ubelt as ub
# Process data in chunks with progress
large_dataset = list(range(1000))
results = []
chunk_iter = ub.chunks(large_dataset, chunksize=50)
for chunk in ub.ProgIter(chunk_iter, desc='Processing chunks'):
# Process each chunk
chunk_result = sum(chunk) # Example processing
results.append(chunk_result)
# Find patterns in sequences
def find_runs(sequence):
    """
    Find consecutive runs of identical items.

    Args:
        sequence: Any iterable (list, tuple, string, generator, ...).

    Returns:
        list[tuple]: One ``(value, indices)`` pair per run, in order of
        appearance. ``value`` is the first item of the run and ``indices``
        lists the positions the run occupies. Empty input gives ``[]``.
    """
    runs = []
    # A single pass with enumerate avoids copying the input with a slice
    # and works for one-shot iterators that cannot be sliced or truth-tested.
    for i, item in enumerate(sequence):
        # Each item is compared with the first item of the current run.
        if runs and item == runs[-1][0]:
            runs[-1][1].append(i)
        else:
            runs.append((item, [i]))
    return runs
# Example usage
sequence = [1, 1, 1, 2, 2, 3, 3, 3, 3]
runs = find_runs(sequence)
for value, indices in runs:
print(f"Value {value} at indices: {indices}")
# Combine with ubelt utilities
unique_values = list(ub.unique([value for value, _ in runs]))
max_run_length = max(len(indices) for _, indices in runs)
print(f"Unique values: {unique_values}")
print(f"Longest run: {max_run_length}")
import ubelt as ub
# Check if object is iterable
test_objects = [
[1, 2, 3], # list - iterable
'hello', # string - reported as not iterable by ub.iterable
42, # int - not iterable
(1, 2), # tuple - iterable
{1, 2, 3}, # set - iterable
]
for obj in test_objects:
is_iter = ub.iterable(obj)
print(f"{obj!r} is iterable: {is_iter}")
# Safe iteration over potentially non-iterable objects
def safe_process(obj):
    """
    Return the unique items of ``obj`` as a list when ``ub.iterable``
    accepts it; otherwise wrap ``obj`` in a one-element list.
    """
    if not ub.iterable(obj):
        return [obj]  # Wrap single item
    return list(ub.unique(obj))
examples = [[1, 2, 1, 3], 'abc', 42, (1, 2, 1)]
for example in examples:
result = safe_process(example)
print(f"{example} -> {result}")
Install with Tessl CLI
npx tessl i tessl/pypi-ubelt