0
# List and Sequence Operations
1
2
Comprehensive sequence manipulation including chunking, filtering, sorting, and uniqueness operations for working with iterables and sequences.
3
4
## Capabilities
5
6
### Sequence Analysis
7
8
Functions for analyzing and finding patterns in sequences.
9
10
```python { .api }
11
def allsame(iterable, eq=operator.eq):
12
"""
13
Check if all items in sequence are the same.
14
15
Args:
16
iterable: Sequence to check
17
eq: Equality function (default: operator.eq)
18
19
Returns:
20
bool: True if all items are equal
21
"""
22
23
def unique(items, key=None):
24
"""
25
Get unique items preserving order.
26
27
Args:
28
items: Input sequence
29
key: Key function for uniqueness comparison
30
31
Returns:
32
generator: Unique items in original order (wrap in list() to materialize)
33
"""
34
35
def unique_flags(items, key=None):
36
"""
37
Boolean flags indicating unique items.
38
39
Args:
40
items: Input sequence
41
key: Key function for uniqueness comparison
42
43
Returns:
44
list[bool]: True for first occurrence of each unique item
45
"""
46
47
def find_duplicates(items, k=2):
48
"""
49
Find items occurring k+ times.
50
51
Args:
52
items: Input sequence
53
k (int): Minimum occurrence count
54
55
Returns:
56
dict: Maps each item with k+ occurrences to the list of indices where it appears
57
"""
58
```
59
60
### Sequence Indexing and Sorting
61
62
Functions for finding indices and sorting sequences.
63
64
```python { .api }
65
def argmax(sequence, key=None):
66
"""
67
Index of maximum value.
68
69
Args:
70
sequence: Input sequence
71
key: Key function for comparison
72
73
Returns:
74
int: Index of maximum element
75
"""
76
77
def argmin(sequence, key=None):
78
"""
79
Index of minimum value.
80
81
Args:
82
sequence: Input sequence
83
key: Key function for comparison
84
85
Returns:
86
int: Index of minimum element
87
"""
88
89
def argsort(sequence, key=None, reverse=False):
90
"""
91
Indices that would sort the sequence.
92
93
Args:
94
sequence: Input sequence
95
key: Key function for sorting
96
reverse (bool): Sort in descending order
97
98
Returns:
99
list[int]: Indices for sorted order
100
"""
101
102
def argunique(items, key=None):
103
"""
104
Indices of unique items.
105
106
Args:
107
items: Input sequence
108
key: Key function for uniqueness
109
110
Returns:
111
Iterator[int]: Indices of the first occurrence of each unique item
112
"""
113
```
114
115
### Sequence Filtering and Selection
116
117
Functions for filtering and selecting elements from sequences.
118
119
```python { .api }
120
def compress(sequence, selectors):
121
"""
122
Filter sequence by boolean selectors.
123
124
Args:
125
sequence: Input sequence
126
selectors: Boolean sequence for filtering
127
128
Returns:
129
iterator: Filtered items where selector is True (wrap in list() to materialize)
130
"""
131
132
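def boolmask(indices, maxval=None):
133
"""
134
Construct a boolean list that is True at the given indices.
135
136
Args:
137
indices: Integer positions to mark as True
138
maxval: Length of the returned list (default: max(indices) + 1)
139
140
Returns:
141
list[bool]: Mask with True at each position listed in indices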
142
"""
143
144
def take(items, indices):
145
"""
146
Take items at specified indices.
147
148
Args:
149
items: Input sequence
150
indices: Indices to select
151
152
Returns:
153
generator: Selected items (wrap in list() to materialize)
154
"""
155
156
def peek(iterable, default=NoParam):
157
"""
158
Return the first item of an iterable. If the input is an iterator, that item is consumed.
159
160
Args:
161
iterable: Input iterable
162
default: Default if iterable is empty
163
164
Returns:
165
The first item, or default if the iterable is empty
166
167
Raises:
168
StopIteration: If iterable is empty and no default
169
"""
170
```
171
172
### Sequence Transformation
173
174
Functions for transforming and restructuring sequences.
175
176
```python { .api }
177
class chunks:
178
"""
179
Generate successive n-sized chunks from an iterable.
180
181
Args:
182
items: Input iterable to iterate over
183
chunksize (int, optional): Size of each chunk yielded
184
nchunks (int, optional): Number of chunks to create (cannot be used with chunksize)
185
total (int, optional): Hints about the length of the input
186
bordermode (str): How to handle last chunk if length not divisible by chunksize.
187
Options: 'none' (smaller last chunk), 'cycle' (fill with values from beginning),
188
'replicate' (fill by replicating last value). Default: 'none'
189
legacy (bool): Use old behavior, defaults to False
190
191
Yields:
192
list: Successive non-overlapping chunks of the input items
193
194
Attributes:
195
remainder (int): Number of leftover items that don't divide cleanly
196
"""
197
def __init__(self, items, chunksize=None, nchunks=None, total=None, bordermode='none', legacy=False): ...
198
199
def flatten(nested_list, isinstance=isinstance):
200
"""
201
Flatten one level of nesting.
202
203
Args:
204
nested_list: Nested sequence
205
isinstance: Type checking function
206
207
Returns:
208
generator: Flattened items
209
"""
210
211
def iter_window(iterable, size=2, step=1, wrap=False):
212
"""
213
Sliding window iterator.
214
215
Args:
216
iterable: Input sequence
217
size (int): Window size
218
step (int): Step size between windows
219
wrap (bool): Wrap around at end
220
221
Returns:
222
generator: Generator yielding windows
223
224
Yields:
225
tuple: Window of items
226
"""
227
```
228
229
### Sequence Utilities
230
231
Helper functions for working with sequences and iterables.
232
233
```python { .api }
234
def iterable(obj):
235
"""
236
Check if object is iterable (but not string).
237
238
Args:
239
obj: Object to check
240
241
Returns:
242
bool: True if iterable and not string
243
"""
244
```
245
246
## Usage Examples
247
248
### Sequence Analysis
249
250
```python
251
import ubelt as ub
252
253
# Check if all items are the same
254
numbers = [5, 5, 5, 5]
255
print(ub.allsame(numbers)) # True
256
257
mixed = [1, 2, 1, 2]
258
print(ub.allsame(mixed)) # False
259
260
# Find unique items (preserving order)
261
items = ['a', 'b', 'a', 'c', 'b', 'd']
262
unique_items = list(ub.unique(items))
263
print(unique_items) # ['a', 'b', 'c', 'd']
264
265
# Get flags for unique items
266
flags = ub.unique_flags(items)
267
print(flags) # [True, True, False, True, False, True]
268
269
# Find duplicates
270
data = [1, 2, 3, 2, 4, 1, 2]
271
duplicates = ub.find_duplicates(data)
272
print(duplicates) # {1: [0, 5], 2: [1, 3, 6]} (items appearing 2+ times, mapped to their indices)
273
274
# Find items appearing 3+ times
275
frequent = ub.find_duplicates(data, k=3)
276
print(frequent) # {2: [1, 3, 6]} (only 2 appears 3+ times)
277
```
278
279
### Indexing and Sorting
280
281
```python
282
import ubelt as ub
283
284
# Find indices of min/max
285
values = [10, 5, 8, 3, 12, 7]
286
max_idx = ub.argmax(values)
287
min_idx = ub.argmin(values)
288
print(f"Max at index {max_idx}: {values[max_idx]}") # Max at index 4: 12
289
print(f"Min at index {min_idx}: {values[min_idx]}") # Min at index 3: 3
290
291
# Get sort indices
292
words = ['banana', 'apple', 'cherry', 'date']
293
sort_indices = ub.argsort(words)
294
sorted_words = [words[i] for i in sort_indices]
295
print(sorted_words) # ['apple', 'banana', 'cherry', 'date']
296
297
# Sort by custom key (word length)
298
length_indices = ub.argsort(words, key=len)
299
by_length = [words[i] for i in length_indices]
300
print(by_length) # ['date', 'apple', 'banana', 'cherry']
301
302
# Indices of unique items
303
items = ['x', 'y', 'x', 'z', 'y']
304
unique_indices = ub.argunique(items)
305
unique_values = [items[i] for i in unique_indices]
306
print(unique_values) # ['x', 'y', 'z']
307
```
308
309
### Filtering and Selection
310
311
```python
312
import ubelt as ub
313
314
# Filter with boolean mask
315
data = [1, 2, 3, 4, 5, 6]
316
mask = [True, False, True, False, True, False]
317
filtered = list(ub.compress(data, mask))
318
print(filtered) # [1, 3, 5]
319
320
# Build a boolean mask from a list of indices
321
result = ub.boolmask([0, 2, 4], maxval=len(data))
322
print(result) # [True, False, True, False, True, False] (same as mask)
323
324
# Take specific indices
325
indices = [0, 2, 4]
326
selected = list(ub.take(data, indices))
327
print(selected) # [1, 3, 5]
328
329
# Peek at the first item (consumes it when given an iterator)
330
numbers = iter([10, 20, 30, 40])
331
first = ub.peek(numbers)
332
print(f"First item: {first}") # First item: 10
333
remaining = list(numbers) # [20, 30, 40] (first item was consumed by peek)
334
```
335
336
### Sequence Transformation
337
338
```python
339
import ubelt as ub
340
341
# Split into chunks by size
342
data = list(range(10))
343
for chunk in ub.chunks(data, chunksize=3):
344
print(chunk)
345
# Output: [0, 1, 2], [3, 4, 5], [6, 7, 8], [9]
346
347
# Split into a specific number of chunks
348
for chunk in ub.chunks(data, nchunks=3):
349
print(chunk)
350
# Output: [0, 1, 2, 3], [4, 5, 6], [7, 8, 9]
351
352
# Different border modes for incomplete chunks
353
items = [1, 2, 3, 4, 5, 6, 7]
354
for chunk in ub.chunks(items, chunksize=3, bordermode='cycle'):
355
print(chunk)
356
# Output: [1, 2, 3], [4, 5, 6], [7, 1, 2]
357
358
# Flatten nested structure
359
nested = [[1, 2], [3, 4, 5], [6]]
360
flattened = list(ub.flatten(nested))
361
print(flattened) # [1, 2, 3, 4, 5, 6]
362
363
# Sliding window
364
sequence = [1, 2, 3, 4, 5]
365
for window in ub.iter_window(sequence, size=3):
366
print(window)
367
# Output: (1, 2, 3), (2, 3, 4), (3, 4, 5)
368
369
# Custom step size
370
for window in ub.iter_window(sequence, size=2, step=2):
371
print(window)
372
# Output: (1, 2), (3, 4)
373
```
374
375
### Advanced Patterns
376
377
```python
378
import ubelt as ub
379
380
# Process data in chunks with progress
381
large_dataset = list(range(1000))
382
results = []
383
384
chunk_iter = ub.chunks(large_dataset, chunksize=50)
385
for chunk in ub.ProgIter(chunk_iter, desc='Processing chunks'):
386
# Process each chunk
387
chunk_result = sum(chunk) # Example processing
388
results.append(chunk_result)
389
390
# Find patterns in sequences
391
def find_runs(sequence):
392
"""Find consecutive runs of identical items"""
393
runs = []
394
if not sequence:
395
return runs
396
397
current_item = sequence[0]
398
current_run = [0] # Start with first index
399
400
for i, item in enumerate(sequence[1:], 1):
401
if item == current_item:
402
current_run.append(i)
403
else:
404
runs.append((current_item, current_run))
405
current_item = item
406
current_run = [i]
407
408
runs.append((current_item, current_run))
409
return runs
410
411
# Example usage
412
sequence = [1, 1, 1, 2, 2, 3, 3, 3, 3]
413
runs = find_runs(sequence)
414
for value, indices in runs:
415
print(f"Value {value} at indices: {indices}")
416
417
# Combine with ubelt utilities
418
unique_values = list(ub.unique([value for value, _ in runs]))
419
max_run_length = max(len(indices) for _, indices in runs)
420
print(f"Unique values: {unique_values}")
421
print(f"Longest run: {max_run_length}")
422
```
423
424
### Sequence Validation
425
426
```python
427
import ubelt as ub
428
429
# Check if object is iterable
430
test_objects = [
431
[1, 2, 3], # list - iterable
432
'hello', # string - ub.iterable returns False for strings by default
433
42, # int - not iterable
434
(1, 2), # tuple - iterable
435
{1, 2, 3}, # set - iterable
436
]
437
438
for obj in test_objects:
439
is_iter = ub.iterable(obj)
440
print(f"{obj!r} is iterable: {is_iter}")
441
442
# Safe iteration over potentially non-iterable objects
443
def safe_process(obj):
444
if ub.iterable(obj):
445
return list(ub.unique(obj))
446
else:
447
return [obj] # Wrap single item
448
449
examples = [[1, 2, 1, 3], 'abc', 42, (1, 2, 1)]
450
for example in examples:
451
result = safe_process(example)
452
print(f"{example} -> {result}")
453
```