0
# Iterator Operations
1
2
Comprehensive sequence processing functions for filtering, grouping, partitioning, and transforming iterables. These functions work with any iterable and form the backbone of functional data processing pipelines.
3
4
## Capabilities
5
6
### Sequence Filtering & Selection
7
8
Functions for selecting elements from sequences based on position or predicate.
9
10
```python { .api }
11
def remove(predicate, seq):
12
"""
13
Return items for which predicate(item) is False.
14
15
Parameters:
16
- predicate: function that returns True/False
17
- seq: iterable sequence
18
19
Returns:
20
Iterator of items where predicate is False
21
"""
22
23
def unique(seq, key=None):
24
"""
25
Return only unique elements of sequence.
26
27
Parameters:
28
- seq: iterable sequence
29
- key: function for computing uniqueness key (optional)
30
31
Returns:
32
Iterator of unique elements in order first seen
33
"""
34
35
def take(n, seq):
36
"""
37
First n elements of sequence.
38
39
Parameters:
40
- n: number of elements to take
41
- seq: iterable sequence
42
43
Returns:
44
Iterator of first n elements
45
"""
46
47
def drop(n, seq):
48
"""
49
Sequence following first n elements.
50
51
Parameters:
52
- n: number of elements to drop
53
- seq: iterable sequence
54
55
Returns:
56
Iterator starting after first n elements
57
"""
58
59
def take_nth(n, seq):
60
"""
61
Every nth item in sequence.
62
63
Parameters:
64
- n: step size (take every nth element)
65
- seq: iterable sequence
66
67
Returns:
68
Iterator of every nth element
69
"""
70
71
def tail(n, seq):
72
"""
73
Last n elements of sequence.
74
75
Parameters:
76
- n: number of elements from end
77
- seq: iterable sequence
78
79
Returns:
80
Last n elements: a slice of seq when it supports slicing, otherwise a tuple
81
"""
82
```
83
84
### Sequence Access
85
86
Functions for accessing individual elements from sequences.
87
88
```python { .api }
89
def first(seq):
90
"""
91
First element in sequence.
92
93
Parameters:
94
- seq: iterable sequence
95
96
Returns:
97
First element, or raises StopIteration if empty
98
"""
99
100
def second(seq):
101
"""
102
Second element in sequence.
103
104
Parameters:
105
- seq: iterable sequence
106
107
Returns:
108
Second element, or raises StopIteration if insufficient elements
109
"""
110
111
def nth(n, seq):
112
"""
113
nth element in sequence (0-indexed).
114
115
Parameters:
116
- n: index of element to retrieve
117
- seq: iterable sequence
118
119
Returns:
120
Element at index n; if out of range, raises IndexError for sequences or StopIteration for other iterables
121
"""
122
123
def last(seq):
124
"""
125
Last element in sequence.
126
127
Parameters:
128
- seq: iterable sequence
129
130
Returns:
131
Last element, or raises IndexError if empty
132
"""
133
134
def get(ind, seq, default=no_default):
135
"""
136
Get element from sequence or dict with optional default.
137
138
Parameters:
139
- ind: index/key to retrieve
140
- seq: sequence or mapping
141
- default: value to return if key/index not found
142
143
Returns:
144
Element at ind, or default if not found
145
"""
146
147
def peek(seq):
148
"""
149
Retrieve next element of sequence without consuming it.
150
151
Parameters:
152
- seq: iterable sequence
153
154
Returns:
155
Tuple of (next_element, iterator_with_element_restored)
156
"""
157
158
def peekn(n, seq):
159
"""
160
Retrieve next n elements of sequence without consuming them.
161
162
Parameters:
163
- n: number of elements to peek
164
- seq: iterable sequence
165
166
Returns:
167
Tuple of (tuple_of_n_elements, iterator_with_elements_restored)
168
"""
169
```
170
171
### Sequence Combination
172
173
Functions for combining multiple sequences in various ways.
174
175
```python { .api }
176
def concat(seqs):
177
"""
178
Concatenate zero or more iterables into single iterator.
179
180
Parameters:
181
- seqs: iterable of iterables to concatenate
182
183
Returns:
184
Iterator of all elements from all input iterables
185
"""
186
187
def concatv(*seqs):
188
"""
189
Variadic version of concat - concatenate sequences.
190
191
Parameters:
192
- *seqs: variable number of iterables
193
194
Returns:
195
Iterator of all elements from all input iterables
196
"""
197
198
def interleave(seqs):
199
"""
200
Interleave elements from multiple sequences.
201
202
Parameters:
203
- seqs: iterable of iterables
204
205
Returns:
206
Iterator alternating elements from each input sequence
207
"""
208
209
def merge_sorted(*seqs, **kwargs):
210
"""
211
Merge sorted sequences into single sorted sequence.
212
213
Parameters:
214
- *seqs: sorted iterables to merge
215
- key: function for sort key (optional)
216
- (note) key is the only keyword argument honored; there is no reverse option, so inputs must be sorted ascending
217
218
Returns:
219
Iterator of merged sorted elements
220
"""
221
222
def join(leftkey, leftseq, rightkey, rightseq,
223
left_default=no_default, right_default=no_default):
224
"""
225
Join two sequences on common attributes like SQL join.
226
227
Parameters:
228
- leftkey: function to compute join key from left items
229
- leftseq: left sequence to join
230
- rightkey: function to compute join key from right items
231
- rightseq: right sequence to join
232
- left_default: default for missing left items (optional)
233
- right_default: default for missing right items (optional)
234
235
Returns:
236
Iterator of (left_item, right_item) tuples
237
"""
238
```
239
240
### Sequence Grouping & Partitioning
241
242
Functions for organizing sequences into groups or partitions.
243
244
```python { .api }
245
def groupby(key, seq):
246
"""
247
Group collection by key function.
248
249
Parameters:
250
- key: function to compute grouping key, or a non-callable index/key used for item lookup (item[key])
251
- seq: iterable sequence to group
252
253
Returns:
254
Dictionary mapping keys to lists of grouped items
255
"""
256
257
def partition(n, seq, pad=no_pad):
258
"""
259
Partition sequence into tuples of length n.
260
261
Parameters:
262
- n: length of each partition tuple
263
- seq: iterable sequence to partition
264
- pad: value to pad final tuple if needed (optional)
265
266
Returns:
267
Iterator of tuples of length n
268
"""
269
270
def partition_all(n, seq):
271
"""
272
Partition sequence into tuples of length at most n.
273
274
Parameters:
275
- n: maximum length of each partition tuple
276
- seq: iterable sequence to partition
277
278
Returns:
279
Iterator of tuples of length up to n
280
"""
281
282
def sliding_window(n, seq):
283
"""
284
Sequence of overlapping subsequences of length n.
285
286
Parameters:
287
- n: window size
288
- seq: iterable sequence
289
290
Returns:
291
Iterator of overlapping tuples of length n
292
"""
293
```
294
295
### Sequence Analysis
296
297
Functions for analyzing sequence properties and contents.
298
299
```python { .api }
300
def frequencies(seq):
301
"""
302
Count occurrences of each value in sequence.
303
304
Parameters:
305
- seq: iterable sequence
306
307
Returns:
308
Dictionary mapping values to occurrence counts
309
"""
310
311
def isiterable(x):
312
"""
313
Check if object is iterable (strings count as iterable).
314
315
Parameters:
316
- x: object to test
317
318
Returns:
319
True if iter(x) succeeds (including for strings), False otherwise
320
"""
321
322
def isdistinct(seq):
323
"""
324
Check if all values in sequence are distinct.
325
326
Parameters:
327
- seq: iterable sequence
328
329
Returns:
330
True if all elements are unique
331
"""
332
333
def count(seq):
334
"""
335
Count number of items in sequence.
336
337
Parameters:
338
- seq: iterable sequence
339
340
Returns:
341
Integer count of elements
342
"""
343
344
def diff(*seqs, **kwargs):
345
"""
346
Return items that differ between sequences.
347
348
Parameters:
349
- *seqs: sequences to compare
350
- default: value for missing items (optional)
351
352
Returns:
353
Iterator of tuples, one per position at which the sequences' items differ
354
"""
355
356
def topk(k, seq, key=None):
357
"""
358
Find k largest elements of sequence.
359
360
Parameters:
361
- k: number of elements to return
362
- seq: iterable sequence
363
- key: function for comparison key (optional)
364
365
Returns:
366
Tuple of k largest elements, in descending order
367
"""
368
```
369
370
### Advanced Operations
371
372
Complex operations combining multiple sequence manipulations.
373
374
```python { .api }
375
def accumulate(binop, seq, initial=no_default):
376
"""
377
Repeatedly apply binary function to sequence, accumulating results.
378
379
Parameters:
380
- binop: binary function (takes two args, returns one)
381
- seq: iterable sequence
382
- initial: starting value (optional)
383
384
Returns:
385
Iterator of accumulated results
386
"""
387
388
def reduceby(key, binop, seq, init=no_default):
389
"""
390
Simultaneously group by key and reduce each group.
391
392
Parameters:
393
- key: function to compute grouping key
394
- binop: binary function for reduction
395
- seq: iterable sequence
396
- init: initial value for reduction (optional)
397
398
Returns:
399
Dictionary mapping keys to reduced values
400
"""
401
402
def mapcat(func, seqs):
403
"""
404
Apply function to sequences and concatenate results.
405
406
Parameters:
407
- func: function that returns iterable
408
- seqs: iterable of sequences
409
410
Returns:
411
Iterator of concatenated function results
412
"""
413
414
def iterate(func, x):
415
"""
416
Repeatedly apply function to create infinite sequence.
417
418
Parameters:
419
- func: function to apply repeatedly
420
- x: initial value
421
422
Returns:
423
Iterator of x, func(x), func(func(x)), ...
424
"""
425
426
def pluck(ind, seqs, default=no_default):
427
"""
428
Pluck element(s) from each item in sequence.
429
430
Parameters:
431
- ind: index/key or list of indices/keys to pluck
432
- seqs: sequence of sequences/mappings
433
- default: value for missing indices (optional)
434
435
Returns:
436
Iterator of plucked elements
437
"""
438
439
def random_sample(prob, seq, random_state=None):
440
"""
441
Return elements from sequence with given probability.
442
443
Parameters:
444
- prob: probability (0-1) of including each element
445
- seq: iterable sequence
446
- random_state: random number generator seed (optional)
447
448
Returns:
449
Iterator of randomly sampled elements
450
"""
451
452
def getter(index):
453
"""
454
Create function that gets item from its operand.
455
456
Equivalent to lambda x: x[index] but optimized and supports
457
returning a tuple of several items when given a list of indices.
458
459
Parameters:
460
- index: index/key, or a list of indices/keys to fetch several items at once
461
462
Returns:
463
Function that extracts specified item(s) from its argument
464
"""
465
```
466
467
### Sequence Construction
468
469
Functions for building new sequences from existing ones.
470
471
```python { .api }
472
def cons(el, seq):
473
"""
474
Add element to beginning of sequence.
475
476
Parameters:
477
- el: element to prepend
478
- seq: iterable sequence
479
480
Returns:
481
Iterator with el followed by all elements of seq
482
"""
483
484
def interpose(el, seq):
485
"""
486
Introduce element between each pair of elements in sequence.
487
488
Parameters:
489
- el: element to interpose
490
- seq: iterable sequence
491
492
Returns:
493
Iterator with el between each adjacent pair
494
"""
495
```
496
497
## Usage Examples
498
499
### Data Filtering Pipeline
500
501
```python
502
from toolz import pipe, unique, take, groupby
503
504
# Process transaction data
505
transactions = [
506
{'id': 1, 'amount': 100, 'category': 'food'},
507
{'id': 2, 'amount': 50, 'category': 'transport'},
508
{'id': 3, 'amount': 100, 'category': 'food'}, # duplicate amount
509
{'id': 4, 'amount': 200, 'category': 'entertainment'},
510
]
511
512
# Get unique amounts, group by range
513
result = pipe(
514
transactions,
515
lambda x: map(lambda t: t['amount'], x), # extract amounts
516
lambda x: unique(x), # unique amounts only
517
lambda x: groupby(lambda a: 'high' if a >= 100 else 'low', x)
518
)
519
# {'low': [50], 'high': [100, 200]}
520
```
521
522
### Sequence Partitioning
523
524
```python
525
from toolz import partition, sliding_window, take
526
527
data = range(10) # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
528
529
# Partition into groups of 3
530
groups = list(partition(3, data))
531
# [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
532
533
# Sliding window of size 3
534
windows = list(take(5, sliding_window(3, data)))
535
# [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]
536
```
537
538
### Advanced Grouping
539
540
```python
541
from toolz import groupby, frequencies, reduceby
542
from operator import add
543
544
words = ['apple', 'banana', 'apricot', 'cherry', 'blueberry']
545
546
# Group by first letter
547
by_letter = groupby(lambda w: w[0], words)
548
# {'a': ['apple', 'apricot'], 'b': ['banana', 'blueberry'], 'c': ['cherry']}
549
550
# Count word lengths
551
length_counts = frequencies(map(len, words))
552
# {5: 1, 6: 2, 7: 1, 9: 1}
553
554
# Sum lengths by first letter
555
letter_lengths = reduceby(lambda w: w[0], lambda total, w: total + len(w), words, 0)
556
# {'a': 12, 'b': 15, 'c': 6}
557
```
558
559
### Index-Based Access
560
561
```python
562
from toolz import pluck
from toolz.itertoolz import getter  # getter is not re-exported from the top-level toolz namespace
563
564
# Create getter functions
565
get_first = getter(0)
566
get_name = getter('name')
567
get_nested = lambda item: item['person']['name']  # getter(['person', 'name']) would look up both keys on the same mapping, not nest
568
569
# Use with sequences
570
data = [(1, 'a'), (2, 'b'), (3, 'c')]
571
first_items = list(map(get_first, data))
572
# [1, 2, 3]
573
574
# Use with dictionaries
575
people = [{'name': 'Alice', 'age': 30}, {'name': 'Bob', 'age': 25}]
576
names = list(map(get_name, people))
577
# ['Alice', 'Bob']
578
579
# Nested access
580
nested_data = [
581
{'person': {'name': 'Alice', 'age': 30}},
582
{'person': {'name': 'Bob', 'age': 25}}
583
]
584
nested_names = list(map(get_nested, nested_data))
585
# ['Alice', 'Bob']
586
587
# pluck is similar but works on sequences of items
588
names_plucked = list(pluck('name', people))
589
# ['Alice', 'Bob']
590
```