0
# Basic Transformations
1
2
Core transformation operations for reshaping, filtering, and manipulating table data. This module provides essential functions for field operations, row selection, data conversion, and structural transformations that form the foundation of data processing workflows.
3
4
## Capabilities
5
6
### Field Selection and Manipulation
7
8
Operations for selecting, removing, and rearranging table fields.
9
10
```python { .api }
11
def cut(table, *args, **kwargs) -> Table:
12
"""
13
Select specific fields from the table.
14
15
Parameters:
16
- table: Input table
17
- args: Field names, indices, or slice objects
18
- kwargs: Additional options
19
20
Returns:
21
Table with selected fields only
22
"""
23
24
def cutout(table, *args, **kwargs) -> Table:
25
"""
26
Remove specific fields from the table.
27
28
Parameters:
29
- table: Input table
30
- args: Field names or indices to remove
31
- kwargs: Additional options
32
33
Returns:
34
Table with specified fields removed
35
"""
36
37
def movefield(table, field, index) -> Table:
38
"""
39
Move a field to a different position in the table.
40
41
Parameters:
42
- table: Input table
43
- field: Field name to move
44
- index: Target position (0-based)
45
46
Returns:
47
Table with field moved to new position
48
"""
49
50
def rename(table, *args, **kwargs) -> Table:
51
"""
52
Rename table fields.
53
54
Parameters:
55
- table: Input table
56
- args: Rename specifications (old_name, new_name pairs or dict)
57
- kwargs: Additional options
58
59
Returns:
60
Table with renamed fields
61
"""
62
```
63
64
### Field Addition
65
66
Add new fields to tables with various value sources.
67
68
```python { .api }
69
def addfield(table, field, value=None, index=None, missing=None) -> Table:
70
"""
71
Add a new field to the table.
72
73
Parameters:
74
- table: Input table
75
- field: Name of new field
76
- value: Value, function, or iterable for field values
77
- index: Position to insert field (default: end)
78
- missing: Value for missing data
79
80
Returns:
81
Table with new field added
82
"""
83
84
def addfields(table, field_defs, missing=None) -> Table:
85
"""
86
Add multiple fields to the table.
87
88
Parameters:
89
- table: Input table
90
- field_defs: Dictionary or list of (field, value) specifications
91
- missing: Value for missing data
92
93
Returns:
94
Table with new fields added
95
"""
96
97
def addfieldusingcontext(table, field, callable, **kwargs) -> Table:
98
"""
99
Add field using context-aware function.
100
101
Parameters:
102
- table: Input table
103
- field: Name of new field
104
- callable: Function that receives row context
105
- kwargs: Additional arguments passed to callable
106
107
Returns:
108
Table with new field added
109
"""
110
111
def addrownumbers(table, field='row', start=1) -> Table:
112
"""
113
Add row numbers as a new field.
114
115
Parameters:
116
- table: Input table
117
- field: Name for row number field
118
- start: Starting row number
119
120
Returns:
121
Table with row number field added
122
"""
123
124
def addcolumn(table, field, column) -> Table:
125
"""
126
Add a column of values as a new field.
127
128
Parameters:
129
- table: Input table
130
- field: Name of new field
131
- column: Iterable of values for the column
132
133
Returns:
134
Table with new column added
135
"""
136
```
137
138
### Row Selection and Filtering
139
140
Filter and select rows based on various criteria.
141
142
```python { .api }
143
def select(table, *args, **kwargs) -> Table:
144
"""
145
Select rows based on criteria.
146
147
Parameters:
148
- table: Input table
149
- args: Selection criteria (field, function/value pairs)
150
- kwargs: Additional options (complement, etc.)
151
152
Returns:
153
Table with selected rows
154
"""
155
156
def selecteq(table, field, value, complement=False) -> Table:
157
"""Select rows where field equals value."""
158
159
def selectne(table, field, value, complement=False) -> Table:
160
"""Select rows where field does not equal value."""
161
162
def selectlt(table, field, value, complement=False) -> Table:
163
"""Select rows where field is less than value."""
164
165
def selectle(table, field, value, complement=False) -> Table:
166
"""Select rows where field is less than or equal to value."""
167
168
def selectgt(table, field, value, complement=False) -> Table:
169
"""Select rows where field is greater than value."""
170
171
def selectge(table, field, value, complement=False) -> Table:
172
"""Select rows where field is greater than or equal to value."""
173
174
def selectin(table, field, test, complement=False) -> Table:
175
"""
176
Select rows where field value is in test collection.
177
178
Parameters:
179
- table: Input table
180
- field: Field name to test
181
- test: Collection of values to test membership
182
- complement: If True, select rows NOT in test
183
184
Returns:
185
Table with matching rows
186
"""
187
188
def selectnotin(table, field, test, complement=False) -> Table:
189
"""Select rows where field value is not in test collection."""
190
191
def selectnone(table, field, complement=False) -> Table:
192
"""Select rows where field is None."""
193
194
def selectnotnone(table, field, complement=False) -> Table:
195
"""Select rows where field is not None."""
196
197
def selectcontains(table, field, value, complement=False) -> Table:
198
"""Select rows where field contains value."""
199
200
def selectrangeclosed(table, field, minv, maxv, complement=False) -> Table:
201
"""Select rows where field is in closed range [minv, maxv]."""
202
203
def rowlenselect(table, n, complement=False) -> Table:
204
"""
205
Select rows with specified length.
206
207
Parameters:
208
- table: Input table
209
- n: Required row length
210
- complement: If True, select rows with different length
211
212
Returns:
213
Table with rows of specified length
214
"""
215
```
216
217
### Row Slicing and Sampling
218
219
Extract subsets of rows from tables.
220
221
```python { .api }
222
def rowslice(table, *sliceargs) -> Table:
223
"""
224
Extract a slice of rows from the table.
225
226
Parameters:
227
- table: Input table
228
- sliceargs: Slice arguments (start, stop, step)
229
230
Returns:
231
Table with sliced rows
232
"""
233
234
def head(table, n=5) -> Table:
235
"""
236
Get the first n rows of the table.
237
238
Parameters:
239
- table: Input table
240
- n: Number of rows to return
241
242
Returns:
243
Table with first n rows
244
"""
245
246
def tail(table, n=5) -> Table:
247
"""
248
Get the last n rows of the table.
249
250
Parameters:
251
- table: Input table
252
- n: Number of rows to return
253
254
Returns:
255
Table with last n rows
256
"""
257
258
def skip(table, n) -> Table:
259
"""
260
Skip the first n data rows.
261
262
Parameters:
263
- table: Input table
264
- n: Number of rows to skip
265
266
Returns:
267
Table with first n rows skipped
268
"""
269
270
def skipcomments(table, prefix='#') -> Table:
271
"""
272
Skip rows that begin with comment prefix.
273
274
Parameters:
275
- table: Input table
276
- prefix: Comment prefix to identify rows to skip
277
278
Returns:
279
Table with comment rows removed
280
"""
281
```
282
283
### Data Type Conversion
284
285
Convert and transform field values.
286
287
```python { .api }
288
def convert(table, *args, **kwargs) -> Table:
289
"""
290
Apply conversion functions to specified fields.
291
292
Parameters:
293
- table: Input table
294
- args: Field/function pairs for conversion
295
- kwargs: Additional options (where, failonerror, etc.)
296
297
Returns:
298
Table with converted field values
299
"""
300
301
def convertall(table, *args, **kwargs) -> Table:
302
"""
303
Apply conversion function to all fields.
304
305
Parameters:
306
- table: Input table
307
- args: Conversion function(s)
308
- kwargs: Additional options
309
310
Returns:
311
Table with all fields converted
312
"""
313
314
def convertnumbers(table, strict=False, **kwargs) -> Table:
315
"""
316
Attempt to convert string fields to numeric types.
317
318
Parameters:
319
- table: Input table
320
- strict: If True, raise error on conversion failure
321
- kwargs: Additional options
322
323
Returns:
324
Table with numeric conversions applied
325
"""
326
327
def replace(table, field, a, b, **kwargs) -> Table:
328
"""
329
Replace occurrences of value a with b in specified field.
330
331
Parameters:
332
- table: Input table
333
- field: Field name to modify
334
- a: Value to replace
335
- b: Replacement value
336
- kwargs: Additional options
337
338
Returns:
339
Table with replacements applied
340
"""
341
342
def replaceall(table, a, b, **kwargs) -> Table:
343
"""Replace occurrences of value a with b in all fields."""
344
345
def update(table, field, value, **kwargs) -> Table:
346
"""
347
Update field values using a function or mapping.
348
349
Parameters:
350
- table: Input table
351
- field: Field name to update
352
- value: Update function, value, or mapping
353
- kwargs: Additional options (where, etc.)
354
355
Returns:
356
Table with updated field values
357
"""
358
```
359
360
### String Formatting
361
362
Format and manipulate string field values.
363
364
```python { .api }
365
def format(table, field, fmt, **kwargs) -> Table:
366
"""
367
Format field values using format string.
368
369
Parameters:
370
- table: Input table
371
- field: Field name to format
372
- fmt: Format string (e.g., '{:.2f}', '{:>10}')
373
- kwargs: Additional options
374
375
Returns:
376
Table with formatted field values
377
"""
378
379
def formatall(table, fmt, **kwargs) -> Table:
380
"""Format all field values using format string."""
381
382
def interpolate(table, field, fmt, **kwargs) -> Table:
383
"""
384
Interpolate values into format string.
385
386
Parameters:
387
- table: Input table
388
- field: Field name for output
389
- fmt: Format string with field references
390
- kwargs: Additional options
391
392
Returns:
393
Table with interpolated values
394
"""
395
396
def interpolateall(table, fmt, **kwargs) -> Table:
397
"""Interpolate all field values into format string."""
398
```
399
400
### Table Combination
401
402
Combine multiple tables vertically or horizontally.
403
404
```python { .api }
405
def cat(*tables, **kwargs) -> Table:
406
"""
407
Concatenate tables vertically.
408
409
Parameters:
410
- tables: Tables to concatenate
411
- kwargs: Additional options (missing, header, etc.)
412
413
Returns:
414
Single table with all rows combined
415
"""
416
417
def stack(*tables, **kwargs) -> Table:
418
"""
419
Stack tables with different structures.
420
421
Parameters:
422
- tables: Tables to stack
423
- kwargs: Additional options
424
425
Returns:
426
Table with unified structure containing all data
427
"""
428
429
def annex(*tables, **kwargs) -> Table:
430
"""
431
Join tables horizontally by row position.
432
433
Parameters:
434
- tables: Tables to join horizontally
435
- kwargs: Additional options
436
437
Returns:
438
Table with fields from all input tables
439
"""
440
```
441
442
### Header Manipulation
443
444
Modify table headers and structure.
445
446
```python { .api }
447
def setheader(table, header) -> Table:
448
"""
449
Replace the table header.
450
451
Parameters:
452
- table: Input table
453
- header: New header row (list/tuple of field names)
454
455
Returns:
456
Table with new header
457
"""
458
459
def extendheader(table, fields) -> Table:
460
"""
461
Extend the header with additional fields.
462
463
Parameters:
464
- table: Input table
465
- fields: Additional field names to append
466
467
Returns:
468
Table with extended header
469
"""
470
471
def pushheader(table, header, *args) -> Table:
472
"""
473
Push the current header down as the first data row.
474
475
Parameters:
476
- table: Input table
477
- header: New header to use
478
- args: Additional arguments
479
480
Returns:
481
Table with old header as first data row
482
"""
483
484
def prefixheader(table, prefix) -> Table:
485
"""
486
Add prefix to all field names.
487
488
Parameters:
489
- table: Input table
490
- prefix: String prefix to add
491
492
Returns:
493
Table with prefixed field names
494
"""
495
496
def suffixheader(table, suffix) -> Table:
497
"""Add suffix to all field names."""
498
499
def sortheader(table) -> Table:
500
"""Sort header fields alphabetically."""
501
```
502
503
### Data Cleaning and Fill Operations
504
505
Fill missing values and clean data inconsistencies.
506
507
```python { .api }
508
def filldown(table, *fields, **kwargs) -> Table:
509
"""
510
Fill missing values by propagating non-missing values downward.
511
512
Parameters:
513
- table: Input table
514
- fields: Field names to fill (all fields if none specified)
515
- kwargs: Additional options (missing value specification)
516
517
Returns:
518
Table with missing values filled downward
519
"""
520
521
def fillright(table, *fields, **kwargs) -> Table:
522
"""
523
Fill missing values by propagating values from left to right.
524
525
Parameters:
526
- table: Input table
527
- fields: Field names to fill
528
- kwargs: Additional options
529
530
Returns:
531
Table with missing values filled rightward
532
"""
533
534
def fillleft(table, *fields, **kwargs) -> Table:
535
"""
536
Fill missing values by propagating values from right to left.
537
538
Parameters:
539
- table: Input table
540
- fields: Field names to fill
541
- kwargs: Additional options
542
543
Returns:
544
Table with missing values filled leftward
545
"""
546
```
547
548
## Usage Examples
549
550
### Field Selection and Manipulation
551
552
```python
553
import petl as etl
554
555
table = etl.fromcsv('data.csv') # name, age, city, country, salary
556
557
# Select specific fields
558
selected = etl.cut(table, 'name', 'age', 'salary')
559
560
# Remove unwanted fields
561
cleaned = etl.cutout(table, 'country')
562
563
# Rename fields
564
renamed = etl.rename(table, 'salary', 'income')
565
566
# Move field to different position
567
reordered = etl.movefield(table, 'salary', 1) # salary becomes second field
568
```
569
570
### Adding Fields
571
572
```python
573
import petl as etl
574
575
table = etl.fromcsv('employees.csv')
576
577
# Add constant field
578
with_status = etl.addfield(table, 'status', 'active')
579
580
# Add computed field
581
with_tax = etl.addfield(table, 'tax', lambda row: row.salary * 0.25)
582
583
# Add row numbers
584
numbered = etl.addrownumbers(table, 'id', start=1001)
585
586
# Add multiple fields
587
enhanced = etl.addfields(table, {
588
'department': 'Engineering',
589
'bonus': lambda rec: rec.salary * 0.1,
590
'year': 2023
591
})
592
```
593
594
### Row Filtering
595
596
```python
597
import petl as etl
598
599
table = etl.fromcsv('people.csv')
600
601
# Simple equality filter
602
adults = etl.selecteq(table, 'age', 18)
603
604
# Numeric comparisons
605
seniors = etl.selectgt(table, 'age', 65)
606
young_adults = etl.selectrangeclosed(table, 'age', 18, 30)
607
608
# Set membership
609
cities = etl.selectin(table, 'city', ['New York', 'London', 'Tokyo'])
610
611
# Complex filtering with functions
612
high_earners = etl.select(table, 'salary', lambda x: x > 100000)
613
614
# Multiple conditions
615
filtered = etl.select(table,
616
'age', lambda age: age > 25,
617
'city', lambda city: city.startswith('S'))
618
```
619
620
### Data Conversion
621
622
```python
623
import petl as etl
624
625
table = etl.fromcsv('data.csv')
626
627
# Convert specific field
628
clean_ages = etl.convert(table, 'age', int)
629
630
# Multiple conversions
631
converted = etl.convert(table,
632
'age', int,
633
'salary', float,
634
'name', str.title)
635
636
# Conditional conversion
637
cleaned = etl.convert(table, 'phone',
638
lambda x: x.replace('-', '') if x else None,
639
where=lambda row: row.phone is not None)
640
641
# Replace values
642
fixed = etl.replace(table, 'status', 'N/A', None)
643
644
# Auto-convert numbers
645
numeric = etl.convertnumbers(table)
646
```
647
648
### String Formatting
649
650
```python
651
import petl as etl
652
653
table = etl.fromcsv('products.csv')
654
655
# Format currency
656
formatted = etl.format(table, 'price', '${:.2f}')
657
658
# Interpolate multiple fields
659
summary = etl.interpolate(table, 'description',
660
'{name} - ${price:.2f} ({category})')
661
662
# Format all numeric fields
663
all_formatted = etl.formatall(table, '{:.2f}')
664
```