or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

aggregation.mdbasic-transformations.mddata-io.mddata-reshaping.mdindex.mdsorting-joins.mdtable-operations.mdvalidation-analysis.md

basic-transformations.mddocs/

0

# Basic Transformations

1

2

Core transformation operations for reshaping, filtering, and manipulating table data. This module provides essential functions for field operations, row selection, data conversion, and structural transformations that form the foundation of data processing workflows.

3

4

## Capabilities

5

6

### Field Selection and Manipulation

7

8

Operations for selecting, removing, and rearranging table fields.

9

10

```python { .api }

11

def cut(table, *args, **kwargs) -> Table:

12

"""

13

Select specific fields from the table.

14

15

Parameters:

16

- table: Input table

17

- args: Field names, indices, or slice objects

18

- kwargs: Additional options

19

20

Returns:

21

Table with selected fields only

22

"""

23

24

def cutout(table, *args, **kwargs) -> Table:

25

"""

26

Remove specific fields from the table.

27

28

Parameters:

29

- table: Input table

30

- args: Field names or indices to remove

31

- kwargs: Additional options

32

33

Returns:

34

Table with specified fields removed

35

"""

36

37

def movefield(table, field, index) -> Table:

38

"""

39

Move a field to a different position in the table.

40

41

Parameters:

42

- table: Input table

43

- field: Field name to move

44

- index: Target position (0-based)

45

46

Returns:

47

Table with field moved to new position

48

"""

49

50

def rename(table, *args, **kwargs) -> Table:

51

"""

52

Rename table fields.

53

54

Parameters:

55

- table: Input table

56

- args: Rename specifications (old_name, new_name pairs or dict)

57

- kwargs: Additional options

58

59

Returns:

60

Table with renamed fields

61

"""

62

```

63

64

### Field Addition

65

66

Add new fields to tables with various value sources.

67

68

```python { .api }

69

def addfield(table, field, value=None, index=None, missing=None) -> Table:

70

"""

71

Add a new field to the table.

72

73

Parameters:

74

- table: Input table

75

- field: Name of new field

76

- value: Value, function, or iterable for field values

77

- index: Position to insert field (default: end)

78

- missing: Value for missing data

79

80

Returns:

81

Table with new field added

82

"""

83

84

def addfields(table, field_defs, missing=None) -> Table:

85

"""

86

Add multiple fields to the table.

87

88

Parameters:

89

- table: Input table

90

- field_defs: Dictionary or list of (field, value) specifications

91

- missing: Value for missing data

92

93

Returns:

94

Table with new fields added

95

"""

96

97

def addfieldusingcontext(table, field, callable, **kwargs) -> Table:

98

"""

99

Add field using context-aware function.

100

101

Parameters:

102

- table: Input table

103

- field: Name of new field

104

- callable: Function that receives row context

105

- kwargs: Additional arguments passed to callable

106

107

Returns:

108

Table with new field added

109

"""

110

111

def addrownumbers(table, field='row', start=1) -> Table:

112

"""

113

Add row numbers as a new field.

114

115

Parameters:

116

- table: Input table

117

- field: Name for row number field

118

- start: Starting row number

119

120

Returns:

121

Table with row number field added

122

"""

123

124

def addcolumn(table, field, column) -> Table:

125

"""

126

Add a column of values as a new field.

127

128

Parameters:

129

- table: Input table

130

- field: Name of new field

131

- column: Iterable of values for the column

132

133

Returns:

134

Table with new column added

135

"""

136

```

137

138

### Row Selection and Filtering

139

140

Filter and select rows based on various criteria.

141

142

```python { .api }

143

def select(table, *args, **kwargs) -> Table:

144

"""

145

Select rows based on criteria.

146

147

Parameters:

148

- table: Input table

149

- args: Selection criteria (field, function/value pairs)

150

- kwargs: Additional options (complement, etc.)

151

152

Returns:

153

Table with selected rows

154

"""

155

156

def selecteq(table, field, value, complement=False) -> Table:

157

"""Select rows where field equals value."""

158

159

def selectne(table, field, value, complement=False) -> Table:

160

"""Select rows where field does not equal value."""

161

162

def selectlt(table, field, value, complement=False) -> Table:

163

"""Select rows where field is less than value."""

164

165

def selectle(table, field, value, complement=False) -> Table:

166

"""Select rows where field is less than or equal to value."""

167

168

def selectgt(table, field, value, complement=False) -> Table:

169

"""Select rows where field is greater than value."""

170

171

def selectge(table, field, value, complement=False) -> Table:

172

"""Select rows where field is greater than or equal to value."""

173

174

def selectin(table, field, test, complement=False) -> Table:

175

"""

176

Select rows where field value is in test collection.

177

178

Parameters:

179

- table: Input table

180

- field: Field name to test

181

- test: Collection of values to test membership

182

- complement: If True, select rows NOT in test

183

184

Returns:

185

Table with matching rows

186

"""

187

188

def selectnotin(table, field, test, complement=False) -> Table:

189

"""Select rows where field value is not in test collection."""

190

191

def selectnone(table, field, complement=False) -> Table:

192

"""Select rows where field is None."""

193

194

def selectnotnone(table, field, complement=False) -> Table:

195

"""Select rows where field is not None."""

196

197

def selectcontains(table, field, value, complement=False) -> Table:

198

"""Select rows where field contains value."""

199

200

def selectrangeclosed(table, field, minv, maxv, complement=False) -> Table:

201

"""Select rows where field is in closed range [minv, maxv]."""

202

203

def rowlenselect(table, n, complement=False) -> Table:

204

"""

205

Select rows with specified length.

206

207

Parameters:

208

- table: Input table

209

- n: Required row length

210

- complement: If True, select rows with different length

211

212

Returns:

213

Table with rows of specified length

214

"""

215

```

216

217

### Row Slicing and Sampling

218

219

Extract subsets of rows from tables.

220

221

```python { .api }

222

def rowslice(table, *sliceargs) -> Table:

223

"""

224

Extract a slice of rows from the table.

225

226

Parameters:

227

- table: Input table

228

- sliceargs: Slice arguments (start, stop, step)

229

230

Returns:

231

Table with sliced rows

232

"""

233

234

def head(table, n=5) -> Table:

235

"""

236

Get the first n rows of the table.

237

238

Parameters:

239

- table: Input table

240

- n: Number of rows to return

241

242

Returns:

243

Table with first n rows

244

"""

245

246

def tail(table, n=5) -> Table:

247

"""

248

Get the last n rows of the table.

249

250

Parameters:

251

- table: Input table

252

- n: Number of rows to return

253

254

Returns:

255

Table with last n rows

256

"""

257

258

def skip(table, n) -> Table:

259

"""

260

Skip the first n data rows.

261

262

Parameters:

263

- table: Input table

264

- n: Number of rows to skip

265

266

Returns:

267

Table with first n rows skipped

268

"""

269

270

def skipcomments(table, prefix='#') -> Table:

271

"""

272

Skip rows that begin with comment prefix.

273

274

Parameters:

275

- table: Input table

276

- prefix: Comment prefix to identify rows to skip

277

278

Returns:

279

Table with comment rows removed

280

"""

281

```

282

283

### Data Type Conversion

284

285

Convert and transform field values.

286

287

```python { .api }

288

def convert(table, *args, **kwargs) -> Table:

289

"""

290

Apply conversion functions to specified fields.

291

292

Parameters:

293

- table: Input table

294

- args: Field/function pairs for conversion

295

- kwargs: Additional options (where, failonerror, etc.)

296

297

Returns:

298

Table with converted field values

299

"""

300

301

def convertall(table, *args, **kwargs) -> Table:

302

"""

303

Apply conversion function to all fields.

304

305

Parameters:

306

- table: Input table

307

- args: Conversion function(s)

308

- kwargs: Additional options

309

310

Returns:

311

Table with all fields converted

312

"""

313

314

def convertnumbers(table, strict=False, **kwargs) -> Table:

315

"""

316

Attempt to convert string fields to numeric types.

317

318

Parameters:

319

- table: Input table

320

- strict: If True, raise error on conversion failure

321

- kwargs: Additional options

322

323

Returns:

324

Table with numeric conversions applied

325

"""

326

327

def replace(table, field, a, b, **kwargs) -> Table:

328

"""

329

Replace occurrences of value a with b in specified field.

330

331

Parameters:

332

- table: Input table

333

- field: Field name to modify

334

- a: Value to replace

335

- b: Replacement value

336

- kwargs: Additional options

337

338

Returns:

339

Table with replacements applied

340

"""

341

342

def replaceall(table, a, b, **kwargs) -> Table:

343

"""Replace occurrences of value a with b in all fields."""

344

345

def update(table, field, value, **kwargs) -> Table:

346

"""

347

Update field values using a function or mapping.

348

349

Parameters:

350

- table: Input table

351

- field: Field name to update

352

- value: Update function, value, or mapping

353

- kwargs: Additional options (where, etc.)

354

355

Returns:

356

Table with updated field values

357

"""

358

```

359

360

### String Formatting

361

362

Format and manipulate string field values.

363

364

```python { .api }

365

def format(table, field, fmt, **kwargs) -> Table:

366

"""

367

Format field values using format string.

368

369

Parameters:

370

- table: Input table

371

- field: Field name to format

372

- fmt: Format string (e.g., '{:.2f}', '{:>10}')

373

- kwargs: Additional options

374

375

Returns:

376

Table with formatted field values

377

"""

378

379

def formatall(table, fmt, **kwargs) -> Table:

380

"""Format all field values using format string."""

381

382

def interpolate(table, field, fmt, **kwargs) -> Table:

383

"""

384

Interpolate values into format string.

385

386

Parameters:

387

- table: Input table

388

- field: Field name for output

389

- fmt: Format string with field references

390

- kwargs: Additional options

391

392

Returns:

393

Table with interpolated values

394

"""

395

396

def interpolateall(table, fmt, **kwargs) -> Table:

397

"""Interpolate all field values into format string."""

398

```

399

400

### Table Combination

401

402

Combine multiple tables vertically or horizontally.

403

404

```python { .api }

405

def cat(*tables, **kwargs) -> Table:

406

"""

407

Concatenate tables vertically.

408

409

Parameters:

410

- tables: Tables to concatenate

411

- kwargs: Additional options (missing, header, etc.)

412

413

Returns:

414

Single table with all rows combined

415

"""

416

417

def stack(*tables, **kwargs) -> Table:

418

"""

419

Stack tables with different structures.

420

421

Parameters:

422

- tables: Tables to stack

423

- kwargs: Additional options

424

425

Returns:

426

Table with unified structure containing all data

427

"""

428

429

def annex(*tables, **kwargs) -> Table:

430

"""

431

Join tables horizontally by row position.

432

433

Parameters:

434

- tables: Tables to join horizontally

435

- kwargs: Additional options

436

437

Returns:

438

Table with fields from all input tables

439

"""

440

```

441

442

### Header Manipulation

443

444

Modify table headers and structure.

445

446

```python { .api }

447

def setheader(table, header) -> Table:

448

"""

449

Replace the table header.

450

451

Parameters:

452

- table: Input table

453

- header: New header row (list/tuple of field names)

454

455

Returns:

456

Table with new header

457

"""

458

459

def extendheader(table, fields) -> Table:

460

"""

461

Extend the header with additional fields.

462

463

Parameters:

464

- table: Input table

465

- fields: Additional field names to append

466

467

Returns:

468

Table with extended header

469

"""

470

471

def pushheader(table, header, *args) -> Table:

472

"""

473

Push the current header down as the first data row.

474

475

Parameters:

476

- table: Input table

477

- header: New header to use

478

- args: Additional arguments

479

480

Returns:

481

Table with old header as first data row

482

"""

483

484

def prefixheader(table, prefix) -> Table:

485

"""

486

Add prefix to all field names.

487

488

Parameters:

489

- table: Input table

490

- prefix: String prefix to add

491

492

Returns:

493

Table with prefixed field names

494

"""

495

496

def suffixheader(table, suffix) -> Table:

497

"""Add suffix to all field names."""

498

499

def sortheader(table) -> Table:

500

"""Sort header fields alphabetically."""

501

```

502

503

### Data Cleaning and Fill Operations

504

505

Fill missing values and clean data inconsistencies.

506

507

```python { .api }

508

def filldown(table, *fields, **kwargs) -> Table:

509

"""

510

Fill missing values by propagating non-missing values downward.

511

512

Parameters:

513

- table: Input table

514

- fields: Field names to fill (all fields if none specified)

515

- kwargs: Additional options (missing value specification)

516

517

Returns:

518

Table with missing values filled downward

519

"""

520

521

def fillright(table, *fields, **kwargs) -> Table:

522

"""

523

Fill missing values by propagating values from left to right.

524

525

Parameters:

526

- table: Input table

527

- fields: Field names to fill

528

- kwargs: Additional options

529

530

Returns:

531

Table with missing values filled rightward

532

"""

533

534

def fillleft(table, *fields, **kwargs) -> Table:

535

"""

536

Fill missing values by propagating values from right to left.

537

538

Parameters:

539

- table: Input table

540

- fields: Field names to fill

541

- kwargs: Additional options

542

543

Returns:

544

Table with missing values filled leftward

545

"""

546

```

547

548

## Usage Examples

549

550

### Field Selection and Manipulation

551

552

```python

553

import petl as etl

554

555

table = etl.fromcsv('data.csv') # name, age, city, country, salary

556

557

# Select specific fields

558

selected = etl.cut(table, 'name', 'age', 'salary')

559

560

# Remove unwanted fields

561

cleaned = etl.cutout(table, 'country')

562

563

# Rename fields

564

renamed = etl.rename(table, 'salary', 'income')

565

566

# Move field to different position

567

reordered = etl.movefield(table, 'salary', 1) # salary becomes second field

568

```

569

570

### Adding Fields

571

572

```python

573

import petl as etl

574

575

table = etl.fromcsv('employees.csv')

576

577

# Add constant field

578

with_status = etl.addfield(table, 'status', 'active')

579

580

# Add computed field

581

with_tax = etl.addfield(table, 'tax', lambda row: row.salary * 0.25)

582

583

# Add row numbers

584

numbered = etl.addrownumbers(table, 'id', start=1001)

585

586

# Add multiple fields

587

enhanced = etl.addfields(table, {

588

'department': 'Engineering',

589

'bonus': lambda rec: rec.salary * 0.1,

590

'year': 2023

591

})

592

```

593

594

### Row Filtering

595

596

```python

597

import petl as etl

598

599

table = etl.fromcsv('people.csv')

600

601

# Simple equality filter

602

adults = etl.selecteq(table, 'age', 18)

603

604

# Numeric comparisons

605

seniors = etl.selectgt(table, 'age', 65)

606

young_adults = etl.selectrangeclosed(table, 'age', 18, 30)

607

608

# Set membership

609

cities = etl.selectin(table, 'city', ['New York', 'London', 'Tokyo'])

610

611

# Complex filtering with functions

612

high_earners = etl.select(table, 'salary', lambda x: x > 100000)

613

614

# Multiple conditions

615

filtered = etl.select(table,

616

'age', lambda age: age > 25,

617

'city', lambda city: city.startswith('S'))

618

```

619

620

### Data Conversion

621

622

```python

623

import petl as etl

624

625

table = etl.fromcsv('data.csv')

626

627

# Convert specific field

628

clean_ages = etl.convert(table, 'age', int)

629

630

# Multiple conversions

631

converted = etl.convert(table,

632

'age', int,

633

'salary', float,

634

'name', str.title)

635

636

# Conditional conversion

637

cleaned = etl.convert(table, 'phone',

638

lambda x: x.replace('-', '') if x else None,

639

where=lambda row: row.phone is not None)

640

641

# Replace values

642

fixed = etl.replace(table, 'status', 'N/A', None)

643

644

# Auto-convert numbers

645

numeric = etl.convertnumbers(table)

646

```

647

648

### String Formatting

649

650

```python

651

import petl as etl

652

653

table = etl.fromcsv('products.csv')

654

655

# Format currency

656

formatted = etl.format(table, 'price', '${:.2f}')

657

658

# Interpolate multiple fields

659

summary = etl.interpolate(table, 'description',

660

'{name} - ${price:.2f} ({category})')

661

662

# Format all numeric fields

663

all_formatted = etl.formatall(table, '{:.2f}')

664

```