# Data Input/Output

Comprehensive support for reading and writing data from various sources including CSV, JSON, XML, Excel, databases, and many specialized formats. PETL provides a unified interface for data I/O operations with support for different encodings, formats, and data sources.

## Capabilities

### CSV and TSV Operations

Read and write comma-separated values and tab-separated values with extensive formatting options.

```python { .api }

def fromcsv(source=None, encoding=None, errors='strict', header=None, **csvargs) -> Table:
    """
    Extract a table from a CSV file or source.

    Parameters:
    - source: File path, file object, or URL
    - encoding: Character encoding (default: system default)
    - errors: Error handling strategy ('strict', 'ignore', 'replace')
    - header: Row number for header (0-based) or None for no header
    - csvargs: Additional CSV reader arguments (delimiter, quotechar, etc.)

    Returns:
    Table object
    """

def fromtsv(source=None, encoding=None, errors='strict', header=None, **csvargs) -> Table:
    """Extract a table from a TSV (tab-separated values) file."""

def tocsv(table, source=None, encoding=None, errors='strict', write_header=True, **csvargs):
    """
    Write a table to a CSV file.

    Parameters:
    - table: Input table
    - source: Output file path or file object
    - encoding: Character encoding
    - errors: Error handling strategy
    - write_header: Whether to write header row
    - csvargs: Additional CSV writer arguments
    """

def totsv(table, source=None, encoding=None, errors='strict', write_header=True, **csvargs):
    """Write a table to a TSV file."""

def appendcsv(table, source=None, encoding=None, errors='strict', **csvargs):
    """Append a table to an existing CSV file."""

def appendtsv(table, source=None, encoding=None, errors='strict', **csvargs):
    """Append a table to an existing TSV file."""

def teecsv(table, source=None, encoding=None, errors='strict', write_header=True, **csvargs) -> Table:
    """Write a table to CSV while returning the table for further processing."""

def teetsv(table, source=None, encoding=None, errors='strict', write_header=True, **csvargs) -> Table:
    """Write a table to TSV while returning the table for further processing."""
```

### JSON Operations

Handle JSON data in various formats including JSON Lines and arrays of objects.

```python { .api }
def fromjson(source, *args, **kwargs) -> Table:
    """
    Extract a table from JSON lines format or array of objects.

    Parameters:
    - source: File path, file object, or URL containing JSON data
    - args: Additional arguments passed to json.loads
    - kwargs: Additional keyword arguments

    Returns:
    Table object
    """

def fromdicts(dicts, header=None, sample=1000, missing=None) -> Table:
    """
    Construct a table from an iterable of dictionaries.

    Parameters:
    - dicts: Iterable of dictionary objects
    - header: Explicit field names (optional)
    - sample: Number of records to sample for field detection
    - missing: Value for missing fields

    Returns:
    Table object
    """

def tojson(table, source=None, prefix=None, suffix=None, *args, **kwargs):
    """
    Write a table to JSON format.

    Parameters:
    - table: Input table
    - source: Output file path or file object
    - prefix: String to prepend to output
    - suffix: String to append to output
    - args: Additional arguments passed to json.dumps
    - kwargs: Additional keyword arguments
    """

def tojsonarrays(table, source=None, prefix=None, suffix=None, output_header=False, *args, **kwargs):
    """
    Write a table to JSON array format.

    Parameters:
    - table: Input table
    - source: Output file path or file object
    - prefix: String to prepend to output
    - suffix: String to append to output
    - output_header: Whether to include header in output
    - args: Additional arguments
    - kwargs: Additional keyword arguments
    """
```

### Database Operations

Connect to and work with various database systems using SQLAlchemy.

```python { .api }
def fromdb(dbo, query, *args, **kwargs) -> Table:
    """
    Extract a table from a database query.

    Parameters:
    - dbo: Database connection object (SQLAlchemy engine/connection)
    - query: SQL query string
    - args: Query parameters
    - kwargs: Additional arguments

    Returns:
    Table object with query results
    """

def todb(table, dbo, tablename, schema=None, commit=True, create=False, drop=False,
         constraints=True, metadata=None, dialect=None, typeconv=None):
    """
    Write a table to a database.

    Parameters:
    - table: Input table
    - dbo: Database connection object
    - tablename: Target table name
    - schema: Database schema name
    - commit: Whether to commit transaction
    - create: Whether to create table if it doesn't exist
    - drop: Whether to drop existing table
    - constraints: Whether to create constraints
    - metadata: SQLAlchemy metadata object
    - dialect: Database dialect
    - typeconv: Type conversion dictionary
    """

def appenddb(table, dbo, tablename, schema=None, commit=True):
    """
    Append a table to a database table.

    Parameters:
    - table: Input table
    - dbo: Database connection object
    - tablename: Target table name
    - schema: Database schema name
    - commit: Whether to commit transaction
    """
```

### Excel Operations

Read and write Microsoft Excel files in both legacy (.xls) and modern (.xlsx) formats.

```python { .api }
def fromxls(filename, sheet=None, use_view=True, **kwargs) -> Table:
    """
    Extract a table from an Excel .xls file.

    Parameters:
    - filename: Path to Excel file
    - sheet: Sheet name or index (default: first sheet)
    - use_view: Whether to use optimized view
    - kwargs: Additional arguments

    Returns:
    Table object
    """

def fromxlsx(filename, sheet=None, range_string=None, min_row=None, max_row=None,
             min_col=None, max_col=None, read_only=False, **kwargs) -> Table:
    """
    Extract a table from an Excel .xlsx file.

    Parameters:
    - filename: Path to Excel file
    - sheet: Sheet name or index
    - range_string: Excel range string (e.g., 'A1:C10')
    - min_row, max_row: Row range limits
    - min_col, max_col: Column range limits
    - read_only: Whether to open in read-only mode
    - kwargs: Additional arguments

    Returns:
    Table object
    """

def toxls(table, filename, sheet=None, encoding=None, style_compression=0, **kwargs):
    """Write a table to an Excel .xls file."""

def toxlsx(table, filename, sheet=None, write_header=True, mode="replace"):
    """Write a table to an Excel .xlsx file."""

def appendxlsx(table, filename, sheet=None, write_header=False):
    """Append a table to an Excel .xlsx file."""
```

### XML and HTML Operations

Process XML and HTML data for web scraping and data integration.

```python { .api }
def fromxml(source, *args, **kwargs) -> Table:
    """
    Extract a table from an XML file.

    Parameters:
    - source: XML file path or file object
    - args: Additional arguments for XML parsing
    - kwargs: Additional keyword arguments

    Returns:
    Table object
    """

def toxml(table, target=None, root='table', head='row', **kwargs):
    """
    Write a table to XML format.

    Parameters:
    - table: Input table
    - target: Output file path or file object
    - root: Root element name
    - head: Row element name
    - kwargs: Additional XML formatting options
    """

def tohtml(table, source=None, encoding=None, errors='strict', caption=None,
           vrepr=str, lineterminator='\r\n', class_=None, **kwargs):
    """
    Write a table to HTML format.

    Parameters:
    - table: Input table
    - source: Output file path or file object
    - encoding: Character encoding
    - errors: Error handling strategy
    - caption: HTML table caption
    - vrepr: Value representation function
    - lineterminator: Line ending character
    - class_: CSS class for table element
    - kwargs: Additional HTML formatting options
    """

def teehtml(table, source=None, encoding=None, errors='strict', caption=None,
            vrepr=str, lineterminator='\r\n', class_=None, **kwargs) -> Table:
    """Write a table to HTML while returning the table for further processing."""
```

### Specialized Formats

Support for various specialized data formats and scientific computing libraries.

```python { .api }
def fromarray(array) -> Table:
    """
    Construct a table from a NumPy array.

    Parameters:
    - array: NumPy array

    Returns:
    Table object
    """

def toarray(table, dtype=None, count=-1, sample=1000):
    """
    Convert a table to a NumPy array.

    Parameters:
    - table: Input table
    - dtype: NumPy data type
    - count: Maximum number of rows (-1 for all)
    - sample: Number of rows to sample for type detection

    Returns:
    NumPy array
    """

def fromdataframe(df, include_index=False) -> Table:
    """
    Construct a table from a Pandas DataFrame.

    Parameters:
    - df: Pandas DataFrame
    - include_index: Whether to include DataFrame index as a column

    Returns:
    Table object
    """

def todataframe(table, index=None, exclude=None, columns=None,
                coerce_float=False, nrows=None):
    """
    Convert a table to a Pandas DataFrame.

    Parameters:
    - table: Input table
    - index: Column to use as DataFrame index
    - exclude: Columns to exclude
    - columns: Column names to use
    - coerce_float: Whether to coerce numeric strings to float
    - nrows: Maximum number of rows

    Returns:
    Pandas DataFrame
    """

def frompickle(source=None) -> Table:
    """Extract a table from a pickle file."""

def topickle(table, source=None, protocol=-1, write_header=True):
    """Write a table to a pickle file."""

def fromavro(source, limit=None, skips=0, **avro_args) -> Table:
    """Extract a table from an Apache Avro file."""

def toavro(table, target, schema=None, sample=9, mode='wb', **avro_args):
    """Write a table to Apache Avro format."""

def fromhdf5(source, where=None, name=None, condition=None,
             condvars=None, start=None, stop=None, step=None, **kwargs) -> Table:
    """
    Extract a table from an HDF5 file using PyTables.

    Parameters:
    - source: HDF5 file path or file object
    - where: Path to HDF5 table within file
    - name: Name of table within HDF5 file
    - condition: Selection condition
    - condvars: Variables for selection condition
    - start: Start index for selection
    - stop: Stop index for selection
    - step: Step size for selection
    - kwargs: Additional PyTables arguments

    Returns:
    Table object
    """

def tohdf5(table, source, where=None, name='table', create=False,
           description=None, title='', filters=None, expectedrows=10000,
           chunkshape=None, byteorder=None, createparents=False,
           sample=1000, **kwargs):
    """Write a table to HDF5 format using PyTables."""

def fromhdf5sorted(source, *args, **kwargs) -> Table:
    """Extract a table from HDF5 with sorted index reading."""

def frombcolz(source, expression=None, outcols=None, limit=None, skip=0) -> Table:
    """
    Extract a table from a Bcolz compressed array.

    Parameters:
    - source: Bcolz source file or object
    - expression: Query expression for filtering
    - outcols: Output columns to select
    - limit: Maximum number of rows to read
    - skip: Number of rows to skip

    Returns:
    Table object
    """

def tobcolz(table, source=None, mode='w', **bcolz_args):
    """Write a table to Bcolz compressed format."""

def fromgsheet(url=None, title=None, sheet_name=None, encoding='utf-8',
               auth_method='service_account', scopes=None, credentials=None,
               **gsheet_args) -> Table:
    """
    Extract a table from a Google Sheets document.

    Parameters:
    - url: Google Sheets URL
    - title: Sheet title
    - sheet_name: Name of specific sheet
    - encoding: Character encoding
    - auth_method: Authentication method
    - scopes: OAuth scopes
    - credentials: Authentication credentials
    - gsheet_args: Additional Google Sheets arguments

    Returns:
    Table object
    """

def togsheet(table, url=None, title=None, sheet_name=None,
             auth_method='service_account', scopes=None, credentials=None,
             **gsheet_args):
    """Write a table to Google Sheets."""
```

### Data Sources

Flexible data source classes for handling various input/output scenarios.

```python { .api }
class FileSource:
    """File source for reading/writing local files."""
    def __init__(self, filename, mode='rb'): ...

class URLSource:
    """URL-based data source for remote files."""
    def __init__(self, url, **kwargs): ...

class StringSource:
    """In-memory string source."""
    def __init__(self, string_data): ...

class GzipSource:
    """Compressed gzip file source."""
    def __init__(self, filename, mode='rb'): ...

class RemoteSource:
    """Remote file system source using fsspec."""
    def __init__(self, url, **kwargs): ...

class SMBSource:
    """SMB/CIFS network share source."""
    def __init__(self, url, **kwargs): ...
```

## Usage Examples

### Working with CSV Files

```python
import petl as etl

# Read CSV with custom options
table = etl.fromcsv('data.csv', delimiter=';', encoding='utf-8')

# Write CSV with specific formatting
etl.tocsv(table, 'output.csv', delimiter='|', quotechar='"')

# Chain operations with CSV I/O
result = (etl.fromcsv('input.csv')
          .select('age', lambda age: age > 18)
          .cut('name', 'age')
          .sort('name'))
result.tocsv('adults.csv')
```

### Database Integration

```python
import petl as etl
from sqlalchemy import create_engine

# Connect to database
engine = create_engine('postgresql://user:pass@host:port/db')

# Extract data from database
table = etl.fromdb(engine, 'SELECT * FROM customers WHERE active = true')

# Process and write back to database
processed = table.convert('email', str.lower).convert('phone', lambda x: x.replace('-', ''))
etl.todb(processed, engine, 'customers_clean', create=True)
```

### JSON Data Processing

```python
import petl as etl

# Read JSON Lines file
table = etl.fromjson('data.jsonl')

# Convert list of dictionaries to table
data = [{'name': 'Alice', 'age': 25}, {'name': 'Bob', 'age': 30}]
table = etl.fromdicts(data)

# Write to JSON format
etl.tojson(table, 'output.json')
```

### Excel File Operations

```python
import petl as etl

# Read specific sheet and range
table = etl.fromxlsx('report.xlsx', sheet='Sales', range_string='A1:F100')

# Write to Excel with formatting
etl.toxlsx(table, 'summary.xlsx', sheet='Results')

# Append to existing Excel file
etl.appendxlsx(additional_data, 'summary.xlsx', sheet='Results')
```