# Data Input/Output

Comprehensive support for reading and writing data from various sources including CSV, JSON, XML, Excel, databases, and many specialized formats. PETL provides a unified interface for data I/O operations with support for different encodings, formats, and data sources.

## Capabilities

### CSV and TSV Operations

Read and write comma-separated values and tab-separated values with extensive formatting options.

```python { .api }
def fromcsv(source=None, encoding=None, errors='strict', header=None, **csvargs) -> Table:
    """
    Extract a table from a CSV file or source.

    Parameters:
    - source: File path, file object, or URL
    - encoding: Character encoding (default: system default)
    - errors: Error handling strategy ('strict', 'ignore', 'replace')
    - header: Row number for header (0-based) or None for no header
    - csvargs: Additional CSV reader arguments (delimiter, quotechar, etc.)

    Returns:
        Table object
    """

def fromtsv(source=None, encoding=None, errors='strict', header=None, **csvargs) -> Table:
    """Extract a table from a TSV (tab-separated values) file."""

def tocsv(table, source=None, encoding=None, errors='strict', write_header=True, **csvargs):
    """
    Write a table to a CSV file.

    Parameters:
    - table: Input table
    - source: Output file path or file object
    - encoding: Character encoding
    - errors: Error handling strategy
    - write_header: Whether to write header row
    - csvargs: Additional CSV writer arguments
    """

def totsv(table, source=None, encoding=None, errors='strict', write_header=True, **csvargs):
    """Write a table to a TSV file."""

def appendcsv(table, source=None, encoding=None, errors='strict', **csvargs):
    """Append a table to an existing CSV file."""

def appendtsv(table, source=None, encoding=None, errors='strict', **csvargs):
    """Append a table to an existing TSV file."""

def teecsv(table, source=None, encoding=None, errors='strict', write_header=True, **csvargs) -> Table:
    """Write a table to CSV while returning the table for further processing."""

def teetsv(table, source=None, encoding=None, errors='strict', write_header=True, **csvargs) -> Table:
    """Write a table to TSV while returning the table for further processing."""
```

### JSON Operations

Handle JSON data in various formats including JSON Lines and arrays of objects.

```python { .api }
def fromjson(source, *args, **kwargs) -> Table:
    """
    Extract a table from JSON lines format or array of objects.

    Parameters:
    - source: File path, file object, or URL containing JSON data
    - args: Additional arguments passed to json.loads
    - kwargs: Additional keyword arguments

    Returns:
        Table object
    """

def fromdicts(dicts, header=None, sample=1000, missing=None) -> Table:
    """
    Construct a table from an iterable of dictionaries.

    Parameters:
    - dicts: Iterable of dictionary objects
    - header: Explicit field names (optional)
    - sample: Number of records to sample for field detection
    - missing: Value for missing fields

    Returns:
        Table object
    """

def tojson(table, source=None, prefix=None, suffix=None, *args, **kwargs):
    """
    Write a table to JSON format.

    Parameters:
    - table: Input table
    - source: Output file path or file object
    - prefix: String to prepend to output
    - suffix: String to append to output
    - args: Additional arguments passed to json.dumps
    - kwargs: Additional keyword arguments
    """

def tojsonarrays(table, source=None, prefix=None, suffix=None, output_header=False, *args, **kwargs):
    """
    Write a table to JSON array format.

    Parameters:
    - table: Input table
    - source: Output file path or file object
    - prefix: String to prepend to output
    - suffix: String to append to output
    - output_header: Whether to include header in output
    - args: Additional arguments
    - kwargs: Additional keyword arguments
    """
```

### Database Operations

Connect to and work with various database systems using SQLAlchemy.

```python { .api }
def fromdb(dbo, query, *args, **kwargs) -> Table:
    """
    Extract a table from a database query.

    Parameters:
    - dbo: Database connection object (SQLAlchemy engine/connection)
    - query: SQL query string
    - args: Query parameters
    - kwargs: Additional arguments

    Returns:
        Table object with query results
    """

def todb(table, dbo, tablename, schema=None, commit=True, create=False, drop=False,
         constraints=True, metadata=None, dialect=None, typeconv=None):
    """
    Write a table to a database.

    Parameters:
    - table: Input table
    - dbo: Database connection object
    - tablename: Target table name
    - schema: Database schema name
    - commit: Whether to commit transaction
    - create: Whether to create table if it doesn't exist
    - drop: Whether to drop existing table
    - constraints: Whether to create constraints
    - metadata: SQLAlchemy metadata object
    - dialect: Database dialect
    - typeconv: Type conversion dictionary
    """

def appenddb(table, dbo, tablename, schema=None, commit=True):
    """
    Append a table to a database table.

    Parameters:
    - table: Input table
    - dbo: Database connection object
    - tablename: Target table name
    - schema: Database schema name
    - commit: Whether to commit transaction
    """
```

### Excel Operations

Read and write Microsoft Excel files in both legacy (.xls) and modern (.xlsx) formats.

```python { .api }
def fromxls(filename, sheet=None, use_view=True, **kwargs) -> Table:
    """
    Extract a table from an Excel .xls file.

    Parameters:
    - filename: Path to Excel file
    - sheet: Sheet name or index (default: first sheet)
    - use_view: Whether to use optimized view
    - kwargs: Additional arguments

    Returns:
        Table object
    """

def fromxlsx(filename, sheet=None, range_string=None, min_row=None, max_row=None,
             min_col=None, max_col=None, read_only=False, **kwargs) -> Table:
    """
    Extract a table from an Excel .xlsx file.

    Parameters:
    - filename: Path to Excel file
    - sheet: Sheet name or index
    - range_string: Excel range string (e.g., 'A1:C10')
    - min_row, max_row: Row range limits
    - min_col, max_col: Column range limits
    - read_only: Whether to open in read-only mode
    - kwargs: Additional arguments

    Returns:
        Table object
    """

def toxls(table, filename, sheet=None, encoding=None, style_compression=0, **kwargs):
    """Write a table to an Excel .xls file."""

def toxlsx(table, filename, sheet=None, write_header=True, mode="replace"):
    """Write a table to an Excel .xlsx file."""

def appendxlsx(table, filename, sheet=None, write_header=False):
    """Append a table to an Excel .xlsx file."""
```

### XML and HTML Operations

Process XML and HTML data for web scraping and data integration.

```python { .api }
def fromxml(source, *args, **kwargs) -> Table:
    """
    Extract a table from an XML file.

    Parameters:
    - source: XML file path or file object
    - args: Additional arguments for XML parsing
    - kwargs: Additional keyword arguments

    Returns:
        Table object
    """

def toxml(table, target=None, root='table', head='row', **kwargs):
    """
    Write a table to XML format.

    Parameters:
    - table: Input table
    - target: Output file path or file object
    - root: Root element name
    - head: Row element name
    - kwargs: Additional XML formatting options
    """

def tohtml(table, source=None, encoding=None, errors='strict', caption=None,
           vrepr=str, lineterminator='\r\n', class_=None, **kwargs):
    """
    Write a table to HTML format.

    Parameters:
    - table: Input table
    - source: Output file path or file object
    - encoding: Character encoding
    - errors: Error handling strategy
    - caption: HTML table caption
    - vrepr: Value representation function
    - lineterminator: Line ending character
    - class_: CSS class for table element
    - kwargs: Additional HTML formatting options
    """

def teehtml(table, source=None, encoding=None, errors='strict', caption=None,
            vrepr=str, lineterminator='\r\n', class_=None, **kwargs) -> Table:
    """Write a table to HTML while returning the table for further processing."""
```

### Specialized Formats

Support for various specialized data formats and scientific computing libraries.

```python { .api }
def fromarray(array) -> Table:
    """
    Construct a table from a NumPy array.

    Parameters:
    - array: NumPy array

    Returns:
        Table object
    """

def toarray(table, dtype=None, count=-1, sample=1000):
    """
    Convert a table to a NumPy array.

    Parameters:
    - table: Input table
    - dtype: NumPy data type
    - count: Maximum number of rows (-1 for all)
    - sample: Number of rows to sample for type detection

    Returns:
        NumPy array
    """

def fromdataframe(df, include_index=False) -> Table:
    """
    Construct a table from a Pandas DataFrame.

    Parameters:
    - df: Pandas DataFrame
    - include_index: Whether to include DataFrame index as a column

    Returns:
        Table object
    """

def todataframe(table, index=None, exclude=None, columns=None,
                coerce_float=False, nrows=None):
    """
    Convert a table to a Pandas DataFrame.

    Parameters:
    - table: Input table
    - index: Column to use as DataFrame index
    - exclude: Columns to exclude
    - columns: Column names to use
    - coerce_float: Whether to coerce numeric strings to float
    - nrows: Maximum number of rows

    Returns:
        Pandas DataFrame
    """

def frompickle(source=None) -> Table:
    """Extract a table from a pickle file."""

def topickle(table, source=None, protocol=-1, write_header=True):
    """Write a table to a pickle file."""

def fromavro(source, limit=None, skips=0, **avro_args) -> Table:
    """Extract a table from an Apache Avro file."""

def toavro(table, target, schema=None, sample=9, mode='wb', **avro_args):
    """Write a table to Apache Avro format."""

def fromhdf5(source, where=None, name=None, condition=None,
             condvars=None, start=None, stop=None, step=None, **kwargs) -> Table:
    """
    Extract a table from an HDF5 file using PyTables.

    Parameters:
    - source: HDF5 file path or file object
    - where: Path to HDF5 table within file
    - name: Name of table within HDF5 file
    - condition: Selection condition
    - condvars: Variables for selection condition
    - start: Start index for selection
    - stop: Stop index for selection
    - step: Step size for selection
    - kwargs: Additional PyTables arguments

    Returns:
        Table object
    """

def tohdf5(table, source, where=None, name='table', create=False,
           description=None, title='', filters=None, expectedrows=10000,
           chunkshape=None, byteorder=None, createparents=False,
           sample=1000, **kwargs):
    """Write a table to HDF5 format using PyTables."""

def fromhdf5sorted(source, *args, **kwargs) -> Table:
    """Extract a table from HDF5 with sorted index reading."""

def frombcolz(source, expression=None, outcols=None, limit=None, skip=0) -> Table:
    """
    Extract a table from a Bcolz compressed array.

    Parameters:
    - source: Bcolz source file or object
    - expression: Query expression for filtering
    - outcols: Output columns to select
    - limit: Maximum number of rows to read
    - skip: Number of rows to skip

    Returns:
        Table object
    """

def tobcolz(table, source=None, mode='w', **bcolz_args):
    """Write a table to Bcolz compressed format."""

def fromgsheet(url=None, title=None, sheet_name=None, encoding='utf-8',
               auth_method='service_account', scopes=None, credentials=None,
               **gsheet_args) -> Table:
    """
    Extract a table from a Google Sheets document.

    Parameters:
    - url: Google Sheets URL
    - title: Sheet title
    - sheet_name: Name of specific sheet
    - encoding: Character encoding
    - auth_method: Authentication method
    - scopes: OAuth scopes
    - credentials: Authentication credentials
    - gsheet_args: Additional Google Sheets arguments

    Returns:
        Table object
    """

def togsheet(table, url=None, title=None, sheet_name=None,
             auth_method='service_account', scopes=None, credentials=None,
             **gsheet_args):
    """Write a table to Google Sheets."""
```

### Data Sources

Flexible data source classes for handling various input/output scenarios.

```python { .api }
class FileSource:
    """File source for reading/writing local files."""
    def __init__(self, filename, mode='rb'): ...

class URLSource:
    """URL-based data source for remote files."""
    def __init__(self, url, **kwargs): ...

class StringSource:
    """In-memory string source."""
    def __init__(self, string_data): ...

class GzipSource:
    """Compressed gzip file source."""
    def __init__(self, filename, mode='rb'): ...

class RemoteSource:
    """Remote file system source using fsspec."""
    def __init__(self, url, **kwargs): ...

class SMBSource:
    """SMB/CIFS network share source."""
    def __init__(self, url, **kwargs): ...
```

## Usage Examples

### Working with CSV Files

```python
import petl as etl

# Read CSV with custom options
table = etl.fromcsv('data.csv', delimiter=';', encoding='utf-8')

# Write CSV with specific formatting
etl.tocsv(table, 'output.csv', delimiter='|', quotechar='"')

# Chain operations with CSV I/O
# Note: fromcsv yields string values, so cast before numeric comparison.
result = (etl.fromcsv('input.csv')
          .select('age', lambda age: int(age) > 18)
          .cut('name', 'age')
          .sort('name'))
result.tocsv('adults.csv')
```

### Database Integration

```python
import petl as etl
from sqlalchemy import create_engine

# Connect to database
engine = create_engine('postgresql://user:pass@host:port/db')

# Extract data from database
table = etl.fromdb(engine, 'SELECT * FROM customers WHERE active = true')

# Process and write back to database
processed = table.convert('email', str.lower).convert('phone', lambda x: x.replace('-', ''))
etl.todb(processed, engine, 'customers_clean', create=True)
```

### JSON Data Processing

```python
import petl as etl

# Read JSON Lines file
table = etl.fromjson('data.jsonl')

# Convert list of dictionaries to table
data = [{'name': 'Alice', 'age': 25}, {'name': 'Bob', 'age': 30}]
table = etl.fromdicts(data)

# Write to JSON format
etl.tojson(table, 'output.json')
```

### Excel File Operations

```python
import petl as etl

# Read specific sheet and range
table = etl.fromxlsx('report.xlsx', sheet='Sales', range_string='A1:F100')

# Write to Excel with formatting
etl.toxlsx(table, 'summary.xlsx', sheet='Results')

# Append to existing Excel file
etl.appendxlsx(additional_data, 'summary.xlsx', sheet='Results')
```