# Data Writing

High-level function for writing tabular data to CSV files with automatic formatting and RFC-4180 compliance by default. This wrapper function provides a convenient interface for common CSV writing tasks while supporting custom dialects and formatting options.

## Capabilities

### Table Writing

Write tabular data (lists of lists) to CSV files with support for transposition and custom dialects.

```python { .api }
def write_table(
    table: Iterable[Iterable[Any]],
    filename: Union[str, PathLike],
    dialect: Union[str, SimpleDialect, csv.Dialect] = 'excel',
    transpose: bool = False,
    encoding: Optional[str] = None
) -> None:
    """
    Write a table (list of lists) to a CSV file.

    Parameters:
    - table: Table data as an iterable of rows (each row is an iterable of values)
    - filename: Path to output CSV file
    - dialect: Dialect to use for writing (default: 'excel' for RFC-4180 compliance)
    - transpose: Transpose table before writing (swap rows and columns)
    - encoding: Text encoding for output file (platform default if None)

    Raises:
        ValueError: If table rows have inconsistent lengths

    Notes:
    - Empty tables result in no file being created
    - All rows must have the same number of columns (after transposition if applicable)
    - Uses RFC-4180 compliant 'excel' dialect by default for standardized output
    """
```
#### Usage Examples

```python
import clevercsv

# Basic table writing
data = [
    ['Name', 'Age', 'City'],
    ['Alice', 30, 'New York'],
    ['Bob', 25, 'San Francisco'],
    ['Charlie', 35, 'Chicago']
]

clevercsv.write_table(data, 'employees.csv')

# Write with custom dialect
pipe_dialect = clevercsv.SimpleDialect('|', '"', '')
clevercsv.write_table(data, 'pipe_separated.csv', dialect=pipe_dialect)

# Transpose data (swap rows and columns)
clevercsv.write_table(data, 'transposed.csv', transpose=True)

# Write with specific encoding
clevercsv.write_table(data, 'utf8_output.csv', encoding='utf-8')

# Write numeric data
numeric_data = [
    ['X', 'Y', 'Z'],
    [1.5, 2.7, 3.14159],
    [4.2, 5.8, 6.28318],
    [7.1, 8.9, 9.42477]
]

clevercsv.write_table(numeric_data, 'numeric.csv')

# Write generator data (memory efficient)
def generate_data():
    yield ['ID', 'Value']
    for i in range(1000):
        yield [i, f'Value_{i}']

clevercsv.write_table(generate_data(), 'generated.csv')
```
## Advanced Writing Patterns

### Data Processing and Export

Process data and export results with appropriate formatting:

```python
import clevercsv
from datetime import datetime

def export_processed_data(input_data, output_file):
    """Process and export data with formatting."""

    processed_rows = [['ID', 'Name', 'Email', 'Created Date', 'Active', 'Score']]

    for record in input_data:
        processed_row = [
            record['id'],
            record['name'].title(),  # Capitalize names
            record['email'].lower(),  # Lowercase emails
            datetime.now().strftime('%Y-%m-%d'),
            'Yes' if record.get('active', False) else 'No',
            f"{record.get('score', 0):.2f}"  # Format numbers
        ]
        processed_rows.append(processed_row)

    # Write with standard CSV format for compatibility
    clevercsv.write_table(processed_rows, output_file)
    print(f"Exported {len(processed_rows)-1} records to {output_file}")

# Usage
raw_data = [
    {'id': 1, 'name': 'alice smith', 'email': 'ALICE@EXAMPLE.COM', 'active': True, 'score': 95.678},
    {'id': 2, 'name': 'bob jones', 'email': 'BOB@EXAMPLE.COM', 'active': False, 'score': 78.234}
]

export_processed_data(raw_data, 'processed_export.csv')
```
### Multi-Format Export

Export data in multiple CSV formats:

```python
import clevercsv

def export_multiple_formats(data, base_filename):
    """Export data in multiple CSV formats."""

    formats = {
        'standard': clevercsv.SimpleDialect(',', '"', ''),
        'excel': 'excel',
        'tab_separated': clevercsv.SimpleDialect('\t', '"', ''),
        'pipe_separated': clevercsv.SimpleDialect('|', '"', ''),
        'semicolon_european': clevercsv.SimpleDialect(';', '"', '')
    }

    for format_name, dialect in formats.items():
        output_file = f"{base_filename}_{format_name}.csv"
        clevercsv.write_table(data, output_file, dialect=dialect)
        print(f"Exported {format_name} format to {output_file}")

# Usage
sample_data = [
    ['Product', 'Price', 'Category'],
    ['Laptop', '$999.99', 'Electronics'],
    ['Book', '$19.95', 'Education'],
    ['Coffee Mug', '$12.50', 'Kitchen']
]

export_multiple_formats(sample_data, 'products')
```
### Streaming Large Dataset Export

Export large datasets efficiently without loading all data into memory:

```python
import clevercsv

class StreamingTableExporter:
    """Export large tabular datasets with streaming to manage memory usage."""

    def __init__(self, filename, dialect='excel', encoding=None):
        self.filename = filename
        self.dialect = dialect
        self.encoding = encoding
        self.file = None
        self.writer = None
        self.row_count = 0

    def __enter__(self):
        self.file = open(self.filename, 'w', newline='', encoding=self.encoding)
        self.writer = clevercsv.writer(self.file, dialect=self.dialect)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.file:
            self.file.close()
            print(f"Exported {self.row_count} rows to {self.filename}")

    def write_row(self, row):
        """Write a single row."""
        self.writer.writerow(row)
        self.row_count += 1

    def write_rows(self, rows):
        """Write multiple rows."""
        for row in rows:
            self.write_row(row)

# Usage
# Export large dataset with streaming
with StreamingTableExporter('large_export.csv') as exporter:
    # Write header
    exporter.write_row(['ID', 'Name', 'Department', 'Salary', 'Hire Date'])

    # Process data in batches to manage memory
    for batch_start in range(0, 100000, 1000):  # 100k records in 1k batches
        batch_data = generate_employee_batch(batch_start, 1000)  # Your data generator
        exporter.write_rows(batch_data)
```
### Data Validation Before Export

Validate data before writing to ensure quality:

```python
import clevercsv
from typing import List, Any

def validate_and_export_table(data: List[List[Any]], filename: str, validation_rules: dict):
    """Validate tabular data and export with error reporting."""

    if not data:
        print("No data to export")
        return 0, 0

    header = data[0]
    rows = data[1:]

    valid_rows = [header]  # Include header
    invalid_data = []

    for i, row in enumerate(rows):
        errors = []

        # Check row length
        if len(row) != len(header):
            errors.append(f"Expected {len(header)} columns, got {len(row)}")

        # Apply validation rules to each column
        for col_idx, (col_name, validator) in enumerate(validation_rules.items()):
            if col_idx < len(row):
                try:
                    if not validator(row[col_idx]):
                        errors.append(f"Invalid {col_name}: {row[col_idx]}")
                except Exception as e:
                    errors.append(f"Validation error for {col_name}: {e}")
            else:
                errors.append(f"Missing value for {col_name}")

        if errors:
            invalid_data.append({
                'row_index': i + 1,  # +1 for header
                'row': row,
                'errors': errors
            })
        else:
            valid_rows.append(row)

    # Export valid rows
    if len(valid_rows) > 1:  # More than just header
        clevercsv.write_table(valid_rows, filename)
        print(f"Exported {len(valid_rows)-1} valid rows to {filename}")

    # Export invalid rows for review
    if invalid_data:
        error_filename = filename.replace('.csv', '_errors.csv')
        error_rows = [header + ['_errors', '_row_index']]  # Add error columns

        for item in invalid_data:
            error_row = list(item['row'])
            # Pad row to match header length
            while len(error_row) < len(header):
                error_row.append('')
            error_row.extend(['; '.join(item['errors']), str(item['row_index'])])
            error_rows.append(error_row)

        clevercsv.write_table(error_rows, error_filename)
        print(f"Exported {len(invalid_data)} invalid rows to {error_filename}")

    return len(valid_rows) - 1, len(invalid_data)

# Usage
validation_rules = {
    'Name': lambda x: isinstance(x, str) and len(x.strip()) > 0,
    'Age': lambda x: str(x).isdigit() and 0 < int(x) < 150,
    'Email': lambda x: '@' in str(x) and '.' in str(x)
}

test_data = [
    ['Name', 'Age', 'Email'],
    ['Alice', '30', 'alice@example.com'],
    ['', '25', 'bob@example.com'],  # Invalid: empty name
    ['Charlie', '200', 'invalid-email'],  # Invalid: age too high, bad email
    ['Dave', '35']  # Invalid: missing email
]

valid_count, invalid_count = validate_and_export_table(test_data, 'validated_export.csv', validation_rules)
print(f"Validation complete: {valid_count} valid, {invalid_count} invalid")
```
## Working with Dictionary Data

While `write_dicts` is not available in the main package API, you can write dictionary data using the DictWriter class:

```python
import clevercsv

# Convert dictionaries to table format for write_table
def write_dict_data_as_table(dict_data, filename, fieldnames=None):
    """Write dictionary data using write_table."""

    if not dict_data:
        return

    # Get fieldnames from first dictionary if not provided
    if fieldnames is None:
        fieldnames = list(dict_data[0].keys())

    # Convert to table format
    table_data = [fieldnames]  # Header row
    for record in dict_data:
        row = [record.get(field, '') for field in fieldnames]
        table_data.append(row)

    clevercsv.write_table(table_data, filename)

# Alternative: Use DictWriter directly
def write_dict_data_with_dictwriter(dict_data, filename, fieldnames=None):
    """Write dictionary data using DictWriter."""

    if not dict_data:
        return

    if fieldnames is None:
        fieldnames = list(dict_data[0].keys())

    with open(filename, 'w', newline='') as f:
        writer = clevercsv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(dict_data)

# Usage
records = [
    {'name': 'Alice', 'age': 30, 'city': 'New York'},
    {'name': 'Bob', 'age': 25, 'city': 'San Francisco'},
    {'name': 'Charlie', 'age': 35, 'city': 'Chicago'}
]

# Method 1: Convert to table
write_dict_data_as_table(records, 'method1_output.csv')

# Method 2: Use DictWriter
write_dict_data_with_dictwriter(records, 'method2_output.csv')
```
## Performance Considerations

### Memory Efficiency

```python
# Memory efficient: Use generators or iterators
def generate_large_table():
    yield ['ID', 'Value', 'Timestamp']
    for i in range(1000000):
        yield [i, f'value_{i}', datetime.now().isoformat()]

clevercsv.write_table(generate_large_table(), 'large_file.csv')  # Constant memory usage

# Memory intensive: Load all data first
large_data = list(generate_large_table())  # Loads all 1M rows into memory
clevercsv.write_table(large_data, 'large_file.csv')  # High memory usage
```

### Write Performance

```python
# Faster: Prepare all data first, then write once
all_rows = prepare_all_data()
clevercsv.write_table(all_rows, 'output.csv')

# Slower: Multiple file operations (avoid this pattern)
for i, row_data in enumerate(data_source):
    mode = 'w' if i == 0 else 'a'
    # Opening file repeatedly is inefficient
    clevercsv.write_table([row_data], 'output.csv')  # Don't do this
```

### Dialect Selection for Compatibility

```python
# Maximum compatibility: Use 'excel' dialect (RFC-4180)
clevercsv.write_table(data, 'compatible.csv', dialect='excel')

# Custom requirements: Create appropriate dialect
european_dialect = clevercsv.SimpleDialect(';', '"', '')  # Common in Europe
clevercsv.write_table(data, 'european.csv', dialect=european_dialect)
```
## Error Handling

### Handling Write Errors

```python
import clevercsv

def safe_csv_write(data, filename):
    """Write CSV with error handling."""
    try:
        clevercsv.write_table(data, filename)
        print(f"Successfully wrote {len(data)} rows to {filename}")
        return True
    except ValueError as e:
        print(f"Data validation error: {e}")
        return False
    except IOError as e:
        print(f"File write error: {e}")
        return False
    except Exception as e:
        print(f"Unexpected error: {e}")
        return False

# Usage
test_data = [
    ['A', 'B', 'C'],
    ['1', '2', '3'],
    ['4', '5']  # Inconsistent row length - will cause ValueError
]

success = safe_csv_write(test_data, 'test_output.csv')
if not success:
    print("Write operation failed, check data consistency")
```
### Validation and Recovery

```python
import clevercsv

def write_with_validation(table, filename):
    """Write table with row length validation and repair."""
    if not table:
        print("Empty table - no file created")
        return

    # Check for consistent row lengths
    row_lengths = [len(row) for row in table]
    if len(set(row_lengths)) > 1:
        print(f"Inconsistent row lengths detected: {set(row_lengths)}")

        # Option 1: Pad short rows
        max_length = max(row_lengths)
        padded_table = []
        for row in table:
            padded_row = list(row) + [''] * (max_length - len(row))
            padded_table.append(padded_row)

        print(f"Padded short rows to {max_length} columns")
        clevercsv.write_table(padded_table, filename)

        # Option 2: Truncate long rows (alternative approach)
        # min_length = min(row_lengths)
        # truncated_table = [row[:min_length] for row in table]
        # clevercsv.write_table(truncated_table, filename)
    else:
        clevercsv.write_table(table, filename)
        print(f"Successfully wrote consistent table with {row_lengths[0]} columns")

# Usage
inconsistent_data = [
    ['Name', 'Age', 'City', 'Country'],
    ['Alice', '30', 'New York'],  # Missing country
    ['Bob', '25', 'SF', 'USA', 'Extra']  # Extra field
]

write_with_validation(inconsistent_data, 'repaired_output.csv')
```