# Data Writing

High-level function for writing tabular data to CSV files with automatic formatting and RFC-4180 compliance by default. This wrapper function provides a convenient interface for common CSV writing tasks while supporting custom dialects and formatting options.

## Capabilities

### Table Writing

Write tabular data (lists of lists) to CSV files with support for transposition and custom dialects.

```python { .api }
def write_table(
    table: Iterable[Iterable[Any]],
    filename: Union[str, PathLike],
    dialect: Union[str, SimpleDialect, csv.Dialect] = 'excel',
    transpose: bool = False,
    encoding: Optional[str] = None
) -> None:
    """
    Write a table (list of lists) to a CSV file.

    Parameters:
    - table: Table data as an iterable of rows (each row is an iterable of values)
    - filename: Path to output CSV file
    - dialect: Dialect to use for writing (default: 'excel' for RFC-4180 compliance)
    - transpose: Transpose table before writing (swap rows and columns)
    - encoding: Text encoding for output file (platform default if None)

    Raises:
        ValueError: If table rows have inconsistent lengths

    Notes:
    - Empty tables result in no file being created
    - All rows must have the same number of columns (after transposition if applicable)
    - Uses RFC-4180 compliant 'excel' dialect by default for standardized output
    """
```
#### Usage Examples

```python
import clevercsv

# Basic table writing
data = [
    ['Name', 'Age', 'City'],
    ['Alice', 30, 'New York'],
    ['Bob', 25, 'San Francisco'],
    ['Charlie', 35, 'Chicago']
]

clevercsv.write_table(data, 'employees.csv')

# Write with custom dialect
pipe_dialect = clevercsv.SimpleDialect('|', '"', '')
clevercsv.write_table(data, 'pipe_separated.csv', dialect=pipe_dialect)

# Transpose data (swap rows and columns)
clevercsv.write_table(data, 'transposed.csv', transpose=True)

# Write with specific encoding
clevercsv.write_table(data, 'utf8_output.csv', encoding='utf-8')

# Write numeric data
numeric_data = [
    ['X', 'Y', 'Z'],
    [1.5, 2.7, 3.14159],
    [4.2, 5.8, 6.28318],
    [7.1, 8.9, 9.42477]
]

clevercsv.write_table(numeric_data, 'numeric.csv')

# Write generator data (memory efficient)
def generate_data():
    yield ['ID', 'Value']
    for i in range(1000):
        yield [i, f'Value_{i}']

clevercsv.write_table(generate_data(), 'generated.csv')
```
## Advanced Writing Patterns

### Data Processing and Export

Process data and export results with appropriate formatting:

```python
import clevercsv
from datetime import datetime

def export_processed_data(input_data, output_file):
    """Process and export data with formatting."""

    processed_rows = [['ID', 'Name', 'Email', 'Created Date', 'Active', 'Score']]

    for record in input_data:
        processed_row = [
            record['id'],
            record['name'].title(),  # Capitalize names
            record['email'].lower(),  # Lowercase emails
            datetime.now().strftime('%Y-%m-%d'),
            'Yes' if record.get('active', False) else 'No',
            f"{record.get('score', 0):.2f}"  # Format numbers
        ]
        processed_rows.append(processed_row)

    # Write with standard CSV format for compatibility
    clevercsv.write_table(processed_rows, output_file)
    print(f"Exported {len(processed_rows)-1} records to {output_file}")

# Usage
raw_data = [
    {'id': 1, 'name': 'alice smith', 'email': 'ALICE@EXAMPLE.COM', 'active': True, 'score': 95.678},
    {'id': 2, 'name': 'bob jones', 'email': 'BOB@EXAMPLE.COM', 'active': False, 'score': 78.234}
]

export_processed_data(raw_data, 'processed_export.csv')
```
### Multi-Format Export

Export data in multiple CSV formats:

```python
import clevercsv

def export_multiple_formats(data, base_filename):
    """Export data in multiple CSV formats."""

    formats = {
        'standard': clevercsv.SimpleDialect(',', '"', ''),
        'excel': 'excel',
        'tab_separated': clevercsv.SimpleDialect('\t', '"', ''),
        'pipe_separated': clevercsv.SimpleDialect('|', '"', ''),
        'semicolon_european': clevercsv.SimpleDialect(';', '"', '')
    }

    for format_name, dialect in formats.items():
        output_file = f"{base_filename}_{format_name}.csv"
        clevercsv.write_table(data, output_file, dialect=dialect)
        print(f"Exported {format_name} format to {output_file}")

# Usage
sample_data = [
    ['Product', 'Price', 'Category'],
    ['Laptop', '$999.99', 'Electronics'],
    ['Book', '$19.95', 'Education'],
    ['Coffee Mug', '$12.50', 'Kitchen']
]

export_multiple_formats(sample_data, 'products')
```
### Streaming Large Dataset Export

Export large datasets efficiently without loading all data into memory:

```python
import clevercsv

class StreamingTableExporter:
    """Export large tabular datasets with streaming to manage memory usage."""

    def __init__(self, filename, dialect='excel', encoding=None):
        self.filename = filename
        self.dialect = dialect
        self.encoding = encoding
        self.file = None
        self.writer = None
        self.row_count = 0

    def __enter__(self):
        self.file = open(self.filename, 'w', newline='', encoding=self.encoding)
        self.writer = clevercsv.writer(self.file, dialect=self.dialect)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.file:
            self.file.close()
            print(f"Exported {self.row_count} rows to {self.filename}")

    def write_row(self, row):
        """Write a single row."""
        self.writer.writerow(row)
        self.row_count += 1

    def write_rows(self, rows):
        """Write multiple rows."""
        for row in rows:
            self.write_row(row)

# Usage
# Export large dataset with streaming
with StreamingTableExporter('large_export.csv') as exporter:
    # Write header
    exporter.write_row(['ID', 'Name', 'Department', 'Salary', 'Hire Date'])

    # Process data in batches to manage memory
    for batch_start in range(0, 100000, 1000):  # 100k records in 1k batches
        batch_data = generate_employee_batch(batch_start, 1000)  # Your data generator
        exporter.write_rows(batch_data)
```
### Data Validation Before Export

Validate data before writing to ensure quality:

```python
import clevercsv
from typing import List, Any

def validate_and_export_table(data: List[List[Any]], filename: str, validation_rules: dict):
    """Validate tabular data and export with error reporting."""

    if not data:
        print("No data to export")
        return 0, 0

    header = data[0]
    rows = data[1:]

    valid_rows = [header]  # Include header
    invalid_data = []

    for i, row in enumerate(rows):
        errors = []

        # Check row length
        if len(row) != len(header):
            errors.append(f"Expected {len(header)} columns, got {len(row)}")

        # Apply validation rules to each column
        for col_idx, (col_name, validator) in enumerate(validation_rules.items()):
            if col_idx < len(row):
                try:
                    if not validator(row[col_idx]):
                        errors.append(f"Invalid {col_name}: {row[col_idx]}")
                except Exception as e:
                    errors.append(f"Validation error for {col_name}: {e}")
            else:
                errors.append(f"Missing value for {col_name}")

        if errors:
            invalid_data.append({
                'row_index': i + 1,  # +1 for header
                'row': row,
                'errors': errors
            })
        else:
            valid_rows.append(row)

    # Export valid rows
    if len(valid_rows) > 1:  # More than just header
        clevercsv.write_table(valid_rows, filename)
        print(f"Exported {len(valid_rows)-1} valid rows to {filename}")

    # Export invalid rows for review
    if invalid_data:
        error_filename = filename.replace('.csv', '_errors.csv')
        error_rows = [header + ['_errors', '_row_index']]  # Add error columns

        for item in invalid_data:
            error_row = list(item['row'])
            # Pad row to match header length
            while len(error_row) < len(header):
                error_row.append('')
            error_row.extend(['; '.join(item['errors']), str(item['row_index'])])
            error_rows.append(error_row)

        clevercsv.write_table(error_rows, error_filename)
        print(f"Exported {len(invalid_data)} invalid rows to {error_filename}")

    return len(valid_rows) - 1, len(invalid_data)

# Usage
validation_rules = {
    'Name': lambda x: isinstance(x, str) and len(x.strip()) > 0,
    'Age': lambda x: str(x).isdigit() and 0 < int(x) < 150,
    'Email': lambda x: '@' in str(x) and '.' in str(x)
}

test_data = [
    ['Name', 'Age', 'Email'],
    ['Alice', '30', 'alice@example.com'],
    ['', '25', 'bob@example.com'],  # Invalid: empty name
    ['Charlie', '200', 'invalid-email'],  # Invalid: age too high, bad email
    ['Dave', '35']  # Invalid: missing email
]

valid_count, invalid_count = validate_and_export_table(test_data, 'validated_export.csv', validation_rules)
print(f"Validation complete: {valid_count} valid, {invalid_count} invalid")
```
## Working with Dictionary Data

While `write_dicts` is not available in the main package API, you can write dictionary data using the DictWriter class:

```python
import clevercsv

# Convert dictionaries to table format for write_table
def write_dict_data_as_table(dict_data, filename, fieldnames=None):
    """Write dictionary data using write_table."""

    if not dict_data:
        return

    # Get fieldnames from first dictionary if not provided
    if fieldnames is None:
        fieldnames = list(dict_data[0].keys())

    # Convert to table format
    table_data = [fieldnames]  # Header row
    for record in dict_data:
        row = [record.get(field, '') for field in fieldnames]
        table_data.append(row)

    clevercsv.write_table(table_data, filename)

# Alternative: Use DictWriter directly
def write_dict_data_with_dictwriter(dict_data, filename, fieldnames=None):
    """Write dictionary data using DictWriter."""

    if not dict_data:
        return

    if fieldnames is None:
        fieldnames = list(dict_data[0].keys())

    with open(filename, 'w', newline='') as f:
        writer = clevercsv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(dict_data)

# Usage
records = [
    {'name': 'Alice', 'age': 30, 'city': 'New York'},
    {'name': 'Bob', 'age': 25, 'city': 'San Francisco'},
    {'name': 'Charlie', 'age': 35, 'city': 'Chicago'}
]

# Method 1: Convert to table
write_dict_data_as_table(records, 'method1_output.csv')

# Method 2: Use DictWriter
write_dict_data_with_dictwriter(records, 'method2_output.csv')
```
## Performance Considerations

### Memory Efficiency

```python
# Memory efficient: Use generators or iterators
def generate_large_table():
    yield ['ID', 'Value', 'Timestamp']
    for i in range(1000000):
        yield [i, f'value_{i}', datetime.now().isoformat()]

clevercsv.write_table(generate_large_table(), 'large_file.csv')  # Constant memory usage

# Memory intensive: Load all data first
large_data = list(generate_large_table())  # Loads all 1M rows into memory
clevercsv.write_table(large_data, 'large_file.csv')  # High memory usage
```

### Write Performance

```python
# Faster: Prepare all data first, then write once
all_rows = prepare_all_data()
clevercsv.write_table(all_rows, 'output.csv')

# Slower: Multiple file operations (avoid this pattern)
for i, row_data in enumerate(data_source):
    mode = 'w' if i == 0 else 'a'
    # Opening file repeatedly is inefficient
    clevercsv.write_table([row_data], 'output.csv')  # Don't do this
```

### Dialect Selection for Compatibility

```python
# Maximum compatibility: Use 'excel' dialect (RFC-4180)
clevercsv.write_table(data, 'compatible.csv', dialect='excel')

# Custom requirements: Create appropriate dialect
european_dialect = clevercsv.SimpleDialect(';', '"', '')  # Common in Europe
clevercsv.write_table(data, 'european.csv', dialect=european_dialect)
```
## Error Handling

### Handling Write Errors

```python
import clevercsv

def safe_csv_write(data, filename):
    """Write CSV with error handling."""
    try:
        clevercsv.write_table(data, filename)
        print(f"Successfully wrote {len(data)} rows to {filename}")
        return True
    except ValueError as e:
        print(f"Data validation error: {e}")
        return False
    except IOError as e:
        print(f"File write error: {e}")
        return False
    except Exception as e:
        print(f"Unexpected error: {e}")
        return False

# Usage
test_data = [
    ['A', 'B', 'C'],
    ['1', '2', '3'],
    ['4', '5']  # Inconsistent row length - will cause ValueError
]

success = safe_csv_write(test_data, 'test_output.csv')
if not success:
    print("Write operation failed, check data consistency")
```
### Validation and Recovery

```python
import clevercsv

def write_with_validation(table, filename):
    """Write table with row length validation and repair."""
    if not table:
        print("Empty table - no file created")
        return

    # Check for consistent row lengths
    row_lengths = [len(row) for row in table]
    if len(set(row_lengths)) > 1:
        print(f"Inconsistent row lengths detected: {set(row_lengths)}")

        # Option 1: Pad short rows
        max_length = max(row_lengths)
        padded_table = []
        for row in table:
            padded_row = list(row) + [''] * (max_length - len(row))
            padded_table.append(padded_row)

        print(f"Padded short rows to {max_length} columns")
        clevercsv.write_table(padded_table, filename)

        # Option 2: Truncate long rows (alternative approach)
        # min_length = min(row_lengths)
        # truncated_table = [row[:min_length] for row in table]
        # clevercsv.write_table(truncated_table, filename)
    else:
        clevercsv.write_table(table, filename)
        print(f"Successfully wrote consistent table with {row_lengths[0]} columns")

# Usage
inconsistent_data = [
    ['Name', 'Age', 'City', 'Country'],
    ['Alice', '30', 'New York'],  # Missing country
    ['Bob', '25', 'SF', 'USA', 'Extra']  # Extra field
]

write_with_validation(inconsistent_data, 'repaired_output.csv')
```