# Dictionary Operations

Dictionary-based CSV reading and writing that treats the first row as column headers, providing a more convenient interface for structured CSV data. These classes mirror Python's csv.DictReader and csv.DictWriter but with CleverCSV's enhanced dialect support.

## Capabilities

### DictReader Class

Dictionary-based CSV reader that automatically uses the first row as field names (headers) and returns each subsequent row as a dictionary.

```python { .api }
class DictReader:
    """
    CSV reader that returns rows as dictionaries.
    Uses first row as field names unless fieldnames are explicitly provided.
    """

    def __init__(
        self,
        f: Iterable[str],
        fieldnames: Optional[Sequence[str]] = None,
        restkey: Optional[str] = None,
        restval: Optional[str] = None,
        dialect: Union[str, SimpleDialect, csv.Dialect] = 'excel',
        *args,
        **kwds
    ):
        """
        Initialize dictionary CSV reader.

        Parameters:
        - f: File-like object or iterable of strings
        - fieldnames: Field names to use (first row if None)
        - restkey: Key for fields beyond fieldnames length
        - restval: Value for missing fields
        - dialect: Dialect specification
        - *args, **kwds: Additional arguments passed to underlying reader
        """

    def __iter__(self) -> Iterator[Dict[str, str]]:
        """Return iterator over dictionary rows."""

    def __next__(self) -> Dict[str, str]:
        """
        Return next row as dictionary.

        Returns:
            Dictionary mapping field names to values

        Raises:
            StopIteration: When no more rows available
        """

    @property
    def fieldnames(self) -> Sequence[str]:
        """Field names (column headers) used for dictionaries."""

    @fieldnames.setter
    def fieldnames(self, value: Sequence[str]) -> None:
        """Set field names explicitly."""

    @property
    def line_num(self) -> int:
        """Current line number being processed."""
```
#### Usage Examples

```python
import clevercsv

# Basic dictionary reading
with open('employees.csv', 'r', newline='') as f:
    reader = clevercsv.DictReader(f)
    for row in reader:
        print(f"Name: {row['name']}, Age: {row['age']}, Department: {row['dept']}")

# With automatic dialect detection
with open('data.csv', 'r', newline='') as f:
    sample = f.read()
    dialect = clevercsv.Detector().detect(sample)
    f.seek(0)
    reader = clevercsv.DictReader(f, dialect=dialect)
    records = list(reader)

# Custom field names (the first row is then treated as data, not as a header)
fieldnames = ['id', 'name', 'score', 'grade']
with open('data.csv', 'r', newline='') as f:
    reader = clevercsv.DictReader(f, fieldnames=fieldnames)
    for row in reader:
        print(f"Student {row['name']} scored {row['score']}")

# Handle extra/missing fields
with open('irregular.csv', 'r', newline='') as f:
    reader = clevercsv.DictReader(f, restkey='extra_fields', restval='N/A')
    for row in reader:
        print(f"Regular data: {row}")
        if 'extra_fields' in row:
            print(f"Extra fields: {row['extra_fields']}")
```
### DictWriter Class

Dictionary-based CSV writer that writes dictionaries as CSV rows, using field names to determine column order and handling.

```python { .api }
class DictWriter:
    """
    CSV writer that accepts dictionaries and writes them as CSV rows.
    Requires fieldnames to determine column order and content.
    """

    def __init__(
        self,
        f: SupportsWrite[str],
        fieldnames: Collection[str],
        restval: Optional[Any] = '',
        extrasaction: Literal['raise', 'ignore'] = 'raise',
        dialect: Union[str, SimpleDialect, csv.Dialect] = 'excel',
        *args,
        **kwds
    ):
        """
        Initialize dictionary CSV writer.

        Parameters:
        - f: File-like object that supports writing
        - fieldnames: Field names that determine column order
        - restval: Value for missing dictionary keys
        - extrasaction: Action for extra dictionary keys ('raise' or 'ignore')
        - dialect: Dialect specification
        - *args, **kwds: Additional arguments passed to underlying writer
        """

    def writeheader(self) -> Any:
        """
        Write header row containing field names.

        Returns:
            Return value from underlying writerow call
        """

    def writerow(self, rowdict: Mapping[str, Any]) -> Any:
        """
        Write a single dictionary as a CSV row.

        Parameters:
        - rowdict: Dictionary with field values

        Returns:
            Return value from underlying writerow call

        Raises:
            ValueError: If extrasaction='raise' and dictionary contains extra keys
        """

    def writerows(self, rowdicts: Iterable[Mapping[str, Any]]) -> None:
        """
        Write multiple dictionaries as CSV rows.

        Parameters:
        - rowdicts: Iterable of dictionaries to write

        Raises:
            ValueError: If extrasaction='raise' and any dictionary contains extra keys
        """

    @property
    def fieldnames(self) -> Collection[str]:
        """Field names that determine column order."""
```
#### Usage Examples

```python
import clevercsv

# Basic dictionary writing
data = [
    {'name': 'Alice', 'age': 30, 'city': 'New York'},
    {'name': 'Bob', 'age': 25, 'city': 'San Francisco'},
    {'name': 'Charlie', 'age': 35, 'city': 'Chicago'}
]

fieldnames = ['name', 'age', 'city']
with open('output.csv', 'w', newline='') as f:
    writer = clevercsv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data)

# With specific dialect
dialect = clevercsv.SimpleDialect(';', '"', '')
with open('output.csv', 'w', newline='') as f:
    writer = clevercsv.DictWriter(f, fieldnames=fieldnames, dialect=dialect)
    writer.writeheader()
    for row in data:
        writer.writerow(row)

# Handle missing values
data_with_missing = [
    {'name': 'Alice', 'age': 30},  # Missing 'city'
    {'name': 'Bob', 'city': 'SF'},  # Missing 'age'
]

with open('output.csv', 'w', newline='') as f:
    writer = clevercsv.DictWriter(f, fieldnames=['name', 'age', 'city'], restval='Unknown')
    writer.writeheader()
    writer.writerows(data_with_missing)

# Handle extra fields
data_with_extra = [
    {'name': 'Alice', 'age': 30, 'city': 'NYC', 'country': 'USA'},  # Extra 'country'
]

# Ignore extra fields
with open('output.csv', 'w', newline='') as f:
    writer = clevercsv.DictWriter(f, fieldnames=['name', 'age', 'city'], extrasaction='ignore')
    writer.writeheader()
    writer.writerows(data_with_extra)
```
## Advanced Usage Patterns

### Data Processing Pipeline

Process CSV data through transformation pipelines while maintaining dictionary structure:

```python
import clevercsv

def process_employee_data(input_file, output_file):
    """Process employee data with transformations."""

    transformations = []

    with open(input_file, 'r', newline='') as infile:
        reader = clevercsv.DictReader(infile)

        for row in reader:
            # Apply transformations
            row['name'] = row['name'].title()  # Capitalize names
            row['age'] = int(row['age']) if row['age'].isdigit() else 0
            row['salary'] = float(row['salary'].replace('$', '').replace(',', ''))

            # Add computed fields ('age' is already an int at this point)
            row['seniority'] = 'Senior' if row['age'] > 40 else 'Junior'

            transformations.append(row)

    # Write processed data
    if transformations:
        fieldnames = list(transformations[0].keys())
        with open(output_file, 'w', newline='') as outfile:
            writer = clevercsv.DictWriter(outfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(transformations)

# Usage
process_employee_data('employees.csv', 'processed_employees.csv')
```
### Data Validation and Filtering

Validate and filter CSV data using dictionary operations:

```python
import clevercsv

def validate_and_filter_data(filename, validation_rules):
    """Validate and filter CSV data based on rules."""

    valid_records = []
    invalid_records = []

    with open(filename, 'r', newline='') as f:
        reader = clevercsv.DictReader(f)

        for row_num, row in enumerate(reader, 1):
            errors = []

            # Apply validation rules
            for field, rule in validation_rules.items():
                if field in row:
                    if not rule(row[field]):
                        errors.append(f"Invalid {field}: {row[field]}")
                else:
                    errors.append(f"Missing required field: {field}")

            if errors:
                invalid_records.append({
                    'row_number': row_num,
                    'data': row,
                    'errors': errors
                })
            else:
                valid_records.append(row)

    return valid_records, invalid_records

# Usage
validation_rules = {
    'email': lambda x: '@' in x and '.' in x,
    'age': lambda x: x.isdigit() and 0 < int(x) < 120,
    'salary': lambda x: x.replace('$', '').replace(',', '').replace('.', '').isdigit()
}

valid_data, invalid_data = validate_and_filter_data('employees.csv', validation_rules)
print(f"Valid records: {len(valid_data)}")
print(f"Invalid records: {len(invalid_data)}")
```
### Column Mapping and Renaming

Map and rename columns during CSV processing:

```python
import clevercsv

def remap_csv_columns(input_file, output_file, column_mapping):
    """Remap column names and reorganize CSV data."""

    with open(input_file, 'r', newline='') as infile:
        reader = clevercsv.DictReader(infile)

        # Prepare data with remapped columns
        remapped_data = []
        for row in reader:
            new_row = {}
            for old_name, new_name in column_mapping.items():
                if old_name in row:
                    new_row[new_name] = row[old_name]
                else:
                    new_row[new_name] = ''  # Default for missing columns
            remapped_data.append(new_row)

    # Write remapped data
    if remapped_data:
        fieldnames = list(column_mapping.values())
        with open(output_file, 'w', newline='') as outfile:
            writer = clevercsv.DictWriter(outfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(remapped_data)

# Usage
column_mapping = {
    'full_name': 'name',
    'years_old': 'age',
    'home_city': 'city',
    'job_title': 'position'
}

remap_csv_columns('input.csv', 'output.csv', column_mapping)
```
## Error Handling

### Handling Duplicate Field Names

CleverCSV warns about duplicate field names in headers:

```python
import clevercsv
import warnings

# Capture warnings about duplicate headers
with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter("always")

    with open('file_with_duplicate_headers.csv', 'r', newline='') as f:
        reader = clevercsv.DictReader(f)
        data = list(reader)

    if w:
        for warning in w:
            print(f"Warning: {warning.message}")
```

### Handling Extra Fields

```python
import clevercsv

# Raise error on extra fields
try:
    with open('output.csv', 'w', newline='') as f:
        writer = clevercsv.DictWriter(f, fieldnames=['a', 'b'], extrasaction='raise')
        writer.writerow({'a': '1', 'b': '2', 'c': '3'})  # 'c' is extra
except ValueError as e:
    print(f"Extra field error: {e}")

# Ignore extra fields silently
with open('output.csv', 'w', newline='') as f:
    writer = clevercsv.DictWriter(f, fieldnames=['a', 'b'], extrasaction='ignore')
    writer.writerow({'a': '1', 'b': '2', 'c': '3'})  # 'c' ignored
```

### Handling Missing Fields

```python
import clevercsv

# Use restval for missing fields
data = [{'name': 'Alice'}, {'name': 'Bob', 'age': 25}]  # Missing 'age' in first row

with open('output.csv', 'w', newline='') as f:
    writer = clevercsv.DictWriter(f, fieldnames=['name', 'age'], restval='N/A')
    writer.writeheader()
    writer.writerows(data)
```
## Performance Considerations

### Memory Efficiency for Large Files

```python
import clevercsv

def process_large_csv_efficiently(filename):
    """Process large CSV files without loading all data into memory."""

    with open(filename, 'r', newline='') as f:
        reader = clevercsv.DictReader(f)

        # Process one row at a time
        for row in reader:
            # Process row immediately
            process_single_record(row)
            # Don't store rows in a list - keeps memory usage constant

def process_large_csv_inefficiently(filename):
    """Inefficient approach that loads everything into memory."""

    with open(filename, 'r', newline='') as f:
        reader = clevercsv.DictReader(f)
        all_records = list(reader)  # Loads the entire file into memory

    for record in all_records:
        process_single_record(record)
```

### Field Name Optimization

```python
# Efficient: access fieldnames once
reader = clevercsv.DictReader(file)
fieldnames = reader.fieldnames  # Cache fieldnames
for row in reader:
    # Use cached fieldnames if needed
    process_row(row, fieldnames)

# Less efficient: access fieldnames repeatedly in the loop
reader = clevercsv.DictReader(file)
for row in reader:
    fieldnames = reader.fieldnames  # Repeated attribute access
    process_row(row, fieldnames)
```