or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-reading-writing.md, data-reading.md, data-writing.md, dialect-detection.md, dialects-configuration.md, dictionary-operations.md, index.md

docs/dictionary-operations.md

0

# Dictionary Operations

1

2

Dictionary-based CSV reading and writing that treats the first row as column headers, providing a more convenient interface for structured CSV data. These classes mirror Python's csv.DictReader and csv.DictWriter but with CleverCSV's enhanced dialect support.

3

4

## Capabilities

5

6

### DictReader Class

7

8

Dictionary-based CSV reader that automatically uses the first row as field names (headers) and returns each subsequent row as a dictionary.

9

10

```python { .api }

11

class DictReader:

12

"""

13

CSV reader that returns rows as dictionaries.

14

Uses first row as field names unless fieldnames are explicitly provided.

15

"""

16

17

def __init__(

18

self,

19

f: Iterable[str],

20

fieldnames: Optional[Sequence[str]] = None,

21

restkey: Optional[str] = None,

22

restval: Optional[str] = None,

23

dialect: Union[str, SimpleDialect, csv.Dialect] = 'excel',

24

*args,

25

**kwds

26

):

27

"""

28

Initialize dictionary CSV reader.

29

30

Parameters:

31

- f: File-like object or iterable of strings

32

- fieldnames: Field names to use (first row if None)

33

- restkey: Key for fields beyond fieldnames length

34

- restval: Value for missing fields

35

- dialect: Dialect specification

36

- *args, **kwds: Additional arguments passed to underlying reader

37

"""

38

39

def __iter__(self) -> Iterator[Dict[str, str]]:

40

"""Return iterator over dictionary rows."""

41

42

def __next__(self) -> Dict[str, str]:

43

"""

44

Return next row as dictionary.

45

46

Returns:

47

Dictionary mapping field names to values

48

49

Raises:

50

StopIteration: When no more rows available

51

"""

52

53

@property

54

def fieldnames(self) -> Sequence[str]:

55

"""Field names (column headers) used for dictionaries."""

56

57

@fieldnames.setter

58

def fieldnames(self, value: Sequence[str]) -> None:

59

"""Set field names explicitly."""

60

61

@property

62

def line_num(self) -> int:

63

"""Current line number being processed."""

64

```

65

66

#### Usage Examples

67

68

```python

69

import clevercsv

70

71

# Basic dictionary reading

72

with open('employees.csv', 'r', newline='') as f:

73

reader = clevercsv.DictReader(f)

74

for row in reader:

75

print(f"Name: {row['name']}, Age: {row['age']}, Department: {row['dept']}")

76

77

# With automatic dialect detection

78

with open('data.csv', 'r', newline='') as f:

79

sample = f.read()

80

dialect = clevercsv.Detector().detect(sample)

81

f.seek(0)

82

reader = clevercsv.DictReader(f, dialect=dialect)

83

records = list(reader)

84

85

# Custom field names (ignore first row)

86

fieldnames = ['id', 'name', 'score', 'grade']

87

with open('data.csv', 'r', newline='') as f:

88

reader = clevercsv.DictReader(f, fieldnames=fieldnames)

89

for row in reader:

90

print(f"Student {row['name']} scored {row['score']}")

91

92

# Handle extra/missing fields

93

with open('irregular.csv', 'r', newline='') as f:

94

reader = clevercsv.DictReader(f, restkey='extra_fields', restval='N/A')

95

for row in reader:

96

print(f"Regular data: {row}")

97

if 'extra_fields' in row:

98

print(f"Extra fields: {row['extra_fields']}")

99

```

100

101

### DictWriter Class

102

103

Dictionary-based CSV writer that writes dictionaries as CSV rows, using field names to determine column order and handling.

104

105

```python { .api }

106

class DictWriter:

107

"""

108

CSV writer that accepts dictionaries and writes them as CSV rows.

109

Requires fieldnames to determine column order and content.

110

"""

111

112

def __init__(

113

self,

114

f: SupportsWrite[str],

115

fieldnames: Collection[str],

116

restval: Optional[Any] = '',

117

extrasaction: Literal['raise', 'ignore'] = 'raise',

118

dialect: Union[str, SimpleDialect, csv.Dialect] = 'excel',

119

*args,

120

**kwds

121

):

122

"""

123

Initialize dictionary CSV writer.

124

125

Parameters:

126

- f: File-like object that supports writing

127

- fieldnames: Field names that determine column order

128

- restval: Value for missing dictionary keys

129

- extrasaction: Action for extra dictionary keys ('raise' or 'ignore')

130

- dialect: Dialect specification

131

- *args, **kwds: Additional arguments passed to underlying writer

132

"""

133

134

def writeheader(self) -> Any:

135

"""

136

Write header row containing field names.

137

138

Returns:

139

Return value from underlying writerow call

140

"""

141

142

def writerow(self, rowdict: Mapping[str, Any]) -> Any:

143

"""

144

Write a single dictionary as a CSV row.

145

146

Parameters:

147

- rowdict: Dictionary with field values

148

149

Returns:

150

Return value from underlying writerow call

151

152

Raises:

153

ValueError: If extrasaction='raise' and dictionary contains extra keys

154

"""

155

156

def writerows(self, rowdicts: Iterable[Mapping[str, Any]]) -> None:

157

"""

158

Write multiple dictionaries as CSV rows.

159

160

Parameters:

161

- rowdicts: Iterable of dictionaries to write

162

163

Raises:

164

ValueError: If extrasaction='raise' and any dictionary contains extra keys

165

"""

166

167

@property

168

def fieldnames(self) -> Collection[str]:

169

"""Field names that determine column order."""

170

```

171

172

#### Usage Examples

173

174

```python

175

import clevercsv

176

177

# Basic dictionary writing

178

data = [

179

{'name': 'Alice', 'age': 30, 'city': 'New York'},

180

{'name': 'Bob', 'age': 25, 'city': 'San Francisco'},

181

{'name': 'Charlie', 'age': 35, 'city': 'Chicago'}

182

]

183

184

fieldnames = ['name', 'age', 'city']

185

with open('output.csv', 'w', newline='') as f:

186

writer = clevercsv.DictWriter(f, fieldnames=fieldnames)

187

writer.writeheader()

188

writer.writerows(data)

189

190

# With specific dialect

191

dialect = clevercsv.SimpleDialect(';', '"', '')

192

with open('output.csv', 'w', newline='') as f:

193

writer = clevercsv.DictWriter(f, fieldnames=fieldnames, dialect=dialect)

194

writer.writeheader()

195

for row in data:

196

writer.writerow(row)

197

198

# Handle missing values

199

data_with_missing = [

200

{'name': 'Alice', 'age': 30}, # Missing 'city'

201

{'name': 'Bob', 'city': 'SF'}, # Missing 'age'

202

]

203

204

with open('output.csv', 'w', newline='') as f:

205

writer = clevercsv.DictWriter(f, fieldnames=['name', 'age', 'city'], restval='Unknown')

206

writer.writeheader()

207

writer.writerows(data_with_missing)

208

209

# Handle extra fields

210

data_with_extra = [

211

{'name': 'Alice', 'age': 30, 'city': 'NYC', 'country': 'USA'}, # Extra 'country'

212

]

213

214

# Ignore extra fields

215

with open('output.csv', 'w', newline='') as f:

216

writer = clevercsv.DictWriter(f, fieldnames=['name', 'age', 'city'], extrasaction='ignore')

217

writer.writeheader()

218

writer.writerows(data_with_extra)

219

```

220

221

## Advanced Usage Patterns

222

223

### Data Processing Pipeline

224

225

Process CSV data through transformation pipelines while maintaining dictionary structure:

226

227

```python

228

import clevercsv

229

230

def process_employee_data(input_file, output_file):

231

"""Process employee data with transformations."""

232

233

transformations = []

234

235

with open(input_file, 'r', newline='') as infile:

236

reader = clevercsv.DictReader(infile)

237

238

for row in reader:

239

# Apply transformations

240

row['name'] = row['name'].title() # Capitalize names

241

row['age'] = int(row['age']) if row['age'].isdigit() else 0

242

row['salary'] = float(row['salary'].replace('$', '').replace(',', ''))

243

244

# Add computed fields

245

row['seniority'] = 'Senior' if int(row['age']) > 40 else 'Junior'

246

247

transformations.append(row)

248

249

# Write processed data

250

if transformations:

251

fieldnames = list(transformations[0].keys())

252

with open(output_file, 'w', newline='') as outfile:

253

writer = clevercsv.DictWriter(outfile, fieldnames=fieldnames)

254

writer.writeheader()

255

writer.writerows(transformations)

256

257

# Usage

258

process_employee_data('employees.csv', 'processed_employees.csv')

259

```

260

261

### Data Validation and Filtering

262

263

Validate and filter CSV data using dictionary operations:

264

265

```python

266

import clevercsv

267

268

def validate_and_filter_data(filename, validation_rules):

269

"""Validate and filter CSV data based on rules."""

270

271

valid_records = []

272

invalid_records = []

273

274

with open(filename, 'r', newline='') as f:

275

reader = clevercsv.DictReader(f)

276

277

for row_num, row in enumerate(reader, 1):

278

errors = []

279

280

# Apply validation rules

281

for field, rule in validation_rules.items():

282

if field in row:

283

if not rule(row[field]):

284

errors.append(f"Invalid {field}: {row[field]}")

285

else:

286

errors.append(f"Missing required field: {field}")

287

288

if errors:

289

invalid_records.append({

290

'row_number': row_num,

291

'data': row,

292

'errors': errors

293

})

294

else:

295

valid_records.append(row)

296

297

return valid_records, invalid_records

298

299

# Usage

300

validation_rules = {

301

'email': lambda x: '@' in x and '.' in x,

302

'age': lambda x: x.isdigit() and 0 < int(x) < 120,

303

'salary': lambda x: x.replace('$', '').replace(',', '').replace('.', '').isdigit()

304

}

305

306

valid_data, invalid_data = validate_and_filter_data('employees.csv', validation_rules)

307

print(f"Valid records: {len(valid_data)}")

308

print(f"Invalid records: {len(invalid_data)}")

309

```

310

311

### Column Mapping and Renaming

312

313

Map and rename columns during CSV processing:

314

315

```python

316

import clevercsv

317

318

def remap_csv_columns(input_file, output_file, column_mapping):

319

"""Remap column names and reorganize CSV data."""

320

321

with open(input_file, 'r', newline='') as infile:

322

reader = clevercsv.DictReader(infile)

323

324

# Prepare data with remapped columns

325

remapped_data = []

326

for row in reader:

327

new_row = {}

328

for old_name, new_name in column_mapping.items():

329

if old_name in row:

330

new_row[new_name] = row[old_name]

331

else:

332

new_row[new_name] = '' # Default for missing columns

333

remapped_data.append(new_row)

334

335

# Write remapped data

336

if remapped_data:

337

fieldnames = list(column_mapping.values())

338

with open(output_file, 'w', newline='') as outfile:

339

writer = clevercsv.DictWriter(outfile, fieldnames=fieldnames)

340

writer.writeheader()

341

writer.writerows(remapped_data)

342

343

# Usage

344

column_mapping = {

345

'full_name': 'name',

346

'years_old': 'age',

347

'home_city': 'city',

348

'job_title': 'position'

349

}

350

351

remap_csv_columns('input.csv', 'output.csv', column_mapping)

352

```

353

354

## Error Handling

355

356

### Handling Duplicate Field Names

357

358

CleverCSV warns about duplicate field names in headers:

359

360

```python

361

import clevercsv

362

import warnings

363

364

# Capture warnings about duplicate headers

365

with warnings.catch_warnings(record=True) as w:

366

warnings.simplefilter("always")

367

368

with open('file_with_duplicate_headers.csv', 'r', newline='') as f:

369

reader = clevercsv.DictReader(f)

370

data = list(reader)

371

372

if w:

373

for warning in w:

374

print(f"Warning: {warning.message}")

375

```

376

377

### Handling Extra Fields

378

379

```python

380

import clevercsv

381

382

# Raise error on extra fields

383

try:

384

with open('output.csv', 'w', newline='') as f:

385

writer = clevercsv.DictWriter(f, fieldnames=['a', 'b'], extrasaction='raise')

386

writer.writerow({'a': '1', 'b': '2', 'c': '3'}) # 'c' is extra

387

except ValueError as e:

388

print(f"Extra field error: {e}")

389

390

# Ignore extra fields silently

391

with open('output.csv', 'w', newline='') as f:

392

writer = clevercsv.DictWriter(f, fieldnames=['a', 'b'], extrasaction='ignore')

393

writer.writerow({'a': '1', 'b': '2', 'c': '3'}) # 'c' ignored

394

```

395

396

### Handling Missing Fields

397

398

```python

399

import clevercsv

400

401

# Use restval for missing fields

402

data = [{'name': 'Alice'}, {'name': 'Bob', 'age': 25}] # Missing 'age' in first row

403

404

with open('output.csv', 'w', newline='') as f:

405

writer = clevercsv.DictWriter(f, fieldnames=['name', 'age'], restval='N/A')

406

writer.writeheader()

407

writer.writerows(data)

408

```

409

410

## Performance Considerations

411

412

### Memory Efficiency for Large Files

413

414

```python

415

import clevercsv

416

417

def process_large_csv_efficiently(filename):

418

"""Process large CSV files without loading all data into memory."""

419

420

with open(filename, 'r', newline='') as f:

421

reader = clevercsv.DictReader(f)

422

423

# Process one row at a time

424

for row in reader:

425

# Process row immediately

426

process_single_record(row)

427

# Don't store in list - keeps memory usage constant

428

429

def process_large_csv_inefficiently(filename):

430

"""Inefficient approach that loads everything into memory."""

431

432

with open(filename, 'r', newline='') as f:

433

reader = clevercsv.DictReader(f)

434

all_records = list(reader) # Loads entire file into memory

435

436

for record in all_records:

437

process_single_record(record)

438

```

439

440

### Field Name Optimization

441

442

```python

443

# Efficient: Access fieldnames once

444

reader = clevercsv.DictReader(file)

445

fieldnames = reader.fieldnames # Cache fieldnames

446

for row in reader:

447

# Use cached fieldnames if needed

448

process_row(row, fieldnames)

449

450

# Less efficient: Access fieldnames repeatedly in loop

451

reader = clevercsv.DictReader(file)

452

for row in reader:

453

fieldnames = reader.fieldnames # Repeated access

454

process_row(row, fieldnames)

455

```