# Data Writing

`write_table` is a high-level function for writing tabular data to CSV files, with automatic formatting and RFC-4180 compliant output by default. The wrapper provides a convenient interface for common CSV writing tasks while supporting custom dialects and formatting options.
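
For example, a minimal call looks like this (a sketch; the expected file contents assume the default `'excel'` dialect):

```python
import clevercsv

# Write a small table using the default RFC-4180 ('excel') dialect
clevercsv.write_table([['Name', 'Age'], ['Alice', 30]], 'people.csv')

# people.csv should contain:
#   Name,Age
#   Alice,30
```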

## Capabilities

### Table Writing

Write tabular data (lists of lists) to CSV files with support for transposition and custom dialects.

```python { .api }
def write_table(
    table: Iterable[Iterable[Any]],
    filename: Union[str, PathLike],
    dialect: Union[str, SimpleDialect, csv.Dialect] = 'excel',
    transpose: bool = False,
    encoding: Optional[str] = None
) -> None:
    """
    Write a table (list of lists) to a CSV file.

    Parameters:
    - table: Table data as an iterable of rows (each row is an iterable of values)
    - filename: Path to the output CSV file
    - dialect: Dialect to use for writing (default: 'excel' for RFC-4180 compliance)
    - transpose: Transpose the table before writing (swap rows and columns)
    - encoding: Text encoding for the output file (platform default if None)

    Raises:
        ValueError: If table rows have inconsistent lengths

    Notes:
    - Empty tables result in no file being created
    - All rows must have the same number of columns (after transposition, if applicable)
    - Uses the RFC-4180 compliant 'excel' dialect by default for standardized output
    """
```

#### Usage Examples

```python
import clevercsv

# Basic table writing
data = [
    ['Name', 'Age', 'City'],
    ['Alice', 30, 'New York'],
    ['Bob', 25, 'San Francisco'],
    ['Charlie', 35, 'Chicago']
]

clevercsv.write_table(data, 'employees.csv')

# Write with a custom dialect
pipe_dialect = clevercsv.SimpleDialect('|', '"', '')
clevercsv.write_table(data, 'pipe_separated.csv', dialect=pipe_dialect)

# Transpose data (swap rows and columns)
clevercsv.write_table(data, 'transposed.csv', transpose=True)

# Write with a specific encoding
clevercsv.write_table(data, 'utf8_output.csv', encoding='utf-8')

# Write numeric data
numeric_data = [
    ['X', 'Y', 'Z'],
    [1.5, 2.7, 3.14159],
    [4.2, 5.8, 6.28318],
    [7.1, 8.9, 9.42477]
]

clevercsv.write_table(numeric_data, 'numeric.csv')

# Write generator data (memory efficient)
def generate_data():
    yield ['ID', 'Value']
    for i in range(1000):
        yield [i, f'Value_{i}']

clevercsv.write_table(generate_data(), 'generated.csv')
```
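
To make the `transpose` option concrete, the following sketch writes a transposed table and reads it back with clevercsv's `read_table` wrapper (note that values read from CSV come back as strings):

```python
import clevercsv

data = [
    ['Name', 'Age', 'City'],
    ['Alice', 30, 'New York'],
]

# Rows and columns are swapped before writing: 3 rows of 2 columns
clevercsv.write_table(data, 'transposed.csv', transpose=True)

print(clevercsv.read_table('transposed.csv'))
# Expected: [['Name', 'Alice'], ['Age', '30'], ['City', 'New York']]
```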

## Advanced Writing Patterns

### Data Processing and Export

Process data and export results with appropriate formatting:

```python
import clevercsv
from datetime import datetime

def export_processed_data(input_data, output_file):
    """Process and export data with formatting."""

    processed_rows = [['ID', 'Name', 'Email', 'Created Date', 'Active', 'Score']]

    for record in input_data:
        processed_row = [
            record['id'],
            record['name'].title(),    # Capitalize names
            record['email'].lower(),   # Lowercase emails
            datetime.now().strftime('%Y-%m-%d'),
            'Yes' if record.get('active', False) else 'No',
            f"{record.get('score', 0):.2f}"  # Format numbers
        ]
        processed_rows.append(processed_row)

    # Write with standard CSV format for compatibility
    clevercsv.write_table(processed_rows, output_file)
    print(f"Exported {len(processed_rows) - 1} records to {output_file}")

# Usage
raw_data = [
    {'id': 1, 'name': 'alice smith', 'email': 'ALICE@EXAMPLE.COM', 'active': True, 'score': 95.678},
    {'id': 2, 'name': 'bob jones', 'email': 'BOB@EXAMPLE.COM', 'active': False, 'score': 78.234}
]

export_processed_data(raw_data, 'processed_export.csv')
```

### Multi-Format Export

Export data in multiple CSV formats:

```python
import clevercsv

def export_multiple_formats(data, base_filename):
    """Export data in multiple CSV formats."""

    formats = {
        'standard': clevercsv.SimpleDialect(',', '"', ''),
        'excel': 'excel',
        'tab_separated': clevercsv.SimpleDialect('\t', '"', ''),
        'pipe_separated': clevercsv.SimpleDialect('|', '"', ''),
        'semicolon_european': clevercsv.SimpleDialect(';', '"', '')
    }

    for format_name, dialect in formats.items():
        output_file = f"{base_filename}_{format_name}.csv"
        clevercsv.write_table(data, output_file, dialect=dialect)
        print(f"Exported {format_name} format to {output_file}")

# Usage
sample_data = [
    ['Product', 'Price', 'Category'],
    ['Laptop', '$999.99', 'Electronics'],
    ['Book', '$19.95', 'Education'],
    ['Coffee Mug', '$12.50', 'Kitchen']
]

export_multiple_formats(sample_data, 'products')
```

### Streaming Large Dataset Export

Export large datasets efficiently without loading all data into memory:

```python
import clevercsv

class StreamingTableExporter:
    """Export large tabular datasets with streaming to manage memory usage."""

    def __init__(self, filename, dialect='excel', encoding=None):
        self.filename = filename
        self.dialect = dialect
        self.encoding = encoding
        self.file = None
        self.writer = None
        self.row_count = 0

    def __enter__(self):
        self.file = open(self.filename, 'w', newline='', encoding=self.encoding)
        self.writer = clevercsv.writer(self.file, dialect=self.dialect)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.file:
            self.file.close()
        print(f"Exported {self.row_count} rows to {self.filename}")

    def write_row(self, row):
        """Write a single row."""
        self.writer.writerow(row)
        self.row_count += 1

    def write_rows(self, rows):
        """Write multiple rows."""
        for row in rows:
            self.write_row(row)

# Usage: export a large dataset with streaming
with StreamingTableExporter('large_export.csv') as exporter:
    # Write header
    exporter.write_row(['ID', 'Name', 'Department', 'Salary', 'Hire Date'])

    # Process data in batches to manage memory
    for batch_start in range(0, 100000, 1000):  # 100k records in 1k batches
        batch_data = generate_employee_batch(batch_start, 1000)  # Your data generator
        exporter.write_rows(batch_data)
```
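
Note the `newline=''` argument when opening the file: csv-style writers (including clevercsv's drop-in `writer`) expect it, so that line endings are controlled by the dialect rather than translated by the platform.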

### Data Validation Before Export

Validate data before writing to ensure quality:

```python
import clevercsv
from typing import List, Any

def validate_and_export_table(data: List[List[Any]], filename: str, validation_rules: dict):
    """Validate tabular data and export with error reporting."""

    if not data:
        print("No data to export")
        return 0, 0

    header = data[0]
    rows = data[1:]

    valid_rows = [header]  # Include header
    invalid_data = []

    for i, row in enumerate(rows):
        errors = []

        # Check row length
        if len(row) != len(header):
            errors.append(f"Expected {len(header)} columns, got {len(row)}")

        # Apply validation rules to each column
        for col_idx, (col_name, validator) in enumerate(validation_rules.items()):
            if col_idx < len(row):
                try:
                    if not validator(row[col_idx]):
                        errors.append(f"Invalid {col_name}: {row[col_idx]}")
                except Exception as e:
                    errors.append(f"Validation error for {col_name}: {e}")
            else:
                errors.append(f"Missing value for {col_name}")

        if errors:
            invalid_data.append({
                'row_index': i + 1,  # +1 for header
                'row': row,
                'errors': errors
            })
        else:
            valid_rows.append(row)

    # Export valid rows
    if len(valid_rows) > 1:  # More than just the header
        clevercsv.write_table(valid_rows, filename)
        print(f"Exported {len(valid_rows) - 1} valid rows to {filename}")

    # Export invalid rows for review
    if invalid_data:
        error_filename = filename.replace('.csv', '_errors.csv')
        error_rows = [header + ['_errors', '_row_index']]  # Add error columns

        for item in invalid_data:
            # Truncate overlong rows so every error row matches the header width
            error_row = list(item['row'])[:len(header)]
            # Pad short rows to match the header length
            while len(error_row) < len(header):
                error_row.append('')
            error_row.extend(['; '.join(item['errors']), str(item['row_index'])])
            error_rows.append(error_row)

        clevercsv.write_table(error_rows, error_filename)
        print(f"Exported {len(invalid_data)} invalid rows to {error_filename}")

    return len(valid_rows) - 1, len(invalid_data)

# Usage
validation_rules = {
    'Name': lambda x: isinstance(x, str) and len(x.strip()) > 0,
    'Age': lambda x: str(x).isdigit() and 0 < int(x) < 150,
    'Email': lambda x: '@' in str(x) and '.' in str(x)
}

test_data = [
    ['Name', 'Age', 'Email'],
    ['Alice', '30', 'alice@example.com'],
    ['', '25', 'bob@example.com'],        # Invalid: empty name
    ['Charlie', '200', 'invalid-email'],  # Invalid: age too high, bad email
    ['Dave', '35']                        # Invalid: missing email
]

valid_count, invalid_count = validate_and_export_table(test_data, 'validated_export.csv', validation_rules)
print(f"Validation complete: {valid_count} valid, {invalid_count} invalid")
```

## Working with Dictionary Data

While `write_dicts` is not available in the main package API, you can write dictionary data either by converting it to a table for `write_table` or by using the `DictWriter` class:

```python
import clevercsv

# Convert dictionaries to table format for write_table
def write_dict_data_as_table(dict_data, filename, fieldnames=None):
    """Write dictionary data using write_table."""

    if not dict_data:
        return

    # Get fieldnames from the first dictionary if not provided
    if fieldnames is None:
        fieldnames = list(dict_data[0].keys())

    # Convert to table format
    table_data = [fieldnames]  # Header row
    for record in dict_data:
        row = [record.get(field, '') for field in fieldnames]
        table_data.append(row)

    clevercsv.write_table(table_data, filename)

# Alternative: use DictWriter directly
def write_dict_data_with_dictwriter(dict_data, filename, fieldnames=None):
    """Write dictionary data using DictWriter."""

    if not dict_data:
        return

    if fieldnames is None:
        fieldnames = list(dict_data[0].keys())

    with open(filename, 'w', newline='') as f:
        writer = clevercsv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(dict_data)

# Usage
records = [
    {'name': 'Alice', 'age': 30, 'city': 'New York'},
    {'name': 'Bob', 'age': 25, 'city': 'San Francisco'},
    {'name': 'Charlie', 'age': 35, 'city': 'Chicago'}
]

# Method 1: Convert to table
write_dict_data_as_table(records, 'method1_output.csv')

# Method 2: Use DictWriter
write_dict_data_with_dictwriter(records, 'method2_output.csv')
```

## Performance Considerations

### Memory Efficiency

```python
import clevercsv
from datetime import datetime

# Memory efficient: use generators or iterators
def generate_large_table():
    yield ['ID', 'Value', 'Timestamp']
    for i in range(1000000):
        yield [i, f'value_{i}', datetime.now().isoformat()]

clevercsv.write_table(generate_large_table(), 'large_file.csv')  # Constant memory usage

# Memory intensive: load all data first
large_data = list(generate_large_table())  # Loads all 1M rows into memory
clevercsv.write_table(large_data, 'large_file.csv')  # High memory usage
```
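
One caveat: `transpose=True` requires the full table in order to swap rows and columns, so passing a generator with transposition enabled cannot stay constant-memory.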

### Write Performance

```python
# Faster: prepare all data first, then write once
all_rows = prepare_all_data()
clevercsv.write_table(all_rows, 'output.csv')

# Slower: one write_table call per row (avoid this pattern)
for row_data in data_source:
    # Reopening the file for every row is inefficient, and each call
    # overwrites the previous contents rather than appending
    clevercsv.write_table([row_data], 'output.csv')  # Don't do this
```

### Dialect Selection for Compatibility

```python
# Maximum compatibility: use the 'excel' dialect (RFC-4180)
clevercsv.write_table(data, 'compatible.csv', dialect='excel')

# Custom requirements: create an appropriate dialect
european_dialect = clevercsv.SimpleDialect(';', '"', '')  # Common in Europe
clevercsv.write_table(data, 'european.csv', dialect=european_dialect)
```
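
The three positional `SimpleDialect` arguments used throughout this page are the delimiter, the quote character, and the escape character. As a sketch (assuming `SimpleDialect`'s `delimiter`, `quotechar`, and `escapechar` parameter names), passing them by keyword can make intent clearer:

```python
# Equivalent to clevercsv.SimpleDialect(';', '"', '')
european_dialect = clevercsv.SimpleDialect(
    delimiter=';',
    quotechar='"',
    escapechar='',  # empty string means no escape character
)
```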

## Error Handling

### Handling Write Errors

```python
import clevercsv

def safe_csv_write(data, filename):
    """Write CSV with error handling."""
    try:
        clevercsv.write_table(data, filename)
        print(f"Successfully wrote {len(data)} rows to {filename}")
        return True
    except ValueError as e:
        print(f"Data validation error: {e}")
        return False
    except IOError as e:
        print(f"File write error: {e}")
        return False
    except Exception as e:
        print(f"Unexpected error: {e}")
        return False

# Usage
test_data = [
    ['A', 'B', 'C'],
    ['1', '2', '3'],
    ['4', '5']  # Inconsistent row length - will cause ValueError
]

success = safe_csv_write(test_data, 'test_output.csv')
if not success:
    print("Write operation failed, check data consistency")
```

### Validation and Recovery

```python
import clevercsv

def write_with_validation(table, filename):
    """Write table with row length validation and repair."""
    if not table:
        print("Empty table - no file created")
        return

    # Check for consistent row lengths
    row_lengths = [len(row) for row in table]
    if len(set(row_lengths)) > 1:
        print(f"Inconsistent row lengths detected: {set(row_lengths)}")

        # Option 1: Pad short rows
        max_length = max(row_lengths)
        padded_table = []
        for row in table:
            padded_row = list(row) + [''] * (max_length - len(row))
            padded_table.append(padded_row)

        print(f"Padded short rows to {max_length} columns")
        clevercsv.write_table(padded_table, filename)

        # Option 2: Truncate long rows (alternative approach)
        # min_length = min(row_lengths)
        # truncated_table = [row[:min_length] for row in table]
        # clevercsv.write_table(truncated_table, filename)
    else:
        clevercsv.write_table(table, filename)
        print(f"Successfully wrote consistent table with {row_lengths[0]} columns")

# Usage
inconsistent_data = [
    ['Name', 'Age', 'City', 'Country'],
    ['Alice', '30', 'New York'],         # Missing country
    ['Bob', '25', 'SF', 'USA', 'Extra']  # Extra field
]

write_with_validation(inconsistent_data, 'repaired_output.csv')
```