Tessl Tile for pypi/clevercsv@0.8.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

core-reading-writing.md data-reading.md data-writing.md dialect-detection.md dialects-configuration.md dictionary-operations.md index.md

docs/dialects-configuration.md

0
# Dialects and Configuration
1

2
Dialect classes and configuration utilities for managing CSV parsing parameters. CleverCSV provides enhanced dialect support with the SimpleDialect class and utilities for working with various CSV formats and configurations.
3

4
## Capabilities
5

6
### SimpleDialect Class
7

8
Enhanced dialect class that provides a simplified and more flexible alternative to Python's csv.Dialect, with better support for CleverCSV's detection algorithms.
9

10
```python { .api }
11
class SimpleDialect:
12
    """
13
    Simplified dialect object for CSV parsing configuration.
14
    
15
    For delimiter, quotechar, and escapechar:
16
    - Empty string ('') means no delimiter/quotechar/escapechar in the file
17
    - None is used to mark it as undefined
18
    """
19
    
20
    def __init__(
21
        self,
22
        delimiter: Optional[str],
23
        quotechar: Optional[str],
24
        escapechar: Optional[str],
25
        strict: bool = False
26
    ):
27
        """
28
        Create a new SimpleDialect.
29
        
30
        Parameters:
31
        - delimiter: Field delimiter character
32
        - quotechar: Quote character for fields containing special characters
33
        - escapechar: Escape character for escaping delimiters/quotes
34
        - strict: Whether to enforce strict parsing
35
        """
36
    
37
    def validate(self) -> None:
38
        """
39
        Validate dialect parameters.
40
        
41
        Raises:
42
        ValueError: If any parameter is invalid
43
        """
44
    
45
    def to_csv_dialect(self) -> csv.Dialect:
46
        """
47
        Convert to standard csv.Dialect object.
48
        
49
        Returns:
50
        csv.Dialect compatible object
51
        """
52
    
53
    def to_dict(self) -> Dict[str, Union[str, bool, None]]:
54
        """
55
        Convert dialect to dictionary representation.
56
        
57
        Returns:
58
        Dictionary with dialect parameters
59
        """
60
    
61
    def serialize(self) -> str:
62
        """
63
        Serialize dialect to JSON string.
64
        
65
        Returns:
66
        JSON string representation of dialect
67
        """
68
    
69
    @classmethod
70
    def deserialize(cls, obj: str) -> 'SimpleDialect':
71
        """
72
        Deserialize dialect from JSON string.
73
        
74
        Parameters:
75
        - obj: JSON string representation
76
        
77
        Returns:
78
        SimpleDialect instance
79
        """
80
    
81
    @classmethod
82
    def from_dict(cls, d: Dict[str, Any]) -> 'SimpleDialect':
83
        """
84
        Create SimpleDialect from dictionary.
85
        
86
        Parameters:
87
        - d: Dictionary with dialect parameters
88
        
89
        Returns:
90
        SimpleDialect instance
91
        """
92
    
93
    @classmethod
94
    def from_csv_dialect(cls, d: csv.Dialect) -> 'SimpleDialect':
95
        """
96
        Create SimpleDialect from csv.Dialect.
97
        
98
        Parameters:
99
        - d: csv.Dialect instance
100
        
101
        Returns:
102
        SimpleDialect instance
103
        """
104
```
105

106
#### Usage Examples
107

108
```python
109
import csv
import json

import clevercsv

# Create custom dialect
dialect = clevercsv.SimpleDialect(',', '"', '\\', strict=True)
print(f"Delimiter: '{dialect.delimiter}'")
print(f"Quote char: '{dialect.quotechar}'")
print(f"Escape char: '{dialect.escapechar}'")

# Validate dialect
try:
    dialect.validate()
    print("Dialect is valid")
except ValueError as e:
    print(f"Invalid dialect: {e}")

# Convert to csv.Dialect for use with the standard library
# (this is why the stdlib `csv` module is imported above)
csv_dialect = dialect.to_csv_dialect()
with open('data.csv', 'r', newline='') as f:
    reader = csv.reader(f, dialect=csv_dialect)
    data = list(reader)

# Serialize dialect for storage
serialized = dialect.serialize()
print(f"Serialized: {serialized}")

# Deserialize dialect
restored_dialect = clevercsv.SimpleDialect.deserialize(serialized)
print(f"Restored: {restored_dialect}")

# Create from dictionary
dialect_dict = {'delimiter': ';', 'quotechar': "'", 'escapechar': '', 'strict': False}
dialect_from_dict = clevercsv.SimpleDialect.from_dict(dialect_dict)

# Create from csv.Dialect
csv_excel = csv.excel
simple_from_csv = clevercsv.SimpleDialect.from_csv_dialect(csv_excel)
146
```
147

148
### Predefined Dialects
149

150
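CleverCSV exposes the standard `csv` module's predefined dialects (`excel`, `excel_tab`, and `unix_dialect`) so they can be used directly with CleverCSV readers and writers.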
151

152
```python { .api }
153
# Standard CSV dialects
154
excel: csv.Dialect          # Excel-compatible format (comma-separated, quoted fields)
155
excel_tab: csv.Dialect      # Excel tab-separated format
156
unix_dialect: csv.Dialect   # Unix-style format (comma-separated, all fields quoted, '\n' line terminator)
157
```
158

159
#### Usage Examples
160

161
```python
162
import clevercsv
163

164
# Use predefined dialects
165
with open('data.csv', 'r', newline='') as f:
166
    reader = clevercsv.reader(f, dialect=clevercsv.excel)
167
    data = list(reader)
168

169
# Compare dialects
170
print("Excel dialect:")
171
excel_simple = clevercsv.SimpleDialect.from_csv_dialect(clevercsv.excel)
172
print(f"  Delimiter: '{excel_simple.delimiter}'")
173
print(f"  Quote char: '{excel_simple.quotechar}'")
174

175
print("Unix dialect:")
176
unix_simple = clevercsv.SimpleDialect.from_csv_dialect(clevercsv.unix_dialect)
177
print(f"  Delimiter: '{unix_simple.delimiter}'")
178
print(f"  Quote char: '{unix_simple.quotechar}'")
179
print(f"  Escape char: '{unix_simple.escapechar}'")
180
```
181

182
### Configuration Utilities
183

184
Utility functions for managing CSV parsing configuration and field size limits.
185

186
```python { .api }
187
def field_size_limit(*args, **kwargs) -> int:
188
    """
189
    Get or set the field size limit for CSV parsing.
190
    
191
    Parameters:
192
    - limit (optional): New field size limit in characters
193
    
194
    Returns:
195
    Previous field size limit
196
    
197
    Raises:
198
    TypeError: If limit is not an integer or too many arguments provided
199
    
200
    Notes:
201
    - Default limit is 128KB (131,072 characters)
202
    - Setting limit to 0 removes the limit (use with caution)
203
    - Large limits may impact performance and memory usage
204
    """
205
```
206

207
#### Usage Examples
208

209
```python
210
import clevercsv
211

212
# Get current field size limit
213
current_limit = clevercsv.field_size_limit()
214
print(f"Current field size limit: {current_limit} characters")
215

216
# Set new field size limit
217
old_limit = clevercsv.field_size_limit(256 * 1024)  # 256KB
218
print(f"Previous limit: {old_limit}, New limit: {clevercsv.field_size_limit()}")
219

220
# Remove field size limit (use with caution)
221
clevercsv.field_size_limit(0)
222
print("Field size limit removed")
223

224
# Restore reasonable limit
225
clevercsv.field_size_limit(128 * 1024)  # 128KB default
226
```
227

228
## Advanced Dialect Management
229

230
### Custom Dialect Creation
231

232
Create specialized dialects for unique CSV formats:
233

234
```python
235
import clevercsv
236

237
def create_pipe_separated_dialect():
238
    """Create dialect for pipe-separated values."""
239
    return clevercsv.SimpleDialect('|', '"', '\\')
240

241
def create_tab_separated_no_quotes():
242
    """Create dialect for tab-separated without quotes."""
243
    return clevercsv.SimpleDialect('\t', '', '')
244

245
def create_semicolon_single_quotes():
246
    """Create dialect for semicolon-separated with single quotes."""
247
    return clevercsv.SimpleDialect(';', "'", '')
248

249
# Usage
250
pipe_dialect = create_pipe_separated_dialect()
251
with open('pipe_data.csv', 'r', newline='') as f:
252
    reader = clevercsv.reader(f, dialect=pipe_dialect)
253
    data = list(reader)
254
```
255

256
### Dialect Comparison and Analysis
257

258
Compare and analyze different dialects:
259

260
```python
261
import clevercsv
262

263
def compare_dialects(file_path, dialects, sample_size=1000):
    """Compare how different dialects parse the same file.

    Parameters:
    - file_path: Path of the CSV file to sample
    - dialects: Mapping of display name -> dialect object (e.g. SimpleDialect)
    - sample_size: Number of leading characters of the file to parse
      (default: 1000)

    Returns:
    Dictionary mapping each dialect name to either
    {'rows': ..., 'columns': ..., 'sample_row': ...} or, when parsing
    raised, {'error': <message>}.

    Notes:
    - The sample is cut at a fixed character count, so the last parsed row
      may be incomplete.
    """
    import io  # local import keeps this snippet self-contained

    with open(file_path, 'r', newline='') as f:
        sample = f.read(sample_size)

    results = {}
    for name, dialect in dialects.items():
        try:
            # Parse the in-memory sample through the public reader API by
            # wrapping it in a file-like object.
            sample_stream = io.StringIO(sample, newline='')
            rows = list(clevercsv.reader(sample_stream, dialect=dialect))
        except Exception as e:
            # Best-effort comparison: record the failure and keep going so
            # one bad dialect does not hide the results of the others.
            results[name] = {'error': str(e)}
            continue

        first_row = rows[0] if rows else []
        results[name] = {
            'rows': len(rows),
            'columns': len(first_row),
            'sample_row': first_row
        }

    return results
284

285
# Usage
286
dialects = {
287
    'comma': clevercsv.SimpleDialect(',', '"', ''),
288
    'semicolon': clevercsv.SimpleDialect(';', '"', ''),
289
    'pipe': clevercsv.SimpleDialect('|', '"', ''),
290
    'tab': clevercsv.SimpleDialect('\t', '"', '')
291
}
292

293
comparison = compare_dialects('ambiguous.csv', dialects)
294
for name, result in comparison.items():
295
    print(f"{name}: {result}")
296
```
297

298
### Dialect Persistence
299

300
Save and load dialect configurations:
301

302
```python
303
import clevercsv
304
import json
305

306
class DialectManager:
    """Manage named dialect configurations persisted in a JSON file.

    Dialects are kept in memory as the dictionaries returned by the
    dialect's to_dict() method and are written to `config_file` after every
    change.
    """

    def __init__(self, config_file='dialects.json'):
        """
        Create a manager and load any previously saved dialects.

        Parameters:
        - config_file: Path of the JSON file used for persistence
        """
        self.config_file = config_file
        self.dialects = {}
        self.load_dialects()

    def save_dialect(self, name, dialect):
        """
        Save a dialect configuration under `name` and persist it.

        Parameters:
        - name: Key to store the dialect under (an existing entry is replaced)
        - dialect: Object providing to_dict(), e.g. a SimpleDialect
        """
        self.dialects[name] = dialect.to_dict()
        self._save_to_file()

    def load_dialect(self, name):
        """
        Load a dialect configuration.

        Returns:
        SimpleDialect built from the stored dictionary, or None if no
        dialect is stored under `name`
        """
        if name not in self.dialects:
            return None
        return clevercsv.SimpleDialect.from_dict(self.dialects[name])

    def list_dialects(self):
        """List the names of all saved dialects."""
        return list(self.dialects)

    def delete_dialect(self, name):
        """Delete a dialect configuration; unknown names are ignored."""
        if name in self.dialects:
            del self.dialects[name]
            self._save_to_file()

    def load_dialects(self):
        """Load dialects from file; a missing file yields an empty set."""
        try:
            with open(self.config_file, 'r') as f:
                self.dialects = json.load(f)
        except FileNotFoundError:
            self.dialects = {}

    def _save_to_file(self):
        """
        Save dialects to file.

        Raises:
        TypeError: If a stored value is not JSON-serializable; in that case
        the existing file is left untouched.
        """
        # Serialize before opening the file: opening with 'w' truncates it,
        # so a serialization error afterwards would destroy the saved config.
        payload = json.dumps(self.dialects, indent=2)
        with open(self.config_file, 'w') as f:
            f.write(payload)
347

348
# Usage
349
manager = DialectManager()
350

351
# Save custom dialects
352
custom_dialect = clevercsv.SimpleDialect('|', "'", '\\')
353
manager.save_dialect('pipe_single_quote', custom_dialect)
354

355
# Load and use saved dialect
356
loaded_dialect = manager.load_dialect('pipe_single_quote')
357
if loaded_dialect:
358
    with open('data.csv', 'r', newline='') as f:
359
        reader = clevercsv.reader(f, dialect=loaded_dialect)
360
        data = list(reader)
361
```
362

363
## Dialect Detection Integration
364

365
### Combining Detection and Configuration
366

367
Use detected dialects with configuration management:
368

369
```python
370
import clevercsv
371

372
def smart_csv_processing(file_path):
    """Process CSV with detection fallback to configuration.

    Parameters:
    - file_path: Path of the CSV file to read

    Returns:
    List of rows parsed with the chosen dialect

    Raises:
    ValueError: If detection yields no dialect and none of the fallback
    dialects produces a first row with more than one column
    """

    # Try automatic detection first
    dialect = clevercsv.detect_dialect(file_path)

    if dialect is not None:
        print(f"Using detected dialect: {dialect}")
    else:
        # Fallback to common dialects
        print("Detection failed, trying common dialects...")

        common_dialects = [
            clevercsv.SimpleDialect(',', '"', ''),     # Standard CSV
            clevercsv.SimpleDialect(';', '"', ''),     # European CSV
            clevercsv.SimpleDialect('\t', '"', ''),    # Tab-separated
            clevercsv.SimpleDialect('|', '"', ''),     # Pipe-separated
        ]

        for test_dialect in common_dialects:
            try:
                with open(file_path, 'r', newline='') as f:
                    reader = clevercsv.reader(f, dialect=test_dialect)
                    first_row = next(reader)
            except Exception:
                # Best-effort probing: a candidate that cannot read the file
                # (including an empty file, where next() raises
                # StopIteration) is skipped. Unlike a bare `except:`, this
                # lets KeyboardInterrupt and SystemExit propagate.
                continue

            if len(first_row) > 1:  # Reasonable number of columns
                dialect = test_dialect
                print(f"Using fallback dialect: {dialect}")
                break

        if dialect is None:
            raise ValueError("Could not determine appropriate dialect")

    # Process file with determined dialect
    with open(file_path, 'r', newline='') as f:
        reader = clevercsv.reader(f, dialect=dialect)
        return list(reader)
412

413
# Usage
414
try:
415
    data = smart_csv_processing('difficult_file.csv')
416
    print(f"Successfully processed {len(data)} rows")
417
except ValueError as e:
418
    print(f"Processing failed: {e}")
419
```
420

421
### Dialect Validation and Testing
422

423
Validate dialects against actual CSV files:
424

425
```python
426
import clevercsv
427

428
def validate_dialect_for_file(file_path, dialect):
    """Validate that a dialect works correctly for a file.

    Parameters:
    - file_path: Path of the CSV file to parse
    - dialect: Dialect to parse the file with

    Returns:
    Dictionary with:
    - 'valid': False if the file could not be read/parsed or produced no
      rows, True otherwise
    - 'issues': List of human-readable problem descriptions
    - 'statistics': Row/column statistics (empty when 'valid' is False)

    Notes:
    - Inconsistent column counts are reported in 'issues' but do not by
      themselves set 'valid' to False.
    """
    from collections import Counter  # local import keeps the snippet self-contained

    validation_results = {
        'valid': True,
        'issues': [],
        'statistics': {}
    }

    # Only reading/parsing is guarded; the statistics below cannot fail on
    # parsed rows, so they are kept out of the try block.
    try:
        with open(file_path, 'r', newline='') as f:
            reader = clevercsv.reader(f, dialect=dialect)
            rows = list(reader)
    except Exception as e:
        validation_results['valid'] = False
        validation_results['issues'].append(f'Parsing error: {str(e)}')
        return validation_results

    if not rows:
        validation_results['valid'] = False
        validation_results['issues'].append('No rows parsed')
        return validation_results

    # Check for consistent column count
    column_counts = [len(row) for row in rows]
    count_frequencies = Counter(column_counts)

    if len(count_frequencies) > 1:
        validation_results['issues'].append(
            f'Inconsistent column counts: {sorted(count_frequencies)}'
        )

    # Gather statistics
    validation_results['statistics'] = {
        'total_rows': len(rows),
        # Maps each observed column count to the number of rows having it
        'column_counts': dict(count_frequencies),
        'average_columns': sum(column_counts) / len(column_counts),
        # default=0 covers files whose rows contain no fields at all
        'max_field_length': max(
            (len(field) for row in rows for field in row), default=0
        )
    }

    return validation_results
469

470
# Usage
471
test_dialect = clevercsv.SimpleDialect(',', '"', '')
472
results = validate_dialect_for_file('test.csv', test_dialect)
473

474
if results['valid']:
475
    print("Dialect validation passed")
476
    print(f"Statistics: {results['statistics']}")
477
else:
478
    print("Dialect validation failed")
479
    print(f"Issues: {results['issues']}")
480
```

Version

Tile

Files

docs/dialects-configuration.md

docs/dialects-configuration.md