A Python package for handling messy CSV files with enhanced dialect detection capabilities
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
High-level function for writing tabular data to CSV files with automatic formatting and RFC-4180 compliance by default. This wrapper function provides a convenient interface for common CSV writing tasks while supporting custom dialects and formatting options.
Write tabular data (lists of lists) to CSV files with support for transposition and custom dialects.
def write_table(
    table: Iterable[Iterable[Any]],
    filename: Union[str, PathLike],
    dialect: Union[str, SimpleDialect, csv.Dialect] = 'excel',
    transpose: bool = False,
    encoding: Optional[str] = None
) -> None:
    """
    Write a table (list of lists) to a CSV file.

    Parameters:
    - table: Table data as an iterable of rows (each row is an iterable of values)
    - filename: Path to output CSV file
    - dialect: Dialect to use for writing (default: 'excel' for RFC-4180 compliance)
    - transpose: Transpose table before writing (swap rows and columns)
    - encoding: Text encoding for output file (platform default if None)

    Raises:
        ValueError: If table rows have inconsistent lengths

    Notes:
    - Empty tables result in no file being created
    - All rows must have the same number of columns (after transposition if applicable)
    - Uses RFC-4180 compliant 'excel' dialect by default for standardized output
    """
    # Materialize rows so generator input can be validated and transposed.
    rows = [list(row) for row in table]
    # Reject ragged tables before any output is produced. Validation runs
    # before transposition because zip(*rows) silently truncates ragged input.
    if rows:
        width = len(rows[0])
        if any(len(row) != width for row in rows):
            raise ValueError("Table rows must all have the same length")
    if transpose:
        # Swap rows and columns; input is rectangular at this point.
        rows = [list(column) for column in zip(*rows)]
    # Empty tables produce no output file (documented behavior).
    if not rows:
        return
    # A clevercsv SimpleDialect converts to a stdlib csv dialect; dialect
    # names ('excel', ...) and csv.Dialect instances pass through unchanged.
    # NOTE(review): assumes SimpleDialect exposes to_csv_dialect() - confirm.
    if hasattr(dialect, 'to_csv_dialect'):
        dialect = dialect.to_csv_dialect()
    # newline='' is required by the csv module to avoid blank lines on Windows.
    with open(filename, 'w', newline='', encoding=encoding) as handle:
        csv.writer(handle, dialect=dialect).writerows(rows)


# The usage examples below rely on the installed clevercsv package.
try:
    import clevercsv
except ImportError:  # allow reading the examples without the package installed
    clevercsv = None
# Basic table writing
data = [
    ['Name', 'Age', 'City'],
    ['Alice', 30, 'New York'],
    ['Bob', 25, 'San Francisco'],
    ['Charlie', 35, 'Chicago']
]
clevercsv.write_table(data, 'employees.csv')

# Write with custom dialect
# SimpleDialect takes (delimiter, quotechar, escapechar).
pipe_dialect = clevercsv.SimpleDialect('|', '"', '')
clevercsv.write_table(data, 'pipe_separated.csv', dialect=pipe_dialect)

# Transpose data (swap rows and columns)
clevercsv.write_table(data, 'transposed.csv', transpose=True)

# Write with specific encoding
clevercsv.write_table(data, 'utf8_output.csv', encoding='utf-8')

# Write numeric data
numeric_data = [
    ['X', 'Y', 'Z'],
    [1.5, 2.7, 3.14159],
    [4.2, 5.8, 6.28318],
    [7.1, 8.9, 9.42477]
]
clevercsv.write_table(numeric_data, 'numeric.csv')

# Write generator data (memory efficient)
def generate_data():
    # Header row first, then 1000 data rows, yielded lazily.
    yield ['ID', 'Value']
    for i in range(1000):
        yield [i, f'Value_{i}']
clevercsv.write_table(generate_data(), 'generated.csv')

Process data and export results with appropriate formatting:
import clevercsv
from datetime import datetime
def export_processed_data(input_data, output_file):
    """Process raw records and export them as a formatted CSV file.

    Parameters:
    - input_data: iterable of dicts with keys 'id', 'name', 'email' and
      optional 'active' (bool) and 'score' (number)
    - output_file: path of the CSV file to create
    """
    # Capture the export date once so every row carries the same value;
    # calling datetime.now() per row could straddle midnight mid-export.
    export_date = datetime.now().strftime('%Y-%m-%d')
    processed_rows = [['ID', 'Name', 'Email', 'Created Date', 'Active', 'Score']]
    for record in input_data:
        processed_row = [
            record['id'],
            record['name'].title(),  # Capitalize names
            record['email'].lower(),  # Lowercase emails
            export_date,
            'Yes' if record.get('active', False) else 'No',
            f"{record.get('score', 0):.2f}"  # Format numbers
        ]
        processed_rows.append(processed_row)
    # Write with standard CSV format for compatibility
    clevercsv.write_table(processed_rows, output_file)
    print(f"Exported {len(processed_rows)-1} records to {output_file}")
# Usage
# Raw records with mixed-case names/emails, as they might arrive from input.
raw_data = [
    {'id': 1, 'name': 'alice smith', 'email': 'ALICE@EXAMPLE.COM', 'active': True, 'score': 95.678},
    {'id': 2, 'name': 'bob jones', 'email': 'BOB@EXAMPLE.COM', 'active': False, 'score': 78.234}
]
export_processed_data(raw_data, 'processed_export.csv')

Export data in multiple CSV formats:
import clevercsv
def export_multiple_formats(data, base_filename):
    """Export data in multiple CSV formats."""
    # (suffix, dialect) pairs: 'excel' is a stdlib dialect name, the rest
    # are clevercsv SimpleDialect(delimiter, quotechar, escapechar) values.
    named_dialects = (
        ('standard', clevercsv.SimpleDialect(',', '"', '')),
        ('excel', 'excel'),
        ('tab_separated', clevercsv.SimpleDialect('\t', '"', '')),
        ('pipe_separated', clevercsv.SimpleDialect('|', '"', '')),
        ('semicolon_european', clevercsv.SimpleDialect(';', '"', ''))
    )
    for name, chosen_dialect in named_dialects:
        target = f"{base_filename}_{name}.csv"
        clevercsv.write_table(data, target, dialect=chosen_dialect)
        print(f"Exported {name} format to {target}")
# Usage
# Sample product table; prices kept as pre-formatted strings.
sample_data = [
    ['Product', 'Price', 'Category'],
    ['Laptop', '$999.99', 'Electronics'],
    ['Book', '$19.95', 'Education'],
    ['Coffee Mug', '$12.50', 'Kitchen']
]
export_multiple_formats(sample_data, 'products')

Export large datasets efficiently without loading all data into memory:
import clevercsv
class StreamingTableExporter:
    """Export large tabular datasets with streaming to manage memory usage.

    Use as a context manager: rows are written incrementally, so memory
    usage stays constant regardless of dataset size.
    """

    def __init__(self, filename, dialect='excel', encoding=None):
        self.filename = filename
        self.dialect = dialect
        self.encoding = encoding
        self.file = None       # open file handle while inside the context
        self.writer = None     # clevercsv writer bound to self.file
        self.row_count = 0     # rows written so far

    def __enter__(self):
        # newline='' is required by the csv module to avoid blank lines.
        self.file = open(self.filename, 'w', newline='', encoding=self.encoding)
        try:
            self.writer = clevercsv.writer(self.file, dialect=self.dialect)
        except Exception:
            # Don't leak the file handle if the writer cannot be created.
            self.file.close()
            self.file = None
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.file:
            self.file.close()
            self.file = None
            self.writer = None
        # Only report success when the with-block exited cleanly; previously
        # the message was printed even when an exception aborted the export.
        if exc_type is None:
            print(f"Exported {self.row_count} rows to {self.filename}")

    def write_row(self, row):
        """Write a single row."""
        self.writer.writerow(row)
        self.row_count += 1

    def write_rows(self, rows):
        """Write multiple rows."""
        for row in rows:
            self.write_row(row)
# Usage
# Export large dataset with streaming
with StreamingTableExporter('large_export.csv') as exporter:
    # Write header
    exporter.write_row(['ID', 'Name', 'Department', 'Salary', 'Hire Date'])
    # Process data in batches to manage memory
    for batch_start in range(0, 100000, 1000):  # 100k records in 1k batches
        # NOTE(review): generate_employee_batch is assumed to be supplied by
        # the caller; it is not defined in this example.
        batch_data = generate_employee_batch(batch_start, 1000)  # Your data generator
        exporter.write_rows(batch_data)

Validate data before writing to ensure quality:
import clevercsv
from typing import List, Any
def validate_and_export_table(data: List[List[Any]], filename: str, validation_rules: dict):
    """Validate tabular data and export with error reporting.

    Parameters:
    - data: table whose first row is the header
    - filename: destination CSV for valid rows; invalid rows go to a
      companion file with '_errors' inserted before the '.csv' suffix
    - validation_rules: mapping of column name -> predicate. Rules are
      matched to columns by position, so entries must be listed in the
      same order as the header columns.

    Returns:
        (valid_count, invalid_count): number of rows exported to each file.
    """
    if not data:
        print("No data to export")
        return 0, 0
    header = data[0]
    rows = data[1:]
    valid_rows = [header]  # Include header
    invalid_data = []
    for i, row in enumerate(rows):
        errors = []
        # Check row length
        if len(row) != len(header):
            errors.append(f"Expected {len(header)} columns, got {len(row)}")
        # Apply validation rules to each column (matched by position)
        for col_idx, (col_name, validator) in enumerate(validation_rules.items()):
            if col_idx < len(row):
                try:
                    if not validator(row[col_idx]):
                        errors.append(f"Invalid {col_name}: {row[col_idx]}")
                except Exception as e:
                    # A crashing validator is recorded as an error, not fatal.
                    errors.append(f"Validation error for {col_name}: {e}")
            else:
                errors.append(f"Missing value for {col_name}")
        if errors:
            invalid_data.append({
                'row_index': i + 1,  # +1 for header
                'row': row,
                'errors': errors
            })
        else:
            valid_rows.append(row)
    # Export valid rows
    if len(valid_rows) > 1:  # More than just header
        clevercsv.write_table(valid_rows, filename)
        # Fixed: previously printed the literal '(unknown)' instead of the
        # destination filename.
        print(f"Exported {len(valid_rows)-1} valid rows to {filename}")
    # Export invalid rows for review
    if invalid_data:
        error_filename = filename.replace('.csv', '_errors.csv')
        error_rows = [header + ['_errors', '_row_index']]  # Add error columns
        for item in invalid_data:
            error_row = list(item['row'])
            # Pad row to match header length
            while len(error_row) < len(header):
                error_row.append('')
            error_row.extend(['; '.join(item['errors']), str(item['row_index'])])
            error_rows.append(error_row)
        clevercsv.write_table(error_rows, error_filename)
        print(f"Exported {len(invalid_data)} invalid rows to {error_filename}")
    return len(valid_rows) - 1, len(invalid_data)
# Usage
# Rules map column name -> predicate; list them in header column order,
# since validate_and_export_table matches rules to columns by position.
validation_rules = {
    'Name': lambda x: isinstance(x, str) and len(x.strip()) > 0,
    'Age': lambda x: str(x).isdigit() and 0 < int(x) < 150,
    'Email': lambda x: '@' in str(x) and '.' in str(x)
}
test_data = [
    ['Name', 'Age', 'Email'],
    ['Alice', '30', 'alice@example.com'],
    ['', '25', 'bob@example.com'],  # Invalid: empty name
    ['Charlie', '200', 'invalid-email'],  # Invalid: age too high, bad email
    ['Dave', '35']  # Invalid: missing email
]
valid_count, invalid_count = validate_and_export_table(test_data, 'validated_export.csv', validation_rules)
print(f"Validation complete: {valid_count} valid, {invalid_count} invalid")

While write_dicts is not available in the main package API, you can write dictionary data using the DictWriter class:
import clevercsv
# Convert dictionaries to table format for write_table
def write_dict_data_as_table(dict_data, filename, fieldnames=None):
    """Write dictionary data using write_table."""
    # Nothing to export for an empty record list.
    if not dict_data:
        return
    # Column order defaults to the first record's key order.
    if fieldnames is None:
        fieldnames = list(dict_data[0].keys())
    # One row per record in field order; missing keys become ''.
    body = [[record.get(field, '') for field in fieldnames] for record in dict_data]
    clevercsv.write_table([fieldnames] + body, filename)
# Alternative: Use DictWriter directly
def write_dict_data_with_dictwriter(dict_data, filename, fieldnames=None):
    """Write dictionary data using DictWriter."""
    if not dict_data:
        return
    # Fall back to the first record's keys when no explicit order is given.
    columns = fieldnames if fieldnames is not None else list(dict_data[0].keys())
    # newline='' prevents blank lines between rows on Windows.
    with open(filename, 'w', newline='') as handle:
        dict_writer = clevercsv.DictWriter(handle, fieldnames=columns)
        dict_writer.writeheader()
        dict_writer.writerows(dict_data)
# Usage
records = [
    {'name': 'Alice', 'age': 30, 'city': 'New York'},
    {'name': 'Bob', 'age': 25, 'city': 'San Francisco'},
    {'name': 'Charlie', 'age': 35, 'city': 'Chicago'}
]
# Method 1: Convert to table
write_dict_data_as_table(records, 'method1_output.csv')
# Method 2: Use DictWriter
write_dict_data_with_dictwriter(records, 'method2_output.csv')

# Memory efficient: Use generators or iterators
def generate_large_table():
    # Lazily yields a header plus one million rows.
    # NOTE(review): relies on `from datetime import datetime` from an
    # earlier example - confirm it is in scope where this is run.
    yield ['ID', 'Value', 'Timestamp']
    for i in range(1000000):
        yield [i, f'value_{i}', datetime.now().isoformat()]

clevercsv.write_table(generate_large_table(), 'large_file.csv')  # Constant memory usage

# Memory intensive: Load all data first
large_data = list(generate_large_table())  # Loads all 1M rows into memory
clevercsv.write_table(large_data, 'large_file.csv')  # High memory usage

# Faster: Prepare all data first, then write once
# NOTE(review): prepare_all_data is a placeholder for caller-supplied code.
all_rows = prepare_all_data()
clevercsv.write_table(all_rows, 'output.csv')

# Slower: Multiple file operations (avoid this pattern)
for i, row_data in enumerate(data_source):
    # mode is computed but unused here - it illustrates the append pattern
    # this anti-pattern would require.
    mode = 'w' if i == 0 else 'a'
    # Opening file repeatedly is inefficient
    clevercsv.write_table([row_data], 'output.csv')  # Don't do this

# Maximum compatibility: Use 'excel' dialect (RFC-4180)
clevercsv.write_table(data, 'compatible.csv', dialect='excel')

# Custom requirements: Create appropriate dialect
european_dialect = clevercsv.SimpleDialect(';', '"', '')  # Common in Europe
clevercsv.write_table(data, 'european.csv', dialect=european_dialect)

import clevercsv
def safe_csv_write(data, filename):
    """Write CSV with error handling.

    Returns True on success, False when writing failed (an explanatory
    message is printed in either case).
    """
    try:
        clevercsv.write_table(data, filename)
        # Fixed: previously printed the literal '(unknown)' instead of the
        # destination filename.
        print(f"Successfully wrote {len(data)} rows to {filename}")
        return True
    except ValueError as e:
        # Raised by write_table for inconsistent row lengths.
        print(f"Data validation error: {e}")
        return False
    except IOError as e:
        print(f"File write error: {e}")
        return False
    except Exception as e:
        # Broad last-resort guard so this demo helper never raises.
        print(f"Unexpected error: {e}")
        return False
# Usage
test_data = [
    ['A', 'B', 'C'],
    ['1', '2', '3'],
    ['4', '5']  # Inconsistent row length - will cause ValueError
]
success = safe_csv_write(test_data, 'test_output.csv')
if not success:
    print("Write operation failed, check data consistency")

import clevercsv
def write_with_validation(table, filename):
    """Write table with row length validation and repair."""
    if not table:
        print("Empty table - no file created")
        return
    # Gather every row's width; more than one distinct value means ragged.
    row_lengths = [len(row) for row in table]
    distinct_lengths = set(row_lengths)
    if len(distinct_lengths) > 1:
        print(f"Inconsistent row lengths detected: {distinct_lengths}")
        # Option 1: Pad short rows
        max_length = max(row_lengths)
        repaired = [list(row) + [''] * (max_length - len(row)) for row in table]
        print(f"Padded short rows to {max_length} columns")
        clevercsv.write_table(repaired, filename)
        # Option 2: Truncate long rows (alternative approach)
        # min_length = min(row_lengths)
        # truncated_table = [row[:min_length] for row in table]
        # clevercsv.write_table(truncated_table, filename)
    else:
        clevercsv.write_table(table, filename)
        print(f"Successfully wrote consistent table with {row_lengths[0]} columns")
# Usage
# Ragged table: one row is short, another has an extra field.
inconsistent_data = [
    ['Name', 'Age', 'City', 'Country'],
    ['Alice', '30', 'New York'],  # Missing country
    ['Bob', '25', 'SF', 'USA', 'Extra']  # Extra field
]
write_with_validation(inconsistent_data, 'repaired_output.csv')

Install with Tessl CLI
npx tessl i tessl/pypi-clevercsv