CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-clevercsv

A Python package for handling messy CSV files with enhanced dialect detection capabilities

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/data-writing.md

Data Writing

High-level function for writing tabular data to CSV files with automatic formatting and RFC-4180 compliance by default. This wrapper function provides a convenient interface for common CSV writing tasks while supporting custom dialects and formatting options.

Capabilities

Table Writing

Write tabular data (lists of lists) to CSV files with support for transposition and custom dialects.

def write_table(
    table: Iterable[Iterable[Any]],
    filename: Union[str, PathLike],
    dialect: Union[str, SimpleDialect, csv.Dialect] = 'excel',
    transpose: bool = False,
    encoding: Optional[str] = None
) -> None:
    """
    Write a table (list of lists) to a CSV file.
    
    Parameters:
    - table: Table data as an iterable of rows (each row is an iterable of values)
    - filename: Path to output CSV file
    - dialect: Dialect to use for writing; accepts a dialect name (str),
      a SimpleDialect instance, or a csv.Dialect instance
      (default: 'excel' for RFC-4180 compliance)
    - transpose: Transpose table before writing (swap rows and columns)
    - encoding: Text encoding for output file (platform default if None)
    
    Raises:
    ValueError: If table rows have inconsistent lengths
    
    Notes:
    - Empty tables result in no file being created
    - All rows must have the same number of columns (after transposition if applicable)
    - Uses RFC-4180 compliant 'excel' dialect by default for standardized output
    """

Usage Examples

import clevercsv

# Basic table writing
data = [
    ['Name', 'Age', 'City'],
    ['Alice', 30, 'New York'],
    ['Bob', 25, 'San Francisco'],
    ['Charlie', 35, 'Chicago'],
]

clevercsv.write_table(data, 'employees.csv')

# Write with a custom (pipe-delimited) dialect
pipe_dialect = clevercsv.SimpleDialect('|', '"', '')
clevercsv.write_table(data, 'pipe_separated.csv', dialect=pipe_dialect)

# Transpose data (swap rows and columns)
clevercsv.write_table(data, 'transposed.csv', transpose=True)

# Write with an explicit text encoding
clevercsv.write_table(data, 'utf8_output.csv', encoding='utf-8')

# Write numeric data
measurements = [
    ['X', 'Y', 'Z'],
    [1.5, 2.7, 3.14159],
    [4.2, 5.8, 6.28318],
    [7.1, 8.9, 9.42477],
]

clevercsv.write_table(measurements, 'numeric.csv')

# Write generator data (memory efficient): rows are produced lazily
def stream_rows():
    yield ['ID', 'Value']
    for idx in range(1000):
        yield [idx, f'Value_{idx}']

clevercsv.write_table(stream_rows(), 'generated.csv')

Advanced Writing Patterns

Data Processing and Export

Process data and export results with appropriate formatting:

import clevercsv
from datetime import datetime

def export_processed_data(input_data, output_file):
    """Normalize raw records and export them as a formatted CSV file."""
    rows = [['ID', 'Name', 'Email', 'Created Date', 'Active', 'Score']]

    for rec in input_data:
        rows.append([
            rec['id'],
            rec['name'].title(),                     # Capitalize names
            rec['email'].lower(),                    # Lowercase emails
            datetime.now().strftime('%Y-%m-%d'),
            'Yes' if rec.get('active', False) else 'No',
            f"{rec.get('score', 0):.2f}",            # Format numbers
        ])

    # Standard CSV format for maximum compatibility
    clevercsv.write_table(rows, output_file)
    print(f"Exported {len(rows) - 1} records to {output_file}")

# Usage
raw_data = [
    {'id': 1, 'name': 'alice smith', 'email': 'ALICE@EXAMPLE.COM', 'active': True, 'score': 95.678},
    {'id': 2, 'name': 'bob jones', 'email': 'BOB@EXAMPLE.COM', 'active': False, 'score': 78.234}
]

export_processed_data(raw_data, 'processed_export.csv')

Multi-Format Export

Export data in multiple CSV formats:

import clevercsv

def export_multiple_formats(data, base_filename):
    """Export the same table once per configured CSV dialect."""
    make = clevercsv.SimpleDialect
    formats = {
        'standard': make(',', '"', ''),
        'excel': 'excel',
        'tab_separated': make('\t', '"', ''),
        'pipe_separated': make('|', '"', ''),
        'semicolon_european': make(';', '"', ''),
    }

    # One output file per dialect: <base>_<format>.csv
    for format_name, dialect in formats.items():
        output_file = f"{base_filename}_{format_name}.csv"
        clevercsv.write_table(data, output_file, dialect=dialect)
        print(f"Exported {format_name} format to {output_file}")

# Usage
sample_data = [
    ['Product', 'Price', 'Category'],
    ['Laptop', '$999.99', 'Electronics'],
    ['Book', '$19.95', 'Education'],
    ['Coffee Mug', '$12.50', 'Kitchen']
]

export_multiple_formats(sample_data, 'products')

Streaming Large Dataset Export

Export large datasets efficiently without loading all data into memory:

import clevercsv

class StreamingTableExporter:
    """Export large tabular datasets row-by-row to bound memory usage.

    Use as a context manager: the output file is opened in __enter__ and is
    always closed in __exit__. The summary line is printed only when the
    with-block completes without an exception; exceptions still propagate.
    """
    
    def __init__(self, filename, dialect='excel', encoding=None):
        self.filename = filename   # output CSV path
        self.dialect = dialect     # dialect name or object for clevercsv.writer
        self.encoding = encoding   # text encoding (platform default if None)
        self.file = None
        self.writer = None
        self.row_count = 0
    
    def __enter__(self):
        self.file = open(self.filename, 'w', newline='', encoding=self.encoding)
        try:
            self.writer = clevercsv.writer(self.file, dialect=self.dialect)
        except Exception:
            # Don't leak the file handle if the writer cannot be created.
            self.file.close()
            self.file = None
            raise
        return self
    
    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.file:
            self.file.close()
            self.file = None
        self.writer = None
        # Report success only on a clean exit; previously the summary was
        # printed even when the export was aborted by an exception.
        if exc_type is None:
            print(f"Exported {self.row_count} rows to {self.filename}")
    
    def write_row(self, row):
        """Write a single row and update the running count."""
        self.writer.writerow(row)
        self.row_count += 1
    
    def write_rows(self, rows):
        """Write an iterable of rows one at a time."""
        for row in rows:
            self.write_row(row)

# Usage
# Export large dataset with streaming
with StreamingTableExporter('large_export.csv') as exporter:
    # Write header
    exporter.write_row(['ID', 'Name', 'Department', 'Salary', 'Hire Date'])
    
    # Process data in batches to manage memory
    for batch_start in range(0, 100000, 1000):  # 100k records in 1k batches
        batch_data = generate_employee_batch(batch_start, 1000)  # Your data generator
        exporter.write_rows(batch_data)

Data Validation Before Export

Validate data before writing to ensure quality:

import os

import clevercsv
from typing import List, Any

def validate_and_export_table(data: List[List[Any]], filename: str, validation_rules: dict):
    """Validate tabular data row-by-row, export valid rows, and report errors.

    Parameters:
    - data: table including a header row at index 0
    - filename: output path for the valid rows
    - validation_rules: mapping of column name -> predicate. Rules are applied
      positionally, so the dict's insertion order must match the header's
      column order.

    Returns:
    (valid_count, invalid_count) tuple. Invalid rows are written to a sibling
    "<name>_errors<ext>" file with the error list and row index appended.
    """
    
    if not data:
        print("No data to export")
        return 0, 0
    
    header = data[0]
    rows = data[1:]
    
    valid_rows = [header]  # Include header
    invalid_data = []
    
    for i, row in enumerate(rows):
        errors = []
        
        # Check row length
        if len(row) != len(header):
            errors.append(f"Expected {len(header)} columns, got {len(row)}")
        
        # Apply validation rules to each column (positional: dict order == column order)
        for col_idx, (col_name, validator) in enumerate(validation_rules.items()):
            if col_idx < len(row):
                try:
                    if not validator(row[col_idx]):
                        errors.append(f"Invalid {col_name}: {row[col_idx]}")
                except Exception as e:
                    errors.append(f"Validation error for {col_name}: {e}")
            else:
                errors.append(f"Missing value for {col_name}")
        
        if errors:
            invalid_data.append({
                'row_index': i + 1,  # +1 for header
                'row': row,
                'errors': errors
            })
        else:
            valid_rows.append(row)
    
    # Export valid rows
    if len(valid_rows) > 1:  # More than just header
        clevercsv.write_table(valid_rows, filename)
        print(f"Exported {len(valid_rows)-1} valid rows to {filename}")
    
    # Export invalid rows for review
    if invalid_data:
        # splitext (instead of str.replace) avoids silently reusing `filename`
        # — and overwriting the valid output — when it has no '.csv' suffix.
        root, ext = os.path.splitext(filename)
        error_filename = f"{root}_errors{ext or '.csv'}"
        error_rows = [header + ['_errors', '_row_index']]  # Add error columns
        
        for item in invalid_data:
            error_row = list(item['row'])
            # Pad row to match header length
            while len(error_row) < len(header):
                error_row.append('')
            error_row.extend(['; '.join(item['errors']), str(item['row_index'])])
            error_rows.append(error_row)
        
        clevercsv.write_table(error_rows, error_filename)
        print(f"Exported {len(invalid_data)} invalid rows to {error_filename}")
    
    return len(valid_rows) - 1, len(invalid_data)

# Usage
validation_rules = {
    'Name': lambda x: isinstance(x, str) and len(x.strip()) > 0,
    'Age': lambda x: str(x).isdigit() and 0 < int(x) < 150,
    'Email': lambda x: '@' in str(x) and '.' in str(x)
}

test_data = [
    ['Name', 'Age', 'Email'],
    ['Alice', '30', 'alice@example.com'],
    ['', '25', 'bob@example.com'],  # Invalid: empty name
    ['Charlie', '200', 'invalid-email'],  # Invalid: age too high, bad email
    ['Dave', '35']  # Invalid: missing email
]

valid_count, invalid_count = validate_and_export_table(test_data, 'validated_export.csv', validation_rules)
print(f"Validation complete: {valid_count} valid, {invalid_count} invalid")

Working with Dictionary Data

While a dedicated write_dicts function is not available in the main package API, you can write dictionary data either by converting it to a table for write_table or by using the DictWriter class:

import clevercsv

# Convert dictionaries to table format for write_table
def write_dict_data_as_table(dict_data, filename, fieldnames=None):
    """Write a list of dictionaries by first converting it to a table."""
    if not dict_data:
        return
    
    # Default the column order to the keys of the first record
    columns = list(dict_data[0].keys()) if fieldnames is None else fieldnames
    
    # Header row followed by one row per record (missing keys become '')
    table_data = [columns]
    table_data.extend([record.get(field, '') for field in columns] for record in dict_data)
    
    clevercsv.write_table(table_data, filename)

# Alternative: Use DictWriter directly
def write_dict_data_with_dictwriter(dict_data, filename, fieldnames=None):
    """Write a list of dictionaries with clevercsv's DictWriter."""
    if not dict_data:
        return
    
    columns = list(dict_data[0].keys()) if fieldnames is None else fieldnames
    
    with open(filename, 'w', newline='') as f:
        writer = clevercsv.DictWriter(f, fieldnames=columns)
        writer.writeheader()
        writer.writerows(dict_data)

# Usage
records = [
    {'name': 'Alice', 'age': 30, 'city': 'New York'},
    {'name': 'Bob', 'age': 25, 'city': 'San Francisco'},
    {'name': 'Charlie', 'age': 35, 'city': 'Chicago'}
]

# Method 1: Convert to table
write_dict_data_as_table(records, 'method1_output.csv')

# Method 2: Use DictWriter
write_dict_data_with_dictwriter(records, 'method2_output.csv')

Performance Considerations

Memory Efficiency

# Memory efficient: Use generators or iterators
# NOTE(review): this fragment assumes `import clevercsv` and
# `from datetime import datetime` from the earlier examples are in scope.
def generate_large_table():
    # Header first; data rows are then produced lazily, one at a time.
    yield ['ID', 'Value', 'Timestamp']
    for i in range(1000000):
        yield [i, f'value_{i}', datetime.now().isoformat()]

clevercsv.write_table(generate_large_table(), 'large_file.csv')  # Constant memory usage

# Memory intensive: Load all data first
large_data = list(generate_large_table())  # Loads all 1M rows into memory
clevercsv.write_table(large_data, 'large_file.csv')  # High memory usage

Write Performance

# Faster: Prepare all data first, then write once
all_rows = prepare_all_data()  # placeholder for your own data-collection step
clevercsv.write_table(all_rows, 'output.csv')

# Slower: Multiple file operations (avoid this pattern)
for i, row_data in enumerate(data_source):
    mode = 'w' if i == 0 else 'a'
    # Opening file repeatedly is inefficient
    # (the `mode` variable is illustrative only; write_table's documented
    # signature takes no mode argument, so each call rewrites the file)
    clevercsv.write_table([row_data], 'output.csv')  # Don't do this

Dialect Selection for Compatibility

# Maximum compatibility: Use 'excel' dialect (RFC-4180)
clevercsv.write_table(data, 'compatible.csv', dialect='excel')

# Custom requirements: Create appropriate dialect
# (SimpleDialect positional args appear to be delimiter, quote character,
# escape character — confirm in dialects-configuration.md)
european_dialect = clevercsv.SimpleDialect(';', '"', '')  # Common in Europe
clevercsv.write_table(data, 'european.csv', dialect=european_dialect)

Error Handling

Handling Write Errors

import clevercsv

def safe_csv_write(data, filename):
    """Write a table to CSV, returning True on success and False on failure.

    Parameters:
    - data: table as a sized sequence of rows (len() is taken for the report)
    - filename: output CSV path

    Handles ValueError (inconsistent row lengths), OSError (file problems),
    and any other unexpected exception, printing a diagnostic for each.
    """
    try:
        clevercsv.write_table(data, filename)
        # Previously the message printed a literal "(unknown)" placeholder
        # instead of the actual destination file.
        print(f"Successfully wrote {len(data)} rows to {filename}")
        return True
    except ValueError as e:
        print(f"Data validation error: {e}")
        return False
    except OSError as e:  # IOError is an alias of OSError in Python 3
        print(f"File write error: {e}")
        return False
    except Exception as e:
        print(f"Unexpected error: {e}")
        return False

# Usage
test_data = [
    ['A', 'B', 'C'],
    ['1', '2', '3'],
    ['4', '5']  # Inconsistent row length - will cause ValueError
]

success = safe_csv_write(test_data, 'test_output.csv')
if not success:
    print("Write operation failed, check data consistency")

Validation and Recovery

import clevercsv

def write_with_validation(table, filename):
    """Write a table, first padding short rows if lengths are inconsistent."""
    if not table:
        print("Empty table - no file created")
        return
    
    # Gather every row length and check whether they all agree
    lengths = [len(row) for row in table]
    unique_lengths = set(lengths)
    
    if len(unique_lengths) == 1:
        # Already consistent — write as-is
        clevercsv.write_table(table, filename)
        print(f"Successfully wrote consistent table with {lengths[0]} columns")
        return
    
    print(f"Inconsistent row lengths detected: {unique_lengths}")
    
    # Option 1: Pad short rows out to the widest row
    width = max(lengths)
    padded = [list(row) + [''] * (width - len(row)) for row in table]
    
    print(f"Padded short rows to {width} columns")
    clevercsv.write_table(padded, filename)
    
    # Option 2: Truncate long rows (alternative approach)
    # narrowest = min(lengths)
    # truncated = [row[:narrowest] for row in table]
    # clevercsv.write_table(truncated, filename)

# Usage
inconsistent_data = [
    ['Name', 'Age', 'City', 'Country'],
    ['Alice', '30', 'New York'],  # Missing country
    ['Bob', '25', 'SF', 'USA', 'Extra']  # Extra field
]

write_with_validation(inconsistent_data, 'repaired_output.csv')

Install with Tessl CLI

npx tessl i tessl/pypi-clevercsv

docs

core-reading-writing.md

data-reading.md

data-writing.md

dialect-detection.md

dialects-configuration.md

dictionary-operations.md

index.md

tile.json