A Python package for handling messy CSV files with enhanced dialect detection capabilities
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Low-level CSV reader and writer classes that provide drop-in compatibility with Python's csv module while supporting CleverCSV's enhanced dialect handling. These classes form the foundation of CleverCSV's CSV processing capabilities.
CSV reader class that parses CSV data row by row, supporting all CleverCSV dialect features and providing an iterator interface.
class reader:
"""
CSV reader with enhanced dialect support.
Drop-in replacement for csv.reader with better dialect handling.
"""
def __init__(
self,
csvfile: Iterable[str],
dialect: Union[str, SimpleDialect, csv.Dialect] = 'excel',
**fmtparams
):
"""
Initialize CSV reader.
Parameters:
- csvfile: File-like object or iterable of strings
- dialect: Dialect specification ('excel', SimpleDialect, or csv.Dialect)
- **fmtparams: Format parameters (delimiter, quotechar, escapechar, strict)
"""
def __iter__(self) -> Iterator[List[str]]:
"""Return iterator over rows."""
def __next__(self) -> List[str]:
"""
Return next row as list of strings.
Returns:
List of field values as strings
Raises:
StopIteration: When no more rows available
Error: When parsing error occurs
"""
@property
def dialect(self) -> csv.Dialect:
"""Current dialect used for reading."""
@property
def line_num(self) -> int:
"""Current line number being processed."""

import clevercsv
# Basic usage with file
with open('data.csv', 'r', newline='') as f:
reader = clevercsv.reader(f)
for row in reader:
print(row)
# With automatic dialect detection
with open('data.csv', 'r', newline='') as f:
sample = f.read()
dialect = clevercsv.Detector().detect(sample)
f.seek(0)
reader = clevercsv.reader(f, dialect=dialect)
rows = list(reader)
# With custom dialect
dialect = clevercsv.SimpleDialect(',', '"', '\\')
with open('data.csv', 'r', newline='') as f:
reader = clevercsv.reader(f, dialect=dialect)
header = next(reader) # First row
data_rows = list(reader) # Remaining rows
# With format parameters
with open('data.csv', 'r', newline='') as f:
reader = clevercsv.reader(f, delimiter=';', quotechar="'")
for i, row in enumerate(reader):
print(f"Row {reader.line_num}: {row}")
if i >= 10: # First 10 rows only
break

CSV writer class that formats and writes CSV data, supporting all CleverCSV dialect features and maintaining compatibility with the csv.writer interface.
class writer:
"""
CSV writer with enhanced dialect support.
Drop-in replacement for csv.writer with better dialect handling.
"""
def __init__(
self,
csvfile: SupportsWrite[str],
dialect: Union[str, SimpleDialect, csv.Dialect] = 'excel',
**fmtparams
):
"""
Initialize CSV writer.
Parameters:
- csvfile: File-like object that supports writing
- dialect: Dialect specification ('excel', SimpleDialect, or csv.Dialect)
- **fmtparams: Format parameters (delimiter, quotechar, escapechar, etc.)
"""
def writerow(self, row: Iterable[Any]) -> Any:
"""
Write a single row to the CSV file.
Parameters:
- row: Iterable of values to write
Returns:
Return value from underlying csv.writer.writerow()
Raises:
Error: When writing fails
"""
def writerows(self, rows: Iterable[Iterable[Any]]) -> Any:
"""
Write multiple rows to the CSV file.
Parameters:
- rows: Iterable of rows, each row is an iterable of values
Returns:
Return value from underlying csv.writer.writerows()
Raises:
Error: When writing fails
"""
@property
def dialect(self) -> Type[csv.Dialect]:
"""Current dialect used for writing."""

import clevercsv
# Basic writing
data = [
['Name', 'Age', 'City'],
['Alice', '30', 'New York'],
['Bob', '25', 'San Francisco']
]
with open('output.csv', 'w', newline='') as f:
writer = clevercsv.writer(f)
writer.writerows(data)
# Write with specific dialect
dialect = clevercsv.SimpleDialect(';', '"', '')
with open('output.csv', 'w', newline='') as f:
writer = clevercsv.writer(f, dialect=dialect)
writer.writerow(['col1', 'col2', 'col3'])
writer.writerow(['value1', 'value2', 'value3'])
# Write with format parameters
with open('output.csv', 'w', newline='') as f:
writer = clevercsv.writer(f, delimiter='|', quotechar="'")
for row in data:
writer.writerow(row)
# Write using detected dialect from input file
input_dialect = clevercsv.detect_dialect('input.csv')
with open('output.csv', 'w', newline='') as f:
writer = clevercsv.writer(f, dialect=input_dialect)
writer.writerows(processed_data)

Process large CSV files without loading everything into memory:
import clevercsv
def process_large_csv(input_file, output_file, transform_func):
"""Process CSV file row by row with transformation."""
# Detect dialect from input
dialect = clevercsv.detect_dialect(input_file)
with open(input_file, 'r', newline='') as infile, \
open(output_file, 'w', newline='') as outfile:
reader = clevercsv.reader(infile, dialect=dialect)
writer = clevercsv.writer(outfile, dialect='excel') # Standardize output
# Process header
header = next(reader)
writer.writerow(transform_func(header))
# Process data rows
for row in reader:
transformed_row = transform_func(row)
writer.writerow(transformed_row)
# Usage
process_large_csv('input.csv', 'output.csv', lambda row: [cell.upper() for cell in row])

Preserve input dialect when processing files:
import clevercsv
def preserve_dialect_processing(filename):
"""Process CSV while preserving original dialect."""
# Detect original dialect
original_dialect = clevercsv.detect_dialect(filename)
# Read data
with open(filename, 'r', newline='') as f:
reader = clevercsv.reader(f, dialect=original_dialect)
data = list(reader)
# Process data
processed_data = process_data(data)
# Write back with same dialect
with open(filename, 'w', newline='') as f:
writer = clevercsv.writer(f, dialect=original_dialect)
writer.writerows(processed_data)

Robust error handling for CSV processing:
import clevercsv
def robust_csv_processing(filename):
"""CSV processing with comprehensive error handling."""
try:
# Attempt dialect detection
dialect = clevercsv.detect_dialect(filename)
if dialect is None:
raise ValueError("Could not detect CSV dialect")
with open(filename, 'r', newline='') as f:
reader = clevercsv.reader(f, dialect=dialect)
try:
for line_num, row in enumerate(reader, 1):
try:
# Process row
process_row(row)
except Exception as e:
print(f"Error processing row {line_num}: {e}")
continue
except clevercsv.Error as e:
print(f"CSV parsing error at line {reader.line_num}: {e}")
except FileNotFoundError:
print(f"File not found: {filename}")
except Exception as e:
print(f"Unexpected error: {e}")

# Memory-efficient: process row by row
with open('large_file.csv', 'r', newline='') as f:
reader = clevercsv.reader(f)
for row in reader:
process_row(row) # Process immediately, don't store
# Memory-intensive: load all at once
with open('large_file.csv', 'r', newline='') as f:
reader = clevercsv.reader(f)
all_rows = list(reader) # Loads entire file into memory

# Faster: Use SimpleDialect directly if known
known_dialect = clevercsv.SimpleDialect(',', '"', '')
reader = clevercsv.reader(file, dialect=known_dialect)
# Slower: String dialect requires lookup
reader = clevercsv.reader(file, dialect='excel')

CleverCSV readers and writers are designed as drop-in replacements:
# Standard csv module
import csv
with open('data.csv', 'r', newline='') as f:
reader = csv.reader(f)
data = list(reader)
# CleverCSV replacement
import clevercsv
with open('data.csv', 'r', newline='') as f:
reader = clevercsv.reader(f) # Enhanced dialect handling
data = list(reader)
# Mixed usage
import clevercsv
import csv
# Detect with CleverCSV, read with standard csv
dialect = clevercsv.detect_dialect('data.csv')
with open('data.csv', 'r', newline='') as f:
reader = csv.reader(f, dialect=dialect.to_csv_dialect())
data = list(reader)

Both reader and writer support the same format parameters as the csv module:
- delimiter: Field separator character
- quotechar: Character used to quote fields containing special characters
- escapechar: Character used to escape delimiter/quote characters
- strict: Whether to raise exceptions on bad CSV input
- skipinitialspace: Whether to ignore whitespace after delimiter
- quoting: Quoting behavior (QUOTE_MINIMAL, QUOTE_ALL, etc.)

# Example with multiple format parameters
reader = clevercsv.reader(
file,
delimiter=';',
quotechar="'",
escapechar='\\',
strict=True
)

Install with Tessl CLI
npx tessl i tessl/pypi-clevercsv