A Python package for handling messy CSV files with enhanced dialect detection capabilities
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
High-level wrapper functions that automatically detect CSV dialects and encodings, providing the most convenient way to read CSV files without manual configuration. These functions handle the complexity of dialect detection and provide clean interfaces for common CSV reading tasks.
Read CSV files as lists of lists (rows), with automatic dialect detection and optional streaming for large files.
def read_table(
filename: Union[str, PathLike],
dialect: Optional[Union[str, SimpleDialect, csv.Dialect]] = None,
encoding: Optional[str] = None,
num_chars: Optional[int] = None,
verbose: bool = False
) -> List[List[str]]:
"""
Read a CSV file as a table (list of lists).
Parameters:
- filename: Path to the CSV file
- dialect: Dialect to use (auto-detected if None)
- encoding: File encoding (auto-detected if None)
- num_chars: Number of characters for detection (entire file if None)
- verbose: Show detection progress
Returns:
List of rows, each row is a list of string fields
Raises:
NoDetectionResult: When dialect detection fails
"""
def stream_table(
filename: Union[str, PathLike],
dialect: Optional[Union[str, SimpleDialect, csv.Dialect]] = None,
encoding: Optional[str] = None,
num_chars: Optional[int] = None,
verbose: bool = False
) -> Iterator[List[str]]:
"""
Read a CSV file as a generator over rows.
Parameters:
- filename: Path to the CSV file
- dialect: Dialect to use (auto-detected if None)
- encoding: File encoding (auto-detected if None)
- num_chars: Number of characters for detection (entire file if None)
- verbose: Show detection progress
Returns:
Generator yielding rows as lists of strings
Raises:
NoDetectionResult: When dialect detection fails
"""

import clevercsv
# Read entire CSV file
rows = clevercsv.read_table('data.csv')
print(f"Read {len(rows)} rows")
# Read with specific encoding
rows = clevercsv.read_table('data.csv', encoding='utf-8')
# Stream large files to avoid memory issues
for row in clevercsv.stream_table('large_file.csv'):
process_row(row)
# Use specific dialect
dialect = clevercsv.SimpleDialect(',', '"', '')
rows = clevercsv.read_table('data.csv', dialect=dialect)

Read CSV files as lists of dictionaries, treating the first row as column headers. Ideal for structured data where you need named access to fields.
def read_dicts(
filename: Union[str, PathLike],
dialect: Optional[Union[str, SimpleDialect, csv.Dialect]] = None,
encoding: Optional[str] = None,
num_chars: Optional[int] = None,
verbose: bool = False
) -> List[Dict[str, str]]:
"""
Read a CSV file as a list of dictionaries.
Parameters:
- filename: Path to the CSV file
- dialect: Dialect to use (auto-detected if None)
- encoding: File encoding (auto-detected if None)
- num_chars: Number of characters for detection (entire file if None)
- verbose: Show detection progress
Returns:
List of dictionaries where keys are column names from first row
Raises:
NoDetectionResult: When dialect detection fails
"""
def stream_dicts(
filename: Union[str, PathLike],
dialect: Optional[Union[str, SimpleDialect, csv.Dialect]] = None,
encoding: Optional[str] = None,
num_chars: Optional[int] = None,
verbose: bool = False
) -> Iterator[Dict[str, str]]:
"""
Read a CSV file as a generator over dictionaries.
Parameters:
- filename: Path to the CSV file
- dialect: Dialect to use (auto-detected if None)
- encoding: File encoding (auto-detected if None)
- num_chars: Number of characters for detection (entire file if None)
- verbose: Show detection progress
Returns:
Generator yielding rows as dictionaries
Raises:
NoDetectionResult: When dialect detection fails
"""

import clevercsv
# Read CSV as dictionaries
records = clevercsv.read_dicts('employees.csv')
for record in records:
print(f"Name: {record['name']}, Age: {record['age']}")
# Stream dictionaries for large files
for record in clevercsv.stream_dicts('large_dataset.csv'):
if int(record['score']) > 90:
high_scorers.append(record)
# Access with error handling
try:
records = clevercsv.read_dicts('messy_file.csv', verbose=True)
except clevercsv.NoDetectionResult:
print("Could not detect CSV dialect")

Read CSV files directly into pandas DataFrames with automatic dialect detection. Combines CleverCSV's robust dialect detection with pandas' powerful data manipulation capabilities.
def read_dataframe(
filename: Union[str, PathLike],
*args,
num_chars: Optional[int] = None,
**kwargs
) -> 'pandas.DataFrame':
"""
Read a CSV file to a pandas DataFrame.
Parameters:
- filename: Path to the CSV file
- *args: Additional arguments passed to pandas.read_csv
- num_chars: Number of characters for dialect detection (entire file if None)
- **kwargs: Additional keyword arguments passed to pandas.read_csv
Returns:
pandas DataFrame containing the CSV data
Raises:
NoDetectionResult: When dialect detection fails
ValueError: If filename is not a regular file
Notes:
- Requires pandas to be installed
- Detected dialect is automatically passed to pandas.read_csv
- Encoding detection is performed if not specified in kwargs
"""

import clevercsv
# Read CSV into DataFrame
df = clevercsv.read_dataframe('sales_data.csv')
print(df.head())
# Pass additional pandas parameters
df = clevercsv.read_dataframe(
'data.csv',
parse_dates=['date_column'],
index_col='id'
)
# Specify encoding and other options
df = clevercsv.read_dataframe(
'data.csv',
encoding='latin-1',
na_values=['N/A', 'NULL'],
dtype={'price': float}
)
# Use subset for detection on large files
df = clevercsv.read_dataframe('huge_file.csv', num_chars=10000)

- Use the num_chars parameter to limit detection to the first N characters
- Use the streaming functions (stream_table, stream_dicts) for large files
- Install the cchardet package for faster encoding detection

All reading functions may raise NoDetectionResult when dialect detection fails. This typically happens with:
import clevercsv
try:
data = clevercsv.read_table('problematic.csv')
except clevercsv.NoDetectionResult:
# Fallback to manual dialect specification
dialect = clevercsv.SimpleDialect(',', '"', '')
data = clevercsv.read_table('problematic.csv', dialect=dialect)

Install with Tessl CLI
npx tessl i tessl/pypi-clevercsv