A pandas-based library to visualize and compare datasets.
Configuration system for controlling feature type detection, analysis parameters, and report customization. Enables fine-tuned control over which features to analyze and how they should be interpreted.
Controls how individual features are processed during analysis. Allows overriding automatic type detection and excluding features from analysis.
class FeatureConfig:
def __init__(self,
skip: Union[str, List[str], Tuple[str]] = None,
force_cat: Union[str, List[str], Tuple[str]] = None,
force_text: Union[str, List[str], Tuple[str]] = None,
force_num: Union[str, List[str], Tuple[str]] = None):
"""
Configure feature processing behavior.
Parameters:
- skip: Features to exclude from analysis
- force_cat: Features to treat as categorical
- force_text: Features to treat as text
- force_num: Features to treat as numerical
All parameters accept single strings, lists, or tuples of feature names.
"""
def get_predetermined_type(self, feature_name: str) -> FeatureType:
"""
Get the predetermined type for a feature.
Parameters:
- feature_name: Name of the feature
Returns:
FeatureType enum value indicating predetermined type
"""
def get_all_mentioned_features(self) -> List[str]:
"""
Get list of all features mentioned in configuration.
Returns:
List of all feature names in any configuration category
"""

import sweetviz as sv
# Skip specific features
config = sv.FeatureConfig(skip=['id', 'timestamp'])
report = sv.analyze(df, feat_cfg=config)
# Force feature types
config = sv.FeatureConfig(
skip='user_id',
force_cat=['status', 'category'],
force_num=['year', 'rating'],
force_text='description'
)
report = sv.analyze(df, feat_cfg=config)
# Multiple ways to specify features
config = sv.FeatureConfig(
skip=['id', 'created_at'], # List
force_cat=('status', 'type'), # Tuple
force_num='rating' # Single string
)
# Check configuration
config = sv.FeatureConfig(skip=['id'], force_cat=['status'])
feature_type = config.get_predetermined_type('status') # Returns FeatureType.TYPE_CAT
all_features = config.get_all_mentioned_features() # Returns ['id', 'status']

System-wide settings are controlled through INI configuration files, which allow customizing default behavior, appearance, and performance parameters.
import configparser
config_parser: configparser.ConfigParser

import sweetviz as sv
# Load custom configuration; this must be done before creating any reports
sv.config_parser.read("my_config.ini")
report = sv.analyze(df)

Create custom INI files to override defaults:
[General]
default_verbosity = progress_only
use_cjk_font = 1
[Output_Defaults]
html_layout = vertical
html_scale = 0.9
notebook_layout = widescreen
notebook_scale = 0.8
notebook_width = 100%%
notebook_height = 700
[Layout]
show_logo = 0
[comet_ml_defaults]
html_layout = vertical
html_scale = 0.85

Sweetviz automatically detects feature types, but the detection can be overridden:
# Common override scenarios
# Treat year as categorical instead of numerical
config = sv.FeatureConfig(force_cat=['year'])
# Treat encoded categories as numerical
config = sv.FeatureConfig(force_num=['category_encoded'])
# Treat long strings as text features
config = sv.FeatureConfig(force_text=['comments', 'description'])
# Skip features that shouldn't be analyzed
config = sv.FeatureConfig(skip=['id', 'uuid', 'internal_code'])
# Combined configuration
config = sv.FeatureConfig(
skip=['id', 'created_at', 'updated_at'],
force_cat=['zip_code', 'product_code'],
force_num=['rating_1_to_5'],
force_text=['user_comments']
)

[General]
# Verbosity levels: full, progress_only, off, default
default_verbosity = progress_only
# Enable CJK (Chinese/Japanese/Korean) font support
use_cjk_font = 1

[Output_Defaults]
# HTML report defaults
# html_layout accepts: widescreen or vertical
html_layout = widescreen
html_scale = 1.0
# Notebook display defaults
notebook_layout = vertical
notebook_scale = 0.9
# A literal % must be written as %% because configparser interpolates values
notebook_width = 100%%
notebook_height = 700

[Layout]
# Remove Sweetviz logo
show_logo = 0
# Custom styling options (advanced)
# See sweetviz_defaults.ini for full options

[comet_ml_defaults]
# Defaults for Comet.ml logging
html_layout = vertical
html_scale = 0.85

Features named "index" are automatically renamed to "df_index" to avoid conflicts:
# If DataFrame has column named 'index'
df = pd.DataFrame({'index': [1,2,3], 'value': [10,20,30]})
# Sweetviz automatically renames to 'df_index'
config = sv.FeatureConfig(skip=['df_index']) # Use 'df_index', not 'index'
report = sv.analyze(df, feat_cfg=config)

# Target features must be boolean or numerical
config = sv.FeatureConfig(force_num=['encoded_target'])
report = sv.analyze(df, target_feat='encoded_target', feat_cfg=config)
# This will raise ValueError - categorical targets not supported
try:
report = sv.analyze(df, target_feat='category_column')
except ValueError as e:
print("Use force_num to convert categorical to numerical if appropriate")

Control when correlation analysis prompts for confirmation:
# Large datasets - control pairwise analysis
report = sv.analyze(large_df, pairwise_analysis='off') # Skip correlations
report = sv.analyze(large_df, pairwise_analysis='on') # Force correlations
report = sv.analyze(large_df, pairwise_analysis='auto') # Auto-decide (default)

# For large datasets, skip expensive computations
config = sv.FeatureConfig(skip=list_of_high_cardinality_features)
report = sv.analyze(df,
feat_cfg=config,
pairwise_analysis='off')
# Use smaller scale for large reports
report.show_html(scale=0.7)

# Handle configuration errors
try:
config = sv.FeatureConfig(skip=['nonexistent_column'])
report = sv.analyze(df, feat_cfg=config)
except Exception as e:
print(f"Configuration warning: {e}")
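# Handle INI file errors
# ConfigParser.read() does not raise for a missing file: it silently skips it
# and returns the list of files it actually parsed, so check that list instead.
loaded_files = sv.config_parser.read("nonexistent.ini")
if not loaded_files:
    print("Configuration file not found, using defaults")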
# Validate feature names exist
available_features = set(df.columns)
skip_features = ['id', 'timestamp']
valid_skip = [f for f in skip_features if f in available_features]
config = sv.FeatureConfig(skip=valid_skip)

# Standard data science workflow
config = sv.FeatureConfig(
skip=['id', 'uuid', 'created_at', 'updated_at'], # Skip metadata
force_cat=['zip_code', 'product_id'], # IDs as categories
force_num=['rating', 'score'], # Ordinal as numeric
force_text=['comments', 'description'] # Long text fields
)
# Time series data
config = sv.FeatureConfig(
skip=['timestamp', 'date'], # Skip time columns
force_cat=['day_of_week'], # Cyclical as categorical
force_num=['month', 'quarter'] # Temporal as numeric
)
# Survey data
config = sv.FeatureConfig(
force_cat=['satisfaction_level', 'education'], # Ordinal categories
force_num=['age_group', 'income_bracket'], # Ranked as numeric
force_text=['feedback_text'] # Open responses
)

# Save configuration for reuse
def create_standard_config():
return sv.FeatureConfig(
skip=['id', 'timestamp'],
force_cat=['category', 'status'],
force_num=['rating']
)
# Use across multiple analyses
config = create_standard_config()
train_report = sv.analyze(train_df, feat_cfg=config)
test_report = sv.analyze(test_df, feat_cfg=config)

Install with Tessl CLI:
npx tessl i tessl/pypi-sweetviz