A pandas-based library to visualize and compare datasets.
npx @tessl/cli install tessl/pypi-sweetviz@2.3.0
A pandas-based library that generates beautiful, high-density visualizations for exploratory data analysis (EDA) with minimal code. Sweetviz specializes in target analysis, dataset comparison, and feature analysis, offering unified mixed-type associations that integrate numerical correlations, categorical associations, and categorical-numerical relationships seamlessly.
pip install sweetviz
import sweetviz as sv
import sweetviz as sv
import pandas as pd
# Load your dataset
df = pd.read_csv('your_dataset.csv')
# Create a report analyzing the entire dataset
my_report = sv.analyze(df)
my_report.show_html() # Opens in browser
# Analyze with a target feature
my_report = sv.analyze(df, target_feat='target_column')
my_report.show_html()
# Compare two datasets (e.g., training vs test)
train_report = sv.compare([train_df, "Training"], [test_df, "Test"])
train_report.show_html()
# Compare subsets within the same dataset
my_report = sv.compare_intra(df, df["gender"] == "male", ["Male", "Female"])
my_report.show_html()
Sweetviz operates through a three-step process:
analyze(), compare(), or compare_intra() create DataframeReport objects
Key components:
Primary functions for creating exploratory data analysis reports. These functions analyze dataframes and return DataframeReport objects containing comprehensive statistics, visualizations, and association matrices.
def analyze(source, target_feat=None, feat_cfg=None, pairwise_analysis='auto'): ...
def compare(source, compare, target_feat=None, feat_cfg=None, pairwise_analysis='auto'): ...
def compare_intra(source_df, condition_series, names, target_feat=None, feat_cfg=None, pairwise_analysis='auto'): ...
Methods for rendering and outputting analysis reports in various formats. DataframeReport objects provide multiple output options including HTML files, notebook embedding, and experiment tracking integration.
class DataframeReport:
def show_html(filepath='SWEETVIZ_REPORT.html', open_browser=True, layout='widescreen', scale=None): ...
def show_notebook(w=None, h=None, scale=None, layout=None, filepath=None, file_layout=None, file_scale=None): ...
def log_comet(experiment): ...
Configuration system for controlling feature type detection, analysis parameters, and report customization. Enables fine-tuned control over which features to analyze and how they should be interpreted.
class FeatureConfig:
def __init__(skip=None, force_cat=None, force_text=None, force_num=None): ...
def get_predetermined_type(feature_name): ...
def get_all_mentioned_features(): ...
from typing import Union, Tuple, List
import pandas as pd
from enum import Enum
# Core type aliases
DataFrameInput = Union[pd.DataFrame, Tuple[pd.DataFrame, str]]
class FeatureType(Enum):
TYPE_CAT = "CATEGORICAL"
TYPE_BOOL = "BOOL"
TYPE_NUM = "NUMERIC"
TYPE_TEXT = "TEXT"
TYPE_UNSUPPORTED = "UNSUPPORTED"
TYPE_ALL_NAN = "ALL_NAN"
TYPE_UNKNOWN = "UNKNOWN"
TYPE_SKIPPED = "SKIPPED"
def __str__(self): ...
class NumWithPercent:
def __init__(self, number, total_for_percentage): ...
def __int__(self): ...
def __float__(self): ...
def __repr__(self): ...