tessl/pypi-plotly

An open-source interactive data visualization library for Python

—

Pending

Overview

Eval results

Files

Built-in Datasets

Name: tessl/pypi-plotly
Author: tessl

Sample datasets for learning and experimentation with plotly visualizations. The data module provides 10+ commonly used datasets in data science, returned as pandas DataFrames (or other backends if configured).

Capabilities

Classification and Clustering Datasets

Classic datasets for machine learning and statistical analysis.

def iris():
    """
    Load the Iris flower dataset.

    Each row describes one flower from one of three species (setosa,
    versicolor, virginica) using four measurements: sepal length, sepal
    width, petal length, and petal width.

    Returns:
    DataFrame: 150 rows × 6 columns
        - sepal_length: float, sepal length in cm
        - sepal_width: float, sepal width in cm
        - petal_length: float, petal length in cm
        - petal_width: float, petal width in cm
        - species: str, flower species ('setosa', 'versicolor', 'virginica')
        - species_id: int, numeric species identifier (1, 2, 3)
    """

def tips():
    """
    Load the restaurant tips dataset.

    Each row represents one restaurant bill, with the tip paid and a few
    characteristics of the party. Useful for exploring relationships between
    categorical and continuous variables.

    Returns:
    DataFrame: 244 rows × 7 columns
        - total_bill: float, total bill amount in dollars
        - tip: float, tip amount in dollars
        - sex: str, gender of the bill payer ('Male', 'Female')
        - smoker: str, smoking status ('Yes', 'No')
        - day: str, day of week ('Thur', 'Fri', 'Sat', 'Sun')
        - time: str, meal time ('Lunch', 'Dinner')
        - size: int, party size (number of people)
    """

Economic and Demographic Data

Datasets containing economic indicators and demographic information over time, plus Olympic medal tables that illustrate wide versus long data layouts.

def gapminder():
    """
    Load the Gapminder world development dataset.

    One row per country per survey year, covering life expectancy,
    population, and GDP per capita between 1952 and 2007. A good fit for
    animated charts and geographic maps.

    Returns:
    DataFrame: 1704 rows × 8 columns
        - country: str, country name
        - continent: str, one of 'Africa', 'Americas', 'Asia', 'Europe', 'Oceania'
        - year: int, survey year (every five years from 1952 through 2007)
        - lifeExp: float, life expectancy in years
        - pop: int, population count
        - gdpPercap: float, GDP per capita in US dollars
        - iso_alpha: str, 3-letter ISO country code
        - iso_num: int, numeric ISO country code
    """

def medals_wide():
    """
    Load the Olympic medals dataset in wide format.

    Medal table for Olympic Short Track Speed Skating for the top three
    nations, with one row per nation and a separate column for each medal
    type.

    Returns:
    DataFrame: 3 rows × 4 columns
        - nation: str, country name
        - gold: int, number of gold medals
        - silver: int, number of silver medals
        - bronze: int, number of bronze medals
    """

def medals_long():
    """
    Load the Olympic medals dataset in long format.

    Same data as medals_wide, reshaped to tidy/long format: one row per
    (nation, medal type) pair, i.e. 3 nations × 3 medal types.

    Returns:
    DataFrame: 9 rows × 3 columns
        - nation: str, country name
        - medal: str, medal type ('gold', 'silver', 'bronze')
        - count: int, number of medals of that type
    """

Time Series and Financial Data

Datasets with temporal components for time series analysis and visualization.

def stocks():
    """
    Load the tech stock price dataset.

    Wide-format table of closing prices for six technology stocks
    (GOOG, AAPL, AMZN, FB, NFLX, MSFT) in 2018/2019, one row per date.
    Useful for financial charts and time series analysis.

    Returns:
    DataFrame: 100 rows × 7 columns
        - date: str, date of the observation (convertible to datetime)
        - GOOG: float, Google stock price
        - AAPL: float, Apple stock price
        - AMZN: float, Amazon stock price
        - FB: float, Facebook stock price
        - NFLX: float, Netflix stock price
        - MSFT: float, Microsoft stock price
    """

def flights():
    """
    Load the airline passenger flights dataset.

    Monthly passenger counts for different airlines and airports. Handy for
    showing time series patterns and seasonal trends.

    Returns:
    DataFrame: 5733 rows × 4 columns
        - year: int, year
        - month: int, month (1-12)
        - passengers: int, number of passengers
        - airline: str, airline identifier
    """
    # NOTE(review): confirm that the installed plotly version actually exposes
    # plotly.data.flights() and that the shape/columns above match it.

Election and Political Data

Datasets containing electoral and political information.

def election():
    """
    Load 2013 Montreal mayoral election results.

    Each row holds the voting results for one electoral district: vote
    counts for the three main candidates, the total, and the outcome.
    Pairs with election_geojson() for choropleth mapping.

    Returns:
    DataFrame: 58 rows × 8 columns
        - district: str, district ID and name (e.g. '101-Bois-de-Liesse')
        - Coderre: int, votes for Denis Coderre
        - Bergeron: int, votes for Richard Bergeron
        - Joly: int, votes for Mélanie Joly
        - total: int, total votes cast
        - winner: str, name of the candidate with the most votes
        - result: str, how the winner won ('plurality', 'majority')
        - district_id: int, numeric district identifier for mapping
    """

def election_geojson():
    """
    Load the GeoJSON boundaries for the Montreal election districts.

    Companion geographic data for the election dataset; pass it to a
    choropleth to draw the district shapes.

    Returns:
    dict: GeoJSON feature collection with district boundaries
    """

Scientific and Environmental Data

Datasets from scientific measurements and environmental monitoring, plus geographic car-sharing data.

def wind():
    """
    Load the wind measurement dataset.

    Each row gives the frequency of one wind-strength level in one compass
    direction (16 directions × 8 strength levels). Useful for polar bar
    charts, wind roses, and meteorological visualizations.

    Returns:
    DataFrame: 128 rows × 3 columns
        - direction: str, 16-point compass direction ('N', 'NNE', 'NE', ..., 'NNW')
        - strength: str, wind strength category (ranges such as '0-1', '1-2', ..., '6+')
        - frequency: float, frequency of occurrence
    """

def carshare():
    """
    Load the car sharing availability dataset.

    Each row represents the availability of car-sharing services near the
    centroid of a zone in Montreal over a month-long period.

    Returns:
    DataFrame: 249 rows × 4 columns
        - centroid_lat: float, latitude of the zone centroid
        - centroid_lon: float, longitude of the zone centroid
        - car_hours: float, car-hours of availability in the zone
        - peak_hour: int, hour of the day at which availability peaks
    """

Experimental and A/B Testing Data

Datasets designed for statistical analysis and experimental design examples.

def experiment():
    """
    Load the simulated experiment dataset.

    Wide-format results for 100 simulated participants on three hypothetical
    experiments, along with each participant's gender and control/treatment
    group. Useful for demonstrating statistical analysis and hypothesis
    testing.

    Returns:
    DataFrame: 100 rows × 5 columns
        - experiment_1: float, first experiment result
        - experiment_2: float, second experiment result
        - experiment_3: float, third experiment result
        - gender: str, participant gender ('male', 'female')
        - group: str, experimental group ('control', 'treatment')
    """

Usage Examples

import plotly.express as px
import plotly.data as data

# Load and explore iris dataset
df_iris = data.iris()
print(df_iris.head())
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" line).
df_iris.info()

# Create scatter plot with iris data
fig1 = px.scatter(df_iris, x="sepal_width", y="sepal_length", 
                 color="species", size="petal_length",
                 title="Iris Dataset Visualization")
fig1.show()

# Load gapminder for animated visualization
df_gap = data.gapminder()
fig2 = px.scatter(df_gap, x="gdpPercap", y="lifeExp", 
                 animation_frame="year", animation_group="country",
                 size="pop", color="continent", hover_name="country",
                 log_x=True, size_max=55, range_x=[100,100000], 
                 range_y=[25,90], title="Gapminder Animation")
fig2.show()

# Stock price time series (the Google column is named "GOOG", not "GOOGL")
df_stocks = data.stocks()
fig3 = px.line(df_stocks, x="date", y=["AAPL", "GOOG", "AMZN"], 
              title="Tech Stock Prices")
fig3.show()

# Tips dataset for statistical analysis
df_tips = data.tips()
fig4 = px.box(df_tips, x="day", y="total_bill", color="time",
             title="Restaurant Bills by Day and Time")
fig4.show()

# Wind data for polar visualization
df_wind = data.wind()
fig5 = px.bar_polar(df_wind, r="frequency", theta="direction",
                   color="strength", template="plotly_dark",
                   color_discrete_sequence=px.colors.sequential.Plasma_r,
                   title="Wind Pattern Analysis")
fig5.show()

# Election data for choropleth mapping
df_election = data.election()
geojson = data.election_geojson()
# The "district" column matches each feature's properties.district value, not
# the feature id, so featureidkey must be given for any region to be drawn.
fig6 = px.choropleth(df_election, geojson=geojson, locations="district",
                    featureidkey="properties.district",
                    color="winner", 
                    hover_data=["Coderre", "Bergeron", "Joly"],
                    projection="mercator",
                    title="Montreal Election Results")
# Zoom to the districts and hide the (otherwise world-scale) base map.
fig6.update_geos(fitbounds="locations", visible=False)
fig6.show()

# Car sharing geographic analysis (columns: centroid_lat, centroid_lon,
# car_hours, peak_hour)
df_cars = data.carshare()
fig7 = px.scatter_mapbox(df_cars, lat="centroid_lat", lon="centroid_lon",
                        size="car_hours", color="peak_hour",
                        hover_data=["car_hours"], zoom=10, height=600,
                        mapbox_style="open-street-map",
                        title="Car Sharing Usage Patterns")
fig7.show()

# Olympic medals comparison
df_medals = data.medals_long()
fig8 = px.bar(df_medals, x="nation", y="count", color="medal",
             title="Olympic Short Track Speed Skating Medal Count")
fig8.show()

# Flight passenger trends
# NOTE(review): confirm data.flights() exists in the installed plotly version
# and that it has "month", "passengers" and "airline" columns.
df_flights = data.flights()
fig9 = px.line(df_flights, x="month", y="passengers", color="airline",
              title="Airline Passenger Trends")
fig9.show()

# A/B testing results
df_experiment = data.experiment()
fig10 = px.box(df_experiment, y=["experiment_1", "experiment_2", "experiment_3"],
              color="group", title="A/B Testing Results")
fig10.show()

# Dataset information summary
datasets = [
    ('iris', data.iris),
    ('tips', data.tips), 
    ('gapminder', data.gapminder),
    ('stocks', data.stocks),
    ('flights', data.flights),
    ('wind', data.wind),
    ('election', data.election),
    ('carshare', data.carshare),
    ('medals_long', data.medals_long),
    ('experiment', data.experiment)
]

for name, func in datasets:
    df = func()
    print(f"{name}: {df.shape[0]} rows, {df.shape[1]} columns")

Install with Tessl CLI