A Python package for offline access to Vega datasets
npx @tessl/cli install tessl/pypi-vega-datasets@0.9.0
A Python package for offline access to Vega visualization datasets, providing a comprehensive collection of well-known datasets commonly used in data visualization and statistical analysis. Returns results as Pandas DataFrames for seamless integration with Python data science workflows.
pip install vega_datasets
import vega_datasets
from vega_datasets import data, local_data
Access individual components:
from vega_datasets import DataLoader, LocalDataLoader
from vega_datasets.utils import connection_ok
from vega_datasets import data
# Load a dataset by calling the data loader with dataset name
iris_df = data('iris')
print(type(iris_df)) # pandas.DataFrame
# Or use attribute access
iris_df = data.iris()
# Get list of available datasets
all_datasets = data.list_datasets()
print(len(all_datasets)) # 70 datasets
# Load dataset with pandas options
cars_df = data.cars(usecols=['Name', 'Miles_per_Gallon', 'Horsepower'])
# Access only locally bundled datasets (no internet required)
from vega_datasets import local_data
stocks_df = local_data.stocks()
# Get raw data instead of parsed DataFrame
raw_data = data.iris.raw()
print(type(raw_data)) # bytes
The package follows a clean loader pattern with automatic fallback between local and remote data sources.
The design enables both bundled offline access and remote data fetching, making it suitable for various development and production environments.
Primary interface for loading datasets using either method calls or attribute access, with automatic format detection and pandas DataFrame conversion.
class DataLoader:
    def __call__(self, name: str, return_raw: bool = False, use_local: bool = True, **kwargs) -> pd.DataFrame: ...
    def list_datasets(self) -> List[str]: ...
class LocalDataLoader:
    def __call__(self, name: str, return_raw: bool = False, use_local: bool = True, **kwargs) -> pd.DataFrame: ...
    def list_datasets(self) -> List[str]: ...
Enhanced loaders for datasets requiring custom parsing, date handling, or alternative return types beyond standard DataFrames.
# Stocks with pivot support
def stocks(pivoted: bool = False, use_local: bool = True, **kwargs) -> pd.DataFrame: ...
# Miserables returns tuple of DataFrames
def miserables(use_local: bool = True, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]: ...
# Geographic data returns dict objects
def us_10m(use_local: bool = True, **kwargs) -> dict: ...
def world_110m(use_local: bool = True, **kwargs) -> dict: ...
Locally Bundled (17 datasets) - Available without internet connection:
iris, anscombe, cars
stocks, seattle-weather, seattle-temps, sf-temps
iowa-electricity, us-employment
airports, la-riots
barley, wheat, burtin, crimea, driving
ohlc
Remote Datasets (53 datasets) - Require internet connection:
7zip, flare, flare-dependencies
countries, world-110m, population
budget, budgets, disasters, gapminder
climate, co2-concentration, earthquakes, annual-precip
github, ffox, movies
from vega_datasets import data
# Dataset not found
try:
    df = data('nonexistent-dataset')
except ValueError as e:
    print(e) # "No such dataset nonexistent-dataset exists..."
# Remote-only dataset requested through LocalDataLoader
from vega_datasets import local_data
try:
    df = local_data.github() # github is remote-only
except ValueError as e:
    print(e) # "'github' dataset is not available locally..."
# Network issues for remote datasets
try:
    df = data.github(use_local=False) # Force remote access
except Exception as e:
    print(f"Network error: {e}")
def connection_ok() -> bool:
    """
    Check if web connection is available for remote datasets.
    Returns:
    bool: True if web connection is OK, False otherwise.
    """
from typing import List, Tuple, Dict, Any
import pandas as pd
# Core classes
class DataLoader: ...
class LocalDataLoader: ...
class Dataset: ...
# Package-level exports
data: DataLoader
local_data: LocalDataLoader
__version__: str
# Utility functions
def connection_ok() -> bool: ...