Python library that transforms Pandas and Polars DataFrames into interactive DataTables with sorting, pagination, and filtering capabilities.
—
Utilities for data handling, downsampling, sample data generation, and type processing to ensure optimal table performance and provide testing data for development and demonstration purposes.
Functions for automatically reducing DataFrame size when it exceeds specified limits, ensuring responsive table performance while preserving data structure and representation.
def downsample(df, max_rows=0, max_columns=0, max_bytes=0):
"""
Return a subset of the DataFrame that fits the specified limits.
Parameters:
- df: Pandas/Polars DataFrame or Series to downsample
- max_rows (int): Maximum number of rows (0 = unlimited)
- max_columns (int): Maximum number of columns (0 = unlimited)
- max_bytes (int | str): Maximum memory usage ("64KB", "1MB", or integer bytes)
Returns:
tuple[DataFrame, str]: (downsampled_df, warning_message)
- warning_message is empty string if no downsampling occurred
"""
def nbytes(df):
"""
Calculate memory usage of DataFrame.
Parameters:
- df: Pandas/Polars DataFrame or Series
Returns:
int: Memory usage in bytes
"""
def as_nbytes(mem):
"""
Convert memory specification to bytes.
Parameters:
- mem (int | float | str): Memory specification ("64KB", "1MB", etc. or numeric)
Returns:
int: Memory size in bytes
Raises:
ValueError: If specification format is invalid or too large (>= 1GB)
"""
Comprehensive collection of functions for generating test data with various data types, structures, and complexities for development, testing, and demonstration purposes.
def get_countries(html=False, climate_zone=False):
"""
Return DataFrame with world countries data from World Bank.
Parameters:
- html (bool): If True, include HTML formatted country/capital links and flag images
- climate_zone (bool): If True, add climate zone and hemisphere columns
Returns:
pd.DataFrame: Countries data with columns: region, country, capital, longitude, latitude
"""
def get_population():
"""
Return Series with world population data from World Bank.
Returns:
pd.Series: Population data indexed by country name
"""
def get_indicators():
"""
Return DataFrame with subset of World Bank indicators.
Returns:
pd.DataFrame: World Bank indicators data
"""
def get_df_complex_index():
"""
Return DataFrame with complex multi-level index for testing.
Returns:
pd.DataFrame: DataFrame with MultiIndex (region, country) and MultiIndex columns
"""
def get_dict_of_test_dfs(N=100, M=100):
"""
Return dictionary of test DataFrames with various data types and structures.
Parameters:
- N (int): Number of rows for generated data
- M (int): Number of columns for wide DataFrame
Returns:
dict[str, pd.DataFrame]: Test DataFrames including empty, boolean, int, float,
string, datetime, categorical, object, multiindex, and complex index types
"""
def get_dict_of_polars_test_dfs(N=100, M=100):
"""
Return dictionary of Polars test DataFrames.
Parameters:
- N (int): Number of rows for generated data
- M (int): Number of columns for wide DataFrame
Returns:
dict[str, pl.DataFrame]: Polars versions of test DataFrames
"""
def generate_random_df(rows, columns, column_types=None):
"""
Generate random DataFrame with specified dimensions and data types.
Parameters:
- rows (int): Number of rows to generate
- columns (int): Number of columns to generate
- column_types (list, optional): List of data types to use (default: COLUMN_TYPES)
Returns:
pd.DataFrame: Random DataFrame with mixed data types
"""
def generate_random_series(rows, type):
"""
Generate random Series of specified type and length.
Parameters:
- rows (int): Number of rows to generate
- type (str): Data type ("bool", "int", "float", "str", "categories",
"boolean", "Int64", "date", "datetime", "timedelta")
Returns:
pd.Series: Random Series of specified type
"""
def get_dict_of_polars_test_dfs(N=100, M=100):
"""
Return dictionary of Polars test DataFrames.
Parameters:
- N (int): Number of rows for generated data
- M (int): Number of columns for wide DataFrame
Returns:
dict[str, pl.DataFrame]: Polars versions of test DataFrames with same structure as pandas versions
"""
def get_dict_of_test_series():
"""
Return dictionary of test Series with various data types.
Returns:
dict[str, pd.Series]: Test Series including boolean, int, float, string,
categorical, datetime, and complex types
"""
def get_dict_of_polars_test_series():
"""
Return dictionary of Polars test Series.
Returns:
dict[str, pl.Series]: Polars versions of test Series
"""
def generate_date_series():
"""
Generate Series with various date formats and edge cases.
Returns:
pd.Series: Date series with timezone, leap years, and boundary dates
"""
def get_pandas_styler():
"""
Return styled Pandas DataFrame with background colors and tooltips.
Returns:
pd.Styler: Styled DataFrame with trigonometric data and formatting
"""
Helper functions for accessing ITables package resources and internal file management.
def find_package_file(*path):
"""
Return full path to file within ITables package.
Parameters:
- *path (str): Path components relative to package root
Returns:
Path: Full path to package file
"""
def read_package_file(*path):
"""
Read and return content of file within ITables package.
Parameters:
- *path (str): Path components relative to package root
Returns:
str: File content as string
"""
import numpy as np
import pandas as pd
from itables.downsample import downsample
# Create large DataFrame
df = pd.DataFrame({
'data': range(10000),
'values': np.random.randn(10000)
})
# Downsample to fit limits
small_df, warning = downsample(df, max_rows=1000, max_bytes="1MB")
if warning:
print(f"Downsampling applied: {warning}")
print(f"Original shape: {df.shape}, New shape: {small_df.shape}")
from itables.sample_dfs import get_countries, get_dict_of_test_dfs
from itables import show
# Display world countries data
countries = get_countries(html=True, climate_zone=True)
show(countries, caption="World Countries with Climate Data")
# Get various test DataFrames
test_dfs = get_dict_of_test_dfs(N=50, M=10)
# Display different data types
show(test_dfs['float'], caption="Float Data Types")
show(test_dfs['time'], caption="Time Data Types")
show(test_dfs['multiindex'], caption="MultiIndex Example")
from itables.sample_dfs import generate_random_df, COLUMN_TYPES
from itables import show
# Generate random DataFrame
random_df = generate_random_df(
rows=100,
columns=8,
column_types=['int', 'float', 'str', 'bool', 'date', 'categories']
)
show(random_df, caption="Random Generated Data")
# Generate with all supported types
full_random = generate_random_df(rows=50, columns=len(COLUMN_TYPES))
show(full_random, caption="All Data Types")
from itables.sample_dfs import get_pandas_styler
from itables import show
# Get pre-styled DataFrame
styled_df = get_pandas_styler()
show(styled_df,
caption="Styled Trigonometric Data",
allow_html=True) # Required for styled DataFrames
from itables.downsample import nbytes, as_nbytes
import pandas as pd
# Analyze DataFrame memory usage
df = pd.DataFrame({
'A': range(1000),
'B': ['text'] * 1000,
'C': pd.date_range('2020-01-01', periods=1000)
})
memory_usage = nbytes(df)
print(f"DataFrame uses {memory_usage:,} bytes")
# Convert memory specifications
print(f"64KB = {as_nbytes('64KB'):,} bytes")
print(f"1MB = {as_nbytes('1MB'):,} bytes")
print(f"Direct int: {as_nbytes(1024)} bytes")
from itables.sample_dfs import get_dict_of_test_dfs, get_dict_of_test_series
from itables import show
# Get all test DataFrames
test_data = get_dict_of_test_dfs(N=20, M=5)
# Show specific interesting cases
show(test_data['empty'], caption="Empty DataFrame")
show(test_data['duplicated_columns'], caption="Duplicated Column Names")
show(test_data['big_integers'], caption="Large Integer Handling")
# Test Series data
test_series = get_dict_of_test_series()
for name, series in list(test_series.items())[:3]:
show(series.to_frame(), caption=f"Series: {name}")
from itables.utils import find_package_file, read_package_file
# Find package files
dt_bundle_path = find_package_file("html", "dt_bundle.js")
print(f"DataTables bundle located at: {dt_bundle_path}")
# Read package content (for advanced use cases)
init_html = read_package_file("html", "init_datatables.html")
print(f"Init HTML template length: {len(init_html)} characters")
The COLUMN_TYPES constant defines all supported data types for random generation:
COLUMN_TYPES = [
"bool", # Boolean values
"int", # Integer values
"float", # Floating point (with NaN, inf handling)
"str", # String values
"categories", # Categorical data
"boolean", # Nullable boolean (pandas extension)
"Int64", # Nullable integer (pandas extension)
"date", # Date values
"datetime", # Datetime values
"timedelta" # Time duration values
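]
The downsampling system uses intelligent algorithms to reduce DataFrame size when it exceeds the specified limits, keeping tables responsive while preserving data structure and representation.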
Install with Tessl CLI
npx tessl i tessl/pypi-itables