tessl/pypi-itables

Python library that transforms Pandas and Polars DataFrames into interactive DataTables with sorting, pagination, and filtering capabilities.

—

Pending

Overview

Eval results

Files

Data Processing and Utilities

Name: tessl/pypi-itables
Author: tessl

Utilities for data handling, downsampling, sample data generation, and type processing to ensure optimal table performance and provide testing data for development and demonstration purposes.

Capabilities

Data Downsampling

Functions for automatically reducing DataFrame size when it exceeds specified limits, ensuring responsive table performance while preserving data structure and representation.

def downsample(df, max_rows=0, max_columns=0, max_bytes=0):
    """
    Return a subset of the DataFrame that fits the specified limits.

    Parameters:
    - df: Pandas/Polars DataFrame or Series to downsample
    - max_rows (int): Maximum number of rows (0 = unlimited)
    - max_columns (int): Maximum number of columns (0 = unlimited)
    - max_bytes (int | str): Maximum memory usage ("64KB", "1MB", or integer bytes;
      defaults to 0, presumably meaning unlimited like the other limits)

    Returns:
    tuple[DataFrame, str]: (downsampled_df, warning_message)
    - warning_message is empty string if no downsampling occurred
    """

def nbytes(df):
    """
    Calculate memory usage of DataFrame.

    Parameters:
    - df: Pandas/Polars DataFrame or Series

    Returns:
    int: Memory usage in bytes
    """

def as_nbytes(mem):
    """
    Convert memory specification to bytes.

    Parameters:
    - mem (int | float | str): Memory specification ("64KB", "1MB", etc. or numeric)

    Returns:
    int: Memory size in bytes

    Raises:
    ValueError: If specification format is invalid or too large (>= 1GB)
    """

Sample Data Generation

Comprehensive collection of functions for generating test data with various data types, structures, and complexities for development, testing, and demonstration purposes.

def get_countries(html=False, climate_zone=False):
    """
    Return DataFrame with world countries data from World Bank.

    Parameters:
    - html (bool): If True, include HTML formatted country/capital links and flag images
    - climate_zone (bool): If True, add climate zone and hemisphere columns

    Returns:
    pd.DataFrame: Countries data with columns: region, country, capital, longitude, latitude
    """

def get_population():
    """
    Return Series with world population data from World Bank.

    Returns:
    pd.Series: Population data indexed by country name
    """

def get_indicators():
    """
    Return DataFrame with subset of World Bank indicators.

    Returns:
    pd.DataFrame: World Bank indicators data
    """

def get_df_complex_index():
    """
    Return DataFrame with complex multi-level index for testing.

    Returns:
    pd.DataFrame: DataFrame with MultiIndex (region, country) and MultiIndex columns
    """

def get_dict_of_test_dfs(N=100, M=100):
    """
    Return dictionary of test DataFrames with various data types and structures.

    Parameters:
    - N (int): Number of rows for generated data
    - M (int): Number of columns for wide DataFrame

    Returns:
    dict[str, pd.DataFrame]: Test DataFrames including empty, boolean, int, float, 
    string, datetime, categorical, object, multiindex, and complex index types
    """

def get_dict_of_polars_test_dfs(N=100, M=100):
    """
    Return dictionary of Polars test DataFrames.

    Parameters:
    - N (int): Number of rows for generated data
    - M (int): Number of columns for wide DataFrame

    Returns:
    dict[str, pl.DataFrame]: Polars versions of test DataFrames
    """

def generate_random_df(rows, columns, column_types=None):
    """
    Generate random DataFrame with specified dimensions and data types.

    Parameters:
    - rows (int): Number of rows to generate
    - columns (int): Number of columns to generate
    - column_types (list, optional): List of data types to use (default: COLUMN_TYPES)

    Returns:
    pd.DataFrame: Random DataFrame with mixed data types
    """

def generate_random_series(rows, type):
    """
    Generate random Series of specified type and length.

    Parameters:
    - rows (int): Number of rows to generate
    - type (str): Data type ("bool", "int", "float", "str", "categories", 
                  "boolean", "Int64", "date", "datetime", "timedelta")

    Returns:
    pd.Series: Random Series of specified type
    """

def get_dict_of_polars_test_dfs(N=100, M=100):
    """
    Return dictionary of Polars test DataFrames.

    Parameters:
    - N (int): Number of rows for generated data
    - M (int): Number of columns for wide DataFrame

    Returns:
    dict[str, pl.DataFrame]: Polars versions of test DataFrames with same structure as pandas versions
    """

def get_dict_of_test_series():
    """
    Return dictionary of test Series with various data types.

    Returns:
    dict[str, pd.Series]: Test Series including boolean, int, float, string, 
    categorical, datetime, and complex types
    """

def get_dict_of_polars_test_series():
    """
    Return dictionary of Polars test Series.

    Returns:
    dict[str, pl.Series]: Polars versions of test Series
    """

def generate_date_series():
    """
    Generate Series with various date formats and edge cases.

    Returns:
    pd.Series: Date series with timezone, leap years, and boundary dates
    """

def get_pandas_styler():
    """
    Return styled Pandas DataFrame with background colors and tooltips.

    Returns:
    pandas.io.formats.style.Styler: Styled DataFrame with trigonometric data and formatting
    """

Package Utilities

Helper functions for accessing ITables package resources and internal file management.

def find_package_file(*path):
    """
    Return full path to file within ITables package.

    Parameters:
    - *path (str): Path components relative to package root

    Returns:
    Path: Full path to package file
    """

def read_package_file(*path):
    """
    Read and return content of file within ITables package.

    Parameters:
    - *path (str): Path components relative to package root

    Returns:
    str: File content as string
    """

Usage Examples

Automatic Downsampling

import numpy as np
import pandas as pd
from itables.downsample import downsample

# Create large DataFrame
df = pd.DataFrame({
    'data': range(10000),
    'values': np.random.randn(10000)
})

# Downsample to fit limits
small_df, warning = downsample(df, max_rows=1000, max_bytes="1MB")

# The warning message is an empty string when no downsampling occurred
if warning:
    print(f"Downsampling applied: {warning}")
    print(f"Original shape: {df.shape}, New shape: {small_df.shape}")

Sample Data Usage

from itables.sample_dfs import get_countries, get_dict_of_test_dfs
from itables import show

# Display world countries data
countries = get_countries(html=True, climate_zone=True)
show(countries, caption="World Countries with Climate Data")

# Get various test DataFrames
test_dfs = get_dict_of_test_dfs(N=50, M=10)

# Display different data types
show(test_dfs['float'], caption="Float Data Types")
show(test_dfs['time'], caption="Time Data Types") 
show(test_dfs['multiindex'], caption="MultiIndex Example")

Random Data Generation

from itables.sample_dfs import generate_random_df, COLUMN_TYPES
from itables import show

# Generate random DataFrame
random_df = generate_random_df(
    rows=100, 
    columns=8, 
    column_types=['int', 'float', 'str', 'bool', 'date', 'categories']
)

show(random_df, caption="Random Generated Data")

# Generate with all supported types
full_random = generate_random_df(rows=50, columns=len(COLUMN_TYPES))
show(full_random, caption="All Data Types")

Styled DataFrames

from itables.sample_dfs import get_pandas_styler
from itables import show

# Get pre-styled DataFrame
styled_df = get_pandas_styler()
show(styled_df, 
     caption="Styled Trigonometric Data",
     allow_html=True)  # Required for styled DataFrames

Memory Analysis

from itables.downsample import nbytes, as_nbytes
import pandas as pd

# Analyze DataFrame memory usage
df = pd.DataFrame({
    'A': range(1000),
    'B': ['text'] * 1000,
    'C': pd.date_range('2020-01-01', periods=1000)
})

memory_usage = nbytes(df)
print(f"DataFrame uses {memory_usage:,} bytes")

# Convert memory specifications
print(f"64KB = {as_nbytes('64KB'):,} bytes")
print(f"1MB = {as_nbytes('1MB'):,} bytes")
print(f"Direct int: {as_nbytes(1024)} bytes")

Custom Test Data

from itables.sample_dfs import get_dict_of_test_dfs, get_dict_of_test_series
from itables import show

# Get all test DataFrames
test_data = get_dict_of_test_dfs(N=20, M=5)

# Show specific interesting cases
show(test_data['empty'], caption="Empty DataFrame")
show(test_data['duplicated_columns'], caption="Duplicated Column Names")
show(test_data['big_integers'], caption="Large Integer Handling")

# Test Series data
test_series = get_dict_of_test_series()
for name, series in list(test_series.items())[:3]:
    show(series.to_frame(), caption=f"Series: {name}")

Package Resource Access

from itables.utils import find_package_file, read_package_file

# Find package files
dt_bundle_path = find_package_file("html", "dt_bundle.js")
print(f"DataTables bundle located at: {dt_bundle_path}")

# Read package content (for advanced use cases)
init_html = read_package_file("html", "init_datatables.html")
print(f"Init HTML template length: {len(init_html)} characters")

Data Type Support

Supported Column Types

The COLUMN_TYPES constant defines all supported data types for random generation:

COLUMN_TYPES = [
    "bool",        # Boolean values
    "int",         # Integer values  
    "float",       # Floating point (with NaN, inf handling)
    "str",         # String values
    "categories",  # Categorical data
    "boolean",     # Nullable boolean (pandas extension)
    "Int64",       # Nullable integer (pandas extension)
    "date",        # Date values
    "datetime",    # Datetime values
    "timedelta"    # Time duration values
]

Special Value Handling

NaN/Null values: Automatically handled for appropriate data types
Infinite values: Properly encoded for JSON serialization
Large integers: Preserved without precision loss
Complex objects: Converted to string representation with warnings
Polars types: Full compatibility including unsigned integers and struct types