Kernel Density Estimation in Python with three high-performance algorithms through a unified API.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Helper functions for grid generation, array manipulation, and data processing in kernel density estimation workflows. These utilities support the core KDE functionality with data preparation and result processing.
Automatic grid generation for kernel density evaluation with intelligent boundary selection and point distribution.
def autogrid(data, boundary_abs=3, num_points=None, boundary_rel=0.05):
"""
Generate automatic grid for KDE evaluation.
Creates equidistant grid points covering data range with intelligent
boundary extension for proper density estimation at data extremes.
Parameters:
- data: array-like, shape (obs, dims), input data for grid generation
- boundary_abs: float, absolute boundary extension in units (default: 3)
- num_points: int or None, number of grid points per dimension
- boundary_rel: float, relative boundary extension as fraction of range (default: 0.05)
Returns:
- np.ndarray: Grid points of shape (grid_obs, dims)
Raises:
- ValueError: If data is empty or has invalid shape
"""

Usage Example:
import numpy as np
from KDEpy.utils import autogrid
# 1D data
data_1d = np.random.gamma(2, 1, 1000).reshape(-1, 1)
grid_1d = autogrid(data_1d, num_points=256)
print(f"1D grid shape: {grid_1d.shape}")
# 2D data
data_2d = np.random.multivariate_normal([0, 0], [[1, 0.3], [0.3, 1]], 500)
grid_2d = autogrid(data_2d, num_points=64) # 64x64 grid
print(f"2D grid shape: {grid_2d.shape}")
# Custom boundaries
grid_extended = autogrid(data_1d, boundary_abs=5, boundary_rel=0.1)
# Use with KDE
from KDEpy import TreeKDE
kde = TreeKDE().fit(data_1d)
y = kde.evaluate(grid_1d)

Cartesian product generation for multi-dimensional grid construction.
def cartesian(arrays):
"""
Generate cartesian product of input arrays.
Creates all possible combinations of elements from input arrays,
useful for creating multi-dimensional grids and parameter combinations.
Parameters:
- arrays: list of array-like, 1-D arrays to form cartesian product
Returns:
- np.ndarray: 2-D array of shape (M, len(arrays)) with cartesian products
Raises:
- ValueError: If input arrays are not 1-dimensional
"""

Usage Example:
import numpy as np
from KDEpy.utils import cartesian
# Create 2D grid from 1D arrays
x = np.linspace(-3, 3, 50)
y = np.linspace(-2, 2, 40)
grid_2d = cartesian([x, y])
print(f"Grid shape: {grid_2d.shape}") # (2000, 2)
# 3D grid
x = np.linspace(0, 1, 10)
y = np.linspace(0, 1, 10)
z = np.linspace(0, 1, 10)
grid_3d = cartesian([x, y, z])
print(f"3D grid shape: {grid_3d.shape}") # (1000, 3)
# Use with KDE evaluation
from KDEpy import NaiveKDE
kde = NaiveKDE().fit(np.random.randn(100, 2))
densities = kde.evaluate(grid_2d)

Linear binning of data onto regular grids for efficient density computation, particularly used by FFTKDE.
def linear_binning(data, grid_points, weights=None):
"""
Bin data linearly onto grid points.
Distributes data points onto nearest grid points using linear
interpolation, preserving total mass while creating regular grid structure.
Parameters:
- data: array-like, shape (obs, dims), input data points to bin
- grid_points: array-like, shape (grid_obs, dims), target grid points
- weights: array-like or None, shape (obs,), optional weights for data points
Returns:
- np.ndarray: Binned data values on grid
Raises:
- ValueError: If data and grid dimensions don't match
"""
def linbin_cython(data, grid_points, weights=None):
"""
Cython implementation of linear binning for performance.
Parameters:
- data: array-like, input data points
- grid_points: array-like, target grid points
- weights: array-like or None, optional weights
Returns:
- np.ndarray: Binned data on grid
"""
def linbin_numpy(data, grid_points, weights=None):
"""
NumPy implementation of linear binning.
Parameters:
- data: array-like, input data points
- grid_points: array-like, target grid points
- weights: array-like or None, optional weights
Returns:
- np.ndarray: Binned data on grid
"""
def linbin_Ndim(data, grid_points, weights=None):
"""
N-dimensional linear binning dispatcher.
Parameters:
- data: array-like, input data points
- grid_points: array-like, target grid points
- weights: array-like or None, optional weights
Returns:
- np.ndarray: Binned data on grid
"""

Usage Example:
import numpy as np
from KDEpy.binning import linear_binning
from KDEpy.utils import autogrid
# Generate data and grid
data = np.random.randn(1000, 2)
grid = autogrid(data, num_points=32) # 32x32 grid
# Bin data onto grid
binned = linear_binning(data, grid)
print(f"Binned data shape: {binned.shape}")
# With weights
weights = np.random.exponential(1, 1000)
binned_weighted = linear_binning(data, grid, weights=weights)
# Verify mass conservation
print(f"Original mass: {len(data)}")
print(f"Binned mass: {np.sum(binned):.1f}")

Combine utilities for sophisticated grid generation:
import numpy as np
from KDEpy.utils import autogrid, cartesian
# Non-uniform density requires finer grid in certain regions
data = np.concatenate([
np.random.normal(-2, 0.3, 300),
np.random.normal(2, 0.8, 700)
])
# Create adaptive grid with higher resolution near modes
base_grid = autogrid(data.reshape(-1, 1), num_points=128)
fine_region = np.linspace(-2.5, -1.5, 64).reshape(-1, 1)
coarse_region = np.linspace(1, 3, 32).reshape(-1, 1)
# Combine grids
adaptive_grid = np.vstack([base_grid, fine_region, coarse_region])
adaptive_grid = np.unique(adaptive_grid.ravel()).reshape(-1, 1)
# Use with KDE
from KDEpy import TreeKDE
kde = TreeKDE().fit(data)
y = kde.evaluate(adaptive_grid)

Optimize grid generation for high-dimensional data:
import numpy as np
from KDEpy.utils import cartesian, autogrid
# 3D data
data_3d = np.random.multivariate_normal(
mean=[0, 0, 0],
cov=[[1, 0.2, 0.1], [0.2, 1, 0.3], [0.1, 0.3, 1]],
size=2000
)
# Create sparse grid for efficiency
sparse_points = 16 # 16^3 = 4096 points instead of dense grid
x_range = np.linspace(data_3d[:, 0].min()-1, data_3d[:, 0].max()+1, sparse_points)
y_range = np.linspace(data_3d[:, 1].min()-1, data_3d[:, 1].max()+1, sparse_points)
z_range = np.linspace(data_3d[:, 2].min()-1, data_3d[:, 2].max()+1, sparse_points)
sparse_grid = cartesian([x_range, y_range, z_range])
# Evaluate efficiently
from KDEpy import TreeKDE
kde = TreeKDE().fit(data_3d)
densities = kde.evaluate(sparse_grid)

Handle large datasets with chunked processing:
import numpy as np
from KDEpy.binning import linear_binning
def chunked_binning(data, grid_points, weights=None, chunk_size=10000):
    """
    Bin a large dataset onto a grid in chunks to limit memory usage.

    Consecutive slices of at most ``chunk_size`` observations are passed to
    ``linear_binning`` one at a time and the per-chunk results are summed.

    Parameters:
    - data: sequence of observations supporting ``len()`` and slicing
    - grid_points: target grid, forwarded unchanged to ``linear_binning``
    - weights: optional per-observation weights, sliced in step with ``data``
    - chunk_size: int, maximum number of observations per chunk (default: 10000)

    Returns:
    - np.ndarray: Sum of the binned chunks, one entry per row of ``grid_points``

    Raises:
    - ValueError: If chunk_size is smaller than 1
    """
    # A negative step makes range() empty, which would silently return an
    # all-zero result; a zero step fails inside range() with an unrelated
    # message. Reject both up front.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    n_obs = len(data)
    total_binned = np.zeros(len(grid_points))

    for start in range(0, n_obs, chunk_size):
        end = min(start + chunk_size, n_obs)
        chunk_data = data[start:end]
        chunk_weights = None if weights is None else weights[start:end]
        # NOTE(review): if linear_binning normalises the weights of each call,
        # the summed chunks are not on the same scale as one un-chunked call.
        # Confirm against the linear_binning implementation.
        total_binned += linear_binning(chunk_data, grid_points, chunk_weights)

    return total_binned
# Use with very large dataset
# autogrid is called below but this snippet never imported it; import it here
# so the example runs on its own.
from KDEpy.utils import autogrid

large_data = np.random.randn(100000, 2)
grid = autogrid(large_data[:1000], num_points=64)  # Sample for grid
binned = chunked_binning(large_data, grid, chunk_size=5000)

from typing import Union, Optional, List, Tuple
import numpy as np
# The builtin callable() is a function and cannot be subscripted, so the
# signature aliases below must use typing.Callable.
from typing import Callable

# Input array types
ArrayLike = Union[np.ndarray, list, tuple]
DataArray = Union[np.ndarray, list]  # Shape (obs, dims)
GridArray = Union[np.ndarray, list]  # Shape (grid_obs, dims)
WeightsArray = Optional[Union[np.ndarray, list]]  # Shape (obs,)

# Grid specification types
GridSpec = Union[int, Tuple[int, ...], ArrayLike]
BoundarySpec = Union[float, Tuple[float, ...]]

# Function return types
GridPoints = np.ndarray  # Shape (grid_obs, dims)
CartesianProduct = np.ndarray  # Shape (M, len(arrays))
BinnedData = np.ndarray  # Shape matching grid

# Utility function signatures
# Positional parameter types, in order, followed by the return type.
AutogridFunc = Callable[[DataArray, float, Optional[int], float], GridPoints]
CartesianFunc = Callable[[List[ArrayLike]], CartesianProduct]
BinningFunc = Callable[[DataArray, GridArray, WeightsArray], BinnedData]

# Install with Tessl CLI
npx tessl i tessl/pypi-kdepy