or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

bandwidth-selection.md, index.md, kde-estimators.md, kernel-functions.md, utilities.md

docs/utilities.md

0

# Utilities

1

2

Helper functions for grid generation, array manipulation, and data processing in kernel density estimation workflows. These utilities support the core KDE functionality with data preparation and result processing.

3

4

## Capabilities

5

6

### Grid Generation

7

8

Automatic grid generation for kernel density evaluation with intelligent boundary selection and point distribution.

9

10

```python { .api }

11

def autogrid(data, boundary_abs=3, num_points=None, boundary_rel=0.05):

12

"""

13

Generate automatic grid for KDE evaluation.

14

15

Creates equidistant grid points covering data range with intelligent

16

boundary extension for proper density estimation at data extremes.

17

18

Parameters:

19

- data: array-like, shape (obs, dims), input data for grid generation

20

- boundary_abs: float, absolute boundary extension in units (default: 3)

21

- num_points: int or None, number of grid points per dimension

22

- boundary_rel: float, relative boundary extension as fraction of range (default: 0.05)

23

24

Returns:

25

- np.ndarray: Grid points of shape (grid_obs, dims)

26

27

Raises:

28

- ValueError: If data is empty or has invalid shape

29

"""

30

```

31

32

**Usage Example:**

33

34

```python

35

import numpy as np

36

from KDEpy.utils import autogrid

37

38

# 1D data

39

data_1d = np.random.gamma(2, 1, 1000).reshape(-1, 1)

40

grid_1d = autogrid(data_1d, num_points=256)

41

print(f"1D grid shape: {grid_1d.shape}")

42

43

# 2D data

44

data_2d = np.random.multivariate_normal([0, 0], [[1, 0.3], [0.3, 1]], 500)

45

grid_2d = autogrid(data_2d, num_points=64) # 64x64 grid

46

print(f"2D grid shape: {grid_2d.shape}")

47

48

# Custom boundaries

49

grid_extended = autogrid(data_1d, boundary_abs=5, boundary_rel=0.1)

50

51

# Use with KDE

52

from KDEpy import TreeKDE

53

kde = TreeKDE().fit(data_1d)

54

y = kde.evaluate(grid_1d)

55

```

56

57

### Array Operations

58

59

Cartesian product generation for multi-dimensional grid construction.

60

61

```python { .api }

62

def cartesian(arrays):

63

"""

64

Generate cartesian product of input arrays.

65

66

Creates all possible combinations of elements from input arrays,

67

useful for creating multi-dimensional grids and parameter combinations.

68

69

Parameters:

70

- arrays: list of array-like, 1-D arrays to form cartesian product

71

72

Returns:

73

- np.ndarray: 2-D array of shape (M, len(arrays)) with cartesian products

74

75

Raises:

76

- ValueError: If input arrays are not 1-dimensional

77

"""

78

```

79

80

**Usage Example:**

81

82

```python

83

import numpy as np

84

from KDEpy.utils import cartesian

85

86

# Create 2D grid from 1D arrays

87

x = np.linspace(-3, 3, 50)

88

y = np.linspace(-2, 2, 40)

89

grid_2d = cartesian([x, y])

90

print(f"Grid shape: {grid_2d.shape}") # (2000, 2)

91

92

# 3D grid

93

x = np.linspace(0, 1, 10)

94

y = np.linspace(0, 1, 10)

95

z = np.linspace(0, 1, 10)

96

grid_3d = cartesian([x, y, z])

97

print(f"3D grid shape: {grid_3d.shape}") # (1000, 3)

98

99

# Use with KDE evaluation

100

from KDEpy import NaiveKDE

101

kde = NaiveKDE().fit(np.random.randn(100, 2))

102

densities = kde.evaluate(grid_2d)

103

```

104

105

### Data Binning

106

107

Linear binning of data onto regular grids for efficient density computation, particularly used by FFTKDE.

108

109

```python { .api }

110

def linear_binning(data, grid_points, weights=None):

111

"""

112

Bin data linearly onto grid points.

113

114

Distributes data points onto nearest grid points using linear

115

interpolation, preserving total mass while creating regular grid structure.

116

117

Parameters:

118

- data: array-like, shape (obs, dims), input data points to bin

119

- grid_points: array-like, shape (grid_obs, dims), target grid points

120

- weights: array-like or None, shape (obs,), optional weights for data points

121

122

Returns:

123

- np.ndarray: Binned data values on grid

124

125

Raises:

126

- ValueError: If data and grid dimensions don't match

127

"""

128

129

def linbin_cython(data, grid_points, weights=None):

130

"""

131

Cython implementation of linear binning for performance.

132

133

Parameters:

134

- data: array-like, input data points

135

- grid_points: array-like, target grid points

136

- weights: array-like or None, optional weights

137

138

Returns:

139

- np.ndarray: Binned data on grid

140

"""

141

142

def linbin_numpy(data, grid_points, weights=None):

143

"""

144

NumPy implementation of linear binning.

145

146

Parameters:

147

- data: array-like, input data points

148

- grid_points: array-like, target grid points

149

- weights: array-like or None, optional weights

150

151

Returns:

152

- np.ndarray: Binned data on grid

153

"""

154

155

def linbin_Ndim(data, grid_points, weights=None):

156

"""

157

N-dimensional linear binning dispatcher.

158

159

Parameters:

160

- data: array-like, input data points

161

- grid_points: array-like, target grid points

162

- weights: array-like or None, optional weights

163

164

Returns:

165

- np.ndarray: Binned data on grid

166

"""

167

```

168

169

**Usage Example:**

170

171

```python

172

import numpy as np

173

from KDEpy.binning import linear_binning

174

from KDEpy.utils import autogrid

175

176

# Generate data and grid

177

data = np.random.randn(1000, 2)

178

grid = autogrid(data, num_points=32) # 32x32 grid

179

180

# Bin data onto grid

181

binned = linear_binning(data, grid)

182

print(f"Binned data shape: {binned.shape}")

183

184

# With weights

185

weights = np.random.exponential(1, 1000)

186

binned_weighted = linear_binning(data, grid, weights=weights)

187

188

# Verify mass conservation

189

print(f"Original mass: {len(data)}")

190

print(f"Binned mass: {np.sum(binned):.1f}")

191

```

192

193

## Advanced Usage Patterns

194

195

### Custom Grid Construction

196

197

Combine utilities for sophisticated grid generation:

198

199

```python

200

import numpy as np

201

from KDEpy.utils import autogrid, cartesian

202

203

# Non-uniform density requires finer grid in certain regions

204

data = np.concatenate([

205

np.random.normal(-2, 0.3, 300),

206

np.random.normal(2, 0.8, 700)

207

])

208

209

# Create adaptive grid with higher resolution near modes

210

base_grid = autogrid(data.reshape(-1, 1), num_points=128)

211

fine_region = np.linspace(-2.5, -1.5, 64).reshape(-1, 1)

212

coarse_region = np.linspace(1, 3, 32).reshape(-1, 1)

213

214

# Combine grids

215

adaptive_grid = np.vstack([base_grid, fine_region, coarse_region])

216

adaptive_grid = np.unique(adaptive_grid.ravel()).reshape(-1, 1)

217

218

# Use with KDE

219

from KDEpy import TreeKDE

220

kde = TreeKDE().fit(data)

221

y = kde.evaluate(adaptive_grid)

222

```

223

224

### Efficient Multi-dimensional Evaluation

225

226

Optimize grid generation for high-dimensional data:

227

228

```python

229

import numpy as np

230

from KDEpy.utils import cartesian, autogrid

231

232

# 3D data

233

data_3d = np.random.multivariate_normal(

234

mean=[0, 0, 0],

235

cov=[[1, 0.2, 0.1], [0.2, 1, 0.3], [0.1, 0.3, 1]],

236

size=2000

237

)

238

239

# Create sparse grid for efficiency

240

sparse_points = 16 # 16^3 = 4096 points instead of dense grid

241

x_range = np.linspace(data_3d[:, 0].min()-1, data_3d[:, 0].max()+1, sparse_points)

242

y_range = np.linspace(data_3d[:, 1].min()-1, data_3d[:, 1].max()+1, sparse_points)

243

z_range = np.linspace(data_3d[:, 2].min()-1, data_3d[:, 2].max()+1, sparse_points)

244

245

sparse_grid = cartesian([x_range, y_range, z_range])

246

247

# Evaluate efficiently

248

from KDEpy import TreeKDE

249

kde = TreeKDE().fit(data_3d)

250

densities = kde.evaluate(sparse_grid)

251

```

252

253

### Memory-Efficient Binning

254

255

Handle large datasets with chunked processing:

256

257

```python

258

import numpy as np

259

from KDEpy.binning import linear_binning
from KDEpy.utils import autogrid

260

261

def chunked_binning(data, grid_points, weights=None, chunk_size=10000):

262

"""Bin large datasets in chunks to manage memory usage."""

263

n_obs = len(data)

264

total_binned = np.zeros(len(grid_points))

265

266

for start in range(0, n_obs, chunk_size):

267

end = min(start + chunk_size, n_obs)

268

chunk_data = data[start:end]

269

chunk_weights = weights[start:end] if weights is not None else None

270

271

chunk_binned = linear_binning(chunk_data, grid_points, chunk_weights)

272

total_binned += chunk_binned

273

274

return total_binned

275

276

# Use with very large dataset

277

large_data = np.random.randn(100000, 2)

278

grid = autogrid(large_data[:1000], num_points=64) # Sample for grid

279

binned = chunked_binning(large_data, grid, chunk_size=5000)

280

```

281

282

## Types

283

284

```python { .api }

285

from typing import Callable, Union, Optional, List, Tuple

286

import numpy as np

287

288

# Input array types

289

ArrayLike = Union[np.ndarray, list, tuple]

290

DataArray = Union[np.ndarray, list] # Shape (obs, dims)

291

GridArray = Union[np.ndarray, list] # Shape (grid_obs, dims)

292

WeightsArray = Optional[Union[np.ndarray, list]] # Shape (obs,)

293

294

# Grid specification types

295

GridSpec = Union[int, Tuple[int, ...], ArrayLike]

296

BoundarySpec = Union[float, Tuple[float, ...]]

297

298

# Function return types

299

GridPoints = np.ndarray # Shape (grid_obs, dims)

300

CartesianProduct = np.ndarray # Shape (M, len(arrays))

301

BinnedData = np.ndarray # Shape matching grid

302

303

# Utility function signatures

304

AutogridFunc = Callable[[DataArray, float, Optional[int], float], GridPoints]

305

CartesianFunc = Callable[[List[ArrayLike]], CartesianProduct]

306

BinningFunc = Callable[[DataArray, GridArray, WeightsArray], BinnedData]

307

```