or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

axes.md histogram-core.md index.md indexing-operations.md numpy-integration.md storage-accumulators.md

docs/numpy-integration.md

0

# NumPy Integration

1

2

NumPy-compatible histogram functions that provide a familiar interface while leveraging boost-histogram's performance. They are drop-in replacements for NumPy's `histogram`, `histogram2d`, and `histogramdd`, and add keyword-only options (`histogram`, `storage`, `threads`) for choosing the return type, the bin storage, and threaded filling.

3

4

## Capabilities

5

6

### 1D Histogram Function

7

8

Drop-in replacement for numpy.histogram with enhanced performance and features.

9

10

```python { .api }

11

def histogram(

12

a,

13

bins=10,

14

range=None,

15

weights=None,

16

density=False,

17

*,

18

histogram=None,

19

storage=None,

20

threads=None

21

):

22

"""

23

Compute histogram of a dataset.

24

25

Parameters:

26

- a: array-like, input data

27

- bins: int or sequence, number of bins or bin edges

28

- range: tuple, (min, max) range for bins (ignored if bins is sequence)

29

- weights: array-like, weights for each value in a

30

- density: bool, normalize to create probability density

31

- histogram: Histogram class to use for return type (None returns numpy arrays)

32

- storage: Storage type (boost_histogram storage class)

33

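- threads: int or None, number of threads for parallel filling (None, the default, does not enable threaded filling; 0 picks the number of threads automatically)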

34

35

Returns:

36

Tuple of (values, edges) where:

37

- values: histogram bin counts/densities

38

- edges: bin edge array (length N+1 for N bins)

39

"""

40

```

41

42

### 2D Histogram Function

43

44

Compute 2D histograms with high performance.

45

46

```python { .api }

47

def histogram2d(

48

x,

49

y,

50

bins=10,

51

range=None,

52

weights=None,

53

density=False,

54

*,

55

histogram=None,

56

storage=None,

57

threads=None

58

):

59

"""

60

Compute 2D histogram of two datasets.

61

62

Parameters:

63

- x: array-like, x-coordinates of data points

64

- y: array-like, y-coordinates of data points

65

- bins: int or [int, int] or array-like, number of bins or bin edges for each dimension

66

- range: array-like, [[xmin, xmax], [ymin, ymax]] ranges for bins

67

- weights: array-like, weights for each data point

68

- density: bool, normalize to create probability density

69

- histogram: Histogram class to use for return type (None returns numpy arrays)

70

- storage: Storage type (boost_histogram storage class)

71

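- threads: int or None, number of threads for parallel filling (None, the default, does not enable threaded filling; 0 picks the number of threads automatically)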

72

73

Returns:

74

Tuple of (H, xedges, yedges) where:

75

- H: 2D histogram array, shape (nx, ny)

76

- xedges: x-axis bin edges (length nx+1)

77

- yedges: y-axis bin edges (length ny+1)

78

"""

79

```

80

81

### N-Dimensional Histogram Function

82

83

General N-dimensional histogram computation.

84

85

```python { .api }

86

def histogramdd(

87

sample,

88

bins=10,

89

range=None,

90

weights=None,

91

density=False,

92

*,

93

histogram=None,

94

storage=None,

95

threads=None

96

):

97

"""

98

Compute N-dimensional histogram.

99

100

Parameters:

101

- sample: array-like, (N, D) array or sequence of D arrays for D-dimensional data

102

- bins: int or sequence, number of bins or bin edges for each dimension

103

- range: sequence, [(min, max), ...] ranges for each dimension

104

- weights: array-like, weights for each sample point

105

- density: bool, normalize to create probability density

106

- histogram: Histogram class to use for return type (None returns numpy arrays)

107

- storage: Storage type (boost_histogram storage class)

108

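- threads: int or None, number of threads for parallel filling (None, the default, does not enable threaded filling; 0 picks the number of threads automatically)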

109

110

Returns:

111

Tuple of (H, edges) where:

112

- H: N-dimensional histogram array

113

- edges: list of edge arrays for each dimension

114

"""

115

```

116

117

## Usage Examples

118

119

### Basic 1D Histogram

120

121

```python

122

import boost_histogram.numpy as bhnp

123

import numpy as np

124

125

# Generate sample data

126

data = np.random.normal(0, 1, 10000)

127

128

# Basic histogram (drop-in replacement for np.histogram)

129

counts, edges = bhnp.histogram(data, bins=50)

130

131

# With explicit range

132

counts, edges = bhnp.histogram(data, bins=50, range=(-3, 3))

133

134

# With custom bin edges

135

custom_edges = np.linspace(-4, 4, 41) # 40 bins

136

counts, edges = bhnp.histogram(data, bins=custom_edges)

137

138

# Density histogram (normalized)

139

density, edges = bhnp.histogram(data, bins=50, density=True)

140

```

141

142

### Weighted Histograms

143

144

```python

145

# Data with weights

146

data = np.random.exponential(1, 5000)

147

weights = np.random.uniform(0.5, 2.0, 5000)

148

149

# Weighted histogram

150

counts, edges = bhnp.histogram(data, bins=30, weights=weights, range=(0, 5))

151

152

# Weighted density

153

density, edges = bhnp.histogram(data, bins=30, weights=weights,

154

density=True, range=(0, 5))

155

```

156

157

### High-Performance Options

158

159

```python

160

# Use specific storage for better performance

161

counts, edges = bhnp.histogram(

162

data,

163

bins=100,

164

storage=bh.storage.AtomicInt64(), # Thread-safe integer storage

165

threads=4 # Use 4 threads

166

)

167

168

# For very large datasets

169

large_data = np.random.random(50_000_000)

170

counts, edges = bhnp.histogram(

171

large_data,

172

bins=1000,

173

threads=None # Default: no threaded filling; pass threads=0 to use all available cores

174

)

175

```

176

177

### 2D Histograms

178

179

```python

180

# Generate 2D data

181

x = np.random.normal(0, 1, 10000)

182

y = 0.5 * x + np.random.normal(0, 0.8, 10000)

183

184

# Basic 2D histogram

185

H, xedges, yedges = bhnp.histogram2d(x, y, bins=50)

186

187

# With explicit ranges and different bin counts

188

H, xedges, yedges = bhnp.histogram2d(

189

x, y,

190

bins=[30, 40], # 30 bins in x, 40 in y

191

range=[[-3, 3], [-2, 2]] # Explicit ranges

192

)

193

194

# Weighted 2D histogram

195

weights = np.random.exponential(1, 10000)

196

H, xedges, yedges = bhnp.histogram2d(x, y, bins=40, weights=weights)

197

198

# 2D density

199

H_density, xedges, yedges = bhnp.histogram2d(x, y, bins=50, density=True)

200

```

201

202

### Multi-dimensional Histograms

203

204

```python

205

# 3D histogram

206

x = np.random.normal(0, 1, 5000)

207

y = np.random.normal(0, 1, 5000)

208

z = x + y + np.random.normal(0, 0.5, 5000)

209

210

# Stack data for histogramdd

211

sample = np.column_stack([x, y, z])

212

213

# 3D histogram

214

H, edges = bhnp.histogramdd(sample, bins=20)

215

print(f"3D histogram shape: {H.shape}") # (20, 20, 20)

216

217

# Different bins per dimension

218

H, edges = bhnp.histogramdd(sample, bins=[15, 20, 25])

219

220

# With ranges

221

H, edges = bhnp.histogramdd(

222

sample,

223

bins=15,

224

range=[[-2, 2], [-2, 2], [-3, 3]]

225

)

226

227

# Alternative input format (sequence of arrays)

228

H, edges = bhnp.histogramdd([x, y, z], bins=20)

229

```

230

231

### Advanced Examples

232

233

```python

234

import boost_histogram as bh

235

import boost_histogram.numpy as bhnp

236

237

# Compare with pure boost-histogram

238

data = np.random.gamma(2, 1, 100000)

239

240

# NumPy-style interface

241

counts_np, edges_np = bhnp.histogram(data, bins=50, range=(0, 10))

242

243

# Equivalent boost-histogram approach

244

hist_bh = bh.Histogram(bh.axis.Regular(50, 0, 10))

245

hist_bh.fill(data)

246

counts_bh = hist_bh.values()

247

edges_bh = hist_bh.axes[0].edges

248

249

# Results are equivalent

250

assert np.allclose(counts_np, counts_bh)

251

assert np.allclose(edges_np, edges_bh)

252

```

253

254

### Integration with Scientific Stack

255

256

```python

257

import matplotlib.pyplot as plt

258

import boost_histogram.numpy as bhnp

259

260

# Generate and histogram data

261

data = np.random.beta(2, 5, 10000)

262

counts, edges = bhnp.histogram(data, bins=50, density=True)

263

264

# Plot with matplotlib

265

centers = (edges[:-1] + edges[1:]) / 2

266

plt.bar(centers, counts, width=np.diff(edges), alpha=0.7)

267

plt.xlabel('Value')

268

plt.ylabel('Density')

269

plt.title('Beta Distribution Histogram')

270

plt.show()

271

272

# For 2D plotting

273

x = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 1]], 5000)

274

H, xedges, yedges = bhnp.histogram2d(x[:, 0], x[:, 1], bins=30)

275

276

# Plot 2D histogram

277

plt.imshow(H.T, origin='lower', extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])

278

plt.colorbar()

279

plt.xlabel('X')

280

plt.ylabel('Y')

281

plt.title('2D Histogram')

282

plt.show()

283

```

284

285

### Performance Comparison

286

287

```python

288

import time

289

import numpy as np

290

import boost_histogram.numpy as bhnp

291

292

# Large dataset for performance testing

293

large_data = np.random.normal(0, 1, 10_000_000)

294

295

# NumPy histogram

296

start = time.time()

297

np_counts, np_edges = np.histogram(large_data, bins=100)

298

np_time = time.time() - start

299

300

# boost-histogram NumPy interface

301

start = time.time()

302

bh_counts, bh_edges = bhnp.histogram(large_data, bins=100)

303

bh_time = time.time() - start

304

305

# boost-histogram with parallelism

306

start = time.time()

307

bh_parallel_counts, bh_parallel_edges = bhnp.histogram(

308

large_data,

309

bins=100,

310

threads=4

311

)

312

bh_parallel_time = time.time() - start

313

314

print(f"NumPy time: {np_time:.3f}s")

315

print(f"boost-histogram time: {bh_time:.3f}s")

316

print(f"boost-histogram (4 threads) time: {bh_parallel_time:.3f}s")

317

print(f"Speedup vs NumPy: {np_time/bh_parallel_time:.1f}x")

318

```

319

320

### Custom Storage Integration

321

322

```python

323

# Use advanced storage with NumPy interface

324

data = np.random.poisson(3, 50000).astype(float)

325

weights = np.random.exponential(1, 50000)

326

327

# Weighted histogram with variance tracking

328

counts, edges = bhnp.histogram(

329

data,

330

bins=20,

331

range=(0, 15),

332

weights=weights,

333

storage=bh.storage.Weight()

334

)

335

336

# Access the underlying histogram for variance information

337

hist = bh.Histogram(bh.axis.Regular(20, 0, 15), storage=bh.storage.Weight())

338

hist.fill(data, weight=weights)

339

340

values = hist.values() # Same as counts from bhnp.histogram

341

variances = hist.variances() # Additional variance information

342

343

print(f"Bin values: {values[:5]}")

344

print(f"Bin variances: {variances[:5]}")

345

```