# NumPy Integration

NumPy-compatible histogram functions that provide a familiar interface while leveraging boost-histogram's performance advantages. They are drop-in replacements for NumPy's histogram functions, with additional features and better performance.

## Capabilities

### 1D Histogram Function

Drop-in replacement for `numpy.histogram` with enhanced performance and features.

```python { .api }
def histogram(
    a,
    bins=10,
    range=None,
    weights=None,
    density=False,
    *,
    histogram=None,
    storage=None,
    threads=None
):
    """
    Compute histogram of a dataset.

    Parameters:
    - a: array-like, input data
    - bins: int or sequence, number of bins or bin edges
    - range: tuple, (min, max) range for bins (ignored if bins is a sequence)
    - weights: array-like, weights for each value in a
    - density: bool, normalize to create probability density
    - histogram: Histogram class (e.g. bh.Histogram) to return instead of NumPy arrays (None returns NumPy arrays)
    - storage: boost_histogram storage instance, e.g. bh.storage.Double()
    - threads: int, number of threads for parallel filling (None disables threaded filling; 0 picks the thread count automatically)

    Returns:
    Tuple of (values, edges) where:
    - values: histogram bin counts/densities
    - edges: bin edge array (length N+1 for N bins)
    """
```

### 2D Histogram Function

Compute 2D histograms with high performance.

```python { .api }
def histogram2d(
    x,
    y,
    bins=10,
    range=None,
    weights=None,
    density=False,
    *,
    histogram=None,
    storage=None,
    threads=None
):
    """
    Compute 2D histogram of two datasets.

    Parameters:
    - x: array-like, x-coordinates of data points
    - y: array-like, y-coordinates of data points
    - bins: int or [int, int] or array-like, number of bins or bin edges for each dimension
    - range: array-like, [[xmin, xmax], [ymin, ymax]] ranges for bins
    - weights: array-like, weights for each data point
    - density: bool, normalize to create probability density
    - histogram: Histogram class (e.g. bh.Histogram) to return instead of NumPy arrays (None returns NumPy arrays)
    - storage: boost_histogram storage instance, e.g. bh.storage.Double()
    - threads: int, number of threads for parallel filling (None disables threaded filling; 0 picks the thread count automatically)

    Returns:
    Tuple of (H, xedges, yedges) where:
    - H: 2D histogram array, shape (nx, ny)
    - xedges: x-axis bin edges (length nx+1)
    - yedges: y-axis bin edges (length ny+1)
    """
```

### N-Dimensional Histogram Function

General N-dimensional histogram computation.

```python { .api }
def histogramdd(
    sample,
    bins=10,
    range=None,
    weights=None,
    density=False,
    *,
    histogram=None,
    storage=None,
    threads=None
):
    """
    Compute N-dimensional histogram.

    Parameters:
    - sample: array-like, (N, D) array or sequence of D arrays for D-dimensional data
    - bins: int or sequence, number of bins or bin edges for each dimension
    - range: sequence, [(min, max), ...] ranges for each dimension
    - weights: array-like, weights for each sample point
    - density: bool, normalize to create probability density
    - histogram: Histogram class (e.g. bh.Histogram) to return instead of NumPy arrays (None returns NumPy arrays)
    - storage: boost_histogram storage instance, e.g. bh.storage.Double()
    - threads: int, number of threads for parallel filling (None disables threaded filling; 0 picks the thread count automatically)

    Returns:
    Tuple of (H, edges) where:
    - H: N-dimensional histogram array
    - edges: list of edge arrays for each dimension
    """
```

## Usage Examples

### Basic 1D Histogram

```python
import boost_histogram.numpy as bhnp
import numpy as np

# Generate sample data
data = np.random.normal(0, 1, 10000)

# Basic histogram (drop-in replacement for np.histogram)
counts, edges = bhnp.histogram(data, bins=50)

# With explicit range
counts, edges = bhnp.histogram(data, bins=50, range=(-3, 3))

# With custom bin edges
custom_edges = np.linspace(-4, 4, 41)  # 40 bins
counts, edges = bhnp.histogram(data, bins=custom_edges)

# Density histogram (normalized)
density, edges = bhnp.histogram(data, bins=50, density=True)
```

### Weighted Histograms

```python
# Data with weights
data = np.random.exponential(1, 5000)
weights = np.random.uniform(0.5, 2.0, 5000)

# Weighted histogram
counts, edges = bhnp.histogram(data, bins=30, weights=weights, range=(0, 5))

# Weighted density
density, edges = bhnp.histogram(data, bins=30, weights=weights,
                                density=True, range=(0, 5))
```

### High-Performance Options

```python
import boost_histogram as bh

# Use specific storage for better performance
counts, edges = bhnp.histogram(
    data,
    bins=100,
    storage=bh.storage.AtomicInt64(),  # Thread-safe integer storage
    threads=4  # Use 4 threads
)

# For very large datasets
large_data = np.random.random(50_000_000)
counts, edges = bhnp.histogram(
    large_data,
    bins=1000,
    threads=0  # Pick the thread count automatically (threads=None disables threaded filling)
)
```

### 2D Histograms

```python
# Generate 2D data
x = np.random.normal(0, 1, 10000)
y = 0.5 * x + np.random.normal(0, 0.8, 10000)

# Basic 2D histogram
H, xedges, yedges = bhnp.histogram2d(x, y, bins=50)

# With explicit ranges and different bin counts
H, xedges, yedges = bhnp.histogram2d(
    x, y,
    bins=[30, 40],  # 30 bins in x, 40 in y
    range=[[-3, 3], [-2, 2]]  # Explicit ranges
)

# Weighted 2D histogram
weights = np.random.exponential(1, 10000)
H, xedges, yedges = bhnp.histogram2d(x, y, bins=40, weights=weights)

# 2D density
H_density, xedges, yedges = bhnp.histogram2d(x, y, bins=50, density=True)
```

### Multi-dimensional Histograms

```python
# Generate 3D data
x = np.random.normal(0, 1, 5000)
y = np.random.normal(0, 1, 5000)
z = x + y + np.random.normal(0, 0.5, 5000)

# Stack data for histogramdd
sample = np.column_stack([x, y, z])

# 3D histogram
H, edges = bhnp.histogramdd(sample, bins=20)
print(f"3D histogram shape: {H.shape}")  # (20, 20, 20)

# Different bins per dimension
H, edges = bhnp.histogramdd(sample, bins=[15, 20, 25])

# With ranges
H, edges = bhnp.histogramdd(
    sample,
    bins=15,
    range=[[-2, 2], [-2, 2], [-3, 3]]
)

# Alternative input format (sequence of arrays)
H, edges = bhnp.histogramdd([x, y, z], bins=20)
```

### Advanced Examples

```python
import boost_histogram as bh
import boost_histogram.numpy as bhnp

# Compare with pure boost-histogram
data = np.random.gamma(2, 1, 100000)

# NumPy-style interface
counts_np, edges_np = bhnp.histogram(data, bins=50, range=(0, 10))

# Equivalent boost-histogram approach
hist_bh = bh.Histogram(bh.axis.Regular(50, 0, 10))
hist_bh.fill(data)
counts_bh = hist_bh.values()
edges_bh = hist_bh.axes[0].edges

# Results are equivalent
assert np.allclose(counts_np, counts_bh)
assert np.allclose(edges_np, edges_bh)
```

### Integration with Scientific Stack

```python
import matplotlib.pyplot as plt
import boost_histogram.numpy as bhnp

# Generate and histogram data
data = np.random.beta(2, 5, 10000)
counts, edges = bhnp.histogram(data, bins=50, density=True)

# Plot with matplotlib
centers = (edges[:-1] + edges[1:]) / 2
plt.bar(centers, counts, width=np.diff(edges), alpha=0.7)
plt.xlabel('Value')
plt.ylabel('Density')
plt.title('Beta Distribution Histogram')
plt.show()

# For 2D plotting
x = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 1]], 5000)
H, xedges, yedges = bhnp.histogram2d(x[:, 0], x[:, 1], bins=30)

# Plot 2D histogram
plt.imshow(H.T, origin='lower', extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
plt.colorbar()
plt.xlabel('X')
plt.ylabel('Y')
plt.title('2D Histogram')
plt.show()
```

### Performance Comparison

```python
import time
import numpy as np
import boost_histogram.numpy as bhnp

# Large dataset for performance testing
large_data = np.random.normal(0, 1, 10_000_000)

# NumPy histogram
start = time.time()
np_counts, np_edges = np.histogram(large_data, bins=100)
np_time = time.time() - start

# boost-histogram NumPy interface
start = time.time()
bh_counts, bh_edges = bhnp.histogram(large_data, bins=100)
bh_time = time.time() - start

# boost-histogram with parallelism
start = time.time()
bh_parallel_counts, bh_parallel_edges = bhnp.histogram(
    large_data,
    bins=100,
    threads=4
)
bh_parallel_time = time.time() - start

print(f"NumPy time: {np_time:.3f}s")
print(f"boost-histogram time: {bh_time:.3f}s")
print(f"boost-histogram (4 threads) time: {bh_parallel_time:.3f}s")
print(f"Speedup vs NumPy: {np_time/bh_parallel_time:.1f}x")
```

### Custom Storage Integration

```python
# Use advanced storage with NumPy interface
data = np.random.poisson(3, 50000).astype(float)
weights = np.random.exponential(1, 50000)

# Weighted histogram with variance tracking
counts, edges = bhnp.histogram(
    data,
    bins=20,
    range=(0, 15),
    weights=weights,
    storage=bh.storage.Weight()
)

# Build the equivalent boost-histogram object to access variance information
hist = bh.Histogram(bh.axis.Regular(20, 0, 15), storage=bh.storage.Weight())
hist.fill(data, weight=weights)

values = hist.values()  # Sum of weights per bin (the bin contents computed by bhnp.histogram)
variances = hist.variances()  # Additional variance information

print(f"Bin values: {values[:5]}")
print(f"Bin variances: {variances[:5]}")
```