0
# Storage and Accumulators
1
2
Different storage backends for histogram data, from simple counting to complex statistical accumulators with variance tracking and weighted operations. Storage types determine how data is accumulated and what statistical information is available.
3
4
## Capabilities
5
6
### Base Storage Interface
7
8
Common interface for all storage types.
9
10
```python { .api }
11
class Storage:
12
"""Base class for histogram storage types."""
13
14
accumulator: type # Type of accumulator used for this storage
15
```
16
17
### Basic Storage Types
18
19
Simple numeric storage for basic histogram operations.
20
21
```python { .api }
22
class Int64(Storage):
23
"""64-bit integer storage for simple counting."""
24
accumulator = int
25
26
class Double(Storage):
27
"""Double-precision floating-point storage."""
28
accumulator = float
29
30
class AtomicInt64(Storage):
31
"""Thread-safe 64-bit integer storage for parallel operations."""
32
accumulator = int
33
34
class Unlimited(Storage):
35
"""Adaptive storage: integer counters that grow as needed to avoid overflow and convert to double precision when filled with weights."""
36
accumulator = float
37
```
38
39
### Weighted Storage Types
40
41
Storage types that track weights and variances.
42
43
```python { .api }
44
class Weight(Storage):
45
"""Storage for weighted histograms with variance tracking."""
46
accumulator = WeightedSum
47
48
class WeightedMean(Storage):
49
"""Storage for weighted mean calculations."""
50
accumulator = WeightedMean
51
```
52
53
### Statistical Storage Types
54
55
Advanced storage for statistical measurements.
56
57
```python { .api }
58
class Mean(Storage):
59
"""Storage for mean and variance calculations."""
60
accumulator = Mean
61
```
62
63
### Accumulator Classes
64
65
Individual accumulator objects returned by histogram bins.
66
67
```python { .api }
68
class Sum:
69
"""Simple sum accumulator."""
70
71
@property
72
def value(self) -> float:
73
"""Accumulated value."""
74
75
class Mean:
76
"""Mean accumulator tracking the entry count, mean, and variance."""
77
78
@property
79
def count(self) -> float:
80
"""Number of entries."""
81
82
@property
83
def value(self) -> float:
84
"""Mean value."""
85
86
@property
87
def variance(self) -> float:
88
"""Variance of entries."""
89
90
class WeightedSum:
91
"""Weighted sum accumulator with variance."""
92
93
@property
94
def value(self) -> float:
95
"""Weighted sum."""
96
97
@property
98
def variance(self) -> float:
99
"""Variance of weighted sum."""
100
101
def __iadd__(self, other):
102
"""In-place addition."""
103
104
def __imul__(self, other):
105
"""In-place multiplication."""
106
107
def __eq__(self, other) -> bool:
108
"""Test equality."""
109
110
class WeightedMean:
111
"""Weighted mean accumulator."""
112
113
@property
114
def sum_of_weights(self) -> float:
115
"""Sum of weights."""
116
117
@property
118
def sum_of_weights_squared(self) -> float:
119
"""Sum of squared weights."""
120
121
@property
122
def value(self) -> float:
123
"""Weighted mean."""
124
125
@property
126
def variance(self) -> float:
127
"""Variance of weighted mean."""
128
129
@property
130
def count(self) -> float:
131
"""Effective sample count."""
132
```
133
134
### Storage Selection Guidelines
135
136
Different storage types are optimized for different use cases:
137
138
- **Int64**: Fastest for simple counting, limited to integers
139
- **Double**: General-purpose floating-point storage
140
- **AtomicInt64**: Thread-safe counting for parallel fills
141
- **Unlimited**: Overflow-free counting; integer counters grow as needed and switch to floating point on weighted fills
142
- **Weight**: Weighted data with automatic variance calculation
143
- **Mean**: Statistical analysis requiring mean and variance
144
- **WeightedMean**: Weighted statistical analysis
145
146
## Usage Examples
147
148
### Basic Storage Types
149
150
```python
151
import boost_histogram as bh
152
import numpy as np
153
154
# Default storage (Double)
155
hist1 = bh.Histogram(bh.axis.Regular(100, 0, 10))
156
157
# Explicit integer storage
158
hist2 = bh.Histogram(bh.axis.Regular(100, 0, 10), storage=bh.storage.Int64())
159
160
# Thread-safe storage for parallel operations
161
hist3 = bh.Histogram(bh.axis.Regular(100, 0, 10), storage=bh.storage.AtomicInt64())
162
163
# Fill with data
164
data = np.random.normal(5, 2, 1000)
165
hist1.fill(data)
166
hist2.fill(data)
167
hist3.fill(data, threads=4) # Use 4 threads
168
```
169
170
### Weighted Histograms
171
172
```python
173
# Create histogram with weighted storage
174
hist = bh.Histogram(bh.axis.Regular(50, 0, 10), storage=bh.storage.Weight())
175
176
# Generate data and weights
177
data = np.random.uniform(0, 10, 1000)
178
weights = np.random.exponential(1.0, 1000)
179
180
# Fill with weights
181
hist.fill(data, weight=weights)
182
183
# Access values and variances
184
values = hist.values() # Weighted sums
185
variances = hist.variances() # Variances of weighted sums
186
187
# Individual bin access returns WeightedSum accumulator
188
bin_accumulator = hist[25] # Get accumulator for bin 25
189
print(f"Value: {bin_accumulator.value}")
190
print(f"Variance: {bin_accumulator.variance}")
191
```
192
193
### Mean Storage
194
195
```python
196
# Create histogram for mean calculations
197
hist = bh.Histogram(bh.axis.Regular(20, 0, 10), storage=bh.storage.Mean())
198
199
# Fill with sample data
200
x_positions = np.random.uniform(0, 10, 1000)
201
y_values = 2 * x_positions + np.random.normal(0, 1, 1000)
202
203
hist.fill(x_positions, sample=y_values)
204
205
# Access mean values and variances
206
means = hist.values() # Mean of y_values in each x bin
207
variances = hist.variances() # Variance of y_values in each x bin
208
209
# Individual bin access returns Mean accumulator
210
bin_mean = hist[10]
211
print(f"Count: {bin_mean.count}")
212
print(f"Mean: {bin_mean.value}")
213
print(f"Variance: {bin_mean.variance}")
214
```
215
216
### Weighted Mean Storage
217
218
```python
219
# Create histogram for weighted mean calculations
220
hist = bh.Histogram(bh.axis.Regular(30, 0, 15), storage=bh.storage.WeightedMean())
221
222
# Generate data
223
x_data = np.random.uniform(0, 15, 2000)
224
y_data = np.sin(x_data) + np.random.normal(0, 0.2, 2000)
225
weights = np.random.exponential(1.0, 2000)
226
227
# Fill with weights and samples
228
hist.fill(x_data, weight=weights, sample=y_data)
229
230
# Access weighted means and variances
231
weighted_means = hist.values()
232
variances = hist.variances()
233
234
# Individual bin accumulator
235
bin_acc = hist[15]
236
print(f"Sum of weights: {bin_acc.sum_of_weights}")
237
print(f"Weighted mean: {bin_acc.value}")
238
print(f"Variance: {bin_acc.variance}")
239
```
240
241
### Storage Conversion and Views
242
243
```python
244
import boost_histogram as bh
import numpy as np
245
246
# Create histogram with Weight storage
247
hist = bh.Histogram(bh.axis.Regular(50, 0, 10), storage=bh.storage.Weight())
248
249
# Fill with weighted data
250
data = np.random.normal(5, 2, 1000)
251
weights = np.ones_like(data) # Unit weights
252
hist.fill(data, weight=weights)
253
254
# Get structured view of the data
255
view = hist.view() # Returns WeightedSumView
256
print(f"Values: {view.value}") # Weighted sums
257
print(f"Variances: {view.variance}") # Variances
258
259
# Convert to simple values for plotting
260
values = hist.values() # Extract just the values as numpy array
261
```
262
263
### Multi-dimensional with Different Storage
264
265
```python
266
# 2D histogram with mean storage for z-values
267
hist2d = bh.Histogram(
268
bh.axis.Regular(25, 0, 5),
269
bh.axis.Regular(25, 0, 5),
270
storage=bh.storage.Mean()
271
)
272
273
# Generate 3D data
274
x = np.random.uniform(0, 5, 5000)
275
y = np.random.uniform(0, 5, 5000)
276
z = x + y + np.random.normal(0, 0.5, 5000) # z depends on x and y
277
278
# Fill with z as sample
279
hist2d.fill(x, y, sample=z)
280
281
# Get 2D array of mean z-values
282
mean_z = hist2d.values() # Shape: (25, 25)
283
var_z = hist2d.variances() # Variance of z in each (x,y) bin
284
```
285
286
### Performance Considerations
287
288
```python
289
# For high-performance counting with many threads
290
hist_atomic = bh.Histogram(
291
bh.axis.Regular(1000, 0, 100),
292
storage=bh.storage.AtomicInt64()
293
)
294
295
# Fill with maximum parallelism
296
large_data = np.random.normal(50, 15, 10_000_000)
297
hist_atomic.fill(large_data, threads=0) # threads=0 uses all available cores (threads=None fills single-threaded)
298
299
# For exact integer arithmetic without overflow risk
300
hist_unlimited = bh.Histogram(
301
bh.axis.Regular(100, 0, 10),
302
storage=bh.storage.Unlimited()
303
)
304
305
# Can handle arbitrarily large counts
306
small_data = np.random.uniform(0, 10, 100)
307
for _ in range(1000000): # Very large number of fills
308
hist_unlimited.fill(small_data)
309
```