or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

axes.md · histogram-core.md · index.md · indexing-operations.md · numpy-integration.md · storage-accumulators.md

storage-accumulators.mddocs/

0

# Storage and Accumulators

1

2

Different storage backends for histogram data, from simple counting to complex statistical accumulators with variance tracking and weighted operations. Storage types determine how data is accumulated and what statistical information is available.

3

4

## Capabilities

5

6

### Base Storage Interface

7

8

Common interface for all storage types.

9

10

```python { .api }

11

class Storage:

12

"""Base class for histogram storage types."""

13

14

accumulator: type # Type of accumulator used for this storage

15

```

16

17

### Basic Storage Types

18

19

Simple numeric storage for basic histogram operations.

20

21

```python { .api }

22

class Int64(Storage):

23

"""64-bit integer storage for simple counting."""

24

accumulator = int

25

26

class Double(Storage):

27

"""Double-precision floating-point storage."""

28

accumulator = float

29

30

class AtomicInt64(Storage):

31

"""Thread-safe 64-bit integer storage for parallel operations."""

32

accumulator = int

33

34

class Unlimited(Storage):

35

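"""Adaptive storage: integer counters widen as needed so counts cannot overflow; converts to double on weighted (non-integer) fills, so bin values are exposed as float."""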

36

accumulator = float

37

```

38

39

### Weighted Storage Types

40

41

Storage types that track weights and variances.

42

43

```python { .api }

44

class Weight(Storage):

45

"""Storage for weighted histograms with variance tracking."""

46

accumulator = WeightedSum

47

48

class WeightedMean(Storage):

49

"""Storage for weighted mean calculations."""

50

accumulator = WeightedMean

51

```

52

53

### Statistical Storage Types

54

55

Advanced storage for statistical measurements.

56

57

```python { .api }

58

class Mean(Storage):

59

"""Storage for mean and variance calculations."""

60

accumulator = Mean

61

```

62

63

### Accumulator Classes

64

65

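Individual accumulator objects returned when a single histogram bin is accessed. These classes live in `bh.accumulators`; the `Mean` and `WeightedMean` accumulators below are distinct from the storage classes of the same name in `bh.storage`.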

66

67

```python { .api }

68

class Sum:

69

"""Simple sum accumulator."""

70

71

@property

72

def value(self) -> float:

73

"""Accumulated value."""

74

75

class Mean:

76

"""Mean accumulator tracking entry count, mean, and variance."""

77

78

@property

79

def count(self) -> float:

80

"""Number of entries."""

81

82

@property

83

def value(self) -> float:

84

"""Mean value."""

85

86

@property

87

def variance(self) -> float:

88

"""Variance of entries."""

89

90

class WeightedSum:

91

"""Weighted sum accumulator with variance."""

92

93

@property

94

def value(self) -> float:

95

"""Weighted sum."""

96

97

@property

98

def variance(self) -> float:

99

"""Variance of weighted sum."""

100

101

def __iadd__(self, other):

102

"""In-place addition."""

103

104

def __imul__(self, other):

105

"""In-place multiplication."""

106

107

def __eq__(self, other) -> bool:

108

"""Test equality."""

109

110

class WeightedMean:

111

"""Weighted mean accumulator."""

112

113

@property

114

def sum_of_weights(self) -> float:

115

"""Sum of weights."""

116

117

@property

118

def sum_of_weights_squared(self) -> float:

119

"""Sum of squared weights."""

120

121

@property

122

def value(self) -> float:

123

"""Weighted mean."""

124

125

@property

126

def variance(self) -> float:

127

"""Variance of weighted mean."""

128

129

@property

130

def count(self) -> float:

131

"""Effective sample count."""

132

```

133

134

### Storage Selection Guidelines

135

136

Different storage types are optimized for different use cases:

137

138

- **Int64**: Fastest for simple counting, limited to integers

139

- **Double**: General-purpose floating-point storage

140

- **AtomicInt64**: Thread-safe counting for parallel fills

141

- **Unlimited**: Exact integer arithmetic without overflow

142

- **Weight**: Weighted data with automatic variance calculation

143

- **Mean**: Statistical analysis requiring mean and variance

144

- **WeightedMean**: Weighted statistical analysis

145

146

## Usage Examples

147

148

### Basic Storage Types

149

150

```python

151

import boost_histogram as bh

152

import numpy as np

153

154

# Default storage (Double)

155

hist1 = bh.Histogram(bh.axis.Regular(100, 0, 10))

156

157

# Explicit integer storage

158

hist2 = bh.Histogram(bh.axis.Regular(100, 0, 10), storage=bh.storage.Int64())

159

160

# Thread-safe storage for parallel operations

161

hist3 = bh.Histogram(bh.axis.Regular(100, 0, 10), storage=bh.storage.AtomicInt64())

162

163

# Fill with data

164

data = np.random.normal(5, 2, 1000)

165

hist1.fill(data)

166

hist2.fill(data)

167

hist3.fill(data, threads=4) # Use 4 threads

168

```

169

170

### Weighted Histograms

171

172

```python

173

# Create histogram with weighted storage

174

hist = bh.Histogram(bh.axis.Regular(50, 0, 10), storage=bh.storage.Weight())

175

176

# Generate data and weights

177

data = np.random.uniform(0, 10, 1000)

178

weights = np.random.exponential(1.0, 1000)

179

180

# Fill with weights

181

hist.fill(data, weight=weights)

182

183

# Access values and variances

184

values = hist.values() # Weighted sums

185

variances = hist.variances() # Variances of weighted sums

186

187

# Individual bin access returns WeightedSum accumulator

188

bin_accumulator = hist[25] # Get accumulator for bin 25

189

print(f"Value: {bin_accumulator.value}")

190

print(f"Variance: {bin_accumulator.variance}")

191

```

192

193

### Mean Storage

194

195

```python

196

# Create histogram for mean calculations

197

hist = bh.Histogram(bh.axis.Regular(20, 0, 10), storage=bh.storage.Mean())

198

199

# Fill with sample data

200

x_positions = np.random.uniform(0, 10, 1000)

201

y_values = 2 * x_positions + np.random.normal(0, 1, 1000)

202

203

hist.fill(x_positions, sample=y_values)

204

205

# Access mean values and variances

206

means = hist.values() # Mean of y_values in each x bin

207

variances = hist.variances() # Variance of the mean of y_values in each x bin (sample variance / count)

208

209

# Individual bin access returns Mean accumulator

210

bin_mean = hist[10]

211

print(f"Count: {bin_mean.count}")

212

print(f"Mean: {bin_mean.value}")

213

print(f"Variance: {bin_mean.variance}")

214

```

215

216

### Weighted Mean Storage

217

218

```python

219

# Create histogram for weighted mean calculations

220

hist = bh.Histogram(bh.axis.Regular(30, 0, 15), storage=bh.storage.WeightedMean())

221

222

# Generate data

223

x_data = np.random.uniform(0, 15, 2000)

224

y_data = np.sin(x_data) + np.random.normal(0, 0.2, 2000)

225

weights = np.random.exponential(1.0, 2000)

226

227

# Fill with weights and samples

228

hist.fill(x_data, weight=weights, sample=y_data)

229

230

# Access weighted means and variances

231

weighted_means = hist.values()

232

variances = hist.variances()

233

234

# Individual bin accumulator

235

bin_acc = hist[15]

236

print(f"Sum of weights: {bin_acc.sum_of_weights}")

237

print(f"Weighted mean: {bin_acc.value}")

238

print(f"Variance: {bin_acc.variance}")

239

```

240

241

### Storage Conversion and Views

242

243

```python

244

import boost_histogram as bh

245

246

# Create histogram with Weight storage

247

hist = bh.Histogram(bh.axis.Regular(50, 0, 10), storage=bh.storage.Weight())

248

249

# Fill with weighted data

250

data = np.random.normal(5, 2, 1000)

251

weights = np.ones_like(data) # Unit weights

252

hist.fill(data, weight=weights)

253

254

# Get structured view of the data

255

view = hist.view() # Returns WeightedSumView

256

print(f"Values: {view.value}") # Weighted sums

257

print(f"Variances: {view.variance}") # Variances

258

259

# Convert to simple values for plotting

260

values = hist.values() # Extract just the values as numpy array

261

```

262

263

### Multi-dimensional with Different Storage

264

265

```python

266

# 2D histogram with mean storage for z-values

267

hist2d = bh.Histogram(

268

bh.axis.Regular(25, 0, 5),

269

bh.axis.Regular(25, 0, 5),

270

storage=bh.storage.Mean()

271

)

272

273

# Generate 3D data

274

x = np.random.uniform(0, 5, 5000)

275

y = np.random.uniform(0, 5, 5000)

276

z = x + y + np.random.normal(0, 0.5, 5000) # z depends on x and y

277

278

# Fill with z as sample

279

hist2d.fill(x, y, sample=z)

280

281

# Get 2D array of mean z-values

282

mean_z = hist2d.values() # Shape: (25, 25)

283

var_z = hist2d.variances() # Variance of the mean of z in each (x,y) bin (sample variance / count)

284

```

285

286

### Performance Considerations

287

288

```python

289

# For high-performance counting with many threads

290

hist_atomic = bh.Histogram(

291

bh.axis.Regular(1000, 0, 100),

292

storage=bh.storage.AtomicInt64()

293

)

294

295

# Fill with maximum parallelism

296

large_data = np.random.normal(50, 15, 10_000_000)

297

hist_atomic.fill(large_data, threads=0) # 0 = pick the thread count automatically (None disables threaded filling)

298

299

# For exact integer arithmetic without overflow risk

300

hist_unlimited = bh.Histogram(

301

bh.axis.Regular(100, 0, 10),

302

storage=bh.storage.Unlimited()

303

)

304

305

# Can handle arbitrarily large counts

306

small_data = np.random.uniform(0, 10, 100)

307

for _ in range(1000000): # Very large number of fills

308

    hist_unlimited.fill(small_data)

309

```