The Boost::Histogram Python wrapper providing fast histogram implementations with full power and flexibility for scientific computing.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Different storage backends for histogram data, from simple counting to complex statistical accumulators with variance tracking and weighted operations. Storage types determine how data is accumulated and what statistical information is available.
Common interface for all storage types.
class Storage:
    """Base class for histogram storage types.

    Subclasses set ``accumulator`` to the accumulator type used by that
    storage.
    """

    accumulator: type  # Type of accumulator used for this storage


# Simple numeric storage for basic histogram operations.
class Int64(Storage):
    """64-bit integer storage for simple counting."""

    # Bin values are plain Python ints.
    accumulator = int
class Double(Storage):
    """Double-precision floating-point storage."""

    # Bin values are plain Python floats.
    accumulator = float
class AtomicInt64(Storage):
    """Thread-safe 64-bit integer storage for parallel operations."""

    # Bin values are plain Python ints, as with Int64.
    accumulator = int
class Unlimited(Storage):
    """Adaptive counting storage whose bin values are exposed as floats.

    Counts start in a small integer type and grow as needed; the storage
    switches to double precision when weighted fills are used, which is why
    ``accumulator`` is ``float`` rather than ``int``.
    """

    accumulator = float


# Storage types that track weights and variances.
class Weight(Storage):
    """Storage for weighted histograms with variance tracking."""

    # Each bin holds a WeightedSum accumulator (value plus variance).
    accumulator = WeightedSum
class WeightedMean(Storage):
    """Storage for weighted mean calculations."""

    # Refers to the WeightedMean *accumulator* class, not this storage class.
    accumulator = WeightedMean


# Advanced storage for statistical measurements.
class Mean(Storage):
    """Storage for mean and variance calculations."""

    # Refers to the Mean *accumulator* class, not this storage class.
    accumulator = Mean


# Individual accumulator objects returned by histogram bins.
class Sum:
    """Simple sum accumulator."""

    @property
    def value(self) -> float:
        """Accumulated value."""
class Mean:
    """Mean accumulator with count and sum tracking."""

    @property
    def count(self) -> float:
        """Number of entries."""

    @property
    def value(self) -> float:
        """Mean value."""

    @property
    def variance(self) -> float:
        """Variance of entries."""
class WeightedSum:
    """Weighted sum accumulator with variance."""

    @property
    def value(self) -> float:
        """Weighted sum."""

    @property
    def variance(self) -> float:
        """Variance of weighted sum."""

    def __iadd__(self, other):
        """In-place addition."""

    def __imul__(self, other):
        """In-place multiplication."""

    def __eq__(self, other) -> bool:
        """Test equality."""
class WeightedMean:
    """Weighted mean accumulator."""

    @property
    def sum_of_weights(self) -> float:
        """Sum of weights."""

    @property
    def sum_of_weights_squared(self) -> float:
        """Sum of squared weights."""

    @property
    def value(self) -> float:
        """Weighted mean."""

    @property
    def variance(self) -> float:
        """Variance of weighted mean."""

    @property
    def count(self) -> float:
        """Effective sample count."""


# Different storage types are optimized for different use cases:
import boost_histogram as bh
import numpy as np
# Example: fill the same 1000 normally distributed samples into three
# histograms that share one axis definition and differ only in storage.

# Default storage (Double)
hist1 = bh.Histogram(bh.axis.Regular(100, 0, 10))

# Explicit integer storage
hist2 = bh.Histogram(bh.axis.Regular(100, 0, 10), storage=bh.storage.Int64())

# Thread-safe storage for parallel operations
hist3 = bh.Histogram(bh.axis.Regular(100, 0, 10), storage=bh.storage.AtomicInt64())

# Fill with data
data = np.random.normal(5, 2, 1000)
hist1.fill(data)
hist2.fill(data)
hist3.fill(data, threads=4)  # Use 4 threads

# Create histogram with weighted storage
# Example: weighted fill using Weight storage, then read back per-bin
# weighted sums and their variances.
hist = bh.Histogram(bh.axis.Regular(50, 0, 10), storage=bh.storage.Weight())

# Generate data and weights
data = np.random.uniform(0, 10, 1000)
weights = np.random.exponential(1.0, 1000)

# Fill with weights
hist.fill(data, weight=weights)

# Access values and variances
values = hist.values()  # Weighted sums
variances = hist.variances()  # Variances of weighted sums

# Individual bin access returns WeightedSum accumulator
bin_accumulator = hist[25]  # Get accumulator for bin 25
print(f"Value: {bin_accumulator.value}")
print(f"Variance: {bin_accumulator.variance}")

# Create histogram for mean calculations
# Example: Mean storage bins by x position and accumulates the mean and
# variance of the y samples that land in each bin.
hist = bh.Histogram(bh.axis.Regular(20, 0, 10), storage=bh.storage.Mean())

# Fill with sample data
x_positions = np.random.uniform(0, 10, 1000)
y_values = 2 * x_positions + np.random.normal(0, 1, 1000)
hist.fill(x_positions, sample=y_values)

# Access mean values and variances
means = hist.values()  # Mean of y_values in each x bin
variances = hist.variances()  # Variance of y_values in each x bin

# Individual bin access returns Mean accumulator
bin_mean = hist[10]
print(f"Count: {bin_mean.count}")
print(f"Mean: {bin_mean.value}")
print(f"Variance: {bin_mean.variance}")

# Create histogram for weighted mean calculations
# Example: WeightedMean storage takes both a per-entry weight and a sample
# value, and accumulates the weighted mean of the samples in each bin.
hist = bh.Histogram(bh.axis.Regular(30, 0, 15), storage=bh.storage.WeightedMean())

# Generate data
x_data = np.random.uniform(0, 15, 2000)
y_data = np.sin(x_data) + np.random.normal(0, 0.2, 2000)
weights = np.random.exponential(1.0, 2000)

# Fill with weights and samples
hist.fill(x_data, weight=weights, sample=y_data)

# Access weighted means and variances
weighted_means = hist.values()
variances = hist.variances()

# Individual bin accumulator
bin_acc = hist[15]
print(f"Sum of weights: {bin_acc.sum_of_weights}")
print(f"Weighted mean: {bin_acc.value}")
print(f"Variance: {bin_acc.variance}")

# Start of the next example; this import was fused onto the print call above.
import boost_histogram as bh
# Example: inspect a Weight-storage histogram through its structured view.

# Create histogram with Weight storage
hist = bh.Histogram(bh.axis.Regular(50, 0, 10), storage=bh.storage.Weight())

# Fill with weighted data
data = np.random.normal(5, 2, 1000)
weights = np.ones_like(data)  # Unit weights
hist.fill(data, weight=weights)

# Get structured view of the data
view = hist.view()  # Returns WeightedSumView
print(f"Values: {view.value}")  # Weighted sums
print(f"Variances: {view.variance}")  # Variances

# Convert to simple values for plotting
values = hist.values()  # Extract just the values as numpy array

# 2D histogram with mean storage for z-values
# Example: a 25x25 histogram over (x, y) whose Mean storage accumulates the
# mean and variance of z in every cell.
hist2d = bh.Histogram(
    bh.axis.Regular(25, 0, 5),
    bh.axis.Regular(25, 0, 5),
    storage=bh.storage.Mean(),
)

# Generate 3D data
x = np.random.uniform(0, 5, 5000)
y = np.random.uniform(0, 5, 5000)
z = x + y + np.random.normal(0, 0.5, 5000)  # z depends on x and y

# Fill with z as sample
hist2d.fill(x, y, sample=z)

# Get 2D array of mean z-values
mean_z = hist2d.values()  # Shape: (25, 25)
var_z = hist2d.variances()  # Variance of z in each (x,y) bin

# For high-performance counting with many threads
# Example: AtomicInt64 storage for counting a large sample with a threaded
# fill.
hist_atomic = bh.Histogram(
    bh.axis.Regular(1000, 0, 100),
    storage=bh.storage.AtomicInt64(),
)

# Fill with maximum parallelism
large_data = np.random.normal(50, 15, 10_000_000)
# threads=0 lets the library pick the number of available threads;
# threads=None (the default) would leave threaded filling disabled.
hist_atomic.fill(large_data, threads=0)
# For exact integer arithmetic without overflow risk
hist_unlimited = bh.Histogram(
    bh.axis.Regular(100, 0, 10),
    storage=bh.storage.Unlimited(),
)

# Can handle arbitrarily large counts
small_data = np.random.uniform(0, 10, 100)
for _ in range(1_000_000):  # Very large number of fills
    hist_unlimited.fill(small_data)

# Install with Tessl CLI
npx tessl i tessl/pypi-boost-histogram