tessl/pypi-boost-histogram

The Boost::Histogram Python wrapper providing fast histogram implementations with full power and flexibility for scientific computing.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Storage and Accumulators

Name: tessl/pypi-boost-histogram
Author: tessl

Different storage backends for histogram data, from simple counting to complex statistical accumulators with variance tracking and weighted operations. Storage types determine how data is accumulated and what statistical information is available.

Capabilities

Base Storage Interface

Common interface for all storage types.

class Storage:
    """Common parent class shared by all histogram storage types."""

    # The accumulator type used by a storage; each concrete storage class
    # assigns its own value.
    accumulator: type

Basic Storage Types

Simple numeric storage for basic histogram operations.

class Int64(Storage):
    """64-bit integer storage for simple counting.

    Each bin accumulates into a plain ``int``.
    """
    accumulator = int

class Double(Storage):
    """Double-precision floating-point storage.

    Each bin accumulates into a plain ``float``. The usage examples create a
    histogram without a ``storage`` argument to get this storage by default.
    """
    accumulator = float

class AtomicInt64(Storage):
    """Thread-safe 64-bit integer storage for parallel operations.

    Meant for fills that pass ``threads=...``; like ``Int64``, each bin
    accumulates into a plain ``int``.
    """
    accumulator = int

class Unlimited(Storage):
    """Adaptive counting storage whose bins grow as needed instead of overflowing.

    NOTE(review): the accumulator declared below is ``float``, not ``int``.
    Upstream boost-histogram describes this storage as starting with small
    integers and converting to double precision once weights are used --
    confirm before describing it as strictly integer storage.
    """
    accumulator = float

Weighted Storage Types

Storage types that track weights and variances.

class Weight(Storage):
    """Storage for weighted histograms with variance tracking."""
    # Per-bin accumulator: a weighted sum together with its variance.
    accumulator = WeightedSum

class WeightedMean(Storage):
    """Storage for weighted mean calculations."""
    # Refers to the WeightedMean *accumulator* class (listed under
    # "Accumulator Classes"), which shares its name with this storage class.
    accumulator = WeightedMean

Statistical Storage Types

Advanced storage for statistical measurements.

class Mean(Storage):
    """Storage for mean and variance calculations."""
    # Refers to the Mean *accumulator* class (listed under
    # "Accumulator Classes"), which shares its name with this storage class.
    accumulator = Mean

Accumulator Classes

Accumulator objects returned when a single histogram bin is accessed (for example, `hist[25]`).

class Sum:
    """Accumulator that holds a plain running sum."""

    @property
    def value(self) -> float:
        """Return the value accumulated so far."""
        ...

class Mean:
    """Accumulator for a mean, tracking the entry count alongside it."""

    @property
    def count(self) -> float:
        """Return the number of entries accumulated."""
        ...

    @property
    def value(self) -> float:
        """Return the mean of the entries."""
        ...

    @property
    def variance(self) -> float:
        """Return the variance of the entries."""
        ...

class WeightedSum:
    """Accumulator for a sum of weights together with its variance."""

    @property
    def value(self) -> float:
        """Return the weighted sum."""
        ...

    @property
    def variance(self) -> float:
        """Return the variance of the weighted sum."""
        ...

    def __iadd__(self, other):
        """Add ``other`` to this accumulator in place."""
        ...

    def __imul__(self, other):
        """Multiply this accumulator by ``other`` in place."""
        ...

    def __eq__(self, other) -> bool:
        """Compare this accumulator with ``other`` for equality."""
        ...

class WeightedMean:
    """Accumulator for a mean computed from weighted entries."""

    @property
    def sum_of_weights(self) -> float:
        """Return the sum of the weights."""
        ...

    @property
    def sum_of_weights_squared(self) -> float:
        """Return the sum of the squared weights."""
        ...

    @property
    def value(self) -> float:
        """Return the weighted mean."""
        ...

    @property
    def variance(self) -> float:
        """Return the variance of the weighted mean."""
        ...

    @property
    def count(self) -> float:
        """Return the effective sample count."""
        ...

Storage Selection Guidelines

Different storage types are optimized for different use cases:

Int64: Fastest for simple counting, limited to integers
Double: General-purpose floating-point storage
AtomicInt64: Thread-safe counting for parallel fills
Unlimited: Counting storage that grows as needed so counts do not overflow (its accumulator type is float)
Weight: Weighted data with automatic variance calculation
Mean: Statistical analysis requiring mean and variance
WeightedMean: Weighted statistical analysis

Usage Examples

Basic Storage Types

import boost_histogram as bh
import numpy as np

# One shared sample: 1000 draws from a normal distribution
data = np.random.normal(5, 2, 1000)

# No storage argument -> default storage (Double)
hist1 = bh.Histogram(bh.axis.Regular(100, 0, 10))
hist1.fill(data)

# Integer storage, requested explicitly
hist2 = bh.Histogram(
    bh.axis.Regular(100, 0, 10),
    storage=bh.storage.Int64(),
)
hist2.fill(data)

# Thread-safe storage, so the fill can be spread over 4 threads
hist3 = bh.Histogram(
    bh.axis.Regular(100, 0, 10),
    storage=bh.storage.AtomicInt64(),
)
hist3.fill(data, threads=4)

Weighted Histograms

# Weight storage: every bin keeps a weighted sum and its variance
axis = bh.axis.Regular(50, 0, 10)
storage = bh.storage.Weight()
hist = bh.Histogram(axis, storage=storage)

# 1000 uniform samples, each paired with an exponentially distributed weight
data = np.random.uniform(0, 10, 1000)
weights = np.random.exponential(1.0, 1000)
hist.fill(data, weight=weights)

# Whole-histogram arrays
values = hist.values()        # weighted sum per bin
variances = hist.variances()  # variance of each weighted sum

# Indexing a single bin (here bin 25) yields its WeightedSum accumulator
bin_accumulator = hist[25]
print(f"Value: {bin_accumulator.value}")
print(f"Variance: {bin_accumulator.variance}")

Mean Storage

# Create histogram for mean calculations
hist = bh.Histogram(bh.axis.Regular(20, 0, 10), storage=bh.storage.Mean())

# Fill with sample data
x_positions = np.random.uniform(0, 10, 1000)
y_values = 2 * x_positions + np.random.normal(0, 1, 1000)

# x_positions selects the bin; y_values is the quantity averaged inside it
hist.fill(x_positions, sample=y_values)

# Access mean values and variances
means = hist.values()  # Mean of y_values in each x bin
# NOTE(review): for Mean storage, upstream boost-histogram describes
# variances() as the variance of the per-bin *mean* (sample variance divided
# by the count), not the spread of y_values themselves; the per-bin sample
# variance is available as hist.view().variance -- confirm against the
# installed version.
variances = hist.variances()

# Individual bin access returns Mean accumulator
bin_mean = hist[10]
print(f"Count: {bin_mean.count}")
print(f"Mean: {bin_mean.value}")
print(f"Variance: {bin_mean.variance}")

Weighted Mean Storage

# WeightedMean storage: per-bin means of weighted samples
axis = bh.axis.Regular(30, 0, 15)
storage = bh.storage.WeightedMean()
hist = bh.Histogram(axis, storage=storage)

# 2000 points: x picks the bin, y is the sampled quantity, and every point
# carries its own weight
x_data = np.random.uniform(0, 15, 2000)
noise = np.random.normal(0, 0.2, 2000)
y_data = np.sin(x_data) + noise
weights = np.random.exponential(1.0, 2000)

# Weights and samples are passed together in a single fill
hist.fill(x_data, weight=weights, sample=y_data)

# Whole-histogram arrays
weighted_means = hist.values()
variances = hist.variances()

# Accumulator of one bin
bin_acc = hist[15]
print(f"Sum of weights: {bin_acc.sum_of_weights}")
print(f"Weighted mean: {bin_acc.value}")
print(f"Variance: {bin_acc.variance}")

Storage Conversion and Views

import boost_histogram as bh
import numpy as np  # needed below for np.random.normal and np.ones_like

# Create histogram with Weight storage
hist = bh.Histogram(bh.axis.Regular(50, 0, 10), storage=bh.storage.Weight())

# Fill with weighted data
data = np.random.normal(5, 2, 1000)
weights = np.ones_like(data)  # Unit weights
hist.fill(data, weight=weights)

# Get structured view of the data; its fields are accessed by name
view = hist.view()  # Returns WeightedSumView
print(f"Values: {view.value}")      # Weighted sums
print(f"Variances: {view.variance}") # Variances

# Convert to simple values for plotting
values = hist.values()  # Extract just the values as numpy array

Multi-dimensional with Different Storage

# 2D histogram with mean storage for z-values
hist2d = bh.Histogram(
    bh.axis.Regular(25, 0, 5),
    bh.axis.Regular(25, 0, 5),
    storage=bh.storage.Mean()
)

# Generate 3D data
x = np.random.uniform(0, 5, 5000)
y = np.random.uniform(0, 5, 5000)
z = x + y + np.random.normal(0, 0.5, 5000)  # z depends on x and y

# Fill with z as sample: (x, y) selects the bin, z is averaged inside it
hist2d.fill(x, y, sample=z)

# Get 2D array of mean z-values
mean_z = hist2d.values()  # Shape: (25, 25)
# NOTE(review): for Mean storage, upstream boost-histogram describes
# variances() as the variance of the per-bin *mean* (sample variance divided
# by the count), not the spread of z itself; the per-bin sample variance is
# available as hist2d.view().variance -- confirm against the installed version.
var_z = hist2d.variances()

Performance Considerations

# For high-performance counting with many threads
hist_atomic = bh.Histogram(
    bh.axis.Regular(1000, 0, 100),
    storage=bh.storage.AtomicInt64()
)

# Fill with maximum parallelism
large_data = np.random.normal(50, 15, 10_000_000)
# threads=0 asks boost-histogram to pick the thread count from the available
# hardware; threads=None (the default) does not enable threaded filling.
hist_atomic.fill(large_data, threads=0)  # Use all available cores

# For exact integer arithmetic without overflow risk
hist_unlimited = bh.Histogram(
    bh.axis.Regular(100, 0, 10),
    storage=bh.storage.Unlimited()
)

# Can handle arbitrarily large counts
small_data = np.random.uniform(0, 10, 100)
for _ in range(1000000):  # Very large number of fills
    hist_unlimited.fill(small_data)

Install with Tessl CLI