The Boost::Histogram Python wrapper providing fast histogram implementations with full power and flexibility for scientific computing.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
NumPy-compatible histogram functions providing familiar interfaces while leveraging boost-histogram's performance advantages. These functions offer drop-in replacements for NumPy's histogram functions with additional features and better performance.
Drop-in replacement for numpy.histogram with enhanced performance and features.
def histogram(
a,
bins=10,
range=None,
weights=None,
density=False,
*,
histogram=None,
storage=None,
threads=None
):
"""
Compute the histogram of a dataset.

Parameters:
- a: array-like, input data
- bins: int or sequence, number of bins or the bin edges (default 10)
- range: tuple, (min, max) range for the bins (ignored if bins is a sequence)
- weights: array-like, one weight for each value in a
- density: bool, normalize the result to a probability density (default False)
- histogram: keyword-only; Histogram class to use for the return type
  (None, the default, returns NumPy arrays)
- storage: keyword-only; storage type (boost_histogram storage class)
- threads: keyword-only; int, number of threads for parallel processing

Returns (when histogram is None):
Tuple of (values, edges) where:
- values: histogram bin counts, or densities when density is True
- edges: bin edge array (length N+1 for N bins)
"""
Compute 2D histograms with high performance.
def histogram2d(
x,
y,
bins=10,
range=None,
weights=None,
density=False,
*,
histogram=None,
storage=None,
threads=None
):
"""
Compute the 2D histogram of two datasets.

Parameters:
- x: array-like, x-coordinates of the data points
- y: array-like, y-coordinates of the data points
- bins: int or [int, int] or array-like, number of bins or bin edges for
  each dimension (default 10)
- range: array-like, [[xmin, xmax], [ymin, ymax]] ranges for the bins
- weights: array-like, one weight for each data point
- density: bool, normalize the result to a probability density (default False)
- histogram: keyword-only; Histogram class to use for the return type
  (None, the default, returns NumPy arrays)
- storage: keyword-only; storage type (boost_histogram storage class)
- threads: keyword-only; int, number of threads for parallel processing

Returns (when histogram is None):
Tuple of (H, xedges, yedges) where:
- H: 2D histogram array, shape (nx, ny)
- xedges: x-axis bin edges (length nx+1)
- yedges: y-axis bin edges (length ny+1)
"""
General N-dimensional histogram computation.
def histogramdd(
sample,
bins=10,
range=None,
weights=None,
density=False,
*,
histogram=None,
storage=None,
threads=None
):
"""
Compute an N-dimensional histogram.

Parameters:
- sample: array-like, (N, D) array or sequence of D arrays for
  D-dimensional data
- bins: int or sequence, number of bins or bin edges for each dimension
  (default 10)
- range: sequence, [(min, max), ...] ranges for each dimension
- weights: array-like, one weight for each sample point
- density: bool, normalize the result to a probability density (default False)
- histogram: keyword-only; Histogram class to use for the return type
  (None, the default, returns NumPy arrays)
- storage: keyword-only; storage type (boost_histogram storage class)
- threads: keyword-only; int, number of threads for parallel processing

Returns (when histogram is None):
Tuple of (H, edges) where:
- H: N-dimensional histogram array
- edges: list of edge arrays, one for each dimension
"""
import boost_histogram.numpy as bhnp
import numpy as np

# Draw 10,000 samples from a standard normal distribution
data = np.random.normal(loc=0, scale=1, size=10000)

# Simplest call: used exactly like np.histogram
counts, edges = bhnp.histogram(data, bins=50)

# Restrict the binning to a fixed interval
counts, edges = bhnp.histogram(data, range=(-3, 3), bins=50)

# Supply the bin edges directly: 41 edges define 40 bins
custom_edges = np.linspace(-4, 4, num=41)
counts, edges = bhnp.histogram(data, bins=custom_edges)

# Normalized (density) histogram
density, edges = bhnp.histogram(data, density=True, bins=50)
# Data with weights
data = np.random.exponential(scale=1, size=5000)
weights = np.random.uniform(low=0.5, high=2.0, size=5000)

# Weighted counts: each sample contributes its weight to its bin
counts, edges = bhnp.histogram(
    data,
    weights=weights,
    bins=30,
    range=(0, 5),
)

# Same binning and weights, normalized to a density
density, edges = bhnp.histogram(
    data,
    weights=weights,
    bins=30,
    range=(0, 5),
    density=True,
)
# Use specific storage for better performance
# `bh.storage` is needed below, but nothing earlier in this walkthrough
# imports the top-level package, so bring it into scope here.
import boost_histogram as bh

counts, edges = bhnp.histogram(
    data,
    bins=100,
    storage=bh.storage.AtomicInt64(),  # Thread-safe integer storage
    threads=4,  # Use 4 threads
)

# For very large datasets
large_data = np.random.random(50_000_000)
counts, edges = bhnp.histogram(
    large_data,
    bins=1000,
    # 0 lets boost-histogram pick the thread count from the available cores.
    # None (the default) does NOT enable threading; it fills single-threaded.
    threads=0,
)
# Generate 2D data
x = np.random.normal(loc=0, scale=1, size=10000)
noise = np.random.normal(loc=0, scale=0.8, size=10000)
y = x * 0.5 + noise  # y is linearly correlated with x

# Simplest 2D call: 50 bins along each axis
H, xedges, yedges = bhnp.histogram2d(x, y, bins=50)

# Per-axis bin counts together with explicit per-axis ranges
H, xedges, yedges = bhnp.histogram2d(
    x,
    y,
    range=[[-3, 3], [-2, 2]],  # [xmin, xmax], [ymin, ymax]
    bins=[30, 40],  # 30 bins in x, 40 in y
)

# One weight per (x, y) point
weights = np.random.exponential(scale=1, size=10000)
H, xedges, yedges = bhnp.histogram2d(x, y, weights=weights, bins=40)

# Normalized 2D density
H_density, xedges, yedges = bhnp.histogram2d(x, y, density=True, bins=50)
# 3D histogram
x = np.random.normal(loc=0, scale=1, size=5000)
y = np.random.normal(loc=0, scale=1, size=5000)
noise = np.random.normal(loc=0, scale=0.5, size=5000)
z = (x + y) + noise

# histogramdd takes the points as one (N, D) array: one column per dimension
sample = np.column_stack((x, y, z))

# Same bin count along all three axes
H, edges = bhnp.histogramdd(sample, bins=20)
print(f"3D histogram shape: {H.shape}")  # (20, 20, 20)

# A separate bin count for each dimension
H, edges = bhnp.histogramdd(sample, bins=[15, 20, 25])

# Explicit (min, max) range for each dimension
H, edges = bhnp.histogramdd(
    sample,
    range=[[-2, 2], [-2, 2], [-3, 3]],
    bins=15,
)

# The coordinates may also be passed as a sequence of 1D arrays
H, edges = bhnp.histogramdd([x, y, z], bins=20)
import boost_histogram as bh
import boost_histogram.numpy as bhnp

# Histogram the same gamma-distributed sample through both interfaces
data = np.random.gamma(shape=2, scale=1, size=100000)

# 1) NumPy-style interface
counts_np, edges_np = bhnp.histogram(data, range=(0, 10), bins=50)

# 2) Native boost-histogram objects: 50 regular bins from 0 to 10
regular_axis = bh.axis.Regular(50, 0, 10)
hist_bh = bh.Histogram(regular_axis)
hist_bh.fill(data)
counts_bh = hist_bh.values()
edges_bh = hist_bh.axes[0].edges

# Both routes must agree on the counts and on the bin edges
assert np.allclose(counts_np, counts_bh)
assert np.allclose(edges_np, edges_bh)
import matplotlib.pyplot as plt
import boost_histogram.numpy as bhnp

# 1D: density histogram of beta-distributed samples
data = np.random.beta(a=2, b=5, size=10000)
counts, edges = bhnp.histogram(data, density=True, bins=50)

# Draw one bar per bin, centred on the bin and as wide as the bin
centers = 0.5 * (edges[:-1] + edges[1:])
bin_widths = np.diff(edges)
plt.bar(centers, counts, width=bin_widths, alpha=0.7)
plt.xlabel('Value')
plt.ylabel('Density')
plt.title('Beta Distribution Histogram')
plt.show()

# 2D: correlated bivariate normal samples, one column per coordinate
x = np.random.multivariate_normal(
    mean=[0, 0],
    cov=[[1, 0.5], [0.5, 1]],
    size=5000,
)
H, xedges, yedges = bhnp.histogram2d(x[:, 0], x[:, 1], bins=30)

# H is indexed (x, y) while imshow expects rows to run along y, hence H.T
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
plt.imshow(H.T, origin='lower', extent=extent)
plt.colorbar()
plt.xlabel('X')
plt.ylabel('Y')
plt.title('2D Histogram')
plt.show()
import time
import numpy as np
import boost_histogram.numpy as bhnp

# Large dataset for performance testing
large_data = np.random.normal(0, 1, 10_000_000)

# Durations are measured with time.perf_counter(): it is monotonic and uses
# the highest-resolution clock available, whereas time.time() follows the
# wall clock and can jump if the system time is adjusted mid-measurement.

# NumPy histogram
start = time.perf_counter()
np_counts, np_edges = np.histogram(large_data, bins=100)
np_time = time.perf_counter() - start

# boost-histogram NumPy interface
start = time.perf_counter()
bh_counts, bh_edges = bhnp.histogram(large_data, bins=100)
bh_time = time.perf_counter() - start

# boost-histogram with parallelism
start = time.perf_counter()
bh_parallel_counts, bh_parallel_edges = bhnp.histogram(
    large_data,
    bins=100,
    threads=4,
)
bh_parallel_time = time.perf_counter() - start

print(f"NumPy time: {np_time:.3f}s")
print(f"boost-histogram time: {bh_time:.3f}s")
print(f"boost-histogram (4 threads) time: {bh_parallel_time:.3f}s")
# The reported speedup compares NumPy against the 4-thread run
print(f"Speedup vs NumPy: {np_time/bh_parallel_time:.1f}x")
# Use advanced storage with NumPy interface
data = np.random.poisson(lam=3, size=50000).astype(float)
weights = np.random.exponential(scale=1, size=50000)

# NumPy-style call that accumulates into Weight storage
counts, edges = bhnp.histogram(
    data,
    weights=weights,
    bins=20,
    range=(0, 15),
    storage=bh.storage.Weight(),
)

# Build and fill a matching boost-histogram object to read out the variances
axis = bh.axis.Regular(20, 0, 15)
hist = bh.Histogram(axis, storage=bh.storage.Weight())
hist.fill(data, weight=weights)
values = hist.values()  # Same as counts from bhnp.histogram
variances = hist.variances()  # Additional variance information
print(f"Bin values: {values[:5]}")
print(f"Bin variances: {variances[:5]}")
Install with Tessl CLI
npx tessl i tessl/pypi-boost-histogram