Multi-dimensional data arrays with labeled dimensions for scientific computing
—
Advanced binning operations for event data, histogram creation, and data grouping with support for irregular bins, multi-dimensional binning, and event data manipulation. These functions enable efficient analysis of scattered data and creation of regular grids.
Transform scattered event data into regular bins for histogram analysis.
def bin(x, /, **edges):
"""
Bin scattered data into regular bins
Args:
x (DataArray): Input data with event coordinates
**edges: Bin edges for each dimension as keyword arguments
(e.g., x=bin_edges, y=bin_edges)
Returns:
DataArray: Binned data with bin-edge coordinates
Examples:
bin(events, x=10) # 10 bins along x
bin(events, x=x_edges, y=y_edges) # Custom bin edges
"""
def hist(x, /, **edges):
"""
Create histogram from data
Args:
x (Variable or DataArray): Input data
**edges: Bin edges for each dimension
Returns:
Variable or DataArray: Histogram with bin counts
Examples:
hist(data, x=10) # 10 bins along x dimension
hist(data, x=x_edges, energy=energy_edges) # Multi-dimensional histogram
"""
def nanhist(x, /, **edges):
"""
Create histogram ignoring NaN values
Args:
x (Variable or DataArray): Input data (may contain NaN)
**edges: Bin edges for each dimension
Returns:
Variable or DataArray: Histogram with NaN values ignored
"""
def rebin(x, **edges):
"""
Re-bin existing histogram data
Args:
x (Variable or DataArray): Input histogram
**edges: New bin edges for each dimension
Returns:
Variable or DataArray: Re-binned histogram
Note:
Preserves integrated counts when changing bin boundaries
"""

Group data by coordinate values or labels for categorical analysis.
def group(x, /, **groups):
"""
Group data by coordinate labels
Args:
x (DataArray): Input data
**groups: Grouping specifications for each dimension
Returns:
DataArray: Grouped data
Examples:
group(data, detector=detector_groups)
group(data, sample=['A', 'B', 'C'])
"""
def groupby(x, group, *, dim=None):
"""
Group data by coordinate values
Args:
x (DataArray or Dataset): Input data
group (str or Variable): Grouping coordinate or values
dim (str, optional): Dimension to group along
Returns:
GroupByDataArray or GroupByDataset: Grouped data object
Examples:
grouped = groupby(dataset, 'sample_id')
result = grouped.sum('event') # Sum within each group
"""

Access and manipulate the contents of binned data structures.
def bins(x, dim=None):
"""
Access binned data contents
Args:
x (Variable or DataArray): Binned data
dim (str, optional): Dimension to access
Returns:
Bins: Bin contents accessor
"""
def bins_like(x, fill_value=None):
"""
Create bins with same structure as input
Args:
x (Variable or DataArray): Template binned data
fill_value (optional): Value to fill new bins
Returns:
Variable or DataArray: New binned structure
"""
def lookup(x, dim):
"""
Create lookup table for fast binning
Args:
x (Variable): Bin edges or centers
dim (str): Dimension name
Returns:
Lookup: Fast lookup table for binning operations
"""

Lower-level binning control and advanced binning operations.
def make_binned(x, edges, groups=None):
"""
Create binned data structure with specified edges
Args:
x (DataArray): Event data to bin
edges (Dict[str, Variable]): Bin edges for each dimension
groups (Dict[str, Variable], optional): Grouping information
Returns:
DataArray: Binned data structure
"""
def make_histogrammed(x, edges):
"""
Create histogrammed data structure
Args:
x (Variable or DataArray): Input data
edges (Dict[str, Variable]): Bin edges for histogram
Returns:
Variable or DataArray: Histogrammed data
"""

import scipp as sc
import numpy as np
# Create sample data
data = sc.array(dims=['event'], values=np.random.normal(0, 1, 1000))
# Create simple histogram
hist_data = sc.hist(data, event=20) # 20 bins
print(hist_data.sizes) # Dimension sizes of the histogram
# Create histogram with custom edges
edges = sc.linspace('event', -3, 3, 21) # 20 bins from -3 to 3
hist_custom = sc.hist(data, event=edges)

# Create 2D event data
x_events = sc.array(dims=['event'], values=np.random.normal(0, 1, 5000))
y_events = sc.array(dims=['event'], values=np.random.normal(0, 0.5, 5000))
# Combine into DataArray with coordinates
events = sc.DataArray(
data=sc.ones(dims=['event'], shape=[5000], unit='counts'),
coords={'x': x_events, 'y': y_events}
)
# Create 2D histogram
hist_2d = sc.hist(events, x=50, y=30) # 50x30 grid
print(hist_2d.sizes) # {'x': 50, 'y': 30}
# Custom 2D binning with specified edges
x_edges = sc.linspace('x', -3, 3, 51)
y_edges = sc.linspace('y', -2, 2, 31)
hist_2d_custom = sc.hist(events, x=x_edges, y=y_edges)

# Generate realistic event data (e.g., detector events)
n_events = 10000
event_data = sc.DataArray(
data=sc.array(dims=['event'], values=np.random.exponential(1, n_events), unit='counts'),
coords={
'x': sc.array(dims=['event'], values=np.random.uniform(-10, 10, n_events), unit='mm'),
'y': sc.array(dims=['event'], values=np.random.uniform(-5, 5, n_events), unit='mm'),
'tof': sc.array(dims=['event'], values=np.random.gamma(2, 1000, n_events), unit='us')
}
)
# Bin event data into 3D histogram
binned = sc.bin(event_data, x=20, y=10, tof=50)
print(binned) # Shows binned structure with preserved events
# Convert binned data to histogram
histogram = sc.hist(binned)
print(histogram.sizes) # {'x': 20, 'y': 10, 'tof': 50}

# Create sample data with categorical coordinate
sample_names = ['sample_A', 'sample_B', 'sample_C'] * 100
measurements = sc.DataArray(
data=sc.array(dims=['measurement'], values=np.random.normal(5, 1, 300), unit='counts'),
coords={
'sample': sc.array(dims=['measurement'], values=sample_names),
'time': sc.arange('measurement', 300, unit='s')
}
)
# Group by sample and calculate statistics
grouped = sc.groupby(measurements, 'sample')
sample_means = grouped.mean('measurement')
sample_sums = grouped.sum('measurement')
print(sample_means.coords['sample']) # ['sample_A', 'sample_B', 'sample_C']

# Create initial histogram
original_edges = sc.linspace('x', 0, 10, 11) # 10 bins
data = sc.array(dims=['x'], values=np.random.poisson(10, 10), unit='counts')
original_hist = sc.DataArray(data=data, coords={'x': original_edges})
# Re-bin to different resolution
new_edges = sc.linspace('x', 0, 10, 6) # 5 bins (coarser)
rebinned = sc.rebin(original_hist, x=new_edges)
# Re-bin to higher resolution (each bin's counts are split across the finer bins it overlaps, not interpolated)
fine_edges = sc.linspace('x', 0, 10, 21) # 20 bins (finer)
rebinned_fine = sc.rebin(original_hist, x=fine_edges)
# Verify count conservation
original_total = sc.sum(original_hist)
rebinned_total = sc.sum(rebinned)
print(f"Original: {original_total.value}, Rebinned: {rebinned_total.value}")

# Create lookup table for fast repeated binning
x_edges = sc.linspace('x', 0, 100, 101)
lookup_table = sc.lookup(x_edges, 'x')
# Generate multiple datasets to bin with same edges
datasets = []
for i in range(10):
data = sc.array(dims=['event'], values=np.random.uniform(0, 100, 1000))
datasets.append(data)
# Fast binning using lookup table
binned_datasets = []
for data in datasets:
events = sc.DataArray(
data=sc.ones(dims=['event'], shape=[1000]),
coords={'x': data}
)
# Lookup table enables faster binning for repeated operations
binned = sc.bin(events, x=lookup_table)
binned_datasets.append(binned)

# Create irregular bin edges (non-uniform spacing)
irregular_edges = sc.array(
dims=['x'],
values=[0, 1, 2, 5, 10, 20, 50, 100], # Increasing spacing
unit='mm'
)
# Create sample data
position_data = sc.array(
dims=['particle'],
values=np.random.exponential(10, 5000),
unit='mm'
)
# Histogram with irregular bins
events = sc.DataArray(
data=sc.ones(dims=['particle'], shape=[5000]),
coords={'x': position_data}
)
irregular_hist = sc.hist(events, x=irregular_edges)
print(irregular_hist.coords['x']) # Shows irregular bin edges

# Create data with NaN values
data_with_nan = sc.array(
dims=['measurement'],
values=np.array([1, 2, np.nan, 4, 5, np.nan, 7, 8]),
)
# Regular histogram includes NaN in counts
regular_hist = sc.hist(data_with_nan, measurement=5)
# NaN-ignoring histogram excludes NaN values
nan_hist = sc.nanhist(data_with_nan, measurement=5)
print(f"Regular total: {sc.sum(regular_hist).value}")
print(f"NaN-ignoring total: {sc.sum(nan_hist).value}")

Install with Tessl CLI
npx tessl i tessl/pypi-scipp