CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-scipp

Multi-dimensional data arrays with labeled dimensions for scientific computing

Pending
Overview
Eval results
Files

docs/binning-histogramming.md

Binning and Histogramming

Advanced binning operations for event data, histogram creation, and data grouping with support for irregular bins, multi-dimensional binning, and event data manipulation. These functions enable efficient analysis of scattered data and creation of regular grids.

Capabilities

Event Data Binning

Transform scattered event data into regular bins for histogram analysis.

def bin(x, /, **edges):
    """
    Sort scattered event data into bins, keeping the events inside each bin.

    Args:
        x (DataArray): Event data whose coordinates decide the bin of each event
        **edges: One keyword argument per dimension to bin along, given as
                 a number of bins or as explicit bin edges
                 (e.g., x=10 or x=x_edges, y=y_edges)

    Returns:
        DataArray: The binned data, carrying bin-edge coordinates

    Examples:
        bin(events, x=10)  # split x into 10 bins
        bin(events, x=x_edges, y=y_edges)  # user-supplied edges for x and y
    """

def hist(x, /, **edges):
    """
    Histogram data along one or more dimensions.

    Args:
        x (Variable or DataArray): Data to histogram
        **edges: One keyword argument per dimension, given as a number of
                 bins or as explicit bin edges

    Returns:
        Variable or DataArray: The histogram, holding the counts per bin

    Examples:
        hist(data, x=10)  # split the x dimension into 10 bins
        hist(data, x=x_edges, energy=energy_edges)  # histogram over two dimensions
    """

def nanhist(x, /, **edges):
    """
    Histogram data while skipping NaN values.

    Args:
        x (Variable or DataArray): Data to histogram; NaN entries are allowed
        **edges: One keyword argument per dimension, giving its bin edges

    Returns:
        Variable or DataArray: The histogram computed with NaN values left out
    """

def rebin(x, **edges):
    """
    Re-bin existing histogram data
    
    Args:
        x (Variable or DataArray): Input histogram
        **edges: New bin edges for each dimension
    
    Returns:
        Variable or DataArray: Re-binned histogram
        
    Note:
        Preserves integrated counts when changing bin boundaries
    """

Data Grouping

Group data by coordinate values or labels for categorical analysis.

def group(x, /, **groups):
    """
    Collect data into groups according to coordinate labels.

    Args:
        x (DataArray): Data to group
        **groups: One keyword argument per dimension, describing the groups
                  to form (e.g., sample=['A', 'B', 'C'])

    Returns:
        DataArray: The grouped data

    Examples:
        group(data, detector=detector_groups)
        group(data, sample=['A', 'B', 'C'])
    """

def groupby(x, group, *, dim=None):
    """
    Split data into groups that share a coordinate value.

    Args:
        x (DataArray or Dataset): Data to group
        group (str or Variable): Name of the grouping coordinate, or the
                                 grouping values themselves
        dim (str, optional): Dimension along which to group

    Returns:
        GroupByDataArray or GroupByDataset: Object on which per-group
        reductions can be called

    Examples:
        grouped = groupby(dataset, 'sample_id')
        result = grouped.sum('event')  # reduce 'event' separately for each group
    """
    # NOTE(review): upstream scipp appears to name the keyword-only argument
    # `bins` rather than `dim` - confirm against the installed scipp version.

Bin Access and Manipulation

Access and manipulate the contents of binned data structures.

def bins(x, dim=None):
    """
    Give access to the contents of binned data.

    Args:
        x (Variable or DataArray): The binned data
        dim (str, optional): Dimension to access

    Returns:
        Bins: Accessor for the bin contents
    """
    # NOTE(review): upstream `scipp.bins` appears to be a keyword-only
    # constructor (data, dim, begin, end), with bin contents reached through
    # the `.bins` property - confirm against the installed scipp version.

def bins_like(x, fill_value=None):
    """
    Build bins that share the structure of existing binned data.

    Args:
        x (Variable or DataArray): Binned data used as the template
        fill_value (optional): Value placed into the new bins

    Returns:
        Variable or DataArray: The newly created binned structure
    """
    # NOTE(review): upstream `scipp.bins_like` appears to require
    # `fill_value` (a Variable) - confirm whether the None default is valid.

def lookup(x, dim):
    """
    Build a lookup table used for fast binning.

    Args:
        x (Variable): Bin edges or bin centers
        dim (str): Name of the dimension

    Returns:
        Lookup: The lookup table, for use in fast binning operations
    """
    # NOTE(review): upstream `scipp.lookup` appears to take a DataArray
    # (a histogram or point-data function) as its first argument rather than
    # a bare Variable of edges - confirm against the installed scipp version.

Specialized Binning Functions

Lower-level binning control and advanced binning operations.

def make_binned(x, edges, groups=None):
    """
    Build a binned data structure from event data and the given edges.

    Args:
        x (DataArray): The event data to place into bins
        edges (Dict[str, Variable]): Bin edges, keyed by dimension name
        groups (Dict[str, Variable], optional): Grouping information,
                                                keyed by dimension name

    Returns:
        DataArray: The resulting binned data structure
    """

def make_histogrammed(x, edges):
    """
    Build a histogrammed data structure from the given edges.

    Args:
        x (Variable or DataArray): Data to histogram
        edges (Dict[str, Variable]): Bin edges of the histogram

    Returns:
        Variable or DataArray: The histogrammed data
    """

Usage Examples

Basic Histogramming

import scipp as sc
import numpy as np

# Draw 1000 samples from a normal distribution (mean 0, sigma 1)
samples = np.random.normal(0, 1, 1000)
data = sc.array(dims=['event'], values=samples)

# Histogram the samples into 20 bins
hist_data = sc.hist(data, event=20)
print(hist_data.sizes)  # Sizes of the histogrammed result

# Histogram again with explicit edges: 21 edges give 20 bins from -3 to 3
edges = sc.linspace('event', -3, 3, 21)
hist_custom = sc.hist(data, event=edges)

Multi-dimensional Histogramming

# Two event coordinates, drawn from normal distributions of different width
x_events = sc.array(dims=['event'], values=np.random.normal(0, 1, 5000))
y_events = sc.array(dims=['event'], values=np.random.normal(0, 0.5, 5000))

# Each event carries one count; x and y are attached as coordinates
unit_counts = sc.ones(dims=['event'], shape=[5000], unit='counts')
events = sc.DataArray(data=unit_counts, coords={'x': x_events, 'y': y_events})

# 2D histogram on a 50x30 grid
hist_2d = sc.hist(events, x=50, y=30)
print(hist_2d.sizes)  # {'x': 50, 'y': 30}

# Same histogram on explicit edges (51 and 31 edges give 50 and 30 bins)
x_edges = sc.linspace('x', -3, 3, 51)
y_edges = sc.linspace('y', -2, 2, 31)
hist_2d_custom = sc.hist(events, x=x_edges, y=y_edges)

Event Data Binning

# Synthetic detector events: a weight plus x, y and time-of-flight per event
n_events = 10000
weights = sc.array(dims=['event'], values=np.random.exponential(1, n_events), unit='counts')
x_pos = sc.array(dims=['event'], values=np.random.uniform(-10, 10, n_events), unit='mm')
y_pos = sc.array(dims=['event'], values=np.random.uniform(-5, 5, n_events), unit='mm')
tof = sc.array(dims=['event'], values=np.random.gamma(2, 1000, n_events), unit='us')
event_data = sc.DataArray(data=weights, coords={'x': x_pos, 'y': y_pos, 'tof': tof})

# Sort the events into a 20 x 10 x 50 grid of bins
binned = sc.bin(event_data, x=20, y=10, tof=50)
print(binned)  # Binned structure; the events are preserved inside the bins

# Reduce the binned events to a histogram
histogram = sc.hist(binned)
print(histogram.sizes)  # {'x': 20, 'y': 10, 'tof': 50}

Data Grouping Operations

# 300 measurements, labelled cyclically with three sample names
sample_names = ['sample_A', 'sample_B', 'sample_C'] * 100
readings = sc.array(dims=['measurement'], values=np.random.normal(5, 1, 300), unit='counts')
sample_coord = sc.array(dims=['measurement'], values=sample_names)
time_coord = sc.arange('measurement', 300, unit='s')
measurements = sc.DataArray(
    data=readings,
    coords={'sample': sample_coord, 'time': time_coord},
)

# Group on the 'sample' coordinate, then reduce each group
grouped = sc.groupby(measurements, 'sample')
sample_means = grouped.mean('measurement')
sample_sums = grouped.sum('measurement')

print(sample_means.coords['sample'])  # ['sample_A', 'sample_B', 'sample_C']

Re-binning Operations

# Create initial histogram: 11 edges define 10 bins
original_edges = sc.linspace('x', 0, 10, 11)
# Store the counts as floats: re-binning splits the content of a bin across
# the new bins it overlaps, which generally produces fractional counts.
data = sc.array(
    dims=['x'],
    values=np.random.poisson(10, 10).astype('float64'),
    unit='counts',
)
original_hist = sc.DataArray(data=data, coords={'x': original_edges})

# Re-bin to a coarser resolution: 6 edges define 5 bins
new_edges = sc.linspace('x', 0, 10, 6)
rebinned = sc.rebin(original_hist, x=new_edges)

# Re-bin to a finer resolution: 21 edges define 20 bins. The counts of each
# original bin are shared out over the finer bins it covers; this is a
# redistribution of counts, not an interpolation of the histogram shape.
fine_edges = sc.linspace('x', 0, 10, 21)
rebinned_fine = sc.rebin(original_hist, x=fine_edges)

# Verify count conservation for both the coarse and the fine result
original_total = sc.sum(original_hist)
rebinned_total = sc.sum(rebinned)
rebinned_fine_total = sc.sum(rebinned_fine)
print(f"Original: {original_total.value}, Rebinned: {rebinned_total.value}")
print(f"Rebinned (fine): {rebinned_fine_total.value}")

Advanced Binning with Lookup Tables

# Shared bin edges: 101 edges define 100 bins between 0 and 100
x_edges = sc.linspace('x', 0, 100, 101)

# sc.lookup wraps a histogram (a DataArray with a bin-edge coordinate), not
# bare bin edges. Here every bin holds a weight of one; real code would store
# per-bin correction factors in `data`.
weight_per_bin = sc.DataArray(
    data=sc.ones(dims=['x'], shape=[100]),
    coords={'x': x_edges},
)
lookup_table = sc.lookup(weight_per_bin, 'x')

# Generate multiple datasets that are all processed with the same edges
datasets = [
    sc.array(dims=['event'], values=np.random.uniform(0, 100, 1000))
    for _ in range(10)
]

# Weight each event through the lookup table, then bin with the shared edges
binned_datasets = []
for data in datasets:
    events = sc.DataArray(
        # Indexing the lookup table with the event coordinate returns the
        # value of the bin each event falls into.
        data=lookup_table[data],
        coords={'x': data}
    )
    # The lookup result is not a set of bin edges, so bin with x_edges
    binned = sc.bin(events, x=x_edges)
    binned_datasets.append(binned)

Working with Irregular Bins

# Bin edges whose spacing grows along x (non-uniform bins)
edge_values = [0, 1, 2, 5, 10, 20, 50, 100]
irregular_edges = sc.array(dims=['x'], values=edge_values, unit='mm')

# Sample positions drawn from an exponential distribution
position_data = sc.array(
    dims=['particle'],
    values=np.random.exponential(10, 5000),
    unit='mm',
)

# One unit weight per particle, with the position attached as the x coordinate
unit_weights = sc.ones(dims=['particle'], shape=[5000])
events = sc.DataArray(data=unit_weights, coords={'x': position_data})

# Histogram the particles on the irregular edges
irregular_hist = sc.hist(events, x=irregular_edges)
print(irregular_hist.coords['x'])  # The irregular bin edges

NaN Handling in Histograms

# Create weighted data in which some of the weights are NaN.
# The NaNs must sit in the data (the quantity being summed per bin), not in
# the coordinate used for binning, for hist and nanhist to differ.
data_with_nan = sc.DataArray(
    data=sc.array(
        dims=['measurement'],
        values=np.array([1, 2, np.nan, 4, 5, np.nan, 7, 8]),
        unit='counts',
    ),
    coords={'x': sc.linspace('measurement', 0.0, 7.0, 8)},
)

# Regular histogram: a bin that receives a NaN weight sums to NaN
regular_hist = sc.hist(data_with_nan, x=5)

# NaN-ignoring histogram: NaN weights are skipped when summing each bin
nan_hist = sc.nanhist(data_with_nan, x=5)

print(f"Regular total: {sc.sum(regular_hist).value}")
print(f"NaN-ignoring total: {sc.sum(nan_hist).value}")

Install with Tessl CLI

npx tessl i tessl/pypi-scipp

docs

array-creation.md

binning-histogramming.md

coordinate-systems.md

core-data-structures.md

index.md

input-output.md

mathematical-operations.md

reduction-operations.md

scipy-integration.md

shape-operations.md

spatial-operations.md

testing-utilities.md

units-system.md

visualization.md

tile.json