0
# Binning and Histogramming
1
2
Advanced binning operations for event data, histogram creation, and data grouping, with support for irregular bins, multi-dimensional binning, and event-data manipulation. These functions enable efficient analysis of scattered data and the creation of regular grids.
3
4
## Capabilities
5
6
### Event Data Binning
7
8
Transform scattered event data into regular bins for histogram analysis.
9
10
```python { .api }
11
def bin(x, /, **edges):
12
"""
13
Bin scattered data into regular bins
14
15
Args:
16
x (DataArray): Input data with event coordinates
17
**edges: Bin edges for each dimension as keyword arguments
18
(e.g., x=bin_edges, y=bin_edges)
19
20
Returns:
21
DataArray: Binned data with bin-edge coordinates
22
23
Examples:
24
bin(events, x=10) # 10 bins along x
25
bin(events, x=x_edges, y=y_edges) # Custom bin edges
26
"""
27
28
def hist(x, /, **edges):
29
"""
30
Create histogram from data
31
32
Args:
33
x (Variable or DataArray): Input data
34
**edges: Bin edges for each dimension
35
36
Returns:
37
Variable or DataArray: Histogram with bin counts
38
39
Examples:
40
hist(data, x=10) # 10 bins along x dimension
41
hist(data, x=x_edges, energy=energy_edges) # Multi-dimensional histogram
42
"""
43
44
def nanhist(x, /, **edges):
45
"""
46
Create histogram ignoring NaN values
47
48
Args:
49
x (Variable or DataArray): Input data (may contain NaN)
50
**edges: Bin edges for each dimension
51
52
Returns:
53
Variable or DataArray: Histogram with NaN values ignored
54
"""
55
56
def rebin(x, **edges):
57
"""
58
Re-bin existing histogram data
59
60
Args:
61
x (Variable or DataArray): Input histogram
62
**edges: New bin edges for each dimension
63
64
Returns:
65
Variable or DataArray: Re-binned histogram
66
67
Note:
68
Preserves integrated counts when changing bin boundaries
69
"""
70
```
71
72
### Data Grouping
73
74
Group data by coordinate values or labels for categorical analysis.
75
76
```python { .api }
77
def group(x, /, **groups):
78
"""
79
Group data by coordinate labels
80
81
Args:
82
x (DataArray): Input data
83
**groups: Grouping specifications for each dimension
84
85
Returns:
86
DataArray: Grouped data
87
88
Examples:
89
group(data, detector=detector_groups)
90
group(data, sample=['A', 'B', 'C'])
91
"""
92
93
def groupby(x, group, *, dim=None):
94
"""
95
Group data by coordinate values
96
97
Args:
98
x (DataArray or Dataset): Input data
99
group (str or Variable): Grouping coordinate or values
100
dim (str, optional): Dimension to group along
101
102
Returns:
103
GroupByDataArray or GroupByDataset: Grouped data object
104
105
Examples:
106
grouped = groupby(dataset, 'sample_id')
107
result = grouped.sum('event') # Sum within each group
108
"""
109
```
110
111
### Bin Access and Manipulation
112
113
Access and manipulate the contents of binned data structures.
114
115
```python { .api }
116
def bins(x, dim=None):
117
"""
118
Access binned data contents
119
120
Args:
121
x (Variable or DataArray): Binned data
122
dim (str, optional): Dimension to access
123
124
Returns:
125
Bins: Bin contents accessor
126
"""
127
128
def bins_like(x, fill_value=None):
129
"""
130
Create bins with same structure as input
131
132
Args:
133
x (Variable or DataArray): Template binned data
134
fill_value (optional): Value to fill new bins
135
136
Returns:
137
Variable or DataArray: New binned structure
138
"""
139
140
def lookup(x, dim):
141
"""
142
Create lookup table for fast binning
143
144
Args:
145
x (Variable): Bin edges or centers
146
dim (str): Dimension name
147
148
Returns:
149
Lookup: Fast lookup table for binning operations
150
"""
151
```
152
153
### Specialized Binning Functions
154
155
Lower-level binning control and advanced binning operations.
156
157
```python { .api }
158
def make_binned(x, edges, groups=None):
159
"""
160
Create binned data structure with specified edges
161
162
Args:
163
x (DataArray): Event data to bin
164
edges (Dict[str, Variable]): Bin edges for each dimension
165
groups (Dict[str, Variable], optional): Grouping information
166
167
Returns:
168
DataArray: Binned data structure
169
"""
170
171
def make_histogrammed(x, edges):
172
"""
173
Create histogrammed data structure
174
175
Args:
176
x (Variable or DataArray): Input data
177
edges (Dict[str, Variable]): Bin edges for histogram
178
179
Returns:
180
Variable or DataArray: Histogrammed data
181
"""
182
```
183
184
## Usage Examples
185
186
### Basic Histogramming
187
188
```python
189
import scipp as sc
190
import numpy as np
191
192
# Create sample data
193
data = sc.array(dims=['event'], values=np.random.normal(0, 1, 1000))
194
195
# Create simple histogram
196
hist_data = sc.hist(data, event=20) # 20 bins
197
print(hist_data.sizes) # Shows the dimension sizes of the histogram
198
199
# Create histogram with custom edges
200
edges = sc.linspace('event', -3, 3, 21) # 20 bins from -3 to 3
201
hist_custom = sc.hist(data, event=edges)
202
```
203
204
### Multi-dimensional Histogramming
205
206
```python
207
# Create 2D event data
208
x_events = sc.array(dims=['event'], values=np.random.normal(0, 1, 5000))
209
y_events = sc.array(dims=['event'], values=np.random.normal(0, 0.5, 5000))
210
211
# Combine into DataArray with coordinates
212
events = sc.DataArray(
213
data=sc.ones(dims=['event'], shape=[5000], unit='counts'),
214
coords={'x': x_events, 'y': y_events}
215
)
216
217
# Create 2D histogram
218
hist_2d = sc.hist(events, x=50, y=30) # 50x30 grid
219
print(hist_2d.sizes) # {'x': 50, 'y': 30}
220
221
# Custom 2D binning with specified edges
222
x_edges = sc.linspace('x', -3, 3, 51)
223
y_edges = sc.linspace('y', -2, 2, 31)
224
hist_2d_custom = sc.hist(events, x=x_edges, y=y_edges)
225
```
226
227
### Event Data Binning
228
229
```python
230
# Generate synthetic event data (e.g., simulated detector events)
231
n_events = 10000
232
event_data = sc.DataArray(
233
data=sc.array(dims=['event'], values=np.random.exponential(1, n_events), unit='counts'),
234
coords={
235
'x': sc.array(dims=['event'], values=np.random.uniform(-10, 10, n_events), unit='mm'),
236
'y': sc.array(dims=['event'], values=np.random.uniform(-5, 5, n_events), unit='mm'),
237
'tof': sc.array(dims=['event'], values=np.random.gamma(2, 1000, n_events), unit='us')
238
}
239
)
240
241
# Bin event data into a 3D grid of bins (individual events are preserved)
242
binned = sc.bin(event_data, x=20, y=10, tof=50)
243
print(binned) # Shows binned structure with preserved events
244
245
# Convert binned data to histogram
246
histogram = sc.hist(binned)
247
print(histogram.sizes) # {'x': 20, 'y': 10, 'tof': 50}
248
```
249
250
### Data Grouping Operations
251
252
```python
253
# Create sample data with categorical coordinate
254
sample_names = ['sample_A', 'sample_B', 'sample_C'] * 100
255
measurements = sc.DataArray(
256
data=sc.array(dims=['measurement'], values=np.random.normal(5, 1, 300), unit='counts'),
257
coords={
258
'sample': sc.array(dims=['measurement'], values=sample_names),
259
'time': sc.arange('measurement', 300, unit='s')
260
}
261
)
262
263
# Group by sample and calculate statistics
264
grouped = sc.groupby(measurements, 'sample')
265
sample_means = grouped.mean('measurement')
266
sample_sums = grouped.sum('measurement')
267
268
print(sample_means.coords['sample']) # ['sample_A', 'sample_B', 'sample_C']
269
```
270
271
### Re-binning Operations
272
273
```python
274
# Create initial histogram
275
original_edges = sc.linspace('x', 0, 10, 11) # 10 bins
276
data = sc.array(dims=['x'], values=np.random.poisson(10, 10), unit='counts')
277
original_hist = sc.DataArray(data=data, coords={'x': original_edges})
278
279
# Re-bin to different resolution
280
new_edges = sc.linspace('x', 0, 10, 6) # 5 bins (coarser)
281
rebinned = sc.rebin(original_hist, x=new_edges)
282
283
# Re-bin to higher resolution (counts are split in proportion to bin overlap, not interpolated)
284
fine_edges = sc.linspace('x', 0, 10, 21) # 20 bins (finer)
285
rebinned_fine = sc.rebin(original_hist, x=fine_edges)
286
287
# Verify count conservation
288
original_total = sc.sum(original_hist)
289
rebinned_total = sc.sum(rebinned)
290
print(f"Original: {original_total.value}, Rebinned: {rebinned_total.value}")
291
```
292
293
### Advanced Binning with Lookup Tables
294
295
```python
296
# Create lookup table for fast repeated binning
297
x_edges = sc.linspace('x', 0, 100, 101)
298
lookup_table = sc.lookup(x_edges, 'x')
299
300
# Generate multiple datasets to bin with same edges
301
datasets = []
302
for i in range(10):
303
data = sc.array(dims=['event'], values=np.random.uniform(0, 100, 1000))
304
datasets.append(data)
305
306
# Fast binning using lookup table
307
binned_datasets = []
308
for data in datasets:
309
events = sc.DataArray(
310
data=sc.ones(dims=['event'], shape=[1000]),
311
coords={'x': data}
312
)
313
# Lookup table enables faster binning for repeated operations
314
binned = sc.bin(events, x=lookup_table)
315
binned_datasets.append(binned)
316
```
317
318
### Working with Irregular Bins
319
320
```python
321
# Create irregular bin edges (non-uniform spacing)
322
irregular_edges = sc.array(
323
dims=['x'],
324
values=[0, 1, 2, 5, 10, 20, 50, 100], # Increasing spacing
325
unit='mm'
326
)
327
328
# Create sample data
329
position_data = sc.array(
330
dims=['particle'],
331
values=np.random.exponential(10, 5000),
332
unit='mm'
333
)
334
335
# Histogram with irregular bins
336
events = sc.DataArray(
337
data=sc.ones(dims=['particle'], shape=[5000]),
338
coords={'x': position_data}
339
)
340
341
irregular_hist = sc.hist(events, x=irregular_edges)
342
print(irregular_hist.coords['x']) # Shows irregular bin edges
343
```
344
345
### NaN Handling in Histograms
346
347
```python
348
# Create data with NaN values
349
data_with_nan = sc.array(
350
dims=['measurement'],
351
values=np.array([1, 2, np.nan, 4, 5, np.nan, 7, 8]),
352
)
353
354
# Regular histogram does not skip NaN values
355
regular_hist = sc.hist(data_with_nan, measurement=5)
356
357
# NaN-ignoring histogram excludes NaN values
358
nan_hist = sc.nanhist(data_with_nan, measurement=5)
359
360
print(f"Regular total: {sc.sum(regular_hist).value}")
361
print(f"NaN-ignoring total: {sc.sum(nan_hist).value}")
362
```