or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

- array-creation.md
- binning-histogramming.md
- coordinate-systems.md
- core-data-structures.md
- index.md
- input-output.md
- mathematical-operations.md
- reduction-operations.md
- scipy-integration.md
- shape-operations.md
- spatial-operations.md
- testing-utilities.md
- units-system.md
- visualization.md

docs/binning-histogramming.md

0

# Binning and Histogramming

1

2

Advanced binning operations for event data, histogram creation, and data grouping with support for irregular bins, multi-dimensional binning, and event data manipulation. These functions enable efficient analysis of scattered data and creation of regular grids.

3

4

## Capabilities

5

6

### Event Data Binning

7

8

Transform scattered event data into regular bins for histogram analysis.

9

10

```python { .api }

11

def bin(x, /, **edges):

12

"""

13

Bin scattered data into regular bins

14

15

Args:

16

x (DataArray): Input data with event coordinates

17

**edges: Bin edges for each dimension as keyword arguments

18

(e.g., x=bin_edges, y=bin_edges)

19

20

Returns:

21

DataArray: Binned data with bin-edge coordinates

22

23

Examples:

24

bin(events, x=10) # 10 bins along x

25

bin(events, x=x_edges, y=y_edges) # Custom bin edges

26

"""

27

28

def hist(x, /, **edges):

29

"""

30

Create histogram from data

31

32

Args:

33

x (Variable or DataArray): Input data

34

**edges: Bin edges for each dimension

35

36

Returns:

37

Variable or DataArray: Histogram with bin counts

38

39

Examples:

40

hist(data, x=10) # 10 bins along x dimension

41

hist(data, x=x_edges, energy=energy_edges) # Multi-dimensional histogram

42

"""

43

44

def nanhist(x, /, **edges):

45

"""

46

Create histogram ignoring NaN values

47

48

Args:

49

x (Variable or DataArray): Input data (may contain NaN)

50

**edges: Bin edges for each dimension

51

52

Returns:

53

Variable or DataArray: Histogram with NaN values ignored

54

"""

55

56

def rebin(x, **edges):

57

"""

58

Re-bin existing histogram data

59

60

Args:

61

x (Variable or DataArray): Input histogram

62

**edges: New bin edges for each dimension

63

64

Returns:

65

Variable or DataArray: Re-binned histogram

66

67

Note:

68

Preserves integrated counts when changing bin boundaries

69

"""

70

```

71

72

### Data Grouping

73

74

Group data by coordinate values or labels for categorical analysis.

75

76

```python { .api }

77

def group(x, /, **groups):

78

"""

79

Group data by coordinate labels

80

81

Args:

82

x (DataArray): Input data

83

**groups: Grouping specifications for each dimension

84

85

Returns:

86

DataArray: Grouped data

87

88

Examples:

89

group(data, detector=detector_groups)

90

group(data, sample=['A', 'B', 'C'])

91

"""

92

93

def groupby(x, group, *, dim=None):

94

"""

95

Group data by coordinate values

96

97

Args:

98

x (DataArray or Dataset): Input data

99

group (str or Variable): Grouping coordinate or values

100

dim (str, optional): Dimension to group along

101

102

Returns:

103

GroupByDataArray or GroupByDataset: Grouped data object

104

105

Examples:

106

grouped = groupby(dataset, 'sample_id')

107

result = grouped.sum('event') # Sum within each group

108

"""

109

```

110

111

### Bin Access and Manipulation

112

113

Access and manipulate the contents of binned data structures.

114

115

```python { .api }

116

def bins(x, dim=None):

117

"""

118

Access binned data contents

119

120

Args:

121

x (Variable or DataArray): Binned data

122

dim (str, optional): Dimension to access

123

124

Returns:

125

Bins: Bin contents accessor

126

"""

127

128

def bins_like(x, fill_value=None):

129

"""

130

Create bins with same structure as input

131

132

Args:

133

x (Variable or DataArray): Template binned data

134

fill_value (optional): Value to fill new bins

135

136

Returns:

137

Variable or DataArray: New binned structure

138

"""

139

140

def lookup(x, dim):

141

"""

142

Create lookup table for fast binning

143

144

Args:

145

x (Variable): Bin edges or centers

146

dim (str): Dimension name

147

148

Returns:

149

Lookup: Fast lookup table for binning operations

150

"""

151

```

152

153

### Specialized Binning Functions

154

155

Lower-level binning control and advanced binning operations.

156

157

```python { .api }

158

def make_binned(x, edges, groups=None):

159

"""

160

Create binned data structure with specified edges

161

162

Args:

163

x (DataArray): Event data to bin

164

edges (Dict[str, Variable]): Bin edges for each dimension

165

groups (Dict[str, Variable], optional): Grouping information

166

167

Returns:

168

DataArray: Binned data structure

169

"""

170

171

def make_histogrammed(x, edges):

172

"""

173

Create histogrammed data structure

174

175

Args:

176

x (Variable or DataArray): Input data

177

edges (Dict[str, Variable]): Bin edges for histogram

178

179

Returns:

180

Variable or DataArray: Histogrammed data

181

"""

182

```

183

184

## Usage Examples

185

186

### Basic Histogramming

187

188

```python

189

import scipp as sc

190

import numpy as np

191

192

# Create sample data

193

data = sc.array(dims=['event'], values=np.random.normal(0, 1, 1000))

194

195

# Create simple histogram

196

hist_data = sc.hist(data, event=20) # 20 bins

197

print(hist_data.sizes) # Shows the dimension sizes of the histogram

198

199

# Create histogram with custom edges

200

edges = sc.linspace('event', -3, 3, 21) # 20 bins from -3 to 3

201

hist_custom = sc.hist(data, event=edges)

202

```

203

204

### Multi-dimensional Histogramming

205

206

```python

207

# Create 2D event data

208

x_events = sc.array(dims=['event'], values=np.random.normal(0, 1, 5000))

209

y_events = sc.array(dims=['event'], values=np.random.normal(0, 0.5, 5000))

210

211

# Combine into DataArray with coordinates

212

events = sc.DataArray(

213

data=sc.ones(dims=['event'], shape=[5000], unit='counts'),

214

coords={'x': x_events, 'y': y_events}

215

)

216

217

# Create 2D histogram

218

hist_2d = sc.hist(events, x=50, y=30) # 50x30 grid

219

print(hist_2d.sizes) # {'x': 50, 'y': 30}

220

221

# Custom 2D binning with specified edges

222

x_edges = sc.linspace('x', -3, 3, 51)

223

y_edges = sc.linspace('y', -2, 2, 31)

224

hist_2d_custom = sc.hist(events, x=x_edges, y=y_edges)

225

```

226

227

### Event Data Binning

228

229

```python

230

# Generate realistic event data (e.g., detector events)

231

n_events = 10000

232

event_data = sc.DataArray(

233

data=sc.array(dims=['event'], values=np.random.exponential(1, n_events), unit='counts'),

234

coords={

235

'x': sc.array(dims=['event'], values=np.random.uniform(-10, 10, n_events), unit='mm'),

236

'y': sc.array(dims=['event'], values=np.random.uniform(-5, 5, n_events), unit='mm'),

237

'tof': sc.array(dims=['event'], values=np.random.gamma(2, 1000, n_events), unit='us')

238

}

239

)

240

241

# Bin event data into 3D bins (the events in each bin are preserved)

242

binned = sc.bin(event_data, x=20, y=10, tof=50)

243

print(binned) # Shows binned structure with preserved events

244

245

# Convert binned data to histogram

246

histogram = sc.hist(binned)

247

print(histogram.sizes) # {'x': 20, 'y': 10, 'tof': 50}

248

```

249

250

### Data Grouping Operations

251

252

```python

253

# Create sample data with categorical coordinate

254

sample_names = ['sample_A', 'sample_B', 'sample_C'] * 100

255

measurements = sc.DataArray(

256

data=sc.array(dims=['measurement'], values=np.random.normal(5, 1, 300), unit='counts'),

257

coords={

258

'sample': sc.array(dims=['measurement'], values=sample_names),

259

'time': sc.arange('measurement', 300, unit='s')

260

}

261

)

262

263

# Group by sample and calculate statistics

264

grouped = sc.groupby(measurements, 'sample')

265

sample_means = grouped.mean('measurement')

266

sample_sums = grouped.sum('measurement')

267

268

print(sample_means.coords['sample']) # ['sample_A', 'sample_B', 'sample_C']

269

```

270

271

### Re-binning Operations

272

273

```python

274

# Create initial histogram

275

original_edges = sc.linspace('x', 0, 10, 11) # 10 bins

276

data = sc.array(dims=['x'], values=np.random.poisson(10, 10), unit='counts')

277

original_hist = sc.DataArray(data=data, coords={'x': original_edges})

278

279

# Re-bin to different resolution

280

new_edges = sc.linspace('x', 0, 10, 6) # 5 bins (coarser)

281

rebinned = sc.rebin(original_hist, x=new_edges)

282

283

# Re-bin to higher resolution (counts are split in proportion to bin overlap, not interpolated)

284

fine_edges = sc.linspace('x', 0, 10, 21) # 20 bins (finer)

285

rebinned_fine = sc.rebin(original_hist, x=fine_edges)

286

287

# Verify count conservation

288

original_total = sc.sum(original_hist)

289

rebinned_total = sc.sum(rebinned)

290

print(f"Original: {original_total.value}, Rebinned: {rebinned_total.value}")

291

```

292

293

### Advanced Binning with Lookup Tables

294

295

```python

296

# Create lookup table for fast repeated binning

297

x_edges = sc.linspace('x', 0, 100, 101)

298

lookup_table = sc.lookup(x_edges, 'x')

299

300

# Generate multiple datasets to bin with same edges

301

datasets = []

302

for i in range(10):

303

data = sc.array(dims=['event'], values=np.random.uniform(0, 100, 1000))

304

datasets.append(data)

305

306

# Fast binning using lookup table

307

binned_datasets = []

308

for data in datasets:

309

events = sc.DataArray(

310

data=sc.ones(dims=['event'], shape=[1000]),

311

coords={'x': data}

312

)

313

# Lookup table enables faster binning for repeated operations

314

binned = sc.bin(events, x=lookup_table)

315

binned_datasets.append(binned)

316

```

317

318

### Working with Irregular Bins

319

320

```python

321

# Create irregular bin edges (non-uniform spacing)

322

irregular_edges = sc.array(

323

dims=['x'],

324

values=[0, 1, 2, 5, 10, 20, 50, 100], # Increasing spacing

325

unit='mm'

326

)

327

328

# Create sample data

329

position_data = sc.array(

330

dims=['particle'],

331

values=np.random.exponential(10, 5000),

332

unit='mm'

333

)

334

335

# Histogram with irregular bins

336

events = sc.DataArray(

337

data=sc.ones(dims=['particle'], shape=[5000]),

338

coords={'x': position_data}

339

)

340

341

irregular_hist = sc.hist(events, x=irregular_edges)

342

print(irregular_hist.coords['x']) # Shows irregular bin edges

343

```

344

345

### NaN Handling in Histograms

346

347

```python

348

# Create data with NaN values

349

data_with_nan = sc.array(

350

dims=['measurement'],

351

values=np.array([1, 2, np.nan, 4, 5, np.nan, 7, 8]),

352

)

353

354

# Regular histogram includes NaN in counts

355

regular_hist = sc.hist(data_with_nan, measurement=5)

356

357

# NaN-ignoring histogram excludes NaN values

358

nan_hist = sc.nanhist(data_with_nan, measurement=5)

359

360

print(f"Regular total: {sc.sum(regular_hist).value}")

361

print(f"NaN-ignoring total: {sc.sum(nan_hist).value}")

362

```