0
# Core Data Structures
1
2
Foundation classes for multi-dimensional labeled arrays with physical units, uncertainty propagation, and comprehensive metadata handling. These classes form the core of scipp's data model and enable all higher-level functionality.
3
4
## Capabilities
5
6
### Variable
7
8
The fundamental data structure in scipp: a multi-dimensional array with labeled dimensions, a physical unit, and optional variances used for uncertainty propagation.
9
10
```python { .api }
11
class Variable:
12
"""Multi-dimensional array with labeled dimensions, units, and variances"""
13
14
def __init__(self, *, dims=None, values=None, variances=None, unit=None, dtype=None):
15
"""
16
Create a Variable
17
18
Args:
19
dims (Sequence[str]): Dimension labels
20
values (array-like): Data values
21
variances (array-like, optional): Variance values for uncertainty
22
unit (Unit or str, optional): Physical unit
23
dtype (DType or str, optional): Data type
24
"""
25
26
@property
27
def dims(self) -> Tuple[str, ...]:
28
"""Dimension labels"""
29
30
@property
31
def shape(self) -> Tuple[int, ...]:
32
"""Shape of the data"""
33
34
@property
35
def unit(self) -> Unit:
36
"""Physical unit"""
37
38
@property
39
def dtype(self) -> DType:
40
"""Data type"""
41
42
@property
43
def values(self) -> np.ndarray:
44
"""Data values as numpy array"""
45
46
@property
47
def variances(self) -> Optional[np.ndarray]:
48
"""Variance values if present"""
49
50
def copy(self, deep: bool = True) -> Variable:
51
"""Create a copy of the variable"""
52
53
def rename_dims(self, mapping: Dict[str, str]) -> Variable:
54
"""Rename dimensions"""
55
56
def to(self, /, *, unit=None, dtype=None, copy=True) -> Variable:
57
"""Convert unit or dtype"""
58
```
59
60
### DataArray
61
62
A Variable with associated coordinate variables and masks, providing enhanced data organization and metadata management for scientific datasets.
63
64
```python { .api }
65
class DataArray:
66
"""Variable with coordinates and masks for enhanced data organization"""
67
68
def __init__(self, data=None, *, coords=None, masks=None, name=None):
69
"""
70
Create a DataArray
71
72
Args:
73
data (Variable): The data variable
74
coords (Dict[str, Variable], optional): Coordinate variables
75
masks (Dict[str, Variable], optional): Mask variables
76
name (str, optional): Name of the data array
77
"""
78
79
@property
80
def data(self) -> Variable:
81
"""The data variable"""
82
83
@property
84
def coords(self) -> Coords:
85
"""Dictionary-like access to coordinates"""
86
87
@property
88
def masks(self) -> Masks:
89
"""Dictionary-like access to masks"""
90
91
@property
92
def dims(self) -> Tuple[str, ...]:
93
"""Dimension labels"""
94
95
@property
96
def shape(self) -> Tuple[int, ...]:
97
"""Shape of the data"""
98
99
@property
100
def unit(self) -> Unit:
101
"""Physical unit of the data"""
102
103
@property
104
def dtype(self) -> DType:
105
"""Data type"""
106
107
def copy(self, deep: bool = True) -> DataArray:
108
"""Create a copy of the data array"""
109
110
def rename_dims(self, mapping: Dict[str, str]) -> DataArray:
111
"""Rename dimensions"""
112
```
113
114
### Dataset
115
116
A dictionary-like container for multiple related DataArrays that share coordinates, enabling analysis of multivariate scientific data.
117
118
```python { .api }
119
class Dataset:
120
"""Dictionary-like container for multiple DataArrays"""
121
122
def __init__(self, data=None, *, coords=None, name=None):
123
"""
124
Create a Dataset
125
126
Args:
127
data (Dict[str, DataArray or Variable], optional): Data variables
128
coords (Dict[str, Variable], optional): Shared coordinates
129
name (str, optional): Name of the dataset
130
"""
131
132
@property
133
def coords(self) -> Coords:
134
"""Dictionary-like access to shared coordinates"""
135
136
@property
137
def dims(self) -> Tuple[str, ...]:
138
"""All dimension labels in the dataset"""
139
140
def copy(self, deep: bool = True) -> Dataset:
141
"""Create a copy of the dataset"""
142
143
def rename_dims(self, mapping: Dict[str, str]) -> Dataset:
144
"""Rename dimensions"""
145
146
def __getitem__(self, key: str) -> DataArray:
147
"""Get data array by name"""
148
149
def __setitem__(self, key: str, value: Union[DataArray, Variable]):
150
"""Set data array by name"""
151
152
def __delitem__(self, key: str):
153
"""Delete data array by name"""
154
155
def keys(self) -> KeysView[str]:
156
"""Data array names"""
157
158
def values(self) -> ValuesView[DataArray]:
159
"""Data arrays"""
160
161
def items(self) -> ItemsView[str, DataArray]:
162
"""Name, data array pairs"""
163
```
164
165
### DataGroup
166
167
A hierarchical container for complex, nested data, enabling related datasets and metadata to be organized in a tree-like structure.
168
169
```python { .api }
170
class DataGroup:
171
"""Hierarchical container for nested data structures"""
172
173
def __init__(self, data=None):
174
"""
175
Create a DataGroup
176
177
Args:
178
data (Dict[str, Any], optional): Initial data
179
"""
180
181
def __getitem__(self, key: str):
182
"""Get item by name"""
183
184
def __setitem__(self, key: str, value):
185
"""Set item by name"""
186
187
def __delitem__(self, key: str):
188
"""Delete item by name"""
189
190
def keys(self) -> KeysView[str]:
191
"""Item names"""
192
193
def values(self) -> ValuesView:
194
"""Items"""
195
196
def items(self) -> ItemsView:
197
"""Name, item pairs"""
198
199
def copy(self, deep: bool = True) -> DataGroup:
200
"""Create a copy of the data group"""
201
```
202
203
### Unit
204
205
Representation of a physical unit that supports arithmetic and is propagated automatically through mathematical operations.
206
207
```python { .api }
208
class Unit:
209
"""Physical unit with arithmetic operations"""
210
211
def __init__(self, unit_string: str):
212
"""
213
Create a Unit from string representation
214
215
Args:
216
unit_string (str): Unit string (e.g., 'm', 'kg*m/s^2', 'mm')
217
"""
218
219
def __mul__(self, other: Unit) -> Unit:
220
"""Multiply units"""
221
222
def __truediv__(self, other: Unit) -> Unit:
223
"""Divide units"""
224
225
def __pow__(self, exponent: Union[int, float]) -> Unit:
226
"""Raise unit to power"""
227
228
def __eq__(self, other: Unit) -> bool:
229
"""Check unit equality"""
230
231
def __str__(self) -> str:
232
"""String representation"""
233
234
def __repr__(self) -> str:
235
"""Developer-facing string representation (as returned by repr())"""
236
```
237
238
### DType
239
240
Data type enumeration for scipp arrays, providing type safety and consistency across operations.
241
242
```python { .api }
243
class DType:
244
"""Data type enumeration for scipp arrays"""
245
246
# Numeric and boolean types
247
int32: DType
248
int64: DType
249
float32: DType
250
float64: DType
251
bool: DType
252
253
# String type
254
string: DType
255
256
# Time types
257
datetime64: DType
258
259
# Vector types
260
vector3: DType
261
262
# Transformation types
263
linear_transform3: DType
264
affine_transform3: DType
265
rotation3: DType
266
translation3: DType
267
268
def __eq__(self, other: DType) -> bool:
269
"""Check dtype equality"""
270
271
def __str__(self) -> str:
272
"""String representation"""
273
```
274
275
## Auxiliary Access Classes
276
277
### Coords
278
279
Dictionary-like access to coordinate variables with automatic dimension consistency checking.
280
281
```python { .api }
282
class Coords:
283
"""Dictionary-like access to coordinates"""
284
285
def __getitem__(self, key: str) -> Variable:
286
"""Get coordinate by name"""
287
288
def __setitem__(self, key: str, value: Variable):
289
"""Set coordinate by name"""
290
291
def __delitem__(self, key: str):
292
"""Delete coordinate by name"""
293
294
def keys(self) -> KeysView[str]:
295
"""Coordinate names"""
296
297
def values(self) -> ValuesView[Variable]:
298
"""Coordinate variables"""
299
300
def items(self) -> ItemsView[str, Variable]:
301
"""Name, variable pairs"""
302
```
303
304
### Masks
305
306
Dictionary-like access to mask variables for data filtering and selection.
307
308
```python { .api }
309
class Masks:
310
"""Dictionary-like access to masks"""
311
312
def __getitem__(self, key: str) -> Variable:
313
"""Get mask by name"""
314
315
def __setitem__(self, key: str, value: Variable):
316
"""Set mask by name"""
317
318
def __delitem__(self, key: str):
319
"""Delete mask by name"""
320
321
def keys(self) -> KeysView[str]:
322
"""Mask names"""
323
324
def values(self) -> ValuesView[Variable]:
325
"""Mask variables"""
326
327
def items(self) -> ItemsView[str, Variable]:
328
"""Name, variable pairs"""
329
```
330
331
### Bins
332
333
Access to binned/event data content with operations for bin manipulation and event data handling.
334
335
```python { .api }
336
class Bins:
337
"""Access to binned/event data content"""
338
339
@property
340
def constituents(self) -> Dict[str, Variable]:
341
"""Access to bin contents"""
342
343
@property
344
def coords(self) -> Coords:
345
"""Coordinates of bin contents"""
346
347
def copy(self, deep: bool = True) -> Bins:
348
"""Create copy of binned data"""
349
```
350
351
## Usage Examples
352
353
### Creating Variables with Units and Uncertainties
354
355
```python
356
import scipp as sc
357
358
# Create a simple variable
359
temp = sc.array(dims=['x'], values=[273.15, 298.15, 373.15], unit='K')
360
361
# Create with uncertainties
362
measurement = sc.array(
363
dims=['time'],
364
values=[1.0, 2.1, 3.0],
365
variances=[0.01, 0.04, 0.09],
366
unit='m/s'
367
)
368
369
# Create multi-dimensional data
370
image = sc.zeros(dims=['y', 'x'], shape=[100, 200], unit='counts')
371
```
372
373
### Building DataArrays with Coordinates
374
375
```python
376
import numpy as np
377
378
# Create coordinate arrays
379
x = sc.linspace(dim='x', start=0, stop=10, num=11, unit='mm')
380
y = sc.linspace(dim='y', start=0, stop=5, num=6, unit='mm')
381
382
# Create 2D data
383
data = sc.array(dims=['y', 'x'], values=np.random.random((6, 11)), unit='counts')
384
385
# Combine into DataArray
386
da = sc.DataArray(data=data, coords={'x': x, 'y': y})
387
388
# Add masks
389
mask = sc.array(dims=['y', 'x'], values=np.random.random((6, 11)) > 0.9)
390
da.masks['random_mask'] = mask
391
```
392
393
### Working with Datasets
394
395
```python
396
# Create multiple related measurements
397
temperature = sc.DataArray(
398
data=sc.array(dims=['time'], values=[295, 300, 305], unit='K'),
399
coords={'time': sc.arange(dim='time', start=0, stop=3, unit='s')}
400
)
401
402
pressure = sc.DataArray(
403
data=sc.array(dims=['time'], values=[1.0, 1.1, 1.2], unit='bar'),
404
coords={'time': sc.arange(dim='time', start=0, stop=3, unit='s')}
405
)
406
407
# Combine into dataset
408
experiment = sc.Dataset({'temperature': temperature, 'pressure': pressure})
409
410
# Access data
411
print(experiment['temperature'])
412
print(experiment.coords['time'])
413
```