0
# Utilities and Helpers
1
2
Utility functions for inspecting ASDF files, testing extensions, and working with ASDF data structures programmatically. These tools provide convenience methods for debugging, development, and integration testing.
3
4
## Capabilities
5
6
### File Inspection
7
8
Display and analyze ASDF file structure and contents for debugging and exploration.
9
10
```python { .api }
11
def info(node_or_path, max_rows=24, max_cols=120, show_values=True):
12
"""
13
Print rendering of ASDF tree structure to stdout.
14
15
Parameters:
16
- node_or_path: ASDF file path, file-like object, or tree node to inspect
17
- max_rows (int): Maximum number of lines to print; nodes that do not fit are elided with a message
18
- max_cols (int): Maximum length of each printed line; longer lines are truncated with a message
19
- show_values (bool): Whether to display primitive values in the rendered tree
20
21
Example output:
22
root (AsdfObject)
23
├─ array_data (ndarray): shape=(100, 50), dtype=float64
24
├─ metadata (dict)
25
│ ├─ title (str): "Sample Dataset"
26
│ └─ version (str): "1.0"
27
└─ parameters (list): 3 items
28
"""
29
```
30
31
### Tree Search
32
33
Search through ASDF tree structures to find specific nodes, types, or values.
34
35
```python { .api }
36
class AsdfSearchResult:
37
"""
38
Result object containing search matches with tree navigation methods.
39
"""
40
41
def __iter__(self):
42
"""Iterate over search results."""
43
44
def __len__(self):
45
"""Number of search results."""
46
47
def __getitem__(self, index):
48
"""Get result by index."""
49
50
@property
51
def paths(self) -> list:
52
"""List of tree paths for all matches."""
53
54
@property
55
def nodes(self) -> list:
56
"""List of matching nodes."""
57
58
# Available through AsdfFile.search() method:
59
def search(tree, key=None, type_=None, value=None, filter_=None):
60
"""
61
Search through tree for matching nodes.
62
63
Parameters:
64
- tree: Tree or AsdfFile to search
65
- key: Key name to match (string or regex)
66
- type_: Python type to match
67
- value: Specific value to match
68
- filter_: Custom filter callable taking the node and, optionally, its dict key or list index; returns True to keep the node
69
70
Returns:
71
AsdfSearchResult: Object containing all matches
72
"""
73
```
74
75
### Tree Manipulation
76
77
Utilities for working with ASDF tree structures programmatically.
78
79
```python { .api }
80
# Available through asdf.treeutil module (internal but useful):
81
82
def walk_tree(tree):
83
"""
84
Walk through all nodes in an ASDF tree.
85
86
Parameters:
87
- tree: Tree structure to walk
88
89
Yields:
90
(path, key, value) tuples for each node
91
"""
92
93
def get_tree_path(tree, path):
94
"""
95
Get value at specific path in tree.
96
97
Parameters:
98
- tree: Tree structure
99
- path (list): Path components as list
100
101
Returns:
102
Value at the specified path
103
"""
104
105
def set_tree_path(tree, path, value):
106
"""
107
Set value at specific path in tree.
108
109
Parameters:
110
- tree: Tree structure to modify
111
- path (list): Path components as list
112
- value: Value to set at path
113
"""
114
```
115
116
### Reference Utilities
117
118
Work with ASDF references and external data.
119
120
```python { .api }
121
# Available through AsdfFile methods:
122
123
def resolve_references(af):
124
"""
125
Resolve all external references in ASDF file.
126
127
Parameters:
128
- af (AsdfFile): ASDF file containing references
129
130
Side effects:
131
Replaces reference objects with actual data
132
"""
133
134
def find_references(af):
135
"""
136
Find all references in ASDF tree.
137
138
Parameters:
139
- af (AsdfFile): ASDF file to search
140
141
Returns:
142
list: All reference objects found in tree
143
"""
144
```
145
146
### Schema Utilities
147
148
Work with ASDF schemas and validation.
149
150
```python { .api }
151
# Available through asdf.schema module:
152
153
def load_schema(schema_uri):
154
"""
155
Load ASDF schema by URI.
156
157
Parameters:
158
- schema_uri (str): URI of schema to load
159
160
Returns:
161
dict: Loaded schema definition
162
"""
163
164
def validate_tree(tree, schema_uri, extension_manager=None):
165
"""
166
Validate tree against specific schema.
167
168
Parameters:
169
- tree: Tree structure to validate
170
- schema_uri (str): URI of schema for validation
171
- extension_manager (ExtensionManager, optional): Extensions for validation
172
173
Raises:
174
ValidationError: If validation fails
175
"""
176
```
177
178
## Usage Examples
179
180
### File Inspection and Debugging
181
182
```python
183
import asdf
184
import numpy as np
185
186
# Create test file with complex structure
187
data = {
188
"experiments": {
189
"exp1": {
190
"data": np.random.random((100, 50)),
191
"metadata": {"date": "2024-01-01", "researcher": "Dr. Smith"},
192
"parameters": {"temperature": 25.0, "pressure": 1.013}
193
},
194
"exp2": {
195
"data": np.arange(1000).reshape(40, 25),
196
"metadata": {"date": "2024-01-02", "researcher": "Dr. Jones"},
197
"parameters": {"temperature": 30.0, "pressure": 1.020}
198
}
199
},
200
"summary": {"total_experiments": 2, "status": "complete"}
201
}
202
203
af = asdf.AsdfFile(data)
204
af.write_to("experiments.asdf")
205
206
# Inspect file structure
207
asdf.info("experiments.asdf")
208
# Output:
209
# root (AsdfObject)
210
# ├─ experiments (dict)
211
# │ ├─ exp1 (dict)
212
# │ │ ├─ data (ndarray): shape=(100, 50), dtype=float64
213
# │ │ ├─ metadata (dict): 2 items
214
# │ │ └─ parameters (dict): 2 items
215
# │ └─ exp2 (dict)
216
# │ ├─ data (ndarray): shape=(40, 25), dtype=int64
217
# │ ├─ metadata (dict): 2 items
218
# │ └─ parameters (dict): 2 items
219
# └─ summary (dict): 2 items
220
221
# Limit the rendering to 5 rows (show_values=True is already the default)
222
asdf.info("experiments.asdf", show_values=True, max_rows=5)
223
```
224
225
### Tree Search Operations
226
227
```python
228
# Search for specific keys
with asdf.open("experiments.asdf") as af:
    # Find all metadata
    metadata_results = af.search(key="metadata")
    print(f"Found {len(metadata_results)} metadata objects")

    for result in metadata_results:
        print(f"Path: {result.path}")
        print(f"Researcher: {result.node['researcher']}")

    # Find numpy arrays
    array_results = af.search(type_=np.ndarray)
    print(f"Found {len(array_results)} arrays")

    for result in array_results:
        print(f"Array at {result.path}: shape={result.node.shape}")

    # Find specific values
    temp_results = af.search(value=25.0)
    print(f"Found temperature 25.0 at: {temp_results.paths}")

    # Custom search with filter. The callable receives the node first and
    # (optionally) its dict key / list index second, so the array test must
    # be applied to the first argument.
    large_arrays = af.search(
        filter_=lambda node, key: isinstance(node, np.ndarray) and node.size > 1000
    )
    print(f"Found {len(large_arrays)} large arrays")
253
```
254
255
### Testing and Validation
256
257
```python
258
from asdf.testing import roundtrip_object, yaml_to_asdf
259
260
# Test custom object serialization
261
class CustomData:
    """
    Minimal container used to exercise ASDF roundtrip serialization.

    Parameters:
    - values: Payload stored on the instance and compared with ``==``
    """

    def __init__(self, values):
        self.values = values

    def __eq__(self, other):
        """Compare payloads; defer to the other operand for foreign types."""
        # NotImplemented (rather than False) lets Python try the reflected
        # comparison before falling back to "not equal".
        if not isinstance(other, CustomData):
            return NotImplemented
        return self.values == other.values

    def __repr__(self):
        """Readable representation so failed roundtrip assertions show the payload."""
        return f"{type(self).__name__}({self.values!r})"
267
268
# Test roundtrip (assumes a converter for CustomData has been registered; none is defined in this example)
269
original = CustomData([1, 2, 3, 4, 5])
270
271
try:
272
restored = roundtrip_object(original)
273
assert original == restored
274
print("Roundtrip test passed")
275
except Exception as e:
276
print(f"Roundtrip test failed: {e}")
277
278
# Test YAML parsing
279
yaml_content = """
280
experiment:
281
name: "Temperature Study"
282
data: [20.1, 22.3, 24.7, 21.9]
283
conditions:
284
humidity: 0.65
285
pressure: 1013.25
286
metadata:
287
version: 1.0
288
created: "2024-01-01"
289
"""
290
291
af = yaml_to_asdf(yaml_content)
292
print(f"Parsed experiment: {af.tree['experiment']['name']}")
293
print(f"Data points: {len(af.tree['experiment']['data'])}")
294
```
295
296
### Reference Management
297
298
```python
299
# Create file with external references
300
from asdf import ExternalArrayReference
301
302
# Create main file with external reference
303
external_ref = ExternalArrayReference(
304
"external_data.asdf",
305
"large_dataset",
306
np.float64,
307
(10000, 1000)
308
)
309
310
main_data = {
311
"local_data": np.random.random(100),
312
"external_data": external_ref,
313
"metadata": {"source": "external"}
314
}
315
316
af = asdf.AsdfFile(main_data)
317
af.write_to("main_with_refs.asdf")
318
319
# Find and resolve references
320
with asdf.open("main_with_refs.asdf") as af:
321
# Find all references
322
refs = af.find_references()
323
print(f"Found {len(refs)} references:")
324
325
for ref in refs:
326
if isinstance(ref, ExternalArrayReference):
327
print(f" External array: {ref.fileuri} -> {ref.target}")
328
print(f" Shape: {ref.shape}, dtype: {ref.dtype}")
329
330
# Resolve references (if external files exist)
331
try:
332
af.resolve_references()
333
print("All references resolved successfully")
334
except FileNotFoundError as e:
335
print(f"Could not resolve reference: {e}")
336
```
337
338
### Development and Debugging Tools
339
340
```python
341
# Tree walking for analysis
342
def analyze_tree_structure(af):
    """
    Print a summary of the node types and arrays found under ``af.tree``.

    Parameters:
    - af: Object exposing a ``tree`` attribute made of nested dicts, lists and leaf values

    Side effects:
    Prints one line per numpy array encountered, followed by array totals
    and a per-type node count sorted by type name.
    """
    type_counts = {}
    # A mutable holder lets the nested visitor update totals without ``nonlocal``.
    array_stats = {"count": 0, "elements": 0}

    def visit(node, path=""):
        if isinstance(node, dict):
            for key, child in node.items():
                visit(child, f"{path}/{key}" if path else key)
        elif isinstance(node, list):
            for index, child in enumerate(node):
                visit(child, f"{path}[{index}]")
        elif isinstance(node, np.ndarray):
            array_stats["count"] += 1
            array_stats["elements"] += node.size
            print(f"Array at {path}: {node.shape} {node.dtype}")

        # Every visited node (containers included) is tallied by type name.
        type_name = type(node).__name__
        type_counts[type_name] = type_counts.get(type_name, 0) + 1

    visit(af.tree)

    print("\nTree Analysis:")
    print(f" Total arrays: {array_stats['count']}")
    print(f" Total array elements: {array_stats['elements']}")
    print(" Type distribution:")
    for type_name in sorted(type_counts):
        print(f" {type_name}: {type_counts[type_name]}")
379
380
# Use analysis tool
381
with asdf.open("experiments.asdf") as af:
382
analyze_tree_structure(af)
383
```
384
385
### Performance Profiling
386
387
```python
388
import time
389
import sys
390
391
def profile_asdf_operations(data_sizes):
    """
    Profile ASDF write and read operations for a range of square array sizes.

    Parameters:
    - data_sizes: Iterable of ints; each ``size`` produces a ``size x size`` random array

    Returns:
    list: One dict per size with keys "size", "write_time", "read_time",
    "file_size" and "elements"

    Side effects:
    Writes ``test_{size}.asdf`` into the current working directory (the files
    are not removed) and prints one summary line per size.
    """
    # Imported here because the snippet's import block only brings in
    # ``time`` and ``sys``; without this, os.path.getsize raises NameError.
    import os

    results = []

    for size in data_sizes:
        file_name = f"test_{size}.asdf"

        # Create test data
        test_data = {
            "array": np.random.random((size, size)),
            "metadata": {"size": size, "created": time.time()},
        }

        # Time write operation. perf_counter is monotonic and high resolution,
        # unlike time.time(), which can jump when the system clock is adjusted.
        start_time = time.perf_counter()
        af = asdf.AsdfFile(test_data)
        af.write_to(file_name)
        write_time = time.perf_counter() - start_time

        # Time read operation
        start_time = time.perf_counter()
        with asdf.open(file_name) as af:
            _ = af.tree["array"].sum()  # Force array load
        read_time = time.perf_counter() - start_time

        # Get file size
        file_size = os.path.getsize(file_name)

        results.append({
            "size": size,
            "write_time": write_time,
            "read_time": read_time,
            "file_size": file_size,
            "elements": size * size,
        })

        print(f"Size {size}x{size}: write={write_time:.3f}s, "
              f"read={read_time:.3f}s, file={file_size/1024/1024:.1f}MB")

    return results
430
431
# Profile different array sizes
432
sizes = [100, 500, 1000, 2000]
433
profile_results = profile_asdf_operations(sizes)
434
```