or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md, core-data-types.md, data-serialization.md, extension-system.md, file-operations.md, index.md, utilities.md

docs/file-operations.md

0

# File Operations

1

2

Core file handling functionality for creating, reading, writing, and managing ASDF files. Provides both high-level convenience functions and comprehensive low-level file management through the AsdfFile class.

3

4

## Capabilities

5

6

### AsdfFile Class

7

8

Main class for representing and manipulating ASDF files, providing complete control over file creation, reading, writing, and validation.

9

10

```python { .api }

11

class AsdfFile:

12

def __init__(self, tree=None, uri=None, extensions=None, version=None,

13

ignore_unrecognized_tag=False, memmap=False, lazy_load=True,

14

custom_schema=None):

15

"""

16

Create a new AsdfFile object.

17

18

Parameters:

19

- tree (dict or AsdfFile, optional): Main tree data conforming to ASDF schema

20

- uri (str, optional): URI for resolving relative references

21

- extensions (object, optional): Additional extensions to use when reading and writing

22

- version (str, optional): ASDF core schemas version

23

- ignore_unrecognized_tag (bool, optional): When True, do not raise warnings for unrecognized tags (default: False)

24

- memmap (bool, optional): When True, attempt to memmap underlying data arrays (default: False)

25

- lazy_load (bool, optional): When True, data arrays will only be loaded lazily (default: True)

26

- custom_schema (str, optional): Path to custom schema file for secondary validation

27

"""

28

29

# Properties

30

@property

31

def version(self):

32

"""Get this AsdfFile's ASDF core schemas version."""

33

34

@version.setter

35

def version(self, value):

36

"""Set this AsdfFile's ASDF core schemas version."""

37

38

@property

39

def version_string(self):

40

"""Get this AsdfFile's ASDF core schemas version as a string."""

41

42

@property

43

def extensions(self):

44

"""Get the list of user extensions that are enabled for use with this AsdfFile."""

45

46

@extensions.setter

47

def extensions(self, value):

48

"""Set the list of user extensions that are enabled for use with this AsdfFile."""

49

50

@property

51

def extension_manager(self):

52

"""Get the ExtensionManager for this AsdfFile."""

53

54

@property

55

def file_format_version(self):

56

"""Get the file format version."""

57

58

@property

59

def uri(self):

60

"""Get the URI associated with the AsdfFile."""

61

62

@property

63

def tree(self):

64

"""Get/set the tree of data in the ASDF file."""

65

66

@tree.setter

67

def tree(self, tree):

68

"""Set the tree of data in the ASDF file."""

69

70

@property

71

def comments(self):

72

"""Get the comments after the header, before the tree."""

73

74

# File Operations

75

def close(self):

76

"""Close the file handles associated with the asdf.AsdfFile."""

77

78

def copy(self):

79

"""Create a copy of the AsdfFile."""

80

81

def update(self, all_array_storage=NotSet, all_array_compression=NotSet,

82

compression_kwargs=NotSet, pad_blocks=False, include_block_index=True,

83

version=None):

84

"""

85

Update the file on disk in place.

86

87

Parameters:

88

- all_array_storage (str, optional): Override array storage type (internal/external/inline)

89

- all_array_compression (str, optional): Set compression type (zlib/bzp2/lz4/input/None)

90

- compression_kwargs (dict, optional): Compression keyword arguments

91

- pad_blocks (float or bool, optional): Add extra space between blocks (default: False)

92

- include_block_index (bool, optional): Include block index at end of file (default: True)

93

- version (str, optional): Update ASDF core schemas version before writing

94

"""

95

96

def write_to(self, fd, all_array_storage=NotSet, all_array_compression=NotSet,

97

compression_kwargs=NotSet, pad_blocks=False, include_block_index=True,

98

version=None):

99

"""

100

Write the ASDF file to the given file-like object.

101

102

Parameters:

103

- fd (str or file-like): File path or file-like object to write to

104

- all_array_storage (str, optional): Override array storage type (internal/external/inline)

105

- all_array_compression (str, optional): Set compression type (zlib/bzp2/lz4/input/None)

106

- compression_kwargs (dict, optional): Compression keyword arguments

107

- pad_blocks (float or bool, optional): Add extra space between blocks (default: False)

108

- include_block_index (bool, optional): Include block index at end of file (default: True)

109

- version (str, optional): Update ASDF core schemas version before writing

110

"""

111

112

# URI and Reference Operations

113

def resolve_uri(self, uri):

114

"""

115

Resolve a (possibly relative) URI against the URI of this ASDF file.

116

117

Parameters:

118

- uri (str): An absolute or relative URI to resolve

119

120

Returns:

121

str: The resolved URI

122

"""

123

124

def open_external(self, uri, **kwargs):

125

"""

126

Open an external ASDF file from the given URI.

127

128

Parameters:

129

- uri (str): An absolute or relative URI to resolve against this ASDF file

130

131

Returns:

132

AsdfFile: The external ASDF file

133

"""

134

135

def make_reference(self, path=None):

136

"""

137

Make a new reference to a part of this file's tree.

138

139

Parameters:

140

- path (list of str and int, optional): Parts of path pointing to item in tree

141

142

Returns:

143

reference: A reference object

144

"""

145

146

def find_references(self):

147

"""Find all external JSON References in the tree and convert them to Reference objects."""

148

149

def resolve_references(self):

150

"""Find all external JSON References in tree, load external content, and place in tree."""

151

152

# Array Operations

153

def set_array_storage(self, arr, array_storage):

154

"""

155

Set the block type to use for the given array data.

156

157

Parameters:

158

- arr (numpy.ndarray): The array to set

159

- array_storage (str): Must be one of: internal, external, inline

160

"""

161

162

def get_array_storage(self, arr):

163

"""Get the block type for the given array data."""

164

165

def set_array_compression(self, arr, compression, **compression_kwargs):

166

"""

167

Set the compression to use for the given array data.

168

169

Parameters:

170

- arr (numpy.ndarray): The array to set

171

- compression (str or None): Must be one of: '', None, zlib, bzp2, lz4, input

172

"""

173

174

def get_array_compression(self, arr):

175

"""Get the compression type for the given array data."""

176

177

def get_array_compression_kwargs(self, arr):

178

"""Get compression keyword arguments for the array."""

179

180

def set_array_save_base(self, arr, save_base):

181

"""

182

Set the save_base option for arr.

183

184

Parameters:

185

- arr (numpy.ndarray): The array

186

- save_base (bool or None): If None, default from config is used

187

"""

188

189

def get_array_save_base(self, arr):

190

"""

191

Get the save_base option for arr.

192

193

Parameters:

194

- arr (numpy.ndarray): The array

195

196

Returns:

197

bool or None: The save_base setting

198

"""

199

200

# Schema and Validation Operations

201

def validate(self):

202

"""Validate the current state of the tree against the ASDF schema."""

203

204

def fill_defaults(self):

205

"""Fill in any values missing in tree using default values from schema."""

206

207

def remove_defaults(self):

208

"""Remove any values in tree that are same as default values in schema."""

209

210

def schema_info(self, key="description", path=None, preserve_list=True, refresh_extension_manager=NotSet):

211

"""

212

Get a nested dictionary of the schema information for a given key.

213

214

Parameters:

215

- key (str): The key to look up (default: "description")

216

- path (str or AsdfSearchResult): Dot-separated path or AsdfSearchResult object

217

- preserve_list (bool): If True, preserve lists, otherwise turn into dicts

218

- refresh_extension_manager (bool): Deprecated parameter

219

"""

220

221

# History Operations

222

def add_history_entry(self, description, software=None):

223

"""

224

Add an entry to the history list.

225

226

Parameters:

227

- description (str): A description of the change

228

- software (dict or list of dict): A description of the software used

229

"""

230

231

def get_history_entries(self):

232

"""

233

Get a list of history entries from the file object.

234

235

Returns:

236

list: A list of history entries

237

"""

238

239

# Information and Search Operations

240

def info(self, max_rows=24, max_cols=120,

241

show_values=True, refresh_extension_manager=NotSet):

242

"""

243

Print a rendering of this file's tree to stdout.

244

245

Parameters:

246

- max_rows (int, tuple, or None, optional): Maximum number of lines to print

247

- max_cols (int or None, optional): Maximum length of line to print

248

- show_values (bool, optional): Set to False to disable display of primitive values

249

"""

250

251

def search(self, key=NotSet, type_=NotSet, value=NotSet, filter_=None):

252

"""

253

Search this file's tree.

254

255

Parameters:

256

- key (NotSet, str, or any): Search query that selects nodes by dict key or list index

257

- type_ (NotSet, str, or type): Search query that selects nodes by type

258

- value (NotSet, str, or any): Search query that selects nodes by value

259

- filter_ (callable): Callable that filters nodes by arbitrary criteria

260

261

Returns:

262

AsdfSearchResult: The result of the search

263

"""

264

265

# Dictionary-like Interface

266

def keys(self):

267

"""Return the keys of the tree."""

268

269

def __getitem__(self, key):

270

"""Get an item from the tree."""

271

272

def __setitem__(self, key, value):

273

"""Set an item in the tree."""

274

275

def __contains__(self, item):

276

"""Check if item is in the tree."""

277

278

# Context Manager Support

279

def __enter__(self):

280

"""Enter context manager."""

281

282

def __exit__(self, type_, value, traceback):

283

"""Exit context manager."""

284

```

285

286

### File Opening

287

288

Open existing ASDF files with comprehensive options for validation, extensions, and performance optimization.

289

290

```python { .api }

291

def open(fd, uri=None, mode=None, validate_checksums=False, extensions=None,

292

ignore_unrecognized_tag=False, _force_raw_types=False, memmap=False,

293

lazy_tree=NotSet, lazy_load=True, custom_schema=None,

294

strict_extension_check=False, ignore_missing_extensions=False):

295

"""

296

Open an existing ASDF file.

297

298

Parameters:

299

- fd: File descriptor, path, or file-like object

300

- uri (str, optional): URI for resolving relative references

301

- mode (str, optional): File open mode

302

- validate_checksums (bool): Validate array checksums on read (default: False)

303

- extensions (Extension or list, optional): Additional extensions for custom types

304

- ignore_unrecognized_tag (bool): Don't warn for unrecognized tags (default: False)

305

- _force_raw_types (bool): Internal parameter for forcing raw types (default: False)

306

- memmap (bool): Memory-map arrays when possible (default: False)

307

- lazy_tree (NotSet): Deprecated parameter (default: NotSet)

308

- lazy_load (bool): Load arrays lazily when accessed (default: True)

309

- custom_schema (str, optional): Path to custom validation schema

310

- strict_extension_check (bool): Strict extension validation (default: False)

311

- ignore_missing_extensions (bool): Ignore missing extension warnings (default: False)

312

313

Returns:

314

AsdfFile: The opened ASDF file object

315

"""

316

```

317

318

### Simple File Loading

319

320

Convenience function for loading ASDF files directly into Python objects without an AsdfFile wrapper.

321

322

```python { .api }

323

def load(fp, *, uri=None, validate_checksums=False, extensions=None,

324

custom_schema=None):

325

"""

326

Load object tree from ASDF file.

327

328

Parameters:

329

- fp: File path or file-like object

330

- uri (str, optional): URI for resolving relative references

331

- validate_checksums (bool): Validate array checksums

332

- extensions (Extension or list, optional): Additional extensions

333

- custom_schema (str, optional): Path to custom validation schema

334

335

Returns:

336

Object tree (typically dict)

337

"""

338

```

339

340

### Simple File Writing

341

342

Convenience function for writing Python objects directly to ASDF files.

343

344

```python { .api }

345

def dump(tree, fp, *, version=None, extensions=None, all_array_storage=NotSet,

346

all_array_compression=NotSet, compression_kwargs=NotSet, pad_blocks=False,

347

custom_schema=None):

348

"""

349

Serialize object tree to ASDF file.

350

351

Parameters:

352

- tree: Object tree to serialize

353

- fp: File path or file-like object

354

- version (str, optional): ASDF version to write

355

- extensions (Extension or list, optional): Additional extensions

356

- all_array_storage (str, optional): Override array storage type (internal/external/inline)

357

- all_array_compression (str, optional): Compression algorithm

358

- compression_kwargs (dict, optional): Compression-specific options

359

- pad_blocks (float or bool, optional): Add extra space between blocks (default: False)

360

- custom_schema (str, optional): Path to custom validation schema

361

"""

362

```

363

364

## Usage Examples

365

366

### Basic File Creation

367

368

```python

369

import asdf

370

import numpy as np

371

372

# Create data

373

data = {

374

"arrays": {

375

"data": np.random.random((1000, 1000)),

376

"mask": np.zeros(1000, dtype=bool)

377

},

378

"metadata": {

379

"title": "My Dataset",

380

"version": "1.0"

381

}

382

}

383

384

# Create and write file

385

af = asdf.AsdfFile(data)

386

af.write_to("dataset.asdf")

387

```

388

389

### File Reading with Memory Mapping

390

391

```python

392

# Open with memory mapping for large arrays

393

with asdf.open("dataset.asdf", memmap=True) as af:

394

# Access data without loading into memory

395

print(af.tree["arrays"]["data"].shape)

396

397

# Only load when needed

398

subset = af.tree["arrays"]["data"][:100, :100]

399

```

400

401

### Validation and Schema Checking

402

403

```python

404

# Create an AsdfFile from an in-memory tree and validate it

405

af = asdf.AsdfFile(tree)

406

af.validate() # Validate against standard schema

407

408

# Write with custom validation (custom_schema is an AsdfFile constructor option; write_to() does not accept it)

409

asdf.AsdfFile(tree, custom_schema="my_schema.json").write_to("output.asdf")

410

```

411

412

### Working with References

413

414

```python

415

# Resolve external references

416

af.resolve_references()

417

418

# Find all references in tree

419

refs = af.find_references()

420

print(f"Found {len(refs)} references")

421

```