# Data Serialization

High-level functions for serializing and deserializing Python objects to/from ASDF format using string representations. These functions provide convenient alternatives to file-based operations for in-memory processing and testing.

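To make the contrast with file-based operations concrete, here is a minimal round-trip sketch. The file-based calls (`asdf.AsdfFile`, `AsdfFile.write_to`, `asdf.open`) follow standard asdf usage and are shown only for comparison; the file name is illustrative.

```python
import asdf
import numpy as np

tree = {"data": np.arange(5)}

# In-memory round trip using the string helpers documented below
text = asdf.dumps(tree)
restored = asdf.loads(text)

# Equivalent file-based round trip, for comparison
asdf.AsdfFile(tree).write_to("example.asdf")
with asdf.open("example.asdf") as af:
    restored_from_file = np.array(af.tree["data"])  # copy before the file closes
```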

## Capabilities

### String Deserialization

Load Python objects from ASDF-formatted strings with full support for validation and extensions.

```python { .api }
def loads(asdf_string, *, uri=None, validate_checksums=False, extensions=None,
          custom_schema=None):
    """
    Load object tree from ASDF string.

    Parameters:
    - asdf_string (str): ASDF-formatted string content
    - uri (str, optional): URI for resolving relative references
    - validate_checksums (bool): Validate array checksums on load
    - extensions (Extension or list, optional): Additional extensions for custom types
    - custom_schema (str, optional): Path to custom validation schema

    Returns:
        Object tree (typically dict) containing deserialized data

    Raises:
        ValidationError: If validation fails
        AsdfSerializationError: If deserialization fails
    """
```
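A brief, hedged sketch of how these parameters can be combined; `asdf_string` stands in for output produced by `dumps` (documented below), and the URI value is purely illustrative.

```python
import asdf

# asdf_string: an ASDF document previously produced by asdf.dumps(...)
tree = asdf.loads(
    asdf_string,
    uri="file:///data/session-001.asdf",  # illustrative base URI for relative references
    validate_checksums=True,              # verify block checksums while loading
)
print(sorted(tree.keys()))
```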

### String Serialization

Serialize Python objects to ASDF-formatted strings with comprehensive control over output format and compression.

```python { .api }
def dumps(tree, *, version=None, extensions=None, all_array_storage=NotSet,
          all_array_compression=NotSet, compression_kwargs=NotSet, pad_blocks=False,
          custom_schema=None) -> str:
    """
    Serialize object tree to ASDF string.

    Parameters:
    - tree: Object tree to serialize (typically dict)
    - version (str, optional): ASDF version to use (e.g., "1.5.0")
    - extensions (Extension or list, optional): Additional extensions for custom types
    - all_array_storage (str, optional): Override array storage ('internal', 'external')
    - all_array_compression (str, optional): Compression algorithm ('none', 'zlib', 'bzp2', 'lz4')
    - compression_kwargs (dict, optional): Algorithm-specific compression parameters
    - pad_blocks (bool): Add padding to array blocks for streaming performance
    - custom_schema (str, optional): Path to custom validation schema

    Returns:
        str: ASDF-formatted string representation

    Raises:
        AsdfSerializationError: If serialization fails
        ValidationError: If custom schema validation fails
    """
```
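As a sketch of the options not covered in the usage examples below, the storage and padding parameters can be combined like this; the tree contents and option choices are illustrative only.

```python
import asdf
import numpy as np

tree = {"image": np.zeros((256, 256), dtype="float32")}

text = asdf.dumps(
    tree,
    version="1.5.0",                # target ASDF standard version
    all_array_storage="internal",   # keep array blocks inside the serialized document
    pad_blocks=True,                # pad array blocks, per the parameter described above
)
```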

## Usage Examples

### Basic String Serialization

```python
import asdf
import numpy as np

# Create data structure
data = {
    "scalars": {
        "temperature": 25.5,
        "pressure": 101325,
        "active": True
    },
    "arrays": {
        "measurements": np.array([1.2, 3.4, 5.6, 7.8]),
        "timestamps": np.arange(100, dtype='datetime64[s]')
    },
    "metadata": {
        "instrument": "Spectrometer X1",
        "operator": "Dr. Smith"
    }
}

# Serialize to string
asdf_string = asdf.dumps(data)
print(len(asdf_string))    # String length
print(asdf_string[:200])   # First 200 characters
```

### String Deserialization

```python
# Deserialize from string
loaded_data = asdf.loads(asdf_string)

# Access the data
print(loaded_data["scalars"]["temperature"])        # 25.5
print(loaded_data["arrays"]["measurements"].dtype)  # float64
print(type(loaded_data["arrays"]["timestamps"]))    # numpy.ndarray
```

### Compression in String Operations

```python
# Serialize with compression
compressed_string = asdf.dumps(
    data,
    all_array_compression='zlib',
    compression_kwargs={'level': 9}
)

# Compare sizes
original_size = len(asdf.dumps(data))
compressed_size = len(compressed_string)
print(f"Compression ratio: {original_size / compressed_size:.2f}x")

# Deserialize compressed data
loaded_compressed = asdf.loads(compressed_string)
# Arrays are automatically decompressed
```

### Version Control

```python
# Serialize to specific ASDF standard versions
v1_4_string = asdf.dumps(data, version="1.4.0")
v1_5_string = asdf.dumps(data, version="1.5.0")

# Data written with an older standard version can still be loaded
loaded = asdf.loads(v1_4_string)
```
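One way to confirm which standard version was actually written is to inspect the header comment lines at the top of the returned string; ASDF documents conventionally begin with `#ASDF` and `#ASDF_STANDARD` lines. This is a hedged sketch based on that convention, not an API guarantee.

```python
# The first lines of an ASDF document identify the file-format and
# standard versions, e.g. "#ASDF 1.0.0" and "#ASDF_STANDARD 1.4.0".
for line in v1_4_string.splitlines()[:2]:
    print(line)
```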

### Custom Extensions in String Operations

```python
from asdf.extension import Extension

# Custom extension for special types
class MyExtension(Extension):
    extension_uri = "asdf://example.com/myext/extensions/myext-1.0.0"
    # ... extension implementation (converters, tags, etc.)

# Serialize with extensions
# (MyCustomType is a placeholder for a type handled by the extension)
custom_string = asdf.dumps(
    {"custom_object": MyCustomType()},
    extensions=[MyExtension()]
)

# Deserialize with same extensions
loaded_custom = asdf.loads(custom_string, extensions=[MyExtension()])
```

### Testing and Validation

```python
import asdf
import numpy as np
from asdf.exceptions import ValidationError  # raised when schema validation fails

# Round-trip testing
original = {"test_data": np.random.random(100)}
serialized = asdf.dumps(original)
deserialized = asdf.loads(serialized)

# Verify data integrity
np.testing.assert_array_equal(
    original["test_data"],
    deserialized["test_data"]
)

# Validate against custom schema
# ('data' is the tree from the Basic String Serialization example)
try:
    validated_string = asdf.dumps(
        data,
        custom_schema="my_validation_schema.json"
    )
except ValidationError as e:
    print(f"Validation failed: {e}")
```

### Memory-Efficient Processing

```python
# For large datasets, consider serializing and processing data in chunks
def process_large_dataset(data_chunks):
    results = []
    for chunk in data_chunks:
        # Serialize chunk
        chunk_string = asdf.dumps({"chunk": chunk})

        # Process serialized data (e.g., send over network);
        # process_remotely is a placeholder for application-specific transport
        processed_string = process_remotely(chunk_string)

        # Deserialize result
        result = asdf.loads(processed_string)
        results.append(result["chunk"])

    return results
```