0
# XArray Integration
1
2
Seamless conversion between CDF files and xarray Datasets with ISTP (International Solar-Terrestrial Physics) compliance checking and automatic metadata handling. This integration enables modern Python scientific workflows using xarray's powerful data analysis capabilities.
3
4
## Capabilities
5
6
### CDF to XArray Conversion
7
8
Convert CDF files directly to xarray Datasets with automatic dimension detection, coordinate assignment, and metadata preservation.
9
10
```python { .api }
11
def cdf_to_xarray(filename, to_datetime=True, to_unixtime=False, fillval_to_nan=False):
12
"""
13
Convert a CDF file to an xarray Dataset.
14
15
Parameters:
16
- filename (str): Path to the CDF file
17
- to_datetime (bool): Convert time variables to numpy datetime64 (default: True)
18
- to_unixtime (bool): Convert time variables to Unix timestamps (default: False)
19
- fillval_to_nan (bool): Replace fill values with NaN (default: False)
20
21
Returns:
22
xarray.Dataset: Dataset with variables as DataArrays, proper coordinates,
23
and preserved attributes from the CDF file
24
25
Notes:
26
- Automatically detects DEPEND_0 (usually time) relationships
27
- Preserves variable and global attributes
28
- Handles multi-dimensional coordinate dependencies
29
- Converts CDF time formats to datetime64 or Unix time
30
"""
31
```
32
33
**Usage Examples:**
34
35
```python
36
import cdflib.xarray
37
38
# Basic conversion with default settings
39
ds = cdflib.xarray.cdf_to_xarray('scientific_data.cdf')
40
print(ds)
41
print(f"Variables: {list(ds.data_vars)}")
42
print(f"Coordinates: {list(ds.coords)}")
43
44
# Convert time to Unix timestamps instead of datetime64
45
ds_unix = cdflib.xarray.cdf_to_xarray('scientific_data.cdf',
46
to_datetime=False,
47
to_unixtime=True)
48
49
# Replace fill values with NaN for easier analysis
50
ds_nan = cdflib.xarray.cdf_to_xarray('scientific_data.cdf',
51
fillval_to_nan=True)
52
53
# Access data and metadata
54
temperature = ds['Temperature']
55
print(f"Temperature units: {temperature.attrs.get('UNITS', 'N/A')}")
56
print(f"Temperature shape: {temperature.shape}")
57
print(f"Time coordinate: {temperature.coords}")
58
59
# Global attributes are preserved
60
print(f"Dataset title: {ds.attrs.get('TITLE', 'N/A')}")
61
print(f"Mission: {ds.attrs.get('PROJECT', 'N/A')}")
62
```
63
64
### XArray to CDF Conversion
65
66
Convert xarray Datasets to CDF files with comprehensive ISTP compliance validation and automatic metadata generation.
67
68
```python { .api }
69
def xarray_to_cdf(xarray_dataset, file_name, unix_time_to_cdf_time=False,
70
istp=True, terminate_on_warning=False, auto_fix_depends=True,
71
record_dimensions=["record0"], compression=0, nan_to_fillval=True):
72
"""
73
Convert an xarray Dataset to a CDF file.
74
75
Parameters:
76
- xarray_dataset (xarray.Dataset): Dataset to convert
77
- file_name (str): Output CDF file path
78
- unix_time_to_cdf_time (bool): Convert Unix timestamps to CDF time formats (default: False)
79
- istp (bool): Enable ISTP compliance checking (default: True)
80
- terminate_on_warning (bool): Stop conversion on ISTP warnings (default: False)
81
- auto_fix_depends (bool): Automatically create DEPEND_0 relationships (default: True)
82
- record_dimensions (list): Record dimension names (default: ["record0"])
83
- compression (int): Compression level 0-9 (default: 0)
84
- nan_to_fillval (bool): Convert NaN values to appropriate fill values (default: True)
85
86
Notes:
87
- Validates variable and attribute names for ISTP compliance
88
- Automatically detects and converts time variables to appropriate CDF epoch formats
89
- Generates required ISTP attributes if missing
90
- Handles multi-dimensional variables with proper DEPEND relationships
91
- Validates dimension consistency and monotonic time axes
92
"""
93
```
94
95
**Usage Examples:**
96
97
```python
98
import xarray as xr
99
import numpy as np
import pandas as pd
100
import cdflib.xarray
101
102
# Create sample xarray Dataset
103
time = pd.date_range('2023-01-01', periods=100, freq='1H')
104
lat = np.linspace(-90, 90, 181)
105
lon = np.linspace(-180, 180, 361)
106
107
# Create sample data
108
temperature = 15 + 10 * np.random.randn(100, 181, 361)
109
pressure = 1013 + 50 * np.random.randn(100, 181, 361)
110
111
ds = xr.Dataset({
112
'temperature': (['time', 'lat', 'lon'], temperature, {
113
'units': 'degC',
114
'long_name': 'Air Temperature',
115
'standard_name': 'air_temperature'
116
}),
117
'pressure': (['time', 'lat', 'lon'], pressure, {
118
'units': 'hPa',
119
'long_name': 'Air Pressure',
120
'standard_name': 'air_pressure'
121
})
122
}, coords={
123
'time': ('time', time),
124
'lat': ('lat', lat, {'units': 'degrees_north'}),
125
'lon': ('lon', lon, {'units': 'degrees_east'})
126
}, attrs={
127
'title': 'Weather Analysis Dataset',
128
'institution': 'Research Institute',
129
'source': 'Model simulation',
130
'history': 'Created with xarray'
131
})
132
133
# Convert to CDF with ISTP compliance
134
cdflib.xarray.xarray_to_cdf(ds, 'weather_data.cdf')
135
136
# Convert with custom settings
137
cdflib.xarray.xarray_to_cdf(ds, 'weather_compressed.cdf',
138
compression=9,
139
terminate_on_warning=True)
140
141
# Disable ISTP checking for non-standard datasets
142
cdflib.xarray.xarray_to_cdf(ds, 'custom_data.cdf', istp=False)
143
```
144
145
### Round-trip Conversion
146
147
Demonstrate data integrity through CDF → XArray → CDF conversion.
148
149
```python
150
import cdflib.xarray
151
import numpy as np
152
153
# Read original CDF file
154
original_ds = cdflib.xarray.cdf_to_xarray('input_data.cdf')
155
print(f"Original variables: {list(original_ds.data_vars)}")
156
157
# Perform some analysis with xarray
158
processed_ds = original_ds.copy()
159
160
# Add derived variable
161
if 'Temperature' in processed_ds:
162
processed_ds['Temperature_K'] = processed_ds['Temperature'] + 273.15
163
processed_ds['Temperature_K'].attrs = {
164
'units': 'K',
165
'long_name': 'Temperature in Kelvin',
166
'source': 'Derived from Temperature'
167
}
168
169
# Add analysis metadata
170
processed_ds.attrs.update({
171
'processing_date': '2023-06-15T10:30:00Z',
172
'processing_software': 'xarray + cdflib',
173
'derived_variables': 'Temperature_K'
174
})
175
176
# Write back to CDF
177
cdflib.xarray.xarray_to_cdf(processed_ds, 'processed_data.cdf')
178
179
# Verify round-trip integrity
180
verification_ds = cdflib.xarray.cdf_to_xarray('processed_data.cdf')
181
print(f"Processed variables: {list(verification_ds.data_vars)}")
182
print(f"New global attributes: {verification_ds.attrs}")
183
```
184
185
### ISTP Compliance Validation
186
187
Validate datasets against International Solar-Terrestrial Physics data standards.
188
189
```python
190
import cdflib.xarray
191
import xarray as xr
192
import numpy as np
import pandas as pd
193
194
# Create ISTP-compliant dataset
195
time_data = pd.date_range('2023-01-01', periods=1440, freq='1min')
196
magnetic_field = np.random.randn(1440, 3) * 100 + [25000, 0, -5000]
197
198
# ISTP-compliant variable and attribute names
199
ds = xr.Dataset({
200
'B_field': (['Epoch', 'components'], magnetic_field, {
201
'UNITS': 'nT',
202
'CATDESC': 'Magnetic field vector in GSM coordinates',
203
'DEPEND_0': 'Epoch',
204
'DEPEND_1': 'B_field_labels',
205
'FIELDNAM': 'Magnetic Field',
206
'FILLVAL': -1e31,
207
'VALIDMIN': -100000.0,
208
'VALIDMAX': 100000.0,
209
'VAR_TYPE': 'data'
210
}),
211
'B_field_labels': (['components'], ['Bx', 'By', 'Bz'], {
212
'CATDESC': 'Magnetic field component labels',
213
'FIELDNAM': 'Component labels',
214
'VAR_TYPE': 'metadata'
215
})
216
}, coords={
217
'Epoch': ('Epoch', time_data, {
218
'UNITS': 'ns',
219
'TIME_BASE': 'J2000',
220
'CATDESC': 'Default time',
221
'FIELDNAM': 'Time',
222
'FILLVAL': np.datetime64('NaT'),
223
'VAR_TYPE': 'support_data'
224
}),
225
'components': np.arange(3)
226
}, attrs={
227
'TITLE': 'ISTP Compliant Magnetic Field Data',
228
'PROJECT': 'Sample Mission',
229
'DISCIPLINE': 'Space Physics>Magnetospheric Science',
230
'DATA_TYPE': 'survey>magnetic field',
231
'DESCRIPTOR': 'MAG>Magnetic Field',
232
'INSTRUMENT_TYPE': 'Magnetometer',
233
'MISSION_GROUP': 'Sample Mission',
234
'PI_NAME': 'Dr. Sample',
235
'PI_AFFILIATION': 'Research Institute',
236
'TEXT': 'High-resolution magnetic field measurements'
237
})
238
239
# Convert with strict ISTP validation
240
try:
241
cdflib.xarray.xarray_to_cdf(ds, 'istp_compliant.cdf',
242
terminate_on_warning=True)
243
print("Dataset is ISTP compliant!")
244
except Exception as e:
245
print(f"ISTP compliance error: {e}")
246
```
247
248
### Working with Large Datasets
249
250
Work with large scientific datasets by selecting variables, slicing along coordinates, and resampling once the file has been loaded into an xarray Dataset.
251
252
```python
253
import cdflib.xarray
254
import xarray as xr
255
256
# Load the large CDF file (cdf_to_xarray reads the whole file; subsetting happens afterwards)
257
ds = cdflib.xarray.cdf_to_xarray('large_dataset.cdf')
258
259
# Select subset of variables
260
subset_vars = ['Temperature', 'Pressure', 'Epoch']
261
ds_subset = ds[subset_vars]
262
263
# Time-based selection using xarray's powerful indexing
264
ds_recent = ds.sel(Epoch=slice('2023-06-01', '2023-06-30'))
265
266
# Spatial subset for gridded data
267
if 'lat' in ds.coords and 'lon' in ds.coords:
268
# Select North American region
269
ds_na = ds.sel(lat=slice(20, 60), lon=slice(-130, -60))
270
271
# Temporal resampling using xarray
272
if 'Epoch' in ds.coords:
273
# Resample to daily means
274
ds_daily = ds.resample(Epoch='1D').mean()
275
276
# Convert back to CDF
277
cdflib.xarray.xarray_to_cdf(ds_daily, 'daily_averages.cdf')
278
```
279
280
### Advanced Time Handling
281
282
Handle complex time coordinate scenarios with multiple epoch formats.
283
284
```python
285
import cdflib.xarray
286
import cdflib
287
288
# Read CDF with multiple time variables
289
ds = cdflib.xarray.cdf_to_xarray('multi_time_data.cdf')
290
291
# Check for different time formats in the original CDF
292
cdf = cdflib.CDF('multi_time_data.cdf')
293
info = cdf.cdf_info()
294
295
for var in info['zVariables']:
296
var_info = cdf.varinq(var)
297
if var_info['Data_Type'] in [31, 32, 33]: # CDF time types
298
print(f"Time variable {var}: type {var_info['Data_Type_Description']}")
299
300
# Convert specific time format preferences
301
ds_dt = cdflib.xarray.cdf_to_xarray('multi_time_data.cdf',
302
to_datetime=True)
303
ds_unix = cdflib.xarray.cdf_to_xarray('multi_time_data.cdf',
304
to_datetime=False,
305
to_unixtime=True)
306
307
# Compare time representations
308
print("Datetime format:", ds_dt.coords['Epoch'].values[:3])
309
print("Unix time format:", ds_unix.coords['Epoch'].values[:3])
310
```
311
312
## Error Handling and Validation
313
314
The xarray integration includes comprehensive error handling and validation.
315
316
```python { .api }
317
class ISTPError(Exception):
318
"""Exception raised for ISTP compliance violations."""
319
```
320
321
**Common Error Scenarios:**
322
323
```python
324
import cdflib.xarray
325
import xarray as xr
326
327
try:
328
# Invalid variable names (ISTP compliance)
329
bad_ds = xr.Dataset({
330
'123invalid': (['time'], [1, 2, 3]), # Cannot start with number
331
'bad-name': (['time'], [4, 5, 6]) # Hyphens not allowed
332
})
333
cdflib.xarray.xarray_to_cdf(bad_ds, 'bad.cdf')
334
335
except cdflib.xarray.ISTPError as e:
336
print(f"ISTP compliance error: {e}")
337
338
try:
339
# Non-monotonic time axis
340
bad_time = [3, 1, 2, 4, 5] # Not monotonic
341
bad_ds = xr.Dataset({
342
'data': (['time'], [10, 20, 30, 40, 50])
343
}, coords={'time': bad_time})
344
cdflib.xarray.xarray_to_cdf(bad_ds, 'bad_time.cdf')
345
346
except ValueError as e:
347
print(f"Time axis error: {e}")
348
349
try:
350
# File not found
351
ds = cdflib.xarray.cdf_to_xarray('nonexistent.cdf')
352
353
except FileNotFoundError as e:
354
print(f"File error: {e}")
355
```
356
357
## Integration with Scientific Workflows
358
359
### Climate Data Analysis
360
361
```python
362
import cdflib.xarray
363
import xarray as xr
364
import matplotlib.pyplot as plt
365
366
# Load climate dataset
367
climate_ds = cdflib.xarray.cdf_to_xarray('climate_data.cdf')
368
369
# Calculate climatology using xarray's groupby
370
if 'time' in climate_ds.coords:
371
monthly_climate = climate_ds.groupby('time.month').mean()
372
373
# Plot temperature climatology
374
if 'temperature' in climate_ds:
375
monthly_climate['temperature'].plot(x='lon', y='lat',
376
col='month', col_wrap=4)
377
plt.suptitle('Monthly Temperature Climatology')
378
plt.show()
379
380
# Save climatology as new CDF
381
cdflib.xarray.xarray_to_cdf(monthly_climate, 'climatology.cdf')
382
```
383
384
### Space Physics Data Processing
385
386
```python
387
import cdflib.xarray
388
import numpy as np
389
390
# Load magnetometer data
391
mag_ds = cdflib.xarray.cdf_to_xarray('magnetometer.cdf')
392
393
if 'B_field' in mag_ds and 'Epoch' in mag_ds.coords:
394
# Calculate magnetic field magnitude
395
B_magnitude = np.sqrt((mag_ds['B_field']**2).sum(dim='components'))
396
B_magnitude.attrs = {
397
'units': 'nT',
398
'long_name': 'Magnetic Field Magnitude',
399
'description': 'Total magnetic field strength'
400
}
401
402
# Add to dataset
403
mag_ds['B_magnitude'] = B_magnitude
404
405
# Calculate hourly averages
406
hourly_avg = mag_ds.resample(Epoch='1H').mean()
407
408
# Export processed data
409
cdflib.xarray.xarray_to_cdf(hourly_avg, 'magnetometer_hourly.cdf')
410
411
print(f"Original data points: {len(mag_ds.Epoch)}")
412
print(f"Hourly averages: {len(hourly_avg.Epoch)}")
413
```
414
415
## Types
416
417
```python { .api }
418
import xarray as xr
419
420
# XArray Dataset type returned by cdf_to_xarray
421
Dataset = xr.Dataset
422
423
# Exception for ISTP compliance issues
424
class ISTPError(Exception):
425
"""Exception raised for ISTP compliance violations during conversion."""
426
```