or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

cdf-reading.md · cdf-writing.md · epochs.md · index.md · xarray-integration.md

xarray-integration.md — docs/

0

# XArray Integration

1

2

Seamless conversion between CDF files and xarray Datasets with ISTP (International Solar-Terrestrial Physics) compliance checking and automatic metadata handling. This integration enables modern Python scientific workflows using xarray's powerful data analysis capabilities.

3

4

## Capabilities

5

6

### CDF to XArray Conversion

7

8

Convert CDF files directly to xarray Datasets with automatic dimension detection, coordinate assignment, and metadata preservation.

9

10

```python { .api }

11

def cdf_to_xarray(filename, to_datetime=True, to_unixtime=False, fillval_to_nan=False):

12

"""

13

Convert a CDF file to an xarray Dataset.

14

15

Parameters:

16

- filename (str): Path to the CDF file

17

- to_datetime (bool): Convert time variables to numpy datetime64 (default: True)

18

- to_unixtime (bool): Convert time variables to Unix timestamps (default: False)

19

- fillval_to_nan (bool): Replace fill values with NaN (default: False)

20

21

Returns:

22

xarray.Dataset: Dataset with variables as DataArrays, proper coordinates,

23

and preserved attributes from the CDF file

24

25

Notes:

26

- Automatically detects DEPEND_0 (usually time) relationships

27

- Preserves variable and global attributes

28

- Handles multi-dimensional coordinate dependencies

29

- Converts CDF time formats to datetime64 or Unix time

30

"""

31

```

32

33

**Usage Examples:**

34

35

```python

36

import cdflib.xarray

37

38

# Basic conversion with default settings

39

ds = cdflib.xarray.cdf_to_xarray('scientific_data.cdf')

40

print(ds)

41

print(f"Variables: {list(ds.data_vars)}")

42

print(f"Coordinates: {list(ds.coords)}")

43

44

# Convert time to Unix timestamps instead of datetime64

45

ds_unix = cdflib.xarray.cdf_to_xarray('scientific_data.cdf',

46

to_datetime=False,

47

to_unixtime=True)

48

49

# Replace fill values with NaN for easier analysis

50

ds_nan = cdflib.xarray.cdf_to_xarray('scientific_data.cdf',

51

fillval_to_nan=True)

52

53

# Access data and metadata

54

temperature = ds['Temperature']

55

print(f"Temperature units: {temperature.attrs.get('UNITS', 'N/A')}")

56

print(f"Temperature shape: {temperature.shape}")

57

print(f"Time coordinate: {temperature.coords}")

58

59

# Global attributes are preserved

60

print(f"Dataset title: {ds.attrs.get('TITLE', 'N/A')}")

61

print(f"Mission: {ds.attrs.get('PROJECT', 'N/A')}")

62

```

63

64

### XArray to CDF Conversion

65

66

Convert xarray Datasets to CDF files with comprehensive ISTP compliance validation and automatic metadata generation.

67

68

```python { .api }

69

def xarray_to_cdf(xarray_dataset, file_name, unix_time_to_cdf_time=False,

70

istp=True, terminate_on_warning=False, auto_fix_depends=True,

71

record_dimensions=["record0"], compression=0, nan_to_fillval=True):

72

"""

73

Convert an xarray Dataset to a CDF file.

74

75

Parameters:

76

- xarray_dataset (xarray.Dataset): Dataset to convert

77

- file_name (str): Output CDF file path

78

- unix_time_to_cdf_time (bool): Convert Unix timestamps to CDF time formats (default: False)

79

- istp (bool): Enable ISTP compliance checking (default: True)

80

- terminate_on_warning (bool): Stop conversion on ISTP warnings (default: False)

81

- auto_fix_depends (bool): Automatically create DEPEND_0 relationships (default: True)

82

- record_dimensions (list): Record dimension names (default: ["record0"])

83

- compression (int): Compression level 0-9 (default: 0)

84

- nan_to_fillval (bool): Convert NaN values to appropriate fill values (default: True)

85

86

Notes:

87

- Validates variable and attribute names for ISTP compliance

88

- Automatically detects and converts time variables to appropriate CDF epoch formats

89

- Generates required ISTP attributes if missing

90

- Handles multi-dimensional variables with proper DEPEND relationships

91

- Validates dimension consistency and monotonic time axes

92

"""

93

```

94

95

**Usage Examples:**

96

97

```python

98

import xarray as xr

99

import numpy as np

import pandas as pd

100

import cdflib.xarray

101

102

# Create sample xarray Dataset

103

time = pd.date_range('2023-01-01', periods=100, freq='1H')

104

lat = np.linspace(-90, 90, 181)

105

lon = np.linspace(-180, 180, 361)

106

107

# Create sample data

108

temperature = 15 + 10 * np.random.randn(100, 181, 361)

109

pressure = 1013 + 50 * np.random.randn(100, 181, 361)

110

111

ds = xr.Dataset({

112

'temperature': (['time', 'lat', 'lon'], temperature, {

113

'units': 'degC',

114

'long_name': 'Air Temperature',

115

'standard_name': 'air_temperature'

116

}),

117

'pressure': (['time', 'lat', 'lon'], pressure, {

118

'units': 'hPa',

119

'long_name': 'Air Pressure',

120

'standard_name': 'air_pressure'

121

})

122

}, coords={

123

'time': ('time', time),

124

'lat': ('lat', lat, {'units': 'degrees_north'}),

125

'lon': ('lon', lon, {'units': 'degrees_east'})

126

}, attrs={

127

'title': 'Weather Analysis Dataset',

128

'institution': 'Research Institute',

129

'source': 'Model simulation',

130

'history': 'Created with xarray'

131

})

132

133

# Convert to CDF with ISTP compliance

134

cdflib.xarray.xarray_to_cdf(ds, 'weather_data.cdf')

135

136

# Convert with custom settings

137

cdflib.xarray.xarray_to_cdf(ds, 'weather_compressed.cdf',

138

compression=9,

139

terminate_on_warning=True)

140

141

# Disable ISTP checking for non-standard datasets

142

cdflib.xarray.xarray_to_cdf(ds, 'custom_data.cdf', istp=False)

143

```

144

145

### Round-trip Conversion

146

147

Demonstrate data integrity through CDF → XArray → CDF conversion.

148

149

```python

150

import cdflib.xarray

151

import numpy as np

152

153

# Read original CDF file

154

original_ds = cdflib.xarray.cdf_to_xarray('input_data.cdf')

155

print(f"Original variables: {list(original_ds.data_vars)}")

156

157

# Perform some analysis with xarray

158

processed_ds = original_ds.copy()

159

160

# Add derived variable

161

if 'Temperature' in processed_ds:

162

processed_ds['Temperature_K'] = processed_ds['Temperature'] + 273.15

163

processed_ds['Temperature_K'].attrs = {

164

'units': 'K',

165

'long_name': 'Temperature in Kelvin',

166

'source': 'Derived from Temperature'

167

}

168

169

# Add analysis metadata

170

processed_ds.attrs.update({

171

'processing_date': '2023-06-15T10:30:00Z',

172

'processing_software': 'xarray + cdflib',

173

'derived_variables': 'Temperature_K'

174

})

175

176

# Write back to CDF

177

cdflib.xarray.xarray_to_cdf(processed_ds, 'processed_data.cdf')

178

179

# Verify round-trip integrity

180

verification_ds = cdflib.xarray.cdf_to_xarray('processed_data.cdf')

181

print(f"Processed variables: {list(verification_ds.data_vars)}")

182

print(f"New global attributes: {verification_ds.attrs}")

183

```

184

185

### ISTP Compliance Validation

186

187

Validate datasets against International Solar-Terrestrial Physics data standards.

188

189

```python

190

import cdflib.xarray

191

import xarray as xr

192

import numpy as np

import pandas as pd

193

194

# Create ISTP-compliant dataset

195

time_data = pd.date_range('2023-01-01', periods=1440, freq='1min')

196

magnetic_field = np.random.randn(1440, 3) * 100 + [25000, 0, -5000]

197

198

# ISTP-compliant variable and attribute names

199

ds = xr.Dataset({

200

'B_field': (['Epoch', 'components'], magnetic_field, {

201

'UNITS': 'nT',

202

'CATDESC': 'Magnetic field vector in GSM coordinates',

203

'DEPEND_0': 'Epoch',

204

'DEPEND_1': 'B_field_labels',

205

'FIELDNAM': 'Magnetic Field',

206

'FILLVAL': -1e31,

207

'VALIDMIN': -100000.0,

208

'VALIDMAX': 100000.0,

209

'VAR_TYPE': 'data'

210

}),

211

'B_field_labels': (['components'], ['Bx', 'By', 'Bz'], {

212

'CATDESC': 'Magnetic field component labels',

213

'FIELDNAM': 'Component labels',

214

'VAR_TYPE': 'metadata'

215

})

216

}, coords={

217

'Epoch': ('Epoch', time_data, {

218

'UNITS': 'ns',

219

'TIME_BASE': 'J2000',

220

'CATDESC': 'Default time',

221

'FIELDNAM': 'Time',

222

'FILLVAL': np.datetime64('NaT'),

223

'VAR_TYPE': 'support_data'

224

}),

225

'components': np.arange(3)

226

}, attrs={

227

'TITLE': 'ISTP Compliant Magnetic Field Data',

228

'PROJECT': 'Sample Mission',

229

'DISCIPLINE': 'Space Physics>Magnetospheric Science',

230

'DATA_TYPE': 'survey>magnetic field',

231

'DESCRIPTOR': 'MAG>Magnetic Field',

232

'INSTRUMENT_TYPE': 'Magnetometer',

233

'MISSION_GROUP': 'Sample Mission',

234

'PI_NAME': 'Dr. Sample',

235

'PI_AFFILIATION': 'Research Institute',

236

'TEXT': 'High-resolution magnetic field measurements'

237

})

238

239

# Convert with strict ISTP validation

240

try:

241

cdflib.xarray.xarray_to_cdf(ds, 'istp_compliant.cdf',

242

terminate_on_warning=True)

243

print("Dataset is ISTP compliant!")

244

except Exception as e:

245

print(f"ISTP compliance error: {e}")

246

```

247

248

### Working with Large Datasets

249

250

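Work with large scientific datasets by loading the file once and then narrowing it with xarray: select a subset of variables, slice by time or region, and resample before writing the result back to CDF.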

251

252

```python

253

import cdflib.xarray

254

import xarray as xr

255

256

# Load the CDF file (cdf_to_xarray reads every variable; subsetting happens afterwards)

257

ds = cdflib.xarray.cdf_to_xarray('large_dataset.cdf')

258

259

# Select subset of variables

260

subset_vars = ['Temperature', 'Pressure', 'Epoch']

261

ds_subset = ds[subset_vars]

262

263

# Time-based selection using xarray's powerful indexing

264

ds_recent = ds.sel(Epoch=slice('2023-06-01', '2023-06-30'))

265

266

# Spatial subset for gridded data

267

if 'lat' in ds.coords and 'lon' in ds.coords:

268

# Select North American region

269

ds_na = ds.sel(lat=slice(20, 60), lon=slice(-130, -60))

270

271

# Temporal resampling using xarray

272

if 'Epoch' in ds.coords:

273

# Resample to daily means

274

ds_daily = ds.resample(Epoch='1D').mean()

275

276

# Convert back to CDF

277

cdflib.xarray.xarray_to_cdf(ds_daily, 'daily_averages.cdf')

278

```

279

280

### Advanced Time Handling

281

282

Handle complex time coordinate scenarios with multiple epoch formats.

283

284

```python

285

import cdflib.xarray

286

import cdflib

287

288

# Read CDF with multiple time variables

289

ds = cdflib.xarray.cdf_to_xarray('multi_time_data.cdf')

290

291

# Check for different time formats in the original CDF

292

cdf = cdflib.CDF('multi_time_data.cdf')

293

info = cdf.cdf_info()

294

295

for var in info['zVariables']:

296

var_info = cdf.varinq(var)

297

if var_info['Data_Type'] in [31, 32, 33]: # CDF time types

298

print(f"Time variable {var}: type {var_info['Data_Type_Description']}")

299

300

# Convert specific time format preferences

301

ds_dt = cdflib.xarray.cdf_to_xarray('multi_time_data.cdf',

302

to_datetime=True)

303

ds_unix = cdflib.xarray.cdf_to_xarray('multi_time_data.cdf',

304

to_datetime=False,

305

to_unixtime=True)

306

307

# Compare time representations

308

print("Datetime format:", ds_dt.coords['Epoch'].values[:3])

309

print("Unix time format:", ds_unix.coords['Epoch'].values[:3])

310

```

311

312

## Error Handling and Validation

313

314

The xarray integration includes comprehensive error handling and validation.

315

316

```python { .api }

317

class ISTPError(Exception):

318

"""Exception raised for ISTP compliance violations."""

319

```

320

321

**Common Error Scenarios:**

322

323

```python

324

import cdflib.xarray

325

import xarray as xr

326

327

try:

328

# Invalid variable names (ISTP compliance)

329

bad_ds = xr.Dataset({

330

'123invalid': (['time'], [1, 2, 3]), # Cannot start with number

331

'bad-name': (['time'], [4, 5, 6]) # Hyphens not allowed

332

})

333

cdflib.xarray.xarray_to_cdf(bad_ds, 'bad.cdf')

334

335

except cdflib.xarray.ISTPError as e:

336

print(f"ISTP compliance error: {e}")

337

338

try:

339

# Non-monotonic time axis

340

bad_time = [3, 1, 2, 4, 5] # Not monotonic

341

bad_ds = xr.Dataset({

342

'data': (['time'], [10, 20, 30, 40, 50])

343

}, coords={'time': bad_time})

344

cdflib.xarray.xarray_to_cdf(bad_ds, 'bad_time.cdf')

345

346

except ValueError as e:

347

print(f"Time axis error: {e}")

348

349

try:

350

# File not found

351

ds = cdflib.xarray.cdf_to_xarray('nonexistent.cdf')

352

353

except FileNotFoundError as e:

354

print(f"File error: {e}")

355

```

356

357

## Integration with Scientific Workflows

358

359

### Climate Data Analysis

360

361

```python

362

import cdflib.xarray

363

import xarray as xr

364

import matplotlib.pyplot as plt

365

366

# Load climate dataset

367

climate_ds = cdflib.xarray.cdf_to_xarray('climate_data.cdf')

368

369

# Calculate climatology using xarray's groupby

370

if 'time' in climate_ds.coords:

371

monthly_climate = climate_ds.groupby('time.month').mean()

372

373

# Plot temperature climatology

374

if 'temperature' in climate_ds:

375

monthly_climate['temperature'].plot(x='lon', y='lat',

376

col='month', col_wrap=4)

377

plt.suptitle('Monthly Temperature Climatology')

378

plt.show()

379

380

# Save climatology as new CDF

381

cdflib.xarray.xarray_to_cdf(monthly_climate, 'climatology.cdf')

382

```

383

384

### Space Physics Data Processing

385

386

```python

387

import cdflib.xarray

388

import numpy as np

389

390

# Load magnetometer data

391

mag_ds = cdflib.xarray.cdf_to_xarray('magnetometer.cdf')

392

393

if 'B_field' in mag_ds and 'Epoch' in mag_ds.coords:

394

# Calculate magnetic field magnitude

395

B_magnitude = np.sqrt((mag_ds['B_field']**2).sum(dim='components'))

396

B_magnitude.attrs = {

397

'units': 'nT',

398

'long_name': 'Magnetic Field Magnitude',

399

'description': 'Total magnetic field strength'

400

}

401

402

# Add to dataset

403

mag_ds['B_magnitude'] = B_magnitude

404

405

# Calculate hourly averages

406

hourly_avg = mag_ds.resample(Epoch='1H').mean()

407

408

# Export processed data

409

cdflib.xarray.xarray_to_cdf(hourly_avg, 'magnetometer_hourly.cdf')

410

411

print(f"Original data points: {len(mag_ds.Epoch)}")

412

print(f"Hourly averages: {len(hourly_avg.Epoch)}")

413

```

414

415

## Types

416

417

```python { .api }

418

import xarray as xr

419

420

# XArray Dataset type returned by cdf_to_xarray

421

Dataset = xr.Dataset

422

423

# Exception for ISTP compliance issues

424

class ISTPError(Exception):

425

"""Exception raised for ISTP compliance violations during conversion."""

426

```