A Python CDF reader toolkit for reading and writing CDF files without requiring NASA CDF library installation
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Seamless conversion between CDF files and xarray Datasets with ISTP (International Solar-Terrestrial Physics) compliance checking and automatic metadata handling. This integration enables modern Python scientific workflows using xarray's powerful data analysis capabilities.
Convert CDF files directly to xarray Datasets with automatic dimension detection, coordinate assignment, and metadata preservation.
def cdf_to_xarray(filename, to_datetime=True, to_unixtime=False, fillval_to_nan=False):
"""
Convert a CDF file to an xarray Dataset.
Parameters:
- filename (str): Path to the CDF file
- to_datetime (bool): Convert time variables to numpy datetime64 (default: True)
- to_unixtime (bool): Convert time variables to Unix timestamps (default: False)
- fillval_to_nan (bool): Replace fill values with NaN (default: False)
Returns:
xarray.Dataset: Dataset with variables as DataArrays, proper coordinates,
and preserved attributes from the CDF file
Notes:
- Automatically detects DEPEND_0 (usually time) relationships
- Preserves variable and global attributes
- Handles multi-dimensional coordinate dependencies
- Converts CDF time formats to datetime64 or Unix time
"""
Usage Examples:
import cdflib.xarray
# Basic conversion with default settings
ds = cdflib.xarray.cdf_to_xarray('scientific_data.cdf')
print(ds)
print(f"Variables: {list(ds.data_vars)}")
print(f"Coordinates: {list(ds.coords)}")
# Convert time to Unix timestamps instead of datetime64
ds_unix = cdflib.xarray.cdf_to_xarray('scientific_data.cdf',
to_datetime=False,
to_unixtime=True)
# Replace fill values with NaN for easier analysis
ds_nan = cdflib.xarray.cdf_to_xarray('scientific_data.cdf',
fillval_to_nan=True)
# Access data and metadata
temperature = ds['Temperature']
print(f"Temperature units: {temperature.attrs.get('UNITS', 'N/A')}")
print(f"Temperature shape: {temperature.shape}")
print(f"Time coordinate: {temperature.coords}")
# Global attributes are preserved
print(f"Dataset title: {ds.attrs.get('TITLE', 'N/A')}")
print(f"Mission: {ds.attrs.get('PROJECT', 'N/A')}")
Convert xarray Datasets to CDF files with comprehensive ISTP compliance validation and automatic metadata generation.
def xarray_to_cdf(xarray_dataset, file_name, unix_time_to_cdf_time=False,
istp=True, terminate_on_warning=False, auto_fix_depends=True,
record_dimensions=["record0"], compression=0, nan_to_fillval=True):
"""
Convert an xarray Dataset to a CDF file.
Parameters:
- xarray_dataset (xarray.Dataset): Dataset to convert
- file_name (str): Output CDF file path
- unix_time_to_cdf_time (bool): Convert Unix timestamps to CDF time formats (default: False)
- istp (bool): Enable ISTP compliance checking (default: True)
- terminate_on_warning (bool): Stop conversion on ISTP warnings (default: False)
- auto_fix_depends (bool): Automatically create DEPEND_0 relationships (default: True)
- record_dimensions (list): Record dimension names (default: ["record0"])
- compression (int): Compression level 0-9 (default: 0)
- nan_to_fillval (bool): Convert NaN values to appropriate fill values (default: True)
Notes:
- Validates variable and attribute names for ISTP compliance
- Automatically detects and converts time variables to appropriate CDF epoch formats
- Generates required ISTP attributes if missing
- Handles multi-dimensional variables with proper DEPEND relationships
- Validates dimension consistency and monotonic time axes
"""
Usage Examples:
import xarray as xr
import numpy as np
import pandas as pd
import cdflib.xarray
# Create sample xarray Dataset
time = pd.date_range('2023-01-01', periods=100, freq='1H')
lat = np.linspace(-90, 90, 181)
lon = np.linspace(-180, 180, 361)
# Create sample data
temperature = 15 + 10 * np.random.randn(100, 181, 361)
pressure = 1013 + 50 * np.random.randn(100, 181, 361)
ds = xr.Dataset({
'temperature': (['time', 'lat', 'lon'], temperature, {
'units': 'degC',
'long_name': 'Air Temperature',
'standard_name': 'air_temperature'
}),
'pressure': (['time', 'lat', 'lon'], pressure, {
'units': 'hPa',
'long_name': 'Air Pressure',
'standard_name': 'air_pressure'
})
}, coords={
'time': ('time', time),
'lat': ('lat', lat, {'units': 'degrees_north'}),
'lon': ('lon', lon, {'units': 'degrees_east'})
}, attrs={
'title': 'Weather Analysis Dataset',
'institution': 'Research Institute',
'source': 'Model simulation',
'history': 'Created with xarray'
})
# Convert to CDF with ISTP compliance
cdflib.xarray.xarray_to_cdf(ds, 'weather_data.cdf')
# Convert with custom settings
cdflib.xarray.xarray_to_cdf(ds, 'weather_compressed.cdf',
compression=9,
terminate_on_warning=True)
# Disable ISTP checking for non-standard datasets
cdflib.xarray.xarray_to_cdf(ds, 'custom_data.cdf', istp=False)
Demonstrate data integrity through CDF → XArray → CDF conversion.
import cdflib.xarray
import numpy as np
# Read original CDF file
original_ds = cdflib.xarray.cdf_to_xarray('input_data.cdf')
print(f"Original variables: {list(original_ds.data_vars)}")
# Perform some analysis with xarray
processed_ds = original_ds.copy()
# Add derived variable
if 'Temperature' in processed_ds:
processed_ds['Temperature_K'] = processed_ds['Temperature'] + 273.15
processed_ds['Temperature_K'].attrs = {
'units': 'K',
'long_name': 'Temperature in Kelvin',
'source': 'Derived from Temperature'
}
# Add analysis metadata
processed_ds.attrs.update({
'processing_date': '2023-06-15T10:30:00Z',
'processing_software': 'xarray + cdflib',
'derived_variables': 'Temperature_K'
})
# Write back to CDF
cdflib.xarray.xarray_to_cdf(processed_ds, 'processed_data.cdf')
# Verify round-trip integrity
verification_ds = cdflib.xarray.cdf_to_xarray('processed_data.cdf')
print(f"Processed variables: {list(verification_ds.data_vars)}")
print(f"New global attributes: {verification_ds.attrs}")
Validate datasets against International Solar-Terrestrial Physics data standards.
import cdflib.xarray
import xarray as xr
import numpy as np
import pandas as pd
# Create ISTP-compliant dataset
time_data = pd.date_range('2023-01-01', periods=1440, freq='1min')
magnetic_field = np.random.randn(1440, 3) * 100 + [25000, 0, -5000]
# ISTP-compliant variable and attribute names
ds = xr.Dataset({
'B_field': (['Epoch', 'components'], magnetic_field, {
'UNITS': 'nT',
'CATDESC': 'Magnetic field vector in GSM coordinates',
'DEPEND_0': 'Epoch',
'DEPEND_1': 'B_field_labels',
'FIELDNAM': 'Magnetic Field',
'FILLVAL': -1e31,
'VALIDMIN': -100000.0,
'VALIDMAX': 100000.0,
'VAR_TYPE': 'data'
}),
'B_field_labels': (['components'], ['Bx', 'By', 'Bz'], {
'CATDESC': 'Magnetic field component labels',
'FIELDNAM': 'Component labels',
'VAR_TYPE': 'metadata'
})
}, coords={
'Epoch': ('Epoch', time_data, {
'UNITS': 'ns',
'TIME_BASE': 'J2000',
'CATDESC': 'Default time',
'FIELDNAM': 'Time',
'FILLVAL': np.datetime64('NaT'),
'VAR_TYPE': 'support_data'
}),
'components': np.arange(3)
}, attrs={
'TITLE': 'ISTP Compliant Magnetic Field Data',
'PROJECT': 'Sample Mission',
'DISCIPLINE': 'Space Physics>Magnetospheric Science',
'DATA_TYPE': 'survey>magnetic field',
'DESCRIPTOR': 'MAG>Magnetic Field',
'INSTRUMENT_TYPE': 'Magnetometer',
'MISSION_GROUP': 'Sample Mission',
'PI_NAME': 'Dr. Sample',
'PI_AFFILIATION': 'Research Institute',
'TEXT': 'High-resolution magnetic field measurements'
})
# Convert with strict ISTP validation
try:
cdflib.xarray.xarray_to_cdf(ds, 'istp_compliant.cdf',
terminate_on_warning=True)
print("Dataset is ISTP compliant!")
except Exception as e:
print(f"ISTP compliance error: {e}")
Efficiently handle large scientific datasets with chunking and selective loading.
import cdflib.xarray
import xarray as xr
# Load the large CDF file into a Dataset; variables of interest are selected from it below
ds = cdflib.xarray.cdf_to_xarray('large_dataset.cdf')
# Select subset of variables
subset_vars = ['Temperature', 'Pressure', 'Epoch']
ds_subset = ds[subset_vars]
# Time-based selection using xarray's powerful indexing
ds_recent = ds.sel(Epoch=slice('2023-06-01', '2023-06-30'))
# Spatial subset for gridded data
if 'lat' in ds.coords and 'lon' in ds.coords:
# Select North American region
ds_na = ds.sel(lat=slice(20, 60), lon=slice(-130, -60))
# Temporal resampling using xarray
if 'Epoch' in ds.coords:
# Resample to daily means
ds_daily = ds.resample(Epoch='1D').mean()
# Convert back to CDF
cdflib.xarray.xarray_to_cdf(ds_daily, 'daily_averages.cdf')
Handle complex time coordinate scenarios with multiple epoch formats.
import cdflib.xarray
import cdflib
# Read CDF with multiple time variables
ds = cdflib.xarray.cdf_to_xarray('multi_time_data.cdf')
# Check for different time formats in the original CDF
cdf = cdflib.CDF('multi_time_data.cdf')
info = cdf.cdf_info()
for var in info['zVariables']:
var_info = cdf.varinq(var)
if var_info['Data_Type'] in [31, 32, 33]: # CDF time types
print(f"Time variable {var}: type {var_info['Data_Type_Description']}")
# Convert specific time format preferences
ds_dt = cdflib.xarray.cdf_to_xarray('multi_time_data.cdf',
to_datetime=True)
ds_unix = cdflib.xarray.cdf_to_xarray('multi_time_data.cdf',
to_datetime=False,
to_unixtime=True)
# Compare time representations
print("Datetime format:", ds_dt.coords['Epoch'].values[:3])
print("Unix time format:", ds_unix.coords['Epoch'].values[:3])
The xarray integration includes comprehensive error handling and validation.
class ISTPError(Exception):
"""Exception raised for ISTP compliance violations."""
Common Error Scenarios:
import cdflib.xarray
import xarray as xr
try:
# Invalid variable names (ISTP compliance)
bad_ds = xr.Dataset({
'123invalid': (['time'], [1, 2, 3]), # Cannot start with number
'bad-name': (['time'], [4, 5, 6]) # Hyphens not allowed
})
cdflib.xarray.xarray_to_cdf(bad_ds, 'bad.cdf')
except cdflib.xarray.ISTPError as e:
print(f"ISTP compliance error: {e}")
try:
# Non-monotonic time axis
bad_time = [3, 1, 2, 4, 5] # Not monotonic
bad_ds = xr.Dataset({
'data': (['time'], [10, 20, 30, 40, 50])
}, coords={'time': bad_time})
cdflib.xarray.xarray_to_cdf(bad_ds, 'bad_time.cdf')
except ValueError as e:
print(f"Time axis error: {e}")
try:
# File not found
ds = cdflib.xarray.cdf_to_xarray('nonexistent.cdf')
except FileNotFoundError as e:
print(f"File error: {e}")
import cdflib.xarray
import xarray as xr
import matplotlib.pyplot as plt
# Load climate dataset
climate_ds = cdflib.xarray.cdf_to_xarray('climate_data.cdf')
# Calculate climatology using xarray's groupby
if 'time' in climate_ds.coords:
monthly_climate = climate_ds.groupby('time.month').mean()
# Plot temperature climatology
if 'temperature' in climate_ds:
monthly_climate['temperature'].plot(x='lon', y='lat',
col='month', col_wrap=4)
plt.suptitle('Monthly Temperature Climatology')
plt.show()
# Save climatology as new CDF
cdflib.xarray.xarray_to_cdf(monthly_climate, 'climatology.cdf')
import cdflib.xarray
import numpy as np
# Load magnetometer data
mag_ds = cdflib.xarray.cdf_to_xarray('magnetometer.cdf')
if 'B_field' in mag_ds and 'Epoch' in mag_ds.coords:
# Calculate magnetic field magnitude
B_magnitude = np.sqrt((mag_ds['B_field']**2).sum(dim='components'))
B_magnitude.attrs = {
'units': 'nT',
'long_name': 'Magnetic Field Magnitude',
'description': 'Total magnetic field strength'
}
# Add to dataset
mag_ds['B_magnitude'] = B_magnitude
# Calculate hourly averages
hourly_avg = mag_ds.resample(Epoch='1H').mean()
# Export processed data
cdflib.xarray.xarray_to_cdf(hourly_avg, 'magnetometer_hourly.cdf')
print(f"Original data points: {len(mag_ds.Epoch)}")
print(f"Hourly averages: {len(hourly_avg.Epoch)}")
import xarray as xr
# XArray Dataset type returned by cdf_to_xarray
Dataset = xr.Dataset
# Exception for ISTP compliance issues
class ISTPError(Exception):
"""Exception raised for ISTP compliance violations during conversion."""
Install with Tessl CLI
npx tessl i tessl/pypi-cdflib