netCDF4 file access via h5py with hierarchical and legacy APIs for scientific computing

The h5netcdf legacy API provides drop-in compatibility with netCDF4-python, enabling existing code to work with minimal modifications. This API mirrors the method names, behavior, and conventions of the netCDF4-python library.
Main file interface compatible with netCDF4.Dataset.
class Dataset(File, Group, HasAttributesMixin):
    # Main file interface compatible with netCDF4.Dataset.  Inherits file
    # behavior from the core-API File and group behavior from Group, so a
    # Dataset is simultaneously the open file and its root group.

    def __init__(self, filename, mode: str = 'r', format: str = 'NETCDF4',
                 group: str = None, invalid_netcdf: bool = False,
                 phony_dims: str = None, **kwargs):
        """
        Open or create a netCDF dataset (compatible with netCDF4.Dataset).

        Args:
            filename: Path to the netCDF file or file-like object
            mode (str): File access mode ('r', 'w', 'a', 'r+')
            format (str): File format (only 'NETCDF4' supported)
            group (str): Group to open (None for root)
            invalid_netcdf (bool): Allow non-netCDF4 features
            phony_dims (str): Handle unlabeled dimensions ('sort', 'access')
            **kwargs: Additional file creation parameters
        """
        ...

    def close(self) -> None:
        """Close the dataset."""
        ...

    def sync(self) -> None:
        """Synchronize data to disk."""
        ...

    def flush(self) -> None:
        """Flush data to disk."""
        ...

# Group management with netCDF4-python compatible method names.
class Group(Group, HasAttributesMixin):
    # NOTE(review): the base class named Group here is presumably the
    # core-API h5netcdf Group imported from another module; the name
    # collision is as written in the original — confirm against the
    # package's imports.

    def createGroup(self, name: str) -> Group:
        """Create child group (compatible with netCDF4)."""
        ...

    @property
    def groups(self) -> dict:
        """Dictionary of child groups."""
        ...

    @property
    def variables(self) -> dict:
        """Dictionary of variables in this group."""
        ...

    @property
    def dimensions(self) -> dict:
        """Dictionary of dimensions in this group."""
        ...

    def createDimension(self, name: str, size: int = None) -> Dimension:
        """
        Create dimension (compatible with netCDF4).

        Args:
            name (str): Dimension name
            size (int): Dimension size (None for unlimited)

        Returns:
            Dimension: The created dimension
        """
        ...

    def createVariable(self, varname: str, datatype, dimensions: tuple = (),
                       zlib: bool = False, complevel: int = 4, shuffle: bool = True,
                       fletcher32: bool = False, contiguous: bool = False,
                       chunksizes: tuple = None, endian: str = 'native',
                       least_significant_digit: int = None, fill_value = None,
                       chunk_cache = None, **kwargs) -> Variable:
        """
        Create variable (compatible with netCDF4).

        Args:
            varname (str): Variable name
            datatype: Data type (numpy dtype, string, or UserType)
            dimensions (tuple): Dimension names
            zlib (bool): Enable gzip compression
            complevel (int): Compression level (1-9)
            shuffle (bool): Apply shuffle filter
            fletcher32 (bool): Apply Fletcher32 checksum
            contiguous (bool): Store data contiguously
            chunksizes (tuple): Chunk sizes
            endian (str): Byte order ('native', 'little', 'big')
            least_significant_digit (int): Precision for compression
            fill_value: Fill value for missing data
            chunk_cache: HDF5 chunk cache settings
            **kwargs: Additional creation parameters

        Returns:
            Variable: The created variable
        """
        ...

    # User-defined type creation with netCDF4-python method names.
    def createEnumType(self, datatype, datatype_name: str, enum_dict: dict) -> EnumType:
        """Create enumeration type (compatible with netCDF4)."""
        ...

    def createVLType(self, datatype, datatype_name: str) -> VLType:
        """Create variable-length type (compatible with netCDF4)."""
        ...

    def createCompoundType(self, datatype, datatype_name: str) -> CompoundType:
        """Create compound type (compatible with netCDF4)."""
        ...

    @property
    def enumtypes(self) -> dict:
        """Dictionary of enumeration types in this group."""
        ...

    @property
    def vltypes(self) -> dict:
        """Dictionary of variable-length types in this group."""
        ...

    @property
    def cmptypes(self) -> dict:
        """Dictionary of compound types in this group."""
        ...

# Variable class with netCDF4-python compatible methods.
class Variable(BaseVariable, HasAttributesMixin):
    # Legacy-API variable wrapper: adds netCDF4-python style inspection
    # methods on top of the core-API BaseVariable.

    def chunking(self):
        """
        Return chunking information (compatible with netCDF4).

        Returns:
            tuple or str: Chunk sizes or 'contiguous' if not chunked
        """
        ...

    def filters(self) -> dict:
        """
        Return HDF5 filter parameters (compatible with netCDF4).

        Returns:
            dict: Filter settings including complevel, zlib, shuffle, fletcher32
        """
        ...

    @property
    def dtype(self):
        """Return numpy dtype (compatible with netCDF4.Variable)."""
        ...

# NetCDF4-python style attribute access methods.
class HasAttributesMixin:
    # Mixin providing netCDF4-python style attribute access (getncattr /
    # setncattr / ncattrs) plus direct dotted access via __getattr__ /
    # __setattr__.  Shared by Dataset, Group, and Variable above.

    def getncattr(self, name: str):
        """
        Get attribute by name (compatible with netCDF4).

        Args:
            name (str): Attribute name

        Returns:
            Attribute value
        """
        ...

    def setncattr(self, name: str, value) -> None:
        """
        Set attribute by name (compatible with netCDF4).

        Args:
            name (str): Attribute name
            value: Attribute value
        """
        ...

    def ncattrs(self) -> list:
        """
        List attribute names (compatible with netCDF4).

        Returns:
            list: Attribute names
        """
        ...

    def __getattr__(self, name: str):
        """Direct attribute access (compatible with netCDF4)."""
        ...

    def __setattr__(self, name: str, value) -> None:
        """Direct attribute assignment (compatible with netCDF4)."""
        ...

# Default fill values and utility functions.
# Default _FillValue per netCDF type code (keys are the netCDF4-python
# type strings: S1=char, i*/u*=signed/unsigned ints by byte width,
# f4/f8=floats).  Values mirror netCDF4.default_fillvals.
default_fillvals = {
    'S1': '\x00',
    'i1': -127,
    'u1': 255,
    'i2': -32767,
    'u2': 65535,
    'i4': -2147483647,
    'u4': 4294967295,
    'i8': -9223372036854775806,
    'u8': 18446744073709551614,
    'f4': 9.969209968386869e36,
    'f8': 9.969209968386869e36,
}
def _get_default_fillvalue(dtype) -> any:
    """Get default fill value for data type."""
    # NOTE(review): the return annotation is the builtin `any`, presumably
    # meant to be typing.Any — confirm against the package source.
    ...

def _check_return_dtype_endianess(endian: str = "native") -> str:
    """Check and normalize endianness specification."""
    ...

# Original netCDF4-python code:
# import netCDF4
# h5netcdf equivalent (drop-in replacement):
import h5netcdf.legacyapi as netCDF4

# Rest of the code remains identical
with netCDF4.Dataset('data.nc', 'r') as dataset:
    # Access variables through .variables
    temperature = dataset.variables['temperature'][:]
    # Access dimensions through .dimensions
    time_size = len(dataset.dimensions['time'])
    # Access attributes using netCDF4 methods
    units = dataset.variables['temperature'].getncattr('units')
    global_attrs = dataset.ncattrs()

import h5netcdf.legacyapi as netCDF4
import numpy as np

# Create file using netCDF4-python syntax
with netCDF4.Dataset('output.nc', 'w', format='NETCDF4') as dataset:
    # Create dimensions
    time_dim = dataset.createDimension('time', None)  # Unlimited
    lat_dim = dataset.createDimension('lat', 180)
    lon_dim = dataset.createDimension('lon', 360)
    # Create coordinate variables
    times = dataset.createVariable('time', 'f8', ('time',))
    latitudes = dataset.createVariable('lat', 'f4', ('lat',))
    longitudes = dataset.createVariable('lon', 'f4', ('lon',))
    # Create data variable with compression
    temperature = dataset.createVariable(
        'temperature', 'f4', ('time', 'lat', 'lon'),
        zlib=True,        # Enable compression
        complevel=6,      # Compression level
        shuffle=True,     # Shuffle filter
        fletcher32=True   # Checksum
    )
    # Set attributes using netCDF4 methods
    dataset.setncattr('title', 'Global Temperature Data')
    dataset.setncattr('institution', 'Climate Research Center')
    temperature.setncattr('units', 'K')
    temperature.setncattr('long_name', 'Air Temperature')
    # Write coordinate data
    latitudes[:] = np.linspace(-89.5, 89.5, 180)
    longitudes[:] = np.linspace(-179.5, 179.5, 360)
    # Write time series data
    for t in range(10):
        times[t] = t
        temperature[t, :, :] = np.random.random((180, 360)) * 50 + 273.15

import h5netcdf.legacyapi as netCDF4
with netCDF4.Dataset('groups.nc', 'w') as dataset:
    # Create groups using netCDF4 syntax
    obs_group = dataset.createGroup('observations')
    model_group = dataset.createGroup('model')
    # Create dimensions in groups
    obs_group.createDimension('time', 100)
    obs_group.createDimension('station', 50)
    # Create variables in groups
    temp_obs = obs_group.createVariable('temperature', 'f4', ('time', 'station'))
    # Set group attributes
    obs_group.setncattr('description', 'Observational data')
    model_group.setncattr('description', 'Model output')

import h5netcdf.legacyapi as netCDF4
import numpy as np

with netCDF4.Dataset('types.nc', 'w') as dataset:
    # Create enumeration type
    quality_enum = dataset.createEnumType('i1', 'quality_flag', {
        'good': 0,
        'questionable': 1,
        'bad': 2,
        'missing': 3
    })
    # Create compound type
    obs_dtype = np.dtype([
        ('value', 'f4'),
        ('uncertainty', 'f4'),
        ('quality', 'i1')
    ])
    obs_compound = dataset.createCompoundType(obs_dtype, 'observation')
    # Create variable-length type
    vlen_str = dataset.createVLType(str, 'vlen_string')
    # Use these types in variables
    dataset.createDimension('n', 100)
    quality_var = dataset.createVariable('quality', quality_enum, ('n',))
    obs_var = dataset.createVariable('observations', obs_compound, ('n',))
    comment_var = dataset.createVariable('comments', vlen_str, ('n',))

import h5netcdf.legacyapi as netCDF4
with netCDF4.Dataset('attributes.nc', 'r') as dataset:
    # NetCDF4 style attribute access
    title = dataset.getncattr('title')
    all_global_attrs = dataset.ncattrs()
    # Direct attribute access (also supported)
    title = dataset.title  # Same as getncattr('title')
    # Variable attributes
    temp = dataset.variables['temperature']
    units = temp.getncattr('units')
    var_attrs = temp.ncattrs()
    # Direct variable attribute access
    units = temp.units  # Same as getncattr('units')
    # Check attribute existence
    if 'long_name' in temp.ncattrs():
        long_name = temp.getncattr('long_name')

import h5netcdf.legacyapi as netCDF4
with netCDF4.Dataset('info.nc', 'r') as dataset:
    temp = dataset.variables['temperature']
    # Get chunking information (netCDF4 method)
    chunks = temp.chunking()
    # NOTE(review): per the Variable.chunking docstring above, this may
    # return the string 'contiguous', which is also truthy — so this test
    # does not reliably mean "chunked"; confirm intended semantics.
    if chunks:
        print(f"Variable is chunked: {chunks}")
    else:
        print("Variable is not chunked")
    # Get filter information (netCDF4 method)
    filters = temp.filters()
    print(f"Compression: {filters.get('zlib', False)}")
    print(f"Shuffle: {filters.get('shuffle', False)}")
    print(f"Fletcher32: {filters.get('fletcher32', False)}")
    # Other netCDF4-compatible properties
    print(f"Shape: {temp.shape}")
    print(f"Dimensions: {temp.dimensions}")
    print(f"Data type: {temp.dtype}")

import h5netcdf.legacyapi as netCDF4
try:
    with netCDF4.Dataset('test.nc', 'r') as dataset:
        # Code that works with both netCDF4-python and h5netcdf
        pass
except FileNotFoundError:
    print("File not found")
except OSError as e:
    print(f"I/O error: {e}")

# You can mix modern and legacy APIs
import h5netcdf
import h5netcdf.legacyapi as netCDF4

# Open with legacy API
with netCDF4.Dataset('mixed.nc', 'w') as legacy_dataset:
    # Use legacy methods
    legacy_dataset.createDimension('time', 10)
    temp = legacy_dataset.createVariable('temperature', 'f4', ('time',))
    # Access underlying modern API objects
    modern_file = legacy_dataset  # Dataset inherits from File
    # Use modern API methods on the same object
    modern_file.flush()  # Modern API method
    # Variables also support both APIs
    temp.attrs['units'] = 'K'  # Modern API attribute access
    temp.setncattr('long_name', 'Temperature')  # Legacy API method

# Import change: Replace import netCDF4 with import h5netcdf.legacyapi as netCDF4
- Method compatibility: all major netCDF4-python methods are supported.
- Performance differences: h5netcdf may have different performance characteristics.
- Feature differences: some advanced netCDF4-python features may not be available.
Install with Tessl CLI
npx tessl i tessl/pypi-h5netcdfdocs
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10