netCDF4 file access via h5py with hierarchical and legacy APIs for scientific computing
69
NetCDF4 supports user-defined data types including enumeration types, variable-length types, and compound (structured) types. These enable complex data structures beyond basic numeric and string types.
Common functionality for all user-defined types.
class UserType(BaseObject):
    """Base class with the functionality common to all user-defined types.

    The enumeration (EnumType), variable-length (VLType) and compound
    (CompoundType) types all derive from this class.
    """

    @property
    def name(self) -> str:
        """Name of the user-defined type."""
        ...

    @property
    def dtype(self) -> np.dtype:
        """NumPy dtype representation of the type."""
        ...

Define discrete sets of named values, useful for categorical data and flags.
class EnumType(UserType):
    """User-defined enumeration type: a discrete set of named values."""

    @property
    def enum_dict(self) -> dict:
        """Dictionary mapping enum names to their values."""
        ...
def create_enumtype(self, datatype, datatype_name: str, enum_dict: dict) -> EnumType:
    """
    Create an enumeration type.

    Args:
        datatype: Base integer type (e.g., 'i1', 'i2', 'i4')
        datatype_name (str): Name for the enumeration type
        enum_dict (dict): Mapping of enum names to integer values,
            e.g. ``{'good': 0, 'bad': 1}``

    Returns:
        EnumType: The created enumeration type
    """
    ...

Store arrays of varying lengths, useful for ragged arrays and string data.
class VLType(UserType):
    """User-defined variable-length type; adds nothing beyond UserType."""

    pass
def create_vltype(self, datatype, datatype_name: str) -> VLType:
    """
    Create a variable-length type.

    Args:
        datatype: Base data type for array elements (the usage examples
            pass ``str`` and ``'i4'``)
        datatype_name (str): Name for the variable-length type

    Returns:
        VLType: The created variable-length type
    """
    ...

Define structured types with multiple named fields, similar to C structs.
class CompoundType(UserType):
    """User-defined compound (structured) type with multiple named fields."""

    @property
    def dtype_view(self) -> np.dtype:
        """Alternative dtype view for string handling."""
        # NOTE(review): how this view differs from ``dtype`` is not shown
        # here - confirm against the h5netcdf implementation.
        ...
def create_cmptype(self, datatype, datatype_name: str) -> CompoundType:
    """
    Create a compound type.

    Args:
        datatype: NumPy structured dtype defining the compound type,
            e.g. ``np.dtype([('x', 'f4'), ('y', 'f4')])``
        datatype_name (str): Name for the compound type

    Returns:
        CompoundType: The created compound type
    """
    ...

Access user-defined types within groups.
@property
def enumtypes(self) -> Frozen:
    """Dictionary-like access to enumeration types.

    The usage examples iterate it with ``.items()``, yielding
    ``(name, EnumType)`` pairs.
    """
    ...
@property
def vltypes(self) -> Frozen:
    """Dictionary-like access to variable-length types.

    The usage examples iterate it with ``.items()``, yielding
    ``(name, VLType)`` pairs.
    """
    ...
@property
def cmptypes(self) -> Frozen:
    """Dictionary-like access to compound types.

    The usage examples iterate it with ``.items()``, yielding
    ``(name, CompoundType)`` pairs.
    """
    ...

import h5netcdf
import numpy as np
# --- Example: enumeration types -------------------------------------------
with h5netcdf.File('enum_types.nc', 'w') as f:
    # Create enumeration for quality flags
    quality_enum = f.create_enumtype(
        'i1',  # Base type: signed 8-bit integer
        'quality_flag',
        {
            'good': 0,
            'questionable': 1,
            'bad': 2,
            'missing': 3,
        },
    )

    # Create enumeration for weather conditions
    weather_enum = f.create_enumtype(
        'i2',  # Base type: signed 16-bit integer
        'weather_type',
        {
            'clear': 0,
            'partly_cloudy': 1,
            'cloudy': 2,
            'rain': 3,
            'snow': 4,
            'storm': 5,
        },
    )

    # Create dimensions and variables using enum types
    f.dimensions['time'] = 100
    f.dimensions['station'] = 50

    quality = f.create_variable('quality', ('time', 'station'),
                                dtype=quality_enum)
    weather = f.create_variable('weather', ('time', 'station'),
                                dtype=weather_enum)

    # Write enum values using integer codes (only the first time step is filled)
    quality[0, :] = np.random.choice([0, 1, 2, 3], size=50)
    weather[0, :] = np.random.choice([0, 1, 2, 3, 4, 5], size=50)

    # Access enum information
    print(f"Quality enum values: {quality_enum.enum_dict}")
    print(f"Weather enum values: {weather_enum.enum_dict}")

# --- Example: variable-length types ---------------------------------------
with h5netcdf.File('vlen_types.nc', 'w') as f:
    # Create variable-length string type
    vlen_str = f.create_vltype(str, 'vlen_string')

    # Create variable-length integer array type
    vlen_int = f.create_vltype('i4', 'vlen_int_array')

    # Create variables using VL types
    f.dimensions['record'] = 10

    # Variable-length strings (for varying-length text)
    comments = f.create_variable('comments', ('record',), dtype=vlen_str)

    # Variable-length integer arrays (for ragged arrays)
    measurements = f.create_variable('measurements', ('record',), dtype=vlen_int)

    # Write variable-length data
    comment_data = [
        "Short comment",
        "This is a much longer comment with more detail",
        "Medium length",
        "",  # Empty string
        "Another comment",
    ]
    measurement_data = [
        [1, 2, 3],        # 3 values
        [4, 5, 6, 7, 8],  # 5 values
        [9],              # 1 value
        [],               # No values
        [10, 11],         # 2 values
    ]

    # Note: Writing VL data depends on h5py version and backend
    # This is conceptual - actual syntax may vary
    #
    # zip() stops at the shorter of the two lists, so every index it yields
    # is valid for both; no extra bounds checks are needed. Only the first
    # 5 of the 10 'record' slots are written here.
    for i, (comment, measurements_list) in enumerate(zip(comment_data, measurement_data)):
        comments[i] = comment
        measurements[i] = measurements_list

# --- Example: compound types ----------------------------------------------
with h5netcdf.File('compound_types.nc', 'w') as f:
    # Define compound type for weather observations
    weather_dtype = np.dtype([
        ('temperature', 'f4'),     # 32-bit float
        ('humidity', 'f4'),        # 32-bit float
        ('pressure', 'f8'),        # 64-bit float
        ('wind_speed', 'f4'),      # 32-bit float
        ('wind_direction', 'i2'),  # 16-bit integer
        ('station_id', 'i4'),      # 32-bit integer
        ('timestamp', 'i8'),       # 64-bit integer
    ])
    weather_compound = f.create_cmptype(weather_dtype, 'weather_obs')

    # Create variable using compound type
    f.dimensions['observation'] = 1000
    obs = f.create_variable('observations', ('observation',),
                            dtype=weather_compound)

    # Create structured array data, one record per observation
    data = np.zeros(1000, dtype=weather_dtype)
    data['temperature'] = np.random.normal(20, 10, 1000)
    data['humidity'] = np.random.uniform(30, 90, 1000)
    data['pressure'] = np.random.normal(1013.25, 20, 1000)
    data['wind_speed'] = np.random.exponential(5, 1000)
    data['wind_direction'] = np.random.randint(0, 360, 1000)
    data['station_id'] = np.random.randint(1000, 9999, 1000)
    data['timestamp'] = np.arange(1000) + 1640000000  # Unix timestamps

    # Write compound data
    obs[:] = data

    # Access compound type information
    print(f"Compound type fields: {weather_compound.dtype.names}")
    print(f"Field types: {[weather_compound.dtype.fields[name][0] for name in weather_compound.dtype.names]}")

# --- Example: enum codes stored inside a compound type --------------------
with h5netcdf.File('nested_types.nc', 'w') as f:
    # Create enumeration for data source
    source_enum = f.create_enumtype('i1', 'data_source', {
        'satellite': 0,
        'ground_station': 1,
        'aircraft': 2,
        'ship': 3,
    })

    # Create compound type with a field that carries the enum's integer codes.
    # The 'source' field is a plain 'i1' integer: the compound dtype does not
    # reference source_enum itself, it only stores the same code values.
    measurement_dtype = np.dtype([
        ('value', 'f4'),
        ('uncertainty', 'f4'),
        ('source', 'i1'),  # Will use enum values
        ('quality_code', 'i1'),
    ])
    measurement_compound = f.create_cmptype(measurement_dtype, 'measurement')

    # Create variable using the compound type
    f.dimensions['sample'] = 500
    data_var = f.create_variable('data', ('sample',), dtype=measurement_compound)

    # Create data with enum values in compound type
    sample_data = np.zeros(500, dtype=measurement_dtype)
    sample_data['value'] = np.random.normal(0, 1, 500)
    sample_data['uncertainty'] = np.random.exponential(0.1, 500)
    sample_data['source'] = np.random.choice([0, 1, 2, 3], 500)  # Enum values
    sample_data['quality_code'] = np.random.choice([0, 1, 2], 500)
    data_var[:] = sample_data

# --- Example: reading user-defined types ----------------------------------
# NOTE(review): 'read_types.nc' is not created by any of the examples in this
# section; it must already exist for this example to run.
with h5netcdf.File('read_types.nc', 'r') as f:
    # List all user-defined types
    print("Enumeration types:")
    for name, enum_type in f.enumtypes.items():
        print(f"  {name}: {enum_type.enum_dict}")

    print("\nVariable-length types:")
    for name, vl_type in f.vltypes.items():
        print(f"  {name}: {vl_type.dtype}")

    print("\nCompound types:")
    for name, cmp_type in f.cmptypes.items():
        print(f"  {name}: {cmp_type.dtype}")

    # Read data with user-defined types
    if 'observations' in f.variables:
        obs = f.variables['observations']
        data = obs[:]

        # Access individual fields of compound data
        temperatures = data['temperature']
        pressures = data['pressure']
        print(f"Temperature range: {temperatures.min():.1f} to {temperatures.max():.1f}")
        print(f"Pressure range: {pressures.min():.1f} to {pressures.max():.1f}")

# --- Example: using a parent group's types in a child group ---------------
with h5netcdf.File('type_inheritance.nc', 'w') as f:
    # Create types in root group
    status_enum = f.create_enumtype('i1', 'status', {
        'active': 1,
        'inactive': 0,
        'maintenance': 2,
    })

    # Create child group
    sensors = f.create_group('sensors')

    # Child groups inherit parent types
    sensors.dimensions['sensor_id'] = 100

    # Use parent's enum type in child group
    sensor_status = sensors.create_variable('status', ('sensor_id',),
                                            dtype=status_enum)

    # Create group-specific type
    sensor_type_enum = sensors.create_enumtype('i1', 'sensor_type', {
        'temperature': 0,
        'humidity': 1,
        'pressure': 2,
        'wind': 3,
    })
    sensor_type_var = sensors.create_variable('type', ('sensor_id',),
                                              dtype=sensor_type_enum)

import h5netcdf.legacyapi as netCDF4
with netCDF4.Dataset('legacy_types.nc', 'w') as f:
# Legacy API methods (aliases to core methods)
quality_enum = f.createEnumType('i1', 'quality', {
'good': 0,
'bad': 1,
'missing': 2
})
vlen_str = f.createVLType(str, 'vlen_string')
compound_dtype = np.dtype([('x', 'f4'), ('y', 'f4')])
point_type = f.createCompoundType(compound_dtype, 'point')
# Create variables using these types
f.createDimension('n', 10)
quality_var = f.createVariable('quality', quality_enum, ('n',))
text_var = f.createVariable('text', vlen_str, ('n',))
points_var = f.createVariable('points', point_type, ('n',))Install with Tessl CLI
npx tessl i tessl/pypi-h5netcdf
docs
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10