CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-h5netcdf

netCDF4 file access via h5py with hierarchical and legacy APIs for scientific computing

69

0.83x
Overview
Eval results
Files

docs/user-types.md

User-Defined Types

NetCDF4 supports user-defined data types including enumeration types, variable-length types, and compound (structured) types. These enable complex data structures beyond basic numeric and string types.

Capabilities

Base User Type

Common functionality for all user-defined types.

class UserType(BaseObject):
    """Base class holding the functionality common to all user-defined types."""

    @property
    def name(self) -> str:
        """Name of the user-defined type."""
        ...
    
    @property
    def dtype(self) -> np.dtype:
        """NumPy dtype representation of the user-defined type."""
        ...

Enumeration Types

Define discrete sets of named values, useful for categorical data and flags.

class EnumType(UserType):
    """Enumeration type: a discrete set of named integer values."""

    @property
    def enum_dict(self) -> dict:
        """Dictionary mapping each enum name to its integer value."""
        ...

def create_enumtype(self, datatype, datatype_name: str, enum_dict: dict) -> EnumType:
    """
    Create an enumeration type.

    Called on a file or group object, for example
    ``f.create_enumtype('i1', 'quality_flag', {'good': 0, 'bad': 2})``.
    The returned type can be passed as ``dtype`` when creating a variable.
    
    Args:
        datatype: Base integer type (e.g., 'i1', 'i2', 'i4')
        datatype_name (str): Name for the enumeration type
        enum_dict (dict): Mapping of enum names to integer values
        
    Returns:
        EnumType: The created enumeration type
    """
    ...

Variable-Length Types

Store arrays of varying lengths, useful for ragged arrays and string data.

class VLType(UserType):
    """Variable-length type: each element stores an array of varying length."""
    pass

def create_vltype(self, datatype, datatype_name: str) -> VLType:
    """
    Create a variable-length type.

    Called on a file or group object, for example
    ``f.create_vltype('i4', 'vlen_int_array')``.
    The returned type can be passed as ``dtype`` when creating a variable.
    
    Args:
        datatype: Base data type for array elements
        datatype_name (str): Name for the variable-length type
        
    Returns:
        VLType: The created variable-length type
    """
    ...

Compound Types

Define structured types with multiple named fields, similar to C structs.

class CompoundType(UserType):
    """Compound (structured) type with multiple named fields, similar to a C struct."""

    @property
    def dtype_view(self) -> np.dtype:
        """Alternative dtype view of the compound type, used for string handling."""
        ...

def create_cmptype(self, datatype, datatype_name: str) -> CompoundType:
    """
    Create a compound type.

    Called on a file or group object, for example
    ``f.create_cmptype(np.dtype([('x', 'f4'), ('y', 'f4')]), 'point')``.
    The returned type can be passed as ``dtype`` when creating a variable.
    
    Args:
        datatype: NumPy structured dtype defining the compound type
        datatype_name (str): Name for the compound type
        
    Returns:
        CompoundType: The created compound type
    """
    ...

Type Access

Each file or group exposes the user-defined types it holds through dictionary-like properties, one per kind of type.

@property
def enumtypes(self) -> Frozen:
    """Dictionary-like access to the enumeration types, keyed by type name."""
    ...

@property
def vltypes(self) -> Frozen:
    """Dictionary-like access to the variable-length types, keyed by type name."""
    ...

@property
def cmptypes(self) -> Frozen:
    """Dictionary-like access to the compound types, keyed by type name."""
    ...

Usage Examples

Enumeration Types

import h5netcdf
import numpy as np

# Example: define two enumeration types and use them as variable dtypes.
with h5netcdf.File('enum_types.nc', 'w') as f:
    # Name -> integer code tables backing the two enumerations
    quality_codes = {'good': 0, 'questionable': 1, 'bad': 2, 'missing': 3}
    weather_codes = {
        'clear': 0,
        'partly_cloudy': 1,
        'cloudy': 2,
        'rain': 3,
        'snow': 4,
        'storm': 5,
    }

    # Quality flags use a signed 8-bit base type, weather a signed 16-bit one
    quality_enum = f.create_enumtype('i1', 'quality_flag', quality_codes)
    weather_enum = f.create_enumtype('i2', 'weather_type', weather_codes)

    # Dimensions shared by both enum-typed variables
    f.dimensions['time'] = 100
    f.dimensions['station'] = 50
    dims = ('time', 'station')

    quality = f.create_variable('quality', dims, dtype=quality_enum)
    weather = f.create_variable('weather', dims, dtype=weather_enum)

    # Enum data is written as integer codes; fill the first time step
    quality[0, :] = np.random.choice([0, 1, 2, 3], size=50)
    weather[0, :] = np.random.choice([0, 1, 2, 3, 4, 5], size=50)

    # The name -> value mapping is available from the type object
    print(f"Quality enum values: {quality_enum.enum_dict}")
    print(f"Weather enum values: {weather_enum.enum_dict}")

Variable-Length Types

# Example: define variable-length types and write ragged data with them.
with h5netcdf.File('vlen_types.nc', 'w') as f:
    # Create variable-length string type
    # NOTE(review): confirm that create_vltype accepts `str` as a base type in
    # the installed h5netcdf/h5py versions.
    vlen_str = f.create_vltype(str, 'vlen_string')
    
    # Create variable-length integer array type
    vlen_int = f.create_vltype('i4', 'vlen_int_array')
    
    # Create variables using VL types
    f.dimensions['record'] = 10
    
    # Variable-length strings (for varying-length text)
    comments = f.create_variable('comments', ('record',), dtype=vlen_str)
    
    # Variable-length integer arrays (for ragged arrays)
    measurements = f.create_variable('measurements', ('record',), dtype=vlen_int)
    
    # Write variable-length data
    comment_data = [
        "Short comment",
        "This is a much longer comment with more detail",
        "Medium length",
        "",  # Empty string
        "Another comment"
    ]
    
    measurement_data = [
        [1, 2, 3],           # 3 values
        [4, 5, 6, 7, 8],     # 5 values
        [9],                 # 1 value
        [],                  # No values
        [10, 11]             # 2 values
    ]
    
    # Note: Writing VL data depends on h5py version and backend
    # This is conceptual - actual syntax may vary
    # zip() stops at the shorter list, so no extra bounds checks are needed;
    # only the first five of the ten records are written here.
    for i, (comment, values) in enumerate(zip(comment_data, measurement_data)):
        comments[i] = comment
        # Convert with the VL base dtype so that the empty row is an int32
        # array rather than NumPy's default float64 empty array.
        measurements[i] = np.asarray(values, dtype='i4')

Compound Types

# Example: define a compound type and write a structured array with it.
with h5netcdf.File('compound_types.nc', 'w') as f:
    n_obs = 1000

    # Field layout of a single weather observation
    weather_fields = [
        ('temperature', 'f4'),     # 32-bit float
        ('humidity', 'f4'),        # 32-bit float
        ('pressure', 'f8'),        # 64-bit float
        ('wind_speed', 'f4'),      # 32-bit float
        ('wind_direction', 'i2'),  # 16-bit integer
        ('station_id', 'i4'),      # 32-bit integer
        ('timestamp', 'i8'),       # 64-bit integer
    ]
    weather_dtype = np.dtype(weather_fields)
    weather_compound = f.create_cmptype(weather_dtype, 'weather_obs')

    # One-dimensional variable whose elements are whole observations
    f.dimensions['observation'] = n_obs
    obs = f.create_variable(
        'observations', ('observation',), dtype=weather_compound
    )

    # Build the structured array field by field
    data = np.zeros(n_obs, dtype=weather_dtype)
    data['temperature'] = np.random.normal(20, 10, n_obs)
    data['humidity'] = np.random.uniform(30, 90, n_obs)
    data['pressure'] = np.random.normal(1013.25, 20, n_obs)
    data['wind_speed'] = np.random.exponential(5, n_obs)
    data['wind_direction'] = np.random.randint(0, 360, n_obs)
    data['station_id'] = np.random.randint(1000, 9999, n_obs)
    data['timestamp'] = np.arange(n_obs) + 1640000000  # Unix timestamps

    # Write all observations at once
    obs[:] = data

    # Inspect the compound type through its NumPy dtype
    print(f"Compound type fields: {weather_compound.dtype.names}")
    print(f"Field types: {[weather_compound.dtype.fields[name][0] for name in weather_compound.dtype.names]}")

Complex Nested Types

# Example: a compound type whose integer field carries enum codes.
with h5netcdf.File('nested_types.nc', 'w') as f:
    n_samples = 500

    # Enumeration describing where a measurement came from
    source_codes = {
        'satellite': 0,
        'ground_station': 1,
        'aircraft': 2,
        'ship': 3,
    }
    source_enum = f.create_enumtype('i1', 'data_source', source_codes)

    # Compound layout; 'source' is a plain 'i1' field that holds enum values
    measurement_dtype = np.dtype(
        [
            ('value', 'f4'),
            ('uncertainty', 'f4'),
            ('source', 'i1'),
            ('quality_code', 'i1'),
        ]
    )
    measurement_compound = f.create_cmptype(measurement_dtype, 'measurement')

    # Variable backed by the compound type
    f.dimensions['sample'] = n_samples
    data_var = f.create_variable('data', ('sample',), dtype=measurement_compound)

    # Populate the structured array, using enum codes for 'source'
    sample_data = np.zeros(n_samples, dtype=measurement_dtype)
    sample_data['value'] = np.random.normal(0, 1, n_samples)
    sample_data['uncertainty'] = np.random.exponential(0.1, n_samples)
    sample_data['source'] = np.random.choice([0, 1, 2, 3], n_samples)
    sample_data['quality_code'] = np.random.choice([0, 1, 2], n_samples)

    data_var[:] = sample_data

Reading User-Defined Types

# Example: list the user-defined types in a file and read compound data.
with h5netcdf.File('read_types.nc', 'r') as f:
    # Each kind of user-defined type has its own dictionary-like accessor
    print("Enumeration types:")
    for type_name, enum_t in f.enumtypes.items():
        print(f"  {type_name}: {enum_t.enum_dict}")

    print("\nVariable-length types:")
    for type_name, vl_t in f.vltypes.items():
        print(f"  {type_name}: {vl_t.dtype}")

    print("\nCompound types:")
    for type_name, cmp_t in f.cmptypes.items():
        print(f"  {type_name}: {cmp_t.dtype}")

    # Only read the compound variable when the file actually contains it
    if 'observations' in f.variables:
        data = f.variables['observations'][:]

        # Individual fields of the compound data are selected by name
        temperatures = data['temperature']
        pressures = data['pressure']

        print(f"Temperature range: {temperatures.min():.1f} to {temperatures.max():.1f}")
        print(f"Pressure range: {pressures.min():.1f} to {pressures.max():.1f}")

Type Inheritance in Groups

# Example: use a type defined in the root group from within a child group.
with h5netcdf.File('type_inheritance.nc', 'w') as f:
    # Type defined in the root group
    status_codes = {'active': 1, 'inactive': 0, 'maintenance': 2}
    status_enum = f.create_enumtype('i1', 'status', status_codes)

    # Child group with its own dimension
    sensors = f.create_group('sensors')
    sensors.dimensions['sensor_id'] = 100
    sensor_dims = ('sensor_id',)

    # Child groups inherit parent types: the root's enum is used here
    sensor_status = sensors.create_variable(
        'status', sensor_dims, dtype=status_enum
    )

    # Type defined in the child group itself
    sensor_type_codes = {
        'temperature': 0,
        'humidity': 1,
        'pressure': 2,
        'wind': 3,
    }
    sensor_type_enum = sensors.create_enumtype('i1', 'sensor_type', sensor_type_codes)

    sensor_type_var = sensors.create_variable(
        'type', sensor_dims, dtype=sensor_type_enum
    )

Legacy API Compatibility

import h5netcdf.legacyapi as netCDF4

# Example: the same user-defined types created through the legacy API.
with netCDF4.Dataset('legacy_types.nc', 'w') as f:
    # Legacy API methods (aliases to core methods)
    quality_codes = {'good': 0, 'bad': 1, 'missing': 2}
    quality_enum = f.createEnumType('i1', 'quality', quality_codes)

    vlen_str = f.createVLType(str, 'vlen_string')

    compound_dtype = np.dtype([('x', 'f4'), ('y', 'f4')])
    point_type = f.createCompoundType(compound_dtype, 'point')

    # One shared dimension for all three variables
    f.createDimension('n', 10)
    dims = ('n',)

    # The legacy call order is (name, datatype, dimensions)
    quality_var = f.createVariable('quality', quality_enum, dims)
    text_var = f.createVariable('text', vlen_str, dims)
    points_var = f.createVariable('points', point_type, dims)

Type Validation and Best Practices

Enumeration Guidelines

  • Use meaningful names for enum values
  • Keep integer values small and sequential
  • Document enum meanings in variable attributes
  • Consider using flags for multiple boolean properties

Variable-Length Considerations

  • VL types can impact performance with large datasets
  • Consider fixed-size alternatives when possible
  • Be aware of memory usage with large VL arrays

Compound Type Design

  • Use descriptive field names
  • Group related fields logically
  • Consider alignment and padding for performance
  • Document field meanings and units

Compatibility Notes

  • User-defined types are netCDF4-specific features
  • Not all tools support all user-defined types
  • Test compatibility with target applications
  • Provide fallback variables for critical data

Install with Tessl CLI

npx tessl i tessl/pypi-h5netcdf

docs

attributes.md

dimensions.md

file-operations.md

groups.md

index.md

legacy-api.md

user-types.md

variables.md

tile.json