netCDF4 file access via h5py with hierarchical and legacy APIs for scientific computing
Variables are the primary data containers in netCDF4 files, storing multidimensional arrays with associated metadata. They support various data types, compression options, and chunking strategies for efficient storage and access.
Create variables with specified dimensions, data types, and storage options.
def create_variable(self, name: str, dimensions: tuple = (), dtype=None,
                    data=None, fillvalue=None, chunks: "tuple | None" = None,
                    chunking_heuristic: "str | None" = None,
                    compression: "str | None" = None,
                    compression_opts: "int | None" = None,
                    shuffle: bool = False,
                    fletcher32: bool = False, **kwargs) -> "Variable":
    # NOTE(review): extraction had stripped indentation and fused the stub
    # body with the following prose header; restored here. The `Variable`
    # return annotation is quoted because the class is defined later in
    # the file and would otherwise raise NameError at def time.
    """
    Create a new variable in the group.

    Args:
        name (str): Variable name
        dimensions (tuple): Tuple of dimension names
        dtype: NumPy dtype or UserType for the variable data
        data: Initial data to store (optional)
        fillvalue: Fill value for missing data
        chunks (tuple): Chunk sizes for each dimension
        chunking_heuristic (str): Auto-chunking approach ('h5py' or 'h5netcdf')
        compression (str): Compression method ('gzip', 'lzf', 'szip')
        compression_opts (int): Compression level (0-9 for gzip)
        shuffle (bool): Apply shuffle filter before compression
        fletcher32 (bool): Apply Fletcher32 checksum
        **kwargs: Additional HDF5 dataset creation parameters

    Returns:
        Variable: The newly created variable
    """
    ...


# Access variable metadata and configuration.
class Variable(BaseVariable):
    """A netCDF variable: a named, typed, multidimensional data container
    with associated metadata, storage, and compression configuration.

    NOTE(review): extraction had stripped indentation and fused prose
    section headers into the `...` stub bodies; restored as comments.
    Annotations referencing names not in scope at class-definition time
    (`np`, `Attributes`) are quoted to avoid NameError.
    """

    # --- Variable metadata and configuration -------------------------------

    @property
    def name(self) -> str:
        """Variable name."""
        ...

    @property
    def dimensions(self) -> tuple:
        """Tuple of dimension names."""
        ...

    @property
    def shape(self) -> tuple:
        """Current shape of the variable."""
        ...

    @property
    def ndim(self) -> int:
        """Number of dimensions."""
        ...

    @property
    def dtype(self) -> "np.dtype":
        """NumPy data type."""
        ...

    @property
    def datatype(self):
        """NetCDF datatype (includes user-defined types like EnumType, VLType, CompoundType)."""
        ...

    @property
    def attrs(self) -> "Attributes":
        """Variable attributes."""
        ...

    # --- Storage and compression information -------------------------------

    @property
    def chunks(self) -> "tuple | None":
        """Chunk sizes for each dimension (None if not chunked)."""
        ...

    @property
    def compression(self) -> "str | None":
        """Compression method used ('gzip', 'lzf', 'szip', or None)."""
        ...

    @property
    def compression_opts(self) -> int:
        """Compression options/level."""
        ...

    @property
    def shuffle(self) -> bool:
        """Whether shuffle filter is applied."""
        ...

    @property
    def fletcher32(self) -> bool:
        """Whether Fletcher32 checksum is applied."""
        ...

    # --- Read and write variable data using NumPy-style indexing -----------

    def __getitem__(self, key) -> "np.ndarray":
        """
        Read data from the variable using NumPy-style indexing.

        Args:
            key: Index specification (int, slice, tuple of indices/slices)

        Returns:
            np.ndarray: The requested data
        """
        ...

    def __setitem__(self, key, value) -> None:
        """
        Write data to the variable using NumPy-style indexing.

        Args:
            key: Index specification (int, slice, tuple of indices/slices)
            value: Data to write (scalar, array, or array-like)
        """
        ...

    def __len__(self) -> int:
        """
        Return the size of the first dimension.

        Returns:
            int: Size of first dimension
        """
        ...

    # --- Seamless integration with NumPy arrays and operations --------------

    def __array__(self, *args, **kwargs) -> "np.ndarray":
        """NumPy array interface support (loads all data)."""
        ...

    def __repr__(self) -> str:
        """String representation of the variable."""
        ...


import h5netcdf
import numpy as np

# NOTE(review): extraction had fused the eight independent example scripts
# together on shared lines and stripped all indentation; de-fused and
# restored here. Each `with` block is a self-contained snippet.

# --- Example 1: creating variables and writing data ------------------------
with h5netcdf.File('variables.nc', 'w') as f:
    # Create dimensions
    f.dimensions['time'] = 100
    f.dimensions['lat'] = 180
    f.dimensions['lon'] = 360
    # Create a simple variable
    temp = f.create_variable('temperature', ('time', 'lat', 'lon'), dtype='f4')
    # Set attributes
    temp.attrs['units'] = 'K'
    temp.attrs['long_name'] = 'Air Temperature'
    temp.attrs['valid_range'] = [200.0, 350.0]
    # Write some data
    temp[0, :, :] = np.random.random((180, 360)) * 50 + 273.15
    # Read data back
    first_timestep = temp[0, :, :]
    print(f"Temperature shape: {temp.shape}")
    print(f"Temperature dtype: {temp.dtype}")

# --- Example 2: NumPy-style indexing patterns ------------------------------
with h5netcdf.File('indexing.nc', 'r') as f:
    temp = f.variables['temperature']
    # Various indexing patterns
    all_data = temp[:]                       # All data
    first_time = temp[0, :, :]               # First time slice
    subset = temp[10:20, 50:100, 100:200]    # Subset
    single_point = temp[15, 90, 180]         # Single value
    # Fancy indexing
    specific_times = temp[[0, 5, 10], :, :]  # Specific time steps
    # Step indexing
    every_10th = temp[::10, :, :]            # Every 10th time step

# --- Example 3: compression and chunking -----------------------------------
with h5netcdf.File('compressed.nc', 'w') as f:
    f.dimensions['time'] = None  # Unlimited
    f.dimensions['lat'] = 721
    f.dimensions['lon'] = 1440
    # Create compressed variable with chunking
    temp = f.create_variable(
        'temperature',
        ('time', 'lat', 'lon'),
        dtype='f4',
        chunks=(1, 361, 720),    # Chunk size
        compression='gzip',      # Compression method
        compression_opts=6,      # Compression level
        shuffle=True,            # Shuffle filter
        fletcher32=True          # Checksum
    )
    # Check compression settings
    print(f"Chunks: {temp.chunks}")
    print(f"Compression: {temp.compression}")
    print(f"Compression level: {temp.compression_opts}")
    print(f"Shuffle: {temp.shuffle}")
    print(f"Fletcher32: {temp.fletcher32}")

# --- Example 4: fill values for missing data -------------------------------
with h5netcdf.File('missing_data.nc', 'w') as f:
    f.dimensions['time'] = 10
    f.dimensions['station'] = 50
    # Variable with fill value
    temp = f.create_variable(
        'temperature',
        ('time', 'station'),
        dtype='f4',
        fillvalue=-999.0
    )
    # Write partial data
    temp[0, :25] = np.random.random(25) * 30 + 273.15
    # Remaining values will be fill value
    # Check for fill values when reading
    data = temp[:]
    valid_data = data[data != -999.0]
    print(f"Valid measurements: {len(valid_data)}")

# --- Example 5: common data types ------------------------------------------
with h5netcdf.File('data_types.nc', 'w') as f:
    f.dimensions['n'] = 100
    # Integer variables
    int_var = f.create_variable('integers', ('n',), dtype='i4')
    int_var[:] = np.arange(100)
    # Float variables
    float_var = f.create_variable('floats', ('n',), dtype='f8')
    float_var[:] = np.random.random(100)
    # String variables
    f.dimensions['str_len'] = 20
    str_var = f.create_variable('strings', ('n', 'str_len'), dtype='S1')
    # Boolean-like (using integers)
    bool_var = f.create_variable('flags', ('n',), dtype='i1')
    bool_var[:] = np.random.choice([0, 1], 100)

# --- Example 6: unlimited dimensions ---------------------------------------
with h5netcdf.File('unlimited.nc', 'w') as f:
    # Create unlimited dimension
    f.dimensions['time'] = None  # Unlimited
    f.dimensions['station'] = 10
    # Variable with unlimited dimension
    temp = f.create_variable('temperature', ('time', 'station'), dtype='f4')
    # Write data in chunks (simulating time series)
    for t in range(5):
        # Extend the unlimited dimension
        temp[t, :] = np.random.random(10) * 30 + 273.15
    print(f"Current time dimension size: {f.dimensions['time'].size}")
    print(f"Variable shape: {temp.shape}")

# --- Example 7: coordinate variables ---------------------------------------
with h5netcdf.File('coordinates.nc', 'w') as f:
    # Create dimensions
    f.dimensions['lat'] = 180
    f.dimensions['lon'] = 360
    f.dimensions['time'] = 12
    # Create coordinate variables (same name as dimension)
    lat = f.create_variable('lat', ('lat',), dtype='f4')
    lat[:] = np.linspace(-89.5, 89.5, 180)
    lat.attrs['units'] = 'degrees_north'
    lat.attrs['long_name'] = 'Latitude'
    lon = f.create_variable('lon', ('lon',), dtype='f4')
    lon[:] = np.linspace(-179.5, 179.5, 360)
    lon.attrs['units'] = 'degrees_east'
    lon.attrs['long_name'] = 'Longitude'
    time = f.create_variable('time', ('time',), dtype='f8')
    time[:] = np.arange(12)
    time.attrs['units'] = 'months since 2023-01-01'
    time.attrs['calendar'] = 'standard'
    # Data variable using these coordinates
    temp = f.create_variable('temperature', ('time', 'lat', 'lon'), dtype='f4')
    temp.attrs['coordinates'] = 'time lat lon'

# --- Example 8: efficient chunked processing of large data -----------------
with h5netcdf.File('large_data.nc', 'r') as f:
    temp = f.variables['temperature']
    # Instead of loading all data at once
    # all_data = temp[:]  # Memory intensive
    # Process in chunks
    for i in range(0, temp.shape[0], 10):
        chunk = temp[i:i+10, :, :]
        # Process chunk (process_chunk is a user-supplied placeholder)
        result = process_chunk(chunk)

# Install with Tessl CLI:
npx tessl i tessl/pypi-h5netcdfdocs
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10