tessl install tessl/pypi-h5netcdf@1.6.0
netCDF4 file access via h5py with hierarchical and legacy APIs for scientific computing
Agent Success
Agent success rate when using this tile
69%
Improvement
Agent success rate improvement when using this tile compared to baseline
0.83x
Baseline
Agent success rate without this tile
83%
You need to create a tool that processes and stores large climate datasets efficiently. The tool should create an optimized netCDF4 file with proper compression and chunking strategies to balance storage efficiency with data access performance.
Implement a Python program climate_storage.py that creates a netCDF4 file containing multidimensional climate data with optimized storage settings.
Create a file climate_data.nc with the following structure:
Dimensions:
time: unlimited dimension for temporal data
latitude: 180 grid points
longitude: 360 grid points
level: 10 atmospheric levels
Variables:
temperature (time, level, latitude, longitude): 4D temperature data in Kelvin
pressure (time, latitude, longitude): 3D pressure data in Pascals
station_id (latitude, longitude): 2D station identifier data
Attributes:
title to "Optimized Climate Dataset"
compression_info to "Using gzip with shuffle filter"
units for temperature to "K"
units for pressure to "Pa"
Provides netCDF4 file access via h5py with support for compression and chunking.
Test file: test_climate_storage.py
import h5netcdf.legacyapi as netCDF4
def test_file_structure():
"""Verify the file is created with correct structure."""
with netCDF4.Dataset('climate_data.nc', 'r') as f:
# Check dimensions
assert 'time' in f.dimensions
assert 'latitude' in f.dimensions
assert 'longitude' in f.dimensions
assert 'level' in f.dimensions
assert f.dimensions['latitude'].size == 180
assert f.dimensions['longitude'].size == 360
assert f.dimensions['level'].size == 10
assert f.dimensions['time'].isunlimited()
# Check variables exist
assert 'temperature' in f.variables
assert 'pressure' in f.variables
assert 'station_id' in f.variablesTest file: test_climate_storage.py
import h5netcdf.legacyapi as netCDF4
def test_compression():
"""Verify compression settings are applied correctly."""
with netCDF4.Dataset('climate_data.nc', 'r') as f:
temp_var = f.variables['temperature']
press_var = f.variables['pressure']
station_var = f.variables['station_id']
# Check temperature compression
assert temp_var.filters()['complevel'] == 4
assert temp_var.filters()['shuffle'] == True
# Check pressure compression
assert press_var.filters()['complevel'] == 2
assert press_var.filters()['shuffle'] == True
# Check station_id has no compression
assert station_var.filters()['complevel'] == 0Test file: test_climate_storage.py
import h5netcdf.legacyapi as netCDF4
def test_chunking():
"""Verify chunk sizes are configured correctly."""
with netCDF4.Dataset('climate_data.nc', 'r') as f:
temp_var = f.variables['temperature']
press_var = f.variables['pressure']
station_var = f.variables['station_id']
# Check chunk sizes
assert temp_var.chunking() == [1, 2, 45, 90]
assert press_var.chunking() == [10, 30, 60]
assert station_var.chunking() == [45, 90]Test file: test_climate_storage.py
import h5netcdf.legacyapi as netCDF4
import numpy as np
def test_attributes_and_data():
"""Verify attributes are set and initial data is written."""
with netCDF4.Dataset('climate_data.nc', 'r') as f:
# Check global attributes
assert f.getncattr('title') == "Optimized Climate Dataset"
assert f.getncattr('compression_info') == "Using gzip with shuffle filter"
# Check variable attributes
assert f.variables['temperature'].getncattr('units') == "K"
assert f.variables['pressure'].getncattr('units') == "Pa"
# Check initial data (at least one value to confirm data was written)
temp_data = f.variables['temperature'][0, 0, 0, 0]
assert np.isclose(temp_data, 273.15)
press_data = f.variables['pressure'][0, 0, 0]
assert np.isclose(press_data, 101325.0)
station_data = f.variables['station_id'][0, 0]
assert station_data == 1climate_storage.py - Main implementation file that creates the optimized netCDF4 filetest_climate_storage.py - Test file with all test casesclimate_data.nc - The generated netCDF4 file (created when running the program)