or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

attributes.md · dimensions.md · file-operations.md · groups.md · index.md · legacy-api.md · user-types.md · variables.md

docs/variables.md

# Variables and Data Access

Variables are the primary data containers in netCDF4 files, storing multidimensional arrays with associated metadata. They support various data types, compression options, and chunking strategies for efficient storage and access.

## Capabilities

### Variable Creation

Create variables with specified dimensions, data types, and storage options.

```python { .api }
def create_variable(self, name: str, dimensions: tuple = (), dtype = None,
                    data = None, fillvalue = None, chunks: tuple = None,
                    chunking_heuristic: str = None, compression: str = None,
                    compression_opts: int = None, shuffle: bool = False,
                    fletcher32: bool = False, **kwargs) -> Variable:
    """
    Create a new variable in the group.

    Args:
        name (str): Variable name
        dimensions (tuple): Tuple of dimension names
        dtype: NumPy dtype or UserType for the variable data
        data: Initial data to store (optional)
        fillvalue: Fill value for missing data
        chunks (tuple): Chunk sizes for each dimension
        chunking_heuristic (str): Auto-chunking approach ('h5py' or 'h5netcdf')
        compression (str): Compression method ('gzip', 'lzf', 'szip')
        compression_opts (int): Compression level (0-9 for gzip)
        shuffle (bool): Apply shuffle filter before compression
        fletcher32 (bool): Apply Fletcher32 checksum
        **kwargs: Additional HDF5 dataset creation parameters

    Returns:
        Variable: The newly created variable
    """
    ...
```

### Variable Properties

Access variable metadata and configuration.

```python { .api }
class Variable(BaseVariable):
    @property
    def name(self) -> str:
        """Variable name."""
        ...

    @property
    def dimensions(self) -> tuple:
        """Tuple of dimension names."""
        ...

    @property
    def shape(self) -> tuple:
        """Current shape of the variable."""
        ...

    @property
    def ndim(self) -> int:
        """Number of dimensions."""
        ...

    @property
    def dtype(self) -> np.dtype:
        """NumPy data type."""
        ...

    @property
    def datatype(self):
        """NetCDF datatype (includes user-defined types like EnumType, VLType, CompoundType)."""
        ...

    @property
    def attrs(self) -> Attributes:
        """Variable attributes."""
        ...
```

### Storage Configuration Properties

Access information about variable storage and compression.

```python { .api }
@property
def chunks(self) -> tuple:
    """Chunk sizes for each dimension (None if not chunked)."""
    ...

@property
def compression(self) -> str:
    """Compression method used ('gzip', 'lzf', 'szip', or None)."""
    ...

@property
def compression_opts(self) -> int:
    """Compression options/level."""
    ...

@property
def shuffle(self) -> bool:
    """Whether shuffle filter is applied."""
    ...

@property
def fletcher32(self) -> bool:
    """Whether Fletcher32 checksum is applied."""
    ...
```

### Data Access

Read and write variable data using NumPy-style indexing.

```python { .api }
def __getitem__(self, key) -> np.ndarray:
    """
    Read data from the variable using NumPy-style indexing.

    Args:
        key: Index specification (int, slice, tuple of indices/slices)

    Returns:
        np.ndarray: The requested data
    """
    ...

def __setitem__(self, key, value) -> None:
    """
    Write data to the variable using NumPy-style indexing.

    Args:
        key: Index specification (int, slice, tuple of indices/slices)
        value: Data to write (scalar, array, or array-like)
    """
    ...

def __len__(self) -> int:
    """
    Return the size of the first dimension.

    Returns:
        int: Size of first dimension
    """
    ...
```

### NumPy Integration

Seamless integration with NumPy arrays and operations.

```python { .api }
def __array__(self, *args, **kwargs) -> np.ndarray:
    """NumPy array interface support (loads all data)."""
    ...

def __repr__(self) -> str:
    """String representation of the variable."""
    ...
```

## Usage Examples

### Basic Variable Operations

```python
import h5netcdf
import numpy as np

with h5netcdf.File('variables.nc', 'w') as f:
    # Create dimensions
    f.dimensions['time'] = 100
    f.dimensions['lat'] = 180
    f.dimensions['lon'] = 360

    # Create a simple variable
    temp = f.create_variable('temperature', ('time', 'lat', 'lon'), dtype='f4')

    # Set attributes
    temp.attrs['units'] = 'K'
    temp.attrs['long_name'] = 'Air Temperature'
    temp.attrs['valid_range'] = [200.0, 350.0]

    # Write some data
    temp[0, :, :] = np.random.random((180, 360)) * 50 + 273.15

    # Read data back
    first_timestep = temp[0, :, :]
    print(f"Temperature shape: {temp.shape}")
    print(f"Temperature dtype: {temp.dtype}")
```

### Advanced Indexing

```python
with h5netcdf.File('indexing.nc', 'r') as f:
    temp = f.variables['temperature']

    # Various indexing patterns
    all_data = temp[:]                       # All data
    first_time = temp[0, :, :]               # First time slice
    subset = temp[10:20, 50:100, 100:200]    # Subset
    single_point = temp[15, 90, 180]         # Single value

    # Fancy indexing
    specific_times = temp[[0, 5, 10], :, :]  # Specific time steps

    # Step indexing
    every_10th = temp[::10, :, :]            # Every 10th time step
```

### Chunking and Compression

```python
with h5netcdf.File('compressed.nc', 'w') as f:
    f.dimensions['time'] = None  # Unlimited
    f.dimensions['lat'] = 721
    f.dimensions['lon'] = 1440

    # Create compressed variable with chunking
    temp = f.create_variable(
        'temperature',
        ('time', 'lat', 'lon'),
        dtype='f4',
        chunks=(1, 361, 720),    # Chunk size
        compression='gzip',      # Compression method
        compression_opts=6,      # Compression level
        shuffle=True,            # Shuffle filter
        fletcher32=True          # Checksum
    )

    # Check compression settings
    print(f"Chunks: {temp.chunks}")
    print(f"Compression: {temp.compression}")
    print(f"Compression level: {temp.compression_opts}")
    print(f"Shuffle: {temp.shuffle}")
    print(f"Fletcher32: {temp.fletcher32}")
```

### Fill Values and Missing Data

```python
with h5netcdf.File('missing_data.nc', 'w') as f:
    f.dimensions['time'] = 10
    f.dimensions['station'] = 50

    # Variable with fill value
    temp = f.create_variable(
        'temperature',
        ('time', 'station'),
        dtype='f4',
        fillvalue=-999.0
    )

    # Write partial data
    temp[0, :25] = np.random.random(25) * 30 + 273.15
    # Remaining values will be fill value

    # Check for fill values when reading
    data = temp[:]
    valid_data = data[data != -999.0]
    print(f"Valid measurements: {len(valid_data)}")
```

### Working with Different Data Types

```python
with h5netcdf.File('data_types.nc', 'w') as f:
    f.dimensions['n'] = 100

    # Integer variables
    int_var = f.create_variable('integers', ('n',), dtype='i4')
    int_var[:] = np.arange(100)

    # Float variables
    float_var = f.create_variable('floats', ('n',), dtype='f8')
    float_var[:] = np.random.random(100)

    # String variables
    f.dimensions['str_len'] = 20
    str_var = f.create_variable('strings', ('n', 'str_len'), dtype='S1')

    # Boolean-like (using integers)
    bool_var = f.create_variable('flags', ('n',), dtype='i1')
    bool_var[:] = np.random.choice([0, 1], 100)
```

### Unlimited Dimensions

```python
with h5netcdf.File('unlimited.nc', 'w') as f:
    # Create unlimited dimension
    f.dimensions['time'] = None  # Unlimited
    f.dimensions['station'] = 10

    # Variable with unlimited dimension
    temp = f.create_variable('temperature', ('time', 'station'), dtype='f4')

    # Write data in chunks (simulating time series)
    for t in range(5):
        # Extend the unlimited dimension
        temp[t, :] = np.random.random(10) * 30 + 273.15

    print(f"Current time dimension size: {f.dimensions['time'].size}")
    print(f"Variable shape: {temp.shape}")
```

### Coordinate Variables

```python
with h5netcdf.File('coordinates.nc', 'w') as f:
    # Create dimensions
    f.dimensions['lat'] = 180
    f.dimensions['lon'] = 360
    f.dimensions['time'] = 12

    # Create coordinate variables (same name as dimension)
    lat = f.create_variable('lat', ('lat',), dtype='f4')
    lat[:] = np.linspace(-89.5, 89.5, 180)
    lat.attrs['units'] = 'degrees_north'
    lat.attrs['long_name'] = 'Latitude'

    lon = f.create_variable('lon', ('lon',), dtype='f4')
    lon[:] = np.linspace(-179.5, 179.5, 360)
    lon.attrs['units'] = 'degrees_east'
    lon.attrs['long_name'] = 'Longitude'

    time = f.create_variable('time', ('time',), dtype='f8')
    time[:] = np.arange(12)
    time.attrs['units'] = 'months since 2023-01-01'
    time.attrs['calendar'] = 'standard'

    # Data variable using these coordinates
    temp = f.create_variable('temperature', ('time', 'lat', 'lon'), dtype='f4')
    temp.attrs['coordinates'] = 'time lat lon'
```

## Performance Considerations

### Chunking Strategy

- **Time series data**: Chunk along time dimension for efficient appends
- **Spatial data**: Chunk to match typical access patterns (e.g., geographic tiles)
- **Rule of thumb**: Aim for chunk sizes of 10KB to 1MB

### Compression Guidelines

- **gzip**: Good general-purpose compression, level 6 is often optimal
- **lzf**: Faster compression/decompression, lower ratio
- **szip**: Good for scientific data, patent restrictions
- **shuffle**: Almost always beneficial with compression

### Memory Management

```python
# Efficient: Process data in chunks
with h5netcdf.File('large_data.nc', 'r') as f:
    temp = f.variables['temperature']

    # Instead of loading all data at once
    # all_data = temp[:]  # Memory intensive

    # Process in chunks
    for i in range(0, temp.shape[0], 10):
        chunk = temp[i:i+10, :, :]
        # Process chunk
        result = process_chunk(chunk)
```