<!-- docs: config-utilities.md · coordinate-systems.md · data-management.md · index.md · io-operations.md · spatial-operations.md -->

# Data Management

NoData value handling, attribute and encoding management, and data merging operations for combining multiple raster datasets. These capabilities enable proper data quality management and dataset combination workflows.

## Capabilities

### NoData Value Management

Handle missing or invalid data values in raster datasets with comprehensive NoData support.

```python { .api }
@property
def nodata(self) -> Any:
    """
    Get the NoData value for the DataArray.

    Returns:
        NoData value or None if not set
    """

@property
def encoded_nodata(self) -> Any:
    """
    Get the encoded NoData value from the DataArray encoding.

    Returns:
        Encoded NoData value or None if not set
    """

def set_nodata(
    self,
    input_nodata: Optional[float],
    *,
    inplace: bool = True
) -> xarray.DataArray:
    """
    Set the NoData value for the DataArray without modifying the data.

    Parameters:
    - input_nodata: NoData value (None to unset)
    - inplace: If True, modify in place (default: True)

    Returns:
        DataArray with NoData value set (if inplace=False)
    """

def write_nodata(
    self,
    input_nodata: Optional[float],
    *,
    encoded: bool = False,
    inplace: bool = False
) -> xarray.DataArray:
    """
    Write the NoData value to the DataArray in a CF compliant manner.

    Parameters:
    - input_nodata: NoData value (None removes _FillValue attribute)
    - encoded: Write to encoding instead of attributes (default: False)
    - inplace: If True, modify in place (default: False)

    Returns:
        DataArray with NoData written to attributes/encoding
    """
```

#### Usage Examples

```python
import rioxarray
import numpy as np

# Open data and check NoData
da = rioxarray.open_rasterio('data.tif')
print(f"Current NoData: {da.rio.nodata}")

# Set NoData value
da.rio.set_nodata(-9999)
print(f"New NoData: {da.rio.nodata}")

# Write NoData to attributes for file output
da_with_nodata = da.rio.write_nodata(-9999, inplace=False)

# Write to encoding instead of attributes
da_encoded = da.rio.write_nodata(-9999, encoded=True, inplace=False)

# Remove NoData value
da.rio.set_nodata(None)

# Handle NaN as NoData
da.rio.set_nodata(np.nan)

# Check encoded NoData
print(f"Encoded NoData: {da.rio.encoded_nodata}")
```

### Attribute Management

Manage xarray attributes for metadata and file compliance.

```python { .api }
def set_attrs(
    self,
    inplace: bool = False,
    **attrs
) -> Union[xarray.Dataset, xarray.DataArray]:
    """
    Set attributes on the Dataset/DataArray.

    Parameters:
    - inplace: If True, modify in place (default: False)
    - **attrs: Attribute key-value pairs to set

    Returns:
        Dataset/DataArray with attributes set
    """

def update_attrs(
    self,
    inplace: bool = False,
    **attrs
) -> Union[xarray.Dataset, xarray.DataArray]:
    """
    Update existing attributes on the Dataset/DataArray.

    Parameters:
    - inplace: If True, modify in place (default: False)
    - **attrs: Attribute key-value pairs to update

    Returns:
        Dataset/DataArray with attributes updated
    """
```

#### Usage Examples

```python
import rioxarray

da = rioxarray.open_rasterio('data.tif')

# Set new attributes
da_with_attrs = da.rio.set_attrs(
    title="My Dataset",
    description="Processed satellite imagery",
    processing_date="2023-01-01",
    inplace=False
)

# Update existing attributes
da_updated = da.rio.update_attrs(
    title="Updated Dataset",  # Updates existing
    version="1.1",  # Adds new
    inplace=False
)

# Modify in place
da.rio.set_attrs(
    units="degrees_celsius",
    scale_factor=0.01,
    inplace=True
)
```

### Encoding Management

Control how data is encoded when writing to files.

```python { .api }
def set_encoding(
    self,
    inplace: bool = False,
    **encoding
) -> Union[xarray.Dataset, xarray.DataArray]:
    """
    Set encoding on the Dataset/DataArray.

    Parameters:
    - inplace: If True, modify in place (default: False)
    - **encoding: Encoding key-value pairs to set

    Returns:
        Dataset/DataArray with encoding set
    """

def update_encoding(
    self,
    inplace: bool = False,
    **encoding
) -> Union[xarray.Dataset, xarray.DataArray]:
    """
    Update existing encoding on the Dataset/DataArray.

    Parameters:
    - inplace: If True, modify in place (default: False)
    - **encoding: Encoding key-value pairs to update

    Returns:
        Dataset/DataArray with encoding updated
    """
```

#### Usage Examples

```python
import rioxarray

da = rioxarray.open_rasterio('data.tif')

# Set compression encoding for file output
da_compressed = da.rio.set_encoding(
    dtype='float32',
    _FillValue=-9999,
    zlib=True,
    complevel=6,
    inplace=False
)

# Update specific encoding parameters
da_updated = da.rio.update_encoding(
    complevel=9,  # Higher compression
    shuffle=True,  # Enable byte shuffling
    inplace=False
)

# Common encoding patterns
da_int16 = da.rio.set_encoding(
    dtype='int16',
    scale_factor=0.01,
    add_offset=0,
    _FillValue=-32768,
    inplace=False
)
```

### Spatial Dimension Management

Manage and configure spatial dimension names and properties.

```python { .api }
@property
def x_dim(self) -> Optional[Hashable]:
    """Get the x (longitude/easting) dimension name."""

@property
def y_dim(self) -> Optional[Hashable]:
    """Get the y (latitude/northing) dimension name."""

@property
def width(self) -> int:
    """Get raster width in pixels."""

@property
def height(self) -> int:
    """Get raster height in pixels."""

@property
def shape(self) -> tuple[int, int]:
    """Get raster shape as (height, width)."""

@property
def count(self) -> int:
    """Get number of bands/variables."""

def set_spatial_dims(
    self,
    x_dim: Optional[Hashable] = None,
    y_dim: Optional[Hashable] = None,
    inplace: bool = False
) -> Union[xarray.Dataset, xarray.DataArray]:
    """
    Set spatial dimension names.

    Parameters:
    - x_dim: Name for x dimension (longitude/easting)
    - y_dim: Name for y dimension (latitude/northing)
    - inplace: If True, modify in place (default: False)

    Returns:
        Dataset/DataArray with spatial dimensions set
    """
```

#### Usage Examples

```python
import rioxarray

da = rioxarray.open_rasterio('data.tif')

# Check current spatial dimensions
print(f"X dimension: {da.rio.x_dim}")
print(f"Y dimension: {da.rio.y_dim}")
print(f"Shape: {da.rio.shape}")
print(f"Width: {da.rio.width}, Height: {da.rio.height}")

# Set custom spatial dimension names
da_custom = da.rio.set_spatial_dims(
    x_dim='longitude',
    y_dim='latitude',
    inplace=False
)

# Access updated dimensions
print(f"New X dim: {da_custom.rio.x_dim}")
print(f"New Y dim: {da_custom.rio.y_dim}")
```

### Data Merging

Combine multiple DataArrays or Datasets geospatially using rasterio.merge functionality.

```python { .api }
def merge_arrays(
    dataarrays: Sequence[xarray.DataArray],
    *,
    bounds: Optional[tuple] = None,
    res: Optional[tuple] = None,
    nodata: Optional[float] = None,
    precision: Optional[float] = None,
    method: Union[str, Callable, None] = None,
    crs: Optional[rasterio.crs.CRS] = None,
    parse_coordinates: bool = True
) -> xarray.DataArray:
    """
    Merge multiple DataArrays geospatially.

    Parameters:
    - dataarrays: List of DataArrays to merge
    - bounds: Output bounds (left, bottom, right, top)
    - res: Output resolution (x_res, y_res) or single value for square pixels
    - nodata: NoData value for output (uses first array's nodata if None)
    - precision: Decimal precision for inverse transform computation
    - method: Merge method ('first', 'last', 'min', 'max', 'mean', 'sum', or callable)
    - crs: Output CRS (uses first array's CRS if None)
    - parse_coordinates: Parse spatial coordinates (default: True)

    Returns:
        Merged DataArray
    """

def merge_datasets(
    datasets: Sequence[xarray.Dataset],
    *,
    bounds: Optional[tuple] = None,
    res: Optional[tuple] = None,
    nodata: Optional[float] = None,
    precision: Optional[float] = None,
    method: Union[str, Callable, None] = None,
    crs: Optional[rasterio.crs.CRS] = None
) -> xarray.Dataset:
    """
    Merge multiple Datasets geospatially.

    Parameters:
    - datasets: List of Datasets to merge
    - bounds: Output bounds (left, bottom, right, top)
    - res: Output resolution (x_res, y_res) or single value for square pixels
    - nodata: NoData value for output
    - precision: Decimal precision for inverse transform computation
    - method: Merge method ('first', 'last', 'min', 'max', 'mean', 'sum', or callable)
    - crs: Output CRS (uses first dataset's CRS if None)

    Returns:
        Merged Dataset
    """
```

#### Usage Examples

```python
import rioxarray
import xarray as xr
import numpy as np
from rioxarray.merge import merge_arrays, merge_datasets

# Load multiple overlapping rasters
da1 = rioxarray.open_rasterio('tile1.tif')
da2 = rioxarray.open_rasterio('tile2.tif')
da3 = rioxarray.open_rasterio('tile3.tif')

# Simple merge (first array takes precedence)
merged = merge_arrays([da1, da2, da3])

# Merge with specific bounds
merged_bounded = merge_arrays(
    [da1, da2, da3],
    bounds=(100000, 200000, 300000, 400000)
)

# Merge with custom resolution
merged_resampled = merge_arrays(
    [da1, da2, da3],
    res=(30, 30)  # 30m pixels
)

# Merge using mean of overlapping areas
merged_mean = merge_arrays(
    [da1, da2, da3],
    method='mean'
)

# Merge using custom function
def custom_merge(old_data, new_data, old_nodata, new_nodata, index=None, roff=None, coff=None):
    """Custom merge function - take maximum value"""
    return np.maximum(old_data, new_data)

merged_custom = merge_arrays(
    [da1, da2, da3],
    method=custom_merge
)

# Merge datasets with multiple variables
ds1 = xr.Dataset({'var1': da1, 'var2': da1 * 2})
ds2 = xr.Dataset({'var1': da2, 'var2': da2 * 2})
merged_ds = merge_datasets([ds1, ds2])
```

### Ground Control Points

Handle Ground Control Points (GCPs) for georeferencing and coordinate system definition.

```python { .api }
def write_gcps(
    self,
    gcps: Sequence[rasterio.control.GroundControlPoint],
    crs: Optional[Any] = None,
    inplace: bool = False
) -> Union[xarray.Dataset, xarray.DataArray]:
    """
    Write Ground Control Points to the Dataset/DataArray.

    Parameters:
    - gcps: List of GroundControlPoint objects
    - crs: CRS for the GCPs (uses dataset CRS if None)
    - inplace: If True, modify in place (default: False)

    Returns:
        Dataset/DataArray with GCPs written
    """

def get_gcps(self) -> tuple[Sequence[rasterio.control.GroundControlPoint], Optional[rasterio.crs.CRS]]:
    """
    Get Ground Control Points from the Dataset/DataArray.

    Returns:
        tuple: (list of GCPs, CRS of GCPs)
    """
```

#### Usage Examples

```python
import rioxarray
from rasterio.control import GroundControlPoint

da = rioxarray.open_rasterio('image.tif')

# Create GCPs (image coordinates to real-world coordinates)
gcps = [
    GroundControlPoint(row=0, col=0, x=-120.0, y=40.0, z=0.0),
    GroundControlPoint(row=0, col=100, x=-119.0, y=40.0, z=0.0),
    GroundControlPoint(row=100, col=0, x=-120.0, y=39.0, z=0.0),
    GroundControlPoint(row=100, col=100, x=-119.0, y=39.0, z=0.0),
]

# Write GCPs to dataset
da_with_gcps = da.rio.write_gcps(gcps, crs='EPSG:4326', inplace=False)

# Read GCPs from dataset
retrieved_gcps, gcp_crs = da_with_gcps.rio.get_gcps()
print(f"Found {len(retrieved_gcps)} GCPs in {gcp_crs}")
```

## Advanced Data Management

### Data Quality Assessment

```python
import rioxarray
import numpy as np

da = rioxarray.open_rasterio('data.tif')

# Check for NoData coverage
nodata_mask = da == da.rio.nodata if da.rio.nodata is not None else np.isnan(da)
nodata_percentage = (nodata_mask.sum() / da.size * 100).values
print(f"NoData coverage: {nodata_percentage:.2f}%")

# Assess data range
valid_data = da.where(~nodata_mask)
print(f"Data range: {float(valid_data.min())} to {float(valid_data.max())}")

# Check for infinite values
inf_count = np.isinf(da).sum().values
print(f"Infinite values: {inf_count}")
```

### Metadata Standardization

```python
import rioxarray
from datetime import datetime

da = rioxarray.open_rasterio('data.tif')

# Standardize metadata for CF compliance
standardized = da.rio.set_attrs(
    title="Standardized Dataset",
    institution="My Organization",
    source="Processed satellite data",
    history=f"Created on {datetime.now().isoformat()}",
    references="doi:10.1000/example",
    comment="Quality controlled and processed",
    inplace=False
)

# Set standard encoding
cf_encoded = standardized.rio.set_encoding(
    dtype='float32',
    _FillValue=-9999.0,
    scale_factor=1.0,
    add_offset=0.0,
    zlib=True,
    complevel=6,
    inplace=False
)
```

### Batch Processing Workflows

```python
import rioxarray
import glob
from datetime import datetime  # needed below for datetime.now()

# Process multiple files with consistent metadata
file_pattern = "data_*.tif"
files = glob.glob(file_pattern)

processed_arrays = []
for file_path in files:
    da = rioxarray.open_rasterio(file_path)

    # Standardize NoData
    da.rio.set_nodata(-9999, inplace=True)

    # Add processing metadata
    da = da.rio.set_attrs(
        processing_date=datetime.now().isoformat(),
        source_file=file_path,
        inplace=False
    )

    processed_arrays.append(da)

# Merge all processed arrays
from rioxarray.merge import merge_arrays
final_merged = merge_arrays(
    processed_arrays,
    method='mean',
    nodata=-9999
)
```