0
# Data Management
1
2
NoData value handling, attribute and encoding management, and data merging operations for combining multiple raster datasets. These capabilities enable proper data quality management and dataset combination workflows.
3
4
## Capabilities
5
6
### NoData Value Management
7
8
Handle missing or invalid data values in raster datasets with comprehensive NoData support.
9
10
```python { .api }
11
@property
12
def nodata(self) -> Any:
13
"""
14
Get the NoData value for the DataArray.
15
16
Returns:
17
NoData value or None if not set
18
"""
19
20
@property
21
def encoded_nodata(self) -> Any:
22
"""
23
Get the encoded NoData value from the DataArray encoding.
24
25
Returns:
26
Encoded NoData value or None if not set
27
"""
28
29
def set_nodata(
30
self,
31
input_nodata: Optional[float],
32
*,
33
inplace: bool = True
34
) -> xarray.DataArray:
35
"""
36
Set the NoData value for the DataArray without modifying the data.
37
38
Parameters:
39
- input_nodata: NoData value (None to unset)
40
- inplace: If True, modify in place (default: True)
41
42
Returns:
43
DataArray with NoData value set (if inplace=False)
44
"""
45
46
def write_nodata(
47
self,
48
input_nodata: Optional[float],
49
*,
50
encoded: bool = False,
51
inplace: bool = False
52
) -> xarray.DataArray:
53
"""
54
Write the NoData value to the DataArray in a CF compliant manner.
55
56
Parameters:
57
- input_nodata: NoData value (None removes _FillValue attribute)
58
- encoded: Write to encoding instead of attributes (default: False)
59
- inplace: If True, modify in place (default: False)
60
61
Returns:
62
DataArray with NoData written to attributes/encoding
63
"""
64
```
65
66
#### Usage Examples
67
68
```python
69
import rioxarray
70
import numpy as np
71
72
# Open data and check NoData
73
da = rioxarray.open_rasterio('data.tif')
74
print(f"Current NoData: {da.rio.nodata}")
75
76
# Set NoData value
77
da.rio.set_nodata(-9999)
78
print(f"New NoData: {da.rio.nodata}")
79
80
# Write NoData to attributes for file output
81
da_with_nodata = da.rio.write_nodata(-9999, inplace=False)
82
83
# Write to encoding instead of attributes
84
da_encoded = da.rio.write_nodata(-9999, encoded=True, inplace=False)
85
86
# Remove NoData value
87
da.rio.set_nodata(None)
88
89
# Handle NaN as NoData
90
da.rio.set_nodata(np.nan)
91
92
# Check encoded NoData
93
print(f"Encoded NoData: {da.rio.encoded_nodata}")
94
```
95
96
### Attribute Management
97
98
Manage xarray attributes for metadata and file compliance.
99
100
```python { .api }
101
def set_attrs(
102
self,
103
inplace: bool = False,
104
**attrs
105
) -> Union[xarray.Dataset, xarray.DataArray]:
106
"""
107
Set attributes on the Dataset/DataArray.
108
109
Parameters:
110
- inplace: If True, modify in place (default: False)
111
- **attrs: Attribute key-value pairs to set
112
113
Returns:
114
Dataset/DataArray with attributes set
115
"""
116
117
def update_attrs(
118
self,
119
inplace: bool = False,
120
**attrs
121
) -> Union[xarray.Dataset, xarray.DataArray]:
122
"""
123
Update existing attributes on the Dataset/DataArray.
124
125
Parameters:
126
- inplace: If True, modify in place (default: False)
127
- **attrs: Attribute key-value pairs to update
128
129
Returns:
130
Dataset/DataArray with attributes updated
131
"""
132
```
133
134
#### Usage Examples
135
136
```python
137
import rioxarray
138
139
da = rioxarray.open_rasterio('data.tif')
140
141
# Set new attributes
142
da_with_attrs = da.rio.set_attrs(
143
title="My Dataset",
144
description="Processed satellite imagery",
145
processing_date="2023-01-01",
146
inplace=False
147
)
148
149
# Update existing attributes
150
da_updated = da.rio.update_attrs(
151
title="Updated Dataset", # Updates existing
152
version="1.1", # Adds new
153
inplace=False
154
)
155
156
# Modify in place
157
da.rio.set_attrs(
158
units="degrees_celsius",
159
scale_factor=0.01,
160
inplace=True
161
)
162
```
163
164
### Encoding Management
165
166
Control how data is encoded when writing to files.
167
168
```python { .api }
169
def set_encoding(
170
self,
171
inplace: bool = False,
172
**encoding
173
) -> Union[xarray.Dataset, xarray.DataArray]:
174
"""
175
Set encoding on the Dataset/DataArray.
176
177
Parameters:
178
- inplace: If True, modify in place (default: False)
179
- **encoding: Encoding key-value pairs to set
180
181
Returns:
182
Dataset/DataArray with encoding set
183
"""
184
185
def update_encoding(
186
self,
187
inplace: bool = False,
188
**encoding
189
) -> Union[xarray.Dataset, xarray.DataArray]:
190
"""
191
Update existing encoding on the Dataset/DataArray.
192
193
Parameters:
194
- inplace: If True, modify in place (default: False)
195
- **encoding: Encoding key-value pairs to update
196
197
Returns:
198
Dataset/DataArray with encoding updated
199
"""
200
```
201
202
#### Usage Examples
203
204
```python
205
import rioxarray
206
207
da = rioxarray.open_rasterio('data.tif')
208
209
# Set compression encoding for file output
210
da_compressed = da.rio.set_encoding(
211
dtype='float32',
212
_FillValue=-9999,
213
zlib=True,
214
complevel=6,
215
inplace=False
216
)
217
218
# Update specific encoding parameters
219
da_updated = da.rio.update_encoding(
220
complevel=9, # Higher compression
221
shuffle=True, # Enable byte shuffling
222
inplace=False
223
)
224
225
# Common encoding patterns
226
da_int16 = da.rio.set_encoding(
227
dtype='int16',
228
scale_factor=0.01,
229
add_offset=0,
230
_FillValue=-32768,
231
inplace=False
232
)
233
```
234
235
### Spatial Dimension Management
236
237
Manage and configure spatial dimension names and properties.
238
239
```python { .api }
240
@property
241
def x_dim(self) -> Optional[Hashable]:
242
"""Get the x (longitude/easting) dimension name."""
243
244
@property
245
def y_dim(self) -> Optional[Hashable]:
246
"""Get the y (latitude/northing) dimension name."""
247
248
@property
249
def width(self) -> int:
250
"""Get raster width in pixels."""
251
252
@property
253
def height(self) -> int:
254
"""Get raster height in pixels."""
255
256
@property
257
def shape(self) -> tuple[int, int]:
258
"""Get raster shape as (height, width)."""
259
260
@property
261
def count(self) -> int:
262
"""Get number of bands/variables."""
263
264
def set_spatial_dims(
265
self,
266
x_dim: Optional[Hashable] = None,
267
y_dim: Optional[Hashable] = None,
268
inplace: bool = False
269
) -> Union[xarray.Dataset, xarray.DataArray]:
270
"""
271
Set spatial dimension names.
272
273
Parameters:
274
- x_dim: Name for x dimension (longitude/easting)
275
- y_dim: Name for y dimension (latitude/northing)
276
- inplace: If True, modify in place (default: False)
277
278
Returns:
279
Dataset/DataArray with spatial dimensions set
280
"""
281
```
282
283
#### Usage Examples
284
285
```python
286
import rioxarray
287
288
da = rioxarray.open_rasterio('data.tif')
289
290
# Check current spatial dimensions
291
print(f"X dimension: {da.rio.x_dim}")
292
print(f"Y dimension: {da.rio.y_dim}")
293
print(f"Shape: {da.rio.shape}")
294
print(f"Width: {da.rio.width}, Height: {da.rio.height}")
295
296
# Set custom spatial dimension names
297
da_custom = da.rio.set_spatial_dims(
298
x_dim='longitude',
299
y_dim='latitude',
300
inplace=False
301
)
302
303
# Access updated dimensions
304
print(f"New X dim: {da_custom.rio.x_dim}")
305
print(f"New Y dim: {da_custom.rio.y_dim}")
306
```
307
308
### Data Merging
309
310
Combine multiple DataArrays or Datasets geospatially using rasterio.merge functionality.
311
312
```python { .api }
313
def merge_arrays(
314
dataarrays: Sequence[xarray.DataArray],
315
*,
316
bounds: Optional[tuple] = None,
317
res: Optional[tuple] = None,
318
nodata: Optional[float] = None,
319
precision: Optional[float] = None,
320
method: Union[str, Callable, None] = None,
321
crs: Optional[rasterio.crs.CRS] = None,
322
parse_coordinates: bool = True
323
) -> xarray.DataArray:
324
"""
325
Merge multiple DataArrays geospatially.
326
327
Parameters:
328
- dataarrays: List of DataArrays to merge
329
- bounds: Output bounds (left, bottom, right, top)
330
- res: Output resolution (x_res, y_res) or single value for square pixels
331
- nodata: NoData value for output (uses first array's nodata if None)
332
- precision: Decimal precision for inverse transform computation
333
- method: Merge method ('first', 'last', 'min', 'max', 'sum', 'count', or callable)
334
- crs: Output CRS (uses first array's CRS if None)
335
- parse_coordinates: Parse spatial coordinates (default: True)
336
337
Returns:
338
Merged DataArray
339
"""
340
341
def merge_datasets(
342
datasets: Sequence[xarray.Dataset],
343
*,
344
bounds: Optional[tuple] = None,
345
res: Optional[tuple] = None,
346
nodata: Optional[float] = None,
347
precision: Optional[float] = None,
348
method: Union[str, Callable, None] = None,
349
crs: Optional[rasterio.crs.CRS] = None
350
) -> xarray.Dataset:
351
"""
352
Merge multiple Datasets geospatially.
353
354
Parameters:
355
- datasets: List of Datasets to merge
356
- bounds: Output bounds (left, bottom, right, top)
357
- res: Output resolution (x_res, y_res) or single value for square pixels
358
- nodata: NoData value for output
359
- precision: Decimal precision for inverse transform computation
360
- method: Merge method ('first', 'last', 'min', 'max', 'sum', 'count', or callable)
361
- crs: Output CRS (uses first dataset's CRS if None)
362
363
Returns:
364
Merged Dataset
365
"""
366
```
367
368
#### Usage Examples
369
370
```python
371
import rioxarray
372
import xarray as xr
373
import numpy as np
374
from rioxarray.merge import merge_arrays, merge_datasets
375
376
# Load multiple overlapping rasters
377
da1 = rioxarray.open_rasterio('tile1.tif')
378
da2 = rioxarray.open_rasterio('tile2.tif')
379
da3 = rioxarray.open_rasterio('tile3.tif')
380
381
# Simple merge (first array takes precedence)
382
merged = merge_arrays([da1, da2, da3])
383
384
# Merge with specific bounds
385
merged_bounded = merge_arrays(
386
[da1, da2, da3],
387
bounds=(100000, 200000, 300000, 400000)
388
)
389
390
# Merge with custom resolution
391
merged_resampled = merge_arrays(
392
[da1, da2, da3],
393
res=(30, 30) # 30m pixels
394
)
395
396
# Merge keeping the maximum value in overlapping areas
merged_max = merge_arrays(
    [da1, da2, da3],
    method='max'
)
401
402
# Merge using custom function
403
def custom_merge(merged_data, new_data, merged_mask, new_mask, index=None, roff=None, coff=None):
    """Custom merge function - keep the maximum value in overlapping areas.

    rasterio.merge passes data arrays plus boolean masks (True = invalid) and
    expects merged_data to be modified in place; the return value is ignored.
    """
    np.maximum(merged_data, new_data, out=merged_data, where=~new_mask)
406
407
merged_custom = merge_arrays(
408
[da1, da2, da3],
409
method=custom_merge
410
)
411
412
# Merge datasets with multiple variables
413
ds1 = xr.Dataset({'var1': da1, 'var2': da1 * 2})
414
ds2 = xr.Dataset({'var1': da2, 'var2': da2 * 2})
415
merged_ds = merge_datasets([ds1, ds2])
416
```
417
418
### Ground Control Points
419
420
Handle Ground Control Points (GCPs) for georeferencing and coordinate system definition.
421
422
```python { .api }
423
def write_gcps(
424
self,
425
gcps: Sequence[rasterio.control.GroundControlPoint],
426
crs: Optional[Any] = None,
427
inplace: bool = False
428
) -> Union[xarray.Dataset, xarray.DataArray]:
429
"""
430
Write Ground Control Points to the Dataset/DataArray.
431
432
Parameters:
433
- gcps: List of GroundControlPoint objects
434
- crs: CRS for the GCPs (uses dataset CRS if None)
435
- inplace: If True, modify in place (default: False)
436
437
Returns:
438
Dataset/DataArray with GCPs written
439
"""
440
441
def get_gcps(self) -> tuple[Sequence[rasterio.control.GroundControlPoint], Optional[rasterio.crs.CRS]]:
442
"""
443
Get Ground Control Points from the Dataset/DataArray.
444
445
Returns:
446
tuple: (list of GCPs, CRS of GCPs)
447
"""
448
```
449
450
#### Usage Examples
451
452
```python
453
import rioxarray
454
from rasterio.control import GroundControlPoint
455
456
da = rioxarray.open_rasterio('image.tif')
457
458
# Create GCPs (image coordinates to real-world coordinates)
459
gcps = [
460
GroundControlPoint(row=0, col=0, x=-120.0, y=40.0, z=0.0),
461
GroundControlPoint(row=0, col=100, x=-119.0, y=40.0, z=0.0),
462
GroundControlPoint(row=100, col=0, x=-120.0, y=39.0, z=0.0),
463
GroundControlPoint(row=100, col=100, x=-119.0, y=39.0, z=0.0),
464
]
465
466
# Write GCPs to dataset
467
da_with_gcps = da.rio.write_gcps(gcps, crs='EPSG:4326', inplace=False)
468
469
# Read GCPs from dataset
470
retrieved_gcps, gcp_crs = da_with_gcps.rio.get_gcps()
471
print(f"Found {len(retrieved_gcps)} GCPs in {gcp_crs}")
472
```
473
474
## Advanced Data Management
475
476
### Data Quality Assessment
477
478
```python
479
import rioxarray
480
import numpy as np
481
482
da = rioxarray.open_rasterio('data.tif')
483
484
# Check for NoData coverage
485
nodata_mask = da == da.rio.nodata if da.rio.nodata is not None else np.isnan(da)
486
nodata_percentage = (nodata_mask.sum() / da.size * 100).values
487
print(f"NoData coverage: {nodata_percentage:.2f}%")
488
489
# Assess data range
490
valid_data = da.where(~nodata_mask)
491
print(f"Data range: {float(valid_data.min())} to {float(valid_data.max())}")
492
493
# Check for infinite values
494
inf_count = np.isinf(da).sum().values
495
print(f"Infinite values: {inf_count}")
496
```
497
498
### Metadata Standardization
499
500
```python
501
import rioxarray
502
from datetime import datetime
503
504
da = rioxarray.open_rasterio('data.tif')
505
506
# Standardize metadata for CF compliance
507
standardized = da.rio.set_attrs(
508
title="Standardized Dataset",
509
institution="My Organization",
510
source="Processed satellite data",
511
history=f"Created on {datetime.now().isoformat()}",
512
references="doi:10.1000/example",
513
comment="Quality controlled and processed",
514
inplace=False
515
)
516
517
# Set standard encoding
518
cf_encoded = standardized.rio.set_encoding(
519
dtype='float32',
520
_FillValue=-9999.0,
521
scale_factor=1.0,
522
add_offset=0.0,
523
zlib=True,
524
complevel=6,
525
inplace=False
526
)
527
```
528
529
### Batch Processing Workflows
530
531
```python
532
import rioxarray
import glob
from datetime import datetime
534
535
# Process multiple files with consistent metadata
536
file_pattern = "data_*.tif"
537
files = glob.glob(file_pattern)
538
539
processed_arrays = []
540
for file_path in files:
541
da = rioxarray.open_rasterio(file_path)
542
543
# Standardize NoData
544
da.rio.set_nodata(-9999, inplace=True)
545
546
# Add processing metadata
547
da = da.rio.set_attrs(
548
processing_date=datetime.now().isoformat(),
549
source_file=file_path,
550
inplace=False
551
)
552
553
processed_arrays.append(da)
554
555
# Merge all processed arrays
556
from rioxarray.merge import merge_arrays
557
final_merged = merge_arrays(
    processed_arrays,
    method='first',
    nodata=-9999
)
562
```