Geospatial xarray extension powered by rasterio for raster data manipulation and analysis.

This section covers NoData value handling, attribute and encoding management, and data merging operations for combining multiple raster datasets. These capabilities enable proper data-quality management and dataset-combination workflows.

Handle missing or invalid data values in raster datasets with comprehensive NoData support.
@property
def nodata(self) -> Any:
    """
    Get the NoData value for the DataArray.

    Returns:
        NoData value, or None if not set
    """
@property
def encoded_nodata(self) -> Any:
    """
    Get the encoded NoData value from the DataArray encoding.

    This reads from ``encoding`` (e.g. as written by
    ``write_nodata(..., encoded=True)``) rather than from ``attrs``.

    Returns:
        Encoded NoData value, or None if not set
    """
def set_nodata(
    self,
    input_nodata: Optional[float],
    *,
    inplace: bool = True
) -> xarray.DataArray:
    """
    Set the NoData value for the DataArray without modifying the data.

    Note: unlike most other methods here, ``inplace`` defaults to True,
    so a bare call mutates the DataArray's accessor state.

    Parameters:
    - input_nodata: NoData value (None to unset)
    - inplace: If True, modify in place (default: True)

    Returns:
        DataArray with NoData value set (if inplace=False)
    """
def write_nodata(
    self,
    input_nodata: Optional[float],
    *,
    encoded: bool = False,
    inplace: bool = False
) -> xarray.DataArray:
    """
    Write the NoData value to the DataArray in a CF compliant manner.

    Parameters:
    - input_nodata: NoData value (None removes the _FillValue attribute)
    - encoded: Write to encoding instead of attributes (default: False)
    - inplace: If True, modify in place (default: False)

    Returns:
        DataArray with NoData written to attributes/encoding
    """
import rioxarray
import numpy as np
# Open the raster and inspect its current NoData value
da = rioxarray.open_rasterio('data.tif')
print(f"Current NoData: {da.rio.nodata}")
# Set a NoData value; set_nodata defaults to inplace=True, so this mutates da
da.rio.set_nodata(-9999)
print(f"New NoData: {da.rio.nodata}")
# Write NoData to attributes for file output (CF-compliant _FillValue)
da_with_nodata = da.rio.write_nodata(-9999, inplace=False)
# Write to encoding instead of attributes
da_encoded = da.rio.write_nodata(-9999, encoded=True, inplace=False)
# Remove the NoData value again
da.rio.set_nodata(None)
# NaN can also serve as the NoData marker
da.rio.set_nodata(np.nan)
# Check encoded NoData
print(f"Encoded NoData: {da.rio.encoded_nodata}")

Manage xarray attributes for metadata and file compliance.
def set_attrs(
    self,
    inplace: bool = False,
    **attrs
) -> Union[xarray.Dataset, xarray.DataArray]:
    """
    Set attributes on the Dataset/DataArray.

    Parameters:
    - inplace: If True, modify in place (default: False)
    - **attrs: Attribute key-value pairs to set

    Returns:
        Dataset/DataArray with attributes set
    """
def update_attrs(
    self,
    inplace: bool = False,
    **attrs
) -> Union[xarray.Dataset, xarray.DataArray]:
    """
    Update existing attributes on the Dataset/DataArray.

    Parameters:
    - inplace: If True, modify in place (default: False)
    - **attrs: Attribute key-value pairs to update

    Returns:
        Dataset/DataArray with attributes updated
    """
import rioxarray
da = rioxarray.open_rasterio('data.tif')
# Set new attributes
da_with_attrs = da.rio.set_attrs(
title="My Dataset",
description="Processed satellite imagery",
processing_date="2023-01-01",
inplace=False
)
# Update existing attributes
da_updated = da.rio.update_attrs(
title="Updated Dataset", # Updates existing
version="1.1", # Adds new
inplace=False
)
# Modify in place
da.rio.set_attrs(
units="degrees_celsius",
scale_factor=0.01,
inplace=True
)

Control how data is encoded when writing to files.
def set_encoding(
    self,
    inplace: bool = False,
    **encoding
) -> Union[xarray.Dataset, xarray.DataArray]:
    """
    Set encoding on the Dataset/DataArray.

    Parameters:
    - inplace: If True, modify in place (default: False)
    - **encoding: Encoding key-value pairs to set

    Returns:
        Dataset/DataArray with encoding set
    """
def update_encoding(
    self,
    inplace: bool = False,
    **encoding
) -> Union[xarray.Dataset, xarray.DataArray]:
    """
    Update existing encoding on the Dataset/DataArray.

    Parameters:
    - inplace: If True, modify in place (default: False)
    - **encoding: Encoding key-value pairs to update

    Returns:
        Dataset/DataArray with encoding updated
    """
import rioxarray
da = rioxarray.open_rasterio('data.tif')
# Set compression encoding for file output
da_compressed = da.rio.set_encoding(
dtype='float32',
_FillValue=-9999,
zlib=True,
complevel=6,
inplace=False
)
# Update specific encoding parameters
da_updated = da.rio.update_encoding(
complevel=9, # Higher compression
shuffle=True, # Enable byte shuffling
inplace=False
)
# Common encoding patterns
da_int16 = da.rio.set_encoding(
dtype='int16',
scale_factor=0.01,
add_offset=0,
_FillValue=-32768,
inplace=False
)

Manage and configure spatial dimension names and properties.
@property
def x_dim(self) -> Optional[Hashable]:
    """Get the x (longitude/easting) dimension name, or None if not set."""
@property
def y_dim(self) -> Optional[Hashable]:
    """Get the y (latitude/northing) dimension name, or None if not set."""
@property
def width(self) -> int:
    """Get raster width in pixels."""
@property
def height(self) -> int:
    """Get raster height in pixels."""
@property
def shape(self) -> tuple[int, int]:
    """Get raster shape as (height, width)."""
@property
def count(self) -> int:
    """Get the number of bands/variables."""
def set_spatial_dims(
    self,
    x_dim: Optional[Hashable] = None,
    y_dim: Optional[Hashable] = None,
    inplace: bool = False
) -> Union[xarray.Dataset, xarray.DataArray]:
    """
    Set spatial dimension names.

    Parameters:
    - x_dim: Name for the x dimension (longitude/easting)
    - y_dim: Name for the y dimension (latitude/northing)
    - inplace: If True, modify in place (default: False)

    Returns:
        Dataset/DataArray with spatial dimensions set
    """
import rioxarray
da = rioxarray.open_rasterio('data.tif')
# Inspect the spatial dimensions detected on load
print(f"X dimension: {da.rio.x_dim}")
print(f"Y dimension: {da.rio.y_dim}")
print(f"Shape: {da.rio.shape}")
print(f"Width: {da.rio.width}, Height: {da.rio.height}")
# Set custom spatial dimension names (inplace=False returns a new object)
da_custom = da.rio.set_spatial_dims(
    x_dim='longitude',
    y_dim='latitude',
    inplace=False
)
# Access the updated dimension names
print(f"New X dim: {da_custom.rio.x_dim}")
print(f"New Y dim: {da_custom.rio.y_dim}")

Combine multiple DataArrays or Datasets geospatially using rasterio.merge functionality.
def merge_arrays(
    dataarrays: Sequence[xarray.DataArray],
    *,
    bounds: Optional[tuple] = None,
    res: Optional[tuple] = None,
    nodata: Optional[float] = None,
    precision: Optional[float] = None,
    method: Union[str, Callable, None] = None,
    crs: Optional[rasterio.crs.CRS] = None,
    parse_coordinates: bool = True
) -> xarray.DataArray:
    """
    Merge multiple DataArrays geospatially.

    Parameters:
    - dataarrays: List of DataArrays to merge
    - bounds: Output bounds (left, bottom, right, top)
    - res: Output resolution (x_res, y_res), or a single value for square pixels
    - nodata: NoData value for output (uses first array's nodata if None)
    - precision: Decimal precision for inverse transform computation
    - method: Merge method ('first', 'last', 'min', 'max', 'mean', 'sum', or a callable)
    - crs: Output CRS (uses first array's CRS if None)
    - parse_coordinates: Parse spatial coordinates (default: True)

    Returns:
        Merged DataArray
    """
def merge_datasets(
    datasets: Sequence[xarray.Dataset],
    *,
    bounds: Optional[tuple] = None,
    res: Optional[tuple] = None,
    nodata: Optional[float] = None,
    precision: Optional[float] = None,
    method: Union[str, Callable, None] = None,
    crs: Optional[rasterio.crs.CRS] = None
) -> xarray.Dataset:
    """
    Merge multiple Datasets geospatially.

    Parameters:
    - datasets: List of Datasets to merge
    - bounds: Output bounds (left, bottom, right, top)
    - res: Output resolution (x_res, y_res), or a single value for square pixels
    - nodata: NoData value for output
    - precision: Decimal precision for inverse transform computation
    - method: Merge method ('first', 'last', 'min', 'max', 'mean', 'sum', or a callable)
    - crs: Output CRS (uses first dataset's CRS if None)

    Returns:
        Merged Dataset
    """
import rioxarray
import xarray as xr
import numpy as np
from rioxarray.merge import merge_arrays, merge_datasets
# Load multiple overlapping raster tiles
da1 = rioxarray.open_rasterio('tile1.tif')
da2 = rioxarray.open_rasterio('tile2.tif')
da3 = rioxarray.open_rasterio('tile3.tif')
# Simple merge (the first array takes precedence in overlaps)
merged = merge_arrays([da1, da2, da3])
# Merge limited to specific output bounds (left, bottom, right, top)
merged_bounded = merge_arrays(
    [da1, da2, da3],
    bounds=(100000, 200000, 300000, 400000)
)
# Merge resampled to a custom output resolution
merged_resampled = merge_arrays(
    [da1, da2, da3],
    res=(30, 30)  # 30m pixels
)
# Merge using the mean of overlapping areas
merged_mean = merge_arrays(
    [da1, da2, da3],
    method='mean'
)
# Merge using a custom function
def custom_merge(old_data, new_data, old_nodata, new_nodata, index=None, roff=None, coff=None):
    """Custom merge function - take the element-wise maximum value.

    rasterio's merge machinery (which rioxarray delegates to) ignores the
    callable's return value and expects the destination buffer ``old_data``
    to be updated in place, so write the result into ``old_data`` via
    ``out=`` instead of returning a fresh array. The mutated buffer is
    still returned for convenience/backward compatibility.

    Parameters:
    - old_data / new_data: destination and incoming pixel buffers
    - old_nodata / new_nodata: NoData masks/values for the two buffers (unused here)
    - index, roff, coff: tile index and row/column offsets (unused here)
    """
    return np.maximum(old_data, new_data, out=old_data)
merged_custom = merge_arrays(
    [da1, da2, da3],
    method=custom_merge
)
# Merge Datasets containing multiple variables
ds1 = xr.Dataset({'var1': da1, 'var2': da1 * 2})
ds2 = xr.Dataset({'var1': da2, 'var2': da2 * 2})
merged_ds = merge_datasets([ds1, ds2])

Handle Ground Control Points (GCPs) for georeferencing and coordinate system definition.
def write_gcps(
    self,
    gcps: Sequence[rasterio.control.GroundControlPoint],
    crs: Optional[Any] = None,
    inplace: bool = False
) -> Union[xarray.Dataset, xarray.DataArray]:
    """
    Write Ground Control Points to the Dataset/DataArray.

    Parameters:
    - gcps: List of GroundControlPoint objects
    - crs: CRS for the GCPs (uses the dataset CRS if None)
    - inplace: If True, modify in place (default: False)

    Returns:
        Dataset/DataArray with GCPs written
    """
def get_gcps(self) -> tuple[Sequence[rasterio.control.GroundControlPoint], Optional[rasterio.crs.CRS]]:
    """
    Get Ground Control Points from the Dataset/DataArray.

    Returns:
        tuple: (list of GCPs, CRS of the GCPs or None)
    """
import rioxarray
from rasterio.control import GroundControlPoint
da = rioxarray.open_rasterio('image.tif')
# Create GCPs mapping image (row, col) pixels to real-world (x, y, z) coordinates
gcps = [
    GroundControlPoint(row=0, col=0, x=-120.0, y=40.0, z=0.0),
    GroundControlPoint(row=0, col=100, x=-119.0, y=40.0, z=0.0),
    GroundControlPoint(row=100, col=0, x=-120.0, y=39.0, z=0.0),
    GroundControlPoint(row=100, col=100, x=-119.0, y=39.0, z=0.0),
]
# Write the GCPs (with their CRS) onto a copy of the dataset
da_with_gcps = da.rio.write_gcps(gcps, crs='EPSG:4326', inplace=False)
# Read the GCPs back from the dataset
retrieved_gcps, gcp_crs = da_with_gcps.rio.get_gcps()
print(f"Found {len(retrieved_gcps)} GCPs in {gcp_crs}")
import rioxarray
import numpy as np
da = rioxarray.open_rasterio('data.tif')
# Build a NoData mask: compare against the declared NoData value,
# falling back to NaN detection when no NoData value is set
nodata_mask = da == da.rio.nodata if da.rio.nodata is not None else np.isnan(da)
nodata_percentage = (nodata_mask.sum() / da.size * 100).values
print(f"NoData coverage: {nodata_percentage:.2f}%")
# Assess the data range over valid (non-NoData) pixels only
valid_data = da.where(~nodata_mask)
print(f"Data range: {float(valid_data.min())} to {float(valid_data.max())}")
# Check for infinite values
inf_count = np.isinf(da).sum().values
print(f"Infinite values: {inf_count}")
import rioxarray
from datetime import datetime
da = rioxarray.open_rasterio('data.tif')
# Standardize descriptive metadata for CF compliance
standardized = da.rio.set_attrs(
    title="Standardized Dataset",
    institution="My Organization",
    source="Processed satellite data",
    history=f"Created on {datetime.now().isoformat()}",
    references="doi:10.1000/example",
    comment="Quality controlled and processed",
    inplace=False
)
# Set a standard on-disk encoding (compressed float32 with fill value)
cf_encoded = standardized.rio.set_encoding(
    dtype='float32',
    _FillValue=-9999.0,
    scale_factor=1.0,
    add_offset=0.0,
    zlib=True,
    complevel=6,
    inplace=False
)
import rioxarray
import glob
# Process multiple files with consistent metadata
file_pattern = "data_*.tif"
files = glob.glob(file_pattern)
processed_arrays = []
for file_path in files:
da = rioxarray.open_rasterio(file_path)
# Standardize NoData
da.rio.set_nodata(-9999, inplace=True)
# Add processing metadata
da = da.rio.set_attrs(
processing_date=datetime.now().isoformat(),
source_file=file_path,
inplace=False
)
processed_arrays.append(da)
# Merge all processed arrays
from rioxarray.merge import merge_arrays
final_merged = merge_arrays(
processed_arrays,
method='mean',
nodata=-9999
)

Install with Tessl CLI
npx tessl i tessl/pypi-rioxarray