Geospatial xarray extension powered by rasterio for raster data manipulation and analysis
—
Clipping, reprojection, padding, and geometric operations on raster data. These functions enable spatial analysis, coordinate transformations, and geometric manipulation of raster datasets.
Transform raster data from one coordinate reference system to another with control over resolution, shape, and resampling methods.
def reproject(
self,
dst_crs: Any,
*,
resolution: Optional[Union[float, tuple[float, float]]] = None,
shape: Optional[tuple[int, int]] = None,
transform: Optional[rasterio.Affine] = None,
resampling: rasterio.enums.Resampling = rasterio.enums.Resampling.nearest,
nodata: Optional[float] = None,
**kwargs
) -> Union[xarray.DataArray, xarray.Dataset]:
"""
Reproject DataArray or Dataset to a new coordinate system.
Parameters:
- dst_crs: Destination CRS (EPSG, PROJ, WKT string, or CRS object)
- resolution: Pixel size in destination CRS units (float or (x_res, y_res))
- shape: Output shape (height, width) - cannot use with resolution
- transform: Destination affine transform
- resampling: Resampling algorithm (default: nearest)
- nodata: NoData value for destination (uses source nodata if None)
- **kwargs: Additional arguments for rasterio.warp.reproject
Returns:
Reprojected DataArray or Dataset
"""import rioxarray
from rasterio.enums import Resampling
# Open data in one CRS
da = rioxarray.open_rasterio('utm_data.tif') # UTM projection
# Reproject to geographic coordinates
geo_da = da.rio.reproject('EPSG:4326')
# Reproject with specific resolution
geo_da = da.rio.reproject('EPSG:4326', resolution=0.01) # 0.01 degree pixels
# Reproject with custom shape
geo_da = da.rio.reproject('EPSG:4326', shape=(1000, 2000))
# Reproject with different resampling
geo_da = da.rio.reproject(
'EPSG:4326',
resolution=0.01,
resampling=Resampling.bilinear
)
# Reproject with custom nodata
geo_da = da.rio.reproject('EPSG:4326', nodata=-9999)Reproject one raster to match the grid of another raster exactly.
def reproject_match(
self,
match_data_array: Union[xarray.DataArray, xarray.Dataset],
*,
resampling: rasterio.enums.Resampling = rasterio.enums.Resampling.nearest,
**reproject_kwargs
) -> Union[xarray.DataArray, xarray.Dataset]:
"""
Reproject DataArray/Dataset to match another DataArray/Dataset grid.
Parameters:
- match_data_array: Target grid to match (DataArray or Dataset)
- resampling: Resampling algorithm (default: nearest)
- **reproject_kwargs: Additional arguments for reproject
Returns:
Reprojected data matching target grid
"""import rioxarray
# Open two datasets with different grids
da1 = rioxarray.open_rasterio('data1.tif') # 30m resolution
da2 = rioxarray.open_rasterio('data2.tif') # 10m resolution
# Reproject da1 to match da2's grid exactly
da1_matched = da1.rio.reproject_match(da2)
# Now they have identical grids for analysis
assert da1_matched.rio.crs == da2.rio.crs
assert da1_matched.rio.shape == da2.rio.shape
assert da1_matched.rio.transform() == da2.rio.transform()
# Reproject with bilinear resampling
da1_matched = da1.rio.reproject_match(da2, resampling=Resampling.bilinear)Clip raster data using vector geometries (polygons, etc.) with support for various masking options.
def clip(
self,
geometries: Iterable,
crs: Optional[Any] = None,
*,
all_touched: bool = False,
drop: bool = True,
invert: bool = False,
from_disk: bool = False
) -> Union[xarray.DataArray, xarray.Dataset]:
"""
Clip raster by geojson-like geometry objects.
Parameters:
- geometries: List of GeoJSON geometry dicts or objects with __geo_interface__
- crs: CRS of input geometries (assumes same as dataset if None)
- all_touched: Include all pixels touched by geometries (default: False)
- drop: Drop data outside geometries vs. mask with nodata (default: True)
- invert: Invert mask (set inside geometries to nodata) (default: False)
- from_disk: Clip directly from disk for memory efficiency (default: False)
Returns:
Clipped DataArray or Dataset
"""import rioxarray
import json
# Load raster data
da = rioxarray.open_rasterio('large_raster.tif')
# Define clipping geometry
geometry = {
"type": "Polygon",
"coordinates": [[
[-94.07955, 41.69086],
[-94.06082, 41.69103],
[-94.06063, 41.67932],
[-94.07936, 41.67915],
[-94.07955, 41.69086]
]]
}
# Clip to polygon
clipped = da.rio.clip([geometry], crs='EPSG:4326')
# Clip with all touched pixels included
clipped_all = da.rio.clip([geometry], crs='EPSG:4326', all_touched=True)
# Mask (don't drop) area outside geometry
masked = da.rio.clip([geometry], crs='EPSG:4326', drop=False)
# Invert mask (mask inside geometry)
inverted = da.rio.clip([geometry], crs='EPSG:4326', invert=True)
# Clip large data from disk for memory efficiency
clipped_disk = da.rio.clip([geometry], crs='EPSG:4326', from_disk=True)Clip raster data to rectangular bounding boxes defined by coordinate extents.
def clip_box(
self,
minx: float,
miny: float,
maxx: float,
maxy: float,
crs: Optional[Any] = None,
auto_expand: bool = False,
auto_expand_limit: int = 3
) -> Union[xarray.DataArray, xarray.Dataset]:
"""
Clip raster data to a bounding box.
Parameters:
- minx: Minimum x coordinate
- miny: Minimum y coordinate
- maxx: Maximum x coordinate
- maxy: Maximum y coordinate
- crs: CRS of bounding box coordinates (assumes same as dataset if None)
- auto_expand: Expand bbox if needed to include full pixels (default: False)
- auto_expand_limit: Maximum pixels to expand bbox (default: 3)
Returns:
Clipped DataArray or Dataset
"""import rioxarray
# Load raster data
da = rioxarray.open_rasterio('raster.tif')
# Clip to bounding box in same CRS as data
clipped = da.rio.clip_box(
minx=100000, miny=200000,
maxx=150000, maxy=250000
)
# Clip using geographic coordinates
clipped_geo = da.rio.clip_box(
minx=-10, miny=40,
maxx=10, maxy=60,
crs='EPSG:4326'
)
# Auto-expand to include full pixels at boundaries
clipped_expanded = da.rio.clip_box(
minx=-10, miny=40, maxx=10, maxy=60,
crs='EPSG:4326',
auto_expand=True
)Expand raster data by adding pixels around the edges or to encompass specific areas.
def pad_xy(
self,
minx: float,
miny: float,
maxx: float,
maxy: float,
constant_values: Union[int, float] = 0
) -> xarray.DataArray:
"""
Pad DataArray in x/y directions to include specified coordinates.
Parameters:
- minx: Minimum x coordinate to include
- miny: Minimum y coordinate to include
- maxx: Maximum x coordinate to include
- maxy: Maximum y coordinate to include
- constant_values: Value to use for padding (default: 0)
Returns:
Padded DataArray
"""
def pad_box(
self,
minx: float,
miny: float,
maxx: float,
maxy: float,
crs: Optional[Any] = None,
constant_values: Union[int, float] = 0
) -> Union[xarray.DataArray, xarray.Dataset]:
"""
Pad Dataset/DataArray to encompass a bounding box.
Parameters:
- minx, miny, maxx, maxy: Bounding box coordinates
- crs: CRS of bounding box (assumes same as dataset if None)
- constant_values: Value for padded pixels (default: 0)
Returns:
Padded Dataset or DataArray
"""import rioxarray
da = rioxarray.open_rasterio('small_raster.tif')
# Pad to include specific coordinates
padded = da.rio.pad_xy(
minx=da.rio.bounds()[0] - 1000, # Extend 1000m in each direction
miny=da.rio.bounds()[1] - 1000,
maxx=da.rio.bounds()[2] + 1000,
maxy=da.rio.bounds()[3] + 1000,
constant_values=-9999
)
# Pad to encompass a larger area
padded_box = da.rio.pad_box(
minx=-10, miny=40, maxx=10, maxy=60,
crs='EPSG:4326',
constant_values=0
)Select data by pixel windows or coordinate ranges.
def isel_window(
self,
window: rasterio.windows.Window
) -> Union[xarray.DataArray, xarray.Dataset]:
"""
Index selection using a rasterio Window.
Parameters:
- window: rasterio Window object defining pixel ranges
Returns:
Subset of data corresponding to window
"""
def slice_xy(
self,
minx: Optional[float] = None,
miny: Optional[float] = None,
maxx: Optional[float] = None,
maxy: Optional[float] = None
) -> Union[xarray.DataArray, xarray.Dataset]:
"""
Slice data by x/y coordinate bounds.
Parameters:
- minx, miny, maxx, maxy: Coordinate bounds (None to skip bound)
Returns:
Sliced Dataset or DataArray
"""import rioxarray
from rasterio.windows import Window
da = rioxarray.open_rasterio('raster.tif')
# Select by pixel window
window = Window(col_off=100, row_off=50, width=200, height=150)
windowed = da.rio.isel_window(window)
# Slice by coordinates
subset = da.rio.slice_xy(minx=100000, maxx=200000)
# Slice with partial bounds
subset_y = da.rio.slice_xy(miny=40, maxy=60) # Only constrain YFill missing or nodata values using spatial interpolation methods.
def interpolate_na(
self,
method: str = "linear",
**kwargs
) -> Union[xarray.DataArray, xarray.Dataset]:
"""
Interpolate missing/nodata values.
Parameters:
- method: Interpolation method ('linear', 'nearest', 'cubic', etc.)
- **kwargs: Additional arguments for xarray.interpolate_na
Returns:
Data with interpolated values
"""import rioxarray
# Open data with missing values
da = rioxarray.open_rasterio('data_with_gaps.tif')
# Linear interpolation of missing values
interpolated = da.rio.interpolate_na(method='linear')
# Nearest neighbor interpolation
interpolated_nn = da.rio.interpolate_na(method='nearest')
# Cubic interpolation along specific dimensions
interpolated_cubic = da.rio.interpolate_na(
method='cubic',
dim=['x', 'y']
)Available resampling algorithms from rasterio for reprojection operations:
# Common resampling methods
Resampling.nearest # Nearest neighbor (default, preserves values)
Resampling.bilinear # Bilinear interpolation (smooth, good for continuous data)
Resampling.cubic # Cubic convolution (smoother than bilinear)
Resampling.cubic_spline # Cubic spline interpolation
Resampling.lanczos # Lanczos windowed sinc resampling
Resampling.average # Average of all pixels (good for downsampling)
Resampling.mode # Most common value (good for categorical data)
Resampling.gauss # Gaussian kernel resamplingimport rioxarray
from rasterio.enums import Resampling
da = rioxarray.open_rasterio('data.tif')
# For categorical data (land cover, classes)
categorical = da.rio.reproject('EPSG:4326', resampling=Resampling.nearest)
# For continuous data (temperature, elevation)
continuous = da.rio.reproject('EPSG:4326', resampling=Resampling.bilinear)
# For downsampling (reducing resolution)
downsampled = da.rio.reproject('EPSG:4326', resampling=Resampling.average)
# For high-quality upsampling
upsampled = da.rio.reproject('EPSG:4326', resampling=Resampling.lanczos)import rioxarray
from rasterio.enums import Resampling
# Complex spatial processing workflow
da = rioxarray.open_rasterio('source.tif')
# 1. Reproject to standard CRS
reprojected = da.rio.reproject('EPSG:4326', resampling=Resampling.bilinear)
# 2. Clip to area of interest
geometry = {...} # Your geometry
clipped = reprojected.rio.clip([geometry], crs='EPSG:4326')
# 3. Pad to ensure full coverage
padded = clipped.rio.pad_box(
minx=clipped.rio.bounds()[0] - 0.1,
miny=clipped.rio.bounds()[1] - 0.1,
maxx=clipped.rio.bounds()[2] + 0.1,
maxy=clipped.rio.bounds()[3] + 0.1,
crs='EPSG:4326'
)
# 4. Interpolate any remaining gaps
final = padded.rio.interpolate_na(method='linear')# For large datasets, use from_disk clipping
large_clipped = large_da.rio.clip([geometry], from_disk=True)
# Use appropriate chunking before spatial operations
chunked = da.chunk({'x': 2048, 'y': 2048})
processed = chunked.rio.reproject('EPSG:4326')
# For memory-intensive operations, process in chunks
bounds = da.rio.bounds()
chunk_size = 0.1 # degrees
for minx in numpy.arange(bounds[0], bounds[2], chunk_size):
for miny in numpy.arange(bounds[1], bounds[3], chunk_size):
chunk = da.rio.clip_box(minx, miny, minx+chunk_size, miny+chunk_size)
# Process chunk...Install with Tessl CLI
npx tessl i tessl/pypi-rioxarray