CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-rioxarray

Geospatial xarray extension powered by rasterio for raster data manipulation and analysis

Pending
Overview
Eval results
Files

spatial-operations.mddocs/

Spatial Operations

Clipping, reprojection, padding, and geometric operations on raster data. These functions enable spatial analysis, coordinate transformations, and geometric manipulation of raster datasets.

Capabilities

Reprojection

Transform raster data from one coordinate reference system to another with control over resolution, shape, and resampling methods.

def reproject(
    self,
    dst_crs: Any,
    *,
    resolution: Optional[Union[float, tuple[float, float]]] = None,
    shape: Optional[tuple[int, int]] = None,
    transform: Optional[rasterio.Affine] = None,
    resampling: rasterio.enums.Resampling = rasterio.enums.Resampling.nearest,
    nodata: Optional[float] = None,
    **kwargs
) -> Union[xarray.DataArray, xarray.Dataset]:
    """
    Reproject DataArray or Dataset to a new coordinate system.

    Parameters:
    - dst_crs: Destination CRS (EPSG, PROJ, WKT string, or CRS object)
    - resolution: Pixel size in destination CRS units (float or (x_res, y_res))
    - shape: Output shape (height, width) - cannot use with resolution
    - transform: Destination affine transform
    - resampling: Resampling algorithm (default: nearest)
    - nodata: NoData value for destination (uses source nodata if None)
    - **kwargs: Additional arguments for rasterio.warp.reproject

    Returns:
    Reprojected DataArray or Dataset
    """

Usage Examples

import rioxarray
from rasterio.enums import Resampling

# Open data in one CRS
da = rioxarray.open_rasterio('utm_data.tif')  # UTM projection

# Reproject to geographic coordinates
geo_da = da.rio.reproject('EPSG:4326')

# Reproject with specific resolution
geo_da = da.rio.reproject('EPSG:4326', resolution=0.01)  # 0.01 degree pixels

# Reproject with custom shape
geo_da = da.rio.reproject('EPSG:4326', shape=(1000, 2000))

# Reproject with different resampling
geo_da = da.rio.reproject(
    'EPSG:4326', 
    resolution=0.01,
    resampling=Resampling.bilinear
)

# Reproject with custom nodata
geo_da = da.rio.reproject('EPSG:4326', nodata=-9999)

Match Reprojection

Reproject one raster to match the grid of another raster exactly.

def reproject_match(
    self,
    match_data_array: Union[xarray.DataArray, xarray.Dataset],
    *,
    resampling: rasterio.enums.Resampling = rasterio.enums.Resampling.nearest,
    **reproject_kwargs
) -> Union[xarray.DataArray, xarray.Dataset]:
    """
    Reproject DataArray/Dataset to match another DataArray/Dataset grid.

    Parameters:
    - match_data_array: Target grid to match (DataArray or Dataset)  
    - resampling: Resampling algorithm (default: nearest)
    - **reproject_kwargs: Additional arguments for reproject

    Returns:
    Reprojected data matching target grid
    """

Usage Examples

import rioxarray

# Open two datasets with different grids
da1 = rioxarray.open_rasterio('data1.tif')  # 30m resolution
da2 = rioxarray.open_rasterio('data2.tif')  # 10m resolution

# Reproject da1 to match da2's grid exactly
da1_matched = da1.rio.reproject_match(da2)

# Now they have identical grids for analysis
assert da1_matched.rio.crs == da2.rio.crs
assert da1_matched.rio.shape == da2.rio.shape
assert da1_matched.rio.transform() == da2.rio.transform()

# Reproject with bilinear resampling
da1_matched = da1.rio.reproject_match(da2, resampling=Resampling.bilinear)

Geometry Clipping

Clip raster data using vector geometries (polygons, etc.) with support for various masking options.

def clip(
    self,
    geometries: Iterable,
    crs: Optional[Any] = None,
    *,
    all_touched: bool = False,
    drop: bool = True,
    invert: bool = False,
    from_disk: bool = False
) -> Union[xarray.DataArray, xarray.Dataset]:
    """
    Clip raster by geojson-like geometry objects.

    Parameters:
    - geometries: List of GeoJSON geometry dicts or objects with __geo_interface__
    - crs: CRS of input geometries (assumes same as dataset if None)
    - all_touched: Include all pixels touched by geometries (default: False)
    - drop: Drop data outside geometries vs. mask with nodata (default: True)
    - invert: Invert mask (set inside geometries to nodata) (default: False)
    - from_disk: Clip directly from disk for memory efficiency (default: False)

    Returns:
    Clipped DataArray or Dataset
    """

Usage Examples

import rioxarray
import json

# Load raster data
da = rioxarray.open_rasterio('large_raster.tif')

# Define clipping geometry
geometry = {
    "type": "Polygon",
    "coordinates": [[
        [-94.07955, 41.69086],
        [-94.06082, 41.69103], 
        [-94.06063, 41.67932],
        [-94.07936, 41.67915],
        [-94.07955, 41.69086]
    ]]
}

# Clip to polygon
clipped = da.rio.clip([geometry], crs='EPSG:4326')

# Clip with all touched pixels included
clipped_all = da.rio.clip([geometry], crs='EPSG:4326', all_touched=True)

# Mask (don't drop) area outside geometry
masked = da.rio.clip([geometry], crs='EPSG:4326', drop=False)

# Invert mask (mask inside geometry)
inverted = da.rio.clip([geometry], crs='EPSG:4326', invert=True)

# Clip large data from disk for memory efficiency
clipped_disk = da.rio.clip([geometry], crs='EPSG:4326', from_disk=True)

Bounding Box Clipping

Clip raster data to rectangular bounding boxes defined by coordinate extents.

def clip_box(
    self,
    minx: float,
    miny: float, 
    maxx: float,
    maxy: float,
    crs: Optional[Any] = None,
    auto_expand: bool = False,
    auto_expand_limit: int = 3
) -> Union[xarray.DataArray, xarray.Dataset]:
    """
    Clip raster data to a bounding box.

    Parameters:
    - minx: Minimum x coordinate
    - miny: Minimum y coordinate  
    - maxx: Maximum x coordinate
    - maxy: Maximum y coordinate
    - crs: CRS of bounding box coordinates (assumes same as dataset if None)
    - auto_expand: Expand bbox if needed to include full pixels (default: False)
    - auto_expand_limit: Maximum pixels to expand bbox (default: 3)

    Returns:
    Clipped DataArray or Dataset
    """

Usage Examples

import rioxarray

# Load raster data
da = rioxarray.open_rasterio('raster.tif')

# Clip to bounding box in same CRS as data
clipped = da.rio.clip_box(
    minx=100000, miny=200000,
    maxx=150000, maxy=250000
)

# Clip using geographic coordinates
clipped_geo = da.rio.clip_box(
    minx=-10, miny=40,
    maxx=10, maxy=60,
    crs='EPSG:4326'
)

# Auto-expand to include full pixels at boundaries
clipped_expanded = da.rio.clip_box(
    minx=-10, miny=40, maxx=10, maxy=60,
    crs='EPSG:4326',
    auto_expand=True
)

Padding Operations

Expand raster data by adding pixels around the edges or to encompass specific areas.

def pad_xy(
    self,
    minx: float,
    miny: float,
    maxx: float, 
    maxy: float,
    constant_values: Union[int, float] = 0
) -> xarray.DataArray:
    """
    Pad DataArray in x/y directions to include specified coordinates.

    Parameters:
    - minx: Minimum x coordinate to include
    - miny: Minimum y coordinate to include
    - maxx: Maximum x coordinate to include  
    - maxy: Maximum y coordinate to include
    - constant_values: Value to use for padding (default: 0)

    Returns:
    Padded DataArray
    """

def pad_box(
    self,
    minx: float,
    miny: float,
    maxx: float,
    maxy: float,
    crs: Optional[Any] = None,
    constant_values: Union[int, float] = 0
) -> Union[xarray.DataArray, xarray.Dataset]:
    """
    Pad Dataset/DataArray to encompass a bounding box.

    Parameters:
    - minx, miny, maxx, maxy: Bounding box coordinates
    - crs: CRS of bounding box (assumes same as dataset if None)
    - constant_values: Value for padded pixels (default: 0)

    Returns:
    Padded Dataset or DataArray
    """

Usage Examples

import rioxarray

da = rioxarray.open_rasterio('small_raster.tif')

# Pad to include specific coordinates
padded = da.rio.pad_xy(
    minx=da.rio.bounds()[0] - 1000,  # Extend 1000m in each direction
    miny=da.rio.bounds()[1] - 1000,
    maxx=da.rio.bounds()[2] + 1000,
    maxy=da.rio.bounds()[3] + 1000,
    constant_values=-9999
)

# Pad to encompass a larger area
padded_box = da.rio.pad_box(
    minx=-10, miny=40, maxx=10, maxy=60,
    crs='EPSG:4326',
    constant_values=0
)

Window and Coordinate Selection

Select data by pixel windows or coordinate ranges.

def isel_window(
    self,
    window: rasterio.windows.Window
) -> Union[xarray.DataArray, xarray.Dataset]:
    """
    Index selection using a rasterio Window.

    Parameters:
    - window: rasterio Window object defining pixel ranges

    Returns:
    Subset of data corresponding to window
    """

def slice_xy(
    self,
    minx: Optional[float] = None,
    miny: Optional[float] = None,
    maxx: Optional[float] = None,
    maxy: Optional[float] = None
) -> Union[xarray.DataArray, xarray.Dataset]:
    """
    Slice data by x/y coordinate bounds.

    Parameters:
    - minx, miny, maxx, maxy: Coordinate bounds (None to skip bound)

    Returns:
    Sliced Dataset or DataArray
    """

Usage Examples

import rioxarray
from rasterio.windows import Window

da = rioxarray.open_rasterio('raster.tif')

# Select by pixel window
window = Window(col_off=100, row_off=50, width=200, height=150)
windowed = da.rio.isel_window(window)

# Slice by coordinates
subset = da.rio.slice_xy(minx=100000, maxx=200000)

# Slice with partial bounds
subset_y = da.rio.slice_xy(miny=40, maxy=60)  # Only constrain Y

Interpolation

Fill missing or nodata values using spatial interpolation methods.

def interpolate_na(
    self,
    method: str = "linear",
    **kwargs
) -> Union[xarray.DataArray, xarray.Dataset]:
    """
    Interpolate missing/nodata values.

    Parameters:
    - method: Interpolation method ('linear', 'nearest', 'cubic', etc.)
    - **kwargs: Additional arguments for xarray.interpolate_na

    Returns:
    Data with interpolated values
    """

Usage Examples

import rioxarray

# Open data with missing values
da = rioxarray.open_rasterio('data_with_gaps.tif')

# Linear interpolation of missing values
interpolated = da.rio.interpolate_na(method='linear')

# Nearest neighbor interpolation
interpolated_nn = da.rio.interpolate_na(method='nearest')

# Cubic interpolation along specific dimensions
interpolated_cubic = da.rio.interpolate_na(
    method='cubic',
    dim=['x', 'y']
)

Resampling Methods

Available resampling algorithms from rasterio for reprojection operations:

# Common resampling methods
Resampling.nearest        # Nearest neighbor (default, preserves values)
Resampling.bilinear       # Bilinear interpolation (smooth, good for continuous data)
Resampling.cubic          # Cubic convolution (smoother than bilinear)
Resampling.cubic_spline   # Cubic spline interpolation
Resampling.lanczos        # Lanczos windowed sinc resampling
Resampling.average        # Average of all pixels (good for downsampling)
Resampling.mode           # Most common value (good for categorical data)
Resampling.gauss          # Gaussian kernel resampling

Choosing Resampling Methods

import rioxarray
from rasterio.enums import Resampling

da = rioxarray.open_rasterio('data.tif')

# For categorical data (land cover, classes)
categorical = da.rio.reproject('EPSG:4326', resampling=Resampling.nearest)

# For continuous data (temperature, elevation)
continuous = da.rio.reproject('EPSG:4326', resampling=Resampling.bilinear)

# For downsampling (reducing resolution)
downsampled = da.rio.reproject('EPSG:4326', resampling=Resampling.average)

# For high-quality upsampling
upsampled = da.rio.reproject('EPSG:4326', resampling=Resampling.lanczos)

Advanced Spatial Operations

Multi-step Processing Workflows

import rioxarray
from rasterio.enums import Resampling

# Complex spatial processing workflow
da = rioxarray.open_rasterio('source.tif')

# 1. Reproject to standard CRS
reprojected = da.rio.reproject('EPSG:4326', resampling=Resampling.bilinear)

# 2. Clip to area of interest  
geometry = {...}  # Your geometry
clipped = reprojected.rio.clip([geometry], crs='EPSG:4326')

# 3. Pad to ensure full coverage
padded = clipped.rio.pad_box(
    minx=clipped.rio.bounds()[0] - 0.1,
    miny=clipped.rio.bounds()[1] - 0.1, 
    maxx=clipped.rio.bounds()[2] + 0.1,
    maxy=clipped.rio.bounds()[3] + 0.1,
    crs='EPSG:4326'
)

# 4. Interpolate any remaining gaps
final = padded.rio.interpolate_na(method='linear')

Performance Considerations

# For large datasets, use from_disk clipping
large_clipped = large_da.rio.clip([geometry], from_disk=True)

# Use appropriate chunking before spatial operations
chunked = da.chunk({'x': 2048, 'y': 2048})
processed = chunked.rio.reproject('EPSG:4326')

# For memory-intensive operations, process in chunks
bounds = da.rio.bounds()
chunk_size = 0.1  # degrees
for minx in numpy.arange(bounds[0], bounds[2], chunk_size):
    for miny in numpy.arange(bounds[1], bounds[3], chunk_size):
        chunk = da.rio.clip_box(minx, miny, minx+chunk_size, miny+chunk_size)
        # Process chunk...

Install with Tessl CLI

npx tessl i tessl/pypi-rioxarray

docs

config-utilities.md

coordinate-systems.md

data-management.md

index.md

io-operations.md

spatial-operations.md

tile.json