Summarize geospatial raster datasets based on vector geometries
—
Calculate summary statistics of raster values aggregated to vector geometries. Zonal statistics analyze how raster data varies within polygon boundaries, supporting both numerical and categorical analysis with extensive customization options.
Calculate summary statistics for vector geometries overlaid on raster data.
def zonal_stats(vectors, raster, layer=0, band=1, nodata=None, affine=None,
stats=None, all_touched=False, categorical=False,
category_map=None, add_stats=None, zone_func=None,
raster_out=False, prefix=None, geojson_out=False,
boundless=True, progress=False, **kwargs):
"""
Calculate zonal statistics for vector geometries.
Parameters:
- vectors: Path to vector source or geo-like python objects
- raster: Path to raster file or numpy array (requires affine if ndarray)
- layer: Vector layer to use, by name or number (default: 0)
- band: Raster band number, counting from 1 (default: 1)
- nodata: Override raster nodata value (default: None)
- affine: Affine transform, required for numpy arrays (default: None)
- stats: Statistics to calculate - list/string or None for defaults (default: None)
- all_touched: Include all touched pixels vs center-point only (default: False)
- categorical: Enable categorical analysis mode (default: False)
- category_map: Dict mapping raster values to category names (default: None)
- add_stats: Dict of custom statistics functions (default: None)
- zone_func: Function to apply to zone array before stats (default: None)
- raster_out: Include masked array in output (default: False)
- prefix: Prefix for output dictionary keys (default: None)
- geojson_out: Return GeoJSON features with stats as properties (default: False)
- boundless: Allow features beyond raster extent (default: True)
- progress: Show progress bar using tqdm, requires tqdm installation (default: False)
Returns:
List of dictionaries with statistics for each feature
"""
def gen_zonal_stats(vectors, raster, layer=0, band=1, nodata=None, affine=None,
stats=None, all_touched=False, categorical=False,
category_map=None, add_stats=None, zone_func=None,
raster_out=False, prefix=None, geojson_out=False,
boundless=True, **kwargs):
"""
Generator version of zonal statistics.
Parameters are identical to zonal_stats().
Returns:
Generator yielding dictionaries with statistics for each feature
"""
def raster_stats(*args, **kwargs):
"""
Deprecated alias for zonal_stats. Will be removed in version 1.0.
Use zonal_stats instead.
"""Default and extended statistics options for numerical analysis.
DEFAULT_STATS = ["count", "min", "max", "mean"]
VALID_STATS = [
"count", # Number of valid pixels
"min", # Minimum value
"max", # Maximum value
"mean", # Mean value
"sum", # Sum of values
"std", # Standard deviation
"median", # Median value
"majority", # Most frequent value
"minority", # Least frequent value
"unique", # Count of unique values
"range", # max - min
"nodata", # Count of nodata pixels
"nan" # Count of NaN pixels
]
# Percentiles supported via "percentile_N" format (e.g., "percentile_95")Helper functions for statistics processing and validation.
def check_stats(stats, categorical):
"""
Validate and normalize statistics parameter.
Parameters:
- stats: Statistics specification (str, list, or None)
- categorical: Whether in categorical mode
Returns:
Tuple of (validated_stats_list, run_count_flag)
"""
def get_percentile(stat):
"""
Extract percentile value from statistic name.
Parameters:
- stat: Statistic name starting with "percentile_"
Returns:
Float percentile value (0-100)
Raises:
ValueError if invalid percentile format or value
"""
def stats_to_csv(stats):
"""
Convert statistics list to CSV format.
Parameters:
- stats: List of statistics dictionaries
Returns:
CSV string with headers and data
"""Functions for analyzing categorical raster data with value mappings.
def remap_categories(category_map, stats):
"""
Remap category keys using category mapping.
Parameters:
- category_map: Dict mapping raster values to category names
- stats: Statistics dictionary with numeric keys
Returns:
Statistics dictionary with remapped category names as keys
"""Functions for converting geometries to raster-compatible formats.
def rasterize_geom(geom, like, all_touched=False):
"""
Rasterize geometry to boolean mask array.
Parameters:
- geom: GeoJSON geometry dictionary
- like: Raster object with desired shape and transform
- all_touched: Include all touched pixels vs center-point only
Returns:
Boolean numpy array mask
"""
def boxify_points(geom, rast):
"""
Convert Point/MultiPoint geometries to box polygons for rasterization.
Creates boxes 99% of cell size, centered on raster cells.
Parameters:
- geom: Point or MultiPoint geometry
- rast: Raster object for cell size calculation
Returns:
MultiPolygon geometry with box polygons
"""
def key_assoc_val(d, func, exclude=None):
"""
Return key associated with value returned by function.
Used for majority/minority statistics.
Parameters:
- d: Dictionary to search
- func: Function to apply to values (min, max, etc.)
- exclude: Value to exclude from search
Returns:
Key associated with function result
"""from rasterstats import zonal_stats
# Calculate default statistics for polygons
stats = zonal_stats("watersheds.shp", "elevation.tif")
print(stats[0]) # {'count': 1000, 'min': 100.0, 'max': 500.0, 'mean': 350.2}
# Specify custom statistics
stats = zonal_stats("polygons.shp", "temperature.tif",
stats=['min', 'max', 'mean', 'std', 'percentile_95'])
# Use all available statistics
stats = zonal_stats("zones.shp", "data.tif", stats="ALL")
# Show progress bar for large datasets (requires pip install rasterstats[progress])
stats = zonal_stats("large_dataset.shp", "raster.tif", progress=True)# Analyze land cover categories
category_map = {1: 'Forest', 2: 'Water', 3: 'Urban', 4: 'Agriculture'}
stats = zonal_stats("counties.shp", "landcover.tif",
categorical=True, category_map=category_map)
print(stats[0]) # {'Forest': 500, 'Water': 50, 'Urban': 200, 'Agriculture': 250}import numpy as np
def coefficient_of_variation(masked_array):
return np.std(masked_array) / np.mean(masked_array)
def pixel_density(masked_array, feature_properties):
area_km2 = feature_properties.get('area_km2', 1.0)
return masked_array.count() / area_km2
custom_stats = {
'cv': coefficient_of_variation,
'density': pixel_density
}
stats = zonal_stats("polygons.shp", "population.tif",
add_stats=custom_stats, stats=['mean', 'cv', 'density'])import numpy as np
from affine import Affine
# Create sample data
raster_data = np.random.rand(100, 100) * 1000
transform = Affine.translation(0, 100) * Affine.scale(1, -1)
# Define polygons as GeoJSON
polygons = [{
'type': 'Feature',
'geometry': {
'type': 'Polygon',
'coordinates': [[(10, 10), (50, 10), (50, 50), (10, 50), (10, 10)]]
},
'properties': {'id': 1}
}]
stats = zonal_stats(polygons, raster_data, affine=transform,
stats=['count', 'mean', 'std'])# Return results as GeoJSON features with statistics as properties
features = zonal_stats("watersheds.shp", "rainfall.tif",
geojson_out=True, prefix="rain_",
stats=['mean', 'sum', 'count'])
# Each feature retains original geometry and properties plus statistics
print(features[0]['properties'])
# {'name': 'Watershed A', 'rain_mean': 45.2, 'rain_sum': 45200, 'rain_count': 1000}# Include raster arrays in output
stats = zonal_stats("polygons.shp", "elevation.tif", raster_out=True)
mini_array = stats[0]['mini_raster_array'] # Masked numpy array
mini_affine = stats[0]['mini_raster_affine'] # Corresponding transform
# Custom zone processing function
def apply_mask(zone_array):
# Only analyze pixels above threshold
return np.ma.masked_where(zone_array < 100, zone_array)
stats = zonal_stats("zones.shp", "data.tif", zone_func=apply_mask)
# Handle features extending beyond raster
stats = zonal_stats("large_polygons.shp", "small_raster.tif",
boundless=True) # Allows partial coveragefrom typing import Union, List, Dict, Any, Optional, Callable
from numpy import ndarray, ma
from affine import Affine
StatsSpec = Union[str, List[str], None]
CategoryMapping = Dict[Union[int, float], str]
CustomStats = Dict[str, Callable[[ma.MaskedArray], Union[int, float]]]
ZoneFunction = Callable[[ma.MaskedArray], Optional[ma.MaskedArray]]
ZonalOutput = Dict[str, Union[int, float, None, ma.MaskedArray, Affine]]Install with Tessl CLI
npx tessl i tessl/pypi-rasterstats