GSTools is a comprehensive geostatistical toolbox for Python providing advanced spatial analysis and modeling capabilities.
—
Variogram estimation functions calculate empirical variograms from spatial data to characterize spatial correlation structure. These functions support structured and unstructured data, directional analysis, and various estimator types.
Primary function for calculating empirical variograms from spatial data with flexible binning and estimation options.
def vario_estimate(pos, field, bin_edges=None, sampling_size=None,
sampling_seed=None, estimator='matheron', latlon=False,
direction=None, angles=None, angles_tol=np.pi/8, bandwidth=None,
no_data=np.nan, mask=np.ma.nomask, mesh_type='unstructured',
return_counts=False, mean=None, normalizer=None, trend=None,
fit_normalizer=False, geo_scale=RADIAN_SCALE, **std_bins):
"""
Estimate empirical variogram from spatial data.
Parameters:
- pos (array-like): Spatial positions of data points, shape (dim, n_points)
- field (array-like): Field values at positions, shape (n_points,)
- bin_edges (array-like): Distance bin edges for variogram calculation
- sampling_size (int): Subsample data for large datasets (default: use all)
- sampling_seed (int): Random seed for subsampling
- estimator (str): Variogram estimator ('matheron', 'cressie')
- direction (array-like): Direction for anisotropic analysis
- angles (array-like): Directional angles for anisotropic analysis
- angles_tol (float): Angular tolerance for directional analysis (default: π/8)
- bandwidth (float): Angular bandwidth for directional analysis
- no_data (float): No-data value to ignore (default: np.nan)
- mask (array-like): Data mask (default: np.ma.nomask)
- mesh_type (str): Mesh type ('structured', 'unstructured')
- return_counts (bool): Return number of point pairs per bin
- mean (float or callable): Mean value or function
- normalizer (Normalizer): Data normalization method
- trend (callable): Trend function
- fit_normalizer (bool): Fit normalizer to data
- geo_scale (float): Geographic scaling factor (default: RADIAN_SCALE)
- **std_bins: Additional parameters for standard binning
Returns:
- bin_centers (array): Distance bin centers
- variogram (array): Empirical variogram values
- counts (array): Point pair counts per bin (if return_count=True)
"""
def vario_estimate_axis(field, direction='x', estimator='matheron', no_data=np.nan):
"""
Estimate directional variogram along specified axis.
Parameters:
- field (array-like): Field values on structured grid
- direction (str): Axis direction ('x', 'y', 'z' or 0, 1, 2)
- estimator (str): Variogram estimator type ('matheron', 'cressie')
- no_data (float): No-data value to ignore (default: np.nan)
Returns:
- bin_centers (array): Distance bin centers
- variogram (array): Directional variogram values
- counts (array): Point pair counts (if return_count=True)
"""
def vario_estimate_structured(pos, field, direction='x', bin_edges=None,
estimator='matheron', return_count=False):
"""
Estimate variogram on structured grid data.
Parameters:
- pos (list): Grid coordinate arrays [x, y, z]
- field (array): Field values on structured grid
- direction (str or int): Direction for variogram ('x', 'y', 'z' or 0, 1, 2)
- bin_edges (array-like): Distance bin edges
- estimator (str): Variogram estimator type
- return_count (bool): Return point pair counts
Returns:
- bin_centers (array): Distance bin centers
- variogram (array): Structured grid variogram values
- counts (array): Point pair counts (if return_count=True)
"""
def vario_estimate_unstructured(pos, field, bin_edges=None, sampling_size=None,
estimator='matheron', return_count=False,
num_threads=None):
"""
Estimate variogram from unstructured point data.
Parameters:
- pos (array-like): Point positions, shape (dim, n_points)
- field (array-like): Field values, shape (n_points,)
- bin_edges (array-like): Distance bin edges
- sampling_size (int): Subsample size for large datasets
- estimator (str): Variogram estimator type
- return_count (bool): Return point pair counts
- num_threads (int): Number of parallel threads
Returns:
- bin_centers (array): Distance bin centers
- variogram (array): Unstructured variogram values
- counts (array): Point pair counts (if return_count=True)
"""Functions for creating appropriate distance bins for variogram estimation.
def standard_bins(pos=None, dim=2, latlon=False, mesh_type='unstructured',
bin_no=None, max_dist=None, geo_scale=RADIAN_SCALE):
"""
Generate standard distance bins for variogram estimation.
Parameters:
- pos (array-like): Spatial positions, shape (dim, n_points) (default: None)
- dim (int): Spatial dimension (default: 2)
- latlon (bool): Use geographic coordinates (default: False)
- mesh_type (str): Mesh type ('structured', 'unstructured')
- bin_no (int): Number of distance bins (default: None for auto)
- max_dist (float): Maximum distance for binning (default: auto-detect)
- geo_scale (float): Geographic scaling factor (default: RADIAN_SCALE)
Returns:
- bin_edges (array): Distance bin edges, shape (bin_no+1,)
"""Classical variogram estimator (default):
γ(h) = 1/(2N(h)) * Σ[Z(xi) - Z(xi+h)]²Robust estimator less sensitive to outliers:
γ(h) = 1/(2N(h)) * (Σ|Z(xi) - Z(xi+h)|^0.5)^4 / (0.457 + 0.494/N(h))import gstools as gs
import numpy as np
# Generate sample data
model = gs.Exponential(dim=2, var=2.0, len_scale=10.0)
srf = gs.SRF(model, seed=12345)
# Create spatial positions
x = np.arange(0, 50, 1.0)
y = np.arange(0, 30, 1.0)
pos = [np.meshgrid(x, y)[i].flatten() for i in range(2)]
field = srf.unstructured(pos)
# Generate standard bins
bin_edges = gs.standard_bins(pos, bin_no=30, max_dist=25)
# Estimate empirical variogram
bin_centers, gamma, counts = gs.vario_estimate(
pos, field, bin_edges, return_count=True
)
# Plot results
import matplotlib.pyplot as plt
plt.plot(bin_centers, gamma, 'o-', label='Empirical')
plt.plot(bin_centers, model.variogram(bin_centers), '-', label='Theoretical')
plt.xlabel('Distance')
plt.ylabel('Variogram')
plt.legend()
plt.show()# Estimate variogram in different directions
angles = [0, 45, 90, 135] # Degrees
bandwidth = 22.5 # Angular bandwidth
directional_variograms = {}
for angle in angles:
angle_rad = np.deg2rad(angle)
bin_centers, gamma = gs.vario_estimate(
pos, field, bin_edges,
angles=[angle_rad],
bandwidth=np.deg2rad(bandwidth)
)
directional_variograms[angle] = (bin_centers, gamma)
# Plot directional variograms
for angle, (bins, gamma) in directional_variograms.items():
plt.plot(bins, gamma, 'o-', label=f'{angle}°')
plt.xlabel('Distance')
plt.ylabel('Variogram')
plt.legend()
plt.show()# Variogram along specific axes
x_bins, x_gamma = gs.vario_estimate_axis(pos, field, direction=0) # X-direction
y_bins, y_gamma = gs.vario_estimate_axis(pos, field, direction=1) # Y-direction
plt.plot(x_bins, x_gamma, 'o-', label='X-direction')
plt.plot(y_bins, y_gamma, 's-', label='Y-direction')
plt.xlabel('Distance')
plt.ylabel('Variogram')
plt.legend()
plt.show()# For data on regular grid
X, Y = np.meshgrid(x, y)
grid_field = srf.structured([x, y])
# Estimate along grid directions
x_bins, x_gamma = gs.vario_estimate_structured(
[x, y], grid_field, direction='x'
)
y_bins, y_gamma = gs.vario_estimate_structured(
[x, y], grid_field, direction='y'
)# For very large datasets, use subsampling
large_pos = np.random.rand(2, 100000) * 100
large_field = srf.unstructured(large_pos)
# Subsample for variogram estimation
bin_centers, gamma = gs.vario_estimate(
large_pos, large_field, bin_edges,
sampling_size=10000, # Use 10k points for estimation
sampling_seed=42 # Reproducible subsampling
)# Use Cressie estimator for data with outliers
bin_centers, gamma_matheron = gs.vario_estimate(
pos, field, bin_edges, estimator='matheron'
)
bin_centers, gamma_cressie = gs.vario_estimate(
pos, field, bin_edges, estimator='cressie'
)
plt.plot(bin_centers, gamma_matheron, 'o-', label='Matheron')
plt.plot(bin_centers, gamma_cressie, 's-', label='Cressie')
plt.xlabel('Distance')
plt.ylabel('Variogram')
plt.legend()
plt.show()# For lat/lon data
lat = np.random.uniform(40, 45, 1000) # Latitude
lon = np.random.uniform(-75, -70, 1000) # Longitude
geo_pos = [lon, lat] # Note: lon first, then lat
geo_field = np.random.randn(1000)
# Variogram with geographic distances
bin_centers, gamma = gs.vario_estimate(
geo_pos, geo_field, bin_edges,
latlon=True # Use great circle distances
)# Define multiple directions for anisotropy analysis
n_dirs = 8
angles = np.linspace(0, np.pi, n_dirs, endpoint=False)
bandwidth = np.pi / (2 * n_dirs)
# Calculate variogram for each direction
aniso_results = []
for angle in angles:
bins, gamma = gs.vario_estimate(
pos, field, bin_edges,
angles=[angle],
bandwidth=bandwidth,
separate_dirs=False
)
aniso_results.append((np.rad2deg(angle), bins, gamma))
# Plot anisotropy pattern
fig, axes = plt.subplots(2, 4, figsize=(15, 8))
for i, (angle, bins, gamma) in enumerate(aniso_results):
ax = axes[i//4, i%4]
ax.plot(bins, gamma, 'o-')
ax.set_title(f'Direction: {angle:.0f}°')
ax.set_xlabel('Distance')
ax.set_ylabel('Variogram')
plt.tight_layout()
plt.show()# Use multiple threads for faster computation
bin_centers, gamma = gs.vario_estimate(
pos, field, bin_edges,
num_threads=4 # Use 4 CPU cores
)# Create custom distance bins
# Finer resolution at short distances
short_bins = np.linspace(0, 5, 20)
long_bins = np.linspace(5, 50, 15)
custom_bins = np.concatenate([short_bins, long_bins[1:]])
bin_centers, gamma = gs.vario_estimate(pos, field, custom_bins)# Get detailed statistics about variogram estimation
bin_centers, gamma, counts = gs.vario_estimate(
pos, field, bin_edges, return_count=True
)
# Calculate weights based on pair counts
weights = counts / np.sum(counts)
# Weighted average variogram value
weighted_mean = np.average(gamma, weights=weights)
print(f"Total point pairs: {np.sum(counts)}")
print(f"Average pairs per bin: {np.mean(counts)}")
print(f"Weighted mean variogram: {weighted_mean:.3f}")# Estimate variogram and fit model
bin_centers, gamma = gs.vario_estimate(pos, field, bin_edges)
# Fit exponential model to empirical variogram
fit_model = gs.Exponential(dim=2)
fit_results = fit_model.fit_variogram(bin_centers, gamma)
print(f"Fitted variance: {fit_model.var:.3f}")
print(f"Fitted length scale: {fit_model.len_scale:.3f}")
print(f"Fitted nugget: {fit_model.nugget:.3f}")
# Compare empirical and fitted variograms
plt.plot(bin_centers, gamma, 'o', label='Empirical')
plt.plot(bin_centers, fit_model.variogram(bin_centers), '-', label='Fitted')
plt.xlabel('Distance')
plt.ylabel('Variogram')
plt.legend()
plt.show()Variogram estimation functions return:
Common properties:
Install with Tessl CLI
npx tessl i tessl/pypi-gstools