tessl/pypi-spyndex

Awesome Spectral Indices in Python - comprehensive library for computing spectral indices from remote sensing data

Overview

Eval results

Files

Sample Datasets

Name: tessl/pypi-spyndex
Author: tessl

Built-in sample datasets for testing, examples, and teaching. The module ships a satellite image and a table of spectral reflectance samples, each in a format suited to its data, so users can try out spectral index computations and explore the library without any external data sources.

Capabilities

Dataset Loading Function

Opens a built-in sample dataset by name. The return type depends on the dataset: an xarray.DataArray for the satellite image and a pandas.DataFrame for the tabular reflectance samples.

def open(dataset: str) -> Any:
    """
    Open one of the sample datasets bundled with spyndex.

    The function is called through its module, i.e.
    ``spyndex.datasets.open(...)``.

    Parameters:
    - dataset: Name of the dataset to load, either "sentinel" or "spectral"

    Returns:
    The dataset, whose type depends on the requested name:
    - "sentinel": xarray.DataArray with a Sentinel-2 sample image holding the
      10 m bands B02, B03, B04 and B08 (dimensions: band, x, y)
    - "spectral": pandas.DataFrame with Landsat 8 reflectance samples and a
      "class" column giving the land cover label of each sample

    Raises:
    Exception: If the dataset name is not one of the valid names
    """

Usage Examples:

import spyndex.datasets

# Load both bundled samples by name:
# the Sentinel-2 image and the spectral reflectance table
sentinel_data, spectral_data = (
    spyndex.datasets.open(name) for name in ("sentinel", "spectral")
)

# Sentinel-2 sample image
print(type(sentinel_data))  # <class 'xarray.core.dataarray.DataArray'>
print(sentinel_data.shape)  # (4, 300, 300)

# Spectral reflectance samples
print(type(spectral_data))  # <class 'pandas.core.frame.DataFrame'>
print(spectral_data.shape)  # (120, 9)

Available Datasets

Sentinel Dataset

Multi-band Sentinel-2 image containing the four 10-meter resolution bands (blue, green, red, and near-infrared), suitable for vegetation analysis and multi-spectral index computation.

import spyndex.datasets
import spyndex

# Load Sentinel-2 sample
sentinel = spyndex.datasets.open("sentinel")

# Explore dataset structure
print(sentinel)
# Output: <xarray.DataArray (band: 4, x: 300, y: 300)>
# Coordinates:
# * band     (band) <U3 'B02' 'B03' 'B04' 'B08'
# Dimensions without coordinates: x, y

# .tolist() converts NumPy string scalars to plain Python str, so the printed
# list looks the same on NumPy 1.x and 2.x (no np.str_(...) wrappers)
print(f"Bands available: {sentinel.coords['band'].values.tolist()}")
# Output: Bands available: ['B02', 'B03', 'B04', 'B08']

print(f"Spatial dimensions: {sentinel.sizes['x']} x {sentinel.sizes['y']}")
# Output: Spatial dimensions: 300 x 300

# Select each band once and reuse it in the computations below
nir = sentinel.sel(band="B08")    # NIR band
red = sentinel.sel(band="B04")    # Red band
green = sentinel.sel(band="B03")  # Green band

# Compute spectral indices using Sentinel-2 data
ndvi = spyndex.computeIndex(
    "NDVI",
    params={
        "N": nir,
        "R": red,
    }
)

print(f"NDVI result shape: {ndvi.shape}")  # (300, 300)
# .item() extracts a plain Python float for formatting
print(f"NDVI range: {ndvi.min().item():.3f} to {ndvi.max().item():.3f}")

# Compute multiple indices
indices = spyndex.computeIndex(
    ["NDVI", "GNDVI"],
    params={
        "N": nir,
        "R": red,
        "G": green,
    }
)

print(f"Multiple indices shape: {indices.shape}")  # (2, 300, 300)
print(f"Index names: {indices.coords['index'].values.tolist()}")
# Output: Index names: ['NDVI', 'GNDVI']

Spectral Dataset

Landsat 8 surface reflectance samples representing three different land cover types, ideal for exploring spectral signatures and testing classification-oriented indices.

import spyndex.datasets
import spyndex

# Open the Landsat 8 reflectance samples
spectral = spyndex.datasets.open("spectral")

# Inspect the column layout
print(spectral.dtypes)
# Output:
# SR_B1     float64  # Coastal Aerosol
# SR_B2     float64  # Blue
# SR_B3     float64  # Green
# SR_B4     float64  # Red
# SR_B5     float64  # NIR
# SR_B6     float64  # SWIR1
# SR_B7     float64  # SWIR2
# ST_B10    float64  # Thermal
# class     object   # Land cover class
# dtype: object

print(f"Dataset shape: {spectral.shape}")  # (120, 9)
class_names = spectral['class'].unique()
print(f"Land cover classes: {class_names}")
# Output: ['Water' 'Vegetation' 'Urban']

# Summarize the NIR and Red reflectance of every land cover class
for land_class in class_names:
    in_class = spectral['class'].eq(land_class)
    class_data = spectral.loc[in_class]
    nir_mean = class_data['SR_B5'].mean()
    red_mean = class_data['SR_B4'].mean()
    print(f"\n{land_class} samples: {len(class_data)}")
    print(f"Average NIR reflectance: {nir_mean:.3f}")
    print(f"Average Red reflectance: {red_mean:.3f}")

# NDVI for every sample in the table
ndvi_params = {
    "N": spectral["SR_B5"],  # NIR
    "R": spectral["SR_B4"],  # Red
}
ndvi_all = spyndex.computeIndex("NDVI", params=ndvi_params)

# Attach NDVI as a new column on a copy, leaving the original table untouched
spectral_with_ndvi = spectral.assign(NDVI=ndvi_all)

# Per-class NDVI statistics
for land_class in spectral_with_ndvi['class'].unique():
    in_class = spectral_with_ndvi['class'].eq(land_class)
    class_ndvi = spectral_with_ndvi.loc[in_class, 'NDVI']
    ndvi_mean, ndvi_std = class_ndvi.mean(), class_ndvi.std()
    print(f"{land_class} NDVI: {ndvi_mean:.3f} ± {ndvi_std:.3f}")

Dataset Integration Examples

Complete Workflow Examples

Using datasets for comprehensive spectral index analysis:

import spyndex.datasets
import spyndex
import matplotlib.pyplot as plt
import numpy as np

def analyze_dataset_indices(dataset_name, indices_list):
    """Analyze multiple spectral indices on a sample dataset.

    For "sentinel" the indices are computed per pixel and displayed as one
    image panel per index. For "spectral" they are computed per sample and
    the mean ± standard deviation is printed for every land cover class;
    indices that cannot be computed are reported and skipped.

    Parameters:
    - dataset_name: Sample dataset to use ("sentinel" or "spectral")
    - indices_list: Index name, or list of index names, to compute

    Raises:
    ValueError: If dataset_name is not a known dataset or indices_list is empty
    """
    if dataset_name not in ("sentinel", "spectral"):
        raise ValueError(
            f"{dataset_name} is not a valid dataset. "
            "Please use one of ['sentinel','spectral']"
        )

    # Accept a single index name as well as a list of names
    if isinstance(indices_list, str):
        indices_list = [indices_list]
    else:
        indices_list = list(indices_list)
    if not indices_list:
        raise ValueError("indices_list must contain at least one index name")

    # Default values for constants that some formulas require
    # (e.g. EVI uses g, C1, C2 and L; SAVI uses L)
    constants = {
        "L": spyndex.constants.L.default,
        "g": spyndex.constants.g.default,
        "C1": spyndex.constants.C1.default,
        "C2": spyndex.constants.C2.default,
    }

    if dataset_name == "sentinel":
        data = spyndex.datasets.open("sentinel")

        # Formulas with additive constants (EVI, SAVI, ...) expect reflectance
        # in [0, 1]. NOTE(review): the sample appears to be stored as scaled
        # integers (reflectance x 10000); rescale only when values exceed 1 so
        # already-scaled data is left untouched.
        if float(data.max()) > 1:
            data = data / 10000

        # Compute indices on spatial data
        results = spyndex.computeIndex(
            indices_list,
            params={
                "N": data.sel(band="B08"),  # NIR
                "R": data.sel(band="B04"),  # Red
                "G": data.sel(band="B03"),  # Green
                "B": data.sel(band="B02"),  # Blue
                **constants,
            }
        )

        # A single requested index comes back without an "index" dimension;
        # add it so the selection below works uniformly
        if "index" not in results.dims:
            results = results.expand_dims(index=indices_list)

        # Size the grid to the number of indices (2 columns at most) instead
        # of a fixed 2x2 layout that would silently drop extra indices
        n_panels = len(indices_list)
        n_cols = min(2, n_panels)
        n_rows = (n_panels + n_cols - 1) // n_cols
        fig, axes = plt.subplots(
            n_rows, n_cols, figsize=(6 * n_cols, 5 * n_rows), squeeze=False
        )
        axes = axes.flatten()

        # Visualize results
        for ax, idx_name in zip(axes, indices_list):
            im = ax.imshow(results.sel(index=idx_name), cmap='RdYlGn')
            ax.set_title(f"{idx_name}")
            ax.axis('off')
            plt.colorbar(im, ax=ax, shrink=0.8)

        # Hide any panels left over when the grid is not completely filled
        for ax in axes[n_panels:]:
            ax.axis('off')

        plt.tight_layout()
        plt.show()

    else:
        data = spyndex.datasets.open("spectral")

        # Band mapping for the Landsat 8 columns, including the SWIR bands
        # needed by indices such as NBR
        params = {
            "N": data["SR_B5"],   # NIR
            "R": data["SR_B4"],   # Red
            "G": data["SR_B3"],   # Green
            "B": data["SR_B2"],   # Blue
            "S1": data["SR_B6"],  # SWIR1
            "S2": data["SR_B7"],  # SWIR2
            **constants,
        }

        # Compute indices on tabular data; report and skip the ones that fail
        results = {}
        for idx_name in indices_list:
            try:
                results[idx_name] = spyndex.computeIndex(idx_name, params=params)
            except Exception as e:
                print(f"Could not compute {idx_name}: {e}")

        # Analyze by land cover class
        for land_class in data['class'].unique():
            class_mask = data['class'] == land_class
            print(f"\n{land_class} class:")

            for idx_name, values in results.items():
                class_values = values[class_mask]
                print(f"  {idx_name}: {class_values.mean():.3f} ± {class_values.std():.3f}")

# Example usage: one call per sample dataset
analyze_dataset_indices(
    dataset_name="sentinel",
    indices_list=["NDVI", "GNDVI", "EVI", "CI"],
)
analyze_dataset_indices(
    dataset_name="spectral",
    indices_list=["NDVI", "NDWI", "NBR"],
)

Machine Learning Integration

Using datasets for supervised learning and classification:

import spyndex.datasets
import spyndex
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

def create_spectral_features():
    """Create a feature matrix from spectral indices and original bands.

    Loads the "spectral" sample dataset, computes a set of vegetation-related
    indices for every sample, and appends the original reflectance bands.
    Indices that cannot be computed are reported with the underlying error
    and left out of the feature matrix.

    Returns:
    Tuple (features, y):
    - features: pandas.DataFrame with one column per computed index followed
      by the SR_B2-SR_B7 band columns
    - y: pandas.Series with the land cover class of each sample
    """
    # Load dataset
    data = spyndex.datasets.open("spectral")

    # Define vegetation-related indices
    vegetation_indices = ["NDVI", "GNDVI", "SAVI", "EVI", "CI", "RDVI"]

    # Build the parameters once: bands plus the default constants that some
    # formulas need (L for SAVI; g, C1, C2 and L for EVI)
    params = {
        "N": data["SR_B5"],   # NIR
        "R": data["SR_B4"],   # Red
        "G": data["SR_B3"],   # Green
        "B": data["SR_B2"],   # Blue
        "L": spyndex.constants.L.default,
        "g": spyndex.constants.g.default,
        "C1": spyndex.constants.C1.default,
        "C2": spyndex.constants.C2.default,
    }

    # Compute all indices, keeping the sample index of the source table
    features = pd.DataFrame(index=data.index)

    for idx_name in vegetation_indices:
        try:
            features[idx_name] = spyndex.computeIndex(idx_name, params=params)
        except Exception as err:
            # Report the real cause instead of assuming missing parameters
            print(f"Skipping {idx_name}: {err}")

    # Add original bands as features
    band_features = ["SR_B2", "SR_B3", "SR_B4", "SR_B5", "SR_B6", "SR_B7"]
    features = pd.concat([features, data[band_features]], axis=1)

    # Target classes
    y = data["class"]

    return features, y

# Build the feature matrix and the class labels
X, y = create_spectral_features()

print(f"Feature matrix shape: {X.shape}")
print(f"Features: {list(X.columns)}")
print(f"Classes: {y.unique()}")

# Hold out 30% of the samples for evaluation
split = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split

# Fit a random forest on the training portion (fit returns the estimator)
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Evaluate on the held-out samples
y_pred = rf.predict(X_test)
report = classification_report(y_test, y_pred)
print("\nClassification Results:")
print(report)

# Rank features by their importance in the fitted forest
importance_columns = {
    'feature': X.columns,
    'importance': rf.feature_importances_,
}
feature_importance = pd.DataFrame(importance_columns).sort_values(
    by='importance', ascending=False
)

print("\nTop 10 Most Important Features:")
print(feature_importance.head(10))

Dataset Specifications

Sentinel Dataset Details

Source: Sentinel-2 MSI Level-2A
Spatial Resolution: 10 meters
Bands: B02 (Blue), B03 (Green), B04 (Red), B08 (NIR)
Array Size: 300 × 300 pixels
Data Type: xarray.DataArray
Coordinate System: Standard x, y pixel coordinates
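Value Range: Surface reflectance. Note: the raw sample appears to be stored as scaled values (reflectance × 10000); check `sentinel.max()` and divide by 10000 to obtain the 0-1 range before computing indices that use additive constants (e.g. EVI, SAVI).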

Spectral Dataset Details

Source: Landsat 8 OLI/TIRS Level-2
Samples: 120 total (40 per land cover class)
Classes: Water, Vegetation, Urban
Bands: SR_B1-B7 (surface reflectance), ST_B10 (thermal)
Data Type: pandas.DataFrame
Value Range: Surface reflectance values and brightness temperature

Error Handling

import spyndex.datasets

# Requesting an unknown dataset name raises an exception
unknown_name = "nonexistent"
try:
    invalid_data = spyndex.datasets.open(unknown_name)
except Exception as err:
    print(f"Error: {err}")
    # Output: Error: nonexistent is not a valid dataset. Please use one of ['sentinel','spectral']

# Only these dataset names are accepted
valid_datasets = ["sentinel", "spectral"]
for dataset in valid_datasets:
    data = spyndex.datasets.open(dataset)
    status = f"Successfully loaded {dataset} dataset"
    print(status)

The sample datasets provide immediately usable data for testing spectral index computations, developing analysis workflows, and learning about remote sensing applications without requiring external data acquisition or preprocessing.

Install with Tessl CLI