Comprehensive toolkit for analyzing single-cell gene expression data with scalable Python implementation supporting preprocessing, visualization, clustering, trajectory inference, and differential expression testing.
—
Scanpy provides specialized functions for analyzing spatial transcriptomics data, including spatial statistics, visualization, and neighborhood analysis for spatially resolved single-cell data. These tools are designed to work with data from platforms like 10x Visium, Slide-seq, and other spatial transcriptomics technologies.
Load spatial transcriptomics data with coordinate information.
def read_visium(path, genome=None, count_file='filtered_feature_bc_matrix.h5', library_id=None, load_images=True, source_image_path=None):
"""
Read 10x Visium spatial transcriptomics data.
Parameters:
- path (str): Path to Visium output directory
- genome (str, optional): Genome to read from h5 file
- count_file (str): Count matrix filename
- library_id (str, optional): Library identifier for multiple samples
- load_images (bool): Load tissue images
- source_image_path (str, optional): Custom path to images
Returns:
AnnData: Spatial transcriptomics data with coordinates and images
"""Calculate spatial autocorrelation and neighborhood statistics.
import scanpy as scdef morans_i(adata_or_graph, vals=None, *, use_graph=None, layer=None, obsm=None, obsp=None, use_raw=False):
"""
Calculate Moran's I Global Autocorrelation Statistic.
Moran's I measures spatial autocorrelation on a graph. Commonly used in
spatial data analysis to assess autocorrelation on a 2D grid.
Parameters:
- adata_or_graph (AnnData or sparse matrix): AnnData object or graph matrix
- vals (array, optional): Values to calculate Moran's I for
- use_graph (str, optional): Key for graph in adata (default: neighbors connectivities)
- layer (str, optional): Key for adata.layers to choose vals
- obsm (str, optional): Key for adata.obsm to choose vals
- obsp (str, optional): Key for adata.obsp to choose vals
- use_raw (bool): Whether to use adata.raw.X for vals
Returns:
array or float: Moran's I statistic(s)
"""
def gearys_c(adata_or_graph, vals=None, *, use_graph=None, layer=None, obsm=None, obsp=None, use_raw=False):
"""
Calculate Geary's C Global Autocorrelation Statistic.
Geary's C measures spatial autocorrelation - values close to 0 indicate
positive spatial autocorrelation, values close to 2 indicate negative
spatial autocorrelation.
Parameters:
- adata_or_graph (AnnData or sparse matrix): AnnData object or graph matrix
- vals (array, optional): Values to calculate Geary's C for
- use_graph (str, optional): Key for graph in adata (default: neighbors connectivities)
- layer (str, optional): Key for adata.layers to choose vals
- obsm (str, optional): Key for adata.obsm to choose vals
- obsp (str, optional): Key for adata.obsp to choose vals
- use_raw (bool): Whether to use adata.raw.X for vals
Returns:
array or float: Geary's C statistic(s)
"""
def confusion_matrix(orig, new, data=None, *, normalize=True):
"""
Create a labeled confusion matrix from original and new labels.
Parameters:
- orig (array or str): Original labels or column name in data
- new (array or str): New labels or column name in data
- data (DataFrame, optional): DataFrame containing label columns
- normalize (bool): Whether to normalize the confusion matrix
Returns:
DataFrame: Labeled confusion matrix
"""Visualize spatial transcriptomics data with coordinate information.
def spatial(adata, basis='spatial', color=None, use_raw=None, sort_order=True, alpha=None, groups=None, components=None, dimensions=None, layer=None, bw=None, contour=False, title=None, save=None, ax=None, return_fig=None, img_key=None, crop_coord=None, alpha_img=1.0, bw_method='scott', bw_adjust=1, **kwargs):
"""
Plot spatial transcriptomics data.
Parameters:
- adata (AnnData): Annotated data object with spatial coordinates
- basis (str): Key in obsm for spatial coordinates
- color (str or list, optional): Keys for coloring spots
- use_raw (bool, optional): Use raw attribute for gene expression
- sort_order (bool): Sort points by color values
- alpha (float, optional): Transparency of spots
- groups (str or list, optional): Restrict to specific groups
- components (str or list, optional): Spatial components to plot
- dimensions (tuple, optional): Dimensions to plot
- layer (str, optional): Data layer to use
- bw (str or float, optional): Bandwidth for density estimation
- contour (bool): Add contour lines for continuous values
- title (str, optional): Plot title
- save (str, optional): Save figure to file
- ax (Axes, optional): Matplotlib axes object
- return_fig (bool, optional): Return figure object
- img_key (str, optional): Key for tissue image in uns
- crop_coord (tuple, optional): Coordinates for cropping image
- alpha_img (float): Transparency of background image
- bw_method (str): Method for bandwidth estimation
- bw_adjust (float): Bandwidth adjustment factor
- **kwargs: Additional plotting parameters
Returns:
None or Figure: Plot or figure object (if return_fig=True)
"""Analyze spatial neighborhoods and local patterns.
def spatial_neighbors(adata, coord_type='generic', n_rings=1, n_neighs=6, radius=None, set_diag=False, key_added='spatial', copy=False):
"""
Compute spatial neighborhood graph.
Parameters:
- adata (AnnData): Annotated data object with spatial coordinates
- coord_type (str): Type of coordinates ('visium', 'generic')
- n_rings (int): Number of rings for Visium hexagonal grid
- n_neighs (int): Number of neighbors for generic coordinates
- radius (float, optional): Radius for neighborhood definition
- set_diag (bool): Set diagonal of adjacency matrix
- key_added (str): Key for storing spatial graph
- copy (bool): Return copy
Returns:
AnnData or None: Object with spatial graph (if copy=True)
"""
def spatial_autocorr(adata, mode='moran', genes=None, n_perms=None, n_jobs=1, copy=False, **kwargs):
"""
Calculate spatial autocorrelation for multiple genes.
Parameters:
- adata (AnnData): Annotated data object
- mode (str): Autocorrelation method ('moran', 'geary')
- genes (list, optional): Genes to analyze
- n_perms (int, optional): Permutations for significance
- n_jobs (int): Number of parallel jobs
- copy (bool): Return copy
- **kwargs: Additional parameters
Returns:
AnnData or None: Object with autocorrelation results (if copy=True)
"""Identify spatially variable genes and expression patterns.
def spatial_variable_genes(adata, layer=None, n_top_genes=None, min_counts=3, alpha=0.05, copy=False):
"""
Identify spatially variable genes.
Parameters:
- adata (AnnData): Annotated data object
- layer (str, optional): Data layer to use
- n_top_genes (int, optional): Number of top genes to return
- min_counts (int): Minimum counts per gene
- alpha (float): Significance threshold
- copy (bool): Return copy
Returns:
AnnData or None: Object with spatial variability results (if copy=True)
"""
def spatial_domains(adata, resolution=1.0, key_added='spatial_domains', copy=False, **kwargs):
"""
Identify spatial domains using clustering on spatial neighbors.
Parameters:
- adata (AnnData): Annotated data object with spatial graph
- resolution (float): Clustering resolution
- key_added (str): Key for storing domain labels
- copy (bool): Return copy
- **kwargs: Additional clustering parameters
Returns:
AnnData or None: Object with spatial domains (if copy=True)
"""Analyze trajectories and gradients in spatial context.
def spatial_gradient(adata, genes, coord_type='generic', n_neighbors=10, copy=False):
"""
Calculate spatial gradients for gene expression.
Parameters:
- adata (AnnData): Annotated data object
- genes (list): Genes to calculate gradients for
- coord_type (str): Type of spatial coordinates
- n_neighbors (int): Number of neighbors for gradient calculation
- copy (bool): Return copy
Returns:
AnnData or None: Object with gradient information (if copy=True)
"""
def spatial_velocity(adata, velocity_graph='velocity_graph', spatial_graph='spatial_connectivities', copy=False):
"""
Project RNA velocity onto spatial coordinates.
Parameters:
- adata (AnnData): Annotated data object with velocity and spatial data
- velocity_graph (str): Key for velocity graph
- spatial_graph (str): Key for spatial connectivity
- copy (bool): Return copy
Returns:
AnnData or None: Object with spatial velocity (if copy=True)
"""import scanpy as sc
import matplotlib.pyplot as plt
# Load Visium data
adata = sc.read_visium('path/to/visium/output/')
# Basic preprocessing
sc.pp.filter_genes(adata, min_cells=10)
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
# Visualize total counts and gene counts
sc.pl.spatial(adata, color=['total_counts', 'n_genes_by_counts'],
ncols=2, size=1.5)
# Visualize specific genes
genes_of_interest = ['GAPDH', 'ACTB', 'MYC']
sc.pl.spatial(adata, color=genes_of_interest, ncols=3, size=1.5)# Calculate Moran's I for spatial autocorrelation
sc.metrics.morans_i(adata, n_perms=100)
# Get top spatially variable genes
spatial_genes = adata.var.sort_values('morans_i', ascending=False).index[:20]
# Visualize spatially variable genes
sci.pl.spatial(adata, color=spatial_genes[:6], ncols=3)
# Calculate Geary's C as alternative measure
sc.metrics.gearys_c(adata, genes=spatial_genes[:10])
# Compare spatial statistics
spatial_stats = adata.var[['morans_i', 'gearys_c']].dropna()
plt.scatter(spatial_stats['morans_i'], spatial_stats['gearys_c'])
plt.xlabel("Moran's I")
plt.ylabel("Geary's C")
plt.show()# Compute spatial neighborhood graph
sc.pp.spatial_neighbors(adata, coord_type='visium', n_rings=1)
# Identify spatial domains using Leiden clustering
sc.tl.leiden(adata, adjacency=adata.obsp['spatial_connectivities'],
key_added='spatial_domains', resolution=0.5)
# Visualize spatial domains
sc.pl.spatial(adata, color='spatial_domains', legend_loc='right margin')
# Find marker genes for spatial domains
sc.tl.rank_genes_groups(adata, 'spatial_domains', method='wilcoxon')
sc.pl.rank_genes_groups(adata, n_genes=5, sharey=False)# Calculate spatial gradients
import numpy as np
gradient_genes = ['VEGFA', 'HIF1A', 'EGFR']
sc.tl.spatial_gradient(adata, genes=gradient_genes)
# Visualize gradients
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
for i, gene in enumerate(gradient_genes):
# Original expression
sc.pl.spatial(adata, color=gene, ax=axes[0, i], show=False,
title=f'{gene} expression')
# Spatial gradient magnitude
gradient_key = f'{gene}_gradient_magnitude'
if gradient_key in adata.obs.columns:
sc.pl.spatial(adata, color=gradient_key, ax=axes[1, i], show=False,
title=f'{gene} gradient')
plt.tight_layout()
plt.show()# Standard single-cell analysis on spatial data
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata.raw = adata
adata = adata[:, adata.var.highly_variable]
# PCA and neighbors
sc.pp.scale(adata)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
# Standard clustering
sc.tl.leiden(adata, resolution=0.5, key_added='expression_clusters')
# Compare expression-based and spatial clustering
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
# UMAP with expression clusters
sc.pl.umap(adata, color='expression_clusters', ax=axes[0], show=False,
title='Expression-based clusters')
# Spatial plot with expression clusters
sc.pl.spatial(adata, color='expression_clusters', ax=axes[1], show=False,
title='Expression clusters (spatial)')
# Spatial plot with spatial domains
sc.pl.spatial(adata, color='spatial_domains', ax=axes[2], show=False,
title='Spatial domains')
plt.show()# Load multiple Visium samples
samples = ['sample1', 'sample2', 'sample3']
adatas = {}
for sample in samples:
adata_sample = sc.read_visium(f'path/to/{sample}/')
adata_sample.obs['sample'] = sample
adatas[sample] = adata_sample
# Concatenate samples
adata_combined = sc.concat(adatas, label='sample', keys=samples)
# Spatial analysis across samples
sc.metrics.morans_i(adata_combined)
# Plot per sample
fig, axes = plt.subplots(1, len(samples), figsize=(5*len(samples), 5))
for i, sample in enumerate(samples):
adata_sample = adata_combined[adata_combined.obs['sample'] == sample]
sc.pl.spatial(adata_sample, color='total_counts', ax=axes[i],
show=False, title=sample)
plt.show()# Spatial-specific QC metrics
adata.var['n_spots'] = np.sum(adata.X > 0, axis=0).A1
adata.obs['n_genes'] = np.sum(adata.X > 0, axis=1).A1
# Visualize QC metrics spatially
sc.pl.spatial(adata, color=['total_counts', 'n_genes', 'pct_counts_mt'],
ncols=3)
# Filter spots based on spatial context
# Remove spots with very low counts that might be outside tissue
min_counts = np.percentile(adata.obs['total_counts'], 5)
adata = adata[adata.obs['total_counts'] > min_counts, :]
# Remove genes not expressed in enough spots
min_spots = 10
adata = adata[:, adata.var['n_spots'] >= min_spots]# Visium data structure
adata.obsm['spatial'] # Spatial coordinates (array_row, array_col)
adata.uns['spatial'] # Spatial metadata and images
adata.uns['spatial'][library_id]['images'] # Tissue images
adata.uns['spatial'][library_id]['scalefactors'] # Scaling factors# For non-Visium data, store coordinates in obsm
import pandas as pd
coordinates = pd.DataFrame({
'x': x_coords,
'y': y_coords
})
adata.obsm['spatial'] = coordinates.valuesInstall with Tessl CLI
npx tessl i tessl/pypi-scanpy