tessl/pypi-scanpy

Comprehensive toolkit for analyzing single-cell gene expression data with a scalable Python implementation, supporting preprocessing, visualization, clustering, trajectory inference, and differential expression testing.

—

Pending

Overview

Eval results

Files

External Tool Integration

Name: tessl/pypi-scanpy
Author: tessl

Scanpy's external module provides integration with popular external single-cell analysis tools and methods through a unified interface. This extends scanpy's capabilities with specialized algorithms for dimensionality reduction, trajectory inference, batch correction, imputation, and more.

Capabilities

External Analysis Tools

Advanced analysis methods from specialized single-cell packages.

def phate(adata, n_components=2, knn=5, decay=40, n_landmark=2000, t='auto', gamma=1, n_pca=100, solver='exact', seed=None, n_jobs=1, random_state=None, copy=False, **kwargs):
    """
    PHATE (Potential of Heat-diffusion for Affinity-based Embedding) dimensionality reduction.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - n_components (int): Number of dimensions for embedding. Default: 2
    - knn (int): Number of nearest neighbors. Default: 5
    - decay (int): Alpha decay parameter. Default: 40
    - n_landmark (int): Number of landmark points. Default: 2000
    - t (str or int): Time parameter for diffusion. Default: 'auto'
    - gamma (float): Informational distance parameter. Default: 1
    - n_pca (int): Number of PCA components for preprocessing. Default: 100
    - solver (str): Solver for eigenvalue decomposition. Default: 'exact'
    - seed (int, optional): Random seed. Default: None
    - n_jobs (int): Number of parallel jobs. Default: 1
    - random_state (int, optional): Random state. Default: None
    - copy (bool): If True, a result object is returned (see Returns). Default: False
    - **kwargs: Additional PHATE parameters

    NOTE(review): both `seed` and `random_state` are accepted; how they interact
    is not shown here -- confirm against the scanpy/PHATE documentation.

    Returns:
    AnnData or None: Object with PHATE embedding when copy=True; None otherwise
    """

def palantir(adata, start_cell=None, num_waypoints=1200, terminal_states=None, copy=False, **kwargs):
    """
    Palantir trajectory inference algorithm.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - start_cell (str, optional): Starting cell for trajectory. Default: None
    - num_waypoints (int): Number of waypoints for trajectory. Default: 1200
    - terminal_states (list, optional): Terminal cell states. Default: None
    - copy (bool): If True, a result object is returned (see Returns). Default: False
    - **kwargs: Additional Palantir parameters

    Returns:
    AnnData or None: Object with trajectory results when copy=True; None otherwise
    """

def palantir_results(adata, early_cell=None, ms_data=None, copy=False):
    """
    Process Palantir trajectory inference results.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object with Palantir results
    - early_cell (str, optional): Early cell identifier. Default: None
    - ms_data (AnnData, optional): Previously described here as "Mass spectrometry data".
      NOTE(review): in Palantir, "ms" presumably refers to the multiscale
      (diffusion-space) data rather than mass spectrometry -- confirm the meaning
      and expected type against the scanpy documentation. Default: None
    - copy (bool): If True, a result object is returned (see Returns). Default: False

    Returns:
    AnnData or None: Object with processed results when copy=True; None otherwise
    """

def phenograph(adata, clustering_algo='leiden', k=30, directed=False, prune=False, min_cluster_size=10, jaccard=True, primary_metric='euclidean', n_jobs=-1, q_tol=1e-3, louvain_time_limit=2000, nn_method='kdtree', copy=False, **kwargs):
    """
    PhenoGraph clustering algorithm.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - clustering_algo (str): Clustering algorithm ('leiden' or 'louvain'). Default: 'leiden'
    - k (int): Number of nearest neighbors. Default: 30
    - directed (bool): Use directed graph. Default: False
    - prune (bool): Prune graph. Default: False
    - min_cluster_size (int): Minimum cluster size. Default: 10
    - jaccard (bool): Use Jaccard coefficient. Default: True
    - primary_metric (str): Distance metric. Default: 'euclidean'
    - n_jobs (int): Number of parallel jobs. Default: -1
      (presumably "use all available cores" -- TODO confirm)
    - q_tol (float): Quality tolerance for clustering. Default: 1e-3
    - louvain_time_limit (int): Time limit for Louvain algorithm. Default: 2000
      (unit not stated here -- TODO confirm)
    - nn_method (str): Nearest neighbor method. Default: 'kdtree'
    - copy (bool): If True, a result object is returned (see Returns). Default: False
    - **kwargs: Additional parameters

    Returns:
    AnnData or None: Object with clustering results when copy=True; None otherwise
    """

def trimap(adata, n_inliers=10, n_outliers=5, n_random=5, lr=1000.0, n_iters=400, copy=False, **kwargs):
    """
    TriMap dimensionality reduction.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - n_inliers (int): Number of inlier points. Default: 10
    - n_outliers (int): Number of outlier points. Default: 5
    - n_random (int): Number of random triplets. Default: 5
    - lr (float): Learning rate. Default: 1000.0
    - n_iters (int): Number of iterations. Default: 400
    - copy (bool): If True, a result object is returned (see Returns). Default: False
    - **kwargs: Additional TriMap parameters

    Returns:
    AnnData or None: Object with TriMap embedding when copy=True; None otherwise
    """

def wishbone(adata, start_cell=None, copy=False, **kwargs):
    """
    Wishbone trajectory inference algorithm.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - start_cell (str, optional): Starting cell for trajectory. Default: None
    - copy (bool): If True, a result object is returned (see Returns). Default: False
    - **kwargs: Additional Wishbone parameters

    Returns:
    AnnData or None: Object with trajectory results when copy=True; None otherwise
    """

def sam(adata, max_iter=10, num_norm_avg=50, k=20, distance='correlation', copy=False, **kwargs):
    """
    SAM (Self-Assembling Manifolds) for iterative clustering.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - max_iter (int): Maximum number of iterations. Default: 10
    - num_norm_avg (int): Number of averages for normalization. Default: 50
    - k (int): Number of nearest neighbors. Default: 20
    - distance (str): Distance metric. Default: 'correlation'
    - copy (bool): If True, a result object is returned (see Returns). Default: False
    - **kwargs: Additional SAM parameters

    Returns:
    AnnData or None: Object with SAM results when copy=True; None otherwise
    """

def harmony_timeseries(adata_list, tp=None, copy=False, **kwargs):
    """
    Harmony integration for time series data.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata_list (list): List of AnnData objects from different time points
    - tp (list, optional): Time point labels. Default: None
    - copy (bool): If True, a result object is returned (see Returns). Default: False
    - **kwargs: Additional Harmony parameters

    Returns:
    AnnData or None: Integrated dataset when copy=True; None otherwise
    """

Cell Cycle Analysis

Specialized tools for cell cycle phase analysis.

def cyclone(adata, species='human', copy=False, **kwargs):
    """
    Cyclone cell cycle phase assignment.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - species (str): Species for marker genes ('human' or 'mouse'). Default: 'human'
    - copy (bool): If True, a result object is returned (see Returns). Default: False
    - **kwargs: Additional parameters

    Returns:
    AnnData or None: Object with cell cycle scores when copy=True; None otherwise
    """

def sandbag(adata, fraction=0.5, copy=False, **kwargs):
    """
    Sandbag cell cycle gene identification.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - fraction (float): Fraction threshold for gene selection. Default: 0.5
    - copy (bool): If True, a result object is returned (see Returns). Default: False
    - **kwargs: Additional parameters

    Returns:
    AnnData or None: Object with cell cycle gene markers when copy=True; None otherwise
    """

External Preprocessing

Batch correction, integration, denoising, imputation, and demultiplexing methods from external packages.

def bbknn(adata, batch_key='batch', neighbors_within_batch=3, n_pcs=50, trim=None, copy=False, **kwargs):
    """
    BBKNN (Batch Balanced k-Nearest Neighbors) batch correction.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - batch_key (str): Key in obs containing batch information. Default: 'batch'
    - neighbors_within_batch (int): Neighbors within each batch. Default: 3
    - n_pcs (int): Number of principal components to use. Default: 50
    - trim (int, optional): Trim neighbors per batch. Default: None
    - copy (bool): If True, a result object is returned (see Returns). Default: False
    - **kwargs: Additional BBKNN parameters

    Returns:
    AnnData or None: Object with corrected neighborhood graph when copy=True; None otherwise
    """

def dca(adata, mode='denoise', ae_type='nb-conddisp', normalize_per_cell=True, scale=True, log1p=True, hidden_size=(64, 32, 64), hidden_dropout=0.0, batchnorm=True, activation='relu', init='glorot_uniform', network_kwds={}, epochs=300, reduce_lr=10, early_stop=15, batch_size=32, optimizer='rmsprop', learning_rate=None, random_state=0, threads=None, verbose=False, training_kwds={}, return_model=False, return_info=False, copy=False):
    """
    Deep Count Autoencoder (DCA) for denoising and batch correction.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - mode (str): Mode of operation ('denoise', 'latent'). Default: 'denoise'
    - ae_type (str): Autoencoder type. Default: 'nb-conddisp'
    - normalize_per_cell (bool): Normalize per cell. Default: True
    - scale (bool): Scale features. Default: True
    - log1p (bool): Log transform. Default: True
    - hidden_size (tuple): Hidden layer sizes. Default: (64, 32, 64)
    - hidden_dropout (float): Dropout rate. Default: 0.0
    - batchnorm (bool): Use batch normalization. Default: True
    - activation (str): Activation function. Default: 'relu'
    - init (str): Weight initialization. Default: 'glorot_uniform'
    - network_kwds (dict): Additional network parameters. Default: {}
    - epochs (int): Number of training epochs. Default: 300
    - reduce_lr (int): Learning rate reduction patience. Default: 10
    - early_stop (int): Early stopping patience. Default: 15
    - batch_size (int): Training batch size. Default: 32
    - optimizer (str): Optimizer. Default: 'rmsprop'
    - learning_rate (float, optional): Learning rate. Default: None
    - random_state (int): Random seed. Default: 0
    - threads (int, optional): Number of threads. Default: None
    - verbose (bool): Verbose output. Default: False
    - training_kwds (dict): Additional training parameters. Default: {}
    - return_model (bool): Return trained model. Default: False
    - return_info (bool): Return training information. Default: False
    - copy (bool): Return copy. Default: False

    NOTE(review): `network_kwds={}` and `training_kwds={}` are mutable default
    arguments, which are shared across calls in Python. Presumably the real
    implementation uses an immutable empty mapping -- confirm against the
    scanpy source before copying this signature into code.

    Returns:
    AnnData or tuple: Denoised data and optionally model/info
    """

def harmony_integrate(adata, key, basis='X_pca', adjusted_basis='X_pca_harmony', copy=False, **kwargs):
    """
    Harmony batch integration.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - key (str): Key in obs for batch variable (required; no default)
    - basis (str): Basis to integrate. Default: 'X_pca'
    - adjusted_basis (str): Key for integrated embedding. Default: 'X_pca_harmony'
    - copy (bool): If True, a result object is returned (see Returns). Default: False
    - **kwargs: Additional Harmony parameters

    Returns:
    AnnData or None: Object with integrated embedding when copy=True; None otherwise
    """

def hashsolo(adata, priors=(0.01, 0.8, 0.19), pre_existing_clusters=None, number_of_noise_barcodes=None, copy=False, **kwargs):
    """
    HashSolo for demultiplexing cell hashing data and doublet detection.

    API stub: this block documents the call signature only and contains no implementation.

    The `priors` default is an immutable tuple rather than a list, so the
    default object cannot be mutated and shared between calls.

    Parameters:
    - adata (AnnData): Annotated data object with hashtag data
    - priors (sequence of float): Prior probabilities of the three hypotheses.
      Default: (0.01, 0.8, 0.19)
      NOTE(review): this text previously gave the order as
      [doublet, negative, singlet], which does not fit the default values;
      the upstream scanpy reference presumably uses [negative, singlet, doublet]
      -- confirm before relying on the order.
    - pre_existing_clusters (str, optional): Key for existing clusters. Default: None
    - number_of_noise_barcodes (int, optional): Number of noise barcodes. Default: None
    - copy (bool): If True, a result object is returned (see Returns). Default: False
    - **kwargs: Additional HashSolo parameters

    Returns:
    AnnData or None: Object with demultiplexing results when copy=True; None otherwise
    """

def magic(adata, name_list=None, knn=10, decay=1, knn_max=None, t=3, n_pca=20, solver='exact', knn_dist='euclidean', random_state=None, n_jobs=None, copy=False, **kwargs):
    """
    MAGIC (Markov Affinity-based Graph Imputation of Cells) imputation.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - name_list (list, optional): Genes to impute (None for all). Default: None
    - knn (int): Number of nearest neighbors. Default: 10
    - decay (int): Alpha decay parameter. Default: 1
    - knn_max (int, optional): Maximum number of neighbors. Default: None
    - t (int): Number of diffusion steps. Default: 3
    - n_pca (int): Number of PCA components. Default: 20
    - solver (str): Solver for eigenvalue decomposition. Default: 'exact'
    - knn_dist (str): Distance metric for KNN. Default: 'euclidean'
    - random_state (int, optional): Random seed. Default: None
    - n_jobs (int, optional): Number of parallel jobs. Default: None
    - copy (bool): If True, a result object is returned (see Returns). Default: False
    - **kwargs: Additional MAGIC parameters

    Returns:
    AnnData or None: Object with imputed data when copy=True; None otherwise
    """

def mnn_correct(adata_list, var_subset=None, batch_key='batch', index_unique='-', batch_categories=None, k=20, sigma=0.1, cos_norm_in=True, cos_norm_out=True, svd_dim=0, var_adj=True, compute_angle=False, mnn_order=None, svd_mode='rsvd', do_concatenate=True, save_raw=False, n_jobs=None, **kwargs):
    """
    MNN (Mutual Nearest Neighbors) batch correction.

    API stub: this block documents the call signature only and contains no implementation.
    Unlike most functions in this listing, the signature has no `copy` parameter.

    Parameters:
    - adata_list (list): List of AnnData objects to correct
    - var_subset (list, optional): Subset of variables for correction. Default: None
    - batch_key (str): Key for batch information. Default: 'batch'
    - index_unique (str): Separator for making indices unique. Default: '-'
    - batch_categories (list, optional): Batch category order. Default: None
    - k (int): Number of nearest neighbors. Default: 20
    - sigma (float): Gaussian smoothing parameter. Default: 0.1
    - cos_norm_in (bool): Cosine normalization before correction. Default: True
    - cos_norm_out (bool): Cosine normalization after correction. Default: True
    - svd_dim (int): Number of SVD dimensions (0 for no SVD). Default: 0
    - var_adj (bool): Adjust variance. Default: True
    - compute_angle (bool): Compute angle between batches. Default: False
    - mnn_order (list, optional): Order for MNN correction. Default: None
    - svd_mode (str): SVD computation mode. Default: 'rsvd'
    - do_concatenate (bool): Concatenate results. Default: True
    - save_raw (bool): Save uncorrected data. Default: False
    - n_jobs (int, optional): Number of parallel jobs. Default: None
    - **kwargs: Additional parameters

    Returns:
    AnnData or list: Corrected data
    (presumably a list when do_concatenate=False -- TODO confirm)
    """

def scanorama_integrate(adata_list, key=None, basis='X_pca', adjusted_basis='X_scanorama', copy=False, **kwargs):
    """
    Scanorama integration for batch correction.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata_list (list): List of AnnData objects to integrate
    - key (str, optional): Key for batch information. Default: None
    - basis (str): Basis for integration. Default: 'X_pca'
    - adjusted_basis (str): Key for integrated embedding. Default: 'X_scanorama'
    - copy (bool): Return copy. Default: False
    - **kwargs: Additional Scanorama parameters

    Returns:
    AnnData or list: Integrated datasets
    (how `copy` affects the return value is not stated here -- TODO confirm)
    """

Export Functions

Export scanpy results to other software platforms.

def cellbrowser(adata, outdir, name, **kwargs):
    """
    Export to UCSC Cell Browser format.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - outdir (str): Output directory (required; no default)
    - name (str): Dataset name (required; no default)
    - **kwargs: Additional export parameters

    Returns:
    None: Creates Cell Browser files
    """

def spring_project(adata, project_dir, **kwargs):
    """
    Export to SPRING visualization tool.

    API stub: this block documents the call signature only and contains no implementation.

    Parameters:
    - adata (AnnData): Annotated data object
    - project_dir (str): Project directory (required; no default)
    - **kwargs: Additional export parameters

    Returns:
    None: Creates SPRING project files
    """

Usage Examples

Dimensionality Reduction with PHATE

import matplotlib.pyplot as plt
import scanpy as sc

# PHATE embedding with 2 components, 15 nearest neighbors and diffusion time t=20.
sc.external.tl.phate(adata, n_components=2, knn=15, t=20)
sc.pl.embedding(adata, basis='X_phate', color='leiden')

# Compare with UMAP, side by side on one figure.
# Assumes `adata` already carries a 'leiden' annotation and a computed UMAP
# embedding -- TODO confirm these steps ran earlier in your pipeline.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
# show=False keeps both panels on the shared figure until plt.show() below.
sc.pl.umap(adata, color='leiden', ax=axes[0], show=False, frameon=False)
sc.pl.embedding(adata, basis='X_phate', color='leiden', ax=axes[1], show=False, frameon=False)
axes[0].set_title('UMAP')
axes[1].set_title('PHATE')
plt.show()

Trajectory Inference with Palantir

# Run Palantir from a chosen start cell.
# 'ATGCCAGAACGACT-1' is a placeholder barcode; replace it with a cell from your data.
sc.external.tl.palantir(adata, start_cell='ATGCCAGAACGACT-1')

# Plot Palantir pseudotime and entropy
sc.pl.umap(adata, color=['palantir_pseudotime', 'palantir_entropy'])

# Plot differentiation potential
# NOTE(review): assumes the Palantir call stores 'palantir_pseudotime',
# 'palantir_entropy' and 'palantir_diff_potential' on adata -- confirm the key names.
sc.pl.umap(adata, color='palantir_diff_potential')

Batch Correction with Harmony

# Harmony integration using the 'batch' key as the batch variable
sc.external.pp.harmony_integrate(adata, 'batch')

# Compare before and after
# NOTE(review): the "before" panel is a UMAP while the "after" panel plots the
# 'X_pca_harmony' basis directly, so the two panels are not the same kind of embedding.
sc.pl.umap(adata, color='batch', title='Before Harmony')
sc.pl.embedding(adata, basis='X_pca_harmony', color='batch', title='After Harmony')

# Recompute neighbors on integrated data, then rebuild the UMAP from that graph
sc.pp.neighbors(adata, use_rep='X_pca_harmony')
sc.tl.umap(adata)

Imputation with MAGIC

# MAGIC imputation for specific genes, with t=3
genes_to_impute = ['CD34', 'GATA1', 'GATA2']
sc.external.pp.magic(adata, name_list=genes_to_impute, t=3)

# Compare before and after imputation
# NOTE(review): the "before" plot is drawn with use_raw=True and the "after" plot
# reads a layer named 'MAGIC_imputed'; confirm that magic() actually writes that
# layer and that adata.raw is populated.
sc.pl.violin(adata, genes_to_impute, groupby='leiden', 
             use_raw=True, title='Before MAGIC')
sc.pl.violin(adata, genes_to_impute, groupby='leiden', 
             layer='MAGIC_imputed', title='After MAGIC')

Cell Cycle Analysis

# Cell cycle scoring with Cyclone, using the human marker set
sc.external.tl.cyclone(adata, species='human')

# Plot cell cycle phases
# NOTE(review): assumes cyclone stores 'cyclone_G1', 'cyclone_S' and 'cyclone_G2M'
# on adata -- confirm the key names.
sc.pl.umap(adata, color=['cyclone_G1', 'cyclone_S', 'cyclone_G2M'])

# Custom marker identification with Sandbag (all optional arguments left at their defaults)
sc.external.tl.sandbag(adata)

Advanced Clustering with PhenoGraph

# PhenoGraph clustering with k=30 neighbors and the 'leiden' algorithm
sc.external.tl.phenograph(adata, k=30, clustering_algo='leiden')

# Compare with Leiden
# Assumes adata already has a 'leiden' annotation and that the call above
# stores its labels under 'phenograph' -- TODO confirm the key name.
sc.pl.umap(adata, color=['leiden', 'phenograph'], ncols=2)

Batch Correction with BBKNN

# BBKNN for batch-balanced neighbors, using the 'batch' key and 50 principal components
sc.external.pp.bbknn(adata, batch_key='batch', n_pcs=50)

# Recompute UMAP with corrected neighbors, then color by batch to inspect mixing
sc.tl.umap(adata)
sc.pl.umap(adata, color='batch')

Export to Other Tools

# Export to UCSC Cell Browser: files go to 'cellbrowser_output' under the dataset name 'my_dataset'
sc.external.exporting.cellbrowser(
    adata, 
    outdir='cellbrowser_output',
    name='my_dataset'
)

# Export to SPRING: project files go to 'spring_output'
sc.external.exporting.spring_project(
    adata,
    project_dir='spring_output'
)

Integration Notes

Installation Requirements

Many external tools require additional dependencies:

# For PHATE
pip install phate

# For Palantir
# NOTE(review): verify this distribution name on PyPI; the upstream Palantir
# project may be published under a different name (e.g. `palantir`).
pip install palantir-sc

# For Harmony
pip install harmonypy

# For MAGIC
pip install magic-impute

# For BBKNN
pip install bbknn

# For DCA
pip install dca