Comprehensive toolkit for analyzing single-cell gene expression data with scalable Python implementation supporting preprocessing, visualization, clustering, trajectory inference, and differential expression testing.
—
Scanpy provides comprehensive support for reading and writing various single-cell data formats, making it easy to work with data from different platforms and integrate with other analysis tools.
Read various file formats and automatically detect the appropriate format based on file extension.
def read(filename, delimiter=None, first_column_names=None, backup_url=None, sheet=None, ext=None, **kwargs):
    """
    Read a file and return an AnnData object.

    Parameters:
    - filename (str): Path to file or URL
    - delimiter (str, optional): Delimiter for text files
    - first_column_names (bool, optional): Whether the first column contains row names
    - backup_url (str, optional): Backup URL used if the file is not found locally
    - sheet (str, optional): Sheet name for Excel files
    - ext (str, optional): Force file extension interpretation

    Returns:
    AnnData: Annotated data object
    """


# Read data from 10x Genomics Cell Ranger output formats, the most common single-cell data format.
def read_10x_h5(filename, genome=None, gex_only=True, **kwargs):
    """
    Read a 10x Genomics HDF5 file.

    Parameters:
    - filename (str): Path to .h5 file
    - genome (str, optional): Genome to read (for multi-genome files)
    - gex_only (bool): Only read gene expression data

    Returns:
    AnnData: Annotated data object
    """
def read_10x_mtx(path, var_names='gene_symbols', make_unique=True, cache=False, **kwargs):
    """
    Read 10x Genomics MTX format (matrix.mtx, features.tsv, barcodes.tsv).

    Parameters:
    - path (str): Path to directory containing MTX files
    - var_names (str): Use 'gene_symbols' or 'gene_ids' for gene names
    - make_unique (bool): Make gene names unique
    - cache (bool): Write cache file for faster subsequent reading

    Returns:
    AnnData: Annotated data object
    """


# Read spatial transcriptomics data from 10x Visium platform.
def read_visium(path, genome=None, count_file='filtered_feature_bc_matrix.h5', library_id=None, load_images=True, **kwargs):
    """
    Read 10x Visium spatial transcriptomics data.

    Parameters:
    - path (str): Path to directory containing Visium output
    - genome (str, optional): Genome to read
    - count_file (str): Name of count matrix file
    - library_id (str, optional): Library identifier
    - load_images (bool): Load histological images

    Returns:
    AnnData: Annotated data object with spatial coordinates
    """


# Read common data formats used in bioinformatics and data science.
# From anndata - automatically available in scanpy
def read_csv(filename, delimiter=',', first_column_names=None, **kwargs):
    """
    Read a CSV file.

    Parameters:
    - filename (str): Path to CSV file
    - delimiter (str): Field delimiter
    - first_column_names (bool, optional): Whether the first column contains row names

    Returns:
    AnnData: Annotated data object
    """
def read_excel(filename, sheet=None, **kwargs):
    """
    Read an Excel file.

    Parameters:
    - filename (str): Path to Excel file
    - sheet (str, optional): Sheet name to read

    Returns:
    AnnData: Annotated data object
    """
def read_h5ad(filename, backed=None, **kwargs):
    """
    Read H5AD format (native AnnData format).

    Parameters:
    - filename (str): Path to .h5ad file
    - backed (str, optional): Backing mode ('r' for read-only)

    Returns:
    AnnData: Annotated data object
    """
def read_hdf(filename, key, **kwargs):
    """
    Read an HDF5 file.

    Parameters:
    - filename (str): Path to HDF5 file
    - key (str): Key/group name in HDF5 file

    Returns:
    AnnData: Annotated data object
    """
def read_loom(filename, sparse=True, cleanup=True, **kwargs):
    """
    Read Loom file format.

    Parameters:
    - filename (str): Path to .loom file
    - sparse (bool): Store matrix in sparse format
    - cleanup (bool): Collapse obs/var fields that hold only one unique value
      into `.uns` (per anndata's `read_loom` documentation; this is not
      temporary-file cleanup).
      NOTE(review): anndata documents the default as False - confirm the
      default shown in this signature.

    Returns:
    AnnData: Annotated data object
    """
def read_mtx(filename, **kwargs):
    """
    Read Matrix Market format.

    Parameters:
    - filename (str): Path to .mtx file

    Returns:
    AnnData: Annotated data object
    """
def read_text(filename, delimiter=None, first_column_names=None, **kwargs):
    """
    Read a text file.

    Parameters:
    - filename (str): Path to text file
    - delimiter (str, optional): Field delimiter
    - first_column_names (bool, optional): Whether the first column contains row names

    Returns:
    AnnData: Annotated data object
    """
def read_umi_tools(filename, **kwargs):
    """
    Read UMI-tools format.

    Parameters:
    - filename (str): Path to UMI-tools output file

    Returns:
    AnnData: Annotated data object
    """


# Write AnnData objects to various formats for sharing, archiving, or use with other tools.
def write(filename, adata, ext=None, compression=None, compression_opts=None):
    """
    Write an AnnData object to file.

    Parameters:
    - filename (str): Output file path
    - adata (AnnData): AnnData object to write
    - ext (str, optional): Force file format based on extension
    - compression (str, optional): Compression method
    - compression_opts (dict, optional): Compression options
    """


# Combine multiple AnnData objects into a single object.
def concat(adatas, axis=0, join='outer', merge=None, uns_merge=None, **kwargs):
    """
    Concatenate AnnData objects along an axis.

    Parameters:
    - adatas (list): List of AnnData objects to concatenate
    - axis (int): Axis along which to concatenate (0 for observations, 1 for variables)
    - join (str): How to handle indices ('outer', 'inner')
    - merge (str, optional): Strategy for merging conflicting annotations
    - uns_merge (str, optional): Strategy for merging unstructured annotations

    Returns:
    AnnData: Concatenated AnnData object
    """


import scanpy as sc
# Load 10x MTX format
adata = sc.read_10x_mtx(
    'data/filtered_gene_bc_matrices/hg19/',
    var_names='gene_symbols',
    cache=True
)
# AnnData exposes var_names_make_unique(); there is no var_names_unique().
adata.var_names_make_unique()

# Load 10x H5 format
adata = sc.read_10x_h5('data/filtered_gene_bc_matrix.h5')

# Load Visium spatial transcriptomics data
adata = sc.read_visium('data/spatial/')
adata.var_names_make_unique()

# Spatial coordinates are stored in adata.obsm['spatial']
print(adata.obsm['spatial'].shape)

# Save processed data
sc.write('results/processed_data.h5ad', adata)

# Load for further analysis
adata = sc.read_h5ad('results/processed_data.h5ad')

# Load multiple datasets
adata1 = sc.read_10x_mtx('data/sample1/')
adata2 = sc.read_10x_mtx('data/sample2/')

# Add batch information
adata1.obs['batch'] = 'sample1'
adata2.obs['batch'] = 'sample2'

# Concatenate datasets
adata_combined = sc.concat([adata1, adata2], join='outer')

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-scanpy