tessl/pypi-tiledbsoma

Python API for efficient storage and retrieval of single-cell data using TileDB

Overview

Eval results

Files

Data I/O Operations

Name: tessl/pypi-tiledbsoma
Author: tessl

Comprehensive ingestion and outgestion functions for converting between SOMA format and popular single-cell data formats like AnnData and H5AD files. These functions enable seamless integration with existing single-cell analysis workflows and tools.

Package Import

import tiledbsoma.io as soma_io

Capabilities

AnnData Integration

Functions for converting between SOMA Experiments and AnnData objects, the standard format for single-cell data in Python.

from_anndata

Convert an AnnData object to a SOMA Experiment with full support for all AnnData components.

def from_anndata(anndata, uri, *, measurement_name="RNA", obs_id_name="obs_id", var_id_name="var_id", X_layer_name=None, obsm_layers=None, varm_layers=None, obsp_layers=None, varp_layers=None, uns_keys=None, ingest_mode="write", registration_mapping=None, context=None, platform_config=None, additional_metadata=None):
    """
    Create a SOMA Experiment from an AnnData object.
    
    Parameters:
    - anndata: AnnData object to convert
    - uri: str, URI where the SOMA experiment will be created
    - measurement_name: str, name for the measurement (default: "RNA")
    - obs_id_name: str, column name for observation IDs (default: "obs_id")
    - var_id_name: str, column name for variable IDs (default: "var_id")
    - X_layer_name: str, name for the main X matrix layer (None uses default)
    - obsm_layers: list of str, obsm keys to include (None includes all)
    - varm_layers: list of str, varm keys to include (None includes all)
    - obsp_layers: list of str, obsp keys to include (None includes all)
    - varp_layers: list of str, varp keys to include (None includes all)
    - uns_keys: list of str, uns keys to include as metadata (None includes all)
    - ingest_mode: str, ingestion mode ("write" or "resume")
    - registration_mapping: dict, mapping for registration information
    - context: TileDB context for the operation
    - platform_config: TileDB-specific configuration options
    - additional_metadata: dict, additional metadata to store
    
    Returns:
    SOMA Experiment object
    """

to_anndata

Convert a SOMA Experiment back to an AnnData object with flexible layer selection.

def to_anndata(experiment, *, measurement_name="RNA", X_layer_name=None, obsm_layers=None, varm_layers=None, obsp_layers=None, varp_layers=None, obs_coords=None, var_coords=None, obs_value_filter=None, var_value_filter=None, obs_column_names=None, var_column_names=None, batch_size=None, context=None):
    """
    Convert a SOMA Experiment to an AnnData object.
    
    Parameters:
    - experiment: SOMA Experiment object or ExperimentAxisQuery
    - measurement_name: str, name of measurement to convert (default: "RNA")
    - X_layer_name: str, X layer to use as main matrix (None uses first available)
    - obsm_layers: list of str, obsm layers to include (None includes all)
    - varm_layers: list of str, varm layers to include (None includes all)
    - obsp_layers: list of str, obsp layers to include (None includes all)
    - varp_layers: list of str, varp layers to include (None includes all)
    - obs_coords: coordinates for observation selection
    - var_coords: coordinates for variable selection
    - obs_value_filter: str, filter expression for observations
    - var_value_filter: str, filter expression for variables
    - obs_column_names: list of str, observation columns to include
    - var_column_names: list of str, variable columns to include
    - batch_size: int, batch size for reading data
    - context: TileDB context for the operation
    
    Returns:
    AnnData object
    """

Usage Example

import scanpy as sc
import tiledbsoma
import tiledbsoma.io as soma_io

# Load example dataset
adata = sc.datasets.pbmc3k()
adata.var_names_make_unique()

# Convert to SOMA format
experiment_uri = "pbmc3k_experiment.soma"
soma_io.from_anndata(
    adata,
    experiment_uri,
    measurement_name="RNA",
    obs_id_name="obs_id",
    var_id_name="var_id"
)

# Work with SOMA format - query specific data
with tiledbsoma.open(experiment_uri) as exp:
    # Query T cells only
    query = exp.axis_query(
        "RNA",
        obs_query=tiledbsoma.AxisQuery(value_filter="cell_type == 'T cells'")
    )
    
    # Convert subset back to AnnData
    t_cell_adata = soma_io.to_anndata(
        query,
        measurement_name="RNA",
        X_layer_name="X",
        obs_column_names=["cell_type", "n_genes", "percent_mito"]
    )
    
    print(f"T cells: {t_cell_adata.n_obs} cells, {t_cell_adata.n_vars} genes")

H5AD File Operations

Functions for working directly with H5AD files, the standard file format for AnnData objects.

from_h5ad

Create a SOMA Experiment directly from an H5AD file without loading into memory.

def from_h5ad(h5ad_file_path, output_path, *, measurement_name="RNA", obs_id_name="obs_id", var_id_name="var_id", X_layer_name=None, obsm_layers=None, varm_layers=None, obsp_layers=None, varp_layers=None, uns_keys=None, ingest_mode="write", registration_mapping=None, context=None, platform_config=None, additional_metadata=None):
    """
    Create a SOMA Experiment from an H5AD file.
    
    Parameters:
    - h5ad_file_path: str, path to input H5AD file
    - output_path: str, URI where SOMA experiment will be created
    - measurement_name: str, name for the measurement (default: "RNA")
    - obs_id_name: str, column name for observation IDs (default: "obs_id")
    - var_id_name: str, column name for variable IDs (default: "var_id")
    - X_layer_name: str, name for the main X matrix layer (None uses default)
    - obsm_layers: list of str, obsm keys to include (None includes all)
    - varm_layers: list of str, varm keys to include (None includes all)
    - obsp_layers: list of str, obsp keys to include (None includes all)
    - varp_layers: list of str, varp keys to include (None includes all)
    - uns_keys: list of str, uns keys to include as metadata (None includes all)
    - ingest_mode: str, ingestion mode ("write" or "resume")
    - registration_mapping: dict, mapping for registration information
    - context: TileDB context for the operation
    - platform_config: TileDB-specific configuration options
    - additional_metadata: dict, additional metadata to store
    
    Returns:
    SOMA Experiment object
    """

to_h5ad

Write a SOMA Experiment directly to an H5AD file.

def to_h5ad(experiment, h5ad_path, *, measurement_name="RNA", X_layer_name=None, obsm_layers=None, varm_layers=None, obsp_layers=None, varp_layers=None, obs_coords=None, var_coords=None, obs_value_filter=None, var_value_filter=None, obs_column_names=None, var_column_names=None, batch_size=None, context=None):
    """
    Write a SOMA Experiment to an H5AD file.
    
    Parameters:
    - experiment: SOMA Experiment object or ExperimentAxisQuery
    - h5ad_path: str, output H5AD file path
    - measurement_name: str, name of measurement to write (default: "RNA")
    - X_layer_name: str, X layer to use as main matrix (None uses first available)
    - obsm_layers: list of str, obsm layers to include (None includes all)
    - varm_layers: list of str, varm layers to include (None includes all)
    - obsp_layers: list of str, obsp layers to include (None includes all)
    - varp_layers: list of str, varp layers to include (None includes all)
    - obs_coords: coordinates for observation selection
    - var_coords: coordinates for variable selection
    - obs_value_filter: str, filter expression for observations
    - var_value_filter: str, filter expression for variables
    - obs_column_names: list of str, observation columns to include
    - var_column_names: list of str, variable columns to include
    - batch_size: int, batch size for reading data
    - context: TileDB context for the operation
    """

Usage Example

import tiledbsoma
import tiledbsoma.io as soma_io

# Convert H5AD file to SOMA format
soma_io.from_h5ad(
    "input_data.h5ad",
    "experiment.soma",
    measurement_name="RNA"
)

# Process data in SOMA format
with tiledbsoma.open("experiment.soma") as exp:
    # Perform analysis, filtering, etc.
    query = exp.axis_query("RNA", 
        obs_query=tiledbsoma.AxisQuery(value_filter="n_genes > 500")
    )
    
    # Export filtered results back to H5AD
    soma_io.to_h5ad(
        query,
        "filtered_output.h5ad",
        measurement_name="RNA"
    )

Batch Registration

Functions for registering multiple AnnData objects or H5AD files into a single SOMA Experiment.

register_anndatas

def register_anndatas(experiment_uri, adatas, *, measurement_name="RNA", obs_id_name="obs_id", var_id_name="var_id", registration_mapping=None, context=None, platform_config=None):
    """
    Register multiple AnnData objects into a SOMA Experiment.
    
    Parameters:
    - experiment_uri: str, URI of the SOMA experiment
    - adatas: list of AnnData objects to register
    - measurement_name: str, name for the measurement (default: "RNA")
    - obs_id_name: str, column name for observation IDs (default: "obs_id")
    - var_id_name: str, column name for variable IDs (default: "var_id")
    - registration_mapping: dict, mapping for registration information
    - context: TileDB context for the operation
    - platform_config: TileDB-specific configuration options
    
    Returns:
    SOMA Experiment object
    """

register_h5ads

def register_h5ads(experiment_uri, h5ad_file_paths, *, measurement_name="RNA", obs_id_name="obs_id", var_id_name="var_id", registration_mapping=None, context=None, platform_config=None):
    """
    Register multiple H5AD files into a SOMA Experiment.
    
    Parameters:
    - experiment_uri: str, URI of the SOMA experiment
    - h5ad_file_paths: list of str, paths to H5AD files to register
    - measurement_name: str, name for the measurement (default: "RNA")
    - obs_id_name: str, column name for observation IDs (default: "obs_id")
    - var_id_name: str, column name for variable IDs (default: "var_id")
    - registration_mapping: dict, mapping for registration information
    - context: TileDB context for the operation
    - platform_config: TileDB-specific configuration options
    
    Returns:
    SOMA Experiment object
    """

Usage Example

import tiledbsoma.io as soma_io
import scanpy as sc

# Load one dataset and split it into two batches
# (pbmc68k_reduced contains only 700 cells, so slice within that range)
pbmc = sc.datasets.pbmc68k_reduced()
pbmc_batch_1 = pbmc[:350, :].copy()
pbmc_batch_2 = pbmc[350:, :].copy()

# Register into single experiment
soma_io.register_anndatas(
    "combined_experiment.soma",
    [pbmc_batch_1, pbmc_batch_2],
    measurement_name="RNA"
)

# Register H5AD files
h5ad_files = ["sample1.h5ad", "sample2.h5ad", "sample3.h5ad"]
soma_io.register_h5ads(
    "multi_sample_experiment.soma",
    h5ad_files,
    measurement_name="RNA"
)

Data Append and Update Operations

Functions for incrementally adding or modifying data in existing SOMA objects.

Append Functions

def append_obs(soma_df, values, *, context=None, platform_config=None):
    """
    Append observations to a SOMA DataFrame.
    
    Parameters:
    - soma_df: SOMA DataFrame to append to
    - values: pyarrow.Table with new observation data
    - context: TileDB context for the operation
    - platform_config: TileDB-specific configuration options
    """

def append_var(soma_df, values, *, context=None, platform_config=None):
    """
    Append variables to a SOMA DataFrame.
    
    Parameters:
    - soma_df: SOMA DataFrame to append to
    - values: pyarrow.Table with new variable data
    - context: TileDB context for the operation
    - platform_config: TileDB-specific configuration options
    """

def append_X(collection, values, *, context=None, platform_config=None):
    """
    Append expression data to an X collection.
    
    Parameters:
    - collection: SOMA Collection containing X matrices
    - values: expression data to append
    - context: TileDB context for the operation
    - platform_config: TileDB-specific configuration options
    """

Update Functions

def update_obs(soma_df, values, *, context=None, platform_config=None):
    """
    Update observations in a SOMA DataFrame.
    
    Parameters:
    - soma_df: SOMA DataFrame to update
    - values: pyarrow.Table with updated observation data
    - context: TileDB context for the operation
    - platform_config: TileDB-specific configuration options
    """

def update_var(soma_df, values, *, context=None, platform_config=None):
    """
    Update variables in a SOMA DataFrame.
    
    Parameters:
    - soma_df: SOMA DataFrame to update
    - values: pyarrow.Table with updated variable data
    - context: TileDB context for the operation
    - platform_config: TileDB-specific configuration options
    """

def update_matrix(soma_coll, values, *, context=None, platform_config=None):
    """
    Update matrix data in a SOMA Collection.
    
    Parameters:
    - soma_coll: SOMA Collection containing matrices
    - values: matrix data to update
    - context: TileDB context for the operation
    - platform_config: TileDB-specific configuration options
    """

Matrix Management Functions

def add_matrix_to_collection(collection, matrix, layer_name, *, context=None, platform_config=None):
    """
    Add a matrix to a SOMA Collection.
    
    Parameters:
    - collection: SOMA Collection to add matrix to
    - matrix: matrix data to add
    - layer_name: str, name for the new matrix layer
    - context: TileDB context for the operation
    - platform_config: TileDB-specific configuration options
    """

def add_X_layer(measurement, matrix, layer_name, *, context=None, platform_config=None):
    """
    Add an X layer to a Measurement.
    
    Parameters:
    - measurement: SOMA Measurement object
    - matrix: matrix data to add as X layer
    - layer_name: str, name for the new X layer
    - context: TileDB context for the operation
    - platform_config: TileDB-specific configuration options
    """

def create_from_matrix(matrix, uri, *, context=None, platform_config=None):
    """
    Create a SOMA array from a matrix.
    
    Parameters:
    - matrix: input matrix data
    - uri: str, URI where SOMA array will be created
    - context: TileDB context for the operation
    - platform_config: TileDB-specific configuration options
    
    Returns:
    SOMA array object
    """

Experiment Shaping Operations

Functions for managing and resizing SOMA Experiment dimensions.

def get_experiment_shapes(experiment, *, measurement_name="RNA"):
    """
    Get current shapes of experiment components.
    
    Parameters:
    - experiment: SOMA Experiment object
    - measurement_name: str, name of measurement to analyze (default: "RNA")
    
    Returns:
    dict: Shapes of experiment components
    """

def show_experiment_shapes(experiment, *, measurement_name="RNA"):
    """
    Display experiment component shapes.
    
    Parameters:
    - experiment: SOMA Experiment object
    - measurement_name: str, name of measurement to analyze (default: "RNA")
    """

def resize_experiment(experiment, shape, *, measurement_name="RNA"):
    """
    Resize experiment dimensions.
    
    Parameters:
    - experiment: SOMA Experiment object
    - shape: new shape specification
    - measurement_name: str, name of measurement to resize (default: "RNA")
    """

def upgrade_experiment_shapes(experiment, *, measurement_name="RNA"):
    """
    Upgrade experiment shapes to accommodate new data.
    
    Parameters:
    - experiment: SOMA Experiment object
    - measurement_name: str, name of measurement to upgrade (default: "RNA")
    """

Registration Mapping

Support for mapping ambient labels during registration of multiple datasets.

class ExperimentAmbientLabelMapping:
    """
    Mapping for experiment ambient labels during registration.
    
    Provides functionality for consistent labeling across multiple
    datasets when registering them into a single experiment.
    """

Usage Example

import pyarrow as pa
import tiledbsoma
import tiledbsoma.io as soma_io

# Incremental data loading workflow
with tiledbsoma.open("experiment.soma", mode="w") as exp:
    # Get current shapes
    shapes = soma_io.get_experiment_shapes(exp, measurement_name="RNA")
    print(f"Current shapes: {shapes}")
    
    # Add new observations
    new_obs_data = pa.table({
        "soma_joinid": range(1000, 1100),
        "cell_type": ["Macrophage"] * 100,
        "sample_id": ["Sample3"] * 100
    })
    soma_io.append_obs(exp.obs, new_obs_data)
    
    # Add corresponding expression data
    # ... (prepare expression matrix for new cells)
    
    # Resize experiment to accommodate new data
    soma_io.upgrade_experiment_shapes(exp, measurement_name="RNA")

This comprehensive I/O functionality enables seamless integration between SOMA's scalable storage format and the existing single-cell analysis ecosystem.

Install with Tessl CLI