or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

analysis-tools.md data-io.md datasets.md external-tools.md index.md preprocessing.md queries.md spatial-analysis.md utilities.md visualization.md

docs/analysis-tools.md

0

# Analysis Tools

1

2

Scanpy's tools module provides advanced analysis methods for single-cell data including dimensionality reduction, clustering, trajectory inference, differential expression testing, and specialized single-cell analysis algorithms.

3

4

## Capabilities

5

6

### Dimensionality Reduction and Embedding

7

8

Generate low-dimensional representations of high-dimensional single-cell data.

9

10

```python { .api }

11

def tsne(adata, n_pcs=None, use_rep=None, perplexity=30, early_exaggeration=12, learning_rate=1000, random_state=0, use_fast_tsne=True, n_jobs=None, copy=False):

12

"""

13

t-distributed stochastic neighbor embedding (t-SNE).

14

15

Parameters:

16

- adata (AnnData): Annotated data object

17

- n_pcs (int, optional): Number of PCs to use

18

- use_rep (str, optional): Representation to use

19

- perplexity (float): t-SNE perplexity parameter

20

- early_exaggeration (float): Early exaggeration parameter

21

- learning_rate (float): Learning rate

22

- random_state (int): Random seed

23

- use_fast_tsne (bool): Use fast t-SNE implementation

24

- n_jobs (int, optional): Number of parallel jobs

25

- copy (bool): Return copy

26

27

Returns:

28

AnnData or None: Object with t-SNE coordinates (if copy=True)

29

"""

30

31

def umap(adata, min_dist=0.5, spread=1.0, n_components=2, maxiter=None, alpha=1.0, gamma=1.0, negative_sample_rate=5, init_pos='spectral', random_state=0, a=None, b=None, copy=False, method='umap', neighbors_key=None):

32

"""

33

Uniform Manifold Approximation and Projection (UMAP) embedding.

34

35

Parameters:

36

- adata (AnnData): Annotated data object

37

- min_dist (float): Minimum distance between embedded points

38

- spread (float): Effective scale of embedded points

39

- n_components (int): Number of dimensions for embedding

40

- maxiter (int, optional): Maximum number of iterations

41

- alpha (float): Learning rate

42

- gamma (float): Repulsive strength

43

- negative_sample_rate (int): Number of negative samples

44

- init_pos (str): Initialization method

45

- random_state (int): Random seed

46

- a (float, optional): Curve parameter

47

- b (float, optional): Curve parameter

48

- copy (bool): Return copy

49

- method (str): UMAP method to use

50

- neighbors_key (str, optional): Key for neighbors data

51

52

Returns:

53

AnnData or None: Object with UMAP coordinates (if copy=True)

54

"""

55

56

def diffmap(adata, n_comps=15, neighbors_key=None, random_state=0, copy=False):

57

"""

58

Diffusion map embedding.

59

60

Parameters:

61

- adata (AnnData): Annotated data object

62

- n_comps (int): Number of diffusion components

63

- neighbors_key (str, optional): Key for neighbors data

64

- random_state (int): Random seed

65

- copy (bool): Return copy

66

67

Returns:

68

AnnData or None: Object with diffusion map (if copy=True)

69

"""

70

71

def draw_graph(adata, layout='fa', random_state=0, root=None, neighbors_key=None, copy=False, **kwds):

72

"""

73

Force-directed graph drawing.

74

75

Parameters:

76

- adata (AnnData): Annotated data object

77

- layout (str): Layout algorithm ('fa', 'fr', etc.)

78

- random_state (int): Random seed

79

- root (int, optional): Root node for certain layouts

80

- neighbors_key (str, optional): Key for neighbors data

81

- copy (bool): Return copy

82

- **kwds: Additional layout parameters

83

84

Returns:

85

AnnData or None: Object with graph layout (if copy=True)

86

"""

87

```

88

89

### Clustering

90

91

Identify groups of similar cells using various clustering algorithms.

92

93

```python { .api }

94

def leiden(adata, resolution=1, restrict_to=None, random_state=0, key_added='leiden', adjacency=None, directed=True, use_weights=True, n_iterations=-1, partition_type=None, neighbors_key=None, obsp=None, copy=False):

95

"""

96

Leiden clustering algorithm.

97

98

Parameters:

99

- adata (AnnData): Annotated data object

100

- resolution (float): Resolution parameter for clustering

101

- restrict_to (tuple, optional): Restrict clustering to subset

102

- random_state (int): Random seed

103

- key_added (str): Key for storing cluster labels

104

- adjacency (array, optional): Adjacency matrix

105

- directed (bool): Use directed graph

106

- use_weights (bool): Use edge weights

107

- n_iterations (int): Number of iterations (-1 for convergence)

108

- partition_type (object, optional): Partition type

109

- neighbors_key (str, optional): Key for neighbors data

110

- obsp (str, optional): Key in obsp for adjacency

111

- copy (bool): Return copy

112

113

Returns:

114

AnnData or None: Object with cluster labels (if copy=True)

115

"""

116

117

def louvain(adata, resolution=1, random_state=0, restrict_to=None, key_added='louvain', adjacency=None, flavor='vtraag', directed=True, use_weights=False, partition_type=None, neighbors_key=None, obsp=None, copy=False):

118

"""

119

Louvain clustering algorithm.

120

121

Parameters:

122

- adata (AnnData): Annotated data object

123

- resolution (float): Resolution parameter for clustering

124

- random_state (int): Random seed

125

- restrict_to (tuple, optional): Restrict clustering to subset

126

- key_added (str): Key for storing cluster labels

127

- adjacency (array, optional): Adjacency matrix

128

- flavor (str): Implementation flavor

129

- directed (bool): Use directed graph

130

- use_weights (bool): Use edge weights

131

- partition_type (object, optional): Partition type

132

- neighbors_key (str, optional): Key for neighbors data

133

- obsp (str, optional): Key in obsp for adjacency

134

- copy (bool): Return copy

135

136

Returns:

137

AnnData or None: Object with cluster labels (if copy=True)

138

"""

139

```

140

141

### Trajectory Inference and Pseudotime

142

143

Analyze developmental trajectories and compute pseudotime.

144

145

```python { .api }

146

def dpt(adata, n_dcs=10, n_branchings=0, min_group_size=0.01, allow_kendall_tau_shift=True, neighbors_key=None, copy=False):

147

"""

148

Diffusion pseudotime analysis.

149

150

Parameters:

151

- adata (AnnData): Annotated data object

152

- n_dcs (int): Number of diffusion components

153

- n_branchings (int): Number of branchings to detect

154

- min_group_size (float): Minimum group size for branching

155

- allow_kendall_tau_shift (bool): Allow Kendall tau shift

156

- neighbors_key (str, optional): Key for neighbors data

157

- copy (bool): Return copy

158

159

Returns:

160

AnnData or None: Object with pseudotime (if copy=True)

161

"""

162

163

def paga(adata, groups=None, use_rna_velocity=False, model='v1.2', neighbors_key=None, copy=False):

164

"""

165

Partition-based graph abstraction (PAGA).

166

167

Parameters:

168

- adata (AnnData): Annotated data object

169

- groups (str, optional): Key for grouping observations

170

- use_rna_velocity (bool): Use RNA velocity information

171

- model (str): PAGA model version

172

- neighbors_key (str, optional): Key for neighbors data

173

- copy (bool): Return copy

174

175

Returns:

176

AnnData or None: Object with PAGA results (if copy=True)

177

"""

178

179

def paga_degrees(adata, groups=None):

180

"""

181

Calculate node degrees in PAGA graph.

182

183

Parameters:

184

- adata (AnnData): Annotated data object

185

- groups (str, optional): Key for grouping observations

186

187

Returns:

188

array: Node degrees

189

"""

190

191

def paga_expression_entropies(adata, groups=None):

192

"""

193

Calculate expression entropies for PAGA nodes.

194

195

Parameters:

196

- adata (AnnData): Annotated data object

197

- groups (str, optional): Key for grouping observations

198

199

Returns:

200

array: Expression entropies

201

"""

202

203

def paga_compare_paths(adata1, adata2, adjacency_key='paga_adjacency', adjacency_key2=None, embeddings_key='X_umap', embedding_key2=None, annotation_key=None, annotation_key2=None):

204

"""

205

Compare PAGA paths between datasets.

206

207

Parameters:

208

- adata1 (AnnData): First dataset

209

- adata2 (AnnData): Second dataset

210

- adjacency_key (str): Key for adjacency matrix in first dataset

211

- adjacency_key2 (str, optional): Key for adjacency matrix in second dataset

212

- embeddings_key (str): Key for embeddings in first dataset

213

- embedding_key2 (str, optional): Key for embeddings in second dataset

214

- annotation_key (str, optional): Key for annotations in first dataset

215

- annotation_key2 (str, optional): Key for annotations in second dataset

216

217

Returns:

218

dict: Path comparison results

219

"""

220

```

221

222

### Differential Expression Analysis

223

224

Identify genes that are differentially expressed between groups.

225

226

```python { .api }

227

def rank_genes_groups(adata, groupby, use_raw=None, groups='all', reference='rest', n_genes=None, rankby_abs=False, pts=False, key_added=None, copy=False, method='wilcoxon', corr_method='benjamini-hochberg', tie_correct=False, layer=None, **kwds):

228

"""

229

Rank genes for characterizing groups.

230

231

Parameters:

232

- adata (AnnData): Annotated data object

233

- groupby (str): Key in obs for grouping cells

234

- use_raw (bool, optional): Use raw data

235

- groups (str or list): Groups to compare

236

- reference (str): Reference group for comparison

237

- n_genes (int, optional): Number of genes to return per group

238

- rankby_abs (bool): Rank by absolute values

239

- pts (bool): Calculate percentage of cells expressing gene

240

- key_added (str, optional): Key for storing results

241

- copy (bool): Return copy

242

- method (str): Statistical test method

243

- corr_method (str): Multiple testing correction method

244

- tie_correct (bool): Apply tie correction

245

- layer (str, optional): Layer to use

246

- **kwds: Additional method-specific parameters

247

248

Returns:

249

AnnData or None: Object with ranking results (if copy=True)

250

"""

251

252

def filter_rank_genes_groups(adata, key='rank_genes_groups', groupby=None, use_raw=None, log=True, key_added='rank_genes_groups_filtered', min_in_group_fraction=0.25, min_fold_change=2, max_out_group_fraction=0.5):

253

"""

254

Filter ranked genes based on fold change and expression criteria.

255

256

Parameters:

257

- adata (AnnData): Annotated data object

258

- key (str): Key for ranked genes results

259

- groupby (str, optional): Key for grouping

260

- use_raw (bool, optional): Use raw data

261

- log (bool): Data is log-transformed

262

- key_added (str): Key for filtered results

263

- min_in_group_fraction (float): Minimum fraction expressing in group

264

- min_fold_change (float): Minimum fold change

265

- max_out_group_fraction (float): Maximum fraction expressing out of group

266

267

Returns:

268

None: Filtered gene rankings are stored in adata.uns[key_added]

269

"""

270

```

271

272

### Gene Set Scoring

273

274

Score cells based on gene set expression.

275

276

```python { .api }

277

def score_genes(adata, gene_list, ctrl_size=50, gene_pool=None, n_bins=25, score_name='score', random_state=0, copy=False, use_raw=None):

278

"""

279

Score a set of genes.

280

281

Parameters:

282

- adata (AnnData): Annotated data object

283

- gene_list (list): List of gene names to score

284

- ctrl_size (int): Number of control genes per test gene

285

- gene_pool (list, optional): Pool of genes for controls

286

- n_bins (int): Number of expression bins

287

- score_name (str): Name for score in obs

288

- random_state (int): Random seed

289

- copy (bool): Return copy

290

- use_raw (bool, optional): Use raw data

291

292

Returns:

293

AnnData or None: Object with gene scores (if copy=True)

294

"""

295

296

def score_genes_cell_cycle(adata, s_genes, g2m_genes, copy=False, **kwargs):

297

"""

298

Score cell cycle phase based on marker genes.

299

300

Parameters:

301

- adata (AnnData): Annotated data object

302

- s_genes (list): S phase marker genes

303

- g2m_genes (list): G2/M phase marker genes

304

- copy (bool): Return copy

305

- **kwargs: Additional parameters for score_genes

306

307

Returns:

308

AnnData or None: Object with cell cycle scores (if copy=True)

309

"""

310

```

311

312

### Specialized Analysis

313

314

Additional analysis tools for specific use cases.

315

316

```python { .api }

317

def dendrogram(adata, groupby, n_pcs=None, use_rep=None, var_names=None, use_raw=None, cor_method='pearson', linkage_method='complete', optimal_ordering=False, key_added=None, inplace=True):

318

"""

319

Compute hierarchical clustering dendrogram.

320

321

Parameters:

322

- adata (AnnData): Annotated data object

323

- groupby (str): Key for grouping observations

324

- n_pcs (int, optional): Number of PCs to use

325

- use_rep (str, optional): Representation to use

326

- var_names (list, optional): Variable names to use

327

- use_raw (bool, optional): Use raw data

328

- cor_method (str): Correlation method

329

- linkage_method (str): Linkage method for clustering

330

- optimal_ordering (bool): Compute optimal leaf ordering

331

- key_added (str, optional): Key for storing results

332

- inplace (bool): Store results in adata

333

334

Returns:

335

dict or None: Dendrogram results (if not inplace)

336

"""

337

338

def embedding_density(adata, basis='umap', groupby=None, key_added=None):

339

"""

340

Calculate density of cells in embedding space.

341

342

Parameters:

343

- adata (AnnData): Annotated data object

344

- basis (str): Embedding basis to use

345

- groupby (str, optional): Key for grouping

346

- key_added (str, optional): Key for storing density

347

348

Returns:

349

None: Modifies adata in place

350

"""

351

352

def marker_gene_overlap(adata, reference_markers, key='rank_genes_groups', normalize='reference', top_n_markers=None, adj_pval_threshold=None, key_added='marker_gene_overlap'):

353

"""

354

Calculate overlap between marker genes and reference.

355

356

Parameters:

357

- adata (AnnData): Annotated data object

358

- reference_markers (dict): Reference marker genes

359

- key (str): Key for marker gene results

360

- normalize (str): Normalization method

361

- top_n_markers (int, optional): Number of top markers to consider

362

- adj_pval_threshold (float, optional): Adjusted p-value threshold

363

- key_added (str): Key for storing overlap results

364

365

Returns:

366

None: Modifies adata in place

367

"""

368

369

def ingest(adata, adata_ref, obs=None, embedding_method='umap', labeling_method='knn', neighbors_key=None, inplace=True, **kwargs):

370

"""

371

Map new data to reference using ingest method.

372

373

Parameters:

374

- adata (AnnData): Query data to map

375

- adata_ref (AnnData): Reference data

376

- obs (list, optional): Observations to map

377

- embedding_method (str): Method for embedding mapping

378

- labeling_method (str): Method for label transfer

379

- neighbors_key (str, optional): Key for neighbors data

380

- inplace (bool): Modify adata in place

381

- **kwargs: Additional parameters

382

383

Returns:

384

AnnData or None: Mapped data (if not inplace)

385

"""

386

387

def sim(adata, tmax=None, n_obs=None, copy=False, **kwargs):

388

"""

389

Simulate single-cell data.

390

391

Parameters:

392

- adata (AnnData): Template data object

393

- tmax (float, optional): Maximum time for simulation

394

- n_obs (int, optional): Number of observations to simulate

395

- copy (bool): Return copy

396

- **kwargs: Additional simulation parameters

397

398

Returns:

399

AnnData or None: Simulated data (if copy=True)

400

"""

401

```

402

403

### Classes

404

405

```python { .api }

406

class Ingest:

407

"""Class for mapping query data to reference."""

408

409

def __init__(self, adata_ref, **kwargs):

410

"""

411

Initialize Ingest object.

412

413

Parameters:

414

- adata_ref (AnnData): Reference dataset

415

- **kwargs: Additional parameters

416

"""

417

418

def fit(self, **kwargs):

419

"""Fit the ingest model."""

420

421

def map_embedding(self, adata, **kwargs):

422

"""Map query data to reference embedding."""

423

```

424

425

## Usage Examples

426

427

### Basic Clustering Analysis

428

429

```python

430

import scanpy as sc

431

432

# Assumes `adata` is already preprocessed and its neighborhood graph has been computed (e.g. with sc.pp.neighbors)

433

# Leiden clustering

434

sc.tl.leiden(adata, resolution=0.5)

435

436

# UMAP embedding

437

sc.tl.umap(adata)

438

439

# Find marker genes

440

sc.tl.rank_genes_groups(adata, 'leiden', method='wilcoxon')

441

442

# Plot results

443

sc.pl.umap(adata, color=['leiden'])

444

sc.pl.rank_genes_groups(adata, n_genes=5, sharey=False)

445

```

446

447

### Trajectory Analysis

448

449

```python

450

# Diffusion pseudotime analysis (requires `import numpy as np` in addition to scanpy)

451

adata.uns['iroot'] = np.flatnonzero(adata.obs['leiden'] == '2')[0] # set root

452

sc.tl.dpt(adata)

453

454

# PAGA analysis

455

sc.tl.paga(adata, groups='leiden')

456

sc.pl.paga(adata, plot=False)

457

458

# Plot trajectory

459

sc.pl.umap(adata, color=['leiden', 'dpt_pseudotime'])

460

```

461

462

### Advanced Differential Expression

463

464

```python

465

# Compare specific groups

466

sc.tl.rank_genes_groups(adata, 'leiden', groups=['2'], reference='1', method='wilcoxon')

467

468

# Filter results

469

sc.tl.filter_rank_genes_groups(adata, min_fold_change=2)

470

471

# Get results as dataframe

472

result = sc.get.rank_genes_groups_df(adata, group='2')

473

```