or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

alignment-files.mdbgzf-files.mdcommand-tools.mdindex.mdsequence-files.mdtabix-files.mdutilities.mdvariant-files.md

command-tools.mddocs/

0

# Command-Line Tools Integration

1

2

Direct access to samtools and bcftools command-line functionality from Python. All subcommands are available as Python functions with the same arguments and behavior as the command-line tools.

3

4

## Capabilities

5

6

### Samtools Functions

7

8

Complete samtools toolkit exposed as Python functions for alignment file processing.

9

10

```python { .api }

11

def view(*args, **kwargs):

12

"""

13

View and convert alignment files.

14

15

Parameters:

16

*args: command-line arguments as strings

17

**kwargs: keyword arguments (catch_stdout, save_stdout, etc.)

18

19

Returns:

20

int, exit code (0 for success)

21

"""

22

23

def sort(*args, **kwargs):

24

"""

25

Sort alignment files.

26

27

Parameters:

28

*args: command-line arguments as strings

29

30

Returns:

31

int, exit code

32

"""

33

34

def index(*args, **kwargs):

35

"""

36

Index alignment files.

37

38

Parameters:

39

*args: command-line arguments as strings

40

41

Returns:

42

int, exit code

43

"""

44

45

def merge(*args, **kwargs):

46

"""

47

Merge multiple alignment files.

48

49

Parameters:

50

*args: command-line arguments as strings

51

52

Returns:

53

int, exit code

54

"""

55

56

def cat(*args, **kwargs):

57

"""

58

Concatenate alignment files.

59

60

Parameters:

61

*args: command-line arguments as strings

62

63

Returns:

64

int, exit code

65

"""

66

67

def stats(*args, **kwargs):

68

"""

69

Generate alignment statistics.

70

71

Parameters:

72

*args: command-line arguments as strings

73

74

Returns:

75

int, exit code

76

"""

77

78

def flagstat(*args, **kwargs):

79

"""

80

Generate flag statistics.

81

82

Parameters:

83

*args: command-line arguments as strings

84

85

Returns:

86

int, exit code

87

"""

88

89

def idxstats(*args, **kwargs):

90

"""

91

Generate index statistics.

92

93

Parameters:

94

*args: command-line arguments as strings

95

96

Returns:

97

int, exit code

98

"""

99

100

def depth(*args, **kwargs):

101

"""

102

Calculate per-position depth.

103

104

Parameters:

105

*args: command-line arguments as strings

106

107

Returns:

108

int, exit code

109

"""

110

111

def coverage(*args, **kwargs):

112

"""

113

Calculate coverage statistics.

114

115

Parameters:

116

*args: command-line arguments as strings

117

118

Returns:

119

int, exit code

120

"""

121

122

def mpileup(*args, **kwargs):

123

"""

124

Generate pileup format.

125

126

Parameters:

127

*args: command-line arguments as strings

128

129

Returns:

130

int, exit code

131

"""

132

133

def fixmate(*args, **kwargs):

134

"""

135

Fix mate pair information.

136

137

Parameters:

138

*args: command-line arguments as strings

139

140

Returns:

141

int, exit code

142

"""

143

144

def markdup(*args, **kwargs):

145

"""

146

Mark or remove duplicates.

147

148

Parameters:

149

*args: command-line arguments as strings

150

151

Returns:

152

int, exit code

153

"""

154

155

def rmdup(*args, **kwargs):

156

"""

157

Remove duplicates (legacy).

158

159

Parameters:

160

*args: command-line arguments as strings

161

162

Returns:

163

int, exit code

164

"""

165

166

def quickcheck(*args, **kwargs):

167

"""

168

Quick integrity check.

169

170

Parameters:

171

*args: command-line arguments as strings

172

173

Returns:

174

int, exit code

175

"""

176

177

def calmd(*args, **kwargs):

178

"""

179

Calculate MD tag.

180

181

Parameters:

182

*args: command-line arguments as strings

183

184

Returns:

185

int, exit code

186

"""

187

188

def fasta(*args, **kwargs):

189

"""

190

Convert to FASTA format.

191

192

Parameters:

193

*args: command-line arguments as strings

194

195

Returns:

196

int, exit code

197

"""

198

199

def fastq(*args, **kwargs):

200

"""

201

Convert to FASTQ format.

202

203

Parameters:

204

*args: command-line arguments as strings

205

206

Returns:

207

int, exit code

208

"""

209

210

def bam2fq(*args, **kwargs):

211

"""

212

Convert BAM to FASTQ.

213

214

Parameters:

215

*args: command-line arguments as strings

216

217

Returns:

218

int, exit code

219

"""

220

221

def split(*args, **kwargs):

222

"""

223

Split alignment files.

224

225

Parameters:

226

*args: command-line arguments as strings

227

228

Returns:

229

int, exit code

230

"""

231

232

def collate(*args, **kwargs):

233

"""

234

Collate alignment files.

235

236

Parameters:

237

*args: command-line arguments as strings

238

239

Returns:

240

int, exit code

241

"""

242

243

def faidx(*args, **kwargs):

244

"""

245

Index FASTA file.

246

247

Parameters:

248

*args: command-line arguments as strings

249

250

Returns:

251

int, exit code

252

"""

253

254

def dict(*args, **kwargs):

255

"""

256

Create sequence dictionary.

257

258

Parameters:

259

*args: command-line arguments as strings

260

261

Returns:

262

int, exit code

263

"""

264

265

def bedcov(*args, **kwargs):

266

"""

267

Calculate BED coverage.

268

269

Parameters:

270

*args: command-line arguments as strings

271

272

Returns:

273

int, exit code

274

"""

275

276

def phase(*args, **kwargs):

277

"""

278

Phase variants.

279

280

Parameters:

281

*args: command-line arguments as strings

282

283

Returns:

284

int, exit code

285

"""

286

287

def consensus(*args, **kwargs):

288

"""

289

Generate consensus sequence.

290

291

Parameters:

292

*args: command-line arguments as strings

293

294

Returns:

295

int, exit code

296

"""

297

298

def reheader(*args, **kwargs):

299

"""

300

Replace header.

301

302

Parameters:

303

*args: command-line arguments as strings

304

305

Returns:

306

int, exit code

307

"""

308

309

def depad(*args, **kwargs):

310

"""

311

Remove padding from alignment.

312

313

Parameters:

314

*args: command-line arguments as strings

315

316

Returns:

317

int, exit code

318

"""

319

320

def pad2unpad(*args, **kwargs):

321

"""

322

Convert padded to unpadded alignment.

323

324

Parameters:

325

*args: command-line arguments as strings

326

327

Returns:

328

int, exit code

329

"""

330

331

def addreplacerg(*args, **kwargs):

332

"""

333

Add or replace read groups.

334

335

Parameters:

336

*args: command-line arguments as strings

337

338

Returns:

339

int, exit code

340

"""

341

342

def ampliconstats(*args, **kwargs):

343

"""

344

Generate amplicon statistics.

345

346

Parameters:

347

*args: command-line arguments as strings

348

349

Returns:

350

int, exit code

351

"""

352

353

def ampliconclip(*args, **kwargs):

354

"""

355

Clip amplicon primers.

356

357

Parameters:

358

*args: command-line arguments as strings

359

360

Returns:

361

int, exit code

362

"""

363

364

def tview(*args, **kwargs):

365

"""

366

Text alignment viewer.

367

368

Parameters:

369

*args: command-line arguments as strings

370

371

Returns:

372

int, exit code

373

"""

374

375

def flags(*args, **kwargs):

376

"""

377

Explain SAM flags.

378

379

Parameters:

380

*args: command-line arguments as strings

381

382

Returns:

383

int, exit code

384

"""

385

386

def version(*args, **kwargs):

387

"""

388

Show samtools version.

389

390

Parameters:

391

*args: command-line arguments as strings

392

393

Returns:

394

int, exit code

395

"""

396

397

def bamshuf(*args, **kwargs):

398

"""

399

Shuffle and group alignments by name.

400

401

Parameters:

402

*args: command-line arguments as strings

403

404

Returns:

405

int, exit code

406

"""

407

408

def cram_size(*args, **kwargs):

409

"""

410

List the contents of a CRAM file.

411

412

Parameters:

413

*args: command-line arguments as strings

414

415

Returns:

416

int, exit code

417

"""

418

419

def fqidx(*args, **kwargs):

420

"""

421

Index/extract FASTQ.

422

423

Parameters:

424

*args: command-line arguments as strings

425

426

Returns:

427

int, exit code

428

"""

429

430

def fqimport(*args, **kwargs):

431

"""

432

Import FASTQ to SAM.

433

434

Parameters:

435

*args: command-line arguments as strings

436

437

Returns:

438

int, exit code

439

"""

440

441

def reference(*args, **kwargs):

442

"""

443

Generate reference sequence from MD tags.

444

445

Parameters:

446

*args: command-line arguments as strings

447

448

Returns:

449

int, exit code

450

"""

451

452

def reset(*args, **kwargs):

453

"""

454

Remove auxiliary fields.

455

456

Parameters:

457

*args: command-line arguments as strings

458

459

Returns:

460

int, exit code

461

"""

462

463

def samples(*args, **kwargs):

464

"""

465

List the samples in a set of SAM/BAM/CRAM files.

466

467

Parameters:

468

*args: command-line arguments as strings

469

470

Returns:

471

int, exit code

472

"""

473

474

def targetcut(*args, **kwargs):

475

"""

476

Cut out regions with excessive mismatches.

477

478

Parameters:

479

*args: command-line arguments as strings

480

481

Returns:

482

int, exit code

483

"""

484

```

485

486

### BCFtools Functions

487

488

Complete bcftools toolkit exposed as Python functions for variant file processing.

489

490

```python { .api }

491

def call(*args, **kwargs):

492

"""

493

SNP/indel calling.

494

495

Parameters:

496

*args: command-line arguments as strings

497

498

Returns:

499

int, exit code

500

"""

501

502

def filter(*args, **kwargs):

503

"""

504

Filter VCF/BCF records.

505

506

Parameters:

507

*args: command-line arguments as strings

508

509

Returns:

510

int, exit code

511

"""

512

513

def norm(*args, **kwargs):

514

"""

515

Normalize variants.

516

517

Parameters:

518

*args: command-line arguments as strings

519

520

Returns:

521

int, exit code

522

"""

523

524

def annotate(*args, **kwargs):

525

"""

526

Annotate variants.

527

528

Parameters:

529

*args: command-line arguments as strings

530

531

Returns:

532

int, exit code

533

"""

534

535

def query(*args, **kwargs):

536

"""

537

Extract/query VCF/BCF data.

538

539

Parameters:

540

*args: command-line arguments as strings

541

542

Returns:

543

int, exit code

544

"""

545

546

def convert(*args, **kwargs):

547

"""

548

Convert between formats.

549

550

Parameters:

551

*args: command-line arguments as strings

552

553

Returns:

554

int, exit code

555

"""

556

557

def concat(*args, **kwargs):

558

"""

559

Concatenate VCF files.

560

561

Parameters:

562

*args: command-line arguments as strings

563

564

Returns:

565

int, exit code

566

"""

567

568

def isec(*args, **kwargs):

569

"""

570

Create intersections of VCF files.

571

572

Parameters:

573

*args: command-line arguments as strings

574

575

Returns:

576

int, exit code

577

"""

578

579

def gtcheck(*args, **kwargs):

580

"""

581

Check sample identity.

582

583

Parameters:

584

*args: command-line arguments as strings

585

586

Returns:

587

int, exit code

588

"""

589

590

def roh(*args, **kwargs):

591

"""

592

Runs of homozygosity.

593

594

Parameters:

595

*args: command-line arguments as strings

596

597

Returns:

598

int, exit code

599

"""

600

601

def plugin(*args, **kwargs):

602

"""

603

Run bcftools plugin.

604

605

Parameters:

606

*args: command-line arguments as strings

607

608

Returns:

609

int, exit code

610

"""

611

612

def cnv(*args, **kwargs):

613

"""

614

Copy number variation analysis.

615

616

Parameters:

617

*args: command-line arguments as strings

618

619

Returns:

620

int, exit code

621

"""

622

623

def csq(*args, **kwargs):

624

"""

625

Consequence analysis.

626

627

Parameters:

628

*args: command-line arguments as strings

629

630

Returns:

631

int, exit code

632

"""

633

634

def head(*args, **kwargs):

635

"""

636

View header.

637

638

Parameters:

639

*args: command-line arguments as strings

640

641

Returns:

642

int, exit code

643

"""

644

645

def consensus(*args, **kwargs):

646

"""

647

Create consensus sequence by applying VCF variants.

648

649

Parameters:

650

*args: command-line arguments as strings

651

652

Returns:

653

int, exit code

654

"""

655

656

def index(*args, **kwargs):

657

"""

658

Index VCF/BCF files.

659

660

Parameters:

661

*args: command-line arguments as strings

662

663

Returns:

664

int, exit code

665

"""

666

667

def merge(*args, **kwargs):

668

"""

669

Merge VCF files.

670

671

Parameters:

672

*args: command-line arguments as strings

673

674

Returns:

675

int, exit code

676

"""

677

678

def mpileup(*args, **kwargs):

679

"""

680

Multi-way pileup producing genotype likelihoods.

681

682

Parameters:

683

*args: command-line arguments as strings

684

685

Returns:

686

int, exit code

687

"""

688

689

def reheader(*args, **kwargs):

690

"""

691

Modify header of VCF/BCF files.

692

693

Parameters:

694

*args: command-line arguments as strings

695

696

Returns:

697

int, exit code

698

"""

699

700

def sort(*args, **kwargs):

701

"""

702

Sort VCF/BCF files.

703

704

Parameters:

705

*args: command-line arguments as strings

706

707

Returns:

708

int, exit code

709

"""

710

711

def stats(*args, **kwargs):

712

"""

713

Produce VCF/BCF stats.

714

715

Parameters:

716

*args: command-line arguments as strings

717

718

Returns:

719

int, exit code

720

"""

721

722

def view(*args, **kwargs):

723

"""

724

View, subset and filter VCF or BCF files.

725

726

Parameters:

727

*args: command-line arguments as strings

728

729

Returns:

730

int, exit code

731

"""

732

```

733

734

### Error Handling

735

736

```python { .api }

737

class SamtoolsError(Exception):

738

"""

739

Exception raised when samtools/bcftools commands fail.

740

741

Attributes:

742

- cmd: str, command that failed

743

- returncode: int, exit code

744

- stderr: str, error output

745

"""

746

747

def __init__(self, cmd, returncode, stderr=None):

748

"""

749

Initialize SamtoolsError.

750

751

Parameters:

752

- cmd: str, command that failed

753

- returncode: int, exit code

754

- stderr: str, error output

755

"""

756

```

757

758

## Usage Examples

759

760

### Basic Command Execution

761

762

```python

763

import pysam

764

765

# Sort BAM file

766

pysam.sort("-o", "sorted.bam", "input.bam")

767

768

# Index sorted BAM

769

pysam.index("sorted.bam")

770

771

# Generate statistics

772

pysam.stats("sorted.bam")

773

774

# View specific region

775

pysam.view("-b", "-o", "region.bam", "sorted.bam", "chr1:1000-2000")

776

777

# Convert SAM to BAM

778

pysam.view("-b", "-o", "output.bam", "input.sam")

779

```

780

781

### Capturing Output

782

783

```python

784

import pysam

785

786

# Capture command output

787

result = pysam.flagstat("input.bam", catch_stdout=True)

788

print("Flagstat output:", result)

789

790

# Save output to file

791

pysam.stats("input.bam", save_stdout="stats.txt")

792

793

# Capture both stdout and stderr

794

try:

795

result = pysam.view("-h", "nonexistent.bam", catch_stdout=True)

796

except pysam.SamtoolsError as e:

797

print(f"Command failed: {e.cmd}")

798

print(f"Exit code: {e.returncode}")

799

print(f"Error: {e.stderr}")

800

```

801

802

### Complex Workflows

803

804

```python

805

import pysam

806

import os

807

808

def process_alignment_workflow(input_sam, output_prefix):

809

"""Complete alignment processing workflow."""

810

811

# Step 1: Convert SAM to BAM

812

temp_bam = f"{output_prefix}_temp.bam"

813

pysam.view("-b", "-o", temp_bam, input_sam)

814

815

# Step 2: Sort BAM

816

sorted_bam = f"{output_prefix}_sorted.bam"

817

pysam.sort("-o", sorted_bam, temp_bam)

818

819

# Step 3: Index sorted BAM

820

pysam.index(sorted_bam)

821

822

# Step 4: Mark duplicates

823

markdup_bam = f"{output_prefix}_markdup.bam"

824

pysam.markdup("-o", markdup_bam, sorted_bam)

825

826

# Step 5: Index marked BAM

827

pysam.index(markdup_bam)

828

829

# Step 6: Generate statistics

830

stats_file = f"{output_prefix}_stats.txt"

831

pysam.stats(markdup_bam, save_stdout=stats_file)

832

833

# Step 7: Generate flagstat

834

flagstat_file = f"{output_prefix}_flagstat.txt"

835

pysam.flagstat(markdup_bam, save_stdout=flagstat_file)

836

837

# Cleanup intermediate files

838

os.remove(temp_bam)

839

840

return {

841

'final_bam': markdup_bam,

842

'stats_file': stats_file,

843

'flagstat_file': flagstat_file

844

}

845

846

# Usage

847

results = process_alignment_workflow("input.sam", "processed")

848

print(f"Final BAM: {results['final_bam']}")

849

```

850

851

### Variant Calling Pipeline

852

853

```python

854

import pysam

855

856

def variant_calling_pipeline(bam_files, reference_fasta, output_vcf):

857

"""Complete variant calling pipeline using bcftools."""

858

859

# Step 1: Generate pileup

860

pileup_bcf = "pileup.bcf"

861

cmd_args = ["-f", reference_fasta, "-o", pileup_bcf, "-O", "b"] + bam_files

862

pysam.mpileup(*cmd_args)

863

864

# Step 2: Call variants

865

calls_vcf = "calls.vcf"

866

pysam.call("-mv", "-o", calls_vcf, "-O", "v", pileup_bcf)

867

868

# Step 3: Filter variants

869

pysam.filter("-i", "QUAL>=20 && DP>=10", "-o", output_vcf, calls_vcf)

870

871

# Step 4: Index final VCF

872

pysam.tabix_compress(output_vcf, f"{output_vcf}.gz")

873

pysam.tabix_index(f"{output_vcf}.gz", preset="vcf")

874

875

# Cleanup intermediate files

876

os.remove(pileup_bcf)

877

os.remove(calls_vcf)

878

879

return f"{output_vcf}.gz"

880

881

# Usage

882

bam_files = ["sample1.bam", "sample2.bam", "sample3.bam"]

883

final_vcf = variant_calling_pipeline(bam_files, "reference.fa", "variants.vcf")

884

print(f"Final VCF: {final_vcf}")

885

```

886

887

### Batch Processing

888

889

```python

890

import pysam

891

import glob

892

import os

893

from multiprocessing import Pool

894

895

def process_single_bam(bam_file):

896

"""Process single BAM file."""

897

base_name = os.path.splitext(bam_file)[0]

898

899

try:

900

# Sort if not already sorted

901

if not bam_file.endswith('_sorted.bam'):

902

sorted_bam = f"{base_name}_sorted.bam"

903

pysam.sort("-o", sorted_bam, bam_file)

904

bam_file = sorted_bam

905

906

# Index

907

pysam.index(bam_file)

908

909

# Generate statistics

910

stats_file = f"{base_name}_stats.txt"

911

pysam.stats(bam_file, save_stdout=stats_file)

912

913

return f"Processed: {bam_file}"

914

915

except pysam.SamtoolsError as e:

916

return f"Failed: {bam_file} - {e}"

917

918

def batch_process_bams(bam_pattern, num_processes=4):

919

"""Process multiple BAM files in parallel."""

920

bam_files = glob.glob(bam_pattern)

921

922

with Pool(processes=num_processes) as pool:

923

results = pool.map(process_single_bam, bam_files)

924

925

for result in results:

926

print(result)

927

928

# Usage

929

batch_process_bams("*.bam", num_processes=8)

930

```

931

932

### Quality Control Pipeline

933

934

```python

935

import pysam

936

import json

937

938

def quality_control_analysis(bam_file):

939

"""Comprehensive quality control analysis."""

940

base_name = os.path.splitext(bam_file)[0]

941

942

# Collect various statistics

943

qc_results = {}

944

945

# Basic statistics

946

stats_output = pysam.stats(bam_file, catch_stdout=True)

947

qc_results['basic_stats'] = stats_output

948

949

# Flag statistics

950

flagstat_output = pysam.flagstat(bam_file, catch_stdout=True)

951

qc_results['flag_stats'] = flagstat_output

952

953

# Index statistics (if indexed)

954

try:

955

idxstats_output = pysam.idxstats(bam_file, catch_stdout=True)

956

qc_results['index_stats'] = idxstats_output

957

except pysam.SamtoolsError:

958

qc_results['index_stats'] = "No index available"

959

960

# Quick integrity check

961

try:

962

pysam.quickcheck(bam_file)

963

qc_results['integrity_check'] = "PASS"

964

except pysam.SamtoolsError:

965

qc_results['integrity_check'] = "FAIL"

966

967

# Save results

968

qc_file = f"{base_name}_qc.json"

969

with open(qc_file, 'w') as f:

970

json.dump(qc_results, f, indent=2)

971

972

return qc_file

973

974

# Usage

975

qc_report = quality_control_analysis("sample.bam")

976

print(f"QC report saved to: {qc_report}")

977

```

978

979

## Performance Tips

980

981

- Commands run in separate processes, so there's some overhead

982

- For simple operations, using the API classes directly may be faster

983

- Use `catch_stdout=True` to capture output instead of printing to console

984

- Commands support all the same arguments as their command-line equivalents

985

- Error handling uses `SamtoolsError` exception for failed commands

986

- Multi-threading in commands can improve performance for compression/decompression