Tessl Tile for pypi/pysam@0.23.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

alignment-files.md bgzf-files.md command-tools.md index.md sequence-files.md tabix-files.md utilities.md variant-files.md

command-tools.mddocs/

0
# Command-Line Tools Integration
1

2
Direct access to samtools and bcftools command-line functionality from Python. All subcommands are available as Python functions with the same arguments and behavior as the command-line tools.
3

4
## Capabilities
5

6
### Samtools Functions
7

8
Complete samtools toolkit exposed as Python functions for alignment file processing.
9

10
```python { .api }
11
def view(*args, **kwargs):
12
    """
13
    View and convert alignment files.
14
    
15
    Parameters:
16
    *args: command-line arguments as strings
17
    **kwargs: keyword arguments (catch_stdout, save_stdout, etc.)
18
    
19
    Returns:
20
    int, exit code (0 for success)
21
    """
22

23
def sort(*args, **kwargs):
24
    """
25
    Sort alignment files.
26
    
27
    Parameters:
28
    *args: command-line arguments as strings
29
    
30
    Returns:
31
    int, exit code
32
    """
33

34
def index(*args, **kwargs):
35
    """
36
    Index alignment files.
37
    
38
    Parameters:
39
    *args: command-line arguments as strings
40
    
41
    Returns:
42
    int, exit code
43
    """
44

45
def merge(*args, **kwargs):
46
    """
47
    Merge multiple alignment files.
48
    
49
    Parameters:
50
    *args: command-line arguments as strings
51
    
52
    Returns:
53
    int, exit code
54
    """
55

56
def cat(*args, **kwargs):
57
    """
58
    Concatenate alignment files.
59
    
60
    Parameters:
61
    *args: command-line arguments as strings
62
    
63
    Returns:
64
    int, exit code
65
    """
66

67
def stats(*args, **kwargs):
68
    """
69
    Generate alignment statistics.
70
    
71
    Parameters:
72
    *args: command-line arguments as strings
73
    
74
    Returns:
75
    int, exit code
76
    """
77

78
def flagstat(*args, **kwargs):
79
    """
80
    Generate flag statistics.
81
    
82
    Parameters:
83
    *args: command-line arguments as strings
84
    
85
    Returns:
86
    int, exit code
87
    """
88

89
def idxstats(*args, **kwargs):
90
    """
91
    Generate index statistics.
92
    
93
    Parameters:
94
    *args: command-line arguments as strings
95
    
96
    Returns:
97
    int, exit code
98
    """
99

100
def depth(*args, **kwargs):
101
    """
102
    Calculate per-position depth.
103
    
104
    Parameters:
105
    *args: command-line arguments as strings
106
    
107
    Returns:
108
    int, exit code
109
    """
110

111
def coverage(*args, **kwargs):
112
    """
113
    Calculate coverage statistics.
114
    
115
    Parameters:
116
    *args: command-line arguments as strings
117
    
118
    Returns:
119
    int, exit code
120
    """
121

122
def mpileup(*args, **kwargs):
123
    """
124
    Generate pileup format.
125
    
126
    Parameters:
127
    *args: command-line arguments as strings
128
    
129
    Returns:
130
    int, exit code
131
    """
132

133
def fixmate(*args, **kwargs):
134
    """
135
    Fix mate pair information.
136
    
137
    Parameters:
138
    *args: command-line arguments as strings
139
    
140
    Returns:
141
    int, exit code
142
    """
143

144
def markdup(*args, **kwargs):
145
    """
146
    Mark or remove duplicates.
147
    
148
    Parameters:
149
    *args: command-line arguments as strings
150
    
151
    Returns:
152
    int, exit code
153
    """
154

155
def rmdup(*args, **kwargs):
156
    """
157
    Remove duplicates (legacy).
158
    
159
    Parameters:
160
    *args: command-line arguments as strings
161
    
162
    Returns:
163
    int, exit code
164
    """
165

166
def quickcheck(*args, **kwargs):
167
    """
168
    Quick integrity check.
169
    
170
    Parameters:
171
    *args: command-line arguments as strings
172
    
173
    Returns:
174
    int, exit code
175
    """
176

177
def calmd(*args, **kwargs):
178
    """
179
    Calculate MD tag.
180
    
181
    Parameters:
182
    *args: command-line arguments as strings
183
    
184
    Returns:
185
    int, exit code
186
    """
187

188
def fasta(*args, **kwargs):
189
    """
190
    Convert to FASTA format.
191
    
192
    Parameters:
193
    *args: command-line arguments as strings
194
    
195
    Returns:
196
    int, exit code
197
    """
198

199
def fastq(*args, **kwargs):
200
    """
201
    Convert to FASTQ format.
202
    
203
    Parameters:
204
    *args: command-line arguments as strings
205
    
206
    Returns:
207
    int, exit code
208
    """
209

210
def bam2fq(*args, **kwargs):
211
    """
212
    Convert BAM to FASTQ.
213
    
214
    Parameters:
215
    *args: command-line arguments as strings
216
    
217
    Returns:
218
    int, exit code
219
    """
220

221
def split(*args, **kwargs):
222
    """
223
    Split alignment files.
224
    
225
    Parameters:
226
    *args: command-line arguments as strings
227
    
228
    Returns:
229
    int, exit code
230
    """
231

232
def collate(*args, **kwargs):
233
    """
234
    Collate alignment files.
235
    
236
    Parameters:
237
    *args: command-line arguments as strings
238
    
239
    Returns:
240
    int, exit code
241
    """
242

243
def faidx(*args, **kwargs):
244
    """
245
    Index FASTA file.
246
    
247
    Parameters:
248
    *args: command-line arguments as strings
249
    
250
    Returns:
251
    int, exit code
252
    """
253

254
def dict(*args, **kwargs):
255
    """
256
    Create sequence dictionary.
257
    
258
    Parameters:
259
    *args: command-line arguments as strings
260
    
261
    Returns:
262
    int, exit code
263
    """
264

265
def bedcov(*args, **kwargs):
266
    """
267
    Calculate BED coverage.
268
    
269
    Parameters:
270
    *args: command-line arguments as strings
271
    
272
    Returns:
273
    int, exit code
274
    """
275

276
def phase(*args, **kwargs):
277
    """
278
    Phase variants.
279
    
280
    Parameters:
281
    *args: command-line arguments as strings
282
    
283
    Returns:
284
    int, exit code
285
    """
286

287
def consensus(*args, **kwargs):
288
    """
289
    Generate consensus sequence.
290
    
291
    Parameters:
292
    *args: command-line arguments as strings
293
    
294
    Returns:
295
    int, exit code
296
    """
297

298
def reheader(*args, **kwargs):
299
    """
300
    Replace header.
301
    
302
    Parameters:
303
    *args: command-line arguments as strings
304
    
305
    Returns:
306
    int, exit code
307
    """
308

309
def depad(*args, **kwargs):
310
    """
311
    Remove padding from alignment.
312
    
313
    Parameters:
314
    *args: command-line arguments as strings
315
    
316
    Returns:
317
    int, exit code
318
    """
319

320
def pad2unpad(*args, **kwargs):
321
    """
322
    Convert padded to unpadded alignment.
323
    
324
    Parameters:
325
    *args: command-line arguments as strings
326
    
327
    Returns:
328
    int, exit code
329
    """
330

331
def addreplacerg(*args, **kwargs):
332
    """
333
    Add or replace read groups.
334
    
335
    Parameters:
336
    *args: command-line arguments as strings
337
    
338
    Returns:
339
    int, exit code
340
    """
341

342
def ampliconstats(*args, **kwargs):
343
    """
344
    Generate amplicon statistics.
345
    
346
    Parameters:
347
    *args: command-line arguments as strings
348
    
349
    Returns:
350
    int, exit code
351
    """
352

353
def ampliconclip(*args, **kwargs):
354
    """
355
    Clip amplicon primers.
356
    
357
    Parameters:
358
    *args: command-line arguments as strings
359
    
360
    Returns:
361
    int, exit code
362
    """
363

364
def tview(*args, **kwargs):
365
    """
366
    Text alignment viewer.
367
    
368
    Parameters:
369
    *args: command-line arguments as strings
370
    
371
    Returns:
372
    int, exit code
373
    """
374

375
def flags(*args, **kwargs):
376
    """
377
    Explain SAM flags.
378
    
379
    Parameters:
380
    *args: command-line arguments as strings
381
    
382
    Returns:
383
    int, exit code
384
    """
385

386
def version(*args, **kwargs):
387
    """
388
    Show samtools version.
389
    
390
    Parameters:
391
    *args: command-line arguments as strings
392
    
393
    Returns:
394
    int, exit code
395
    """
396

397
def bamshuf(*args, **kwargs):
398
    """
399
    Shuffle and group alignments by name.
400
    
401
    Parameters:
402
    *args: command-line arguments as strings
403
    
404
    Returns:
405
    int, exit code
406
    """
407

408
def cram_size(*args, **kwargs):
409
    """
410
    List the contents of a CRAM file.
411
    
412
    Parameters:
413
    *args: command-line arguments as strings
414
    
415
    Returns:
416
    int, exit code
417
    """
418

419
def fqidx(*args, **kwargs):
420
    """
421
    Index/extract FASTQ.
422
    
423
    Parameters:
424
    *args: command-line arguments as strings
425
    
426
    Returns:
427
    int, exit code
428
    """
429

430
def fqimport(*args, **kwargs):
431
    """
432
    Import FASTQ to SAM.
433
    
434
    Parameters:
435
    *args: command-line arguments as strings
436
    
437
    Returns:
438
    int, exit code
439
    """
440

441
def reference(*args, **kwargs):
442
    """
443
    Generate reference sequence from MD tags.
444
    
445
    Parameters:
446
    *args: command-line arguments as strings
447
    
448
    Returns:
449
    int, exit code
450
    """
451

452
def reset(*args, **kwargs):
453
    """
454
    Remove auxiliary fields.
455
    
456
    Parameters:
457
    *args: command-line arguments as strings
458
    
459
    Returns:
460
    int, exit code
461
    """
462

463
def samples(*args, **kwargs):
464
    """
465
    List the samples in a set of SAM/BAM/CRAM files.
466
    
467
    Parameters:
468
    *args: command-line arguments as strings
469
    
470
    Returns:
471
    int, exit code
472
    """
473

474
def targetcut(*args, **kwargs):
475
    """
476
    Cut out regions with excessive mismatches.
477
    
478
    Parameters:
479
    *args: command-line arguments as strings
480
    
481
    Returns:
482
    int, exit code
483
    """
484
```
485

486
### BCFtools Functions
487

488
Complete bcftools toolkit exposed as Python functions for variant file processing.
489

490
```python { .api }
491
def call(*args, **kwargs):
492
    """
493
    SNP/indel calling.
494
    
495
    Parameters:
496
    *args: command-line arguments as strings
497
    
498
    Returns:
499
    int, exit code
500
    """
501

502
def filter(*args, **kwargs):
503
    """
504
    Filter VCF/BCF records.
505
    
506
    Parameters:
507
    *args: command-line arguments as strings
508
    
509
    Returns:
510
    int, exit code
511
    """
512

513
def norm(*args, **kwargs):
514
    """
515
    Normalize variants.
516
    
517
    Parameters:
518
    *args: command-line arguments as strings
519
    
520
    Returns:
521
    int, exit code
522
    """
523

524
def annotate(*args, **kwargs):
525
    """
526
    Annotate variants.
527
    
528
    Parameters:
529
    *args: command-line arguments as strings
530
    
531
    Returns:
532
    int, exit code
533
    """
534

535
def query(*args, **kwargs):
536
    """
537
    Extract/query VCF/BCF data.
538
    
539
    Parameters:
540
    *args: command-line arguments as strings
541
    
542
    Returns:
543
    int, exit code
544
    """
545

546
def convert(*args, **kwargs):
547
    """
548
    Convert between formats.
549
    
550
    Parameters:
551
    *args: command-line arguments as strings
552
    
553
    Returns:
554
    int, exit code
555
    """
556

557
def concat(*args, **kwargs):
558
    """
559
    Concatenate VCF files.
560
    
561
    Parameters:
562
    *args: command-line arguments as strings
563
    
564
    Returns:
565
    int, exit code
566
    """
567

568
def isec(*args, **kwargs):
569
    """
570
    Create intersections of VCF files.
571
    
572
    Parameters:
573
    *args: command-line arguments as strings
574
    
575
    Returns:
576
    int, exit code
577
    """
578

579
def gtcheck(*args, **kwargs):
580
    """
581
    Check sample identity.
582
    
583
    Parameters:
584
    *args: command-line arguments as strings
585
    
586
    Returns:
587
    int, exit code
588
    """
589

590
def roh(*args, **kwargs):
591
    """
592
    Runs of homozygosity.
593
    
594
    Parameters:
595
    *args: command-line arguments as strings
596
    
597
    Returns:
598
    int, exit code
599
    """
600

601
def plugin(*args, **kwargs):
602
    """
603
    Run bcftools plugin.
604
    
605
    Parameters:
606
    *args: command-line arguments as strings
607
    
608
    Returns:
609
    int, exit code
610
    """
611

612
def cnv(*args, **kwargs):
613
    """
614
    Copy number variation analysis.
615
    
616
    Parameters:
617
    *args: command-line arguments as strings
618
    
619
    Returns:
620
    int, exit code
621
    """
622

623
def csq(*args, **kwargs):
624
    """
625
    Consequence analysis.
626
    
627
    Parameters:
628
    *args: command-line arguments as strings
629
    
630
    Returns:
631
    int, exit code
632
    """
633

634
def head(*args, **kwargs):
635
    """
636
    View header.
637
    
638
    Parameters:
639
    *args: command-line arguments as strings
640
    
641
    Returns:
642
    int, exit code
643
    """
644

645
def consensus(*args, **kwargs):
646
    """
647
    Create consensus sequence by applying VCF variants.
648
    
649
    Parameters:
650
    *args: command-line arguments as strings
651
    
652
    Returns:
653
    int, exit code
654
    """
655

656
def index(*args, **kwargs):
657
    """
658
    Index VCF/BCF files.
659
    
660
    Parameters:
661
    *args: command-line arguments as strings
662
    
663
    Returns:
664
    int, exit code
665
    """
666

667
def merge(*args, **kwargs):
668
    """
669
    Merge VCF files.
670
    
671
    Parameters:
672
    *args: command-line arguments as strings
673
    
674
    Returns:
675
    int, exit code
676
    """
677

678
def mpileup(*args, **kwargs):
679
    """
680
    Multi-way pileup producing genotype likelihoods.
681
    
682
    Parameters:
683
    *args: command-line arguments as strings
684
    
685
    Returns:
686
    int, exit code
687
    """
688

689
def reheader(*args, **kwargs):
690
    """
691
    Modify header of VCF/BCF files.
692
    
693
    Parameters:
694
    *args: command-line arguments as strings
695
    
696
    Returns:
697
    int, exit code
698
    """
699

700
def sort(*args, **kwargs):
701
    """
702
    Sort VCF/BCF files.
703
    
704
    Parameters:
705
    *args: command-line arguments as strings
706
    
707
    Returns:
708
    int, exit code
709
    """
710

711
def stats(*args, **kwargs):
712
    """
713
    Produce VCF/BCF stats.
714
    
715
    Parameters:
716
    *args: command-line arguments as strings
717
    
718
    Returns:
719
    int, exit code
720
    """
721

722
def view(*args, **kwargs):
723
    """
724
    View, subset and filter VCF or BCF files.
725
    
726
    Parameters:
727
    *args: command-line arguments as strings
728
    
729
    Returns:
730
    int, exit code
731
    """
732
```
733

734
### Error Handling
735

736
```python { .api }
737
class SamtoolsError(Exception):
738
    """
739
    Exception raised when samtools/bcftools commands fail.
740
    
741
    Attributes:
742
    - cmd: str, command that failed
743
    - returncode: int, exit code
744
    - stderr: str, error output
745
    """
746
    
747
    def __init__(self, cmd, returncode, stderr=None):
748
        """
749
        Initialize SamtoolsError.
750
        
751
        Parameters:
752
        - cmd: str, command that failed
753
        - returncode: int, exit code  
754
        - stderr: str, error output
755
        """
756
```
757

758
## Usage Examples
759

760
### Basic Command Execution
761

762
```python
763
import pysam
764

765
# Sort BAM file
766
pysam.sort("-o", "sorted.bam", "input.bam")
767

768
# Index sorted BAM
769
pysam.index("sorted.bam")
770

771
# Generate statistics
772
pysam.stats("sorted.bam")
773

774
# View specific region
775
pysam.view("-b", "-o", "region.bam", "sorted.bam", "chr1:1000-2000")
776

777
# Convert SAM to BAM
778
pysam.view("-b", "-o", "output.bam", "input.sam")
779
```
780

781
### Capturing Output
782

783
```python
784
import pysam
785

786
# Capture command output
787
result = pysam.flagstat("input.bam", catch_stdout=True)
788
print("Flagstat output:", result)
789

790
# Save output to file
791
pysam.stats("input.bam", save_stdout="stats.txt")
792

793
# Capture both stdout and stderr
794
try:
795
    result = pysam.view("-h", "nonexistent.bam", catch_stdout=True)
796
except pysam.SamtoolsError as e:
797
    print(f"Command failed: {e.cmd}")
798
    print(f"Exit code: {e.returncode}")
799
    print(f"Error: {e.stderr}")
800
```
801

802
### Complex Workflows
803

804
```python
805
import pysam
806
import os
807

808
def process_alignment_workflow(input_sam, output_prefix):
809
    """Complete alignment processing workflow."""
810
    
811
    # Step 1: Convert SAM to BAM
812
    temp_bam = f"{output_prefix}_temp.bam"
813
    pysam.view("-b", "-o", temp_bam, input_sam)
814
    
815
    # Step 2: Sort BAM
816
    sorted_bam = f"{output_prefix}_sorted.bam"
817
    pysam.sort("-o", sorted_bam, temp_bam)
818
    
819
    # Step 3: Index sorted BAM
820
    pysam.index(sorted_bam)
821
    
822
    # Step 4: Mark duplicates
823
    markdup_bam = f"{output_prefix}_markdup.bam"
824
    pysam.markdup("-o", markdup_bam, sorted_bam)
825
    
826
    # Step 5: Index marked BAM
827
    pysam.index(markdup_bam)
828
    
829
    # Step 6: Generate statistics
830
    stats_file = f"{output_prefix}_stats.txt"
831
    pysam.stats(markdup_bam, save_stdout=stats_file)
832
    
833
    # Step 7: Generate flagstat
834
    flagstat_file = f"{output_prefix}_flagstat.txt"
835
    pysam.flagstat(markdup_bam, save_stdout=flagstat_file)
836
    
837
    # Cleanup intermediate files
838
    os.remove(temp_bam)
839
    
840
    return {
841
        'final_bam': markdup_bam,
842
        'stats_file': stats_file,
843
        'flagstat_file': flagstat_file
844
    }
845

846
# Usage
847
results = process_alignment_workflow("input.sam", "processed")
848
print(f"Final BAM: {results['final_bam']}")
849
```
850

851
### Variant Calling Pipeline
852

853
```python
854
import pysam
855

856
def variant_calling_pipeline(bam_files, reference_fasta, output_vcf):
857
    """Complete variant calling pipeline using bcftools."""
858
    
859
    # Step 1: Generate pileup
860
    pileup_bcf = "pileup.bcf"
861
    cmd_args = ["-f", reference_fasta, "-o", pileup_bcf, "-O", "b"] + bam_files
862
    pysam.mpileup(*cmd_args)
863
    
864
    # Step 2: Call variants
865
    calls_vcf = "calls.vcf"
866
    pysam.call("-mv", "-o", calls_vcf, "-O", "v", pileup_bcf)
867
    
868
    # Step 3: Filter variants
869
    pysam.filter("-i", "QUAL>=20 && DP>=10", "-o", output_vcf, calls_vcf)
870
    
871
    # Step 4: Index final VCF
872
    pysam.tabix_compress(output_vcf, f"{output_vcf}.gz")
873
    pysam.tabix_index(f"{output_vcf}.gz", preset="vcf")
874
    
875
    # Cleanup intermediate files
876
    os.remove(pileup_bcf)
877
    os.remove(calls_vcf)
878
    
879
    return f"{output_vcf}.gz"
880

881
# Usage
882
bam_files = ["sample1.bam", "sample2.bam", "sample3.bam"]
883
final_vcf = variant_calling_pipeline(bam_files, "reference.fa", "variants.vcf")
884
print(f"Final VCF: {final_vcf}")
885
```
886

887
### Batch Processing
888

889
```python
890
import pysam
891
import glob
892
import os
893
from multiprocessing import Pool
894

895
def process_single_bam(bam_file):
896
    """Process single BAM file."""
897
    base_name = os.path.splitext(bam_file)[0]
898
    
899
    try:
900
        # Sort if not already sorted
901
        if not bam_file.endswith('_sorted.bam'):
902
            sorted_bam = f"{base_name}_sorted.bam"
903
            pysam.sort("-o", sorted_bam, bam_file)
904
            bam_file = sorted_bam
905
        
906
        # Index
907
        pysam.index(bam_file)
908
        
909
        # Generate statistics
910
        stats_file = f"{base_name}_stats.txt"
911
        pysam.stats(bam_file, save_stdout=stats_file)
912
        
913
        return f"Processed: {bam_file}"
914
        
915
    except pysam.SamtoolsError as e:
916
        return f"Failed: {bam_file} - {e}"
917

918
def batch_process_bams(bam_pattern, num_processes=4):
919
    """Process multiple BAM files in parallel."""
920
    bam_files = glob.glob(bam_pattern)
921
    
922
    with Pool(processes=num_processes) as pool:
923
        results = pool.map(process_single_bam, bam_files)
924
    
925
    for result in results:
926
        print(result)
927

928
# Usage
929
batch_process_bams("*.bam", num_processes=8)
930
```
931

932
### Quality Control Pipeline
933

934
```python
935
import pysam
936
import json
937

938
def quality_control_analysis(bam_file):
939
    """Comprehensive quality control analysis."""
940
    base_name = os.path.splitext(bam_file)[0]
941
    
942
    # Collect various statistics
943
    qc_results = {}
944
    
945
    # Basic statistics
946
    stats_output = pysam.stats(bam_file, catch_stdout=True)
947
    qc_results['basic_stats'] = stats_output
948
    
949
    # Flag statistics
950
    flagstat_output = pysam.flagstat(bam_file, catch_stdout=True)
951
    qc_results['flag_stats'] = flagstat_output
952
    
953
    # Index statistics (if indexed)
954
    try:
955
        idxstats_output = pysam.idxstats(bam_file, catch_stdout=True)
956
        qc_results['index_stats'] = idxstats_output
957
    except pysam.SamtoolsError:
958
        qc_results['index_stats'] = "No index available"
959
    
960
    # Quick integrity check
961
    try:
962
        pysam.quickcheck(bam_file)
963
        qc_results['integrity_check'] = "PASS"
964
    except pysam.SamtoolsError:
965
        qc_results['integrity_check'] = "FAIL"
966
    
967
    # Save results
968
    qc_file = f"{base_name}_qc.json"
969
    with open(qc_file, 'w') as f:
970
        json.dump(qc_results, f, indent=2)
971
    
972
    return qc_file
973

974
# Usage
975
qc_report = quality_control_analysis("sample.bam")
976
print(f"QC report saved to: {qc_report}")
977
```
978

979
## Performance Tips
980

981
- Commands run in separate processes, so there's some overhead
982
- For simple operations, using the API classes directly may be faster
983
- Use `catch_stdout=True` to capture output instead of printing to console
984
- Commands support all the same arguments as their command-line equivalents
985
- Error handling uses `SamtoolsError` exception for failed commands
986
- Multi-threading in commands can improve performance for compression/decompression

Version

Tile

Files

command-tools.md.css-3qkkll{font-size:var(--chakra-font-sizes-sm);font-weight:var(--chakra-font-weights-normal);color:var(--chakra-colors-gray-300);}docs/

command-tools.mddocs/