CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-pysam

Package for reading, manipulating, and writing genomic data

Pending
Overview
Eval results
Files

variant-files.mddocs/

VCF/BCF Variant Files

Comprehensive support for reading and writing variant call format files in VCF and BCF formats. Provides header management, sample data access, filtering, and indexing capabilities.

Capabilities

VariantFile

Main interface for reading and writing VCF/BCF files with full header and sample support.

class VariantFile:
    def __init__(self, filepath, mode="r", header=None, drop_samples=False, duplicate_filehandle=True, ignore_truncation=False, format_options=None, threads=1, index=None):
        """
        Open a VCF/BCF file for reading or writing.
        
        Parameters:
        - filepath: str, path to VCF/BCF file
        - mode: str, file mode ('r', 'w', 'rb', 'wb')
        - header: VariantHeader, header for writing
        - drop_samples: bool, ignore sample data
        - duplicate_filehandle: bool, allow multiple handles
        - ignore_truncation: bool, ignore truncated files
        - threads: int, number of threads for compression
        - index: str, path to index file
        
        Returns:
        VariantFile object
        """
    
    def fetch(self, contig=None, start=None, stop=None, region=None, reopen=False):
        """
        Fetch variant records from a region.
        
        Parameters:
        - contig: str, chromosome/contig name
        - start: int, 0-based start position
        - stop: int, 0-based stop position  
        - region: str, region string (chr:start-stop)
        - reopen: bool, reopen file for iteration
        
        Returns:
        Iterator of VariantRecord objects
        """
    
    def write(self, record):
        """
        Write a variant record.
        
        Parameters:
        - record: VariantRecord, variant to write
        """
    
    def new_record(self, contig=None, start=None, stop=None, alleles=None, id=None, qual=None, filter=None, info=None, samples=None, **kwargs):
        """
        Create new variant record.
        
        Parameters:
        - contig: str, chromosome name
        - start: int, 0-based start position
        - stop: int, 0-based stop position
        - alleles: tuple, reference and alternate alleles
        - id: str, variant identifier
        - qual: float, quality score
        - filter: str/list, filter status
        - info: dict, INFO field data
        - samples: dict, sample data
        
        Returns:
        VariantRecord object
        """
    
    def copy_record(self, record):
        """
        Create copy of variant record.
        
        Returns:
        VariantRecord object
        """
    
    def close(self):
        """Close the file."""
    
    # Properties
    @property
    def header(self) -> "VariantHeader":
        """File header."""
    
    @property
    def index(self):
        """File index."""
    
    @property
    def filename(self) -> str:
        """Filename."""
    
    @property
    def is_open(self) -> bool:
        """True if file is open."""
    
    @property
    def category(self) -> str:
        """File category."""
    
    @property
    def format(self) -> str:
        """File format."""
    
    def check_index(self):
        """
        Check if index exists and is valid.
        
        Returns:
        bool, True if valid index
        """

VariantRecord

Individual variant record with position, alleles, quality, and sample information.

class VariantRecord:
    # Core properties
    @property
    def rid(self) -> int:
        """Reference sequence ID."""
    
    @property
    def contig(self) -> str:
        """Chromosome/contig name."""
    
    @contig.setter
    def contig(self, value: str):
        """Set chromosome name."""
    
    @property
    def pos(self) -> int:
        """1-based position."""
    
    @pos.setter
    def pos(self, value: int):
        """Set position."""
    
    @property
    def start(self) -> int:
        """0-based start position."""
    
    @property
    def stop(self) -> int:
        """0-based stop position."""
    
    @property
    def id(self) -> str:
        """Variant identifier."""
    
    @id.setter
    def id(self, value: str):
        """Set variant identifier."""
    
    @property
    def ref(self) -> str:
        """Reference allele."""
    
    @ref.setter
    def ref(self, value: str):
        """Set reference allele."""
    
    @property
    def alts(self) -> tuple:
        """Alternate alleles."""
    
    @alts.setter
    def alts(self, value: tuple):
        """Set alternate alleles."""
    
    @property
    def alleles(self) -> tuple:
        """All alleles (reference + alternates)."""
    
    @alleles.setter
    def alleles(self, value: tuple):
        """Set all alleles."""
    
    @property
    def qual(self) -> float:
        """Quality score."""
    
    @qual.setter
    def qual(self, value: float):
        """Set quality score."""
    
    # Complex properties
    @property
    def filter(self) -> "VariantRecordFilter":
        """Filter information."""
    
    @property
    def info(self) -> "VariantRecordInfo":
        """INFO field data."""
    
    @property
    def format(self) -> "VariantRecordFormat":
        """FORMAT field definition."""
    
    @property
    def samples(self) -> "VariantRecordSamples":
        """Sample data."""
    
    # Methods
    def copy(self):
        """
        Create copy of record.
        
        Returns:
        VariantRecord object
        """
    
    def translate(self, mapping):
        """
        Translate chromosome names.
        
        Parameters:
        - mapping: dict, chromosome name mapping
        """
    
    def to_string(self):
        """
        Convert to VCF format string.
        
        Returns:
        str, VCF line
        """

VariantHeader

VCF/BCF header containing metadata, sample information, and field definitions.

class VariantHeader:
    def __init__(self):
        """Create new variant header."""
    
    # Properties
    @property
    def version(self) -> str:
        """VCF format version."""
    
    @property
    def samples(self) -> "VariantHeaderSamples":
        """Sample names and metadata."""
    
    @property
    def records(self) -> "VariantHeaderRecords":
        """Header records."""
    
    @property
    def contigs(self) -> "VariantHeaderContigs":
        """Contig information."""
    
    @property
    def filters(self) -> "VariantHeaderRecords":
        """FILTER definitions."""
    
    @property
    def info(self) -> "VariantHeaderRecords": 
        """INFO field definitions."""
    
    @property
    def formats(self) -> "VariantHeaderRecords":
        """FORMAT field definitions."""
    
    # Methods
    def add_record(self, record):
        """
        Add header record.
        
        Parameters:
        - record: VariantHeaderRecord, record to add
        """
    
    def add_sample(self, name):
        """
        Add sample.
        
        Parameters:
        - name: str, sample name
        """
    
    def add_line(self, line):
        """
        Add header line.
        
        Parameters:
        - line: str, header line
        """
    
    def copy(self):
        """
        Create copy of header.
        
        Returns:
        VariantHeader object
        """
    
    def merge(self, other):
        """
        Merge with another header.
        
        Parameters:
        - other: VariantHeader, header to merge
        """
    
    def subset(self, samples):
        """
        Create subset with specific samples.
        
        Parameters:
        - samples: list, sample names to include
        
        Returns:
        VariantHeader object
        """
    
    def to_string(self):
        """
        Convert to VCF header string.
        
        Returns:
        str, VCF header
        """

VariantRecordSamples

Sample data access for variant records with genotype and field information.

class VariantRecordSamples:
    def __getitem__(self, sample):
        """
        Get sample data.
        
        Parameters:
        - sample: str/int, sample name or index
        
        Returns:
        VariantRecordSample object
        """
    
    def __contains__(self, sample):
        """Check if sample exists."""
    
    def __len__(self):
        """Number of samples."""
    
    def __iter__(self):
        """Iterate over samples."""
    
    def keys(self):
        """
        Get sample names.
        
        Returns:
        Iterator of sample names
        """
    
    def values(self):
        """
        Get sample data.
        
        Returns:
        Iterator of VariantRecordSample objects
        """
    
    def items(self):
        """
        Get sample items.
        
        Returns:
        Iterator of (name, VariantRecordSample) tuples
        """

VariantRecordSample

Individual sample data within a variant record.

class VariantRecordSample:
    def __getitem__(self, field):
        """
        Get field value.
        
        Parameters:
        - field: str, field name
        
        Returns:
        Field value
        """
    
    def __setitem__(self, field, value):
        """
        Set field value.
        
        Parameters:
        - field: str, field name
        - value: field value
        """
    
    def __contains__(self, field):
        """Check if field exists."""
    
    def get(self, field, default=None):
        """
        Get field with default.
        
        Returns:
        Field value or default
        """
    
    def keys(self):
        """
        Get field names.
        
        Returns:
        Iterator of field names
        """
    
    def values(self):
        """
        Get field values.
        
        Returns:
        Iterator of field values
        """
    
    def items(self):
        """
        Get field items.
        
        Returns:
        Iterator of (field, value) tuples
        """
    
    @property
    def name(self) -> str:
        """Sample name."""
    
    # Genotype shortcuts
    @property
    def allele_indices(self) -> tuple:
        """Genotype allele indices."""
    
    @property
    def alleles(self) -> tuple:
        """Genotype alleles."""
    
    @property
    def phased(self) -> bool:
        """True if genotype is phased."""

VariantRecordInfo

INFO field data access for variant records.

class VariantRecordInfo:
    def __getitem__(self, key):
        """
        Get INFO field value.
        
        Parameters:
        - key: str, INFO field name
        
        Returns:
        Field value
        """
    
    def __setitem__(self, key, value):
        """
        Set INFO field value.
        
        Parameters:
        - key: str, INFO field name
        - value: field value
        """
    
    def __delitem__(self, key):
        """Delete INFO field."""
    
    def __contains__(self, key):
        """Check if INFO field exists."""
    
    def __len__(self):
        """Number of INFO fields."""
    
    def __iter__(self):
        """Iterate over INFO field names."""
    
    def get(self, key, default=None):
        """
        Get INFO field with default.
        
        Returns:
        Field value or default
        """
    
    def keys(self):
        """
        Get INFO field names.
        
        Returns:
        Iterator of field names
        """
    
    def values(self):
        """
        Get INFO field values.
        
        Returns:
        Iterator of field values
        """
    
    def items(self):
        """
        Get INFO field items.
        
        Returns:
        Iterator of (field, value) tuples
        """
    
    def clear(self):
        """Remove all INFO fields."""
    
    def update(self, other):
        """
        Update with another INFO object.
        
        Parameters:
        - other: dict/VariantRecordInfo, data to update with
        """

VariantRecordFilter

Filter status information for variant records.

class VariantRecordFilter:
    def __contains__(self, name):
        """Check if filter is applied."""
    
    def __iter__(self):
        """Iterate over applied filters."""
    
    def __len__(self):
        """Number of applied filters."""
    
    def add(self, name):
        """
        Add filter.
        
        Parameters:
        - name: str, filter name
        """
    
    def clear(self):
        """Remove all filters."""

Usage Examples

Basic File Reading

import pysam

# Read VCF file
with pysam.VariantFile("input.vcf") as vcffile:
    # Iterate over all variants
    for record in vcffile:
        print(f"{record.contig}:{record.pos} {record.ref}->{record.alts}")
    
    # Fetch variants in region
    for record in vcffile.fetch("chr1", 1000, 2000):
        if record.qual >= 30:
            print(f"High quality variant: {record.id}")

# Access sample data
with pysam.VariantFile("input.vcf") as vcffile:
    for record in vcffile:
        for sample_name in record.samples:
            sample = record.samples[sample_name]
            genotype = sample["GT"]
            print(f"Sample {sample_name}: {genotype}")

Writing VCF Files

import pysam

# Create header
header = pysam.VariantHeader()
header.add_line('##fileformat=VCFv4.2')
header.add_line('##contig=<ID=chr1,length=249250621>')
header.add_sample("Sample1")
header.add_sample("Sample2")

# Add INFO and FORMAT definitions
header.add_line('##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">')
header.add_line('##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">')
header.add_line('##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">')

with pysam.VariantFile("output.vcf", "w", header=header) as vcffile:
    # Create variant record
    record = vcffile.new_record(
        contig="chr1",
        start=100,
        alleles=("A", "T"),
        qual=60.0,
        info={"DP": 100}
    )
    
    # Set sample data
    record.samples["Sample1"]["GT"] = (0, 1)  # Het
    record.samples["Sample1"]["DP"] = 50
    record.samples["Sample2"]["GT"] = (1, 1)  # Hom alt
    record.samples["Sample2"]["DP"] = 45
    
    vcffile.write(record)

Filtering and Processing

import pysam

with pysam.VariantFile("input.vcf") as infile:
    # Create output with same header
    with pysam.VariantFile("filtered.vcf", "w", header=infile.header) as outfile:
        for record in infile:
            # Filter by quality and depth
            if record.qual >= 30 and record.info.get("DP", 0) >= 10:
                # Check if any sample is homozygous alternate
                has_hom_alt = False
                for sample in record.samples.values():
                    if sample["GT"] == (1, 1):
                        has_hom_alt = True
                        break
                
                if has_hom_alt:
                    outfile.write(record)

Install with Tessl CLI

npx tessl i tessl/pypi-pysam

docs

alignment-files.md

bgzf-files.md

command-tools.md

index.md

sequence-files.md

tabix-files.md

utilities.md

variant-files.md

tile.json