CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-pyvcf

A VCFv4.0 and 4.1 parser for Python

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

genotype-analysis.mddocs/

Sample Genotype Analysis

Individual sample genotype calls with classification, phase information, and variant analysis methods for population genetics and clinical genomics.

Capabilities

Genotype Calls

The _Call class represents individual sample genotype calls with comprehensive analysis properties.

class _Call:
    """
    Represents a genotype call for one sample at one variant site.
    """
    
    site: '_Record'  # Reference to parent _Record
    sample: str      # Sample name
    data: object     # Namedtuple of FORMAT field data
    called: bool     # True if genotype was called
    gt_nums: str     # Raw genotype string (e.g., "0/1", "1|0")
    gt_alleles: list # List of allele indices
    ploidity: int    # Number of alleles (e.g., 2 for diploid)
    
    gt_bases: str    # Actual DNA sequences (e.g., "A/G")
    gt_type: int     # Genotype type: 0=hom_ref, 1=het, 2=hom_alt, None=uncalled
    phased: bool     # True if genotype is phased
    is_variant: bool # True if not reference call
    is_het: bool     # True if heterozygous
    is_filtered: bool # True if call failed filters
    
    def gt_phase_char(self):
        """
        Get phase character for genotype.
        
        Returns:
        str: "/" for unphased, "|" for phased
        """

Usage Examples

import vcf

reader = vcf.Reader(filename='variants.vcf')

for record in reader:
    print(f"Variant: {record.CHROM}:{record.POS}")
    
    # Iterate through all sample calls
    for call in record.samples:
        print(f"  Sample {call.sample}:")
        
        if call.called:
            print(f"    Genotype: {call.gt_bases} ({call.gt_nums})")
            print(f"    Type: {['Hom Ref', 'Het', 'Hom Alt'][call.gt_type]}")
            print(f"    Phased: {call.phased}")
            print(f"    Variant: {call.is_variant}")
            
            # Access FORMAT field data
            if hasattr(call.data, 'DP'):
                print(f"    Depth: {call.data.DP}")
            if hasattr(call.data, 'GQ'):
                print(f"    Genotype Quality: {call.data.GQ}")
                
        else:
            print("    Uncalled genotype")
    
    # Get specific sample
    if 'SAMPLE1' in reader.samples:
        call = record.genotype('SAMPLE1')
        if call.called and call.is_het:
            print(f"SAMPLE1 is heterozygous: {call.gt_bases}")
    
    # Count genotype types
    called_samples = [call for call in record.samples if call.called]
    het_count = sum(1 for call in called_samples if call.gt_type == 1)
    hom_alt_count = sum(1 for call in called_samples if call.gt_type == 2)
    
    print(f"Heterozygous calls: {het_count}")
    print(f"Homozygous alternate calls: {hom_alt_count}")

Advanced Genotype Analysis

import vcf

reader = vcf.Reader(filename='variants.vcf')

for record in reader:
    # Phase analysis
    phased_calls = [call for call in record.samples if call.called and call.phased]
    print(f"Phased genotypes: {len(phased_calls)}/{len(record.samples)}")
    
    # Quality analysis (if GQ field present)
    high_quality_calls = []
    for call in record.samples:
        if call.called and hasattr(call.data, 'GQ') and call.data.GQ >= 30:
            high_quality_calls.append(call)
    
    print(f"High quality calls (GQ>=30): {len(high_quality_calls)}")
    
    # Depth analysis (if DP field present)
    depths = []
    for call in record.samples:
        if call.called and hasattr(call.data, 'DP') and call.data.DP is not None:
            depths.append(call.data.DP)
    
    if depths:
        avg_depth = sum(depths) / len(depths)
        print(f"Average depth: {avg_depth:.1f}")
    
    # Allele-specific analysis
    for call in record.samples:
        if call.called and call.gt_type == 1:  # Heterozygous
            allele_indices = call.gt_alleles
            alleles = [record.alleles[int(i)] if i != '.' else '.' for i in allele_indices]
            print(f"Het sample {call.sample}: alleles {alleles}")

Common FORMAT Fields

PyVCF automatically parses FORMAT fields into the call.data namedtuple. Common fields include:

# Common FORMAT fields accessible via call.data
call.data.GT  # Genotype (string)
call.data.DP  # Read depth (integer)
call.data.GQ  # Genotype quality (integer)
call.data.PL  # Phred-scaled genotype likelihoods (list of integers)
call.data.AD  # Allelic depths (list of integers)
call.data.GL  # Genotype likelihoods (list of floats)

Install with Tessl CLI

npx tessl i tessl/pypi-pyvcf

docs

constants.md

genotype-analysis.md

index.md

sample-filtering.md

utils.md

variant-records.md

vcf-filtering.md

vcf-parsing.md

vcf-writing.md

tile.json