CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-pyvcf

A VCFv4.0 and 4.1 parser for Python

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/vcf-filtering.md

VCF Filtering

PyVCF provides an extensible filtering system for genomic variant analysis: built-in filters for common quality-control checks, plus a base class for developing custom filters.

Capabilities

Filter Base Class

Base class for implementing custom VCF filters with standardized interface and argparse integration.

class Base:
    """Base class for VCF record filters.

    A filter is a callable object: subclasses set ``name``, may register
    command-line options in ``customize_parser``, read their settings from
    the parsed arguments in ``__init__``, and decide in ``__call__`` whether
    a record is filtered.
    """

    name: str  # Filter identifier

    @classmethod
    def customize_parser(cls, parser):
        """
        Extend argparse parser with filter-specific options.

        This is a classmethod because the options have to be registered
        before the arguments can be parsed, and an instance can only be
        created from the already-parsed arguments (see ``__init__``).

        Parameters:
        - parser: argparse.ArgumentParser, parser to extend
        """

    def __init__(self, args):
        """
        Initialize filter with argparse arguments.

        Settings are read as attributes of ``args``, so a plain dict is not
        an acceptable substitute for the namespace.

        Parameters:
        - args: argparse.Namespace, parsed arguments
        """

    def __call__(self, record):
        """
        Filter a variant record.

        Parameters:
        - record: _Record, variant record to filter

        Returns:
        None if the record passes. Any non-None value (for example the
        filter name) means the record should be filtered, so callers should
        test the result with ``is None`` rather than by truthiness.
        """

    def filter_name(self):
        """
        Generate filter name for VCF header.

        Returns:
        str: Filter name for FILTER field
        """

Built-in Filters

Pre-implemented filters for common quality control tasks.

class SiteQuality(Base):
    """Built-in filter that screens records on their site quality score."""
    name = 'sq'  # identifier of this filter
    
class VariantGenotypeQuality(Base):
    """Built-in filter that screens records on minimum genotype quality across samples."""
    name = 'mgq'  # identifier of this filter
    
class ErrorBiasFilter(Base):
    """Built-in statistical filter for error bias; needs rpy2 to be installed."""
    name = 'eb'  # identifier of this filter
    
class DepthPerSample(Base):
    """Built-in filter that screens records on the minimum depth per sample."""
    name = 'dps'  # identifier of this filter
    
class AvgDepthPerSample(Base):
    """Built-in filter that screens records on the average depth across samples."""
    name = 'avg-dps'  # identifier of this filter
    
class SnpOnly(Base):
    """Built-in filter that retains SNP variants and filters out indels and SVs."""
    name = 'snp-only'  # identifier of this filter

Usage Examples

import argparse

import vcf
from vcf.filters import SiteQuality, DepthPerSample, SnpOnly

# Apply single filter
reader = vcf.Reader(filename='input.vcf')
# Filters read their settings as attributes of the parsed-arguments object,
# so pass an argparse.Namespace (a dict has no such attributes). The
# attribute name mirrors the --site-quality command-line option.
site_filter = SiteQuality(argparse.Namespace(site_quality=30))

filtered_records = []  # records that passed the filter
for record in reader:
    filter_result = site_filter(record)
    if filter_result is None:  # Record passes filter
        filtered_records.append(record)
    else:
        print(f"Filtered {record.CHROM}:{record.POS} - {filter_result}")

# Chain multiple filters
reader = vcf.Reader(filename='input.vcf')
# Each filter is configured through an argparse.Namespace whose attribute
# mirrors that filter's command-line option (--site-quality,
# --depth-per-sample); SnpOnly takes no options.
filters = [
    SiteQuality(argparse.Namespace(site_quality=30)),
    DepthPerSample(argparse.Namespace(depth_per_sample=10)),
    SnpOnly(argparse.Namespace()),
]

for record in reader:
    # A filter returns None when the record passes; all() stops at the
    # first filter that rejects the record.
    if all(filt(record) is None for filt in filters):
        print(f"Passed all filters: {record.CHROM}:{record.POS}")

# Write filtered VCF
reader = vcf.Reader(filename='input.vcf')
writer = vcf.Writer(open('filtered.vcf', 'w'), reader)

# Filters read their settings as attributes of an argparse.Namespace.
quality_filter = SiteQuality(argparse.Namespace(site_quality=30))
snp_filter = SnpOnly(argparse.Namespace())

try:
    for record in reader:
        # Run each filter once. A filter signals failure with any non-None
        # value, so compare against None instead of relying on truthiness.
        for filt in (quality_filter, snp_filter):
            if filt(record) is not None:
                # Add filter tag
                record.add_filter(filt.filter_name())
        # Every record is written; failing records carry their filter tags.
        writer.write_record(record)
finally:
    # Close the output even if reading or writing fails part-way through.
    writer.close()

Custom Filter Development

import vcf
from vcf.filters import Base

class CustomQualityFilter(Base):
    """Custom filter combining quality and depth thresholds.

    A record is filtered when its site quality is missing or below
    ``min_qual``, or when the mean depth (DP) over the called samples that
    report a depth is below ``min_depth``.
    """

    name = 'custom_qual'

    @classmethod
    def customize_parser(cls, parser):
        """Register --min-qual and --min-depth on the given argparse parser.

        This is a classmethod so the options can be registered before any
        instance exists: __init__ needs the already-parsed arguments.
        """
        parser.add_argument('--min-qual', type=float, default=30,
                            help='Minimum site quality')
        parser.add_argument('--min-depth', type=int, default=10,
                            help='Minimum average depth')

    def __init__(self, args):
        """Read the thresholds from the parsed arguments.

        Parameters:
        - args: argparse.Namespace with ``min_qual`` and ``min_depth``
        """
        self.min_qual = args.min_qual
        self.min_depth = args.min_depth

    def __call__(self, record):
        """Return the filter name if the record fails a threshold, else None."""
        # Check quality (a missing QUAL counts as failing)
        if record.QUAL is None or record.QUAL < self.min_qual:
            return self.filter_name()

        # Check average depth. Compare DP against None rather than testing
        # truthiness, so a reported depth of 0 still counts toward the mean.
        depths = [
            call.data.DP
            for call in record.samples
            if call.called and getattr(call.data, 'DP', None) is not None
        ]

        # With no depth information at all, the depth check is skipped.
        if depths and sum(depths) / len(depths) < self.min_depth:
            return self.filter_name()

        return None  # Pass filter

    def filter_name(self):
        """Return the FILTER tag, which embeds both configured thresholds."""
        return f"CustomQual{self.min_qual}Depth{self.min_depth}"

# Use custom filter
import argparse

# Build the settings object directly; no ArgumentParser is needed when the
# thresholds are supplied in code rather than parsed from a command line.
custom_filter = CustomQualityFilter(argparse.Namespace(min_qual=40, min_depth=15))

reader = vcf.Reader(filename='input.vcf')
for record in reader:
    if custom_filter(record) is None:  # None means the record passed
        print(f"Passed: {record.CHROM}:{record.POS}")

Command Line Filter Usage

PyVCF provides command-line filtering through the vcf_filter.py script, which discovers the available filters through entry points:

# Usage: vcf_filter.py [options] input filter [filter_args] [filter [filter_args]] ...
# A filter is selected by its name and followed by that filter's own options.
vcf_filter.py input.vcf sq --site-quality 30
vcf_filter.py input.vcf mgq --genotype-quality 20
vcf_filter.py input.vcf dps --depth-per-sample 10
vcf_filter.py input.vcf snp-only

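Entry point filters (the left-hand side is the entry-point name, which can differ from the filter's `name` identifier, e.g. `site_quality` vs. `'sq'` and `vgq` vs. `'mgq'`):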

  • site_quality = vcf.filters:SiteQuality
  • vgq = vcf.filters:VariantGenotypeQuality
  • eb = vcf.filters:ErrorBiasFilter
  • dps = vcf.filters:DepthPerSample
  • avg-dps = vcf.filters:AvgDepthPerSample
  • snp-only = vcf.filters:SnpOnly

Install with Tessl CLI

npx tessl i tessl/pypi-pyvcf

docs

constants.md

genotype-analysis.md

index.md

sample-filtering.md

utils.md

variant-records.md

vcf-filtering.md

vcf-parsing.md

vcf-writing.md

tile.json