A VCFv4.0 and 4.1 parser for Python
npx @tessl/cli install tessl/pypi-pyvcf@0.6.0

A comprehensive Python library for parsing and manipulating Variant Call Format (VCF) files v4.0 and 4.1. PyVCF provides a CSV-like interface for reading genomic variant data with automatic type conversion, comprehensive record access, and extensive filtering capabilities for bioinformatics applications.
pip install pyvcf
import vcf
Common imports for VCF parsing:
from vcf import Reader, Writer
Alternative imports:
from vcf import VCFReader, VCFWriter # Backwards compatibility aliases
Additional imports for filtering and utilities:
from vcf import Filter # Base filter class (actually vcf.filters.Base)
from vcf.filters import SiteQuality, DepthPerSample, SnpOnly
from vcf.sample_filter import SampleFilter
from vcf.utils import walk_together, trim_common_suffix
from vcf import RESERVED_INFO, RESERVED_FORMAT # Constants
import vcf
# Read a VCF file
reader = vcf.Reader(filename='variants.vcf')
# Iterate through records
for record in reader:
print(f"Chr: {record.CHROM}, Pos: {record.POS}")
print(f"Ref: {record.REF}, Alt: {record.ALT}")
# Access sample genotypes
for sample_call in record.samples:
print(f"Sample {sample_call.sample}: {sample_call.gt_bases}")
# Write a VCF file
input_reader = vcf.Reader(filename='input.vcf')
writer = vcf.Writer(open('output.vcf', 'w'), input_reader)
for record in input_reader:
if record.QUAL and record.QUAL > 30: # Filter by quality
writer.write_record(record)
writer.close()
PyVCF uses a structured approach to VCF parsing:
This design enables efficient processing of large genomic datasets while providing comprehensive access to variant information, sample genotypes, and metadata for bioinformatics workflows.
Core functionality for reading VCF files with comprehensive metadata support, automatic type conversion, and streaming iteration through variant records.
class Reader:
def __init__(self, fsock=None, filename=None, compressed=None,
prepend_chr=False, strict_whitespace=False, encoding='ascii'): ...
def __iter__(self): ...
def fetch(self, chrom, start=None, end=None): ...
class VCFReader: # Alias for Reader
pass
Functionality for writing VCF records to files while preserving metadata and format integrity.
class Writer:
def __init__(self, stream, template, lineterminator="\n"): ...
def write_record(self, record): ...
def flush(self): ...
def close(self): ...
class VCFWriter: # Alias for Writer
pass
Comprehensive variant record representation with coordinate properties, variant classification, and population genetics statistics.
class _Record:
# Standard VCF fields
CHROM: str
POS: int
ID: str
REF: str
ALT: list
QUAL: float
FILTER: list
INFO: dict
FORMAT: str
samples: list
# Coordinate properties
start: int
end: int
affected_start: int
affected_end: int
alleles: list
# Variant classification
is_snp: bool
is_indel: bool
is_sv: bool
var_type: str
var_subtype: str
# Population statistics
call_rate: float
aaf: list
heterozygosity: float
def genotype(self, name: str): ...
def get_hom_refs(self): ...
def get_hom_alts(self): ...
def get_hets(self): ...
Individual sample genotype calls with classification, phase information, and variant analysis methods.
class _Call:
site: '_Record'
sample: str
data: object
called: bool
gt_nums: str
gt_alleles: list
ploidity: int
gt_bases: str
gt_type: int # 0=hom_ref, 1=het, 2=hom_alt, None=uncalled
phased: bool
is_variant: bool
is_het: bool
Extensible filtering system with built-in filters for quality control and custom filter development.
class Base: # Base filter class (imported as Filter)
name: str
def __call__(self, record): ...
def filter_name(self): ...
class SiteQuality(Base): ...
class VariantGenotypeQuality(Base): ...
class DepthPerSample(Base): ...
class SnpOnly(Base): ...
Filter VCF files by sample during parsing to create subset files with specific samples.
class SampleFilter:
def __init__(self, infile, outfile=None, filters=None, invert=False): ...
def set_filters(self, filters=None, invert=False): ...
def write(self, outfile=None): ...
Utility functions for advanced VCF operations including multi-file synchronization and sequence manipulation.
def walk_together(*readers, **kwargs): ... # Synchronize multiple VCF files
def trim_common_suffix(*sequences): ... # Sequence manipulation utilities
VCF specification constants, reserved field definitions, and metadata handling utilities.
VERSION: str # PyVCF version
RESERVED_INFO: dict # Reserved INFO field definitions from VCF spec
RESERVED_FORMAT: dict # Reserved FORMAT field definitions from VCF spec
field_counts: dict # Field number interpretation constants