0
# VCF Filtering
1
2
An extensible filtering system for genomic variant analysis: built-in filters cover common quality-control checks, and a base class supports developing custom filters.
3
4
## Capabilities
5
6
### Filter Base Class
7
8
Base class for implementing custom VCF filters with standardized interface and argparse integration.
9
10
```python { .api }
11
class Base:
    """Base class for VCF record filters.

    Subclasses set ``name`` and override ``__call__``; the remaining hooks
    let a filter register its own command-line options and choose the
    name that is written to the VCF header.
    """

    name: str  # Filter identifier

    def customize_parser(self, parser):
        """Extend an argparse parser with filter-specific options.

        Parameters:
        - parser: argparse.ArgumentParser, parser to extend
        """

    def __init__(self, args):
        """Initialize the filter from parsed argparse arguments.

        Parameters:
        - args: argparse.Namespace, parsed arguments
        """

    def __call__(self, record):
        """Filter a variant record.

        Parameters:
        - record: _Record, variant record to filter

        Returns:
        str or None: Filter name if record should be filtered, None to pass
        """

    def filter_name(self):
        """Generate the filter name for the VCF header.

        Returns:
        str: Filter name for FILTER field
        """
50
```
51
52
### Built-in Filters
53
54
Pre-implemented filters for common quality control tasks.
55
56
```python { .api }
57
class SiteQuality(Base):
    """Filter by site quality score.

    Listed under the ``site_quality`` entry point.
    """

    name = 'sq'  # Filter identifier
60
61
class VariantGenotypeQuality(Base):
    """Filter by minimum genotype quality across samples.

    Listed under the ``vgq`` entry point; note that the filter identifier
    itself is ``mgq``.
    """

    name = 'mgq'  # Filter identifier
64
65
class ErrorBiasFilter(Base):
    """Statistical error bias filter (requires rpy2).

    Listed under the ``eb`` entry point.
    """

    name = 'eb'  # Filter identifier
68
69
class DepthPerSample(Base):
    """Filter by minimum depth per sample.

    Listed under the ``dps`` entry point.
    """

    name = 'dps'  # Filter identifier
72
73
class AvgDepthPerSample(Base):
    """Filter by average depth across samples.

    Listed under the ``avg-dps`` entry point.
    """

    name = 'avg-dps'  # Filter identifier
76
77
class SnpOnly(Base):
    """Keep only SNP variants, filter out indels and SVs.

    Listed under the ``snp-only`` entry point.
    """

    name = 'snp-only'  # Filter identifier
80
```
81
82
### Usage Examples
83
84
```python
85
import argparse

import vcf
from vcf.filters import SiteQuality, DepthPerSample, SnpOnly
87
88
# Apply single filter
reader = vcf.Reader(filename='input.vcf')

# Filters are initialized from parsed argparse arguments, so pass a
# Namespace rather than a plain dict. The attribute name mirrors the
# command-line option (--site-quality -> site_quality).
site_filter = SiteQuality(argparse.Namespace(site_quality=30))

filtered_records = []
for record in reader:
    filter_result = site_filter(record)
    if filter_result is None:  # Record passes filter
        filtered_records.append(record)
    else:
        print(f"Filtered {record.CHROM}:{record.POS} - {filter_result}")
99
100
# Chain multiple filters
reader = vcf.Reader(filename='input.vcf')

# Each filter takes an argparse.Namespace; attribute names mirror the
# command-line options (--site-quality, --depth-per-sample).
filters = [
    SiteQuality(argparse.Namespace(site_quality=30)),
    DepthPerSample(argparse.Namespace(depth_per_sample=10)),
    SnpOnly(argparse.Namespace()),
]

for record in reader:
    # all() stops at the first filter that rejects the record, so later
    # filters are not evaluated for records that already failed.
    if all(filt(record) is None for filt in filters):
        print(f"Passed all filters: {record.CHROM}:{record.POS}")
117
118
# Write filtered VCF
reader = vcf.Reader(filename='input.vcf')
writer = vcf.Writer(open('filtered.vcf', 'w'), reader)

# Filters take an argparse.Namespace, not a dict.
quality_filter = SiteQuality(argparse.Namespace(site_quality=30))
snp_filter = SnpOnly(argparse.Namespace())

try:
    for record in reader:
        # Run each filter exactly once per record. A filter signals
        # "filtered" with any non-None value, so compare against None
        # instead of relying on truthiness.
        for filt in (quality_filter, snp_filter):
            if filt(record) is not None:
                # Add filter tag
                record.add_filter(filt.filter_name())
        # Every record is written; failing records carry their tags.
        writer.write_record(record)
finally:
    # Close the output even if reading or writing fails part-way.
    writer.close()
138
```
139
140
### Custom Filter Development
141
142
```python
143
import vcf
144
from vcf.filters import Base
145
146
class CustomQualityFilter(Base):
    """Custom filter combining quality and depth thresholds.

    A record is filtered when its site quality is missing or below
    ``min_qual``, or when the mean depth (DP) over its called samples is
    below ``min_depth``. Records with no usable depth values are not
    rejected by the depth check.
    """

    name = 'custom_qual'

    def customize_parser(self, parser):
        """Register the ``--min-qual`` and ``--min-depth`` options.

        Parameters:
        - parser: argparse.ArgumentParser, parser to extend
        """
        parser.add_argument('--min-qual', type=float, default=30,
                            help='Minimum site quality')
        parser.add_argument('--min-depth', type=int, default=10,
                            help='Minimum average depth')

    def __init__(self, args):
        """Store the thresholds from parsed arguments.

        Parameters:
        - args: argparse.Namespace, must provide ``min_qual`` and ``min_depth``
        """
        super().__init__(args)
        self.min_qual = args.min_qual
        self.min_depth = args.min_depth

    def __call__(self, record):
        """Filter a variant record on site quality and average depth.

        Parameters:
        - record: _Record, variant record to filter

        Returns:
        str or None: Filter name if record should be filtered, None to pass
        """
        # Check quality
        if record.QUAL is None or record.QUAL < self.min_qual:
            return self.filter_name()

        # Check average depth over called samples that report DP.
        depths = []
        for call in record.samples:
            if not call.called:
                continue
            depth = getattr(call.data, 'DP', None)
            # Compare against None, not truthiness, so that a genuine
            # depth of 0 still counts towards the average.
            if depth is not None:
                depths.append(depth)

        if depths and sum(depths) / len(depths) < self.min_depth:
            return self.filter_name()

        return None  # Pass filter

    def filter_name(self):
        """Generate the filter name, embedding both thresholds.

        Returns:
        str: Filter name for FILTER field
        """
        return f"CustomQual{self.min_qual}Depth{self.min_depth}"
179
180
# Use custom filter
import argparse

# Build the parsed arguments directly; the attribute names match the
# destinations argparse derives from --min-qual and --min-depth.
custom_filter = CustomQualityFilter(argparse.Namespace(min_qual=40, min_depth=15))

reader = vcf.Reader(filename='input.vcf')
for record in reader:
    if custom_filter(record) is None:
        print(f"Passed: {record.CHROM}:{record.POS}")
189
```
190
191
### Command Line Filter Usage
192
193
PyVCF provides command-line filtering through entry points:
194
195
```bash
196
# Available filters through entry points
197
vcf_filter.py --site-quality 30 input.vcf
198
vcf_filter.py --variant-genotype-quality 20 input.vcf
199
vcf_filter.py --depth-per-sample 10 input.vcf
200
vcf_filter.py --snp-only input.vcf
201
```
202
203
The built-in filters are registered as entry points, listed below as `entry point name = module:Class`:
204
- `site_quality = vcf.filters:SiteQuality`
205
- `vgq = vcf.filters:VariantGenotypeQuality`
206
- `eb = vcf.filters:ErrorBiasFilter`
207
- `dps = vcf.filters:DepthPerSample`
208
- `avg-dps = vcf.filters:AvgDepthPerSample`
209
- `snp-only = vcf.filters:SnpOnly`