0
# Constants and Reserved Fields
1
2
PyVCF constants for VCF specification compliance, field validation, and reserved field definitions for INFO and FORMAT fields.
3
4
## Capabilities
5
6
### Package Version
7
8
```python { .api }
9
VERSION: str # PyVCF version string (e.g., '0.6.8')
10
```
11
12
### Reserved INFO Fields
13
14
Predefined INFO fields from the VCF 4.0/4.1 specifications. Each key is a reserved INFO ID and each value is the name of its VCF type.
15
16
```python { .api }
17
# Reserved INFO IDs mapped to the name of their VCF value type
# ('String', 'Integer', 'Float' or 'Flag').
RESERVED_INFO: dict = {
    # General-purpose site annotations
    'AA': 'String',        # ancestral allele
    'AC': 'Integer',       # allele count in genotypes
    'AF': 'Float',         # allele frequency
    'AN': 'Integer',       # total number of alleles in called genotypes
    'BQ': 'Float',         # RMS base quality
    'CIGAR': 'String',     # CIGAR string describing the alignment
    'DB': 'Flag',          # member of dbSNP
    'DP': 'Integer',       # combined depth across samples
    'END': 'Integer',      # end position of the variant
    'H2': 'Flag',          # member of HapMap2
    'H3': 'Flag',          # member of HapMap3
    'MQ': 'Float',         # RMS mapping quality
    'MQ0': 'Integer',      # number of reads with MAPQ == 0
    'NS': 'Integer',       # number of samples with data
    'SB': 'String',        # strand bias
    'SOMATIC': 'Flag',     # somatic mutation
    'VALIDATED': 'Flag',   # validated by a follow-up experiment
    '1000G': 'Flag',       # member of 1000 Genomes

    # Annotations specific to structural variants (SVs)
    'IMPRECISE': 'Flag',   # imprecise structural variation
    'NOVEL': 'Flag',       # novel structural variation
    'SVEND': 'Integer',    # end position of the SV
    'SVLEN': 'Integer',    # length of the SV
    'SVTYPE': 'String',    # type of structural variant
    'MATEID': 'String',    # ID of the mate breakend
    'EVENT': 'String',     # ID of the associated event
    'HOMLEN': 'Integer',   # length of base-pair homology
    'DGVID': 'String',     # ID from the Database of Genomic Variants
    'DBVARID': 'String',   # ID from NCBI dbVar
}
49
```
50
51
### Reserved FORMAT Fields
52
53
Predefined FORMAT fields from the VCF 4.0/4.1 specifications. Each key is a reserved FORMAT ID and each value is the name of its VCF type.
54
55
```python { .api }
56
# Reserved FORMAT (per-sample) IDs mapped to the name of their VCF value
# type ('String', 'Integer' or 'Float').
RESERVED_FORMAT: dict = {
    # Genotype-level fields
    'GT': 'String',    # genotype
    'DP': 'Integer',   # read depth at this position for this sample
    'FT': 'String',    # sample genotype filter
    'GL': 'Float',     # genotype likelihoods (log10-scaled)
    'GLE': 'String',   # genotype likelihoods of heterogeneous ploidy
    'PL': 'Integer',   # phred-scaled genotype likelihoods
    'GP': 'Float',     # genotype posterior probabilities
    'GQ': 'Integer',   # conditional genotype quality
    'HQ': 'Integer',   # haplotype qualities
    'PS': 'Integer',   # phase set identifier
    'PQ': 'Integer',   # phasing quality
    'EC': 'Integer',   # expected alternate allele counts
    'MQ': 'Integer',   # mapping quality
    'AD': 'Integer',   # allelic depths
    'ADF': 'Integer',  # allelic depths on the forward strand
    'ADR': 'Integer',  # allelic depths on the reverse strand

    # Copy-number and haplotype fields used with structural variants
    'CN': 'Integer',   # copy number genotype
    'CNQ': 'Float',    # copy number genotype quality
    'CNL': 'Float',    # copy number genotype likelihood
    'NQ': 'Integer',   # phred-style probability that the variant is novel
    'HAP': 'Integer',  # unique haplotype identifier
    'AHAP': 'Integer', # unique identifier of the ancestral haplotype
}
82
```
83
84
### Field Number Constants
85
86
Constants for interpreting field number specifications in VCF headers.
87
88
```python { .api }
89
# Symbolic header "Number" values mapped to the code used in their place.
field_counts: dict = {
    '.': None,  # count is unknown / varies
    'A': -1,    # one value per alternate allele
    'G': -2,    # one value per genotype (reference included)
    'R': -3,    # one value per allele (reference + alternates)
}
95
```
96
97
### Usage Examples
98
99
```python
100
import vcf
101
102
# Report which PyVCF release is installed
print(f"PyVCF version: {vcf.VERSION}")

# Membership in RESERVED_INFO tells us whether an INFO ID is reserved
dp_is_reserved = 'DP' in vcf.RESERVED_INFO
if dp_is_reserved:
    print(f"DP is a reserved INFO field, type: {vcf.RESERVED_INFO['DP']}")

# The same test works for FORMAT IDs via RESERVED_FORMAT
gt_is_reserved = 'GT' in vcf.RESERVED_FORMAT
if gt_is_reserved:
    print(f"GT is a reserved FORMAT field, type: {vcf.RESERVED_FORMAT['GT']}")
112
113
# Validate custom field names against reserved fields
114
def validate_custom_info_field(field_name):
    """Return True if *field_name* is not a key of ``vcf.RESERVED_INFO``.

    When the name is reserved, a warning is printed and False is returned.
    """
    is_available = field_name not in vcf.RESERVED_INFO
    if not is_available:
        print(f"Warning: {field_name} is a reserved INFO field")
    return is_available
119
120
def validate_custom_format_field(field_name):
    """Return True if *field_name* is not a key of ``vcf.RESERVED_FORMAT``.

    When the name is reserved, a warning is printed and False is returned.
    """
    if field_name not in vcf.RESERVED_FORMAT:
        return True

    print(f"Warning: {field_name} is a reserved FORMAT field")
    return False
125
126
# Example validation: each pair tries a custom name first, then a reserved one
for info_name in ('CUSTOM_INFO', 'DP'):  # OK, then Warning: reserved
    validate_custom_info_field(info_name)
for format_name in ('CUSTOM_GT', 'GT'):  # OK, then Warning: reserved
    validate_custom_format_field(format_name)
131
```
132
133
### Working with Reserved Fields
134
135
```python
136
import vcf
137
138
def _print_site_info(record):
    """Print the DP and AF INFO values of *record* when they are present."""
    if 'DP' in record.INFO:
        depth = record.INFO['DP']
        print(f"Total depth: {depth}")

    if 'AF' in record.INFO:
        allele_freqs = record.INFO['AF']
        print(f"Allele frequencies: {allele_freqs}")


def _print_sv_info(record):
    """Print the SVTYPE and SVLEN INFO values of *record* when present."""
    if 'SVTYPE' in record.INFO:
        sv_type = record.INFO['SVTYPE']
        print(f"SV type: {sv_type}")

    if 'SVLEN' in record.INFO:
        sv_length = record.INFO['SVLEN']
        print(f"SV length: {sv_length}")


def _print_call_fields(call):
    """Print GT, DP and GQ for one sample call, skipping absent attributes."""
    if hasattr(call.data, 'GT'):
        print(f"Sample {call.sample} genotype: {call.data.GT}")

    if hasattr(call.data, 'DP'):
        print(f"Sample {call.sample} depth: {call.data.DP}")

    if hasattr(call.data, 'GQ'):
        print(f"Sample {call.sample} quality: {call.data.GQ}")


reader = vcf.Reader(filename='variants.vcf')

for record in reader:
    # Standard INFO fields first
    _print_site_info(record)

    # SV-specific INFO fields are only consulted for structural variants
    if record.is_sv:
        _print_sv_info(record)

    # Then the standard FORMAT fields of every sample
    for call in record.samples:
        _print_call_fields(call)
170
```
171
172
### Header Field Type Validation
173
174
```python
175
import vcf
176
177
def validate_field_definition(field_dict, reserved_dict, field_type):
    """Warn about header fields whose type disagrees with the reserved type.

    Args:
        field_dict: Mapping of field ID to a definition object exposing a
            ``type`` attribute.
        reserved_dict: Mapping of reserved field ID to its expected type.
        field_type: Label used in the warning text (e.g. 'INFO', 'FORMAT').

    IDs missing from *reserved_dict* are ignored. Nothing is returned;
    each mismatch is reported with ``print``.
    """
    for field_id, field_info in field_dict.items():
        # Only reserved IDs have an expected type to compare against
        if field_id not in reserved_dict:
            continue

        expected_type = reserved_dict[field_id]
        actual_type = field_info.type
        if actual_type == expected_type:
            continue

        print(f"Warning: {field_type} field {field_id} "
              f"type mismatch: expected {expected_type}, "
              f"got {actual_type}")
188
189
# Open a VCF file so its header definitions can be checked
reader = vcf.Reader(filename='variants.vcf')

# INFO definitions are checked first, then FORMAT definitions
header_checks = (
    (reader.infos, vcf.RESERVED_INFO, 'INFO'),
    (reader.formats, vcf.RESERVED_FORMAT, 'FORMAT'),
)
for definitions, reserved, label in header_checks:
    validate_field_definition(definitions, reserved, label)
197
```
198
199
### Custom Field Number Handling
200
201
```python
202
import vcf
203
204
def interpret_field_number(num_str, num_alts=None, num_samples=None):
    """Interpret a VCF header ``Number`` specification.

    Args:
        num_str: The specification: '.', 'A', 'G', 'R', or a fixed count
            such as '2'. ``None`` and the sentinel codes -1, -2 and -3
            (the values ``field_counts`` assigns to 'A', 'G' and 'R') are
            accepted as well.
            NOTE(review): PyVCF appears to store header counts already
            converted to these codes - confirm against the parser.
        num_alts: Number of alternate alleles in the record, or None when
            it is not known. Zero is a valid, known count.
        num_samples: Accepted for interface compatibility; not used.

    Returns:
        The concrete number of expected values, the sentinel -1 / -2 / -3
        when the count depends on an unknown *num_alts*, or None when the
        number is variable or cannot be interpreted.
    """
    # Map sentinel codes back to their symbols so both spellings share a path
    symbol = {-1: 'A', -2: 'G', -3: 'R'}.get(num_str, num_str)

    if symbol is None or symbol == '.':
        return None  # Variable number

    # Compare against None explicitly: 0 alternates is a real answer
    alts_known = num_alts is not None

    if symbol == 'A':
        return num_alts if alts_known else -1

    if symbol == 'G':
        if not alts_known:
            return -2
        # Diploid genotype count is (n+1)(n+2)/2 with n = alternate alleles,
        # i.e. k(k+1)/2 with k = alleles including the reference
        n_alleles = num_alts + 1
        return (n_alleles * (n_alleles + 1)) // 2

    if symbol == 'R':
        return (num_alts + 1) if alts_known else -3

    try:
        return int(symbol)
    except (TypeError, ValueError):
        return None
223
224
# Example usage: take the first record of a file as context
reader = vcf.Reader(filename='variants.vcf')
record = next(reader)

# The number of alternate alleles drives the 'A', 'G' and 'R' counts
num_alts = len(record.ALT)
print(f"Number of alternates: {num_alts}")

# Resolve every INFO definition against this record, then report the results
expected_counts = {
    field_id: interpret_field_number(field_info.num, num_alts)
    for field_id, field_info in reader.infos.items()
}
for field_id, expected_count in expected_counts.items():
    print(f"INFO {field_id}: expects {expected_count} values")
235
```