0
# Sample Genotype Analysis
1
2
This page documents individual sample genotype calls: their classification, phase information, and the variant analysis properties and methods they provide for population genetics and clinical genomics.
3
4
## Capabilities
5
6
### Genotype Calls
7
8
The `_Call` class represents the genotype call for a single sample at a single variant site, and exposes properties describing the genotype's classification, phasing, and filter status.
9
10
```python { .api }
11
class _Call:
12
"""
13
Represents a genotype call for one sample at one variant site.
14
"""
15
16
site: '_Record' # Reference to parent _Record
17
sample: str # Sample name
18
data: object # Namedtuple of FORMAT field data
19
called: bool # True if genotype was called
20
gt_nums: str # Raw genotype string (e.g., "0/1", "1|0")
21
gt_alleles: list # List of allele indices
22
ploidity: int # Number of alleles (e.g., 2 for diploid)
23
24
gt_bases: str # Actual DNA sequences (e.g., "A/G")
25
gt_type: int # Genotype type: 0=hom_ref, 1=het, 2=hom_alt, None=uncalled
26
phased: bool # True if genotype is phased
27
is_variant: bool # True if not reference call
28
is_het: bool # True if heterozygous
29
is_filtered: bool # True if call failed filters
30
31
def gt_phase_char(self):
32
"""
33
Get phase character for genotype.
34
35
Returns:
36
str: "/" for unphased, "|" for phased
37
"""
38
```
39
40
### Usage Examples
41
42
```python
43
import vcf
44
45
reader = vcf.Reader(filename='variants.vcf')
46
47
for record in reader:
48
print(f"Variant: {record.CHROM}:{record.POS}")
49
50
# Iterate through all sample calls
51
for call in record.samples:
52
print(f" Sample {call.sample}:")
53
54
if call.called:
55
print(f" Genotype: {call.gt_bases} ({call.gt_nums})")
56
print(f" Type: {['Hom Ref', 'Het', 'Hom Alt'][call.gt_type]}")
57
print(f" Phased: {call.phased}")
58
print(f" Variant: {call.is_variant}")
59
60
# Access FORMAT field data
61
if hasattr(call.data, 'DP'):
62
print(f" Depth: {call.data.DP}")
63
if hasattr(call.data, 'GQ'):
64
print(f" Genotype Quality: {call.data.GQ}")
65
66
else:
67
print(" Uncalled genotype")
68
69
# Get specific sample
70
if 'SAMPLE1' in reader.samples:
71
call = record.genotype('SAMPLE1')
72
if call.called and call.is_het:
73
print(f"SAMPLE1 is heterozygous: {call.gt_bases}")
74
75
# Count genotype types
76
called_samples = [call for call in record.samples if call.called]
77
het_count = sum(1 for call in called_samples if call.gt_type == 1)
78
hom_alt_count = sum(1 for call in called_samples if call.gt_type == 2)
79
80
print(f"Heterozygous calls: {het_count}")
81
print(f"Homozygous alternate calls: {hom_alt_count}")
82
```
83
84
### Advanced Genotype Analysis
85
86
```python
87
import vcf
88
89
reader = vcf.Reader(filename='variants.vcf')
90
91
for record in reader:
92
# Phase analysis
93
phased_calls = [call for call in record.samples if call.called and call.phased]
94
print(f"Phased genotypes: {len(phased_calls)}/{len(record.samples)}")
95
96
# Quality analysis (if GQ field present)
97
high_quality_calls = []
98
for call in record.samples:
99
if call.called and hasattr(call.data, 'GQ') and call.data.GQ is not None and call.data.GQ >= 30:
100
high_quality_calls.append(call)
101
102
print(f"High quality calls (GQ>=30): {len(high_quality_calls)}")
103
104
# Depth analysis (if DP field present)
105
depths = []
106
for call in record.samples:
107
if call.called and hasattr(call.data, 'DP') and call.data.DP is not None:
108
depths.append(call.data.DP)
109
110
if depths:
111
avg_depth = sum(depths) / len(depths)
112
print(f"Average depth: {avg_depth:.1f}")
113
114
# Allele-specific analysis
115
for call in record.samples:
116
if call.called and call.gt_type == 1: # Heterozygous
117
allele_indices = call.gt_alleles
118
alleles = [record.alleles[int(i)] if i != '.' else '.' for i in allele_indices]
119
print(f"Het sample {call.sample}: alleles {alleles}")
120
```
121
122
## Common FORMAT Fields
123
124
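PyVCF automatically parses FORMAT fields into the `call.data` namedtuple. A field is only available when the record's FORMAT column declares it, and its value may be `None` when it is missing for a sample, so check with `hasattr` and `is not None` before using it. Common fields include: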
125
126
```python
127
# Common FORMAT fields accessible via call.data
128
call.data.GT # Genotype (string)
129
call.data.DP # Read depth (integer)
130
call.data.GQ # Genotype quality (integer)
131
call.data.PL # Phred-scaled genotype likelihoods (list of integers)
132
call.data.AD # Allelic depths (list of integers)
133
call.data.GL # Genotype likelihoods (list of floats)
134
```