or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

constants.md, genotype-analysis.md, index.md, sample-filtering.md, utils.md, variant-records.md, vcf-filtering.md, vcf-parsing.md, vcf-writing.md

docs/genotype-analysis.md

0

# Sample Genotype Analysis

1

2

This page covers individual sample genotype calls: genotype classification, phase information, and variant analysis methods for population genetics and clinical genomics.

3

4

## Capabilities

5

6

### Genotype Calls

7

8

The `_Call` class represents the genotype call for one sample at one variant site and exposes properties for classifying and analyzing that call.

9

10

```python { .api }

11

class _Call:

12

"""

13

Represents a genotype call for one sample at one variant site.

14

"""

15

16

site: '_Record' # Reference to parent _Record

17

sample: str # Sample name

18

data: object # Namedtuple of FORMAT field data

19

called: bool # True if genotype was called

20

gt_nums: str # Raw genotype string (e.g., "0/1", "1|0")

21

gt_alleles: list # List of allele indices

22

ploidity: int # Number of alleles (e.g., 2 for diploid)

23

24

gt_bases: str # Actual DNA sequences (e.g., "A/G")

25

gt_type: int # Genotype type: 0=hom_ref, 1=het, 2=hom_alt, None=uncalled

26

phased: bool # True if genotype is phased

27

is_variant: bool # True if not reference call

28

is_het: bool # True if heterozygous

29

is_filtered: bool # True if call failed filters

30

31

def gt_phase_char(self):

32

"""

33

Get phase character for genotype.

34

35

Returns:

36

str: "/" for unphased, "|" for phased

37

"""

38

```

39

40

### Usage Examples

41

42

```python

43

import vcf

44

45

reader = vcf.Reader(filename='variants.vcf')

46

47

for record in reader:

48

print(f"Variant: {record.CHROM}:{record.POS}")

49

50

# Iterate through all sample calls

51

for call in record.samples:

52

print(f" Sample {call.sample}:")

53

54

if call.called:

55

print(f" Genotype: {call.gt_bases} ({call.gt_nums})")

56

print(f" Type: {['Hom Ref', 'Het', 'Hom Alt'][call.gt_type]}")

57

print(f" Phased: {call.phased}")

58

print(f" Variant: {call.is_variant}")

59

60

# Access FORMAT field data

61

if hasattr(call.data, 'DP'):

62

print(f" Depth: {call.data.DP}")

63

if hasattr(call.data, 'GQ'):

64

print(f" Genotype Quality: {call.data.GQ}")

65

66

else:

67

print(" Uncalled genotype")

68

69

# Get specific sample

70

if 'SAMPLE1' in reader.samples:

71

call = record.genotype('SAMPLE1')

72

if call.called and call.is_het:

73

print(f"SAMPLE1 is heterozygous: {call.gt_bases}")

74

75

# Count genotype types

76

called_samples = [call for call in record.samples if call.called]

77

het_count = sum(1 for call in called_samples if call.gt_type == 1)

78

hom_alt_count = sum(1 for call in called_samples if call.gt_type == 2)

79

80

print(f"Heterozygous calls: {het_count}")

81

print(f"Homozygous alternate calls: {hom_alt_count}")

82

```

83

84

### Advanced Genotype Analysis

85

86

```python

87

import vcf

88

89

reader = vcf.Reader(filename='variants.vcf')

90

91

for record in reader:

92

# Phase analysis

93

phased_calls = [call for call in record.samples if call.called and call.phased]

94

print(f"Phased genotypes: {len(phased_calls)}/{len(record.samples)}")

95

96

# Quality analysis (if GQ field present)

97

high_quality_calls = []

98

for call in record.samples:

99

if call.called and hasattr(call.data, 'GQ') and call.data.GQ >= 30:

100

high_quality_calls.append(call)

101

102

print(f"High quality calls (GQ>=30): {len(high_quality_calls)}")

103

104

# Depth analysis (if DP field present)

105

depths = []

106

for call in record.samples:

107

if call.called and hasattr(call.data, 'DP') and call.data.DP is not None:

108

depths.append(call.data.DP)

109

110

if depths:

111

avg_depth = sum(depths) / len(depths)

112

print(f"Average depth: {avg_depth:.1f}")

113

114

# Allele-specific analysis

115

for call in record.samples:

116

if call.called and call.gt_type == 1: # Heterozygous

117

allele_indices = call.gt_alleles

118

alleles = [record.alleles[int(i)] if i != '.' else '.' for i in allele_indices]

119

print(f"Het sample {call.sample}: alleles {alleles}")

120

```

121

122

## Common FORMAT Fields

123

124

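PyVCF automatically parses FORMAT fields into the `call.data` namedtuple. A given field may be absent from a record, which is why the examples above guard access with `hasattr`. Common fields include: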

125

126

```python

127

# Common FORMAT fields accessible via call.data

128

call.data.GT # Genotype (string)

129

call.data.DP # Read depth (integer)

130

call.data.GQ # Genotype quality (integer)

131

call.data.PL # Phred-scaled genotype likelihoods (list of integers)

132

call.data.AD # Allelic depths (list of integers)

133

call.data.GL # Genotype likelihoods (list of floats)

134

```