or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

constants.md, genotype-analysis.md, index.md, sample-filtering.md, utils.md, variant-records.md, vcf-filtering.md, vcf-parsing.md, vcf-writing.md

docs/variant-records.md

0

# Variant Record Analysis

1

2

Comprehensive variant record representation with coordinate properties, variant classification, and population genetics statistics for genomic analysis.

3

4

## Capabilities

5

6

### Variant Records

7

8

The `_Record` class represents a single variant site (one data row of a VCF file), exposing the complete VCF fields together with computed properties.

9

10

```python { .api }

11

class _Record:

12

"""

13

Represents a single variant site (row) in a VCF file.

14

"""

15

16

# Standard VCF fields

17

CHROM: str # Chromosome name

18

POS: int # 1-based position

19

ID: str # Variant identifier

20

REF: str # Reference allele

21

ALT: list # List of alternate alleles

22

QUAL: float # Quality score (None when missing, i.e. "." in the VCF)

23

FILTER: list # Filter status

24

INFO: dict # INFO field dictionary

25

FORMAT: str # Format string

26

samples: list # List of _Call objects

27

28

# Coordinate properties

29

start: int # 0-based start coordinate (POS - 1)

30

end: int # 0-based, half-open (exclusive) end coordinate (start + len(REF))

31

affected_start: int # Start of affected region

32

affected_end: int # End of affected region

33

alleles: list # Combined REF and ALT alleles

34

35

# Variant classification properties

36

is_snp: bool # True if variant is a SNP

37

is_indel: bool # True if variant is an indel

38

is_sv: bool # True if variant is a structural variant

39

is_transition: bool # True if SNP is a transition

40

is_deletion: bool # True if indel is a deletion

41

is_monomorphic: bool # True for reference calls

42

is_filtered: bool # True if variant failed filters

43

var_type: str # "snp", "indel", "sv", "unknown"

44

var_subtype: str # "ts", "tv", "ins", "del", etc.

45

46

# Structural variant properties

47

sv_end: int # SV end position (from INFO.END)

48

is_sv_precise: bool # True if SV coordinates are precise

49

50

# Population statistics properties

51

num_called: int # Number of called samples

52

call_rate: float # Fraction of called genotypes

53

num_hom_ref: int # Number of homozygous reference calls

54

num_hom_alt: int # Number of homozygous alternate calls

55

num_het: int # Number of heterozygous calls

56

num_unknown: int # Number of uncalled genotypes

57

aaf: list # List of alternate allele frequencies

58

nucl_diversity: float # Nucleotide diversity estimate

59

heterozygosity: float # Site heterozygosity

60

61

def genotype(self, name: str):

62

"""

63

Get genotype call for specific sample.

64

65

Parameters:

66

- name: str, sample name

67

68

Returns:

69

_Call object for the sample

70

"""

71

72

def add_format(self, fmt: str):

73

"""

74

Add field to FORMAT.

75

76

Parameters:

77

- fmt: str, format field to add

78

"""

79

80

def add_filter(self, flt: str):

81

"""

82

Add filter to FILTER field.

83

84

Parameters:

85

- flt: str, filter name to add

86

"""

87

88

def add_info(self, info: str, value=True):

89

"""

90

Add INFO field.

91

92

Parameters:

93

- info: str, INFO field name

94

- value: INFO field value (default True for flags)

95

"""

96

97

def get_hom_refs(self):

98

"""

99

Get list of homozygous reference calls.

100

101

Returns:

102

List of _Call objects with homozygous reference genotypes

103

"""

104

105

def get_hom_alts(self):

106

"""

107

Get list of homozygous alternate calls.

108

109

Returns:

110

List of _Call objects with homozygous alternate genotypes

111

"""

112

113

def get_hets(self):

114

"""

115

Get list of heterozygous calls.

116

117

Returns:

118

List of _Call objects with heterozygous genotypes

119

"""

120

121

def get_unknowns(self):

122

"""

123

Get list of uncalled genotypes.

124

125

Returns:

126

List of _Call objects with uncalled genotypes

127

"""

128

```

129

130

### Usage Examples

131

132

```python

133

import vcf

134

135

reader = vcf.Reader(filename='variants.vcf')

136

137

for record in reader:

138

# Access basic variant information

139

print(f"Variant: {record.CHROM}:{record.POS} {record.REF}>{record.ALT}")

140

141

# Check variant classification

142

if record.is_snp:

143

print(f"SNP - Transition: {record.is_transition}")

144

elif record.is_indel:

145

print(f"Indel - Deletion: {record.is_deletion}")

146

147

# Population statistics

148

print(f"Call rate: {record.call_rate:.2f}")

149

print(f"Heterozygosity: {record.heterozygosity:.3f}")

150

if record.aaf:

151

print(f"Alternate allele frequencies: {record.aaf}")

152

153

# Access sample genotypes

154

hom_refs = record.get_hom_refs()

155

hets = record.get_hets()

156

hom_alts = record.get_hom_alts()

157

158

print(f"Genotype counts - Hom ref: {len(hom_refs)}, "

159

f"Het: {len(hets)}, Hom alt: {len(hom_alts)}")

160

161

# Get specific sample genotype

162

if 'SAMPLE1' in reader.samples:

163

call = record.genotype('SAMPLE1')

164

print(f"SAMPLE1 genotype: {call.gt_bases}")

165

166

# Modify record

167

record.add_info('ANALYZED', True)

168

if record.QUAL and record.QUAL < 30:

169

record.add_filter('LowQual')

170

```

171

172

## Types

173

174

### Alternate Allele Classes

175

176

```python { .api }

177

class _AltRecord:

178

"""Abstract base class for alternate allele representations."""

179

pass

180

181

class _Substitution(_AltRecord):

182

"""Regular nucleotide substitutions (SNV/MNV)."""

183

type: str # "SNV" or "MNV"

184

sequence: str # Alternate sequence

185

186

class _Breakend(_AltRecord):

187

"""Paired breakend for structural variants."""

188

type: str # "BND"

189

chr: str # Mate chromosome

190

pos: int # Mate position

191

orientation: bool # Breakend orientation

192

remoteOrientation: bool # Mate orientation

193

connectingSequence: str # Connecting sequence

194

withinMainAssembly: bool # True if mate in main assembly

195

196

class _SingleBreakend(_Breakend):

197

"""Single breakend (inherits from _Breakend)."""

198

pass

199

200

class _SV(_AltRecord):

201

"""Symbolic structural variant alleles (e.g., <DEL>, <DUP>)."""

202

type: str # SV type string

203

```