or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

alignment-files.md, bgzf-files.md, command-tools.md, index.md, sequence-files.md, tabix-files.md, utilities.md, variant-files.md

docs/variant-files.md

0

# VCF/BCF Variant Files

1

2

Comprehensive support for reading and writing variant call data in VCF (Variant Call Format) files and their binary counterpart, BCF. Provides header management, sample data access, filtering, and indexing capabilities.

3

4

## Capabilities

5

6

### VariantFile

7

8

Main interface for reading and writing VCF/BCF files with full header and sample support.

9

10

```python { .api }

11

class VariantFile:

12

def __init__(self, filepath, mode="r", header=None, drop_samples=False, duplicate_filehandle=True, ignore_truncation=False, format_options=None, threads=1, index=None):

13

"""

14

Open a VCF/BCF file for reading or writing.

15

16

Parameters:

17

- filepath: str, path to VCF/BCF file

18

- mode: str, file mode ('r', 'w', 'rb', 'wb')

19

- header: VariantHeader, header for writing

20

- drop_samples: bool, ignore sample data

21

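- duplicate_filehandle: bool, duplicate file handles passed in (directly or via file-like objects) before handing them to htslib; set to False to disable duplication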

22

- ignore_truncation: bool, issue a warning instead of raising an error when a bgzipped file appears truncated (missing EOF marker)

23

- threads: int, number of threads to use for compressing/decompressing VCF/BCF data

24

- index: str, path to index file

25

26

Returns:

27

VariantFile object

28

"""

29

30

def fetch(self, contig=None, start=None, stop=None, region=None, reopen=False):

31

"""

32

Fetch variant records from a region.

33

34

Parameters:

35

- contig: str, chromosome/contig name

36

- start: int, 0-based start position

37

- stop: int, 0-based stop position (exclusive; start/stop describe a half-open interval)

38

- region: str, samtools-style region string (chr:start-stop; unlike start/stop, these coordinates are 1-based and inclusive)

39

- reopen: bool, reopen file for iteration

40

41

Returns:

42

Iterator of VariantRecord objects

43

"""

44

45

def write(self, record):

46

"""

47

Write a variant record.

48

49

Parameters:

50

- record: VariantRecord, variant to write

51

"""

52

53

def new_record(self, contig=None, start=None, stop=None, alleles=None, id=None, qual=None, filter=None, info=None, samples=None, **kwargs):

54

"""

55

Create new variant record.

56

57

Parameters:

58

- contig: str, chromosome name

59

- start: int, 0-based start position

60

- stop: int, 0-based stop position (exclusive)

61

- alleles: tuple, reference and alternate alleles

62

- id: str, variant identifier

63

- qual: float, quality score

64

- filter: str/list, filter status

65

- info: dict, INFO field data

66

- samples: dict, sample data

67

68

Returns:

69

VariantRecord object

70

"""

71

72

def copy_record(self, record):

73

"""

74

Create copy of variant record.

75

76

Returns:

77

VariantRecord object

78

"""

79

80

def close(self):

81

"""Close the file."""

82

83

# Properties

84

@property

85

def header(self) -> "VariantHeader":

86

"""File header."""

87

88

@property

89

def index(self):

90

"""File index."""

91

92

@property

93

def filename(self) -> str:

94

"""Filename."""

95

96

@property

97

def is_open(self) -> bool:

98

"""True if file is open."""

99

100

@property

101

def category(self) -> str:

102

"""File category."""

103

104

@property

105

def format(self) -> str:

106

"""File format."""

107

108

def check_index(self):

109

"""

110

Check if index exists and is valid.

111

112

Returns:

113

bool, True if valid index

114

"""

115

```

116

117

### VariantRecord

118

119

Individual variant record with position, alleles, quality, and sample information.

120

121

```python { .api }

122

class VariantRecord:

123

# Core properties

124

@property

125

def rid(self) -> int:

126

"""Reference sequence ID."""

127

128

@property

129

def contig(self) -> str:

130

"""Chromosome/contig name."""

131

132

@contig.setter

133

def contig(self, value: str):

134

"""Set chromosome name."""

135

136

@property

137

def pos(self) -> int:

138

"""1-based position."""

139

140

@pos.setter

141

def pos(self, value: int):

142

"""Set position."""

143

144

@property

145

def start(self) -> int:

146

"""0-based start position."""

147

148

@property

149

def stop(self) -> int:

150

"""0-based stop position."""

151

152

@property

153

def id(self) -> str:

154

"""Variant identifier."""

155

156

@id.setter

157

def id(self, value: str):

158

"""Set variant identifier."""

159

160

@property

161

def ref(self) -> str:

162

"""Reference allele."""

163

164

@ref.setter

165

def ref(self, value: str):

166

"""Set reference allele."""

167

168

@property

169

def alts(self) -> tuple:

170

"""Alternate alleles."""

171

172

@alts.setter

173

def alts(self, value: tuple):

174

"""Set alternate alleles."""

175

176

@property

177

def alleles(self) -> tuple:

178

"""All alleles (reference + alternates)."""

179

180

@alleles.setter

181

def alleles(self, value: tuple):

182

"""Set all alleles."""

183

184

@property

185

def qual(self) -> float:

186

"""Quality score."""

187

188

@qual.setter

189

def qual(self, value: float):

190

"""Set quality score."""

191

192

# Complex properties

193

@property

194

def filter(self) -> "VariantRecordFilter":

195

"""Filter information."""

196

197

@property

198

def info(self) -> "VariantRecordInfo":

199

"""INFO field data."""

200

201

@property

202

def format(self) -> "VariantRecordFormat":

203

"""FORMAT field definition."""

204

205

@property

206

def samples(self) -> "VariantRecordSamples":

207

"""Sample data."""

208

209

# Methods

210

def copy(self):

211

"""

212

Create copy of record.

213

214

Returns:

215

VariantRecord object

216

"""

217

218

def translate(self, mapping):

219

"""

220

Translate chromosome names.

221

222

Parameters:

223

- mapping: dict, chromosome name mapping

224

"""

225

226

def to_string(self):

227

"""

228

Convert to VCF format string.

229

230

Returns:

231

str, VCF line

232

"""

233

```

234

235

### VariantHeader

236

237

VCF/BCF header containing metadata, sample information, and field definitions.

238

239

```python { .api }

240

class VariantHeader:

241

def __init__(self):

242

"""Create new variant header."""

243

244

# Properties

245

@property

246

def version(self) -> str:

247

"""VCF format version."""

248

249

@property

250

def samples(self) -> "VariantHeaderSamples":

251

"""Sample names and metadata."""

252

253

@property

254

def records(self) -> "VariantHeaderRecords":

255

"""Header records."""

256

257

@property

258

def contigs(self) -> "VariantHeaderContigs":

259

"""Contig information."""

260

261

@property

262

def filters(self) -> "VariantHeaderRecords":

263

"""FILTER definitions."""

264

265

@property

266

def info(self) -> "VariantHeaderRecords":

267

"""INFO field definitions."""

268

269

@property

270

def formats(self) -> "VariantHeaderRecords":

271

"""FORMAT field definitions."""

272

273

# Methods

274

def add_record(self, record):

275

"""

276

Add header record.

277

278

Parameters:

279

- record: VariantHeaderRecord, record to add

280

"""

281

282

def add_sample(self, name):

283

"""

284

Add sample.

285

286

Parameters:

287

- name: str, sample name

288

"""

289

290

def add_line(self, line):

291

"""

292

Add header line.

293

294

Parameters:

295

- line: str, header line

296

"""

297

298

def copy(self):

299

"""

300

Create copy of header.

301

302

Returns:

303

VariantHeader object

304

"""

305

306

def merge(self, other):

307

"""

308

Merge with another header.

309

310

Parameters:

311

- other: VariantHeader, header to merge

312

"""

313

314

def subset(self, samples):

315

"""

316

Create subset with specific samples.

317

318

Parameters:

319

- samples: list, sample names to include

320

321

Returns:

322

VariantHeader object

323

"""

324

325

def to_string(self):

326

"""

327

Convert to VCF header string.

328

329

Returns:

330

str, VCF header

331

"""

332

```

333

334

### VariantRecordSamples

335

336

Mapping-style access to the per-sample data of a variant record: look up a sample by name or index to get its genotype and FORMAT field values.

337

338

```python { .api }

339

class VariantRecordSamples:

340

def __getitem__(self, sample):

341

"""

342

Get sample data.

343

344

Parameters:

345

- sample: str/int, sample name or index

346

347

Returns:

348

VariantRecordSample object

349

"""

350

351

def __contains__(self, sample):

352

"""Check if sample exists."""

353

354

def __len__(self):

355

"""Number of samples."""

356

357

def __iter__(self):

358

"""Iterate over samples."""

359

360

def keys(self):

361

"""

362

Get sample names.

363

364

Returns:

365

Iterator of sample names

366

"""

367

368

def values(self):

369

"""

370

Get sample data.

371

372

Returns:

373

Iterator of VariantRecordSample objects

374

"""

375

376

def items(self):

377

"""

378

Get sample items.

379

380

Returns:

381

Iterator of (name, VariantRecordSample) tuples

382

"""

383

```

384

385

### VariantRecordSample

386

387

Individual sample data within a variant record.

388

389

```python { .api }

390

class VariantRecordSample:

391

def __getitem__(self, field):

392

"""

393

Get field value.

394

395

Parameters:

396

- field: str, field name

397

398

Returns:

399

Field value

400

"""

401

402

def __setitem__(self, field, value):

403

"""

404

Set field value.

405

406

Parameters:

407

- field: str, field name

408

- value: field value

409

"""

410

411

def __contains__(self, field):

412

"""Check if field exists."""

413

414

def get(self, field, default=None):

415

"""

416

Get field with default.

417

418

Returns:

419

Field value or default

420

"""

421

422

def keys(self):

423

"""

424

Get field names.

425

426

Returns:

427

Iterator of field names

428

"""

429

430

def values(self):

431

"""

432

Get field values.

433

434

Returns:

435

Iterator of field values

436

"""

437

438

def items(self):

439

"""

440

Get field items.

441

442

Returns:

443

Iterator of (field, value) tuples

444

"""

445

446

@property

447

def name(self) -> str:

448

"""Sample name."""

449

450

# Genotype shortcuts

451

@property

452

def allele_indices(self) -> tuple:

453

"""Genotype allele indices."""

454

455

@property

456

def alleles(self) -> tuple:

457

"""Genotype alleles."""

458

459

@property

460

def phased(self) -> bool:

461

"""True if genotype is phased."""

462

```

463

464

### VariantRecordInfo

465

466

INFO field data access for variant records.

467

468

```python { .api }

469

class VariantRecordInfo:

470

def __getitem__(self, key):

471

"""

472

Get INFO field value.

473

474

Parameters:

475

- key: str, INFO field name

476

477

Returns:

478

Field value

479

"""

480

481

def __setitem__(self, key, value):

482

"""

483

Set INFO field value.

484

485

Parameters:

486

- key: str, INFO field name

487

- value: field value

488

"""

489

490

def __delitem__(self, key):

491

"""Delete INFO field."""

492

493

def __contains__(self, key):

494

"""Check if INFO field exists."""

495

496

def __len__(self):

497

"""Number of INFO fields."""

498

499

def __iter__(self):

500

"""Iterate over INFO field names."""

501

502

def get(self, key, default=None):

503

"""

504

Get INFO field with default.

505

506

Returns:

507

Field value or default

508

"""

509

510

def keys(self):

511

"""

512

Get INFO field names.

513

514

Returns:

515

Iterator of field names

516

"""

517

518

def values(self):

519

"""

520

Get INFO field values.

521

522

Returns:

523

Iterator of field values

524

"""

525

526

def items(self):

527

"""

528

Get INFO field items.

529

530

Returns:

531

Iterator of (field, value) tuples

532

"""

533

534

def clear(self):

535

"""Remove all INFO fields."""

536

537

def update(self, other):

538

"""

539

Update with another INFO object.

540

541

Parameters:

542

- other: dict/VariantRecordInfo, data to update with

543

"""

544

```

545

546

### VariantRecordFilter

547

548

Filter status information for variant records.

549

550

```python { .api }

551

class VariantRecordFilter:

552

def __contains__(self, name):

553

"""Check if filter is applied."""

554

555

def __iter__(self):

556

"""Iterate over applied filters."""

557

558

def __len__(self):

559

"""Number of applied filters."""

560

561

def add(self, name):

562

"""

563

Add filter.

564

565

Parameters:

566

- name: str, filter name

567

"""

568

569

def clear(self):

570

"""Remove all filters."""

571

```

572

573

## Usage Examples

574

575

### Basic File Reading

576

577

```python

578

import pysam

579

580

# Read VCF file

581

with pysam.VariantFile("input.vcf") as vcffile:

582

# Iterate over all variants

583

for record in vcffile:

584

print(f"{record.contig}:{record.pos} {record.ref}->{record.alts}")

585

586

# Fetch variants in region

587

for record in vcffile.fetch("chr1", 1000, 2000):

588

if record.qual >= 30:

589

print(f"High quality variant: {record.id}")

590

591

# Access sample data

592

with pysam.VariantFile("input.vcf") as vcffile:

593

for record in vcffile:

594

for sample_name in record.samples:

595

sample = record.samples[sample_name]

596

genotype = sample["GT"]

597

print(f"Sample {sample_name}: {genotype}")

598

```

599

600

### Writing VCF Files

601

602

```python

603

import pysam

604

605

# Create header

606

header = pysam.VariantHeader()

607

header.add_line('##fileformat=VCFv4.2')

608

header.add_line('##contig=<ID=chr1,length=249250621>')

609

header.add_sample("Sample1")

610

header.add_sample("Sample2")

611

612

# Add INFO and FORMAT definitions

613

header.add_line('##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">')

614

header.add_line('##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">')

615

header.add_line('##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">')

616

617

with pysam.VariantFile("output.vcf", "w", header=header) as vcffile:

618

# Create variant record

619

record = vcffile.new_record(

620

contig="chr1",

621

start=100,

622

alleles=("A", "T"),

623

qual=60.0,

624

info={"DP": 100}

625

)

626

627

# Set sample data

628

record.samples["Sample1"]["GT"] = (0, 1) # Het

629

record.samples["Sample1"]["DP"] = 50

630

record.samples["Sample2"]["GT"] = (1, 1) # Hom alt

631

record.samples["Sample2"]["DP"] = 45

632

633

vcffile.write(record)

634

```

635

636

### Filtering and Processing

637

638

```python

639

import pysam

640

641

with pysam.VariantFile("input.vcf") as infile:

642

# Create output with same header

643

with pysam.VariantFile("filtered.vcf", "w", header=infile.header) as outfile:

644

for record in infile:

645

# Filter by quality and depth

646

if record.qual >= 30 and record.info.get("DP", 0) >= 10:

647

# Check if any sample is homozygous alternate

648

has_hom_alt = False

649

for sample in record.samples.values():

650

if sample["GT"] == (1, 1):

651

has_hom_alt = True

652

break

653

654

if has_hom_alt:

655

outfile.write(record)

656

```