or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

cli-commands.md · comparison.md · consensus.md · format-conversion.md · genbank-tbl.md · gff-processing.md · index.md · sequence-operations.md · utilities.md

docs/format-conversion.md

0

# Format Conversion

1

2

This module provides format conversion for genome annotation files. GFF3, GTF, and TBL inputs can be converted to one another, to GenBank format, and to FASTA sequences (proteins, transcripts, and CDS transcripts), with full feature preservation, validation, and flexible filtering options.

3

4

## Capabilities

5

6

### GFF3 Conversion

7

8

Convert GFF3 files to other annotation formats while preserving all feature information and relationships.

9

10

```python { .api }

11

def gff2tbl(gff, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):

12

"""

13

Convert GFF3 to TBL format.

14

15

Parameters:

16

- gff (str): Path to input GFF3 file

17

- fasta (str): Path to genome FASTA file

18

- output (str|bool): Output file path, or False for stdout

19

- table (int): Genetic code table (1 or 11)

20

- debug (bool): Enable debug output

21

- grep (list): Filter patterns to keep matches

22

- grepv (list): Filter patterns to remove matches

23

24

Returns:

25

None

26

"""

27

28

def gff2gtf(gff, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):

29

"""

30

Convert GFF3 to GTF format.

31

32

Parameters:

33

- gff (str): Path to input GFF3 file

34

- fasta (str): Path to genome FASTA file

35

- output (str|bool): Output file path, or False for stdout

36

- table (int): Genetic code table (1 or 11)

37

- debug (bool): Enable debug output

38

- grep (list): Filter patterns to keep matches

39

- grepv (list): Filter patterns to remove matches

40

41

Returns:

42

None

43

"""

44

45

def gff2proteins(gff, fasta, output=False, table=1, strip_stop=False, debug=False, grep=[], grepv=[]):

46

"""

47

Convert GFF3 to protein FASTA sequences.

48

49

Parameters:

50

- gff (str): Path to input GFF3 file

51

- fasta (str): Path to genome FASTA file

52

- output (str|bool): Output file path, or False for stdout

53

- table (int): Genetic code table (1 or 11)

54

- strip_stop (bool): Remove stop codons from protein sequences

55

- debug (bool): Enable debug output

56

- grep (list): Filter patterns to keep matches

57

- grepv (list): Filter patterns to remove matches

58

59

Returns:

60

None

61

"""

62

63

def gff2transcripts(gff, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):

64

"""

65

Convert GFF3 to transcript FASTA sequences.

66

67

Parameters:

68

- gff (str): Path to input GFF3 file

69

- fasta (str): Path to genome FASTA file

70

- output (str|bool): Output file path, or False for stdout

71

- table (int): Genetic code table (1 or 11)

72

- debug (bool): Enable debug output

73

- grep (list): Filter patterns to keep matches

74

- grepv (list): Filter patterns to remove matches

75

76

Returns:

77

None

78

"""

79

80

def gff2cdstranscripts(gff, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):

81

"""

82

Convert GFF3 to CDS transcript FASTA sequences.

83

84

Parameters:

85

- gff (str): Path to input GFF3 file

86

- fasta (str): Path to genome FASTA file

87

- output (str|bool): Output file path, or False for stdout

88

- table (int): Genetic code table (1 or 11)

89

- debug (bool): Enable debug output

90

- grep (list): Filter patterns to keep matches

91

- grepv (list): Filter patterns to remove matches

92

93

Returns:

94

None

95

"""

96

97

def gff2gbff(gff, fasta, output=False, table=1, organism=False, strain=False, debug=False, tmpdir="/tmp", cleanup=True, grep=[], grepv=[]):

98

"""

99

Convert GFF3 to GenBank format.

100

101

Parameters:

102

- gff (str): Path to input GFF3 file

103

- fasta (str): Path to genome FASTA file

104

- output (str|bool): Output file path, or False for stdout

105

- table (int): Genetic code table (1 or 11)

106

- organism (str|bool): Organism name

107

- strain (str|bool): Strain name

108

- debug (bool): Enable debug output

109

- tmpdir (str): Temporary directory path

110

- cleanup (bool): Remove temporary files

111

- grep (list): Filter patterns to keep matches

112

- grepv (list): Filter patterns to remove matches

113

114

Returns:

115

None

116

"""

117

```

118

119

### TBL Conversion

120

121

Convert TBL (table) format files to other annotation formats with support for various output types.

122

123

```python { .api }

124

def tbl2gff3(tbl, fasta, output=False, table=1, grep=[], grepv=[]):

125

"""

126

Convert TBL to GFF3 format.

127

128

Parameters:

129

- tbl (str): Path to input TBL file

130

- fasta (str): Path to genome FASTA file

131

- output (str|bool): Output file path, or False for stdout

132

- table (int): Genetic code table (1 or 11)

133

- grep (list): Filter patterns to keep matches

134

- grepv (list): Filter patterns to remove matches

135

136

Returns:

137

None

138

"""

139

140

def tbl2gtf(tbl, fasta, output=False, table=1, grep=[], grepv=[]):

141

"""

142

Convert TBL to GTF format.

143

144

Parameters:

145

- tbl (str): Path to input TBL file

146

- fasta (str): Path to genome FASTA file

147

- output (str|bool): Output file path, or False for stdout

148

- table (int): Genetic code table (1 or 11)

149

- grep (list): Filter patterns to keep matches

150

- grepv (list): Filter patterns to remove matches

151

152

Returns:

153

None

154

"""

155

156

def tbl2proteins(tbl, fasta, output=False, table=1, strip_stop=False, grep=[], grepv=[]):

157

"""

158

Convert TBL to protein FASTA sequences.

159

160

Parameters:

161

- tbl (str): Path to input TBL file

162

- fasta (str): Path to genome FASTA file

163

- output (str|bool): Output file path, or False for stdout

164

- table (int): Genetic code table (1 or 11)

165

- strip_stop (bool): Remove stop codons from protein sequences

166

- grep (list): Filter patterns to keep matches

167

- grepv (list): Filter patterns to remove matches

168

169

Returns:

170

None

171

"""

172

173

def tbl2transcripts(tbl, fasta, output=False, table=1, grep=[], grepv=[]):

174

"""

175

Convert TBL to transcript FASTA sequences.

176

177

Parameters:

178

- tbl (str): Path to input TBL file

179

- fasta (str): Path to genome FASTA file

180

- output (str|bool): Output file path, or False for stdout

181

- table (int): Genetic code table (1 or 11)

182

- grep (list): Filter patterns to keep matches

183

- grepv (list): Filter patterns to remove matches

184

185

Returns:

186

None

187

"""

188

189

def tbl2cdstranscripts(tbl, fasta, output=False, table=1, grep=[], grepv=[]):

190

"""

191

Convert TBL to CDS transcript FASTA sequences.

192

193

Parameters:

194

- tbl (str): Path to input TBL file

195

- fasta (str): Path to genome FASTA file

196

- output (str|bool): Output file path, or False for stdout

197

- table (int): Genetic code table (1 or 11)

198

- grep (list): Filter patterns to keep matches

199

- grepv (list): Filter patterns to remove matches

200

201

Returns:

202

None

203

"""

204

205

def tbl2gbff(tbl, fasta, output=False, table=1, organism=False, strain=False, tmpdir="/tmp", cleanup=True, grep=[], grepv=[]):

206

"""

207

Convert TBL to GenBank format.

208

209

Parameters:

210

- tbl (str): Path to input TBL file

211

- fasta (str): Path to genome FASTA file

212

- output (str|bool): Output file path, or False for stdout

213

- table (int): Genetic code table (1 or 11)

214

- organism (str|bool): Organism name

215

- strain (str|bool): Strain name

216

- tmpdir (str): Temporary directory path

217

- cleanup (bool): Remove temporary files

218

- grep (list): Filter patterns to keep matches

219

- grepv (list): Filter patterns to remove matches

220

221

Returns:

222

None

223

"""

224

```

225

226

### GTF Conversion

227

228

Convert GTF format files to other annotation formats with full feature preservation.

229

230

```python { .api }

231

def gtf2gff(gtf, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):

232

"""

233

Convert GTF to GFF3 format.

234

235

Parameters:

236

- gtf (str): Path to input GTF file

237

- fasta (str): Path to genome FASTA file

238

- output (str|bool): Output file path, or False for stdout

239

- table (int): Genetic code table (1 or 11)

240

- debug (bool): Enable debug output

241

- grep (list): Filter patterns to keep matches

242

- grepv (list): Filter patterns to remove matches

243

244

Returns:

245

None

246

"""

247

248

def gtf2tbl(gtf, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):

249

"""

250

Convert GTF to TBL format.

251

252

Parameters:

253

- gtf (str): Path to input GTF file

254

- fasta (str): Path to genome FASTA file

255

- output (str|bool): Output file path, or False for stdout

256

- table (int): Genetic code table (1 or 11)

257

- debug (bool): Enable debug output

258

- grep (list): Filter patterns to keep matches

259

- grepv (list): Filter patterns to remove matches

260

261

Returns:

262

None

263

"""

264

265

def gtf2proteins(gtf, fasta, output=False, table=1, strip_stop=False, debug=False, grep=[], grepv=[]):

266

"""

267

Convert GTF to protein FASTA sequences.

268

269

Parameters:

270

- gtf (str): Path to input GTF file

271

- fasta (str): Path to genome FASTA file

272

- output (str|bool): Output file path, or False for stdout

273

- table (int): Genetic code table (1 or 11)

274

- strip_stop (bool): Remove stop codons from protein sequences

275

- debug (bool): Enable debug output

276

- grep (list): Filter patterns to keep matches

277

- grepv (list): Filter patterns to remove matches

278

279

Returns:

280

None

281

"""

282

283

def gtf2transcripts(gtf, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):

284

"""

285

Convert GTF to transcript FASTA sequences.

286

287

Parameters:

288

- gtf (str): Path to input GTF file

289

- fasta (str): Path to genome FASTA file

290

- output (str|bool): Output file path, or False for stdout

291

- table (int): Genetic code table (1 or 11)

292

- debug (bool): Enable debug output

293

- grep (list): Filter patterns to keep matches

294

- grepv (list): Filter patterns to remove matches

295

296

Returns:

297

None

298

"""

299

300

def gtf2cdstranscripts(gtf, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):

301

"""

302

Convert GTF to CDS transcript FASTA sequences.

303

304

Parameters:

305

- gtf (str): Path to input GTF file

306

- fasta (str): Path to genome FASTA file

307

- output (str|bool): Output file path, or False for stdout

308

- table (int): Genetic code table (1 or 11)

309

- debug (bool): Enable debug output

310

- grep (list): Filter patterns to keep matches

311

- grepv (list): Filter patterns to remove matches

312

313

Returns:

314

None

315

"""

316

317

def gtf2gbff(gtf, fasta, output=False, table=1, organism=False, strain=False, debug=False, tmpdir="/tmp", cleanup=True, grep=[], grepv=[]):

318

"""

319

Convert GTF to GenBank format.

320

321

Parameters:

322

- gtf (str): Path to input GTF file

323

- fasta (str): Path to genome FASTA file

324

- output (str|bool): Output file path, or False for stdout

325

- table (int): Genetic code table (1 or 11)

326

- organism (str|bool): Organism name

327

- strain (str|bool): Strain name

328

- debug (bool): Enable debug output

329

- tmpdir (str): Temporary directory path

330

- cleanup (bool): Remove temporary files

331

- grep (list): Filter patterns to keep matches

332

- grepv (list): Filter patterns to remove matches

333

334

Returns:

335

None

336

"""

337

```

338

339

## Usage Examples

340

341

### Basic Format Conversion

342

343

```python

344

from gfftk.convert import gff2proteins, tbl2gff3

345

346

# Convert GFF3 to protein sequences

347

gff2proteins("annotation.gff3", "genome.fasta", output="proteins.faa")

348

349

# Convert TBL to GFF3

350

tbl2gff3("annotation.tbl", "genome.fasta", output="annotation.gff3")

351

```

352

353

### Conversion with Filtering

354

355

```python

356

from gfftk.convert import gff2proteins

357

358

# Extract only kinase proteins with case-insensitive matching

359

gff2proteins(

360

"annotation.gff3",

361

"genome.fasta",

362

output="kinases.faa",

363

grep=["product:kinase:i"]

364

)

365

366

# Remove augustus predictions and extract proteins

367

gff2proteins(

368

"annotation.gff3",

369

"genome.fasta",

370

output="filtered_proteins.faa",

371

grepv=["source:augustus"]

372

)

373

```

374

375

### GenBank Conversion

376

377

```python

378

from gfftk.convert import gff2gbff

379

380

# Convert to GenBank format with organism information

381

gff2gbff(

382

"annotation.gff3",

383

"genome.fasta",

384

output="genome.gbff",

385

organism="Saccharomyces cerevisiae",

386

strain="S288C",

387

table=1

388

)

389

```

390

391

## Types

392

393

```python { .api }

394

# Filter pattern format for grep/grepv parameters

395

FilterPattern = str # Format: "key:pattern" or "key:pattern:i" for case-insensitive

396

397

# Common filter keys

398

FilterKeys = [

399

"product", # Product description

400

"source", # Annotation source

401

"name", # Gene/feature name

402

"note", # Free text notes

403

"contig", # Chromosome/contig name

404

"strand", # Strand orientation

405

"type", # Feature type

406

"db_xref", # Database cross-references

407

"go_terms", # Gene Ontology terms

408

]

409

410

# Genetic code table options

411

GeneticCodeTable = int # 1 (standard) or 11 (bacterial/archaeal/plant plastid)

412

```