CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-gfftk

Comprehensive Python toolkit for working with genome annotation files in GFF3, GTF, and TBL formats with format conversion and analysis capabilities

Overview
Eval results
Files

docs/format-conversion.md

Format Conversion

Comprehensive format conversion capabilities for genome annotation files, supporting conversion between GFF3, GTF, TBL, GenBank, and FASTA formats with full feature preservation, validation, and flexible filtering options.

Capabilities

GFF3 Conversion

Convert GFF3 files to TBL, GTF, or GenBank annotation formats while preserving all feature information and relationships, or extract protein, transcript, and CDS transcript sequences as FASTA.

def gff2tbl(gff, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):
    """Convert a GFF3 annotation file to TBL format.

    Args:
        gff (str): Path to the input GFF3 file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        debug (bool): Set to ``True`` to enable debug output.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def gff2gtf(gff, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):
    """Convert a GFF3 annotation file to GTF format.

    Args:
        gff (str): Path to the input GFF3 file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        debug (bool): Set to ``True`` to enable debug output.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def gff2proteins(gff, fasta, output=False, table=1, strip_stop=False, debug=False, grep=[], grepv=[]):
    """Convert a GFF3 annotation file to protein FASTA sequences.

    Args:
        gff (str): Path to the input GFF3 file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        strip_stop (bool): Set to ``True`` to remove stop codons from the
            protein sequences.
        debug (bool): Set to ``True`` to enable debug output.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def gff2transcripts(gff, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):
    """Convert a GFF3 annotation file to transcript FASTA sequences.

    Args:
        gff (str): Path to the input GFF3 file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        debug (bool): Set to ``True`` to enable debug output.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def gff2cdstranscripts(gff, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):
    """Convert a GFF3 annotation file to CDS transcript FASTA sequences.

    Args:
        gff (str): Path to the input GFF3 file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        debug (bool): Set to ``True`` to enable debug output.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def gff2gbff(gff, fasta, output=False, table=1, organism=False, strain=False, debug=False, tmpdir="/tmp", cleanup=True, grep=[], grepv=[]):
    """Convert a GFF3 annotation file to GenBank format.

    Args:
        gff (str): Path to the input GFF3 file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        organism (str | bool): Organism name; defaults to ``False``.
        strain (str | bool): Strain name; defaults to ``False``.
        debug (bool): Set to ``True`` to enable debug output.
        tmpdir (str): Temporary directory path; defaults to ``"/tmp"``.
        cleanup (bool): Remove temporary files; defaults to ``True``.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

TBL Conversion

Convert TBL (table) format files to GFF3, GTF, or GenBank annotation formats, or extract protein, transcript, and CDS transcript sequences as FASTA.

def tbl2gff3(tbl, fasta, output=False, table=1, grep=[], grepv=[]):
    """Convert a TBL annotation file to GFF3 format.

    Args:
        tbl (str): Path to the input TBL file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def tbl2gtf(tbl, fasta, output=False, table=1, grep=[], grepv=[]):
    """Convert a TBL annotation file to GTF format.

    Args:
        tbl (str): Path to the input TBL file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def tbl2proteins(tbl, fasta, output=False, table=1, strip_stop=False, grep=[], grepv=[]):
    """Convert a TBL annotation file to protein FASTA sequences.

    Args:
        tbl (str): Path to the input TBL file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        strip_stop (bool): Set to ``True`` to remove stop codons from the
            protein sequences.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def tbl2transcripts(tbl, fasta, output=False, table=1, grep=[], grepv=[]):
    """Convert a TBL annotation file to transcript FASTA sequences.

    Args:
        tbl (str): Path to the input TBL file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def tbl2cdstranscripts(tbl, fasta, output=False, table=1, grep=[], grepv=[]):
    """Convert a TBL annotation file to CDS transcript FASTA sequences.

    Args:
        tbl (str): Path to the input TBL file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def tbl2gbff(tbl, fasta, output=False, table=1, organism=False, strain=False, tmpdir="/tmp", cleanup=True, grep=[], grepv=[]):
    """Convert a TBL annotation file to GenBank format.

    Args:
        tbl (str): Path to the input TBL file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        organism (str | bool): Organism name; defaults to ``False``.
        strain (str | bool): Strain name; defaults to ``False``.
        tmpdir (str): Temporary directory path; defaults to ``"/tmp"``.
        cleanup (bool): Remove temporary files; defaults to ``True``.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

GTF Conversion

Convert GTF format files to GFF3, TBL, or GenBank annotation formats with full feature preservation, or extract protein, transcript, and CDS transcript sequences as FASTA.

def gtf2gff(gtf, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):
    """Convert a GTF annotation file to GFF3 format.

    Args:
        gtf (str): Path to the input GTF file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        debug (bool): Set to ``True`` to enable debug output.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def gtf2tbl(gtf, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):
    """Convert a GTF annotation file to TBL format.

    Args:
        gtf (str): Path to the input GTF file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        debug (bool): Set to ``True`` to enable debug output.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def gtf2proteins(gtf, fasta, output=False, table=1, strip_stop=False, debug=False, grep=[], grepv=[]):
    """Convert a GTF annotation file to protein FASTA sequences.

    Args:
        gtf (str): Path to the input GTF file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        strip_stop (bool): Set to ``True`` to remove stop codons from the
            protein sequences.
        debug (bool): Set to ``True`` to enable debug output.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def gtf2transcripts(gtf, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):
    """Convert a GTF annotation file to transcript FASTA sequences.

    Args:
        gtf (str): Path to the input GTF file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        debug (bool): Set to ``True`` to enable debug output.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def gtf2cdstranscripts(gtf, fasta, output=False, table=1, debug=False, grep=[], grepv=[]):
    """Convert a GTF annotation file to CDS transcript FASTA sequences.

    Args:
        gtf (str): Path to the input GTF file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        debug (bool): Set to ``True`` to enable debug output.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

def gtf2gbff(gtf, fasta, output=False, table=1, organism=False, strain=False, debug=False, tmpdir="/tmp", cleanup=True, grep=[], grepv=[]):
    """Convert a GTF annotation file to GenBank format.

    Args:
        gtf (str): Path to the input GTF file.
        fasta (str): Path to the genome FASTA file.
        output (str | bool): Path of the file to write; leave as ``False``
            to send the result to stdout.
        table (int): Genetic code table number -- 1 (standard) or
            11 (bacterial/archaeal/plant plastid).
        organism (str | bool): Organism name; defaults to ``False``.
        strain (str | bool): Strain name; defaults to ``False``.
        debug (bool): Set to ``True`` to enable debug output.
        tmpdir (str): Temporary directory path; defaults to ``"/tmp"``.
        cleanup (bool): Remove temporary files; defaults to ``True``.
        grep (list): ``"key:pattern"`` filters (``"key:pattern:i"`` for
            case-insensitive matching); matches are kept.
        grepv (list): Filters in the same format as ``grep``; matches
            are removed.

    Returns:
        None
    """

Usage Examples

Basic Format Conversion

from gfftk.convert import gff2proteins, tbl2gff3

# Convert the GFF3 annotation to protein FASTA sequences, using genome.fasta
# as the genome and writing to proteins.faa instead of stdout
gff2proteins("annotation.gff3", "genome.fasta", output="proteins.faa")

# Convert the TBL annotation to GFF3, writing to annotation.gff3 instead of stdout
tbl2gff3("annotation.tbl", "genome.fasta", output="annotation.gff3")

Conversion with Filtering

from gfftk.convert import gff2proteins

# Extract only kinase proteins: grep keeps matches, and the pattern
# "product:kinase:i" reads as key "product", pattern "kinase", with the
# trailing ":i" requesting case-insensitive matching
gff2proteins(
    "annotation.gff3",
    "genome.fasta",
    output="kinases.faa",
    grep=["product:kinase:i"]
)

# Remove augustus predictions and extract proteins: grepv removes matches,
# here anything matching "augustus" under the "source" key
gff2proteins(
    "annotation.gff3",
    "genome.fasta",
    output="filtered_proteins.faa",
    grepv=["source:augustus"]
)

GenBank Conversion

from gfftk.convert import gff2gbff

# Convert to GenBank format with organism and strain information;
# table=1 selects the standard genetic code (it is also the default)
gff2gbff(
    "annotation.gff3",
    "genome.fasta",
    output="genome.gbff",
    organism="Saccharomyces cerevisiae",
    strain="S288C",
    table=1
)

Types

# A grep/grepv filter is a plain string shaped like "key:pattern";
# append ":i" ("key:pattern:i") to make the match case-insensitive.
FilterPattern = str

# Keys commonly used as the "key" part of a filter pattern.
FilterKeys = [
    "product",      # product description
    "source",       # annotation source
    "name",         # gene/feature name
    "note",         # free-text notes
    "contig",       # chromosome/contig name
    "strand",       # strand orientation
    "type",         # feature type
    "db_xref",      # database cross-references
    "go_terms",     # Gene Ontology terms
]

# Genetic code table number: 1 is the standard code,
# 11 is the bacterial/archaeal/plant plastid code.
GeneticCodeTable = int

Install with Tessl CLI

npx tessl i tessl/pypi-gfftk

docs

cli-commands.md

comparison.md

consensus.md

format-conversion.md

genbank-tbl.md

gff-processing.md

index.md

sequence-operations.md

utilities.md

tile.json