CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-pygments

A syntax highlighting package that supports over 500 programming languages and text formats with extensive output format options

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/custom-components.md

Custom Components

Base classes and utilities for creating custom lexers, formatters, styles, and filters to extend Pygments functionality.

Capabilities

Custom Lexers

Base classes for implementing language-specific lexers.

class Lexer:
    """
    Base lexer class.

    Attributes:
    - name: Human-readable lexer name
    - aliases: List of short identifiers
    - filenames: List of filename patterns
    - mimetypes: List of MIME types
    - priority: Priority for lexer selection (higher = preferred)
    """

    def get_tokens(self, text: str): ...
    def get_tokens_unprocessed(self, text: str): ...

    # analyse_text takes no ``self`` and must be a staticmethod; without the
    # decorator, calling it on an instance would pass the instance as ``text``.
    @staticmethod
    def analyse_text(text: str) -> float: ...
class RegexLexer(Lexer):
    """
    Lexer based on regular expressions and states.

    Subclasses declare a ``tokens`` table mapping state names to lists of
    (regex, token type[, new state]) rules; see the usage example below.

    Attributes:
    - tokens: Dictionary mapping state names to token rules
    - flags: Regex flags (re.MULTILINE | re.IGNORECASE, etc.)
    """
class ExtendedRegexLexer(RegexLexer):
    """
    Enhanced regex lexer with additional features.

    NOTE(review): in Pygments, rule callbacks on this class receive a
    LexerContext for explicit position/state control -- confirm against
    the library documentation.
    """
class DelegatingLexer(Lexer):
    """
    Lexer that delegates to other lexers based on content.

    NOTE(review): typically used to lex an embedded/template language and
    hand the remaining text to a root lexer -- confirm against Pygments docs.
    """

Usage example:

from pygments.lexer import RegexLexer
from pygments.token import *

class MyLanguageLexer(RegexLexer):
    """Example RegexLexer for a fictional language 'MyLanguage'."""

    name = 'MyLanguage'
    aliases = ['mylang', 'ml']  # short ids usable with lexer lookup helpers
    filenames = ['*.ml', '*.mylang']
    mimetypes = ['text/x-mylang']

    # Rules are tried in order within a state; the first match wins.
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),                # '#' line comments
            (r'\b(if|else|while|for)\b', Keyword),
            (r'\b[A-Z][a-zA-Z0-9_]*\b', Name.Class),  # Capitalized -> class name
            (r'\b[a-z][a-zA-Z0-9_]*\b', Name),
            (r'"[^"]*"', String.Double),              # no escape handling in this example
            (r'\d+', Number.Integer),
            (r'[+\-*/=<>!]', Operator),
            (r'[(){}[\],;]', Punctuation),
        ]
    }

Custom Formatters

Base class for creating output formatters.

class Formatter:
    """
    Base formatter class.

    Attributes:
    - name: Human-readable formatter name
    - aliases: List of short identifiers
    - filenames: List of filename patterns
    - unicodeoutput: Whether formatter outputs Unicode
    """

    # Write the (token type, value) pairs from ``tokensource`` to ``outfile``.
    def format(self, tokensource, outfile): ...
    # Return style definitions (e.g. CSS) for the given selector argument.
    def get_style_defs(self, arg='') -> str: ...

Usage example:

from pygments.formatter import Formatter
from pygments.token import *

class JsonFormatter(Formatter):
    """
    Example formatter that serializes the token stream as a JSON array of
    ``{"type": ..., "value": ...}`` objects.
    """

    name = 'JSON'
    aliases = ['json']
    filenames = ['*.json']

    def format(self, tokensource, outfile):
        """Write ``tokensource`` to ``outfile`` as pretty-printed JSON.

        ``tokensource`` is an iterable of (token_type, value) pairs; token
        types are stringified (e.g. 'Token.Name') so the output is plain JSON.
        """
        import json  # local import keeps the example snippet self-contained

        # Build the list with a comprehension rather than a manual append loop.
        tokens = [{'type': str(ttype), 'value': value}
                  for ttype, value in tokensource]
        json.dump(tokens, outfile, indent=2)

Custom Styles

Base class for creating color schemes.

class Style:
    """
    Base style class.

    Attributes:
    - name: Style name
    - styles: Dictionary mapping token types to style definition strings
      (e.g. 'bold #66d9ef', 'italic #75715e', 'bg:#f92672')
    """

Usage example:

from pygments.style import Style
from pygments.token import *

class MyDarkStyle(Style):
    """Example dark color scheme."""

    name = 'mydark'

    # Values are style strings: optional 'bold'/'italic', a '#rrggbb'
    # foreground color, and an optional 'bg:#rrggbb' background.
    styles = {
        Comment:                'italic #75715e',
        Keyword:                'bold #66d9ef',
        Name:                   '#f8f8f2',
        Name.Attribute:         '#a6e22e',
        Name.Class:             'bold #a6e22e', 
        Name.Function:          '#a6e22e',
        Number:                 '#ae81ff',
        Operator:               '#f92672',
        String:                 '#e6db74',
        String.Doc:             'italic #e6db74',
        Generic.Deleted:        '#f92672',
        Generic.Inserted:       '#a6e22e',
        Generic.Heading:        'bold #f8f8f2',
        Error:                  '#f8f8f2 bg:#f92672',  # white on red for errors
    }

Custom Filters

Base class for creating token stream filters.

class Filter:
    """
    Base filter class.

    Methods:
    - filter(lexer, stream): Process token stream
    """

    # Yield (token_type, value) pairs, possibly transformed, from ``stream``.
    def filter(self, lexer, stream): ...

Usage example:

from pygments.filter import Filter
from pygments.token import *

class UppercaseFilter(Filter):
    """Filter that upper-cases the text of every token passing through."""

    def filter(self, lexer, stream):
        # Token types are forwarded untouched; only the values change.
        yield from ((token_type, text.upper()) for token_type, text in stream)

class RedactSecretsFilter(Filter):
    """Replace string tokens containing sensitive keywords with asterisks."""

    def __init__(self, **options):
        """Accept ``keywords``: list of substrings to redact (case-insensitive)."""
        Filter.__init__(self, **options)
        self.keywords = options.get('keywords', ['password', 'secret', 'key'])

    def filter(self, lexer, stream):
        for ttype, value in stream:
            # Use the subtype test (``in``): lexers usually emit subtypes such
            # as String.Double, which the exact check ``ttype is String`` would
            # miss, leaving secrets unredacted.
            if ttype in String:
                if any(kw.lower() in value.lower() for kw in self.keywords):
                    value = '***REDACTED***'
            yield ttype, value

Lexer Development Utilities

Token Rules

def include(state: str): ...  # splice another state's rules in at this point
def inherit(): ...  # NOTE(review): keeps parent-class rules here -- confirm
def bygroups(*args): ...  # NOTE(review): assigns token types to regex groups -- confirm
def using(cls, **kwargs): ...  # NOTE(review): lex the match with another lexer -- confirm
def this(): ...  # NOTE(review): current lexer, for use with using() -- confirm
def default(state: str): ...  # transition to ``state`` without consuming input
def words(words: list, prefix: str = '', suffix: str = ''): ...  # build a regex matching any of ``words``

Usage in lexer tokens:

# Example state machine: states push/pop via rule third elements and
# the special '#push'/'#pop' targets.
tokens = {
    # Entry state: whitespace, shared comment rules, declarations, fallback.
    'root': [
        (r'\s+', Whitespace),
        include('comments'),  # splice in the 'comments' rules below
        (r'\b(class|def)\b', Keyword, 'classdef'),  # push 'classdef' state
        (words(['int', 'str', 'bool'], suffix=r'\b'), Name.Builtin.Type),
        default('expr'),  # nothing matched: enter 'expr' without consuming
    ],
    
    'comments': [
        (r'#.*$', Comment.Single),
        (r'/\*', Comment.Multiline, 'multiline-comment'),
    ],
    
    # Nested /* */ comments supported via '#push'/'#pop' on the state stack.
    'multiline-comment': [
        (r'[^*/]+', Comment.Multiline),
        (r'/\*', Comment.Multiline, '#push'),
        (r'\*/', Comment.Multiline, '#pop'),
        (r'[*/]', Comment.Multiline),  # lone '*' or '/' inside the comment
    ],
    
    'classdef': [
        (r'\s+', Whitespace),
        (r'[A-Z][a-zA-Z0-9_]*', Name.Class, '#pop'),  # consume name, return
    ],
    
    'expr': [
        (r'"', String.Double, 'string'),  # opening quote pushes 'string'
        (r'\d+', Number.Integer),
        (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
        (r'[+\-*/]', Operator),
    ],
    
    # Inside a double-quoted string; closing quote pops back.
    'string': [
        (r'[^"\\]+', String.Double),
        (r'\\.', String.Escape),
        (r'"', String.Double, '#pop'),
    ],
}

Analysis Functions

def analyse_text(text: str) -> float:
    """
    Analyze text and return confidence score (0.0-1.0).
    Used for lexer guessing. Higher scores indicate better match.
    Implemented as a static method on lexer classes (see example below).
    """

Example implementation:

@staticmethod
def analyse_text(text):
    score = 0.0
    
    # Check for specific keywords
    if re.search(r'\b(function|var|const|let)\b', text):
        score += 0.3
        
    # Check for syntax patterns
    if re.search(r'function\s+\w+\s*\(', text):
        score += 0.2
        
    # Check file structure
    if re.search(r'export\s+(default\s+)?', text):
        score += 0.1
        
    return min(score, 1.0)

Helper Classes

Lexer Context Management

class LexerContext:
    """Context for lexer state management.

    NOTE(review): used with ExtendedRegexLexer rule callbacks to carry
    text position and state stack -- confirm against Pygments docs.
    """

Token Type Utilities

def string_to_tokentype(s: str) -> _TokenType:
    """Convert string to token type (e.g., 'Name.Function' -> Token.Name.Function)."""

def is_token_subtype(ttype: _TokenType, other: _TokenType) -> bool:
    """Check if ttype is a subtype of other (e.g., String.Double under String)."""

Registration and Discovery

Plugin Entry Points

Register custom components using setuptools entry points:

# setup.py
# Registering under Pygments' entry-point groups makes the components
# discoverable by name without modifying Pygments itself.
setup(
    name='my-pygments-extensions',
    entry_points={
        # Format of each entry: '<registered name> = <module path>:<class name>'
        'pygments.lexers': [
            'mylang = mypackage.lexers:MyLanguageLexer',
        ],
        'pygments.formatters': [
            'json = mypackage.formatters:JsonFormatter',
        ],
        'pygments.styles': [
            'mydark = mypackage.styles:MyDarkStyle',
        ],
        'pygments.filters': [
            'redact = mypackage.filters:RedactSecretsFilter',
        ],
    }
)

Loading Custom Components

from pygments.lexers import load_lexer_from_file
from pygments.formatters import load_formatter_from_file

# Load from files: the second argument names the class defined in the file.
custom_lexer = load_lexer_from_file('mylexer.py', 'MyLexer')
custom_formatter = load_formatter_from_file('myformatter.py', 'MyFormatter')

Testing Custom Components

# Test lexer: get_tokens yields (token type, value) pairs
lexer = MyLanguageLexer()
tokens = list(lexer.get_tokens('test code here'))
assert len(tokens) > 0

# Test formatter (NOTE(review): a real script needs ``import sys``)
formatter = JsonFormatter()
result = formatter.format(tokens, sys.stdout)

# Test style (NOTE(review): HtmlFormatter must be imported from
# pygments.formatters for this snippet to run)
style = MyDarkStyle()
html_formatter = HtmlFormatter(style=style)

# Test filter: attached filters transform the lexer's token stream
filter_instance = RedactSecretsFilter(keywords=['secret', 'password'])
lexer.add_filter(filter_instance)

Install with Tessl CLI

npx tessl i tessl/pypi-pygments

docs

command-line.md

custom-components.md

filter-system.md

formatter-management.md

high-level-api.md

index.md

lexer-management.md

style-management.md

tile.json