A syntax highlighting package that supports over 500 programming languages and text formats with extensive output format options
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Base classes and utilities for creating custom lexers, formatters, styles, and filters to extend Pygments functionality.
Base classes for implementing language-specific lexers.
class Lexer:
"""
Base lexer class.
Attributes:
- name: Human-readable lexer name
- aliases: List of short identifiers
- filenames: List of filename patterns
- mimetypes: List of MIME types
- priority: Priority for lexer selection (higher = preferred)
"""
def get_tokens(self, text: str): ...
def get_tokens_unprocessed(self, text: str): ...
def analyse_text(text: str) -> float: ...

class RegexLexer(Lexer):
"""
Lexer based on regular expressions and states.
Attributes:
- tokens: Dictionary mapping state names to token rules
- flags: Regex flags (re.MULTILINE | re.IGNORECASE, etc.)
"""

class ExtendedRegexLexer(RegexLexer):
"""
Enhanced regex lexer with additional features.
"""

class DelegatingLexer(Lexer):
"""
Lexer that delegates to other lexers based on content.
"""

Usage example:
from pygments.lexer import RegexLexer
from pygments.token import *
class MyLanguageLexer(RegexLexer):
name = 'MyLanguage'
aliases = ['mylang', 'ml']
filenames = ['*.ml', '*.mylang']
mimetypes = ['text/x-mylang']
tokens = {
'root': [
(r'\s+', Whitespace),
(r'#.*$', Comment.Single),
(r'\b(if|else|while|for)\b', Keyword),
(r'\b[A-Z][a-zA-Z0-9_]*\b', Name.Class),
(r'\b[a-z][a-zA-Z0-9_]*\b', Name),
(r'"[^"]*"', String.Double),
(r'\d+', Number.Integer),
(r'[+\-*/=<>!]', Operator),
(r'[(){}[\],;]', Punctuation),
]
}

Base class for creating output formatters.
class Formatter:
"""
Base formatter class.
Attributes:
- name: Human-readable formatter name
- aliases: List of short identifiers
- filenames: List of filename patterns
- unicodeoutput: Whether formatter outputs Unicode
"""
def format(self, tokensource, outfile): ...
def get_style_defs(self, arg='') -> str: ...

Usage example:
from pygments.formatter import Formatter
from pygments.token import *
class JsonFormatter(Formatter):
name = 'JSON'
aliases = ['json']
filenames = ['*.json']
def format(self, tokensource, outfile):
import json
tokens = []
for ttype, value in tokensource:
tokens.append({
'type': str(ttype),
'value': value
})
json.dump(tokens, outfile, indent=2)

Base class for creating color schemes.
class Style:
"""
Base style class.
Attributes:
- name: Style name
- styles: Dictionary mapping token types to style definitions
"""

Usage example:
from pygments.style import Style
from pygments.token import *
class MyDarkStyle(Style):
name = 'mydark'
styles = {
Comment: 'italic #75715e',
Keyword: 'bold #66d9ef',
Name: '#f8f8f2',
Name.Attribute: '#a6e22e',
Name.Class: 'bold #a6e22e',
Name.Function: '#a6e22e',
Number: '#ae81ff',
Operator: '#f92672',
String: '#e6db74',
String.Doc: 'italic #e6db74',
Generic.Deleted: '#f92672',
Generic.Inserted: '#a6e22e',
Generic.Heading: 'bold #f8f8f2',
Error: '#f8f8f2 bg:#f92672',
}

Base class for creating token stream filters.
class Filter:
"""
Base filter class.
Methods:
- filter(lexer, stream): Process token stream
"""
def filter(self, lexer, stream): ...

Usage example:
from pygments.filter import Filter
from pygments.token import *
class UppercaseFilter(Filter):
"""Convert all text to uppercase."""
def filter(self, lexer, stream):
for ttype, value in stream:
yield ttype, value.upper()
class RedactSecretsFilter(Filter):
"""Replace sensitive information with asterisks."""
def __init__(self, **options):
Filter.__init__(self, **options)
self.keywords = options.get('keywords', ['password', 'secret', 'key'])
def filter(self, lexer, stream):
for ttype, value in stream:
if ttype is String:
for keyword in self.keywords:
if keyword.lower() in value.lower():
value = '***REDACTED***'
break
yield ttype, value

def include(state: str): ...
def inherit(): ...
def bygroups(*args): ...
def using(cls, **kwargs): ...
def this(): ...
def default(state: str): ...
def words(words: list, prefix: str = '', suffix: str = ''): ...

Usage in lexer tokens:
tokens = {
'root': [
(r'\s+', Whitespace),
include('comments'),
(r'\b(class|def)\b', Keyword, 'classdef'),
(words(['int', 'str', 'bool'], suffix=r'\b'), Name.Builtin.Type),
default('expr'),
],
'comments': [
(r'#.*$', Comment.Single),
(r'/\*', Comment.Multiline, 'multiline-comment'),
],
'multiline-comment': [
(r'[^*/]+', Comment.Multiline),
(r'/\*', Comment.Multiline, '#push'),
(r'\*/', Comment.Multiline, '#pop'),
(r'[*/]', Comment.Multiline),
],
'classdef': [
(r'\s+', Whitespace),
(r'[A-Z][a-zA-Z0-9_]*', Name.Class, '#pop'),
],
'expr': [
(r'"', String.Double, 'string'),
(r'\d+', Number.Integer),
(r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
(r'[+\-*/]', Operator),
],
'string': [
(r'[^"\\]+', String.Double),
(r'\\.', String.Escape),
(r'"', String.Double, '#pop'),
],
}

def analyse_text(text: str) -> float:
"""
Analyze text and return confidence score (0.0-1.0).
Used for lexer guessing. Higher scores indicate better match.
"""

Example implementation:
@staticmethod
def analyse_text(text):
score = 0.0
# Check for specific keywords
if re.search(r'\b(function|var|const|let)\b', text):
score += 0.3
# Check for syntax patterns
if re.search(r'function\s+\w+\s*\(', text):
score += 0.2
# Check file structure
if re.search(r'export\s+(default\s+)?', text):
score += 0.1
return min(score, 1.0)

class LexerContext:
"""Context for lexer state management."""

def string_to_tokentype(s: str) -> _TokenType:
"""Convert string to token type (e.g., 'Name.Function' -> Token.Name.Function)."""
def is_token_subtype(ttype: _TokenType, other: _TokenType) -> bool:
"""Check if ttype is a subtype of other."""

Register custom components using setuptools entry points:
# setup.py
setup(
name='my-pygments-extensions',
entry_points={
'pygments.lexers': [
'mylang = mypackage.lexers:MyLanguageLexer',
],
'pygments.formatters': [
'json = mypackage.formatters:JsonFormatter',
],
'pygments.styles': [
'mydark = mypackage.styles:MyDarkStyle',
],
'pygments.filters': [
'redact = mypackage.filters:RedactSecretsFilter',
],
}
)

from pygments.lexers import load_lexer_from_file
from pygments.formatters import load_formatter_from_file
# Load from files
custom_lexer = load_lexer_from_file('mylexer.py', 'MyLexer')
custom_formatter = load_formatter_from_file('myformatter.py', 'MyFormatter')

# Test lexer
lexer = MyLanguageLexer()
tokens = list(lexer.get_tokens('test code here'))
assert len(tokens) > 0
# Test formatter
formatter = JsonFormatter()
result = formatter.format(tokens, sys.stdout)
# Test style
style = MyDarkStyle()
html_formatter = HtmlFormatter(style=style)
# Test filter
filter_instance = RedactSecretsFilter(keywords=['secret', 'password'])
lexer.add_filter(filter_instance)

Install with Tessl CLI
npx tessl i tessl/pypi-pygments