# Custom Components

Base classes and utilities for creating custom lexers, formatters, styles, and filters that extend Pygments.

## Capabilities

### Custom Lexers

Base classes for implementing language-specific lexers.
```python { .api }
class Lexer:
    """
    Base lexer class.

    Attributes:
    - name: Human-readable lexer name
    - aliases: List of short identifiers
    - filenames: List of filename patterns
    - mimetypes: List of MIME types
    - priority: Priority for lexer selection (higher = preferred)
    """

    def get_tokens(self, text: str): ...              # yields (tokentype, value) pairs
    def get_tokens_unprocessed(self, text: str): ...  # yields (index, tokentype, value) tuples

    @staticmethod
    def analyse_text(text: str) -> float: ...         # confidence score for lexer guessing
```
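For orientation, a minimal sketch of the two iteration APIs, here using the built-in `PythonLexer`:

```python
from pygments.lexers import PythonLexer

lexer = PythonLexer()

# get_tokens yields (tokentype, value) pairs
for ttype, value in lexer.get_tokens('x = 1\n'):
    print(ttype, repr(value))

# get_tokens_unprocessed additionally yields the character offset of each token
for index, ttype, value in lexer.get_tokens_unprocessed('x = 1\n'):
    print(index, ttype, repr(value))
```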
```python { .api }
class RegexLexer(Lexer):
    """
    Lexer based on regular expressions and states.

    Attributes:
    - tokens: Dictionary mapping state names to token rules
    - flags: Regex flags (re.MULTILINE | re.IGNORECASE, etc.)
    """
```

```python { .api }
class ExtendedRegexLexer(RegexLexer):
    """
    RegexLexer variant whose rule callbacks receive a LexerContext,
    letting them inspect and modify the current position and state stack.
    """
```

```python { .api }
class DelegatingLexer(Lexer):
    """
    Combines two lexers: text that the language lexer emits as Other
    is re-lexed with a root lexer. Useful for template languages.
    """
```
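Delegation is normally wired up entirely in `__init__`. A sketch, assuming a hypothetical `MyTemplateLexer` that pairs the `MyLanguageLexer` defined below with the built-in `HtmlLexer` (a real template lexer would emit `Other` tokens for the surrounding host-language text):

```python
from pygments.lexer import DelegatingLexer
from pygments.lexers import HtmlLexer

class MyTemplateLexer(DelegatingLexer):
    """MyLanguage embedded in HTML templates (hypothetical)."""
    name = 'MyLanguage+HTML'
    aliases = ['mylang+html']

    def __init__(self, **options):
        # Text that MyLanguageLexer emits as Other is re-lexed by HtmlLexer.
        super().__init__(HtmlLexer, MyLanguageLexer, **options)
```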
Usage example:

```python
from pygments.lexer import RegexLexer
from pygments.token import *

class MyLanguageLexer(RegexLexer):
    name = 'MyLanguage'
    aliases = ['mylang', 'ml']
    filenames = ['*.ml', '*.mylang']
    mimetypes = ['text/x-mylang']

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),
            (r'\b(if|else|while|for)\b', Keyword),
            (r'\b[A-Z][a-zA-Z0-9_]*\b', Name.Class),
            (r'\b[a-z][a-zA-Z0-9_]*\b', Name),
            (r'"[^"]*"', String.Double),
            (r'\d+', Number.Integer),
            (r'[+\-*/=<>!]', Operator),
            (r'[(){}[\],;]', Punctuation),
        ]
    }
```
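Once defined, the lexer plugs into the standard `highlight()` pipeline like any built-in:

```python
from pygments import highlight
from pygments.formatters import HtmlFormatter

code = 'if count { print("hello") }'
print(highlight(code, MyLanguageLexer(), HtmlFormatter()))
```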
### Custom Formatters

Base class for creating output formatters.

```python { .api }
class Formatter:
    """
    Base formatter class.

    Attributes:
    - name: Human-readable formatter name
    - aliases: List of short identifiers
    - filenames: List of filename patterns
    - unicodeoutput: Whether the formatter outputs Unicode
    """

    def format(self, tokensource, outfile): ...   # write formatted output to outfile
    def get_style_defs(self, arg='') -> str: ...  # style definitions (e.g., CSS), if applicable
```
Usage example:

```python
import json

from pygments.formatter import Formatter

class JsonFormatter(Formatter):
    name = 'JSON'
    aliases = ['json']
    filenames = ['*.json']

    def format(self, tokensource, outfile):
        tokens = []
        for ttype, value in tokensource:
            tokens.append({
                'type': str(ttype),
                'value': value,
            })
        json.dump(tokens, outfile, indent=2)
```
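The formatter then works anywhere a built-in one does; `highlight()` returns the formatted string when no output file is given:

```python
from pygments import highlight
from pygments.lexers import PythonLexer

print(highlight('x = 1\n', PythonLexer(), JsonFormatter()))
```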
### Custom Styles

Base class for creating color schemes.

```python { .api }
class Style:
    """
    Base style class.

    Attributes:
    - name: Style name
    - styles: Dictionary mapping token types to style definitions
    """
```
Usage example:

```python
from pygments.style import Style
from pygments.token import *

class MyDarkStyle(Style):
    name = 'mydark'

    styles = {
        Comment: 'italic #75715e',
        Keyword: 'bold #66d9ef',
        Name: '#f8f8f2',
        Name.Attribute: '#a6e22e',
        Name.Class: 'bold #a6e22e',
        Name.Function: '#a6e22e',
        Number: '#ae81ff',
        Operator: '#f92672',
        String: '#e6db74',
        String.Doc: 'italic #e6db74',
        Generic.Deleted: '#f92672',
        Generic.Inserted: '#a6e22e',
        Generic.Heading: 'bold #f8f8f2',
        Error: '#f8f8f2 bg:#f92672',
    }
```
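Styles are consumed as classes, typically through a formatter. A quick sketch of emitting the corresponding CSS:

```python
from pygments.formatters import HtmlFormatter

formatter = HtmlFormatter(style=MyDarkStyle)
print(formatter.get_style_defs('.highlight'))  # CSS rules scoped to .highlight
```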
### Custom Filters

Base class for creating token stream filters.

```python { .api }
class Filter:
    """
    Base filter class.

    Methods:
    - filter(lexer, stream): Process the token stream
    """

    def filter(self, lexer, stream): ...
```
Usage example:

```python
from pygments.filter import Filter
from pygments.token import String

class UppercaseFilter(Filter):
    """Convert all token text to uppercase."""

    def filter(self, lexer, stream):
        for ttype, value in stream:
            yield ttype, value.upper()

class RedactSecretsFilter(Filter):
    """Replace sensitive string literals with asterisks."""

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.keywords = options.get('keywords', ['password', 'secret', 'key'])

    def filter(self, lexer, stream):
        for ttype, value in stream:
            # 'in' matches String and all its subtypes (String.Double, ...);
            # 'is' would miss the subtypes.
            if ttype in String:
                for keyword in self.keywords:
                    if keyword.lower() in value.lower():
                        value = '***REDACTED***'
                        break
            yield ttype, value
```
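For one-off transformations there is also the `simplefilter` decorator, which turns a generator function into a `Filter` subclass. A sketch of the uppercase filter in that style:

```python
from pygments.filter import simplefilter

@simplefilter
def uppercase(self, lexer, stream, options):
    """Function-style equivalent of UppercaseFilter above."""
    for ttype, value in stream:
        yield ttype, value.upper()

# The decorator returns a Filter subclass; instantiate it as usual:
# lexer.add_filter(uppercase())
```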
## Lexer Development Utilities

### Token Rules

```python { .api }
def include(state: str): ...  # splice in the rules of another state
inherit                       # marker object: keep the superclass rules for this state
def bygroups(*args): ...      # assign one token type per regex group
def using(cls, **kwargs): ... # lex the matched text with another lexer
this                          # marker object for the current lexer (used with using())
def default(state: str): ...  # change state without consuming input
def words(words: list, prefix: str = '', suffix: str = ''): ...  # optimized alternation of literal words
```
Usage in lexer tokens:

```python
tokens = {
    'root': [
        (r'\s+', Whitespace),
        include('comments'),
        (r'\b(class|def)\b', Keyword, 'classdef'),
        (words(['int', 'str', 'bool'], suffix=r'\b'), Name.Builtin.Type),
        default('expr'),
    ],

    'comments': [
        (r'#.*$', Comment.Single),
        (r'/\*', Comment.Multiline, 'multiline-comment'),
    ],

    'multiline-comment': [
        (r'[^*/]+', Comment.Multiline),
        (r'/\*', Comment.Multiline, '#push'),
        (r'\*/', Comment.Multiline, '#pop'),
        (r'[*/]', Comment.Multiline),
    ],

    'classdef': [
        (r'\s+', Whitespace),
        (r'[A-Z][a-zA-Z0-9_]*', Name.Class, '#pop'),
    ],

    'expr': [
        (r'"', String.Double, 'string'),
        (r'\d+', Number.Integer),
        (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
        (r'[+\-*/]', Operator),
    ],

    'string': [
        (r'[^"\\]+', String.Double),
        (r'\\.', String.Escape),
        (r'"', String.Double, '#pop'),
    ],
}
```
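`bygroups` and `using` are listed above but not exercised in this example; a short sketch of both, delegating part of a match to the built-in `PythonLexer` (the `SketchLexer` name and its rules are illustrative only):

```python
from pygments.lexer import RegexLexer, bygroups, using
from pygments.lexers import PythonLexer
from pygments.token import Keyword, Name, Punctuation, Whitespace

class SketchLexer(RegexLexer):
    name = 'Sketch'
    tokens = {
        'root': [
            # bygroups: one token type per parenthesized group
            (r'(def)(\s+)([a-z_]\w*)',
             bygroups(Keyword.Declaration, Whitespace, Name.Function)),
            # using: lex the matched group with another lexer
            (r'(py!)(.*)$', bygroups(Punctuation, using(PythonLexer))),
            (r'\s+', Whitespace),
        ],
    }
```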
### Analysis Functions

```python { .api }
def analyse_text(text: str) -> float:
    """
    Analyze text and return a confidence score (0.0-1.0).
    Used for lexer guessing; higher scores indicate a better match.
    Implemented as a static method on the lexer class.
    """
```
Example implementation:

```python
import re  # module-level import in the lexer's file

# Defined inside the lexer class:
@staticmethod
def analyse_text(text):
    score = 0.0

    # Check for specific keywords
    if re.search(r'\b(function|var|const|let)\b', text):
        score += 0.3

    # Check for syntax patterns
    if re.search(r'function\s+\w+\s*\(', text):
        score += 0.2

    # Check file structure
    if re.search(r'export\s+(default\s+)?', text):
        score += 0.1

    return min(score, 1.0)
```
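These scores feed the lexer-guessing machinery; roughly, the registered lexer whose `analyse_text` returns the highest score wins:

```python
from pygments.lexers import guess_lexer

lexer = guess_lexer('function greet(name) { return name; }')
print(lexer.name)
```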
## Helper Classes

### Lexer Context Management

```python { .api }
class LexerContext:
    """
    Context for lexer state management, as passed to ExtendedRegexLexer
    callbacks. Carries the input text, the current position (pos), the
    end offset, and the state stack.
    """
```
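The context is what rule callbacks in an `ExtendedRegexLexer` receive. A minimal sketch of a callback that yields its token and advances the position by hand (the lexer and callback names are illustrative):

```python
from pygments.lexer import ExtendedRegexLexer
from pygments.token import Comment, Text

def comment_callback(lexer, match, ctx):
    # Callbacks yield (index, tokentype, value) tuples and must
    # advance ctx.pos themselves.
    yield match.start(), Comment.Single, match.group()
    ctx.pos = match.end()

class CallbackLexer(ExtendedRegexLexer):
    name = 'CallbackSketch'
    tokens = {
        'root': [
            (r'#.*$', comment_callback),
            (r'[^#]+', Text),
        ],
    }
```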
### Token Type Utilities

```python { .api }
def string_to_tokentype(s: str) -> _TokenType:
    """Convert a string to a token type (e.g., 'Name.Function' -> Token.Name.Function)."""

def is_token_subtype(ttype: _TokenType, other: _TokenType) -> bool:
    """Check whether ttype is a subtype of other."""
```
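Both helpers live in `pygments.token`; a quick illustration:

```python
from pygments.token import Name, is_token_subtype, string_to_tokentype

ttype = string_to_tokentype('Name.Function')
assert is_token_subtype(ttype, Name)  # Name.Function is a kind of Name
assert ttype in Name                  # 'in' is the idiomatic spelling of the same check
```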
## Registration and Discovery

### Plugin Entry Points

Register custom components using setuptools entry points:

```python
# setup.py
setup(
    name='my-pygments-extensions',
    entry_points={
        'pygments.lexers': [
            'mylang = mypackage.lexers:MyLanguageLexer',
        ],
        'pygments.formatters': [
            'json = mypackage.formatters:JsonFormatter',
        ],
        'pygments.styles': [
            'mydark = mypackage.styles:MyDarkStyle',
        ],
        'pygments.filters': [
            'redact = mypackage.filters:RedactSecretsFilter',
        ],
    }
)
```
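Once the plugin package is installed, the components resolve by their registered names like any built-in:

```python
from pygments.lexers import get_lexer_by_name
from pygments.styles import get_style_by_name

lexer = get_lexer_by_name('mylang')  # finds MyLanguageLexer via the entry point
style = get_style_by_name('mydark')  # finds MyDarkStyle
```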
### Loading Custom Components

```python
from pygments.lexers import load_lexer_from_file
from pygments.formatters import load_formatter_from_file

# Load from files (the second argument is the class name to look up)
custom_lexer = load_lexer_from_file('mylexer.py', 'MyLexer')
custom_formatter = load_formatter_from_file('myformatter.py', 'MyFormatter')
```
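The loaded objects are ordinary lexer and formatter instances, so they drop straight into `highlight()`:

```python
from pygments import highlight

print(highlight('x = 1\n', custom_lexer, custom_formatter))
```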
## Testing Custom Components

```python
import io

from pygments.formatters import HtmlFormatter

# Test lexer
lexer = MyLanguageLexer()
tokens = list(lexer.get_tokens('test code here'))
assert len(tokens) > 0

# Test formatter: format() writes to a file-like object and returns None
formatter = JsonFormatter()
buffer = io.StringIO()
formatter.format(iter(tokens), buffer)
assert buffer.getvalue()

# Test style: styles are passed around as classes, not instances
html_formatter = HtmlFormatter(style=MyDarkStyle)

# Test filter
filter_instance = RedactSecretsFilter(keywords=['secret', 'password'])
lexer.add_filter(filter_instance)
```
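A useful sanity check for lexers is the round-trip property: concatenating the token values reproduces the input (Pygments normalizes newlines and ensures a trailing one by default). A pytest-style sketch, assuming the `MyLanguageLexer` from above:

```python
from pygments.token import Keyword

def test_roundtrip():
    lexer = MyLanguageLexer()
    source = 'if x == 1\n'
    tokens = list(lexer.get_tokens(source))
    assert ''.join(value for _, value in tokens) == source

def test_keyword_tokenized():
    lexer = MyLanguageLexer()
    ttype, value = list(lexer.get_tokens('if'))[0]
    assert ttype is Keyword and value == 'if'
```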