# Custom Components

Base classes and utilities for creating custom lexers, formatters, styles, and filters to extend Pygments functionality.

## Capabilities

### Custom Lexers

Base classes for implementing language-specific lexers.

```python { .api }
class Lexer:
    """
    Base lexer class.

    Attributes:
    - name: Human-readable lexer name
    - aliases: List of short identifiers
    - filenames: List of filename patterns
    - mimetypes: List of MIME types
    - priority: Priority for lexer selection (higher = preferred)
    """

    def get_tokens(self, text: str): ...
    def get_tokens_unprocessed(self, text: str): ...
    def analyse_text(text: str) -> float: ...
```

```python { .api }
class RegexLexer(Lexer):
    """
    Lexer based on regular expressions and states.

    Attributes:
    - tokens: Dictionary mapping state names to token rules
    - flags: Regex flags (re.MULTILINE | re.IGNORECASE, etc.)
    """
```

```python { .api }
class ExtendedRegexLexer(RegexLexer):
    """
    RegexLexer variant whose rule callbacks receive an explicit LexerContext,
    allowing manual control of the lexing position and state stack.
    """
```

```python { .api }
class DelegatingLexer(Lexer):
    """
    Lexer that scans text with a language lexer and delegates its
    unhandled (Other) tokens to a root lexer.
    """
```

Usage example:

```python
from pygments.lexer import RegexLexer
from pygments.token import *

class MyLanguageLexer(RegexLexer):
    name = 'MyLanguage'
    aliases = ['mylang', 'ml']
    filenames = ['*.ml', '*.mylang']
    mimetypes = ['text/x-mylang']

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*$', Comment.Single),
            (r'\b(if|else|while|for)\b', Keyword),
            (r'\b[A-Z][a-zA-Z0-9_]*\b', Name.Class),
            (r'\b[a-z][a-zA-Z0-9_]*\b', Name),
            (r'"[^"]*"', String.Double),
            (r'\d+', Number.Integer),
            (r'[+\-*/=<>!]', Operator),
            (r'[(){}[\],;]', Punctuation),
        ]
    }
```
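
`DelegatingLexer` is wired up in its constructor rather than through a `tokens` table: the language lexer scans the whole input, and anything it emits as `Other` is re-lexed by the root lexer. A minimal sketch, assuming the stock `HtmlLexer` and `PhpLexer` (the combined class name and aliases are illustrative):

```python
from pygments.lexer import DelegatingLexer
from pygments.lexers import HtmlLexer, PhpLexer

class HtmlWithPhpLexer(DelegatingLexer):
    """PHP highlighted inside HTML; non-PHP text falls through to HtmlLexer."""
    name = 'HTML+PHP (example)'
    aliases = ['html+php-example']

    def __init__(self, **options):
        # root lexer first, then the language lexer whose Other tokens it receives
        super().__init__(HtmlLexer, PhpLexer, **options)
```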

### Custom Formatters

Base class for creating output formatters.

```python { .api }
class Formatter:
    """
    Base formatter class.

    Attributes:
    - name: Human-readable formatter name
    - aliases: List of short identifiers
    - filenames: List of filename patterns
    - unicodeoutput: Whether the formatter outputs Unicode
    """

    def format(self, tokensource, outfile): ...
    def get_style_defs(self, arg='') -> str: ...
```

Usage example:

```python
from pygments.formatter import Formatter

class JsonFormatter(Formatter):
    name = 'JSON'
    aliases = ['json']
    filenames = ['*.json']

    def format(self, tokensource, outfile):
        import json
        tokens = []
        for ttype, value in tokensource:
            tokens.append({
                'type': str(ttype),
                'value': value
            })
        json.dump(tokens, outfile, indent=2)
```
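
A custom formatter plugs straight into the high-level API. A short sketch, assuming the `JsonFormatter` defined above:

```python
from pygments import highlight
from pygments.lexers import PythonLexer

# highlight() collects the formatter's output and returns it as a string
json_output = highlight('x = 1  # demo', PythonLexer(), JsonFormatter())
print(json_output)
```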

### Custom Styles

Base class for creating color schemes.

```python { .api }
class Style:
    """
    Base style class.

    Attributes:
    - name: Style name
    - styles: Dictionary mapping token types to style definitions
    """
```

Usage example:

```python
from pygments.style import Style
from pygments.token import *

class MyDarkStyle(Style):
    name = 'mydark'

    styles = {
        Comment: 'italic #75715e',
        Keyword: 'bold #66d9ef',
        Name: '#f8f8f2',
        Name.Attribute: '#a6e22e',
        Name.Class: 'bold #a6e22e',
        Name.Function: '#a6e22e',
        Number: '#ae81ff',
        Operator: '#f92672',
        String: '#e6db74',
        String.Doc: 'italic #e6db74',
        Generic.Deleted: '#f92672',
        Generic.Inserted: '#a6e22e',
        Generic.Heading: 'bold #f8f8f2',
        Error: '#f8f8f2 bg:#f92672',
    }
```
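
Styles are passed to formatters as classes, not instances. A minimal sketch of using the style above with the HTML formatter (the CSS selector is illustrative):

```python
from pygments.formatters import HtmlFormatter

formatter = HtmlFormatter(style=MyDarkStyle)   # pass the class itself
css = formatter.get_style_defs('.highlight')   # CSS rules derived from the style
print(css)
```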

163

164

### Custom Filters

165

166

Base class for creating token stream filters.

167

168

```python { .api }

169

class Filter:

170

"""

171

Base filter class.

172

173

Methods:

174

- filter(lexer, stream): Process token stream

175

"""

176

177

def filter(self, lexer, stream): ...

178

```

179

180

Usage example:

181

182

```python

183

from pygments.filter import Filter

184

from pygments.token import *

185

186

class UppercaseFilter(Filter):

187

"""Convert all text to uppercase."""

188

189

def filter(self, lexer, stream):

190

for ttype, value in stream:

191

yield ttype, value.upper()

192

193

class RedactSecretsFilter(Filter):

194

"""Replace sensitive information with asterisks."""

195

196

def __init__(self, **options):

197

Filter.__init__(self, **options)

198

self.keywords = options.get('keywords', ['password', 'secret', 'key'])

199

200

def filter(self, lexer, stream):

201

for ttype, value in stream:

202

if ttype is String:

203

for keyword in self.keywords:

204

if keyword.lower() in value.lower():

205

value = '***REDACTED***'

206

break

207

yield ttype, value

208

```
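
Filters are attached to a lexer instance before highlighting. A short sketch, assuming the `RedactSecretsFilter` above (the sample source text is illustrative):

```python
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import TerminalFormatter

lexer = PythonLexer()
lexer.add_filter(RedactSecretsFilter(keywords=['password']))

code = 'db_password = "the password is hunter2"\n'
print(highlight(code, lexer, TerminalFormatter()))  # the string literal comes out redacted
```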

209

210

## Lexer Development Utilities

211

212

### Token Rules

213

214

```python { .api }

215

def include(state: str): ...

216

def inherit(): ...

217

def bygroups(*args): ...

218

def using(cls, **kwargs): ...

219

def this(): ...

220

def default(state: str): ...

221

def words(words: list, prefix: str = '', suffix: str = ''): ...

222

```

223

224

Usage in lexer tokens:

225

226

```python

227

tokens = {

228

'root': [

229

(r'\s+', Whitespace),

230

include('comments'),

231

(r'\b(class|def)\b', Keyword, 'classdef'),

232

(words(['int', 'str', 'bool'], suffix=r'\b'), Name.Builtin.Type),

233

default('expr'),

234

],

235

236

'comments': [

237

(r'#.*$', Comment.Single),

238

(r'/\*', Comment.Multiline, 'multiline-comment'),

239

],

240

241

'multiline-comment': [

242

(r'[^*/]+', Comment.Multiline),

243

(r'/\*', Comment.Multiline, '#push'),

244

(r'\*/', Comment.Multiline, '#pop'),

245

(r'[*/]', Comment.Multiline),

246

],

247

248

'classdef': [

249

(r'\s+', Whitespace),

250

(r'[A-Z][a-zA-Z0-9_]*', Name.Class, '#pop'),

251

],

252

253

'expr': [

254

(r'"', String.Double, 'string'),

255

(r'\d+', Number.Integer),

256

(r'[a-zA-Z_][a-zA-Z0-9_]*', Name),

257

(r'[+\-*/]', Operator),

258

],

259

260

'string': [

261

(r'[^"\\]+', String.Double),

262

(r'\\.', String.Escape),

263

(r'"', String.Double, '#pop'),

264

],

265

}

266

```
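
`bygroups` and `using` do not appear in the rule set above. A minimal sketch of both (the class name, regexes, and token choices are illustrative):

```python
from pygments.lexer import RegexLexer, bygroups, using, this
from pygments.token import Keyword, Name, Punctuation, Text, Whitespace

class TinyLexer(RegexLexer):
    name = 'Tiny (example)'

    tokens = {
        'root': [
            # bygroups assigns one token type per capturing group
            (r'(def)(\s+)([a-zA-Z_]\w*)',
             bygroups(Keyword, Whitespace, Name.Function)),
            # using(this) re-lexes the captured group with this same lexer
            (r'(eval)(\()(.*?)(\))',
             bygroups(Name.Builtin, Punctuation, using(this), Punctuation)),
            (r'.', Text),
        ],
    }
```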

### Analysis Functions

```python { .api }
def analyse_text(text: str) -> float:
    """
    Analyze text and return a confidence score (0.0-1.0).
    Used for lexer guessing. Higher scores indicate a better match.
    """
```

Example implementation:

```python
# Inside a lexer class; assumes `import re` at module level.
@staticmethod
def analyse_text(text):
    score = 0.0

    # Check for specific keywords
    if re.search(r'\b(function|var|const|let)\b', text):
        score += 0.3

    # Check for syntax patterns
    if re.search(r'function\s+\w+\s*\(', text):
        score += 0.2

    # Check file structure
    if re.search(r'export\s+(default\s+)?', text):
        score += 0.1

    return min(score, 1.0)
```
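
These scores drive lexer guessing in the high-level API. A short sketch (the sample snippet is illustrative):

```python
from pygments.lexers import guess_lexer

# guess_lexer calls analyse_text on registered lexers and picks the best score
lexer = guess_lexer('function greet(name) { return "hi " + name; }')
print(lexer.name)
```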

## Helper Classes

### Lexer Context Management

```python { .api }
class LexerContext:
    """Context for lexer state management."""
```
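
`LexerContext` is what `ExtendedRegexLexer` hands to rule callbacks, exposing the text, the current position, and the state stack. A minimal sketch of such a callback (the function name and token choice are illustrative):

```python
from pygments.token import Keyword

def marker_callback(lexer, match, ctx):
    """Callback usable as a rule action in an ExtendedRegexLexer."""
    # ctx is a LexerContext: ctx.text, ctx.pos, and ctx.stack are all available
    yield match.start(), Keyword, match.group()
    ctx.pos = match.end()   # advance the lexing position manually
```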

308

309

### Token Type Utilities

310

311

```python { .api }

312

def string_to_tokentype(s: str) -> _TokenType:

313

"""Convert string to token type (e.g., 'Name.Function' -> Token.Name.Function)."""

314

315

def is_token_subtype(ttype: _TokenType, other: _TokenType) -> bool:

316

"""Check if ttype is a subtype of other."""

317

```
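
Both helpers live in `pygments.token`; a short usage sketch:

```python
from pygments.token import Name, is_token_subtype, string_to_tokentype

ttype = string_to_tokentype('Name.Function')
assert ttype is Name.Function                   # token types are singletons
assert is_token_subtype(Name.Function, Name)    # Name.Function sits under Name
assert not is_token_subtype(Name, Name.Function)
```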

318

319

## Registration and Discovery

320

321

### Plugin Entry Points

322

323

Register custom components using setuptools entry points:

324

325

```python

326

# setup.py

327

setup(

328

name='my-pygments-extensions',

329

entry_points={

330

'pygments.lexers': [

331

'mylang = mypackage.lexers:MyLanguageLexer',

332

],

333

'pygments.formatters': [

334

'json = mypackage.formatters:JsonFormatter',

335

],

336

'pygments.styles': [

337

'mydark = mypackage.styles:MyDarkStyle',

338

],

339

'pygments.filters': [

340

'redact = mypackage.filters:RedactSecretsFilter',

341

],

342

}

343

)

344

```
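
Once the package is installed, the components resolve through the normal lookup functions. A short sketch, assuming the entry-point names above:

```python
from pygments.lexers import get_lexer_by_name
from pygments.styles import get_style_by_name

lexer = get_lexer_by_name('mylang')    # found via the 'pygments.lexers' entry point
style = get_style_by_name('mydark')    # found via the 'pygments.styles' entry point
```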

345

346

### Loading Custom Components

347

348

```python

349

from pygments.lexers import load_lexer_from_file

350

from pygments.formatters import load_formatter_from_file

351

352

# Load from files

353

custom_lexer = load_lexer_from_file('mylexer.py', 'MyLexer')

354

custom_formatter = load_formatter_from_file('myformatter.py', 'MyFormatter')

355

```
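
Both loaders return ready-to-use instances, so they drop straight into `highlight()`. A short sketch, assuming the files loaded above exist:

```python
from pygments import highlight

output = highlight('some source text', custom_lexer, custom_formatter)
```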

## Testing Custom Components

```python
import sys

from pygments.formatters import HtmlFormatter

# Assumes the classes defined in the sections above are in scope.

# Test lexer
lexer = MyLanguageLexer()
tokens = list(lexer.get_tokens('test code here'))
assert len(tokens) > 0

# Test formatter (format() writes to the given file-like object)
formatter = JsonFormatter()
formatter.format(tokens, sys.stdout)

# Test style (formatters take the style class, not an instance)
html_formatter = HtmlFormatter(style=MyDarkStyle)

# Test filter
filter_instance = RedactSecretsFilter(keywords=['secret', 'password'])
lexer.add_filter(filter_instance)
```