0
# Token System
1
2
The token system provides structured representation of parsed code through Token objects and token streams. This system converts raw code strings into hierarchical structures that preserve both content and semantic meaning for rendering highlighted markup.
3
4
## Capabilities
5
6
### Token Constructor
7
8
The main constructor for creating token objects that represent parsed code elements.
9
10
```javascript { .api }
11
/**
12
* Constructor for token objects representing parsed code elements
13
* @param {string} type - Token type identifier (keyword, string, number, etc.)
14
* @param {string|TokenStream} content - Token content or nested token stream
15
* @param {string|string[]} [alias] - Additional CSS class names for styling
16
* @param {string} [matchedStr] - Original matched string for length calculation
17
* @constructor
18
*/
19
function Token(type, content, alias, matchedStr);
20
```
21
22
**Properties:**
23
24
```javascript { .api }
25
/**
26
* Token object properties
27
* @typedef {Object} Token
28
* @property {string} type - Token type for CSS class generation
29
* @property {string|TokenStream} content - Token content or nested tokens
30
* @property {string|string[]} alias - Additional CSS classes
31
* @property {number} length - Length of original matched string
32
*/
33
```
34
35
**Usage Examples:**
36
37
```javascript
38
// Create simple tokens
39
const keywordToken = new Prism.Token('keyword', 'function');
40
const stringToken = new Prism.Token('string', '"Hello World"');
41
const numberToken = new Prism.Token('number', '42');
42
43
// Token with alias for additional styling
44
const classToken = new Prism.Token('class-name', 'MyClass', 'important');
45
46
// Token with multiple aliases
47
const operatorToken = new Prism.Token('operator', '===', ['equality', 'strict']);
48
49
// Complex token with nested content
50
const functionToken = new Prism.Token('function', [
51
'myFunction',
52
new Prism.Token('punctuation', '('),
53
'param',
54
new Prism.Token('punctuation', ')')
55
]);
56
57
// Access token properties
58
console.log(keywordToken.type); // 'keyword'
59
console.log(keywordToken.content); // 'function'
60
console.log(keywordToken.length); // 0 here (no matchedStr was passed); 8 if matchedStr were 'function'
61
```
62
63
### Token Stream
64
65
Array structure containing strings and Token objects representing parsed code.
66
67
```javascript { .api }
68
/**
69
* Array of strings and Token objects representing parsed code
70
* @typedef {Array<string|Token>} TokenStream
71
*/
72
```
73
74
**Properties:**
75
- No adjacent strings (consolidated during parsing)
76
- No empty strings (the only exception is a token stream that consists of a single empty string)
77
- Mixed content of raw strings and Token objects
78
79
**Usage Examples:**
80
81
```javascript
82
// Example token stream from tokenization
83
const code = 'const message = "Hello";';
84
const tokens = Prism.tokenize(code, Prism.languages.javascript);
85
86
// Typical token stream structure:
87
// [
88
// Token { type: 'keyword', content: 'const' },
89
// ' message ', (plain identifiers are not tokenized by the JavaScript grammar, so they stay part of the surrounding string)
92
// Token { type: 'operator', content: '=' },
93
// ' ',
94
// Token { type: 'string', content: '"Hello"' },
95
// Token { type: 'punctuation', content: ';' }
96
// ]
97
98
// Process token stream
99
/**
 * Summarize the top-level items of a token stream.
 *
 * Only the items of `tokenStream` itself are inspected; nested streams stored
 * in a token's `content` are not descended into.
 *
 * @param {Array<string|Token>} tokenStream - Token stream to inspect
 * @returns {{tokens: number, strings: number, types: Object<string, number>}}
 *   Number of Token objects, number of plain strings, and a per-type count
 *   of the Token objects
 */
function analyzeTokens(tokenStream) {
  const analysis = { tokens: 0, strings: 0, types: {} };
  const hasOwn = Object.prototype.hasOwnProperty;

  tokenStream.forEach(item => {
    if (item instanceof Prism.Token) {
      analysis.tokens++;
      // Look only at own properties: a token type that shares its name with an
      // Object.prototype member (e.g. 'constructor') must still start at 0
      // instead of picking up the inherited value.
      const seen = hasOwn.call(analysis.types, item.type) ? analysis.types[item.type] : 0;
      analysis.types[item.type] = seen + 1;
    } else if (typeof item === 'string') {
      analysis.strings++;
    }
  });

  return analysis;
}
113
```
114
115
### Token Stringification
116
117
#### stringify
118
119
Convert tokens and token streams to HTML markup strings.
120
121
```javascript { .api }
122
/**
123
* Convert tokens or token streams to HTML string representation
124
* @param {string|Token|TokenStream} o - Token, token stream, or string to stringify
125
* @param {string} language - Language identifier for CSS class generation
126
* @returns {string} HTML markup string with syntax highlighting
127
*/
128
Token.stringify(o, language);
129
```
130
131
**Usage Examples:**
132
133
```javascript
134
// Stringify simple token
135
const token = new Prism.Token('keyword', 'function');
136
const html = Prism.Token.stringify(token, 'javascript');
137
console.log(html);
138
// Output: '<span class="token keyword">function</span>'
139
140
// Stringify token with alias
141
const classToken = new Prism.Token('class-name', 'MyClass', 'important');
142
const classHtml = Prism.Token.stringify(classToken, 'javascript');
143
console.log(classHtml);
144
// Output: '<span class="token class-name important">MyClass</span>'
145
146
// Stringify token stream
147
const tokens = [
148
new Prism.Token('keyword', 'const'),
149
' ',
150
new Prism.Token('variable', 'x'),
151
' ',
152
new Prism.Token('operator', '='),
153
' ',
154
new Prism.Token('number', '42')
155
];
156
157
const streamHtml = Prism.Token.stringify(tokens, 'javascript');
158
console.log(streamHtml);
159
// Output: '<span class="token keyword">const</span> <span class="token variable">x</span> <span class="token operator">=</span> <span class="token number">42</span>'
160
161
// Stringify nested tokens
162
const nestedToken = new Prism.Token('function-call', [
163
new Prism.Token('function', 'console'),
164
new Prism.Token('punctuation', '.'),
165
new Prism.Token('function', 'log'),
166
new Prism.Token('punctuation', '('),
167
new Prism.Token('string', '"Hello"'),
168
new Prism.Token('punctuation', ')')
169
]);
170
171
const nestedHtml = Prism.Token.stringify(nestedToken, 'javascript');
172
console.log(nestedHtml);
173
// Output: '<span class="token function-call"><span class="token function">console</span><span class="token punctuation">.</span><span class="token function">log</span><span class="token punctuation">(</span><span class="token string">"Hello"</span><span class="token punctuation">)</span></span>'
174
```
175
176
### Token Processing
177
178
#### Token Manipulation
179
180
Working with tokens after tokenization for analysis or modification.
181
182
```javascript
183
// Extract specific token types
184
/**
 * Collect the `content` of every token of a given type, including tokens
 * nested inside other tokens.
 *
 * Results are gathered in document order: a matching token's content is
 * recorded before any matches found inside its nested stream.
 *
 * @param {Array<string|Token>} tokenStream - Token stream to search
 * @param {string} targetType - Token type to look for
 * @returns {Array<string|Array<string|Token>>} Content of each matching token
 */
function extractTokensByType(tokenStream, targetType) {
  const found = [];

  const visit = (items) => {
    for (const item of items) {
      // Plain strings carry no type and have nothing nested inside them
      if (!(item instanceof Prism.Token)) {
        continue;
      }
      if (item.type === targetType) {
        found.push(item.content);
      }
      if (Array.isArray(item.content)) {
        visit(item.content);
      }
    }
  };

  visit(tokenStream);
  return found;
}
204
205
// Usage example
206
const code = 'function getName() { return "John"; }';
207
const tokens = Prism.tokenize(code, Prism.languages.javascript);
208
const functions = extractTokensByType(tokens, 'function');
209
const strings = extractTokensByType(tokens, 'string');
210
211
console.log('Functions:', functions); // ['getName']
212
console.log('Strings:', strings); // ['"John"']
213
```
214
215
#### Token Filtering
216
217
```javascript
218
// Filter token stream based on criteria
219
/**
 * Return a copy of a token stream without the tokens rejected by `predicate`.
 *
 * Only the top-level items are examined. Items that are not Token objects
 * (the plain strings between tokens) are always kept.
 *
 * @param {Array<string|Token>} tokenStream - Token stream to filter
 * @param {function(Token): boolean} predicate - Called for each Token; a
 *   truthy result keeps the token
 * @returns {Array<string|Token>} New array with the surviving items
 */
function filterTokens(tokenStream, predicate) {
  const shouldKeep = (item) => !(item instanceof Prism.Token) || predicate(item);
  return tokenStream.filter(shouldKeep);
}
227
228
// Remove comment tokens
229
const withoutComments = filterTokens(tokens, token => token.type !== 'comment');
230
231
// Keep only keyword tokens (plain strings between tokens are always kept by filterTokens)
232
const keywordsOnly = filterTokens(tokens, token => token.type === 'keyword');
233
```
234
235
#### Token Transformation
236
237
```javascript
238
// Transform tokens while preserving structure
239
/**
 * Build a new token stream by passing every Token through `transformer`.
 *
 * Plain strings are copied over untouched. When the object returned by
 * `transformer` holds an array as its `content`, that nested stream is
 * transformed as well and assigned back onto the returned object.
 *
 * @param {Array<string|Token>} tokenStream - Token stream to transform
 * @param {function(Token): Token} transformer - Produces the replacement for
 *   each Token
 * @returns {Array<string|Token>} New stream with the same layout as the input
 */
function transformTokens(tokenStream, transformer) {
  const convert = (entry) => {
    if (!(entry instanceof Prism.Token)) {
      return entry;
    }

    const replacement = transformer(entry);
    const inner = replacement.content;
    if (Array.isArray(inner)) {
      // Descend only after the transformer ran, so it sees the original children
      replacement.content = transformTokens(inner, transformer);
    }
    return replacement;
  };

  return tokenStream.map(convert);
}
252
253
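// Example: Add (approximate) line information to tokens.
// Note: the transformer only sees Token objects, so newlines inside the plain
// strings between tokens are not counted here.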
254
let lineNumber = 1;
255
const withLineNumbers = transformTokens(tokens, token => {
256
const newToken = new Prism.Token(token.type, token.content, token.alias);
257
newToken.line = lineNumber;
258
if (typeof token.content === 'string' && token.content.includes('\n')) {
259
lineNumber += (token.content.match(/\n/g) || []).length;
260
}
261
return newToken;
262
});
263
```
264
265
## Hook Integration
266
267
The token system integrates with the hook system for customization:
268
269
```javascript
270
// Modify tokens after tokenization
271
Prism.hooks.add('after-tokenize', function(env) {
  // env.tokens contains the token stream; its top-level tokens are updated in place
  env.tokens.forEach(token => {
    if (!(token instanceof Prism.Token) || token.type !== 'string') {
      return;
    }

    // Measure characters, not stream items: when the content is a nested token
    // stream, fall back to token.length (length of the original matched string).
    const size = typeof token.content === 'string' ? token.content.length : token.length;

    // Add special styling to long strings
    if (size > 20) {
      // alias may be undefined, a single string, or an array of strings.
      // [].concat() normalizes all three to an array; calling .concat() on a
      // string alias would glue the class names together into one string.
      token.alias = [].concat(token.alias || [], 'long-string');
    }
  });
});
283
284
// Custom token processing before HTML generation
285
Prism.hooks.add('wrap', function(env) {
  // Fields available on env: env.type, env.content, env.tag, env.attributes, env.language
  if (env.type !== 'keyword') {
    return; // leave every other token type untouched
  }

  // Attach a title attribute to the element generated for keyword tokens
  env.attributes.title = 'This is a keyword';
});
291
```
292
293
## Advanced Token Patterns
294
295
### Nested Token Structures
296
297
```javascript
298
// Complex nested token example (JSX-like)
299
const jsxToken = new Prism.Token('tag', [
300
new Prism.Token('punctuation', '<'),
301
new Prism.Token('tag-name', 'Component'),
302
' ',
303
new Prism.Token('attr-name', 'prop'),
304
new Prism.Token('punctuation', '='),
305
new Prism.Token('attr-value', [
306
new Prism.Token('punctuation', '{'),
307
new Prism.Token('string', '"value"'),
308
new Prism.Token('punctuation', '}')
309
]),
310
new Prism.Token('punctuation', '>')
311
]);
312
```
313
314
### Token Analysis
315
316
```javascript
317
// Analyze token complexity and nesting depth
318
/**
 * Measure how deeply token streams are nested inside one another.
 *
 * The stream passed in is depth 0; every token whose `content` is an array
 * opens one further level.
 *
 * @param {Array<string|Token>} tokenStream - Token stream to measure
 * @returns {number} Depth of the deepest nested stream (0 for a flat stream)
 */
function analyzeTokenDepth(tokenStream) {
  // Deepest level reachable from `items`, which themselves sit at `level`
  const deepestFrom = (items, level) =>
    items.reduce((deepest, item) => {
      const hasNestedStream = item instanceof Prism.Token && Array.isArray(item.content);
      if (!hasNestedStream) {
        return deepest;
      }
      return Math.max(deepest, deepestFrom(item.content, level + 1));
    }, level);

  return deepestFrom(tokenStream, 0);
}
334
335
// Count token statistics
336
/**
 * Collect aggregate statistics for a token stream, descending into nested
 * token streams.
 *
 * @param {Array<string|Token>} tokenStream - Token stream to analyze
 * @returns {{totalTokens: number, tokenTypes: Object<string, number>, maxNesting: number, totalContent: number}}
 *   - totalTokens: number of Token objects at every nesting level
 *   - tokenTypes: per-type count of those Token objects
 *   - maxNesting: depth of the deepest nested stream (0 for a flat stream)
 *   - totalContent: summed length of the content of all tokens whose content
 *     is a string (plain strings between tokens are not counted)
 */
function getTokenStats(tokenStream) {
  const stats = {
    totalTokens: 0,
    tokenTypes: {},
    maxNesting: 0,
    totalContent: 0
  };
  const hasOwn = Object.prototype.hasOwnProperty;

  function processTokens(tokens, depth = 0) {
    stats.maxNesting = Math.max(stats.maxNesting, depth);

    tokens.forEach(token => {
      if (!(token instanceof Prism.Token)) {
        return; // plain strings contribute to none of the statistics
      }

      stats.totalTokens++;
      // Look only at own properties: a token type that shares its name with an
      // Object.prototype member (e.g. 'constructor', 'toString') must still
      // start at 0 instead of picking up the inherited value.
      const seen = hasOwn.call(stats.tokenTypes, token.type) ? stats.tokenTypes[token.type] : 0;
      stats.tokenTypes[token.type] = seen + 1;

      if (typeof token.content === 'string') {
        stats.totalContent += token.content.length;
      } else if (Array.isArray(token.content)) {
        processTokens(token.content, depth + 1);
      }
    });
  }

  processTokens(tokenStream);
  return stats;
}
364
```
365
366
## Performance Considerations
367
368
```javascript
369
// Efficient token processing for large streams
370
/**
 * Map `processor` over a token stream, working through it in fixed-size chunks.
 *
 * The chunking is an internal detail: `processor` is called like an
 * Array.prototype.map callback, receiving the item, its index in
 * `tokenStream`, and `tokenStream` itself.
 *
 * @param {Array<string|Token>} tokenStream - Token stream to process
 * @param {function(string|Token, number, Array<string|Token>): *} processor -
 *   Called once per item; its return value is collected
 * @param {number} [chunkSize=1000] - Number of items handled per chunk
 * @returns {Array<*>} Processor results, in stream order
 * @throws {RangeError} If `chunkSize` is not a positive integer
 */
function processLargeTokenStream(tokenStream, processor, chunkSize = 1000) {
  // A step of zero (or less) would never advance the loop below
  if (!Number.isInteger(chunkSize) || chunkSize < 1) {
    throw new RangeError('chunkSize must be a positive integer');
  }

  const results = [];

  for (let start = 0; start < tokenStream.length; start += chunkSize) {
    const chunk = tokenStream.slice(start, start + chunkSize);
    // Spreading one bounded chunk at a time keeps the number of arguments
    // handed to push() small, however long the stream is.
    results.push(...chunk.map((item, offset) => processor(item, start + offset, tokenStream)));
  }

  return results;
}
382
383
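// Iterate over tokens one at a time with a generator
// (note: Prism.tokenize still builds the complete token array before the first token is yielded)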
384
/**
 * Generator that yields the top-level items of a tokenized string one by one.
 *
 * `Prism.tokenize` is invoked once, when the first value is requested, and
 * produces the whole token stream; the generator then hands out its items in
 * order.
 *
 * @param {string} code - Source text to tokenize
 * @param {Object} grammar - Grammar passed through to `Prism.tokenize`
 * @yields {string|Token} The next item of the token stream
 */
function* tokenGenerator(code, grammar) {
  // Delegate to the array's own iterator instead of looping by hand
  yield* Prism.tokenize(code, grammar);
}
390
391
// Usage with generator
392
const tokenGen = tokenGenerator(largeCodeString, Prism.languages.javascript);
393
for (const token of tokenGen) {
394
// Process one token at a time
395
console.log(token);
396
}
397
```