or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-highlighting.md, index.md, language-system.md, plugin-system.md, token-system.md, utilities.md

docs/token-system.md

0

# Token System

1

2

The token system provides structured representation of parsed code through Token objects and token streams. This system converts raw code strings into hierarchical structures that preserve both content and semantic meaning for rendering highlighted markup.

3

4

## Capabilities

5

6

### Token Constructor

7

8

The main constructor for creating token objects that represent parsed code elements.

9

10

```javascript { .api }

11

/**

12

* Constructor for token objects representing parsed code elements

13

* @param {string} type - Token type identifier (keyword, string, number, etc.)

14

* @param {string|TokenStream} content - Token content or nested token stream

15

* @param {string|string[]} [alias] - Additional CSS class names for styling

16

* @param {string} [matchedStr] - Original matched string for length calculation

17

* @constructor

18

*/

19

function Token(type, content, alias, matchedStr);

20

```

21

22

**Properties:**

23

24

```javascript { .api }

25

/**

26

* Token object properties

27

* @typedef {Object} Token

28

* @property {string} type - Token type for CSS class generation

29

* @property {string|TokenStream} content - Token content or nested tokens

30

* @property {string|string[]} alias - Additional CSS classes

31

* @property {number} length - Length of original matched string

32

*/

33

```

34

35

**Usage Examples:**

36

37

```javascript

38

// Create simple tokens

39

const keywordToken = new Prism.Token('keyword', 'function');

40

const stringToken = new Prism.Token('string', '"Hello World"');

41

const numberToken = new Prism.Token('number', '42');

42

43

// Token with alias for additional styling

44

const classToken = new Prism.Token('class-name', 'MyClass', 'important');

45

46

// Token with multiple aliases

47

const operatorToken = new Prism.Token('operator', '===', ['equality', 'strict']);

48

49

// Complex token with nested content

50

const functionToken = new Prism.Token('function', [

51

'myFunction',

52

new Prism.Token('punctuation', '('),

53

'param',

54

new Prism.Token('punctuation', ')')

55

]);

56

57

// Access token properties

58

console.log(keywordToken.type); // 'keyword'

59

console.log(keywordToken.content); // 'function'

60

console.log(keywordToken.length); // 0 here: length comes from matchedStr, which was not passed (it would be 8 for matchedStr 'function')

61

```

62

63

### Token Stream

64

65

Array structure containing strings and Token objects representing parsed code.

66

67

```javascript { .api }

68

/**

69

* Array of strings and Token objects representing parsed code

70

* @typedef {Array<string|Token>} TokenStream

71

*/

72

```

73

74

**Properties:**

75

- No adjacent strings (consolidated during parsing)

76

- No empty strings (the only exception is a token stream that consists of a single empty string and nothing else)

77

- Mixed content of raw strings and Token objects

78

79

**Usage Examples:**

80

81

```javascript

82

// Example token stream from tokenization

83

const code = 'const message = "Hello";';

84

const tokens = Prism.tokenize(code, Prism.languages.javascript);

85

86

// Typical token stream structure. The JavaScript grammar has no rule for

87

// plain identifiers, so 'message' stays in a raw string and is merged with

88

// the surrounding whitespace (a stream never holds adjacent strings):

89

// [

90

//   Token { type: 'keyword', content: 'const' },

91

//   ' message ',

92

//   Token { type: 'operator', content: '=' },

93

//   ' ',

94

//   Token { type: 'string', content: '"Hello"' },

95

//   Token { type: 'punctuation', content: ';' }

96

// ]

97

98

// Process token stream

99

/**
 * Tally the top-level entries of a token stream.
 *
 * Nested token content is not visited; only the entries of `tokenStream`
 * itself are counted.
 *
 * @param {Array<string|Token>} tokenStream - Stream to inspect.
 * @returns {{tokens: number, strings: number, types: Object<string, number>}}
 *   Number of Token entries, number of raw-string entries, and a per-type
 *   count of the Token entries.
 */
function analyzeTokens(tokenStream) {
  const typeCounts = {};
  let tokenCount = 0;
  let stringCount = 0;

  for (const entry of tokenStream) {
    if (entry instanceof Prism.Token) {
      tokenCount += 1;
      typeCounts[entry.type] = (typeCounts[entry.type] || 0) + 1;
      continue;
    }
    if (typeof entry === 'string') {
      stringCount += 1;
    }
  }

  return { tokens: tokenCount, strings: stringCount, types: typeCounts };
}

113

```

114

115

### Token Stringification

116

117

#### stringify

118

119

Convert tokens and token streams to HTML markup strings.

120

121

```javascript { .api }

122

/**

123

* Convert tokens or token streams to HTML string representation

124

* @param {string|Token|TokenStream} o - Token, token stream, or string to stringify

125

* @param {string} language - Language identifier for CSS class generation

126

* @returns {string} HTML markup string with syntax highlighting

127

*/

128

Token.stringify(o, language);

129

```

130

131

**Usage Examples:**

132

133

```javascript

134

// Stringify simple token

135

const token = new Prism.Token('keyword', 'function');

136

const html = Prism.Token.stringify(token, 'javascript');

137

console.log(html);

138

// Output: '<span class="token keyword">function</span>'

139

140

// Stringify token with alias

141

const classToken = new Prism.Token('class-name', 'MyClass', 'important');

142

const classHtml = Prism.Token.stringify(classToken, 'javascript');

143

console.log(classHtml);

144

// Output: '<span class="token class-name important">MyClass</span>'

145

146

// Stringify token stream

147

const tokens = [

148

new Prism.Token('keyword', 'const'),

149

' ',

150

new Prism.Token('variable', 'x'),

151

' ',

152

new Prism.Token('operator', '='),

153

' ',

154

new Prism.Token('number', '42')

155

];

156

157

const streamHtml = Prism.Token.stringify(tokens, 'javascript');

158

console.log(streamHtml);

159

// Output: '<span class="token keyword">const</span> <span class="token variable">x</span> <span class="token operator">=</span> <span class="token number">42</span>'

160

161

// Stringify nested tokens

162

const nestedToken = new Prism.Token('function-call', [

163

new Prism.Token('function', 'console'),

164

new Prism.Token('punctuation', '.'),

165

new Prism.Token('function', 'log'),

166

new Prism.Token('punctuation', '('),

167

new Prism.Token('string', '"Hello"'),

168

new Prism.Token('punctuation', ')')

169

]);

170

171

const nestedHtml = Prism.Token.stringify(nestedToken, 'javascript');

172

console.log(nestedHtml);

173

// Output: '<span class="token function-call"><span class="token function">console</span><span class="token punctuation">.</span><span class="token function">log</span><span class="token punctuation">(</span><span class="token string">"Hello"</span><span class="token punctuation">)</span></span>'

174

```

175

176

### Token Processing

177

178

#### Token Manipulation

179

180

Working with tokens after tokenization for analysis or modification.

181

182

```javascript

183

// Extract specific token types

184

/**
 * Collect the content of every token of a given type, at any nesting depth.
 *
 * The stream is walked depth-first in document order. A matching token
 * contributes its `content` as-is (a string or a nested stream), and nested
 * streams are searched as well.
 *
 * @param {Array<string|Token>} tokenStream - Stream to search.
 * @param {string} targetType - Token type to look for.
 * @returns {Array<string|Array>} Contents of the matching tokens.
 */
function extractTokensByType(tokenStream, targetType) {
  const found = [];

  const visit = (stream) => {
    for (const node of stream) {
      if (!(node instanceof Prism.Token)) {
        continue; // raw strings carry no type
      }
      if (node.type === targetType) {
        found.push(node.content);
      }
      if (Array.isArray(node.content)) {
        visit(node.content);
      }
    }
  };

  visit(tokenStream);
  return found;
}

204

205

// Usage example

206

const code = 'function getName() { return "John"; }';

207

const tokens = Prism.tokenize(code, Prism.languages.javascript);

208

const functions = extractTokensByType(tokens, 'function');

209

const strings = extractTokensByType(tokens, 'string');

210

211

console.log('Functions:', functions); // ['getName']

212

console.log('Strings:', strings); // ['"John"']

213

```

214

215

#### Token Filtering

216

217

```javascript

218

// Filter token stream based on criteria

219

/**
 * Filter the top-level entries of a token stream.
 *
 * Only Token objects are passed to `predicate`; every other entry (raw
 * strings) is always kept. Nested token content is not filtered.
 *
 * @param {Array<string|Token>} tokenStream - Stream to filter.
 * @param {function(Token): boolean} predicate - Returns truthy to keep a token.
 * @returns {Array<string|Token>} New array with the surviving entries.
 */
function filterTokens(tokenStream, predicate) {
  return tokenStream.filter(
    (entry) => !(entry instanceof Prism.Token) || predicate(entry),
  );
}

227

228

// Remove comment tokens

229

const withoutComments = filterTokens(tokens, token => token.type !== 'comment');

230

231

// Keep only specific token types

232

const keywordsOnly = filterTokens(tokens, token => token.type === 'keyword');

233

```

234

235

#### Token Transformation

236

237

```javascript

238

// Transform tokens while preserving structure

239

/**
 * Map every Token in a stream through `transformer`, keeping the structure.
 *
 * Raw strings are passed through untouched. When the value returned by
 * `transformer` has array content, that content is transformed recursively
 * and written back onto the returned object.
 *
 * @param {Array<string|Token>} tokenStream - Stream to transform.
 * @param {function(Token): Token} transformer - Called once per Token.
 * @returns {Array<string|Token>} New array with the transformed entries.
 */
function transformTokens(tokenStream, transformer) {
  return tokenStream.map((entry) => {
    if (!(entry instanceof Prism.Token)) {
      return entry; // strings stay as they are
    }

    const result = transformer(entry);
    if (Array.isArray(result.content)) {
      result.content = transformTokens(result.content, transformer);
    }
    return result;
  });
}

252

253

// Example: Add line information to tokens.
// transformTokens only hands Token objects to its callback, but most newlines
// live in the raw strings between tokens. To keep the running line count
// accurate this example walks the stream itself and counts newlines in raw
// strings as well as in string token content.
let lineNumber = 1;

/**
 * Copy a token stream, tagging every Token with the line it starts on.
 *
 * @param {Array<string|Token>} stream - Stream (or nested stream) to annotate.
 * @returns {Array<string|Token>} New stream whose Tokens carry a `line` property.
 */
function annotateWithLines(stream) {
  const countNewlines = (text) => (text.match(/\n/g) || []).length;

  return stream.map((item) => {
    if (typeof item === 'string') {
      lineNumber += countNewlines(item);
      return item;
    }
    if (!(item instanceof Prism.Token)) {
      return item;
    }

    const newToken = new Prism.Token(item.type, item.content, item.alias);
    // The constructor derives length from matchedStr, which is not stored on
    // the token, so carry the original length over explicitly.
    newToken.length = item.length;
    newToken.line = lineNumber;

    if (Array.isArray(item.content)) {
      newToken.content = annotateWithLines(item.content);
    } else if (typeof item.content === 'string') {
      lineNumber += countNewlines(item.content);
    }
    return newToken;
  });
}

const withLineNumbers = annotateWithLines(tokens);

263

```

264

265

## Hook Integration

266

267

The token system integrates with the hook system for customization:

268

269

```javascript

270

// Modify tokens after tokenization

271

Prism.hooks.add('after-tokenize', function (env) {
  // env.tokens contains the token stream; tokens are updated in place.
  for (const token of env.tokens) {
    if (!(token instanceof Prism.Token) || token.type !== 'string') {
      continue;
    }

    // Content may be a nested stream; in that case fall back to the length of
    // the originally matched string instead of counting stream entries.
    const textLength =
      typeof token.content === 'string' ? token.content.length : token.length;

    // Add special styling to long strings
    if (textLength > 20) {
      // alias may be undefined, a single string, or an array. Array concat
      // normalises all three; calling concat on a string alias would glue
      // the class names together instead.
      token.alias = [].concat(token.alias || [], 'long-string');
    }
  }
});

283

284

// Custom token processing before HTML generation

285

Prism.hooks.add('wrap', function(env) {

286

// env.type, env.content, env.tag, env.attributes, env.language

287

if (env.type === 'keyword') {

288

env.attributes.title = 'This is a keyword';

289

}

290

});

291

```

292

293

## Advanced Token Patterns

294

295

### Nested Token Structures

296

297

```javascript

298

// Complex nested token example (JSX-like)

299

const jsxToken = new Prism.Token('tag', [

300

new Prism.Token('punctuation', '<'),

301

new Prism.Token('tag-name', 'Component'),

302

' ',

303

new Prism.Token('attr-name', 'prop'),

304

new Prism.Token('punctuation', '='),

305

new Prism.Token('attr-value', [

306

new Prism.Token('punctuation', '{'),

307

new Prism.Token('string', '"value"'),

308

new Prism.Token('punctuation', '}')

309

]),

310

new Prism.Token('punctuation', '>')

311

]);

312

```

313

314

### Token Analysis

315

316

```javascript

317

// Analyze token complexity and nesting depth

318

/**
 * Compute the deepest level of token nesting in a stream.
 *
 * The top-level stream is depth 0; each Token whose content is itself an
 * array adds one level.
 *
 * @param {Array<string|Token>} tokenStream - Stream to measure.
 * @returns {number} Maximum nesting depth found.
 */
function analyzeTokenDepth(tokenStream) {
  const deepestFrom = (stream, level) =>
    stream.reduce((deepest, entry) => {
      const hasNestedStream =
        entry instanceof Prism.Token && Array.isArray(entry.content);
      return hasNestedStream
        ? Math.max(deepest, deepestFrom(entry.content, level + 1))
        : deepest;
    }, level);

  return deepestFrom(tokenStream, 0);
}

334

335

// Count token statistics

336

/**
 * Gather summary statistics for a token stream, including nested tokens.
 *
 * @param {Array<string|Token>} tokenStream - Stream to analyse.
 * @returns {{totalTokens: number, tokenTypes: Object<string, number>,
 *   maxNesting: number, totalContent: number}}
 *   - totalTokens: number of Token objects at any depth
 *   - tokenTypes: count of Token objects per type
 *   - maxNesting: deepest nesting level (top-level stream is 0)
 *   - totalContent: summed length of all string token contents; raw strings
 *     that sit between tokens are not counted
 */
function getTokenStats(tokenStream) {
  const stats = {
    totalTokens: 0,
    tokenTypes: {},
    maxNesting: 0,
    totalContent: 0,
  };

  function processTokens(tokens, depth = 0) {
    stats.maxNesting = Math.max(stats.maxNesting, depth);

    for (const token of tokens) {
      if (!(token instanceof Prism.Token)) {
        continue;
      }

      stats.totalTokens += 1;
      stats.tokenTypes[token.type] = (stats.tokenTypes[token.type] || 0) + 1;

      if (typeof token.content === 'string') {
        stats.totalContent += token.content.length;
      } else if (Array.isArray(token.content)) {
        processTokens(token.content, depth + 1);
      }
    }
  }

  processTokens(tokenStream);
  return stats;
}

364

```

365

366

## Performance Considerations

367

368

```javascript

369

// Efficient token processing for large streams

370

/**
 * Apply `processor` to every entry of a (possibly very large) token stream.
 *
 * Results are appended one at a time, so no large argument list is ever
 * spread into a call and no intermediate chunk arrays are allocated.
 * `processor` receives the same arguments as an Array.prototype.map callback
 * on the full stream: the entry, its index in `tokenStream`, and
 * `tokenStream` itself.
 *
 * @param {Array<string|Token>} tokenStream - Stream to process.
 * @param {function(string|Token, number, Array): *} processor - Per-entry callback.
 * @returns {Array} Results in stream order.
 */
function processLargeTokenStream(tokenStream, processor) {
  const results = [];

  for (let index = 0; index < tokenStream.length; index += 1) {
    results.push(processor(tokenStream[index], index, tokenStream));
  }

  return results;
}

382

383

// Memory-efficient token streaming

384

/**
 * Expose the tokens of `code` through a generator, one entry at a time.
 *
 * Tokenization itself runs in a single Prism.tokenize call when the first
 * value is requested; the generator then hands out the resulting entries
 * in order.
 *
 * @param {string} code - Source text to tokenize.
 * @param {Object} grammar - Grammar passed through to Prism.tokenize.
 * @yields {string|Token} Each entry of the token stream.
 */
function* tokenGenerator(code, grammar) {
  yield* Prism.tokenize(code, grammar);
}

390

391

// Usage with generator

392

const tokenGen = tokenGenerator(largeCodeString, Prism.languages.javascript);

393

for (const token of tokenGen) {

394

// Process one token at a time

395

console.log(token);

396

}

397

```