0
# Parser Infrastructure
1
2
Low-level parsing components including lexer, parser, and composer for advanced YAML processing and custom tooling development. These components provide direct access to the YAML parsing pipeline for specialized use cases.
3
4
## Capabilities
5
6
### Lexer
7
8
The Lexer tokenizes YAML source text into a stream of tokens representing the concrete syntax structure.
9
10
```typescript { .api }
11
class Lexer {
12
/**
13
* Tokenize YAML source into token stream
14
* @param src - YAML source string to tokenize
15
* @returns Generator yielding Token objects
16
*/
17
lex(src: string): Generator<Token>;
18
}
19
20
interface Token {
21
/** Token type identifier */
22
type: string;
23
24
/** Character offset in source */
25
offset: number;
26
27
/** Indentation level of the token */
28
indent: number;
29
30
/** Source text content */
31
source: string;
32
}
33
```
34
35
**Usage Examples:**
36
37
```typescript
38
import { Lexer } from "yaml";
39
40
const lexer = new Lexer();
41
const source = `
42
name: John Doe
43
age: 30
44
hobbies:
45
- reading
46
- coding
47
`;
48
49
// Tokenize source
50
const tokens = Array.from(lexer.lex(source));
51
52
tokens.forEach((token, index) => {
53
console.log(`Token ${index}:`, {
54
type: token.type,
55
offset: token.offset,
56
content: JSON.stringify(token.source)
57
});
58
});
59
60
// Example output:
61
// Token 0: { type: 'stream-start', offset: 0, content: '' }
62
// Token 1: { type: 'block-map-key', offset: 1, content: 'name' }
63
// Token 2: { type: 'block-map-value', offset: 5, content: ':' }
64
// Token 3: { type: 'scalar', offset: 7, content: 'John Doe' }
65
// ...
66
```
67
68
### Parser
69
70
The Parser converts token streams into Concrete Syntax Tree (CST) nodes, preserving source structure and formatting.
71
72
```typescript { .api }
73
class Parser {
74
/**
75
* Create parser with optional newline callback
76
* @param onNewLine - Called for each newline encountered
77
*/
78
constructor(onNewLine?: (offset: number) => void);
79
80
/**
81
* Parse token stream into CST nodes
82
* @param src - YAML source string
83
* @returns Generator yielding CST nodes
84
*/
85
parse(src: string): Generator<CST.Token>;
86
87
/**
88
* Parse next token from internal state
89
* @returns Next CST node or null
90
*/
91
next(): CST.Token | null;
92
93
/**
94
* Signal end of input
95
* @returns Final CST node or null
96
*/
97
end(): CST.Token | null;
98
}
99
```
100
101
**Usage Examples:**
102
103
```typescript
104
import { Parser, LineCounter } from "yaml";
105
106
// Create parser with line tracking
107
const lineCounter = new LineCounter();
108
const parser = new Parser(lineCounter.addNewLine);
109
110
const source = `
111
documents:
112
- title: "First Document"
113
content: "Hello World"
114
- title: "Second Document"
115
content: "Goodbye World"
116
`;
117
118
// Parse into CST nodes
119
const cstNodes = Array.from(parser.parse(source));
120
121
cstNodes.forEach((node, index) => {
122
console.log(`CST Node ${index}:`, {
123
type: node.type,
124
range: node.range,
125
value: node.source
126
});
127
});
128
129
// Manual parsing with next()
130
const parser2 = new Parser();
131
const tokens = new Lexer().lex(source);
132
133
for (const token of tokens) {
134
const node = parser2.next();
135
if (node) {
136
console.log('Parsed node:', node.type);
137
}
138
}
139
140
// Finalize parsing
141
const finalNode = parser2.end();
142
if (finalNode) {
143
console.log('Final node:', finalNode.type);
144
}
145
```
146
147
### Composer
148
149
The Composer converts CST nodes into Document objects with full AST representation.
150
151
```typescript { .api }
152
class Composer<Contents = ParsedNode, Strict = true> {
153
/**
154
* Create composer with options
155
* @param options - Document and schema options
156
*/
157
constructor(options?: DocumentOptions & SchemaOptions);
158
159
/**
160
* Compose CST tokens into Document objects
161
* @param tokens - Token stream from parser
162
* @param forceDoc - Force document creation even for empty input
163
* @param endOffset - Expected end offset for validation
164
* @returns Generator yielding Document objects
165
*/
166
compose(
167
tokens: Iterable<Token>,
168
forceDoc?: boolean,
169
endOffset?: number
170
): Generator<Document<Contents, Strict>>;
171
172
/**
173
* Process next token
174
* @param token - Token to process
175
* @returns Document if completed, undefined otherwise
176
*/
177
next(token: Token): Document<Contents, Strict> | undefined;
178
179
/**
180
* Signal end of token stream
181
* @param forceDoc - Force document creation
182
* @param endOffset - Expected end offset
183
* @returns Final document or undefined
184
*/
185
end(forceDoc?: boolean, endOffset?: number): Document<Contents, Strict> | undefined;
186
187
/**
188
* Get stream information for empty documents
189
* @returns Stream metadata
190
*/
191
streamInfo(): {
192
comment: string;
193
directives: Directives;
194
errors: YAMLError[];
195
warnings: YAMLError[];
196
};
197
}
198
```
199
200
**Usage Examples:**
201
202
```typescript
203
import { Lexer, Parser, Composer } from "yaml";
204
205
const source = `
206
# Multi-document YAML
207
---
208
document: 1
209
title: "First Document"
210
data:
211
- item1
212
- item2
213
---
214
document: 2
215
title: "Second Document"
216
data:
217
- item3
218
- item4
219
---
220
# Empty document follows
221
`;
222
223
// Complete parsing pipeline
224
const lexer = new Lexer();
225
const parser = new Parser();
226
const composer = new Composer({
227
version: '1.2',
228
keepCstNodes: true
229
});
230
231
// Process through pipeline
232
const tokens = lexer.lex(source);
233
const cstNodes = parser.parse(source);
234
const documents = Array.from(composer.compose(cstNodes));
235
236
console.log(`Parsed ${documents.length} documents`);
237
238
documents.forEach((doc, index) => {
239
console.log(`Document ${index + 1}:`, doc.toJS());
240
console.log(`Errors: ${doc.errors.length}, Warnings: ${doc.warnings.length}`);
241
});
242
243
// Handle empty stream
244
if (documents.length === 0) {
245
const streamInfo = composer.streamInfo();
246
console.log('Empty stream info:', streamInfo);
247
}
248
```
249
250
### Line Counter
251
252
Utility for tracking line and column positions in source text for error reporting.
253
254
```typescript { .api }
255
class LineCounter {
256
/**
257
* Register newline character at offset
258
* @param offset - Character position of newline
259
*/
260
addNewLine(offset: number): void;
261
262
/**
263
* Get line and column position for offset
264
* @param pos - Character position
265
* @returns Line and column numbers (1-based)
266
*/
267
linePos(pos: number): { line: number; col: number };
268
}
269
```
270
271
**Usage Examples:**
272
273
```typescript
274
import { LineCounter, Parser, prettifyError } from "yaml";
275
276
const source = `
277
line 1: value
278
line 2: another value
279
line 3:
280
nested: content
281
invalid: [unclosed array
282
line 6: final value
283
`;
284
285
// Track line positions during parsing
286
const lineCounter = new LineCounter();
287
const parser = new Parser(lineCounter.addNewLine);
288
289
try {
290
const nodes = Array.from(parser.parse(source));
291
console.log('Parsing completed successfully');
292
} catch (error) {
293
// Add position context to errors
294
if (error.pos) {
295
const position = lineCounter.linePos(error.pos[0]);
296
console.log(`Error at line ${position.line}, column ${position.col}`);
297
298
// Pretty print error with context
299
const formatter = prettifyError(source, lineCounter);
300
formatter(error);
301
}
302
}
303
304
// Manual position tracking
305
source.split('').forEach((char, index) => {
306
if (char === '\n') {
307
lineCounter.addNewLine(index);
308
}
309
});
310
311
// Query positions
312
console.log('Position 50:', lineCounter.linePos(50));
313
console.log('Position 100:', lineCounter.linePos(100));
314
```
315
316
### CST (Concrete Syntax Tree) Namespace
317
318
Complete set of CST interfaces and utilities for working with YAML's concrete syntax representation.
319
320
```typescript { .api }
321
namespace CST {
322
/** Base interface for all CST tokens */
323
interface Token {
324
type: string;
325
offset: number;
326
indent: number;
327
source: string;
328
}
329
330
/** Document-level CST node */
331
interface Document extends Token {
332
start: Token;
333
value?: Token;
334
end?: Token[];
335
}
336
337
/** Scalar value token */
338
interface FlowScalar extends Token {
339
type: 'scalar';
340
end: Token[];
341
}
342
343
/** Block scalar token */
344
interface BlockScalar extends Token {
345
type: 'block-scalar';
346
header: Token;
347
value?: string;
348
}
349
350
/** Collection tokens */
351
interface FlowCollection extends Token {
352
type: 'flow-collection';
353
start: Token;
354
items: Token[];
355
end: Token;
356
}
357
358
interface BlockCollection extends Token {
359
type: 'block-collection';
360
items: Token[];
361
}
362
363
/**
364
* Convert CST to string representation
365
* @param cst - CST node to stringify
366
* @returns String representation
367
*/
368
function stringify(cst: Token): string;
369
370
/**
371
* Visit CST nodes with callback
372
* @param cst - Root CST node
373
* @param visitor - Visitor function
374
*/
375
function visit(cst: Token, visitor: (token: Token) => void): void;
376
377
/**
378
* Create scalar token
379
* @param value - Scalar value
380
* @param context - Creation context
381
* @returns Scalar token
382
*/
383
function createScalarToken(value: string, context?: any): FlowScalar;
384
}
385
```
386
387
**Usage Examples:**
388
389
```typescript
390
import { Lexer, Parser, CST } from "yaml";
391
392
const source = `
393
name: "John Doe"
394
age: 30
395
active: true
396
`;
397
398
// Access CST directly
399
const lexer = new Lexer();
400
const parser = new Parser();
401
402
const tokens = Array.from(lexer.lex(source));
403
const cstNodes = Array.from(parser.parse(source));
404
405
// Work with CST nodes
406
cstNodes.forEach(node => {
407
console.log('CST Node Type:', node.type);
408
console.log('CST Node Source:', JSON.stringify(node.source));
409
410
// Convert CST back to string
411
const reconstructed = CST.stringify(node);
412
console.log('Reconstructed:', reconstructed);
413
});
414
415
// Visit CST nodes
416
CST.visit(cstNodes[0], (token) => {
417
console.log(`Visiting token: ${token.type} at offset ${token.offset}`);
418
});
419
420
// Create custom CST tokens
421
const customScalar = CST.createScalarToken('custom value');
422
console.log('Custom scalar:', CST.stringify(customScalar));
423
```
424
425
### Advanced Pipeline Usage
426
427
Combine all components for custom YAML processing workflows.
428
429
```typescript
430
import { Lexer, Parser, Composer, LineCounter, visit, isNode, type Document } from "yaml";
431
432
class CustomYAMLProcessor {
433
private lexer = new Lexer();
434
private lineCounter = new LineCounter();
435
private parser = new Parser(this.lineCounter.addNewLine);
436
private composer = new Composer({
437
keepCstNodes: true,
438
prettyErrors: true
439
});
440
441
async processYAML(source: string) {
442
try {
443
// Stage 1: Tokenization
444
console.log('Stage 1: Tokenizing...');
445
const tokens = Array.from(this.lexer.lex(source));
446
console.log(`Generated ${tokens.length} tokens`);
447
448
// Stage 2: Parsing to CST
449
console.log('Stage 2: Parsing to CST...');
450
const cstNodes = Array.from(this.parser.parse(source));
451
console.log(`Generated ${cstNodes.length} CST nodes`);
452
453
// Stage 3: Composition to Documents
454
console.log('Stage 3: Composing documents...');
455
const documents = Array.from(this.composer.compose(cstNodes));
456
console.log(`Generated ${documents.length} documents`);
457
458
// Process each document
459
const results = documents.map((doc, index) => ({
460
index,
461
content: doc.toJS(),
462
errors: doc.errors.length,
463
warnings: doc.warnings.length,
464
hasComments: this.hasComments(doc)
465
}));
466
467
return {
468
success: true,
469
documents: results,
470
totalErrors: results.reduce((sum, doc) => sum + doc.errors, 0),
471
totalWarnings: results.reduce((sum, doc) => sum + doc.warnings, 0)
472
};
473
474
} catch (error) {
475
return {
476
success: false,
477
error: error.message,
478
position: error.pos ? this.lineCounter.linePos(error.pos[0]) : null
479
};
480
}
481
}
482
483
private hasComments(doc: Document): boolean {
484
let hasComments = false;
485
486
visit(doc.contents, (key, node) => {
487
if (isNode(node) && (node.comment || node.commentBefore)) {
488
hasComments = true;
489
return visit.BREAK;
490
}
491
});
492
493
return hasComments;
494
}
495
}
496
497
// Usage
498
const processor = new CustomYAMLProcessor();
499
500
const complexYAML = `
501
# Configuration file
502
app:
503
name: MyApp # Application name
504
version: 1.0.0
505
506
# Database settings
507
database:
508
host: localhost
509
port: 5432
510
511
# Feature flags
512
features:
513
- auth # Authentication
514
- logging # Request logging
515
- metrics # Performance metrics
516
`;
517
518
processor.processYAML(complexYAML).then(result => {
519
console.log('Processing result:', result);
520
});
521
```
522
523
## Pipeline Architecture
524
525
The YAML processing pipeline consists of three main stages:
526
527
1. **Lexical Analysis (Lexer)**: Converts source text into tokens
528
2. **Syntactic Analysis (Parser)**: Converts tokens into CST nodes
529
3. **Semantic Analysis (Composer)**: Converts CST into Document AST
530
531
This separation allows for:
532
- Custom tokenization logic
533
- Syntax tree manipulation
534
- Alternative document representations
535
- Advanced error handling and recovery
536
- Performance optimization for specific use cases