0
# Grammar Parsing
1
2
Low-level grammar parsing functionality for converting PEG grammar strings into Abstract Syntax Trees. The `parser` namespace provides direct access to Peggy's grammar parser and syntax error handling.
3
4
## Capabilities
5
6
### Parse Grammar Function
7
8
Parses PEG grammar definitions into AST representations for further processing by the compiler.
9
10
```typescript { .api }
11
namespace parser {
12
/**
13
* Parses grammar and returns the grammar AST
14
* @param grammar - Source text of the PEG grammar
15
* @param options - Parser options for grammar parsing
16
* @returns Grammar AST ready for compilation
17
* @throws {SyntaxError} If grammar has incorrect syntax
18
*/
19
function parse(grammar: string, options?: Options): ast.Grammar;
20
21
/**
22
* Parser options for grammar parsing
23
*/
24
interface Options {
25
/** Source identifier attached to LocationRange objects */
26
grammarSource?: any;
27
/** Words not allowed as label names */
28
reservedWords: string[];
29
/** Start rule - must be "Grammar" */
30
startRule?: "Grammar";
31
}
32
}
33
```
34
35
**Usage Examples:**
36
37
```typescript
38
import { parser } from "peggy";
39
40
// Basic grammar parsing
41
const grammar = `
42
start = "hello" " " name:[a-z]+ { return "Hello, " + name.join(""); }
43
`;
44
45
try {
46
const ast = parser.parse(grammar, {
47
reservedWords: ["function", "class", "var"],
48
grammarSource: "my-grammar.peggy"
49
});
50
51
console.log(ast.rules.length); // Number of rules
52
console.log(ast.rules[0].name); // "start"
53
} catch (error) {
54
console.error("Parse error:", error instanceof Error ? error.message : error);
55
}
56
```
57
58
### Grammar Syntax Errors
59
60
Detailed syntax error reporting with location information and expectation details.
61
62
```typescript { .api }
63
namespace parser {
64
/**
65
* Grammar syntax error with detailed location and expectation info
66
*/
67
class SyntaxError extends globalThis.SyntaxError {
68
/** Location where error occurred */
69
location: LocationRange;
70
/** Possible tokens expected at error location */
71
expected: Expectation[] | null;
72
/** Character found at error location, or `null` if the error is at the end of input */
73
found: string | null;
74
75
constructor(
76
message: string,
77
expected: Expectation[] | null,
78
found: string | null,
79
location: LocationRange
80
);
81
82
/**
83
* Format error with source context
84
* @param sources - Source text mapping for context
85
* @returns Formatted error with source lines
86
*/
87
format(sources: SourceText[]): string;
88
89
/**
90
* Build human-readable message from expectations
91
* @param expected - Array of expected items
92
* @param found - Text found instead of expected
93
* @returns Human-readable error message
94
*/
95
static buildMessage(expected: Expectation[], found: string): string;
96
}
97
}
98
```
99
100
**Error Handling Example:**
101
102
```typescript
103
import { parser } from "peggy";
104
105
try {
106
const ast = parser.parse("start = )", {
107
reservedWords: [],
108
grammarSource: "test.peggy"
109
});
110
} catch (error) {
111
if (error instanceof parser.SyntaxError) {
112
console.log("Syntax error at line", error.location.start.line);
113
console.log("Expected:", error.expected);
114
console.log("Found:", error.found);
115
116
// Format with source context (text must be the same grammar that was parsed)
117
const formatted = error.format([{
118
source: "test.peggy",
119
text: "start = )"
120
}]);
121
console.log(formatted);
122
}
123
}
124
```
125
126
### Expectation Types
127
128
Different types of expectations that can appear in syntax errors, describing what the parser was looking for.
129
130
```typescript { .api }
131
namespace parser {
132
/**
133
* Union of all expectation types
134
*/
135
type Expectation =
136
| LiteralExpectation
137
| ClassExpectation
138
| AnyExpectation
139
| EndExpectation
140
| OtherExpectation;
141
142
/**
143
* Expected literal string
144
*/
145
interface LiteralExpectation {
146
type: "literal";
147
/** Expected sequence of characters */
148
text: string;
149
/** If true, case-insensitive matching */
150
ignoreCase: boolean;
151
}
152
153
/**
154
* Expected character class
155
*/
156
interface ClassExpectation {
157
type: "class";
158
/** Character ranges and individual characters */
159
parts: (string[] | string)[];
160
/** If true, negated character class */
161
inverted: boolean;
162
/** If true, case-insensitive matching */
163
ignoreCase: boolean;
164
}
165
166
/**
167
* Expected any character
168
*/
169
interface AnyExpectation {
170
type: "any";
171
}
172
173
/**
174
* Expected end of input
175
*/
176
interface EndExpectation {
177
type: "end";
178
}
179
180
/**
181
* Any other expectation (produced by an expected() call or by a rule's human-readable name)
182
*/
183
interface OtherExpectation {
184
type: "other";
185
/** Human-readable description */
186
description: string;
187
}
188
}
189
```
190
191
## AST Structure
192
193
The Abstract Syntax Tree structure returned by the parser, representing the complete grammar definition.
194
195
### Grammar Node
196
197
The root node representing the entire grammar with all its components.
198
199
```typescript { .api }
200
namespace ast {
201
/**
202
* Root grammar AST node
203
*/
204
interface Grammar extends Node<"grammar"> {
205
/** Imported external grammars */
206
imports: GrammarImport[];
207
/** Code executed once when importing parser module */
208
topLevelInitializer?: TopLevelInitializer | TopLevelInitializer[];
209
/** Code executed each time parse() is called */
210
initializer?: Initializer | Initializer[];
211
/** All rules in the grammar */
212
rules: Rule[];
213
214
// Added by compiler passes:
215
/** Generated JavaScript code and source map */
216
code?: SourceNode;
217
/** String literals used in bytecode */
218
literals?: string[];
219
/** Character classes used in bytecode */
220
classes?: GrammarCharacterClass[];
221
/** Error expectations used in bytecode */
222
expectations?: GrammarExpectation[];
223
/** Imported names for library references */
224
importedNames?: string[];
225
/** User-defined functions (actions, predicates) */
226
functions?: FunctionConst[];
227
/** Location ranges used in bytecode */
228
locations?: LocationRange[];
229
}
230
231
/**
232
* Base AST node interface
233
*/
234
interface Node<T> {
235
/** Node type identifier */
236
type: T;
237
/** Source location of this node */
238
location: LocationRange;
239
}
240
}
241
```
242
243
### Rule and Expression Types
244
245
Grammar rules and the various expression types that can appear in rule definitions.
246
247
```typescript { .api }
248
namespace ast {
249
/**
250
* Grammar rule definition
251
*/
252
interface Rule extends Expr<"rule"> {
253
/** Rule identifier */
254
name: string;
255
/** Location of rule name for error reporting */
256
nameLocation: LocationRange;
257
/** Rule's parsing expression */
258
expression: Expression | Named;
259
/** Generated bytecode (added by compiler) */
260
bytecode?: number[];
261
}
262
263
/**
264
* Named rule expression
265
*/
266
interface Named extends Expr<"named"> {
267
/** Human-readable rule name for errors */
268
name: string;
269
/** Underlying expression */
270
expression: Expression;
271
}
272
273
/**
274
* Union of all expression types
275
*/
276
type Expression =
277
| Action
278
| Choice
279
| Labeled
280
| Prefixed
281
| Primary
282
| Repeated
283
| Sequence
284
| Suffixed;
285
286
/**
287
* Base expression interface
288
*/
289
interface Expr<T> extends Node<T> {
290
/** Match result estimation (added by compiler) */
291
match?: MatchResult;
292
}
293
294
/**
295
* Match result enumeration
296
*/
297
enum MatchResult {
298
ALWAYS = 1, // Always matches
299
SOMETIMES = 0, // May or may not match
300
NEVER = -1 // Never matches
301
}
302
}
303
```
304
305
### Primary Expression Types
306
307
The fundamental expression types that form the building blocks of PEG grammars.
308
309
```typescript { .api }
310
namespace ast {
311
/**
312
* Union of primary expression types
313
*/
314
type Primary =
315
| Any
316
| CharacterClass
317
| Group
318
| LibraryReference
319
| Literal
320
| RuleReference
321
| SemanticPredicate;
322
323
/**
324
* Reference to another rule
325
*/
326
interface RuleReference extends Expr<"rule_ref"> {
327
/** Name of referenced rule */
328
name: string;
329
}
330
331
/**
332
* Reference to rule in imported library
333
*/
334
interface LibraryReference extends Expr<"library_ref"> {
335
/** Rule name (undefined for default rule) */
336
name: string | undefined;
337
/** Library namespace name */
338
library: string;
339
/** Import statement index */
340
libraryNumber: number;
341
}
342
343
/**
344
* Literal string match
345
*/
346
interface Literal extends Expr<"literal"> {
347
/** String to match */
348
value: string;
349
/** Case-insensitive matching */
350
ignoreCase: boolean;
351
}
352
353
/**
354
* Character class match
355
*/
356
interface CharacterClass extends Expr<"class"> {
357
/** Character ranges and individual characters */
358
parts: (string[] | string | ClassEscape)[];
359
/** Negated character class */
360
inverted: boolean;
361
/** Case-insensitive matching */
362
ignoreCase: boolean;
363
/** Unicode mode */
364
unicode: boolean;
365
}
366
367
/**
368
* Match any character
369
*/
370
interface Any extends Expr<"any"> {}
371
372
/**
373
* Semantic predicate (code that returns boolean)
374
*/
375
interface SemanticPredicate extends CodeBlockExpr<"semantic_and" | "semantic_not"> {}
376
377
/**
378
* Grouping expression for label scoping
379
*/
380
interface Group extends Expr<"group"> {
381
expression: Labeled | Sequence;
382
}
383
}
384
```
385
386
**AST Usage Example:**
387
388
```typescript
389
import { parser } from "peggy";
390
391
const grammar = `
392
start = "hello" name:word { return "Hello, " + name.join(""); }
393
word = [a-z]+
394
`;
395
396
const ast = parser.parse(grammar, { reservedWords: [] });
397
398
// Examine the AST structure
399
console.log("Number of rules:", ast.rules.length);
400
console.log("First rule name:", ast.rules[0].name);
401
console.log("First rule type:", ast.rules[0].expression.type);
402
403
// Walk through rules
404
ast.rules.forEach(rule => {
405
console.log(`Rule ${rule.name} at line ${rule.location.start.line}`);
406
});
407
```
408
409
## Complete AST Types
410
411
The `ast` namespace provides comprehensive type definitions for all AST node types in Peggy grammars.
412
413
### Core AST Interfaces
414
415
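Base interfaces used throughout the AST type system. `Node`, `Expr`, and `MatchResult` are the same definitions shown in the "Grammar Node" and "Rule and Expression Types" sections above, repeated here for reference.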
416
417
```typescript { .api }
418
namespace ast {
419
/**
420
* Base AST node interface
421
* @template T - Node type identifier
422
*/
423
interface Node<T> {
424
/** Node type identifier */
425
type: T;
426
/** Source location of this node */
427
location: LocationRange;
428
}
429
430
/**
431
* Base expression node interface
432
* @template T - Expression type identifier
433
*/
434
interface Expr<T> extends Node<T> {
435
/** Match result estimation (added by compiler passes) */
436
match?: MatchResult;
437
}
438
439
/**
440
* Match result enumeration for expressions
441
*/
442
enum MatchResult {
443
ALWAYS = 1, // Expression always matches
444
SOMETIMES = 0, // Expression may match
445
NEVER = -1 // Expression never matches
446
}
447
}
448
```