0
# AST Node Types and Structures
1
2
Comprehensive type system covering all ECMAScript regular expression syntax elements as Abstract Syntax Tree nodes.
3
4
## Capabilities
5
6
### Core Node Types
7
8
The foundational type unions that categorize all AST nodes.
9
10
```typescript { .api }
11
/**
12
* The type which includes all nodes
13
*/
14
type AST.Node = AST.BranchNode | AST.LeafNode;
15
16
/**
17
* The type which includes all branch nodes (nodes with children)
18
*/
19
type AST.BranchNode =
20
| AST.RegExpLiteral
21
| AST.Pattern
22
| AST.Alternative
23
| AST.Group
24
| AST.CapturingGroup
25
| AST.Quantifier
26
| AST.CharacterClass
27
| AST.LookaroundAssertion
28
| AST.CharacterClassRange;
29
30
/**
31
* The type which includes all leaf nodes (nodes without children)
32
*/
33
type AST.LeafNode =
34
| AST.BoundaryAssertion
35
| AST.CharacterSet
36
| AST.Character
37
| AST.Backreference
38
| AST.Flags;
39
40
/**
41
* The type which includes all element nodes (pattern components)
42
*/
43
type AST.Element = AST.Assertion | AST.Quantifier | AST.QuantifiableElement;
44
45
/**
46
* The type which includes all elements that can be quantified
47
*/
48
type AST.QuantifiableElement =
49
| AST.Group
50
| AST.CapturingGroup
51
| AST.CharacterClass
52
| AST.CharacterSet
53
| AST.Character
54
| AST.Backreference
55
| AST.LookaheadAssertion; // Lookahead assertions are quantifiable in Annex-B
56
57
/**
58
* The type which includes all character class elements
59
*/
60
type AST.CharacterClassElement =
61
| AST.EscapeCharacterSet
62
| AST.UnicodePropertyCharacterSet
63
| AST.Character
64
| AST.CharacterClassRange;
65
```
66
67
### Base Node Interface
68
69
All AST nodes extend this base interface, which provides the properties common to every node.
70
71
```typescript { .api }
72
/**
73
* The base interface for all AST nodes
74
*/
75
interface AST.NodeBase {
76
/** The type discriminator for this node */
77
type: string;
78
/** The parent node, or null for root nodes */
79
parent: AST.Node | null;
80
/** The start index of this node in the original source */
81
start: number;
82
/** The end index of this node in the original source */
83
end: number;
84
/** The raw source text for this node */
85
raw: string;
86
}
87
```
88
89
## Root and Structural Nodes
90
91
### RegExp Literal
92
93
The root node representing a complete regular expression literal.
94
95
```typescript { .api }
96
/**
97
* Regular expression literal node (/pattern/flags)
98
*/
99
interface AST.RegExpLiteral extends AST.NodeBase {
100
type: "RegExpLiteral";
101
parent: null;
102
/** The pattern part of the regex */
103
pattern: AST.Pattern;
104
/** The flags part of the regex */
105
flags: AST.Flags;
106
}
107
```
108
109
### Pattern
110
111
The node for the regex pattern itself; it holds the top-level alternatives.
112
113
```typescript { .api }
114
/**
115
* Pattern node containing alternatives
116
*/
117
interface AST.Pattern extends AST.NodeBase {
118
type: "Pattern";
119
parent: AST.RegExpLiteral | null;
120
/** Array of alternative patterns (separated by |) */
121
alternatives: AST.Alternative[];
122
}
123
```
124
125
### Alternative
126
127
A single alternative within a pattern (one branch of a disjunction).
128
129
```typescript { .api }
130
/**
131
* Alternative node representing one branch of a disjunction
132
*/
133
interface AST.Alternative extends AST.NodeBase {
134
type: "Alternative";
135
parent: AST.Pattern | AST.Group | AST.CapturingGroup | AST.LookaroundAssertion;
136
/** Array of elements in this alternative */
137
elements: AST.Element[];
138
}
139
```
140
141
## Group Nodes
142
143
### Group
144
145
Non-capturing group node.
146
147
```typescript { .api }
148
/**
149
* Non-capturing group (?:...)
150
*/
151
interface AST.Group extends AST.NodeBase {
152
type: "Group";
153
parent: AST.Alternative | AST.Quantifier;
154
/** Array of alternatives within this group */
155
alternatives: AST.Alternative[];
156
}
157
```
158
159
### Capturing Group
160
161
Capturing group node with optional name.
162
163
```typescript { .api }
164
/**
165
* Capturing group (...) or named capturing group (?<name>...)
166
*/
167
interface AST.CapturingGroup extends AST.NodeBase {
168
type: "CapturingGroup";
169
parent: AST.Alternative | AST.Quantifier;
170
/** The name of the group (for named groups) or null */
171
name: string | null;
172
/** Array of alternatives within this group */
173
alternatives: AST.Alternative[];
174
/** Array of backreferences that refer to this group */
175
references: AST.Backreference[];
176
}
177
```
178
179
## Assertion Nodes
180
181
### Lookaround Assertions
182
183
```typescript { .api }
184
/**
185
* Union type for all lookaround assertions
186
*/
187
type AST.LookaroundAssertion = AST.LookaheadAssertion | AST.LookbehindAssertion;
188
189
/**
190
* Lookahead assertion (?=...) or (?!...)
191
*/
192
interface AST.LookaheadAssertion extends AST.NodeBase {
193
type: "Assertion";
194
parent: AST.Alternative | AST.Quantifier;
195
kind: "lookahead";
196
/** True for negative lookahead (?!...) */
197
negate: boolean;
198
/** Array of alternatives within this assertion */
199
alternatives: AST.Alternative[];
200
}
201
202
/**
203
* Lookbehind assertion (?<=...) or (?<!...)
204
*/
205
interface AST.LookbehindAssertion extends AST.NodeBase {
206
type: "Assertion";
207
parent: AST.Alternative;
208
kind: "lookbehind";
209
/** True for negative lookbehind (?<!...) */
210
negate: boolean;
211
/** Array of alternatives within this assertion */
212
alternatives: AST.Alternative[];
213
}
214
```
215
216
### Boundary Assertions
217
218
```typescript { .api }
219
/**
220
* Union type for all assertions
221
*/
222
type AST.Assertion = AST.BoundaryAssertion | AST.LookaroundAssertion;
223
224
/**
225
* Union type for boundary assertions
226
*/
227
type AST.BoundaryAssertion = AST.EdgeAssertion | AST.WordBoundaryAssertion;
228
229
/**
230
* Edge assertion ^ or $
231
*/
232
interface AST.EdgeAssertion extends AST.NodeBase {
233
type: "Assertion";
234
parent: AST.Alternative | AST.Quantifier;
235
/** "start" for ^ or "end" for $ */
236
kind: "start" | "end";
237
}
238
239
/**
240
* Word boundary assertion \b or \B
241
*/
242
interface AST.WordBoundaryAssertion extends AST.NodeBase {
243
type: "Assertion";
244
parent: AST.Alternative | AST.Quantifier;
245
kind: "word";
246
/** True for \B (non-word boundary) */
247
negate: boolean;
248
}
249
```
250
251
## Quantifier Node
252
253
```typescript { .api }
254
/**
255
* Quantifier node for repetition {min,max}, +, *, ?
256
*/
257
interface AST.Quantifier extends AST.NodeBase {
258
type: "Quantifier";
259
parent: AST.Alternative;
260
/** Minimum repetitions */
261
min: number;
262
/** Maximum repetitions (Infinity for unbounded) */
263
max: number;
264
/** True for greedy quantifiers; false for lazy ones (a quantifier followed by ?, e.g. *?, +?, ??, {n,m}?) */
265
greedy: boolean;
266
/** The element being quantified */
267
element: AST.QuantifiableElement;
268
}
269
```
270
271
## Character Class Nodes
272
273
### Character Class
274
275
```typescript { .api }
276
/**
277
* Character class [...]
278
*/
279
interface AST.CharacterClass extends AST.NodeBase {
280
type: "CharacterClass";
281
parent: AST.Alternative | AST.Quantifier;
282
/** True for negated character class [^...] */
283
negate: boolean;
284
/** Array of elements within the character class */
285
elements: AST.CharacterClassElement[];
286
}
287
```
288
289
### Character Class Range
290
291
```typescript { .api }
292
/**
293
* Character class range a-z
294
*/
295
interface AST.CharacterClassRange extends AST.NodeBase {
296
type: "CharacterClassRange";
297
parent: AST.CharacterClass;
298
/** The minimum character of the range */
299
min: AST.Character;
300
/** The maximum character of the range */
301
max: AST.Character;
302
}
303
```
304
305
## Character Set Nodes
306
307
```typescript { .api }
308
/**
309
* Union type for all character sets
310
*/
311
type AST.CharacterSet =
312
| AST.AnyCharacterSet
313
| AST.EscapeCharacterSet
314
| AST.UnicodePropertyCharacterSet;
315
316
/**
317
* Any character set (dot .)
318
*/
319
interface AST.AnyCharacterSet extends AST.NodeBase {
320
type: "CharacterSet";
321
parent: AST.Alternative | AST.Quantifier;
322
kind: "any";
323
}
324
325
/**
326
* Escape character set \d, \s, \w, \D, \S, \W
327
*/
328
interface AST.EscapeCharacterSet extends AST.NodeBase {
329
type: "CharacterSet";
330
parent: AST.Alternative | AST.Quantifier | AST.CharacterClass;
331
kind: "digit" | "space" | "word";
332
/** True for uppercase variants (\D, \S, \W) */
333
negate: boolean;
334
}
335
336
/**
337
* Unicode property character set \p{...} or \P{...}
338
*/
339
interface AST.UnicodePropertyCharacterSet extends AST.NodeBase {
340
type: "CharacterSet";
341
parent: AST.Alternative | AST.Quantifier | AST.CharacterClass;
342
kind: "property";
343
/** The property name (e.g., "Script", "General_Category", or a binary property such as "ASCII") */
344
key: string;
345
/** The property value (e.g., "Latin") or null for binary properties */
346
value: string | null;
347
/** True for \P{...} (negated) */
348
negate: boolean;
349
}
350
```
351
352
## Character and Reference Nodes
353
354
### Character
355
356
```typescript { .api }
357
/**
358
* Single character node
359
*/
360
interface AST.Character extends AST.NodeBase {
361
type: "Character";
362
parent: AST.Alternative | AST.Quantifier | AST.CharacterClass | AST.CharacterClassRange;
363
/** The Unicode code point value of the character */
364
value: number;
365
}
366
```
367
368
### Backreference
369
370
```typescript { .api }
371
/**
372
* Backreference \1 or \k<name>
373
*/
374
interface AST.Backreference extends AST.NodeBase {
375
type: "Backreference";
376
parent: AST.Alternative | AST.Quantifier;
377
/** The reference (number or name) */
378
ref: number | string;
379
/** The resolved capturing group this refers to */
380
resolved: AST.CapturingGroup;
381
}
382
```
383
384
## Flags Node
385
386
```typescript { .api }
387
/**
388
* Flags node representing regex flags
389
*/
390
interface AST.Flags extends AST.NodeBase {
391
type: "Flags";
392
parent: AST.RegExpLiteral | null;
393
/** Dot-all flag (s) - makes . match newlines */
394
dotAll: boolean;
395
/** Global flag (g) - find all matches */
396
global: boolean;
397
/** Has indices flag (d) - generate match indices */
398
hasIndices: boolean;
399
/** Ignore case flag (i) - case insensitive matching */
400
ignoreCase: boolean;
401
/** Multiline flag (m) - ^ and $ match line boundaries */
402
multiline: boolean;
403
/** Sticky flag (y) - match from lastIndex only */
404
sticky: boolean;
405
/** Unicode flag (u) - enable full Unicode support */
406
unicode: boolean;
407
}
408
```
409
410
## Usage Examples
411
412
### Working with AST Nodes
413
414
```typescript
415
import { AST, parseRegExpLiteral } from "regexpp";
416
417
const ast = parseRegExpLiteral("/(?<year>\\d{4})-(?<month>\\d{2})/g");
418
419
// Type-safe access to properties
420
console.log(ast.type); // "RegExpLiteral"
421
console.log(ast.pattern.alternatives.length); // Number of alternatives
422
console.log(ast.flags.global); // true
423
424
// Navigate the AST structure
425
const firstAlternative = ast.pattern.alternatives[0];
426
const firstElement = firstAlternative.elements[0];
427
428
if (firstElement.type === "CapturingGroup") {
429
console.log(firstElement.name); // "year"
430
console.log(firstElement.alternatives.length);
431
}
432
433
// Check node types
434
function analyzeElement(element: AST.Element) {
435
switch (element.type) {
436
case "CapturingGroup":
437
return `Capturing group: ${element.name || 'unnamed'}`;
438
case "Quantifier":
439
return `Quantifier: {${element.min},${element.max}}`;
440
case "Assertion":
441
return `Assertion: ${element.kind}`;
442
case "CharacterClass":
443
return `Character class: ${element.negate ? 'negated' : 'normal'}`;
444
case "CharacterSet":
445
return `Character set: ${element.kind}`;
446
case "Character":
447
return `Character: ${String.fromCodePoint(element.value)}`;
448
case "Backreference":
449
return `Backreference: ${element.ref}`;
450
default:
451
return `Unknown element type`;
452
}
453
}
454
```
455
456
### Type Guards
457
458
```typescript
459
import { AST } from "regexpp";
460
461
// Type guard functions for safe type narrowing
462
function isCapturingGroup(node: AST.Node): node is AST.CapturingGroup {
463
return node.type === "CapturingGroup";
464
}
465
466
function isQuantifier(node: AST.Node): node is AST.Quantifier {
467
return node.type === "Quantifier";
468
}
469
470
function isCharacterSet(node: AST.Node): node is AST.CharacterSet {
471
return node.type === "CharacterSet";
472
}
473
474
// Usage
475
function processNode(node: AST.Node) {
476
if (isCapturingGroup(node)) {
477
// TypeScript knows this is AST.CapturingGroup
478
console.log(node.name, node.alternatives.length);
479
} else if (isQuantifier(node)) {
480
// TypeScript knows this is AST.Quantifier
481
console.log(node.min, node.max, node.greedy);
482
} else if (isCharacterSet(node)) {
483
// TypeScript knows this is AST.CharacterSet
484
console.log(node.kind);
485
}
486
}
487
```