# AST Traversal and Manipulation

The RegExpVisitor class implements the visitor pattern for traversing and manipulating regex Abstract Syntax Trees with custom callbacks for each node type.

## Capabilities

### RegExp Visitor Class

Creates a visitor instance with handlers for different AST node types.

```typescript { .api }
/**
 * The visitor to walk on AST using the visitor pattern
 */
class RegExpVisitor {
  /**
   * Initialize the visitor with handlers
   * @param handlers - Callbacks for each node type
   */
  constructor(handlers: RegExpVisitor.Handlers);
}

/**
 * Optional callbacks accepted by the visitor: one `Enter`/`Leave` pair
 * per AST node type. Omit any handler you do not need.
 */
interface RegExpVisitor.Handlers {
  // Root and structural nodes
  onRegExpLiteralEnter?(node: AST.RegExpLiteral): void;
  onRegExpLiteralLeave?(node: AST.RegExpLiteral): void;
  onPatternEnter?(node: AST.Pattern): void;
  onPatternLeave?(node: AST.Pattern): void;
  onAlternativeEnter?(node: AST.Alternative): void;
  onAlternativeLeave?(node: AST.Alternative): void;

  // Groups and assertions
  onGroupEnter?(node: AST.Group): void;
  onGroupLeave?(node: AST.Group): void;
  onCapturingGroupEnter?(node: AST.CapturingGroup): void;
  onCapturingGroupLeave?(node: AST.CapturingGroup): void;
  onAssertionEnter?(node: AST.Assertion): void;
  onAssertionLeave?(node: AST.Assertion): void;

  // Quantifiers and characters
  onQuantifierEnter?(node: AST.Quantifier): void;
  onQuantifierLeave?(node: AST.Quantifier): void;
  onCharacterEnter?(node: AST.Character): void;
  onCharacterLeave?(node: AST.Character): void;
  onCharacterSetEnter?(node: AST.CharacterSet): void;
  onCharacterSetLeave?(node: AST.CharacterSet): void;

  // Character classes
  onCharacterClassEnter?(node: AST.CharacterClass): void;
  onCharacterClassLeave?(node: AST.CharacterClass): void;
  onCharacterClassRangeEnter?(node: AST.CharacterClassRange): void;
  onCharacterClassRangeLeave?(node: AST.CharacterClassRange): void;

  // Backreferences and flags
  onBackreferenceEnter?(node: AST.Backreference): void;
  onBackreferenceLeave?(node: AST.Backreference): void;
  onFlagsEnter?(node: AST.Flags): void;
  onFlagsLeave?(node: AST.Flags): void;
}
```

**Usage Examples:**

```typescript
import { parseRegExpLiteral } from "regexpp";
import { RegExpVisitor } from "regexpp/visitor";

// Create visitor with specific handlers
const visitor = new RegExpVisitor({
  // Logs the source text of each character class
  onCharacterClassEnter(node) {
    console.log(`Entering character class: ${node.raw}`);
  },
  // Logs the bounds and greediness of each quantifier
  onQuantifierEnter(node) {
    console.log(`Found quantifier: min=${node.min}, max=${node.max}, greedy=${node.greedy}`);
  }
});

// Create analyzer visitor
const analyzer = new RegExpVisitor({
  // Distinguishes named groups from unnamed ones via `node.name`
  onCapturingGroupEnter(node) {
    if (node.name) {
      console.log(`Named capture group: ${node.name}`);
    } else {
      console.log("Unnamed capture group");
    }
  },
  // Logs the reference carried by each backreference node
  onBackreferenceEnter(node) {
    console.log(`Backreference to: ${node.ref}`);
  }
});
```

### Visit AST

Traverses an Abstract Syntax Tree starting from the given node.

```typescript { .api }
/**
 * Visit a given node and descendant nodes
 * @param node - The root node to visit (typically AST.RegExpLiteral)
 */
visit(node: AST.Node): void;
```

**Usage Examples:**

```typescript
import { parseRegExpLiteral } from "regexpp";
import { RegExpVisitor } from "regexpp/visitor";

const ast = parseRegExpLiteral("/(?<year>\\d{4})-(?<month>\\d{2})/g");

// Count different node types
const counts = {
  characters: 0,
  groups: 0,
  quantifiers: 0,
  characterSets: 0
};

const counter = new RegExpVisitor({
  onCharacterEnter() { counts.characters++; },
  onCapturingGroupEnter() { counts.groups++; },
  onQuantifierEnter() { counts.quantifiers++; },
  onCharacterSetEnter() { counts.characterSets++; }
});

counter.visit(ast);
console.log(counts);

// Extract specific information.
// The element types are spelled out because a bare `[]` in an object
// literal is inferred as `never[]` under strict mode, which would make
// every `push` below a type error.
const info: {
  namedGroups: { name: string; position: number; raw: string }[];
  quantifiers: { min: number; max: number; greedy: boolean; raw: string; position: number }[];
  assertions: { kind: string; raw: string; position: number }[];
} = {
  namedGroups: [],
  quantifiers: [],
  assertions: []
};

const extractor = new RegExpVisitor({
  // Only named groups are recorded; unnamed groups are skipped
  onCapturingGroupEnter(node) {
    if (node.name) {
      info.namedGroups.push({
        name: node.name,
        position: node.start,
        raw: node.raw
      });
    }
  },

  onQuantifierEnter(node) {
    info.quantifiers.push({
      min: node.min,
      max: node.max,
      greedy: node.greedy,
      raw: node.raw,
      position: node.start
    });
  },

  onAssertionEnter(node) {
    info.assertions.push({
      kind: node.kind,
      raw: node.raw,
      position: node.start
    });
  }
});

extractor.visit(ast);
console.log(info);
```

## Advanced Usage Patterns

### AST Transformation Tracking

Record every quantified element while walking the tree to understand how a regex is structured:

```typescript
import { parseRegExpLiteral } from "regexpp";
import { RegExpVisitor } from "regexpp/visitor";

const ast = parseRegExpLiteral("/[a-z]+\\d{2,4}(?:suffix)?/gi");

/** One quantifier together with the element it applies to. */
interface QuantifiedElement {
  quantifier: { min: number; max: number; greedy: boolean; raw: string };
  element: { type: string; raw: string };
  position: number;
}

// Track all quantified elements.
// Typed explicitly: an un-annotated `[]` captured by the handler below
// would be an implicit `any[]` under strict mode.
const quantifiedElements: QuantifiedElement[] = [];

const tracker = new RegExpVisitor({
  onQuantifierEnter(node) {
    // `node.element` is the node the quantifier applies to
    const elementType = node.element.type;
    const elementRaw = node.element.raw;

    quantifiedElements.push({
      quantifier: {
        min: node.min,
        max: node.max,
        greedy: node.greedy,
        raw: node.raw
      },
      element: {
        type: elementType,
        raw: elementRaw
      },
      position: node.start
    });
  }
});

tracker.visit(ast);
console.log(quantifiedElements);
// Output: Details about [a-z]+, \d{2,4}, and (?:suffix)?
```

### Pattern Analysis

Analyze regex patterns for complexity and features:

```typescript
import { parseRegExpLiteral } from "regexpp";
import { RegExpVisitor } from "regexpp/visitor";

/**
 * Parses a regex literal and summarizes its size, features, and flags.
 *
 * @param regexString - Regex literal source including delimiters and flags,
 *   e.g. `"/a+/g"`.
 * @returns An object with the number of visited nodes (`totalNodes`),
 *   per-feature counters (`features`), and the literal's flag values (`flags`).
 */
function analyzeRegexComplexity(regexString: string) {
  const ast = parseRegExpLiteral(regexString);

  const analysis = {
    totalNodes: 0,
    features: {
      namedGroups: 0,
      lookarounds: 0,
      backreferences: 0,
      unicodeProperties: 0,
      characterClasses: 0,
      quantifiers: 0
    },
    flags: {
      global: ast.flags.global,
      ignoreCase: ast.flags.ignoreCase,
      multiline: ast.flags.multiline,
      dotAll: ast.flags.dotAll,
      unicode: ast.flags.unicode,
      sticky: ast.flags.sticky,
      hasIndices: ast.flags.hasIndices
    }
  };

  // Every Enter handler bumps totalNodes so the total covers all node types.
  const analyzer = new RegExpVisitor({
    onRegExpLiteralEnter() { analysis.totalNodes++; },
    onPatternEnter() { analysis.totalNodes++; },
    onAlternativeEnter() { analysis.totalNodes++; },
    onGroupEnter() { analysis.totalNodes++; },
    onCapturingGroupEnter(node) {
      analysis.totalNodes++;
      if (node.name) analysis.features.namedGroups++;
    },
    onAssertionEnter(node) {
      analysis.totalNodes++;
      // Boundary assertions are counted as nodes but not as lookarounds
      if (node.kind === 'lookahead' || node.kind === 'lookbehind') {
        analysis.features.lookarounds++;
      }
    },
    onQuantifierEnter() {
      analysis.totalNodes++;
      analysis.features.quantifiers++;
    },
    onCharacterClassEnter() {
      analysis.totalNodes++;
      analysis.features.characterClasses++;
    },
    // Ranges such as `a-z` are nodes too; without this handler they
    // would be missing from totalNodes.
    onCharacterClassRangeEnter() { analysis.totalNodes++; },
    onCharacterSetEnter(node) {
      analysis.totalNodes++;
      if (node.kind === 'property') {
        analysis.features.unicodeProperties++;
      }
    },
    onCharacterEnter() { analysis.totalNodes++; },
    onBackreferenceEnter() {
      analysis.totalNodes++;
      analysis.features.backreferences++;
    },
    onFlagsEnter() { analysis.totalNodes++; }
  });

  analyzer.visit(ast);
  return analysis;
}

// Example usage
const complexity = analyzeRegexComplexity("/(?<year>\\d{4})-(?<month>\\d{2})\\k<year>/g");
console.log(complexity);
```

### Pattern Validation

Use visitor to validate specific regex patterns or constraints:

```typescript
import { parseRegExpLiteral } from "regexpp";
import { RegExpVisitor } from "regexpp/visitor";

/**
 * Walks a regex literal and collects warnings about constructs this
 * example treats as risky: lazy unbounded quantifiers, very large minimum
 * repetition counts, backreferences, and lookbehind assertions.
 *
 * @param regexString - Regex literal source including delimiters,
 *   e.g. `"/a+?/"`.
 * @returns `safe` is `true` only when no warnings were collected;
 *   `issues` holds one message per finding, in visit order.
 */
function validateSafeRegex(regexString: string): { safe: boolean; issues: string[] } {
  const ast = parseRegExpLiteral(regexString);
  // Annotated so the array is not an implicit `any[]` inside the handlers
  const issues: string[] = [];

  const validator = new RegExpVisitor({
    onQuantifierEnter(node) {
      // Check for potentially dangerous quantifiers
      if (node.max === Infinity && !node.greedy) {
        issues.push(`Lazy infinite quantifier at position ${node.start} may cause performance issues`);
      }

      if (node.min > 1000) {
        issues.push(`Large minimum quantifier {${node.min},} at position ${node.start} may cause performance issues`);
      }
    },

    onBackreferenceEnter(node) {
      // Warn about backreferences in certain contexts
      issues.push(`Backreference \\${node.ref} at position ${node.start} may impact performance`);
    },

    onAssertionEnter(node) {
      if (node.kind === 'lookbehind') {
        issues.push(`Lookbehind assertion at position ${node.start} has limited browser support`);
      }
    }
  });

  validator.visit(ast);

  return {
    safe: issues.length === 0,
    issues
  };
}

// Example usage
const validation = validateSafeRegex("/(?<=\\w+)\\d+.*?/");
console.log(validation);
```

## Types

```typescript { .api }
// All AST node types that can be visited
type AST.Node = AST.BranchNode | AST.LeafNode;

// Node types listed as branch nodes
type AST.BranchNode =
  | AST.RegExpLiteral
  | AST.Pattern
  | AST.Alternative
  | AST.Group
  | AST.CapturingGroup
  | AST.Quantifier
  | AST.CharacterClass
  | AST.LookaroundAssertion
  | AST.CharacterClassRange;

// Node types listed as leaf nodes
type AST.LeafNode =
  | AST.BoundaryAssertion
  | AST.CharacterSet
  | AST.Character
  | AST.Backreference
  | AST.Flags;

// Handler function signature
type VisitorHandler<T extends AST.Node> = (node: T) => void;
```