Regular expression parser for ECMAScript that generates Abstract Syntax Trees from regex patterns.
—
Comprehensive type system covering all ECMAScript regular expression syntax elements as Abstract Syntax Tree nodes.
The foundational type unions that categorize all AST nodes.
/**
* The type which includes all nodes
*/
type AST.Node = AST.BranchNode | AST.LeafNode;
/**
* The type which includes all branch nodes (nodes with children)
*/
type AST.BranchNode =
| AST.RegExpLiteral
| AST.Pattern
| AST.Alternative
| AST.Group
| AST.CapturingGroup
| AST.Quantifier
| AST.CharacterClass
| AST.LookaroundAssertion
| AST.CharacterClassRange;
/**
* The type which includes all leaf nodes (nodes without children)
*/
type AST.LeafNode =
| AST.BoundaryAssertion
| AST.CharacterSet
| AST.Character
| AST.Backreference
| AST.Flags;
/**
* The type which includes all element nodes (pattern components)
*/
type AST.Element = AST.Assertion | AST.Quantifier | AST.QuantifiableElement;
/**
* The type which includes all elements that can be quantified
*/
type AST.QuantifiableElement =
| AST.Group
| AST.CapturingGroup
| AST.CharacterClass
| AST.CharacterSet
| AST.Character
| AST.Backreference
| AST.LookaheadAssertion; // Lookahead assertions are quantifiable in Annex-B
/**
* The type which includes all character class elements
*/
type AST.CharacterClassElement =
| AST.EscapeCharacterSet
| AST.UnicodePropertyCharacterSet
| AST.Character
| AST.CharacterClassRange;

All AST nodes inherit from this base interface, which provides common properties.
/**
* The base interface for all AST nodes
*/
interface AST.NodeBase {
/** The type discriminator for this node */
type: string;
/** The parent node, or null for root nodes */
parent: AST.Node | null;
/** The start index of this node in the original source */
start: number;
/** The end index of this node in the original source */
end: number;
/** The raw source text for this node */
raw: string;
}

The root node representing a complete regular expression literal.
/**
* Regular expression literal node (/pattern/flags)
*/
interface AST.RegExpLiteral extends AST.NodeBase {
type: "RegExpLiteral";
parent: null;
/** The pattern part of the regex */
pattern: AST.Pattern;
/** The flags part of the regex */
flags: AST.Flags;
}

Container for the main regex pattern, containing alternatives.
/**
* Pattern node containing alternatives
*/
interface AST.Pattern extends AST.NodeBase {
type: "Pattern";
parent: AST.RegExpLiteral | null;
/** Array of alternative patterns (separated by |) */
alternatives: AST.Alternative[];
}

A single alternative within a pattern (one branch of a disjunction).
/**
* Alternative node representing one branch of a disjunction
*/
interface AST.Alternative extends AST.NodeBase {
type: "Alternative";
parent: AST.Pattern | AST.Group | AST.CapturingGroup | AST.LookaroundAssertion;
/** Array of elements in this alternative */
elements: AST.Element[];
}

Non-capturing group node.
/**
* Non-capturing group (?:...)
*/
interface AST.Group extends AST.NodeBase {
type: "Group";
parent: AST.Alternative | AST.Quantifier;
/** Array of alternatives within this group */
alternatives: AST.Alternative[];
}

Capturing group node with optional name.
/**
* Capturing group (...) or named capturing group (?<name>...)
*/
interface AST.CapturingGroup extends AST.NodeBase {
type: "CapturingGroup";
parent: AST.Alternative | AST.Quantifier;
/** The name of the group (for named groups) or null */
name: string | null;
/** Array of alternatives within this group */
alternatives: AST.Alternative[];
/** Array of backreferences that refer to this group */
references: AST.Backreference[];
}/**
* Union type for all lookaround assertions
*/
type AST.LookaroundAssertion = AST.LookaheadAssertion | AST.LookbehindAssertion;
/**
* Lookahead assertion (?=...) or (?!...)
*/
interface AST.LookaheadAssertion extends AST.NodeBase {
type: "Assertion";
parent: AST.Alternative | AST.Quantifier;
kind: "lookahead";
/** True for negative lookahead (?!...) */
negate: boolean;
/** Array of alternatives within this assertion */
alternatives: AST.Alternative[];
}
/**
* Lookbehind assertion (?<=...) or (?<!...)
*/
interface AST.LookbehindAssertion extends AST.NodeBase {
type: "Assertion";
parent: AST.Alternative;
kind: "lookbehind";
/** True for negative lookbehind (?<!...) */
negate: boolean;
/** Array of alternatives within this assertion */
alternatives: AST.Alternative[];
}/**
* Union type for all assertions
*/
type AST.Assertion = AST.BoundaryAssertion | AST.LookaroundAssertion;
/**
* Union type for boundary assertions
*/
type AST.BoundaryAssertion = AST.EdgeAssertion | AST.WordBoundaryAssertion;
/**
* Edge assertion ^ or $
*/
interface AST.EdgeAssertion extends AST.NodeBase {
type: "Assertion";
parent: AST.Alternative | AST.Quantifier;
/** "start" for ^ or "end" for $ */
kind: "start" | "end";
}
/**
* Word boundary assertion \b or \B
*/
interface AST.WordBoundaryAssertion extends AST.NodeBase {
type: "Assertion";
parent: AST.Alternative | AST.Quantifier;
kind: "word";
/** True for \B (non-word boundary) */
negate: boolean;
}/**
* Quantifier node for repetition {min,max}, +, *, ?
*/
interface AST.Quantifier extends AST.NodeBase {
type: "Quantifier";
parent: AST.Alternative;
/** Minimum repetitions */
min: number;
/** Maximum repetitions (Infinity for unbounded) */
max: number;
/** True for greedy quantifiers; false for lazy ones (`??`, `+?`, `*?`, `{n,m}?`) */
greedy: boolean;
/** The element being quantified */
element: AST.QuantifiableElement;
}/**
* Character class [...]
*/
interface AST.CharacterClass extends AST.NodeBase {
type: "CharacterClass";
parent: AST.Alternative | AST.Quantifier;
/** True for negated character class [^...] */
negate: boolean;
/** Array of elements within the character class */
elements: AST.CharacterClassElement[];
}/**
* Character class range a-z
*/
interface AST.CharacterClassRange extends AST.NodeBase {
type: "CharacterClassRange";
parent: AST.CharacterClass;
/** The minimum character of the range */
min: AST.Character;
/** The maximum character of the range */
max: AST.Character;
}/**
* Union type for all character sets
*/
type AST.CharacterSet =
| AST.AnyCharacterSet
| AST.EscapeCharacterSet
| AST.UnicodePropertyCharacterSet;
/**
* Any character set (dot .)
*/
interface AST.AnyCharacterSet extends AST.NodeBase {
type: "CharacterSet";
parent: AST.Alternative | AST.Quantifier;
kind: "any";
}
/**
* Escape character set \d, \s, \w, \D, \S, \W
*/
interface AST.EscapeCharacterSet extends AST.NodeBase {
type: "CharacterSet";
parent: AST.Alternative | AST.Quantifier | AST.CharacterClass;
kind: "digit" | "space" | "word";
/** True for uppercase variants (\D, \S, \W) */
negate: boolean;
}
/**
* Unicode property character set \p{...} or \P{...}
*/
interface AST.UnicodePropertyCharacterSet extends AST.NodeBase {
type: "CharacterSet";
parent: AST.Alternative | AST.Quantifier | AST.CharacterClass;
kind: "property";
/** The property name (e.g., "Letter", "Script") */
key: string;
/** The property value (e.g., "Latin") or null for binary properties */
value: string | null;
/** True for \P{...} (negated) */
negate: boolean;
}/**
* Single character node
*/
interface AST.Character extends AST.NodeBase {
type: "Character";
parent: AST.Alternative | AST.Quantifier | AST.CharacterClass | AST.CharacterClassRange;
/** The Unicode code point value of the character */
value: number;
}/**
* Backreference \1 or \k<name>
*/
interface AST.Backreference extends AST.NodeBase {
type: "Backreference";
parent: AST.Alternative | AST.Quantifier;
/** The reference (number or name) */
ref: number | string;
/** The resolved capturing group this refers to */
resolved: AST.CapturingGroup;
}/**
* Flags node representing regex flags
*/
interface AST.Flags extends AST.NodeBase {
type: "Flags";
parent: AST.RegExpLiteral | null;
/** Dot-all flag (s) - makes . match newlines */
dotAll: boolean;
/** Global flag (g) - find all matches */
global: boolean;
/** Has indices flag (d) - generate match indices */
hasIndices: boolean;
/** Ignore case flag (i) - case insensitive matching */
ignoreCase: boolean;
/** Multiline flag (m) - ^ and $ match line boundaries */
multiline: boolean;
/** Sticky flag (y) - match from lastIndex only */
sticky: boolean;
/** Unicode flag (u) - enable full Unicode support */
unicode: boolean;
}import { parseRegExpLiteral } from "regexpp";
// Parse a literal containing two named capturing groups and the global flag
const ast = parseRegExpLiteral("/(?<year>\\d{4})-(?<month>\\d{2})/g");

// Property access on the result is type-checked
console.log(ast.type); // "RegExpLiteral"
console.log(ast.pattern.alternatives.length); // Number of alternatives
console.log(ast.flags.global); // true

// Descend to the first element of the first alternative
const [firstAlternative] = ast.pattern.alternatives;
const [firstElement] = firstAlternative.elements;
if (firstElement.type === "CapturingGroup") {
  // The `type` check narrows the element, so group-only fields are available
  const { name, alternatives } = firstElement;
  console.log(name); // "year"
  console.log(alternatives.length);
}
// Check node types
/**
 * Produces a short, human-readable description of a pattern element.
 *
 * The switch discriminates on `element.type`; every member of `AST.Element`
 * has its own case, including non-capturing groups.
 *
 * @param element - An element taken from an alternative's `elements` array.
 * @returns A one-line summary of the element, or "Unknown element type" for a
 *   `type` value this function does not recognise.
 */
function analyzeElement(element: AST.Element): string {
  switch (element.type) {
    case "Group":
      // Non-capturing groups are elements too (via AST.QuantifiableElement)
      return "Non-capturing group";
    case "CapturingGroup":
      // `name` is null for unnamed groups
      return `Capturing group: ${element.name ?? 'unnamed'}`;
    case "Quantifier":
      return `Quantifier: {${element.min},${element.max}}`;
    case "Assertion":
      return `Assertion: ${element.kind}`;
    case "CharacterClass":
      return `Character class: ${element.negate ? 'negated' : 'normal'}`;
    case "CharacterSet":
      return `Character set: ${element.kind}`;
    case "Character":
      // `value` is a code point, so decode it with fromCodePoint
      return `Character: ${String.fromCodePoint(element.value)}`;
    case "Backreference":
      return `Backreference: ${element.ref}`;
    default:
      // Kept as a fallback for node types this function does not know about
      return `Unknown element type`;
  }
}

import { AST } from "regexpp";
// Type guard functions for safe casting
/**
 * Type guard that reports whether a node is a capturing group.
 *
 * @param node - Any AST node.
 * @returns `true` when the node's `type` discriminator is "CapturingGroup".
 */
function isCapturingGroup(node: AST.Node): node is AST.CapturingGroup {
  const { type } = node;
  return type === "CapturingGroup";
}
/**
 * Type guard that reports whether a node is a quantifier.
 *
 * @param node - Any AST node.
 * @returns `true` when the node's `type` discriminator is "Quantifier".
 */
function isQuantifier(node: AST.Node): node is AST.Quantifier {
  const { type } = node;
  return type === "Quantifier";
}
/**
 * Type guard that reports whether a node is a character set.
 *
 * @param node - Any AST node.
 * @returns `true` when the node's `type` discriminator is "CharacterSet".
 */
function isCharacterSet(node: AST.Node): node is AST.CharacterSet {
  const { type } = node;
  return type === "CharacterSet";
}
// Usage
/**
 * Logs a few type-specific properties of a node, using the type guards to
 * narrow it first. Nodes that match none of the guards are ignored.
 *
 * @param node - Any AST node.
 */
function processNode(node: AST.Node) {
  if (isCapturingGroup(node)) {
    // Narrowed to AST.CapturingGroup
    const { name, alternatives } = node;
    console.log(name, alternatives.length);
    return;
  }
  if (isQuantifier(node)) {
    // Narrowed to AST.Quantifier
    const { min, max, greedy } = node;
    console.log(min, max, greedy);
    return;
  }
  if (isCharacterSet(node)) {
    // Narrowed to AST.CharacterSet
    console.log(node.kind);
  }
}

Install with Tessl CLI
npx tessl i tessl/npm-regexpp