Regular expression parser for ECMAScript that generates Abstract Syntax Trees from regex patterns.
npx @tessl/cli install tessl/npm-regexpp@3.2.0

RegexPP is a comprehensive regular expression parser and validator for ECMAScript that generates Abstract Syntax Trees (ASTs) from regex patterns. It provides three main components: RegExpParser for parsing regex literals and patterns into ASTs, RegExpValidator for validating regex syntax against the ECMAScript standard, and RegExpVisitor for traversing and manipulating regex ASTs. The library supports all modern ECMAScript regex features, including Unicode handling, named capture groups, lookbehind assertions, and other ES2015-ES2022 regex enhancements.
npm install regexpp

// Main exports
import {
AST,
RegExpParser,
RegExpValidator,
parseRegExpLiteral,
validateRegExpLiteral,
visitRegExpAST
} from "regexpp";
// For direct RegExpVisitor usage (not available from main module)
import { RegExpVisitor } from "regexpp/visitor";

For CommonJS:

// Main exports
const {
AST,
RegExpParser,
RegExpValidator,
parseRegExpLiteral,
validateRegExpLiteral,
visitRegExpAST
} = require("regexpp");
// For direct RegExpVisitor usage (not available from main module)
const { RegExpVisitor } = require("regexpp/visitor");

import { parseRegExpLiteral, validateRegExpLiteral, visitRegExpAST } from "regexpp";
// Parse a regex literal to AST
const ast = parseRegExpLiteral("/[a-z]+/gi");
console.log(ast.type); // "RegExpLiteral"
console.log(ast.flags.global); // true
console.log(ast.flags.ignoreCase); // true
// Validate regex syntax
try {
validateRegExpLiteral("/[a-z]+/gi");
console.log("Valid regex");
} catch (error) {
console.log("Invalid regex:", error.message);
}
// Visit AST nodes
visitRegExpAST(ast, {
onCharacterClassEnter(node) {
console.log("Found character class:", node.raw);
},
onQuantifierEnter(node) {
console.log("Found quantifier:", node.raw);
}
});

RegexPP is built around several key components:

- Convenience functions (parseRegExpLiteral, validateRegExpLiteral, visitRegExpAST) for common use cases
- RegExpParser class providing full parsing functionality with AST generation
- RegExpValidator class for syntax validation with detailed callbacks
- RegExpVisitor class for AST traversal and manipulation

Three main convenience functions that provide simple access to the most common regex processing operations.

/**
* Parse a given regular expression literal then make AST object
* @param source - The source code to parse (string or RegExp)
* @param options - The parsing options
* @returns The AST of the regular expression
*/
function parseRegExpLiteral(
source: string | RegExp,
options?: RegExpParser.Options,
): AST.RegExpLiteral;
/**
* Validate a given regular expression literal
* @param source - The source code to validate
* @param options - The validation options
*/
function validateRegExpLiteral(
source: string,
options?: RegExpValidator.Options,
): void;
/**
* Visit each node of a given AST
* @param node - The AST to visit
* @param handlers - The visitor callbacks
*/
function visitRegExpAST(
node: AST.Node,
handlers: RegExpVisitor.Handlers,
): void;

Advanced parsing functionality for converting regex strings into detailed Abstract Syntax Trees with full ECMAScript compliance.

class RegExpParser {
constructor(options?: RegExpParser.Options);
parseLiteral(source: string, start?: number, end?: number): AST.RegExpLiteral;
parsePattern(source: string, start?: number, end?: number, uFlag?: boolean): AST.Pattern;
parseFlags(source: string, start?: number, end?: number): AST.Flags;
}
interface RegExpParser.Options {
/** Disable Annex B syntax. Default is false */
strict?: boolean;
/** ECMAScript version. Default is 2022 */
ecmaVersion?: EcmaVersion;
}

Syntax validation with optional detailed callbacks for each regex component during validation.

class RegExpValidator {
constructor(options?: RegExpValidator.Options);
validateLiteral(source: string, start?: number, end?: number): void;
validatePattern(source: string, start?: number, end?: number, uFlag?: boolean): void;
validateFlags(source: string, start?: number, end?: number): void;
}
interface RegExpValidator.Options {
/** Disable Annex B syntax. Default is false */
strict?: boolean;
/** ECMAScript version. Default is 2022 */
ecmaVersion?: EcmaVersion;
// Plus many optional callback functions for validation events
}

Visitor pattern implementation for traversing and manipulating regex Abstract Syntax Trees.

class RegExpVisitor {
constructor(handlers: RegExpVisitor.Handlers);
visit(node: AST.Node): void;
}
interface RegExpVisitor.Handlers {
// Optional callback functions for entering/leaving each AST node type
onRegExpLiteralEnter?(node: AST.RegExpLiteral): void;
onRegExpLiteralLeave?(node: AST.RegExpLiteral): void;
onPatternEnter?(node: AST.Pattern): void;
onPatternLeave?(node: AST.Pattern): void;
// ... many more callback options
}

Comprehensive type system covering all ECMAScript regular expression syntax elements.

// Core node types
type AST.Node = AST.BranchNode | AST.LeafNode;
type AST.BranchNode = AST.RegExpLiteral | AST.Pattern | AST.Alternative | /* ... */;
type AST.LeafNode = AST.BoundaryAssertion | AST.CharacterSet | /* ... */;
// Key interfaces
interface AST.RegExpLiteral extends AST.NodeBase {
type: "RegExpLiteral";
pattern: AST.Pattern;
flags: AST.Flags;
}
interface AST.Pattern extends AST.NodeBase {
type: "Pattern";
alternatives: AST.Alternative[];
}

// ECMAScript version support
type EcmaVersion = 5 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022;
// Base interface for all AST nodes
interface AST.NodeBase {
type: string;
parent: AST.Node | null;
start: number;
end: number;
raw: string;
}