tessl/npm-regexpp

Regular expression parser for ECMAScript that generates Abstract Syntax Trees from regex patterns.

—

Pending

Overview

Eval results

Files

Regular Expression Parsing

Name: tessl/npm-regexpp
Author: tessl

The RegExpParser class parses regular expression literals, patterns, and flag strings into Abstract Syntax Trees, following the ECMAScript syntax of the configured version (ES5 through ES2022).

Capabilities

RegExp Parser Class

Creates a parser instance with configurable options for parsing regular expressions into ASTs.

/**
 * Regular expression parser that converts regex strings (literals, patterns,
 * or flag strings) to Abstract Syntax Trees.
 */
class RegExpParser {
  /**
   * Initialize the parser with options.
   * @param options - Parser configuration options. When omitted, the parser
   *   uses the defaults documented on RegExpParser.Options
   *   (ecmaVersion 2022, strict false).
   */
  constructor(options?: RegExpParser.Options);
}

interface RegExpParser.Options {
  /** The flag to disable Annex B syntax. Default is false */
  strict?: boolean;
  /**
   * ECMAScript version. Default is 2022
   * - 2015 added u and y flags
   * - 2018 added s flag, Named Capturing Group, Lookbehind Assertion, and Unicode Property Escape
   * - 2019, 2020, and 2021 added more valid Unicode Property Escapes
   * - 2022 added d flag (hasIndices)
   */
  ecmaVersion?: EcmaVersion;
}

Usage Examples:

import { RegExpParser } from "regexpp";

// Create parser with default options (ES2022, non-strict)
const parser = new RegExpParser();

// Create a parser limited to ES2018 syntax (s flag, named capturing groups,
// lookbehind assertions, Unicode property escapes); Annex B syntax stays enabled
const es2018Parser = new RegExpParser({
  ecmaVersion: 2018,
  strict: false
});

// Create strict parser (disables Annex B features) targeting ES2022
const strictParser = new RegExpParser({
  strict: true,
  ecmaVersion: 2022
});

Parse Literal

Parses a complete regular expression literal including pattern and flags.

/**
 * Parse a regular expression literal (delimiters, pattern, and flags)
 * @param source - The source code to parse (e.g., "/abc/g")
 * @param start - The start index in the source code. Default is 0
 * @param end - The end index (exclusive) in the source code. Default is source.length
 * @returns The AST of the regular expression literal
 * @throws RegExpSyntaxError if the literal has invalid syntax
 */
parseLiteral(source: string, start?: number, end?: number): AST.RegExpLiteral;

Usage Examples:

import { RegExpParser } from "regexpp";

const parser = new RegExpParser();

// Parse complete literal
const ast1 = parser.parseLiteral("/[a-z]+/gi");
console.log(ast1.type); // "RegExpLiteral"
console.log(ast1.pattern.alternatives.length); // 1 (the pattern contains no "|")
console.log(ast1.flags.global); // true
console.log(ast1.flags.ignoreCase); // true

// Parse substring of source: indices 16 (inclusive) to 26 (exclusive)
const source = "const pattern = /\\d{2,4}/g;";
const ast2 = parser.parseLiteral(source, 16, 26); // Just the "/\\d{2,4}/g" part
console.log(ast2.raw); // "/\\d{2,4}/g"

// Parse complex patterns (named capturing groups)
const ast3 = parser.parseLiteral("/(?<year>\\d{4})-(?<month>\\d{2})/");
console.log(ast3.pattern.alternatives[0].elements[0].name); // "year"

Parse Pattern

Parses just the pattern part of a regular expression (without delimiters and flags).

/**
 * Parse a regular expression pattern (no delimiters, no flags)
 * @param source - The source code to parse (e.g., "abc")
 * @param start - The start index in the source code. Default is 0
 * @param end - The end index (exclusive) in the source code. Default is source.length
 * @param uFlag - The flag to enable Unicode mode (needed to parse syntax such
 *   as \p{...} as a Unicode property escape)
 * @returns The AST of the regular expression pattern
 * @throws RegExpSyntaxError if the pattern has invalid syntax
 */
parsePattern(source: string, start?: number, end?: number, uFlag?: boolean): AST.Pattern;

Usage Examples:

import { RegExpParser } from "regexpp";

const parser = new RegExpParser();

// Parse simple pattern
const pattern1 = parser.parsePattern("abc");
console.log(pattern1.type); // "Pattern"
console.log(pattern1.alternatives[0].elements.length); // 3 (a, b, c)

// Parse pattern with Unicode flag
const pattern2 = parser.parsePattern("\\p{Letter}+", 0, undefined, true);
// A quantified element is wrapped in a Quantifier node; the "CharacterSet"
// for \p{Letter} is that node's `element`
console.log(pattern2.alternatives[0].elements[0].type); // "Quantifier"

// Parse complex pattern
const pattern3 = parser.parsePattern("(\\d{2})|(\\w+)");
console.log(pattern3.alternatives.length); // 2 alternatives
console.log(pattern3.alternatives[0].elements[0].type); // "CapturingGroup"

// Parse substring: indices 7 (inclusive) to 13 (exclusive)
const source = "prefix-[a-z]+-suffix";
const pattern4 = parser.parsePattern(source, 7, 13); // Just "[a-z]+"
console.log(pattern4.alternatives[0].elements.length); // 1 (a Quantifier wrapping the character class)

Parse Flags

Parses just the flags part of a regular expression.

/**
 * Parse regular expression flags (the part after the closing "/")
 * @param source - The source code to parse (e.g., "gim")
 * @param start - The start index in the source code. Default is 0
 * @param end - The end index (exclusive) in the source code. Default is source.length
 * @returns The AST of the regular expression flags
 * @throws RegExpSyntaxError if the flags have invalid syntax
 */
parseFlags(source: string, start?: number, end?: number): AST.Flags;

Usage Examples:

import { RegExpParser } from "regexpp";

const parser = new RegExpParser();

// Parse flags
const flags1 = parser.parseFlags("gim");
console.log(flags1.type); // "Flags"
console.log(flags1.global); // true
console.log(flags1.ignoreCase); // true
console.log(flags1.multiline); // true
console.log(flags1.unicode); // false (no "u" in the input)

// Parse every flag documented on AST.Flags (default ecmaVersion 2022)
const flags2 = parser.parseFlags("gimsuyd");
console.log(flags2.global); // true (g)
console.log(flags2.ignoreCase); // true (i)
console.log(flags2.multiline); // true (m)
console.log(flags2.dotAll); // true (s)
console.log(flags2.unicode); // true (u)
console.log(flags2.sticky); // true (y)
console.log(flags2.hasIndices); // true (d)

// Parse flags from substring: indices 9 (inclusive) to 11 (exclusive)
const source = "/pattern/gi;";
const flags3 = parser.parseFlags(source, 9, 11); // Just "gi"
console.log(flags3.global); // true
console.log(flags3.ignoreCase); // true

Error Handling

All parsing methods throw a RegExpSyntaxError (a subclass of SyntaxError that also carries the error position in `index`) when the input has invalid syntax:

import { RegExpParser } from "regexpp";

const parser = new RegExpParser();

try {
  parser.parseLiteral("/[z-a]/"); // Invalid character class range
} catch (error) {
  // NOTE(review): RegExpSyntaxError extends SyntaxError and does not appear to
  // override `name`, so this should print "SyntaxError" — confirm against the
  // installed version
  console.log(error.name); // "SyntaxError"
  console.log(error.message); // Detailed error message
  console.log(error.index); // Position where error occurred
}

Types

/** ECMAScript versions accepted by RegExpParser.Options.ecmaVersion. */
type EcmaVersion = 5 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022;

// Return types from parsing methods

/** Root node returned by parseLiteral: a whole /pattern/flags literal. */
interface AST.RegExpLiteral extends AST.NodeBase {
  type: "RegExpLiteral";
  parent: null;
  pattern: AST.Pattern;
  flags: AST.Flags;
}

/** Node returned by parsePattern; also the `pattern` of a RegExpLiteral. */
interface AST.Pattern extends AST.NodeBase {
  type: "Pattern";
  parent: AST.RegExpLiteral | null;
  alternatives: AST.Alternative[];
}

/** Node returned by parseFlags; also the `flags` of a RegExpLiteral. */
interface AST.Flags extends AST.NodeBase {
  type: "Flags";
  parent: AST.RegExpLiteral | null;
  dotAll: boolean;    // s flag (ES2018)
  global: boolean;    // g flag
  hasIndices: boolean; // d flag (ES2022)
  ignoreCase: boolean; // i flag
  multiline: boolean;  // m flag
  sticky: boolean;     // y flag (ES2015)
  unicode: boolean;    // u flag (ES2015)
}

Install with Tessl CLI