CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/npm-regexpp

Regular expression parser for ECMAScript that generates Abstract Syntax Trees from regex patterns.

Pending
Overview
Eval results
Files

docs/validation.md

Regular Expression Validation

The RegExpValidator class validates regular expression syntax without generating an AST. It can optionally invoke a callback for each regex component it encounters during validation.

Capabilities

RegExp Validator Class

Creates a validator instance with configurable options and optional callbacks for validation events.

/**
 * Regular expression validator that checks regex syntax compliance.
 *
 * Validation is performed by the validateLiteral, validatePattern and
 * validateFlags methods, each of which throws a RegExpSyntaxError on
 * invalid input. No AST is produced; the structure of the regex is only
 * reported through the optional callbacks supplied in the options.
 */
class RegExpValidator {
  /**
   * Initialize the validator with options
   * @param options - Validator configuration options and callbacks.
   *   May be omitted entirely, as in `new RegExpValidator()`.
   */
  constructor(options?: RegExpValidator.Options);
}

/**
 * Options accepted by the RegExpValidator constructor. Every member is optional.
 *
 * NOTE(review): the dotted name `RegExpValidator.Options` is documentation
 * shorthand and is not valid TypeScript as written; presumably the real
 * typings declare `Options` inside a `RegExpValidator` namespace — confirm.
 */
interface RegExpValidator.Options {
  /** The flag to disable Annex B syntax. Default is false */
  strict?: boolean;
  /** ECMAScript version. Default is 2022 */
  ecmaVersion?: EcmaVersion;
  
  // Validation event callbacks (all optional)
  // `start` / `end` are presumably offsets into the validated source string — confirm.

  // Literal, flags and pattern boundaries
  onLiteralEnter?(start: number): void;
  onLiteralLeave?(start: number, end: number): void;
  // One boolean per flag: g, i, m, u, y, s, d (in that parameter order).
  onFlags?(start: number, end: number, global: boolean, ignoreCase: boolean, multiline: boolean, unicode: boolean, sticky: boolean, dotAll: boolean, hasIndices: boolean): void;
  onPatternEnter?(start: number): void;
  onPatternLeave?(start: number, end: number): void;

  // Disjunctions and their alternatives
  onDisjunctionEnter?(start: number): void;
  onDisjunctionLeave?(start: number, end: number): void;
  onAlternativeEnter?(start: number, index: number): void;
  onAlternativeLeave?(start: number, end: number, index: number): void;

  // Groups; `name` is null for an unnamed capturing group
  onGroupEnter?(start: number): void;
  onGroupLeave?(start: number, end: number): void;
  onCapturingGroupEnter?(start: number, name: string | null): void;
  onCapturingGroupLeave?(start: number, end: number, name: string | null): void;

  // Quantifiers; `max` may be Infinity for an unbounded quantifier
  onQuantifier?(start: number, end: number, min: number, max: number, greedy: boolean): void;

  // Assertions
  onLookaroundAssertionEnter?(start: number, kind: "lookahead" | "lookbehind", negate: boolean): void;
  onLookaroundAssertionLeave?(start: number, end: number, kind: "lookahead" | "lookbehind", negate: boolean): void;
  onEdgeAssertion?(start: number, end: number, kind: "start" | "end"): void;
  onWordBoundaryAssertion?(start: number, end: number, kind: "word", negate: boolean): void;

  // Character sets
  onAnyCharacterSet?(start: number, end: number, kind: "any"): void;
  onEscapeCharacterSet?(start: number, end: number, kind: "digit" | "space" | "word", negate: boolean): void;
  onUnicodePropertyCharacterSet?(start: number, end: number, kind: "property", key: string, value: string | null, negate: boolean): void;

  // Single characters and backreferences
  // `value` is presumably the character's code point — confirm.
  onCharacter?(start: number, end: number, value: number): void;
  // `ref` is a string for a named reference and a number for a numbered one.
  onBackreference?(start: number, end: number, ref: number | string): void;

  // Character classes and ranges inside them
  onCharacterClassEnter?(start: number, negate: boolean): void;
  onCharacterClassLeave?(start: number, end: number, negate: boolean): void;
  onCharacterClassRange?(start: number, end: number, min: number, max: number): void;
}

Usage Examples:

import { RegExpValidator } from "regexpp";

// Basic validator: no options, so every setting keeps its default
const validator = new RegExpValidator();

// Validator that reports the structure of the regex through callbacks
const analyticValidator = new RegExpValidator({
  ecmaVersion: 2022,
  onCapturingGroupEnter: (start, name) => {
    // `name` is null for an unnamed group
    const label = name || 'unnamed';
    console.log(`Found capturing group: ${label} at position ${start}`);
  },
  onQuantifier: (start, end, min, max, greedy) => {
    console.log(`Found quantifier {${min},${max}} (greedy: ${greedy}) at ${start}-${end}`);
  },
  onUnicodePropertyCharacterSet: (start, end, kind, key, value, negate) => {
    // Rebuild the escape as written: \P{...} when negated, \p{...} otherwise
    const escapeLetter = negate ? 'P' : 'p';
    const valueSuffix = value ? '=' + value : '';
    console.log(`Unicode property: \\${escapeLetter}{${key}${valueSuffix}} at ${start}-${end}`);
  }
});

// Strict validator (Annex B features disabled)
const strictValidator = new RegExpValidator({ ecmaVersion: 2022, strict: true });

Validate Literal

Validates a complete regular expression literal including pattern and flags.

/**
 * Validate a regular expression literal, i.e. the pattern together with
 * its enclosing slashes and any trailing flags.
 * @param source - The source code to validate (e.g., "/abc/g")
 * @param start - The start index in the source code. Default is 0
 * @param end - The end index in the source code (exclusive: the character
 *   at `end` itself is not validated). Default is source.length
 * @throws {RegExpSyntaxError} If the regex has invalid syntax
 */
validateLiteral(source: string, start?: number, end?: number): void;

Usage Examples:

import { RegExpValidator } from "regexpp";

const validator = new RegExpValidator();

// Basic validation
try {
  validator.validateLiteral("/[a-z]+/gi");
  console.log("Valid regex literal");
} catch (error) {
  // Catch variables are `unknown` under `strict`, so narrow before reading `.message`.
  console.log("Invalid regex:", error instanceof Error ? error.message : String(error));
}

// Validate with callbacks: onFlags receives one boolean per flag
const callbackValidator = new RegExpValidator({
  onFlags(start, end, global, ignoreCase, multiline, unicode, sticky, dotAll, hasIndices) {
    console.log(`Flags: g=${global}, i=${ignoreCase}, m=${multiline}, u=${unicode}, y=${sticky}, s=${dotAll}, d=${hasIndices}`);
  }
});

callbackValidator.validateLiteral("/test/gimsuyd");

// Validate substring
const source = "const regex = /\\d{2,4}/g;";
try {
  // Index 14 is the opening slash; index 24 is the ";" just past the "g" flag,
  // so the exclusive end selects exactly the "/\\d{2,4}/g" part.
  validator.validateLiteral(source, 14, 24);
  console.log("Extracted regex is valid");
} catch (error) {
  console.log("Invalid regex syntax");
}

Validate Pattern

Validates just the pattern part of a regular expression (without delimiters and flags).

/**
 * Validate a regular expression pattern (the part between the slashes,
 * without delimiters and flags).
 * @param source - The source code to validate (e.g., "abc")
 * @param start - The start index in the source code. Default is 0
 * @param end - The end index in the source code. Default is source.length.
 *   Pass `undefined` to keep the default when only `uFlag` needs to be set.
 * @param uFlag - The flag to enable Unicode mode
 * @throws {RegExpSyntaxError} If the pattern has invalid syntax
 */
validatePattern(source: string, start?: number, end?: number, uFlag?: boolean): void;

Usage Examples:

import { RegExpValidator } from "regexpp";

const validator = new RegExpValidator();

// Validate simple pattern
try {
  validator.validatePattern("abc");
  console.log("Valid pattern");
} catch (error) {
  // Catch variables are `unknown` under `strict`, so narrow before reading `.message`.
  console.log("Invalid pattern:", error instanceof Error ? error.message : String(error));
}

// Validate with Unicode flag; `end` is passed as undefined so it keeps its default
try {
  validator.validatePattern("\\p{Letter}+", 0, undefined, true);
  console.log("Valid Unicode pattern");
} catch (error) {
  console.log("Invalid Unicode pattern");
}

// Validate with callbacks
const patternValidator = new RegExpValidator({
  onCharacterClassEnter(start, negate) {
    console.log(`Character class ${negate ? 'negated' : 'normal'} at position ${start}`);
  }
});

patternValidator.validatePattern("[^a-z]+");

Validate Flags

Validates just the flags part of a regular expression.

/**
 * Validate regular expression flags (the part after the closing slash).
 * Unknown flag characters (e.g. "gix") and duplicated flags (e.g. "gg")
 * are rejected.
 * @param source - The source code to validate (e.g., "gim")
 * @param start - The start index in the source code. Default is 0
 * @param end - The end index in the source code. Default is source.length
 * @throws {RegExpSyntaxError} If the flags are invalid
 */
validateFlags(source: string, start?: number, end?: number): void;

Usage Examples:

import { RegExpValidator } from "regexpp";

const validator = new RegExpValidator();

// Catch variables are `unknown` under `strict`; turn one into printable text
// without assuming it is an Error.
const describeError = (error: unknown): string =>
  error instanceof Error ? error.message : String(error);

// Validate flags
try {
  validator.validateFlags("gim");
  console.log("Valid flags");
} catch (error) {
  console.log("Invalid flags:", describeError(error));
}

// Invalid flags will throw
try {
  validator.validateFlags("gix"); // 'x' is not a valid flag
} catch (error) {
  console.log("Error:", describeError(error));
}

// Duplicate flags will throw
try {
  validator.validateFlags("gg"); // Duplicate 'g' flag
} catch (error) {
  console.log("Error:", describeError(error));
}

Advanced Validation with Callbacks

The validator's callback system allows detailed analysis during validation:

import { RegExpValidator } from "regexpp";

// Validator that logs every structural element it encounters
const analyzer = new RegExpValidator({
  ecmaVersion: 2022,

  onPatternEnter: (start) => {
    console.log(`Starting pattern analysis at position ${start}`);
  },

  onCapturingGroupEnter: (start, name) => {
    // `name` is null for unnamed groups
    const message = name
      ? `Named capture group '${name}' at ${start}`
      : `Unnamed capture group at ${start}`;
    console.log(message);
  },

  onQuantifier: (start, end, min, max, greedy) => {
    // Classify the quantifier; an equal min and max takes precedence
    let type = 'range';
    if (min === max) {
      type = 'exact';
    } else if (min === 0 && max === Infinity) {
      type = 'zero-or-more';
    }
    const upperBound = max === Infinity ? '∞' : max;
    const mode = greedy ? 'greedy' : 'lazy';
    console.log(`Quantifier: ${type} {${min},${upperBound}} ${mode}`);
  },

  onBackreference: (start, end, ref) => {
    // Named references arrive as strings, numbered ones as numbers
    const target = typeof ref === 'string' ? `'${ref}'` : `group ${ref}`;
    console.log(`Backreference to ${target} at ${start}-${end}`);
  }
});

// Run the analyzer over a regex with named groups and a named backreference
analyzer.validateLiteral("/(?<year>\\d{4})-(?<month>\\d{2})\\k<year>/g");

Error Handling

All validation methods throw a RegExpSyntaxError when the input has invalid syntax:

import { RegExpValidator } from "regexpp";

const validator = new RegExpValidator();

try {
  validator.validateLiteral("/[z-a]/"); // Invalid character class range
} catch (error) {
  // Catch variables are `unknown` under `strict`. RegExpSyntaxError extends
  // SyntaxError, so anything else did not come from validation: re-throw it.
  if (!(error instanceof SyntaxError)) {
    throw error;
  }
  // RegExpSyntaxError does not set its own `name`, so the inherited value is reported.
  console.log(error.name); // "SyntaxError"
  console.log(error.message); // Detailed error message
  // `index` is added by RegExpSyntaxError and is not part of the built-in SyntaxError type.
  console.log((error as SyntaxError & { index: number }).index); // Position where error occurred
}

// Version-specific validation
const es2017Validator = new RegExpValidator({ ecmaVersion: 2017 });

try {
  es2017Validator.validatePattern("(?<=\\w)\\d+"); // Lookbehind requires ES2018+
} catch (error) {
  console.log("Feature not supported in ES2017");
}

Types

/** ECMAScript editions accepted by the `ecmaVersion` option. */
type EcmaVersion = 5 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022;

// Callback function types
// Generic shape of the callbacks that report a finished span: a start and an
// end position followed by event-specific arguments. The "enter" callbacks
// (e.g. onLiteralEnter, onPatternEnter) receive `start` but no `end`, so they
// do not follow this exact parameter list.
// NOTE(review): RegExpValidator.Options as shown does not reference this alias —
// confirm it is part of the package's public typings.
type ValidationCallback = (start: number, end: number, ...args: any[]) => void;

// Error type thrown by validation methods
/**
 * Syntax error raised by validateLiteral, validatePattern and validateFlags.
 */
class RegExpSyntaxError extends SyntaxError {
  /** Position where the error occurred. */
  index: number;
  /**
   * NOTE(review): parameter meanings below are inferred from their names — confirm.
   * @param source - presumably the regex source being validated
   * @param uFlag - presumably whether Unicode mode was enabled
   * @param index - stored as the `index` property
   * @param message - description of the syntax problem
   */
  constructor(source: string, uFlag: boolean, index: number, message: string);
}

Install with Tessl CLI

npx tessl i tessl/npm-regexpp

docs

ast-types.md

core-functions.md

index.md

parsing.md

validation.md

visitor.md

tile.json