CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/npm-regexpp

Regular expression parser for ECMAScript that generates Abstract Syntax Trees from regex patterns.

Pending
Overview
Eval results
Files

docs/visitor.md

AST Traversal and Manipulation

The RegExpVisitor class implements the visitor pattern for traversing and manipulating regex Abstract Syntax Trees with custom callbacks for each node type.

Capabilities

RegExp Visitor Class

Creates a visitor instance with handlers for different AST node types.

/**
 * The visitor to walk on AST using the visitor pattern.
 *
 * It is constructed with a set of per-node-type callbacks
 * (`RegExpVisitor.Handlers`); a traversal is then started with `visit(node)`.
 */
class RegExpVisitor {
  /**
   * Initialize the visitor with handlers
   * @param handlers - Callbacks for each node type. Every callback is
   *   optional, so only the node types of interest need to be supplied.
   */
  constructor(handlers: RegExpVisitor.Handlers);
}

/**
 * Callbacks accepted by the `RegExpVisitor` constructor.
 *
 * Each AST node type has a pair of optional hooks, `on<Type>Enter` and
 * `on<Type>Leave`, and each hook receives the node being visited.
 * NOTE(review): Enter is presumably invoked before a node's descendants are
 * visited and Leave afterwards — confirm against the visitor implementation.
 */
interface RegExpVisitor.Handlers {
  // Root and structural nodes
  onRegExpLiteralEnter?(node: AST.RegExpLiteral): void;
  onRegExpLiteralLeave?(node: AST.RegExpLiteral): void;
  onPatternEnter?(node: AST.Pattern): void;
  onPatternLeave?(node: AST.Pattern): void;
  onAlternativeEnter?(node: AST.Alternative): void;
  onAlternativeLeave?(node: AST.Alternative): void;
  
  // Groups and assertions
  onGroupEnter?(node: AST.Group): void;
  onGroupLeave?(node: AST.Group): void;
  onCapturingGroupEnter?(node: AST.CapturingGroup): void;
  onCapturingGroupLeave?(node: AST.CapturingGroup): void;
  onAssertionEnter?(node: AST.Assertion): void;
  onAssertionLeave?(node: AST.Assertion): void;
  
  // Quantifiers and characters
  onQuantifierEnter?(node: AST.Quantifier): void;
  onQuantifierLeave?(node: AST.Quantifier): void;
  onCharacterEnter?(node: AST.Character): void;
  onCharacterLeave?(node: AST.Character): void;
  onCharacterSetEnter?(node: AST.CharacterSet): void;
  onCharacterSetLeave?(node: AST.CharacterSet): void;
  
  // Character classes
  onCharacterClassEnter?(node: AST.CharacterClass): void;
  onCharacterClassLeave?(node: AST.CharacterClass): void;
  onCharacterClassRangeEnter?(node: AST.CharacterClassRange): void;
  onCharacterClassRangeLeave?(node: AST.CharacterClassRange): void;
  
  // Backreferences and flags
  onBackreferenceEnter?(node: AST.Backreference): void;
  onBackreferenceLeave?(node: AST.Backreference): void;
  onFlagsEnter?(node: AST.Flags): void;
  onFlagsLeave?(node: AST.Flags): void;
}

Usage Examples:

import { parseRegExpLiteral } from "regexpp";
import { RegExpVisitor } from "regexpp/visitor";

// Visitor that logs every character class and quantifier it enters
const visitor = new RegExpVisitor({
  onCharacterClassEnter: (node) => {
    console.log(`Entering character class: ${node.raw}`);
  },
  onQuantifierEnter: (node) => {
    console.log(`Found quantifier: min=${node.min}, max=${node.max}, greedy=${node.greedy}`);
  }
});

// Visitor that reports capture groups (named or not) and backreferences
const analyzer = new RegExpVisitor({
  onCapturingGroupEnter: (node) => {
    // A group without a name has a falsy `name`, so it takes the second branch
    const message = node.name
      ? `Named capture group: ${node.name}`
      : "Unnamed capture group";
    console.log(message);
  },
  onBackreferenceEnter: (node) => {
    console.log(`Backreference to: ${node.ref}`);
  }
});

Visit AST

Traverses an Abstract Syntax Tree starting from the given node.

/**
 * Visit a given node and descendant nodes, invoking the handlers that were
 * passed to the `RegExpVisitor` constructor for the node types encountered.
 * @param node - The root node to visit (typically AST.RegExpLiteral)
 */
visit(node: AST.Node): void;

Usage Examples:

import { parseRegExpLiteral } from "regexpp";
import { RegExpVisitor } from "regexpp/visitor";

const ast = parseRegExpLiteral("/(?<year>\\d{4})-(?<month>\\d{2})/g");

// Count different node types.
// The binding is never reassigned (only its fields are incremented), so it is
// declared with `const` rather than `let`.
const counts = {
  characters: 0,
  groups: 0,
  quantifiers: 0,
  characterSets: 0
};

// Each handler ignores its node argument and simply bumps the matching tally
const counter = new RegExpVisitor({
  onCharacterEnter() { counts.characters++; },
  onCapturingGroupEnter() { counts.groups++; },
  onQuantifierEnter() { counts.quantifiers++; },
  onCharacterSetEnter() { counts.characterSets++; }
});

counter.visit(ast);
console.log(counts);
// For the pattern above: { characters: 1, groups: 2, quantifiers: 2, characterSets: 2 }
// (the only literal character is "-"; each group holds one quantified \d)

// Extract specific information.
// The arrays are typed explicitly: under `strict`, a bare `[]` inside an
// object literal is inferred as `never[]`, which would reject every push below.
const info: {
  namedGroups: { name: string; position: number; raw: string }[];
  quantifiers: { min: number; max: number; greedy: boolean; raw: string; position: number }[];
  assertions: { kind: string; raw: string; position: number }[];
} = {
  namedGroups: [],
  quantifiers: [],
  assertions: []
};

const extractor = new RegExpVisitor({
  // Record only groups that carry a name; unnamed groups are skipped
  onCapturingGroupEnter(node) {
    if (node.name) {
      info.namedGroups.push({
        name: node.name,
        position: node.start,
        raw: node.raw
      });
    }
  },

  // Record the bounds, greediness and source text of every quantifier
  onQuantifierEnter(node) {
    info.quantifiers.push({
      min: node.min,
      max: node.max,
      greedy: node.greedy,
      raw: node.raw,
      position: node.start
    });
  },

  // Record the kind and source text of every assertion
  onAssertionEnter(node) {
    info.assertions.push({
      kind: node.kind,
      raw: node.raw,
      position: node.start
    });
  }
});

extractor.visit(ast);
console.log(info);

Advanced Usage Patterns

AST Transformation Tracking

Collect every quantified element, together with its quantifier, to understand how a regex is structured:

import { parseRegExpLiteral } from "regexpp";
import { RegExpVisitor } from "regexpp/visitor";

const ast = parseRegExpLiteral("/[a-z]+\\d{2,4}(?:suffix)?/gi");

// Track all quantified elements.
// The element type is spelled out: an untyped `[]` that is captured by the
// handler closure below is an implicit `any[]` under `noImplicitAny`.
const quantifiedElements: {
  quantifier: { min: number; max: number; greedy: boolean; raw: string };
  element: { type: string; raw: string };
  position: number;
}[] = [];

const tracker = new RegExpVisitor({
  onQuantifierEnter(node) {
    // `node.element` is the sub-pattern the quantifier applies to
    const { type, raw } = node.element;

    quantifiedElements.push({
      quantifier: {
        min: node.min,
        max: node.max,
        greedy: node.greedy,
        raw: node.raw
      },
      element: { type, raw },
      position: node.start
    });
  }
});

tracker.visit(ast);
console.log(quantifiedElements);
// Output: Details about [a-z]+, \d{2,4}, and (?:suffix)?

Pattern Analysis

Analyze regex patterns for complexity and features:

import { parseRegExpLiteral } from "regexpp";
import { RegExpVisitor } from "regexpp/visitor";

/**
 * Summarize the structure of a regex literal: how many AST nodes it contains,
 * which notable features it uses, and which flags are set.
 *
 * @param regexString - A regex literal in source form, e.g. "/ab+c/gi"
 * @returns The node total, per-feature counts, and a copy of the literal's flags
 */
function analyzeRegexComplexity(regexString: string) {
  const ast = parseRegExpLiteral(regexString);

  const analysis = {
    totalNodes: 0,
    features: {
      namedGroups: 0,
      lookarounds: 0,
      backreferences: 0,
      unicodeProperties: 0,
      characterClasses: 0,
      quantifiers: 0
    },
    flags: {
      global: ast.flags.global,
      ignoreCase: ast.flags.ignoreCase,
      multiline: ast.flags.multiline,
      dotAll: ast.flags.dotAll,
      unicode: ast.flags.unicode,
      sticky: ast.flags.sticky,
      hasIndices: ast.flags.hasIndices
    }
  };

  // Shared by every handler so that no visited node type is left out of the total
  const countNode = (): void => {
    analysis.totalNodes++;
  };

  // Only the Enter hooks are used, so each node is counted exactly once
  const analyzer = new RegExpVisitor({
    onRegExpLiteralEnter: countNode,
    onPatternEnter: countNode,
    onAlternativeEnter: countNode,
    onGroupEnter: countNode,
    onCapturingGroupEnter(node) {
      countNode();
      if (node.name) analysis.features.namedGroups++;
    },
    onAssertionEnter(node) {
      countNode();
      // Boundary assertions count as nodes but are not lookarounds
      if (node.kind === 'lookahead' || node.kind === 'lookbehind') {
        analysis.features.lookarounds++;
      }
    },
    onQuantifierEnter() {
      countNode();
      analysis.features.quantifiers++;
    },
    onCharacterClassEnter() {
      countNode();
      analysis.features.characterClasses++;
    },
    // Ranges such as `a-z` are nodes of their own; without this handler they
    // were missing from totalNodes
    onCharacterClassRangeEnter: countNode,
    onCharacterSetEnter(node) {
      countNode();
      if (node.kind === 'property') {
        analysis.features.unicodeProperties++;
      }
    },
    onCharacterEnter: countNode,
    onBackreferenceEnter() {
      countNode();
      analysis.features.backreferences++;
    },
    onFlagsEnter: countNode
  });

  analyzer.visit(ast);
  return analysis;
}

// Example usage
// This pattern has two named groups, two quantifiers and one backreference,
// and only the global flag is set.
const complexity = analyzeRegexComplexity("/(?<year>\\d{4})-(?<month>\\d{2})\\k<year>/g");
console.log(complexity);

Pattern Validation

Use a visitor to validate regex patterns against specific constraints:

import { parseRegExpLiteral } from "regexpp";
import { RegExpVisitor } from "regexpp/visitor";

/**
 * Check a regex literal against a small set of heuristics and report every
 * construct that trips one of them.
 *
 * Flagged constructs: lazy unbounded quantifiers, quantifiers whose minimum
 * exceeds 1000, any backreference, and lookbehind assertions.
 *
 * NOTE(review): `parseRegExpLiteral` presumably throws on a syntactically
 * invalid literal; that error is not caught here — confirm the desired behavior.
 *
 * @param regexString - A regex literal in source form, e.g. "/a+?/"
 * @returns `safe` is true only when `issues` is empty
 */
function validateSafeRegex(regexString: string): { safe: boolean; issues: string[] } {
  const ast = parseRegExpLiteral(regexString);
  // Typed explicitly: an untyped `[]` captured by the handler closures below is
  // an implicit `any[]` under `noImplicitAny`.
  const issues: string[] = [];

  const validator = new RegExpVisitor({
    onQuantifierEnter(node) {
      // Check for potentially dangerous quantifiers
      if (node.max === Infinity && !node.greedy) {
        issues.push(`Lazy infinite quantifier at position ${node.start} may cause performance issues`);
      }

      if (node.min > 1000) {
        issues.push(`Large minimum quantifier {${node.min},} at position ${node.start} may cause performance issues`);
      }
    },

    onBackreferenceEnter(node) {
      // Warn about backreferences in certain contexts.
      // `node.raw` is the reference exactly as written (`\1` or `\k<name>`);
      // prefixing `node.ref` with a backslash printed `\name` for named
      // references, which is not what the pattern contains.
      issues.push(`Backreference ${node.raw} at position ${node.start} may impact performance`);
    },

    onAssertionEnter(node) {
      if (node.kind === 'lookbehind') {
        issues.push(`Lookbehind assertion at position ${node.start} has limited browser support`);
      }
    }
  });

  validator.visit(ast);

  return {
    safe: issues.length === 0,
    issues
  };
}

// Example usage
// Expect `safe: false` with two issues: the lookbehind `(?<=\w+)` and the
// lazy unbounded quantifier `.*?`.
const validation = validateSafeRegex("/(?<=\\w+)\\d+.*?/");
console.log(validation);

Types

// All AST node types that can be visited: the union of every branch node type
// and every leaf node type listed below.
type AST.Node = AST.BranchNode | AST.LeafNode;

// Union of the branch node types.
// NOTE(review): presumably these are the nodes that can contain child nodes —
// confirm against ast-types.md.
type AST.BranchNode = 
  | AST.RegExpLiteral 
  | AST.Pattern 
  | AST.Alternative 
  | AST.Group 
  | AST.CapturingGroup 
  | AST.Quantifier 
  | AST.CharacterClass 
  | AST.LookaroundAssertion 
  | AST.CharacterClassRange;

// Union of the leaf node types.
// NOTE(review): LookaroundAssertion (above) and BoundaryAssertion (below) have
// no handlers of their own in RegExpVisitor.Handlers; presumably both are
// delivered through onAssertionEnter/onAssertionLeave — confirm.
type AST.LeafNode = 
  | AST.BoundaryAssertion 
  | AST.CharacterSet 
  | AST.Character 
  | AST.Backreference 
  | AST.Flags;

// Handler function signature: receives the visited node and returns nothing
type VisitorHandler<T extends AST.Node> = (node: T) => void;

Install with Tessl CLI

npx tessl i tessl/npm-regexpp

docs

ast-types.md

core-functions.md

index.md

parsing.md

validation.md

visitor.md

tile.json