CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/npm-regexpp

Regular expression parser for ECMAScript that generates Abstract Syntax Trees from regex patterns.

Pending
Overview
Eval results
Files

docs/ast-types.md

AST Node Types and Structures

A comprehensive type system that represents every ECMAScript regular expression syntax element as an Abstract Syntax Tree node.

Capabilities

Core Node Types

The foundational type unions that categorize all AST nodes.

/**
 * Union of every AST node: each node is either a branch node (has children)
 * or a leaf node (has none).
 */
type AST.Node = AST.BranchNode | AST.LeafNode;

/**
 * Union of all branch nodes, i.e. nodes that contain other nodes
 * (e.g. a pattern and flags, alternatives, elements, a quantified element,
 * or the two endpoints of a character class range).
 */
type AST.BranchNode = 
  | AST.RegExpLiteral 
  | AST.Pattern 
  | AST.Alternative 
  | AST.Group 
  | AST.CapturingGroup 
  | AST.Quantifier 
  | AST.CharacterClass 
  | AST.LookaroundAssertion 
  | AST.CharacterClassRange;

/**
 * Union of all leaf nodes, i.e. nodes that never contain child nodes.
 *
 * AST.Backreference is a leaf: its `resolved` field points at a capturing
 * group located elsewhere in the tree (that group's parent is an Alternative
 * or Quantifier, never the backreference).
 */
type AST.LeafNode = 
  | AST.BoundaryAssertion 
  | AST.CharacterSet 
  | AST.Character 
  | AST.Backreference 
  | AST.Flags;

/**
 * Union of everything that can appear in `AST.Alternative.elements`:
 * assertions, quantifiers, and the quantifiable elements themselves.
 */
type AST.Element = AST.Assertion | AST.Quantifier | AST.QuantifiableElement;

/**
 * Union of the nodes that may be the `element` of an AST.Quantifier.
 *
 * Of the assertions, only lookahead is listed; lookbehind and boundary
 * assertions are not part of this union.
 */
type AST.QuantifiableElement = 
  | AST.Group 
  | AST.CapturingGroup 
  | AST.CharacterClass 
  | AST.CharacterSet 
  | AST.Character 
  | AST.Backreference 
  | AST.LookaheadAssertion; // Lookahead assertions are quantifiable in Annex-B

/**
 * Union of the nodes that may appear in `AST.CharacterClass.elements`.
 *
 * Of the character sets, only the escape and Unicode-property forms are
 * listed; AST.AnyCharacterSet is not a character class element.
 */
type AST.CharacterClassElement = 
  | AST.EscapeCharacterSet 
  | AST.UnicodePropertyCharacterSet 
  | AST.Character 
  | AST.CharacterClassRange;

Base Node Interface

All AST nodes extend this base interface, which provides the properties common to every node.

/**
 * The base interface extended by every AST node.
 *
 * Concrete node interfaces narrow `type` to a string literal (for example
 * "Group" or "Quantifier") and narrow `parent` to the node kinds that can
 * actually contain them.
 */
interface AST.NodeBase {
  /** The type discriminator for this node; use it to narrow an AST.Node union */
  type: string;
  /** The parent node, or null for root nodes */
  parent: AST.Node | null;
  /** The start index of this node in the original source */
  start: number;
  /** The end index of this node in the original source */
  end: number;
  /** The raw source text for this node */
  raw: string;
}

Root and Structural Nodes

RegExp Literal

The root node representing a complete regular expression literal.

/**
 * Regular expression literal node (/pattern/flags).
 *
 * This is the root of the tree, so `parent` is always null.
 */
interface AST.RegExpLiteral extends AST.NodeBase {
  type: "RegExpLiteral";
  /** Always null: a literal is the root node */
  parent: null;
  /** The pattern part of the regex (the text between the slashes) */
  pattern: AST.Pattern;
  /** The flags part of the regex (the text after the closing slash) */
  flags: AST.Flags;
}

Pattern

The node for the regex pattern itself; it holds the pattern's top-level alternatives.

/**
 * Pattern node holding the top-level alternatives of a regular expression.
 */
interface AST.Pattern extends AST.NodeBase {
  type: "Pattern";
  /**
   * The enclosing literal, or null.
   * NOTE(review): null presumably means the pattern was parsed on its own
   * rather than as part of a /pattern/flags literal — confirm in the parsing docs.
   */
  parent: AST.RegExpLiteral | null;
  /** Array of alternative patterns (separated by |) */
  alternatives: AST.Alternative[];
}

Alternative

A single alternative within a pattern (one branch of a disjunction).

/**
 * Alternative node representing one branch of a disjunction.
 *
 * Alternatives are owned by every node that exposes an `alternatives` array:
 * the pattern, both group kinds, and lookaround assertions.
 */
interface AST.Alternative extends AST.NodeBase {
  type: "Alternative";
  /** The node whose `alternatives` array contains this alternative */
  parent: AST.Pattern | AST.Group | AST.CapturingGroup | AST.LookaroundAssertion;
  /** Array of elements in this alternative */
  elements: AST.Element[];
}

Group Nodes

Group

Non-capturing group node.

/**
 * Non-capturing group (?:...)
 */
interface AST.Group extends AST.NodeBase {
  type: "Group";
  /**
   * The containing alternative, or the quantifier applied to this group
   * (a group is an AST.QuantifiableElement)
   */
  parent: AST.Alternative | AST.Quantifier;
  /** Array of alternatives within this group */
  alternatives: AST.Alternative[];
}

Capturing Group

Capturing group node with optional name.

/**
 * Capturing group (...) or named capturing group (?<name>...)
 */
interface AST.CapturingGroup extends AST.NodeBase {
  type: "CapturingGroup";
  /**
   * The containing alternative, or the quantifier applied to this group
   * (a capturing group is an AST.QuantifiableElement)
   */
  parent: AST.Alternative | AST.Quantifier;
  /** The name of the group (for named groups), or null for an unnamed group */
  name: string | null;
  /** Array of alternatives within this group */
  alternatives: AST.Alternative[];
  /**
   * Array of backreferences that refer to this group; the inverse of
   * `AST.Backreference.resolved`
   */
  references: AST.Backreference[];
}

Assertion Nodes

Lookaround Assertions

/**
 * Union type for all lookaround assertions. Both members have
 * `type: "Assertion"`; distinguish them by `kind` ("lookahead" or "lookbehind").
 */
type AST.LookaroundAssertion = AST.LookaheadAssertion | AST.LookbehindAssertion;

/**
 * Lookahead assertion (?=...) or (?!...)
 */
interface AST.LookaheadAssertion extends AST.NodeBase {
  type: "Assertion";
  /**
   * The containing alternative, or a quantifier: lookaheads are part of
   * AST.QuantifiableElement because they are quantifiable in Annex-B
   */
  parent: AST.Alternative | AST.Quantifier;
  /** Discriminates this node from the other "Assertion" nodes */
  kind: "lookahead";
  /** True for negative lookahead (?!...) */
  negate: boolean;
  /** Array of alternatives within this assertion */
  alternatives: AST.Alternative[];
}

/**
 * Lookbehind assertion (?<=...) or (?<!...)
 */
interface AST.LookbehindAssertion extends AST.NodeBase {
  type: "Assertion";
  /**
   * Always an alternative: unlike lookahead, lookbehind is not part of
   * AST.QuantifiableElement, so it never has a Quantifier parent
   */
  parent: AST.Alternative;
  /** Discriminates this node from the other "Assertion" nodes */
  kind: "lookbehind";
  /** True for negative lookbehind (?<!...) */
  negate: boolean;
  /** Array of alternatives within this assertion */
  alternatives: AST.Alternative[];
}

Boundary Assertions

/**
 * Union type for all assertions. Every member has `type: "Assertion"`;
 * narrow further with `kind` ("start", "end", "word", "lookahead", "lookbehind").
 */
type AST.Assertion = AST.BoundaryAssertion | AST.LookaroundAssertion;

/**
 * Union type for boundary assertions: the edge assertions (kind "start" or
 * "end") and the word boundary assertion (kind "word"). These are leaf nodes.
 */
type AST.BoundaryAssertion = AST.EdgeAssertion | AST.WordBoundaryAssertion;

/**
 * Edge assertion ^ or $
 *
 * NOTE(review): AST.QuantifiableElement does not list boundary assertions, so
 * it is unclear when the Quantifier parent can occur — confirm against the
 * library's type definitions.
 */
interface AST.EdgeAssertion extends AST.NodeBase {
  type: "Assertion";
  parent: AST.Alternative | AST.Quantifier;
  /** "start" for ^ or "end" for $ */
  kind: "start" | "end";
}

/**
 * Word boundary assertion \b or \B
 *
 * NOTE(review): AST.QuantifiableElement does not list boundary assertions, so
 * it is unclear when the Quantifier parent can occur — confirm against the
 * library's type definitions.
 */
interface AST.WordBoundaryAssertion extends AST.NodeBase {
  type: "Assertion";
  parent: AST.Alternative | AST.Quantifier;
  /** Discriminates this node from the other "Assertion" nodes */
  kind: "word";
  /** True for \B (non-word boundary), false for \b */
  negate: boolean;
}

Quantifier Node

/**
 * Quantifier node for repetition {min,max}, +, *, ?
 *
 * The quantifier wraps the element it repeats: the quantified node is stored
 * in `element`, and the quantifier itself sits in the alternative.
 */
interface AST.Quantifier extends AST.NodeBase {
  type: "Quantifier";
  parent: AST.Alternative;
  /** Minimum repetitions */
  min: number;
  /** Maximum repetitions (Infinity for unbounded) */
  max: number;
  /**
   * True for greedy quantifiers; false for lazy ones, i.e. a quantifier
   * followed by `?` (`??`, `+?`, `*?`, `{min,max}?`)
   */
  greedy: boolean;
  /** The element being quantified */
  element: AST.QuantifiableElement;
}

Character Class Nodes

Character Class

/**
 * Character class [...]
 */
interface AST.CharacterClass extends AST.NodeBase {
  type: "CharacterClass";
  /**
   * The containing alternative, or the quantifier applied to this class
   * (a character class is an AST.QuantifiableElement)
   */
  parent: AST.Alternative | AST.Quantifier;
  /** True for negated character class [^...] */
  negate: boolean;
  /** Array of elements within the character class */
  elements: AST.CharacterClassElement[];
}

Character Class Range

/**
 * Character class range a-z
 *
 * Classified as a branch node because it owns its two endpoint characters.
 */
interface AST.CharacterClassRange extends AST.NodeBase {
  type: "CharacterClassRange";
  /** Ranges only occur inside a character class */
  parent: AST.CharacterClass;
  /** The minimum character of the range (`a` in `a-z`) */
  min: AST.Character;
  /** The maximum character of the range (`z` in `a-z`) */
  max: AST.Character;
}

Character Set Nodes

/**
 * Union type for all character sets. Every member has `type: "CharacterSet"`;
 * narrow further with `kind` ("any", "digit", "space", "word", "property").
 */
type AST.CharacterSet = 
  | AST.AnyCharacterSet 
  | AST.EscapeCharacterSet 
  | AST.UnicodePropertyCharacterSet;

/**
 * Any character set (dot .)
 *
 * Unlike the other character sets, this one cannot have a CharacterClass
 * parent and has no `negate` property.
 */
interface AST.AnyCharacterSet extends AST.NodeBase {
  type: "CharacterSet";
  parent: AST.Alternative | AST.Quantifier;
  /** Discriminates this node from the other "CharacterSet" nodes */
  kind: "any";
}

/**
 * Escape character set \d, \s, \w, \D, \S, \W
 */
interface AST.EscapeCharacterSet extends AST.NodeBase {
  type: "CharacterSet";
  /** May also be a character class, e.g. the \d in [\d_] */
  parent: AST.Alternative | AST.Quantifier | AST.CharacterClass;
  /** Which class of characters is matched, independent of negation */
  kind: "digit" | "space" | "word";
  /** True for uppercase variants (\D, \S, \W) */
  negate: boolean;
}

/**
 * Unicode property character set \p{...} or \P{...}
 */
interface AST.UnicodePropertyCharacterSet extends AST.NodeBase {
  type: "CharacterSet";
  /** May also be a character class, since this is an AST.CharacterClassElement */
  parent: AST.Alternative | AST.Quantifier | AST.CharacterClass;
  /** Discriminates this node from the other "CharacterSet" nodes */
  kind: "property";
  /** The property name (e.g., "Letter", "Script") */
  key: string;
  /** The property value (e.g., "Latin") or null for binary properties */
  value: string | null;
  /** True for \P{...} (negated) */
  negate: boolean;
}

Character and Reference Nodes

Character

/**
 * Single character node
 */
interface AST.Character extends AST.NodeBase {
  type: "Character";
  /**
   * A character can sit directly in an alternative, under a quantifier,
   * inside a character class, or be the `min`/`max` endpoint of a range
   */
  parent: AST.Alternative | AST.Quantifier | AST.CharacterClass | AST.CharacterClassRange;
  /**
   * The Unicode code point value of the character; convert back to text
   * with `String.fromCodePoint(value)`
   */
  value: number;
}

Backreference

/**
 * Backreference \1 or \k<name>
 */
interface AST.Backreference extends AST.NodeBase {
  type: "Backreference";
  parent: AST.Alternative | AST.Quantifier;
  /** The reference as written: a group number (\1) or a group name (\k<name>) */
  ref: number | string;
  /**
   * The resolved capturing group this refers to; that group lists this node
   * in its `references` array
   */
  resolved: AST.CapturingGroup;
}

Flags Node

/**
 * Flags node representing regex flags.
 *
 * Each boolean property corresponds to one flag letter; for example
 * `global` is true for a literal ending in `/g`.
 */
interface AST.Flags extends AST.NodeBase {
  type: "Flags";
  /**
   * The enclosing literal, or null.
   * NOTE(review): null presumably means the flags were parsed on their own
   * rather than as part of a /pattern/flags literal — confirm in the parsing docs.
   */
  parent: AST.RegExpLiteral | null;
  /** Dot-all flag (s) - makes . match newlines */
  dotAll: boolean;
  /** Global flag (g) - find all matches */
  global: boolean;
  /** Has indices flag (d) - generate match indices */
  hasIndices: boolean;
  /** Ignore case flag (i) - case insensitive matching */
  ignoreCase: boolean;
  /** Multiline flag (m) - ^ and $ match line boundaries */
  multiline: boolean;
  /** Sticky flag (y) - match from lastIndex only */
  sticky: boolean;
  /** Unicode flag (u) - enable full Unicode support */
  unicode: boolean;
}

Usage Examples

Working with AST Nodes

import { AST, parseRegExpLiteral } from "regexpp";

// Parse a literal containing two named groups and the global flag.
const ast = parseRegExpLiteral("/(?<year>\\d{4})-(?<month>\\d{2})/g");
const { pattern, flags } = ast;

// Every property read below is statically typed.
console.log(ast.type); // "RegExpLiteral"
console.log(pattern.alternatives.length); // Number of alternatives
console.log(flags.global); // true

// Descend the tree: pattern -> first alternative -> first element.
const [firstAlternative] = pattern.alternatives;
const [firstElement] = firstAlternative.elements;

// Narrow on the `type` discriminator before reading group-only properties.
if (firstElement.type === "CapturingGroup") {
  const { name, alternatives } = firstElement;
  console.log(name); // "year"
  console.log(alternatives.length);
}

// Check node types
/**
 * Produce a one-line, human-readable summary of a pattern element.
 *
 * The switch narrows on the `type` discriminator, so each branch can read
 * the properties specific to that node kind. Every member of AST.Element is
 * covered, including non-capturing groups ("Group").
 *
 * @param element - Any element taken from `AST.Alternative.elements`.
 * @returns A short description of the element.
 */
function analyzeElement(element: AST.Element): string {
  switch (element.type) {
    case "Group":
      return `Non-capturing group: ${element.alternatives.length} alternative(s)`;
    case "CapturingGroup":
      // `name` is null for unnamed groups.
      return `Capturing group: ${element.name || 'unnamed'}`;
    case "Quantifier":
      return `Quantifier: {${element.min},${element.max}}`;
    case "Assertion":
      // Covers edge, word-boundary, lookahead and lookbehind assertions.
      return `Assertion: ${element.kind}`;
    case "CharacterClass":
      return `Character class: ${element.negate ? 'negated' : 'normal'}`;
    case "CharacterSet":
      return `Character set: ${element.kind}`;
    case "Character":
      // `value` is a code point, so fromCodePoint (not fromCharCode) is required.
      return `Character: ${String.fromCodePoint(element.value)}`;
    case "Backreference":
      return `Backreference: ${element.ref}`;
    default:
      // Fallback for node kinds not covered by the cases above.
      return `Unknown element type`;
  }
}

Type Guards

import { AST } from "regexpp";

// Type guard functions for safe casting
/**
 * Type guard: reports whether `node` carries the "CapturingGroup" type tag,
 * narrowing it to AST.CapturingGroup for the caller.
 */
function isCapturingGroup(node: AST.Node): node is AST.CapturingGroup {
  const { type: tag } = node;
  return tag === "CapturingGroup";
}

/**
 * Type guard: reports whether `node` carries the "Quantifier" type tag,
 * narrowing it to AST.Quantifier for the caller.
 */
function isQuantifier(node: AST.Node): node is AST.Quantifier {
  const { type: tag } = node;
  return tag === "Quantifier";
}

/**
 * Type guard: reports whether `node` carries the "CharacterSet" type tag,
 * narrowing it to the AST.CharacterSet union for the caller.
 */
function isCharacterSet(node: AST.Node): node is AST.CharacterSet {
  const { type: tag } = node;
  return tag === "CharacterSet";
}

// Usage
/**
 * Log the properties specific to a node's kind, using the type guards to
 * narrow `node` first. Nodes matching none of the guards are ignored.
 */
function processNode(node: AST.Node) {
  if (isCapturingGroup(node)) {
    // Narrowed to AST.CapturingGroup
    const { name, alternatives } = node;
    console.log(name, alternatives.length);
    return;
  }

  if (isQuantifier(node)) {
    // Narrowed to AST.Quantifier
    const { min, max, greedy } = node;
    console.log(min, max, greedy);
    return;
  }

  if (isCharacterSet(node)) {
    // Narrowed to AST.CharacterSet
    console.log(node.kind);
  }
}

Install with Tessl CLI

npx tessl i tessl/npm-regexpp

docs

ast-types.md

core-functions.md

index.md

parsing.md

validation.md

visitor.md

tile.json