tessl/npm-jison

A parser generator with Bison's API for creating bottom-up parsers from grammar definitions

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Securityby

Pending

The risk profile of this skill

Overview

Eval results

Files

Algorithm Selection

Name: tessl/npm-jison
Author: tessl

Support for multiple parsing algorithms including LR(0), SLR(1), LALR(1), LR(1), and LL(1) with automatic selection based on grammar complexity. Each algorithm offers different trade-offs between parsing power, table size, and generation time.

Capabilities

Algorithm Overview

Jison supports five different parsing algorithms, each with specific characteristics:

LR(0): Simplest LR algorithm, works with very limited grammars
SLR(1): Simple LR with one token lookahead, more powerful than LR(0)
LALR(1): Look-Ahead LR, good balance of power and efficiency (default)
LR(1): Canonical LR with full lookahead, most powerful LR algorithm
LL(1): Top-down parsing, requires left-factored grammars

LR(0) Generator

Basic LR parser generator for simple grammars without lookahead conflicts.

/**
 * LR(0) parser generator constructor
 * @param grammar - Grammar definition
 * @param options - Generation options
 */
function LR0Generator(grammar, options);

/**
 * LR(0) generator instance methods
 */
class LR0Generator {
  type: "LR(0)";            // Algorithm identifier
  
  /**
   * Build LR(0) parsing table
   */
  buildTable(): void;
  
  /**
   * Create parser instance from generated table
   * @returns Parser ready for input processing
   */
  createParser(): GeneratedParser;
  
  /**
   * Generate parser source code
   * @param options - Code generation options
   * @returns Generated parser source
   */
  generate(options?: GenerationOptions): string;
}

Usage Examples:

const { LR0Generator } = require("jison");

// Simple grammar that works with LR(0)
const lr0Grammar = {
  "bnf": {
    "S": [["A", "return $1;"]],
    "A": [["a", "$$ = 'matched_a';"]]
  }
};

const lr0Gen = new LR0Generator(lr0Grammar);
const parser = lr0Gen.createParser();

try {
  const result = parser.parse("a");
  console.log(result); // "matched_a"
} catch (error) {
  console.error("LR(0) parse failed:", error.message);
}

SLR(1) Generator

Simple LR(1) parser generator using Follow sets for lookahead resolution.

/**
 * SLR(1) parser generator constructor
 * @param grammar - Grammar definition
 * @param options - Generation options
 */
function SLRGenerator(grammar, options);

/**
 * SLR(1) generator instance methods
 */
class SLRGenerator extends LR0Generator {
  type: "SLR(1)";           // Algorithm identifier
  
  /**
   * Compute lookahead tokens using Follow sets
   * @param state - Parser state
   * @param item - LR item
   * @returns Array of lookahead tokens
   */
  lookAheads(state: State, item: Item): string[];
}

Usage Examples:

const { SLRGenerator } = require("jison");

// Grammar with simple conflicts resolvable by SLR(1)
const slrGrammar = {
  "bnf": {
    "E": [
      ["E + T", "$$ = $1 + $3;"],
      ["T", "$$ = $1;"]
    ],
    "T": [
      ["( E )", "$$ = $2;"],
      ["id", "$$ = $1;"]
    ]
  }
};

const slrGen = new SLRGenerator(slrGrammar);
const parser = slrGen.createParser();

LALR(1) Generator

Look-Ahead LR(1) parser generator - the default algorithm providing good balance of power and efficiency.

/**
 * LALR(1) parser generator constructor (default algorithm)
 * @param grammar - Grammar definition
 * @param options - Generation options including onDemandLookahead
 */
function LALRGenerator(grammar, options);

/**
 * LALR(1) generator instance methods
 */
class LALRGenerator extends LR0Generator {
  type: "LALR(1)";          // Algorithm identifier
  onDemandLookahead: boolean; // Compute lookaheads only for inadequate states
  
  /**
   * Build new grammar for lookahead computation
   */
  buildNewGrammar(): void;
  
  /**
   * Union lookahead sets from auxiliary grammar
   */
  unionLookaheads(): void;
  
  /**
   * Compute lookahead tokens for LALR(1)
   * @param state - Parser state
   * @param item - LR item
   * @returns Array of lookahead tokens
   */
  lookAheads(state: State, item: Item): string[];
  
  /**
   * Compute goto operation for state and symbol
   * @param p - State number
   * @param w - Symbol sequence
   * @returns Target state number
   */
  go(p: number, w: string[]): number;
}

Usage Examples:

const { LALRGenerator } = require("jison");

// Complex grammar requiring LALR(1) lookahead
const lalrGrammar = {
  "lex": {
    "rules": [
      ["\\s+", "/* skip */"],
      ["[a-zA-Z]+", "return 'ID';"],
      ["=", "return '=';"],
      [";", "return ';';"]
    ]
  },
  "bnf": {
    "prog": [["stmts", "return $1;"]],
    "stmts": [
      ["stmts stmt", "$$ = $1.concat([$2]);"],
      ["stmt", "$$ = [$1];"]
    ],
    "stmt": [["ID = ID ;", "$$ = {type: 'assign', left: $1, right: $3};"]]
  }
};

const lalrGen = new LALRGenerator(lalrGrammar, {
  onDemandLookahead: true  // Optimize lookahead computation
});
const parser = lalrGen.createParser();

LR(1) Generator

Canonical LR(1) parser generator - most powerful LR algorithm with full lookahead context.

/**
 * Canonical LR(1) parser generator constructor
 * @param grammar - Grammar definition
 * @param options - Generation options
 */
function LR1Generator(grammar, options);

/**
 * LR(1) generator instance methods
 */
class LR1Generator extends LR0Generator {
  type: "Canonical LR(1)"; // Algorithm identifier
  
  /**
   * Enhanced LR(1) Item class with lookahead context
   */
  Item: typeof LR1Item;
  
  /**
   * LR(1) closure operation with lookahead propagation
   * @param itemSet - Set of LR(1) items
   * @returns Closure of the item set
   */
  closureOperation(itemSet: ItemSet): ItemSet;
  
  /**
   * Compute lookahead tokens for LR(1) items
   * @param state - Parser state
   * @param item - LR(1) item
   * @returns Array of lookahead tokens
   */
  lookAheads(state: State, item: LR1Item): string[];
}

/**
 * LR(1) Item with lookahead context
 */
class LR1Item {
  production: Production;   // Associated production
  dotPosition: number;      // Position of dot in production
  follows: string[];        // Lookahead tokens
  predecessor?: number;     // Predecessor item reference
  
  /**
   * Remaining handle after dot position
   * @returns Array of symbols after the dot
   */
  remainingHandle(): string[];
  
  /**
   * Check equality with another LR(1) item
   * @param item - Item to compare with
   * @returns True if items are equivalent
   */
  eq(item: LR1Item): boolean;
}

Usage Examples:

const { LR1Generator } = require("jison");

// Grammar requiring full LR(1) power (not LALR)
const lr1Grammar = {
  "bnf": {
    "S": [
      ["A a", "return 'S->Aa';"],
      ["B b", "return 'S->Bb';"]
    ],
    "A": [["c", "$$ = 'A->c';"]],
    "B": [["c", "$$ = 'B->c';"]]
  }
};

const lr1Gen = new LR1Generator(lr1Grammar);
const parser = lr1Gen.createParser();

// This grammar creates LALR conflicts but LR(1) can handle it
console.log(parser.parse("c a")); // "S->Aa"
console.log(parser.parse("c b")); // "S->Bb"

LL(1) Generator

Top-down LL(1) parser generator for left-factored grammars.

/**
 * LL(1) parser generator constructor
 * @param grammar - Grammar definition (must be left-factored)
 * @param options - Generation options
 */
function LLGenerator(grammar, options);

/**
 * LL(1) generator instance methods
 */
class LLGenerator {
  type: "LL(1)";            // Algorithm identifier
  
  /**
   * Build LL(1) parsing table
   * @param productions - Grammar productions
   * @returns LL(1) parsing table
   */
  parseTable(productions: Production[]): ParseTable;
  
  /**
   * Create LL(1) parser instance
   * @returns Generated LL(1) parser
   */
  createParser(): GeneratedParser;
}

Usage Examples:

const { LLGenerator } = require("jison");

// Left-factored grammar suitable for LL(1)
const llGrammar = {
  "bnf": {
    "E": [
      ["T E_prime", "$$ = $2($1);"]
    ],
    "E_prime": [
      ["+ T E_prime", "$$ = function(left) { return $3(left + $2); };"],
      ["", "$$ = function(left) { return left; };"]  // epsilon production
    ],
    "T": [["id", "$$ = $1;"]]
  }
};

const llGen = new LLGenerator(llGrammar);
const parser = llGen.createParser();

Algorithm Selection Guidelines

When to Use Each Algorithm

LR(0): Use for very simple grammars without any conflicts:

// Example: Simple assignment statements
const simpleGrammar = {
  "bnf": {
    "stmt": [["ID = NUM", "return {assign: $1, value: $3};"]]
  }
};

SLR(1): Use for grammars with simple reduce-reduce conflicts:

// Example: Basic arithmetic expressions
const arithmeticGrammar = {
  "bnf": {
    "expr": [
      ["expr + term", "$$ = $1 + $3;"],
      ["term", "$$ = $1;"]
    ],
    "term": [["NUM", "$$ = Number($1);"]]
  }
};

LALR(1) (default): Use for most grammars - good balance of power and efficiency:

// Example: Programming language constructs
const programGrammar = {
  "bnf": {
    "program": [["declarations statements", "return {decls: $1, stmts: $2};"]],
    "declarations": [["var_decls func_decls", "$$ = $1.concat($2);"]],
    // ... more complex grammar rules
  }
};

LR(1): Use for grammars with complex lookahead requirements:

// Example: Context-sensitive constructs
const contextGrammar = {
  "bnf": {
    "stmt": [
      ["if_stmt", "$$ = $1;"],
      ["block_stmt", "$$ = $1;"]
    ],
    "if_stmt": [["IF expr THEN stmt ELSE stmt", "return {if: $2, then: $4, else: $6};"]],
    // Grammar that creates LALR conflicts
  }
};

LL(1): Use for grammars that need top-down parsing:

// Example: Recursive descent friendly grammar
const recursiveGrammar = {
  "bnf": {
    "expr": [["term expr_rest", "$$ = $2($1);"]],
    "expr_rest": [
      ["+ term expr_rest", "$$ = function(left) { return $3(left + $2); };"],
      ["", "$$ = function(left) { return left; };"]
    ]
  }
};

Automatic Selection

Use the Generator factory for automatic algorithm selection:

const jison = require("jison");

// Automatic selection (defaults to LALR(1))
const autoGen = new jison.Generator(grammar);

// Explicit algorithm selection
const lr1Gen = new jison.Generator(grammar, { type: 'lr' });
const llGen = new jison.Generator(grammar, { type: 'll' });

Performance Comparison

Algorithm	Parse Table Size	Generation Time	Parse Speed	Grammar Support
LR(0)	Smallest	Fastest	Fastest	Very Limited
SLR(1)	Small	Fast	Fast	Limited
LALR(1)	Medium	Medium	Fast	Good
LR(1)	Large	Slow	Fast	Excellent
LL(1)	Medium	Medium	Medium	Limited

Debugging Algorithm Selection

Enable debug mode to see algorithm-specific information:

const generator = new jison.Generator(grammar, { 
  type: 'lalr',
  debug: true 
});

// Debug output shows:
// - State construction details
// - Conflict resolution
// - Lookahead computation
// - Table compression statistics

Error Handling by Algorithm

Different algorithms provide different error recovery capabilities:

LR Algorithms (LR0, SLR, LALR, LR1)

// LR parsers support error recovery
const parser = generator.createParser();

try {
  const result = parser.parse(invalidInput);
} catch (error) {
  console.log("Parse error:", error.message);
  console.log("Recoverable:", error.hash.recoverable);
  console.log("Expected tokens:", error.hash.expected);
}

LL(1) Algorithm

// LL parsers have simpler error reporting
const llParser = llGenerator.createParser();

try {
  const result = llParser.parse(invalidInput);
} catch (error) {
  console.log("LL parse error:", error.message);
  // Less detailed error information than LR parsers
}

Types

/**
 * Parser generator state
 */
interface State {
  edges: Record<string, number>;   // Transitions to other states
  reductions: Item[];              // Reduction items in this state
  inadequate: boolean;             // Whether state has conflicts
}

/**
 * Production rule representation
 */
interface Production {
  symbol: string;                  // Left-hand side nonterminal
  handle: string[];                // Right-hand side symbols
  id: number;                      // Production identifier
  precedence: number;              // Operator precedence
}

/**
 * LR parsing item
 */
interface Item {
  production: Production;          // Associated production rule
  dotPosition: number;             // Position of dot in production
  follows: string[];               // Lookahead tokens
  
  remainingHandle(): string[];     // Symbols after dot position
  eq(item: Item): boolean;         // Check equality with another item
}

/**
 * Set of LR items
 */
interface ItemSet {
  contains(item: Item): boolean;   // Check if item is in set
  push(item: Item): void;          // Add item to set
  forEach(fn: (item: Item) => void): void; // Iterate over items
}

/**
 * Code generation options
 */
interface GenerationOptions {
  moduleName?: string;             // Generated module name
  moduleType?: 'commonjs' | 'amd' | 'js'; // Module format
  debug?: boolean;                 // Include debug information
}

/**
 * LL(1) parsing table
 */
interface ParseTable {
  [nonterminal: string]: {
    [terminal: string]: number[];  // Production numbers to apply
  };
}