CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/npm-parse5

HTML parser and serializer that is fully compliant with the WHATWG HTML Living Standard.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/error-handling.md

Error Handling

Comprehensive error handling system that provides detailed parsing error information with source code locations and standardized error codes following the WHATWG HTML specification.

Capabilities

Parser Error Interface

Detailed error information including location data for precise error reporting.

/**
 * A single parse error, as passed to a `ParserErrorHandler`.
 *
 * Pairs an error code with the source span the error applies to. Lines and
 * columns are one-based; offsets are zero-based.
 *
 * NOTE(review): `Location` documents its end fields as pointing just after the
 * last character; presumably the same holds for the end fields here — confirm.
 */
interface ParserError {
  /** Error code identifying the kind of parse error (e.g. 'missing-end-tag-name') */
  code: string;
  /** One-based line on which the error span starts */
  startLine: number;
  /** One-based column at which the error span starts */
  startCol: number;
  /** Zero-based character offset at which the error span starts */
  startOffset: number;
  /** One-based line on which the error span ends */
  endLine: number;
  /** One-based column at which the error span ends */
  endCol: number;
  /** Zero-based character offset at which the error span ends */
  endOffset: number;
}

/**
 * Callback that receives one `ParserError` per invocation.
 *
 * The usage examples supply it as the `onParseError` option of `parse`.
 * Its return type is `void`, so any returned value is ignored.
 */
type ParserErrorHandler = (error: ParserError) => void;

Usage Examples:

import { parse } from "parse5";
import type { ParserError } from "parse5";

// Every error reported while parsing, in the order it was reported.
const errors: ParserError[] = [];

const document = parse('<div><span></div>', {
  onParseError: (error) => {
    errors.push(error);
    console.log(`Error ${error.code} at line ${error.startLine}, column ${error.startCol}`);
  }
});

console.log(errors.length); // number of parse errors reported for this input
// The input contains no solidus inside an end tag, so 'end-tag-with-trailing-solidus'
// cannot be reported here. It has no <!DOCTYPE>, so the first code is likely
// 'missing-doctype' — verify against the parse5 version in use.
console.log(errors[0]?.code); // optional chaining: safe even if nothing was reported

Error Codes Enumeration

Enumeration of the error codes passed to `onParseError`. The input-stream and tokenizer codes use the parse error names defined by the WHATWG HTML specification.

/**
 * HTML parsing error codes.
 *
 * Each member's value is the kebab-case code string found in `ParserError.code`.
 * Input-stream and tokenizer codes use the parse error names of the WHATWG HTML
 * Living Standard; the tree-construction group at the end has no named
 * counterpart in the specification.
 */
enum ErrorCodes {
  // Input stream errors
  controlCharacterInInputStream = 'control-character-in-input-stream',
  noncharacterInInputStream = 'noncharacter-in-input-stream',
  surrogateInInputStream = 'surrogate-in-input-stream',

  // Tag-related errors
  nonVoidHtmlElementStartTagWithTrailingSolidus = 'non-void-html-element-start-tag-with-trailing-solidus',
  endTagWithAttributes = 'end-tag-with-attributes',
  endTagWithTrailingSolidus = 'end-tag-with-trailing-solidus',
  unexpectedSolidusInTag = 'unexpected-solidus-in-tag',
  unexpectedQuestionMarkInsteadOfTagName = 'unexpected-question-mark-instead-of-tag-name',
  invalidFirstCharacterOfTagName = 'invalid-first-character-of-tag-name',
  missingEndTagName = 'missing-end-tag-name',

  // Character and entity errors
  unexpectedNullCharacter = 'unexpected-null-character',
  unknownNamedCharacterReference = 'unknown-named-character-reference',
  missingSemicolonAfterCharacterReference = 'missing-semicolon-after-character-reference',
  nullCharacterReference = 'null-character-reference',
  characterReferenceOutsideUnicodeRange = 'character-reference-outside-unicode-range',
  surrogateCharacterReference = 'surrogate-character-reference',
  noncharacterCharacterReference = 'noncharacter-character-reference',
  controlCharacterReference = 'control-character-reference',

  // Attribute errors
  unexpectedEqualsSignBeforeAttributeName = 'unexpected-equals-sign-before-attribute-name',
  unexpectedCharacterInAttributeName = 'unexpected-character-in-attribute-name',
  missingAttributeValue = 'missing-attribute-value',
  missingWhitespaceBetweenAttributes = 'missing-whitespace-between-attributes',
  unexpectedCharacterInUnquotedAttributeValue = 'unexpected-character-in-unquoted-attribute-value',
  duplicateAttribute = 'duplicate-attribute',

  // DOCTYPE errors
  missingWhitespaceBeforeDoctypeName = 'missing-whitespace-before-doctype-name',
  missingDoctypeName = 'missing-doctype-name',
  invalidCharacterSequenceAfterDoctypeName = 'invalid-character-sequence-after-doctype-name',
  missingWhitespaceAfterDoctypePublicKeyword = 'missing-whitespace-after-doctype-public-keyword',
  missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers = 'missing-whitespace-between-doctype-public-and-system-identifiers',
  missingWhitespaceAfterDoctypeSystemKeyword = 'missing-whitespace-after-doctype-system-keyword',
  missingQuoteBeforeDoctypePublicIdentifier = 'missing-quote-before-doctype-public-identifier',
  missingQuoteBeforeDoctypeSystemIdentifier = 'missing-quote-before-doctype-system-identifier',
  missingDoctypePublicIdentifier = 'missing-doctype-public-identifier',
  missingDoctypeSystemIdentifier = 'missing-doctype-system-identifier',
  abruptDoctypePublicIdentifier = 'abrupt-doctype-public-identifier',
  abruptDoctypeSystemIdentifier = 'abrupt-doctype-system-identifier',
  unexpectedCharacterAfterDoctypeSystemIdentifier = 'unexpected-character-after-doctype-system-identifier',

  // End-of-file errors
  eofBeforeTagName = 'eof-before-tag-name',
  eofInTag = 'eof-in-tag',
  eofInScriptHtmlCommentLikeText = 'eof-in-script-html-comment-like-text',
  eofInDoctype = 'eof-in-doctype',
  eofInComment = 'eof-in-comment',
  eofInCdata = 'eof-in-cdata',

  // Comment and CDATA errors
  cdataInHtmlContent = 'cdata-in-html-content',
  incorrectlyOpenedComment = 'incorrectly-opened-comment',
  nestedComment = 'nested-comment',
  abruptClosingOfEmptyComment = 'abrupt-closing-of-empty-comment',
  incorrectlyClosedComment = 'incorrectly-closed-comment',

  // Numeric character reference errors
  absenceOfDigitsInNumericCharacterReference = 'absence-of-digits-in-numeric-character-reference',

  // Tree-construction errors (the specification does not assign names to these)
  nonConformingDoctype = 'non-conforming-doctype',
  missingDoctype = 'missing-doctype',
  misplacedDoctype = 'misplaced-doctype',
  endTagWithoutMatchingOpenElement = 'end-tag-without-matching-open-element',
  closingOfElementWithOpenChildElements = 'closing-of-element-with-open-child-elements',
  disallowedContentInNoscriptInHead = 'disallowed-content-in-noscript-in-head',
  openElementsLeftAfterEof = 'open-elements-left-after-eof',
  abandonedHeadElementChild = 'abandoned-head-element-child',
  misplacedStartTagForHeadElement = 'misplaced-start-tag-for-head-element',
  nestedNoscriptInHead = 'nested-noscript-in-head',
  eofInElementThatCanContainOnlyText = 'eof-in-element-that-can-contain-only-text',
}

/**
 * Shorthand alias for the error codes enum: `ERR` is bound to the same enum
 * object as `ErrorCodes`, so `ERR.eofInTag === ErrorCodes.eofInTag`.
 */
const ERR = ErrorCodes;

Usage Examples:

import { parse, ErrorCodes } from "parse5";

// Print a tailored message for the two codes this example singles out;
// every other reported code falls through to the generic message.
const document = parse('<div><span></div>', {
  onParseError: ({ code }) => {
    if (code === ErrorCodes.endTagWithAttributes) {
      console.log('End tag has attributes');
    } else if (code === ErrorCodes.unexpectedNullCharacter) {
      console.log('Unexpected null character found');
    } else {
      console.log(`Unknown error: ${code}`);
    }
  }
});

Location Information

Detailed location tracking for precise error positioning.

/**
 * Source span shared by errors and nodes.
 *
 * Lines and columns are one-based; offsets are zero-based. The end fields
 * point just past the last character, i.e. the span is end-exclusive.
 */
interface Location {
  /** One-based line index of the first character */
  startLine: number;
  /** One-based column index of the first character */
  startCol: number;
  /** Zero-based index of the first character */
  startOffset: number;
  /** One-based line index of the last character */
  endLine: number;
  /** One-based column index just after the last character (exclusive) */
  endCol: number;
  /** Zero-based index just after the last character (exclusive) */
  endOffset: number;
}

/**
 * A `Location` that can additionally carry the spans of a start tag's
 * attributes.
 */
interface LocationWithAttributes extends Location {
  /**
   * Locations of the start tag's attributes; optional.
   * NOTE(review): presumably keyed by attribute name — confirm.
   */
  attrs?: Record<string, Location>;
}

/**
 * Location of an element, with optional separate spans for its start and end
 * tags in addition to the inherited fields and attribute locations.
 */
interface ElementLocation extends LocationWithAttributes {
  /** Span of the element's start tag; optional */
  startTag?: Location;
  /** Span of the element's end tag; undefined when there is no closing tag */
  endTag?: Location;
}

Usage Examples:

import { parse } from "parse5";
import type { ParserError } from "parse5";

const html = `<div class="container">
  <span>Content</span>
</div>`;

// Only appended to, never reassigned, hence `const`.
const parseErrors: ParserError[] = [];

const document = parse(html, {
  sourceCodeLocationInfo: true,
  onParseError: (error) => {
    parseErrors.push(error);
    // Line is one-based; offsets are zero-based character indices.
    console.log(`Error at line ${error.startLine}: ${error.code}`);
    console.log(`Position: ${error.startOffset}-${error.endOffset}`);
  }
});

Error Handling Patterns

Error Collection and Reporting

import { parse, parseFragment } from "parse5";
import type { ParserError } from "parse5";

/**
 * Parses HTML while remembering the parse errors of the most recent run, and
 * can summarise those errors per error code.
 */
class HTMLErrorCollector {
  /** Errors recorded during the latest `parseWithErrorCollection` call. */
  private errors: ParserError[] = [];

  /**
   * Parses `html` as a document and records every reported parse error.
   * Errors remembered from an earlier call are discarded first.
   *
   * @param html - Markup to parse.
   * @returns The parsed document, the recorded errors, and whether any were recorded.
   */
  parseWithErrorCollection(html: string) {
    this.errors = [];

    const recordError = (error: ParserError): void => {
      this.errors.push(error);
    };
    const document = parse(html, { onParseError: recordError });
    const { errors } = this;

    return { document, errors, hasErrors: errors.length > 0 };
  }

  /**
   * Counts the recorded errors per error code.
   *
   * @returns One `{ code, count }` entry per distinct code, in order of first occurrence.
   */
  getErrorSummary() {
    const tally = new Map<string, number>();

    for (const { code } of this.errors) {
      const seen = tally.get(code);
      tally.set(code, seen === undefined ? 1 : seen + 1);
    }

    const summary: { code: string; count: number }[] = [];
    for (const [code, count] of tally) {
      summary.push({ code, count });
    }
    return summary;
  }
}

Error Severity Classification

import { ErrorCodes } from "parse5";
import type { ParserError } from "parse5";

/**
 * Severity levels assigned to parse errors by `classifyError`.
 * The numeric values increase with severity (LOW = 1 … CRITICAL = 4).
 */
enum ErrorSeverity {
  LOW = 1,
  MEDIUM = 2,
  HIGH = 3,
  CRITICAL = 4
}

/**
 * Maps a parse error to a severity level based solely on its code.
 *
 * @param error - The parse error to classify.
 * @returns The severity for the error's code; codes not listed below are MEDIUM.
 */
function classifyError(error: ParserError): ErrorSeverity {
  const { code } = error;

  // Input ended in the middle of a tag: critical structural errors.
  if (code === ErrorCodes.eofInTag || code === ErrorCodes.eofBeforeTagName) {
    return ErrorSeverity.CRITICAL;
  }

  // High priority errors affecting parsing.
  if (
    code === ErrorCodes.missingEndTagName ||
    code === ErrorCodes.unexpectedCharacterInAttributeName
  ) {
    return ErrorSeverity.HIGH;
  }

  // Low priority cosmetic issues.
  if (
    code === ErrorCodes.unexpectedNullCharacter ||
    code === ErrorCodes.controlCharacterInInputStream
  ) {
    return ErrorSeverity.LOW;
  }

  // Medium priority: the formatting issues (missing whitespace between
  // attributes, end tag with attributes) and every code not handled above.
  return ErrorSeverity.MEDIUM;
}

Validation and Cleanup

import { parse, serialize } from "parse5";
import type { ParserError } from "parse5";

/** Outcome of `validateAndCleanHTML`. */
interface ValidationResult {
  /** True when parsing reported no errors */
  isValid: boolean;
  /** Every parse error reported, in reporting order */
  errors: ParserError[];
  /** Re-serialized markup of the parsed document; set only when errors were reported */
  cleanedHTML?: string;
  /** Number of entries in `errors` */
  errorCount: number;
}

/**
 * Parses `html`, gathers the reported parse errors and, when there are any,
 * also returns the markup obtained by serializing the parsed document.
 *
 * @param html - Markup to validate.
 * @returns The validation outcome; `cleanedHTML` is present only when errors were reported.
 */
function validateAndCleanHTML(html: string): ValidationResult {
  const collected: ParserError[] = [];

  const document = parse(html, {
    onParseError: (error) => {
      collected.push(error);
    }
  });

  const errorCount = collected.length;

  // Error-free input: nothing to clean, so `cleanedHTML` is left out.
  if (errorCount === 0) {
    return { isValid: true, errors: collected, errorCount };
  }

  return {
    isValid: false,
    errors: collected,
    errorCount,
    cleanedHTML: serialize(document)
  };
}

// Usage: on failure, report the error count and the re-serialized markup
const validation = validateAndCleanHTML('<div><span></div>');
const { isValid, errorCount, cleanedHTML } = validation;
if (!isValid) {
  console.log(`Found ${errorCount} errors`);
  console.log('Cleaned HTML:', cleanedHTML);
}

Error Context Enhancement

import { parse } from "parse5";
import type { ParserError } from "parse5";

/** A `ParserError` augmented with reader-friendly details. */
interface EnhancedError extends ParserError {
  /** Excerpt of the source line surrounding the error position */
  context: string;
  /** Optional human-readable hint on how to fix the error */
  suggestion?: string;
}

/**
 * Parses `html` and returns every reported parse error, each extended with an
 * excerpt of the offending source line and a human-readable suggestion.
 *
 * @param html - Markup to parse.
 * @returns One `EnhancedError` per reported parse error, in reporting order.
 */
function parseWithEnhancedErrors(html: string): EnhancedError[] {
  // Number of source characters shown on each side of the error span.
  const contextRadius = 10;

  // HTML input-stream preprocessing normalizes CRLF and lone CR to LF, so the
  // reported line numbers count all three as line breaks; split the same way.
  const lines = html.split(/\r\n|\r|\n/);
  const enhancedErrors: EnhancedError[] = [];

  parse(html, {
    onParseError: (error) => {
      const line = lines[error.startLine - 1] ?? '';

      // Columns are one-based, string indices zero-based: subtract 1.
      const spanStart = error.startCol - 1;
      // `Location` documents endCol as pointing just past the last character;
      // assumed to hold for errors too, making `endCol - 1` an exclusive end.
      // endCol refers to endLine, so for an error that ends on a later line
      // the excerpt runs to the end of the start line instead.
      const spanEnd =
        error.endLine === error.startLine ? error.endCol - 1 : line.length;

      const contextStart = Math.max(0, spanStart - contextRadius);
      const contextEnd = Math.min(line.length, spanEnd + contextRadius);
      const context = line.substring(contextStart, contextEnd);

      const enhanced: EnhancedError = {
        ...error,
        context,
        suggestion: getSuggestionForError(error.code)
      };

      enhancedErrors.push(enhanced);
    }
  });

  return enhancedErrors;
}

/**
 * Returns a short fix-it hint for a parse error code.
 *
 * @param code - Error code string, e.g. 'missing-end-tag-name'.
 * @returns A code-specific hint, or a generic pointer to the HTML
 *          specification for codes without a dedicated hint.
 */
function getSuggestionForError(code: string): string {
  if (code === 'end-tag-with-attributes') {
    return 'Remove attributes from closing tags';
  }
  if (code === 'missing-end-tag-name') {
    return 'Add tag name after </ in closing tag';
  }
  if (code === 'unexpected-null-character') {
    return 'Remove or replace null characters';
  }
  // No dedicated hint for this code.
  return 'Check HTML specification for this error';
}

Common Error Scenarios

Malformed Tags

import { parse } from "parse5";

// Shared handler: prints the code of every reported parse error.
const logErrorCode = (error: { code: string }): void => {
  console.log(error.code);
};

// End tag without a tag name: expect 'missing-end-tag-name' among the logged codes
const html1 = '<div>Content</>';
parse(html1, { onParseError: logErrorCode });

// End tag carrying attributes: expect 'end-tag-with-attributes' among the logged codes
const html2 = '<div>Content</div class="error">';
parse(html2, { onParseError: logErrorCode });

Character Encoding Issues

import { parse } from "parse5";

// Shared handler: prints the code of every reported parse error.
const logErrorCode = (error: { code: string }): void => {
  console.log(error.code);
};

// A NUL character inside text content: expect 'unexpected-null-character' among the logged codes
const htmlWithNull = '<div>Content\0here</div>';
parse(htmlWithNull, { onParseError: logErrorCode });

// A named reference that does not exist: expect 'unknown-named-character-reference' among the logged codes
const htmlWithBadEntity = '<div>Content &invalid; here</div>';
parse(htmlWithBadEntity, { onParseError: logErrorCode });

DOCTYPE Errors

import { parse } from "parse5";

// Prints the code of every reported parse error.
const logErrorCode = (error: { code: string }): void => {
  console.log(error.code);
};

// DOCTYPE with a SYSTEM keyword but no identifier: a DOCTYPE-related code is logged
// (presumably 'missing-doctype-system-identifier' — confirm by running the example)
const htmlWithBadDoctype = '<!DOCTYPE html SYSTEM>';
parse(htmlWithBadDoctype, { onParseError: logErrorCode });

Install with Tessl CLI

npx tessl i tessl/npm-parse5

docs

error-handling.md

html-utilities.md

index.md

parsing.md

serialization.md

tokenization.md

tree-adapters.md

tile.json