CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/npm-parse5

HTML parser and serializer that is fully compliant with the WHATWG HTML Living Standard.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

html-utilities.mddocs/

HTML Constants and Utilities

HTML specification constants, enumerations, and utility functions that provide access to standardized HTML element names, namespace URIs, document modes, and other HTML5 specification details.

Capabilities

HTML Namespace

The `html` namespace exported by parse5 groups the constants, enumerations, and utility functions documented on this page.

/**
 * HTML namespace containing constants and utilities.
 *
 * Members documented on this page and accessed as `html.<member>`:
 * `NS`, `TAG_NAMES`, `TAG_ID`, `DOCUMENT_MODE`, `ATTRS`, `getTagID`
 * and `hasUnescapedText`.
 */
namespace html {
  // Core namespace and utility functions
}

Namespace Constants

Namespace URIs for HTML, MathML, SVG, XLink, XML, and XMLNS, as defined by web standards.

/**
 * Namespace URI constants.
 *
 * Each member's value is the full namespace URI string, so it can be
 * compared directly with an element's `namespaceURI`
 * (e.g. `element.namespaceURI === html.NS.SVG`).
 */
enum NS {
  /** HTML namespace URI */
  HTML = 'http://www.w3.org/1999/xhtml',
  
  /** MathML namespace URI */
  MATHML = 'http://www.w3.org/1998/Math/MathML',
  
  /** SVG namespace URI */
  SVG = 'http://www.w3.org/2000/svg',
  
  /** XLink namespace URI */
  XLINK = 'http://www.w3.org/1999/xlink',
  
  /** XML namespace URI */
  XML = 'http://www.w3.org/XML/1998/namespace',
  
  /** XMLNS namespace URI (note the trailing slash is part of the value) */
  XMLNS = 'http://www.w3.org/2000/xmlns/'
}

Usage Examples:

import { html } from "parse5";

// Check element namespace
/**
 * Report whether `element` lives in the HTML namespace.
 *
 * @param element - Element whose `namespaceURI` is inspected
 * @returns True when the namespace URI equals `html.NS.HTML`
 */
function isHTMLElement(element: Element): boolean {
  const { namespaceURI } = element;
  return html.NS.HTML === namespaceURI;
}

/**
 * Report whether `element` lives in the SVG namespace.
 *
 * @param element - Element whose `namespaceURI` is inspected
 * @returns True when the namespace URI equals `html.NS.SVG`
 */
function isSVGElement(element: Element): boolean {
  const { namespaceURI } = element;
  return html.NS.SVG === namespaceURI;
}

/**
 * Report whether `element` lives in the MathML namespace.
 *
 * @param element - Element whose `namespaceURI` is inspected
 * @returns True when the namespace URI equals `html.NS.MATHML`
 */
function isMathMLElement(element: Element): boolean {
  const { namespaceURI } = element;
  return html.NS.MATHML === namespaceURI;
}

HTML Tag Names

Standardized HTML tag name constants.

/**
 * HTML tag name constants.
 *
 * Each member maps an upper-case identifier to the tag name string.
 * Identifiers use `_` where the tag name cannot be written as an identifier
 * or is camel-cased (`ANNOTATION_XML`, `FOREIGN_OBJECT`).
 *
 * Members are listed in the same order as the members of `TAG_ID`
 * (which additionally starts with `UNKNOWN`); the order is not strictly
 * alphabetical (e.g. `IMG` precedes `IMAGE`, and `SVG` sits between `UL`
 * and `VAR`).
 */
enum TAG_NAMES {
  A = 'a',
  ADDRESS = 'address',
  // Hyphenated tag name; the identifier uses an underscore instead
  ANNOTATION_XML = 'annotation-xml',
  APPLET = 'applet',
  AREA = 'area',
  ARTICLE = 'article',
  ASIDE = 'aside',
  B = 'b',
  BASE = 'base',
  BASEFONT = 'basefont',
  BGSOUND = 'bgsound',
  BIG = 'big',
  BLOCKQUOTE = 'blockquote',
  BODY = 'body',
  BR = 'br',
  BUTTON = 'button',
  CAPTION = 'caption',
  CENTER = 'center',
  CODE = 'code',
  COL = 'col',
  COLGROUP = 'colgroup',
  DD = 'dd',
  DESC = 'desc',
  DETAILS = 'details',
  DIALOG = 'dialog',
  DIR = 'dir',
  DIV = 'div',
  DL = 'dl',
  DT = 'dt',
  EM = 'em',
  EMBED = 'embed',
  FIELDSET = 'fieldset',
  FIGCAPTION = 'figcaption',
  FIGURE = 'figure',
  FONT = 'font',
  FOOTER = 'footer',
  // The only value in this enum that contains an upper-case letter
  FOREIGN_OBJECT = 'foreignObject',
  FORM = 'form',
  FRAME = 'frame',
  FRAMESET = 'frameset',
  H1 = 'h1',
  H2 = 'h2',
  H3 = 'h3',
  H4 = 'h4',
  H5 = 'h5',
  H6 = 'h6',
  HEAD = 'head',
  HEADER = 'header',
  HGROUP = 'hgroup',
  HR = 'hr',
  HTML = 'html',
  I = 'i',
  IMG = 'img',
  IMAGE = 'image',
  INPUT = 'input',
  IFRAME = 'iframe',
  KEYGEN = 'keygen',
  LABEL = 'label',
  LI = 'li',
  LINK = 'link',
  LISTING = 'listing',
  MAIN = 'main',
  MALIGNMARK = 'malignmark',
  MARQUEE = 'marquee',
  MATH = 'math',
  MENU = 'menu',
  META = 'meta',
  MGLYPH = 'mglyph',
  MI = 'mi',
  MO = 'mo',
  MN = 'mn',
  MS = 'ms',
  MTEXT = 'mtext',
  NAV = 'nav',
  NOBR = 'nobr',
  NOFRAMES = 'noframes',
  NOEMBED = 'noembed',
  NOSCRIPT = 'noscript',
  OBJECT = 'object',
  OL = 'ol',
  OPTGROUP = 'optgroup',
  OPTION = 'option',
  P = 'p',
  PARAM = 'param',
  PICTURE = 'picture',
  PLAINTEXT = 'plaintext',
  PRE = 'pre',
  RB = 'rb',
  RP = 'rp',
  RT = 'rt',
  RTC = 'rtc',
  RUBY = 'ruby',
  S = 's',
  SCRIPT = 'script',
  SEARCH = 'search',
  SECTION = 'section',
  SELECT = 'select',
  SOURCE = 'source',
  SMALL = 'small',
  SPAN = 'span',
  STRIKE = 'strike',
  STRONG = 'strong',
  STYLE = 'style',
  SUB = 'sub',
  SUMMARY = 'summary',
  SUP = 'sup',
  TABLE = 'table',
  TBODY = 'tbody',
  TEMPLATE = 'template',
  TEXTAREA = 'textarea',
  TFOOT = 'tfoot',
  TD = 'td',
  TH = 'th',
  THEAD = 'thead',
  TITLE = 'title',
  TR = 'tr',
  TRACK = 'track',
  TT = 'tt',
  U = 'u',
  UL = 'ul',
  SVG = 'svg',
  VAR = 'var',
  WBR = 'wbr',
  XMP = 'xmp'
}

HTML Tag IDs

Numeric tag identifiers that allow tags to be compared as integers instead of strings; obtain one from a tag name with `getTagID`.

/**
 * Numeric identifier for each known tag, used for fast comparisons.
 *
 * `UNKNOWN` is 0 and the remaining members count upward by one in
 * declaration order, ending at `XMP` = 124. Members are laid out ten per row;
 * the first member of every row carries an explicit value so the numbering
 * stays easy to audit, and the rest of the row auto-increments from it.
 */
enum TAG_ID {
  // 0–9
  UNKNOWN = 0, A, ADDRESS, ANNOTATION_XML, APPLET, AREA, ARTICLE, ASIDE, B, BASE,
  // 10–19
  BASEFONT = 10, BGSOUND, BIG, BLOCKQUOTE, BODY, BR, BUTTON, CAPTION, CENTER, CODE,
  // 20–29
  COL = 20, COLGROUP, DD, DESC, DETAILS, DIALOG, DIR, DIV, DL, DT,
  // 30–39
  EM = 30, EMBED, FIELDSET, FIGCAPTION, FIGURE, FONT, FOOTER, FOREIGN_OBJECT, FORM, FRAME,
  // 40–49
  FRAMESET = 40, H1, H2, H3, H4, H5, H6, HEAD, HEADER, HGROUP,
  // 50–59
  HR = 50, HTML, I, IMG, IMAGE, INPUT, IFRAME, KEYGEN, LABEL, LI,
  // 60–69
  LINK = 60, LISTING, MAIN, MALIGNMARK, MARQUEE, MATH, MENU, META, MGLYPH, MI,
  // 70–79
  MO = 70, MN, MS, MTEXT, NAV, NOBR, NOFRAMES, NOEMBED, NOSCRIPT, OBJECT,
  // 80–89
  OL = 80, OPTGROUP, OPTION, P, PARAM, PICTURE, PLAINTEXT, PRE, RB, RP,
  // 90–99
  RT = 90, RTC, RUBY, S, SCRIPT, SEARCH, SECTION, SELECT, SOURCE, SMALL,
  // 100–109
  SPAN = 100, STRIKE, STRONG, STYLE, SUB, SUMMARY, SUP, TABLE, TBODY, TEMPLATE,
  // 110–119
  TEXTAREA = 110, TFOOT, TD, TH, THEAD, TITLE, TR, TRACK, TT, U,
  // 120–124
  UL = 120, SVG, VAR, WBR, XMP
}

Document Mode Constants

HTML document mode enumeration for quirks mode handling.

/**
 * Document mode constants for quirks mode handling.
 *
 * The three members are string-valued, so a mode can be compared with or
 * serialized as its plain string (`'no-quirks'`, `'quirks'`,
 * `'limited-quirks'`).
 */
enum DOCUMENT_MODE {
  /** Standards mode (no quirks) */
  NO_QUIRKS = 'no-quirks',
  
  /** Quirks mode */
  QUIRKS = 'quirks',
  
  /** Limited quirks mode */
  LIMITED_QUIRKS = 'limited-quirks'
}

HTML Attributes

Common HTML attribute name constants.

/**
 * Common HTML attribute name constants.
 *
 * Each member maps an upper-case identifier to the attribute name string.
 * Identifiers use `_` where the attribute name contains a hyphen
 * (`HTTP_EQUIV`, `ACCEPT_CHARSET`).
 *
 * Several values are also tag names in `TAG_NAMES` (`style`, `dir`, `title`,
 * `form`, `label`, `span`), so choose the enum by what the string denotes.
 *
 * NOTE(review): this page does not establish which of these members the
 * installed parse5 release actually exports — verify before relying on one.
 */
enum ATTRS {
  TYPE = 'type',
  ACTION = 'action',
  ENCODING = 'encoding',
  PROMPT = 'prompt',
  NAME = 'name',
  COLOR = 'color',
  FACE = 'face',
  SIZE = 'size',
  CLASS = 'class',
  ID = 'id',
  STYLE = 'style',
  LANG = 'lang',
  DIR = 'dir',
  TITLE = 'title',
  ALT = 'alt',
  SRC = 'src',
  HREF = 'href',
  REL = 'rel',
  MEDIA = 'media',
  CHARSET = 'charset',
  CONTENT = 'content',
  // Hyphenated attribute name; the identifier uses an underscore instead
  HTTP_EQUIV = 'http-equiv',
  VALUE = 'value',
  CHECKED = 'checked',
  SELECTED = 'selected',
  DISABLED = 'disabled',
  READONLY = 'readonly',
  MULTIPLE = 'multiple',
  HIDDEN = 'hidden',
  REQUIRED = 'required',
  AUTOFOCUS = 'autofocus',
  PLACEHOLDER = 'placeholder',
  PATTERN = 'pattern',
  MIN = 'min',
  MAX = 'max',
  STEP = 'step',
  MAXLENGTH = 'maxlength',
  MINLENGTH = 'minlength',
  COLS = 'cols',
  ROWS = 'rows',
  WRAP = 'wrap',
  ACCEPT = 'accept',
  // Hyphenated attribute name; the identifier uses an underscore instead
  ACCEPT_CHARSET = 'accept-charset',
  ACCESSKEY = 'accesskey',
  CONTENTEDITABLE = 'contenteditable',
  CONTEXTMENU = 'contextmenu',
  CONTROLS = 'controls',
  COORDS = 'coords',
  CROSSORIGIN = 'crossorigin',
  DATA = 'data',
  DATETIME = 'datetime',
  DEFAULT = 'default',
  DEFER = 'defer',
  DRAGGABLE = 'draggable',
  DROPZONE = 'dropzone',
  ENCTYPE = 'enctype',
  FOR = 'for',
  FORM = 'form',
  FORMACTION = 'formaction',
  FORMENCTYPE = 'formenctype',
  FORMMETHOD = 'formmethod',
  FORMNOVALIDATE = 'formnovalidate',
  FORMTARGET = 'formtarget',
  FRAMEBORDER = 'frameborder',
  HEADERS = 'headers',
  HEIGHT = 'height',
  HIGH = 'high',
  HREFLANG = 'hreflang',
  ICON = 'icon',
  ISMAP = 'ismap',
  ITEMID = 'itemid',
  ITEMPROP = 'itemprop',
  ITEMREF = 'itemref',
  ITEMSCOPE = 'itemscope',
  ITEMTYPE = 'itemtype',
  KIND = 'kind',
  LABEL = 'label',
  LIST = 'list',
  LOOP = 'loop',
  LOW = 'low',
  MANIFEST = 'manifest',
  MARGINHEIGHT = 'marginheight',
  MARGINWIDTH = 'marginwidth',
  METHOD = 'method',
  MUTED = 'muted',
  NOVALIDATE = 'novalidate',
  OPEN = 'open',
  OPTIMUM = 'optimum',
  PING = 'ping',
  POSTER = 'poster',
  PRELOAD = 'preload',
  RADIOGROUP = 'radiogroup',
  REVERSED = 'reversed',
  ROLE = 'role',
  SANDBOX = 'sandbox',
  SCOPE = 'scope',
  SCOPED = 'scoped',
  SEAMLESS = 'seamless',
  SHAPE = 'shape',
  SIZES = 'sizes',
  SPAN = 'span',
  SPELLCHECK = 'spellcheck',
  SRCDOC = 'srcdoc',
  SRCLANG = 'srclang',
  SRCSET = 'srcset',
  START = 'start',
  TABINDEX = 'tabindex',
  TARGET = 'target',
  TRANSLATE = 'translate',
  USEMAP = 'usemap',
  WIDTH = 'width'
}

HTML Utility Functions

Utility functions for HTML processing and tag identification.

/**
 * Get numeric tag ID from tag name string.
 *
 * The result is meant to be compared with `TAG_ID` members, e.g.
 * `getTagID('div') === TAG_ID.DIV`.
 *
 * NOTE(review): whether the lookup is case-sensitive is not stated on this
 * page — confirm before passing tag names that are not already lower-case.
 *
 * @param tagName - HTML tag name
 * @returns Corresponding TAG_ID or TAG_ID.UNKNOWN
 */
function getTagID(tagName: string): TAG_ID;

/**
 * Check if a tag contains unescaped text content.
 *
 * The usage example on this page documents
 * `hasUnescapedText('script', true)` as returning `true`.
 *
 * NOTE(review): which tags depend on `scriptingEnabled` is not stated on
 * this page — confirm against the parse5 implementation.
 *
 * @param tagName - HTML tag name
 * @param scriptingEnabled - Whether scripting is enabled
 * @returns True if tag contains unescaped text
 */
function hasUnescapedText(tagName: string, scriptingEnabled: boolean): boolean;

Usage Examples:

import { html } from "parse5";

// Resolve tag names to numeric IDs once; later checks are then plain
// integer comparisons against TAG_ID members.
const divTagId = html.getTagID('div');
const spanTagId = html.getTagID('span');

if (html.TAG_ID.DIV === divTagId) {
  console.log('Found div tag');
}

// Ask whether <script> holds unescaped text content when scripting is enabled
const hasUnescaped = html.hasUnescapedText('script', true);
console.log('Script has unescaped text:', hasUnescaped); // true

// Use tag name constants
/**
 * Report whether `tagName` is one of the six heading tag names (h1–h6).
 *
 * The comparison is exact and case-sensitive against the `TAG_NAMES` values.
 *
 * @param tagName - Tag name to test
 * @returns True when `tagName` equals one of `TAG_NAMES.H1` … `TAG_NAMES.H6`
 */
function isHeadingTag(tagName: string): boolean {
  // Typing the list as strings lets `includes` accept any tag name without
  // resorting to an `as any` assertion on the argument.
  const headingNames: readonly string[] = [
    html.TAG_NAMES.H1,
    html.TAG_NAMES.H2,
    html.TAG_NAMES.H3,
    html.TAG_NAMES.H4,
    html.TAG_NAMES.H5,
    html.TAG_NAMES.H6
  ];
  return headingNames.includes(tagName);
}

// Use attribute constants
/**
 * Report whether the element's `class` attribute contains the class token
 * `required`.
 *
 * The attribute value is split on whitespace and tokens are compared whole,
 * so values such as `not-required` or `required-field` do not match.
 *
 * @param element - Element whose `class` attribute is read
 * @returns True when `required` is one of the class tokens; false when the
 *          attribute is absent or does not list that token
 */
function hasRequiredClass(element: Element): boolean {
  const className = element.getAttribute(html.ATTRS.CLASS);
  if (className == null) {
    return false;
  }
  return className.split(/\s+/).includes('required');
}

// Use namespace constants
/**
 * Create an element through `document.createElementNS`.
 *
 * @param tagName - Qualified name passed to `createElementNS`
 * @param namespace - Namespace URI; defaults to `html.NS.HTML`
 * @returns The element returned by `document.createElementNS`
 */
function createElement(tagName: string, namespace = html.NS.HTML): Element {
  const created = document.createElementNS(namespace, tagName);
  return created;
}

HTML Specification Compliance Patterns

Tag Validation

import { html } from "parse5";

/**
 * Classifies tag names by looking up their numeric tag ID
 * (via `html.getTagID`) in fixed sets of `html.TAG_ID` members.
 *
 * All three lookup sets are built once per instance, so each query is a
 * single ID lookup plus one `Set.has` call.
 */
class HTMLValidator {
  /** Tag IDs this validator reports as void elements. */
  private readonly voidElements = new Set([
    html.TAG_ID.AREA, html.TAG_ID.BASE, html.TAG_ID.BR,
    html.TAG_ID.COL, html.TAG_ID.EMBED, html.TAG_ID.HR,
    html.TAG_ID.IMG, html.TAG_ID.INPUT, html.TAG_ID.LINK,
    html.TAG_ID.META, html.TAG_ID.PARAM, html.TAG_ID.SOURCE,
    html.TAG_ID.TRACK, html.TAG_ID.WBR
  ]);

  /** Tag IDs this validator reports as block elements. */
  private readonly blockElements = new Set([
    html.TAG_ID.DIV, html.TAG_ID.P, html.TAG_ID.H1,
    html.TAG_ID.H2, html.TAG_ID.H3, html.TAG_ID.H4,
    html.TAG_ID.H5, html.TAG_ID.H6, html.TAG_ID.HEADER,
    html.TAG_ID.FOOTER, html.TAG_ID.SECTION, html.TAG_ID.ARTICLE,
    html.TAG_ID.ASIDE, html.TAG_ID.NAV, html.TAG_ID.MAIN
  ]);

  /** Tag IDs this validator reports as inline elements. */
  private readonly inlineElements = new Set([
    html.TAG_ID.SPAN, html.TAG_ID.A, html.TAG_ID.STRONG,
    html.TAG_ID.EM, html.TAG_ID.CODE, html.TAG_ID.B,
    html.TAG_ID.I, html.TAG_ID.U, html.TAG_ID.S
  ]);

  /**
   * @param tagName - Tag name to classify
   * @returns True when the tag's ID is in the void-element set
   */
  isVoidElement(tagName: string): boolean {
    return this.voidElements.has(html.getTagID(tagName));
  }

  /**
   * @param tagName - Tag name to classify
   * @returns True when the tag's ID is in the block-element set
   */
  isBlockElement(tagName: string): boolean {
    return this.blockElements.has(html.getTagID(tagName));
  }

  /**
   * @param tagName - Tag name to classify
   * @returns True when the tag's ID is in the inline-element set
   */
  isInlineElement(tagName: string): boolean {
    return this.inlineElements.has(html.getTagID(tagName));
  }
}

Namespace-Aware Element Creation

import { html } from "parse5";

/**
 * Creates elements through `document.createElementNS`, choosing a namespace
 * from the tag name when the caller does not supply one.
 */
class NamespaceAwareBuilder {
  /**
   * Create an element, inferring the namespace when none is given.
   *
   * A missing or empty `namespace` is inferred from `tagName`: the `svg` tag
   * name selects the SVG namespace, `math` selects MathML, and anything else
   * selects HTML.
   *
   * @param tagName - Name passed to `createElementNS`
   * @param namespace - Optional namespace URI
   * @returns The element returned by `document.createElementNS`
   */
  createElement(tagName: string, namespace?: string): Element {
    let resolvedNamespace = namespace;

    if (!resolvedNamespace) {
      switch (tagName) {
        case html.TAG_NAMES.SVG:
          resolvedNamespace = html.NS.SVG;
          break;
        case html.TAG_NAMES.MATH:
          resolvedNamespace = html.NS.MATHML;
          break;
        default:
          resolvedNamespace = html.NS.HTML;
      }
    }

    return document.createElementNS(resolvedNamespace, tagName);
  }

  /** Create an element in the SVG namespace. */
  createSVGElement(tagName: string): Element {
    const svgNamespace = html.NS.SVG;
    return this.createElement(tagName, svgNamespace);
  }

  /** Create an element in the MathML namespace. */
  createMathMLElement(tagName: string): Element {
    const mathNamespace = html.NS.MATHML;
    return this.createElement(tagName, mathNamespace);
  }

  /** Report whether the element's namespace is anything other than HTML. */
  isForeignElement(element: Element): boolean {
    const inHtmlNamespace = element.namespaceURI === html.NS.HTML;
    return !inHtmlNamespace;
  }
}

Document Mode Detection

import { html } from "parse5";

/**
 * Example detector that maps a doctype declaration string (for instance
 * `<!DOCTYPE html>`) to a document mode and reports the chosen mode.
 *
 * Matching is a simplified, substring-based approximation of the doctype
 * rules in the HTML Standard; it is not a full implementation of them.
 */
class DocumentModeDetector {
  /**
   * Determine the document mode implied by a doctype declaration.
   *
   * - Missing, empty, or whitespace-only input: quirks mode.
   * - `<!DOCTYPE html>` (any letter case, surrounding whitespace ignored):
   *   no-quirks mode.
   * - XHTML 1.0 Transitional / Frameset: limited-quirks mode.
   * - HTML 4.01 Transitional / Frameset: limited-quirks mode when a system
   *   identifier follows the public identifier, quirks mode when it is absent.
   * - Anything else: no-quirks mode.
   *
   * @param doctype - Full doctype declaration text, or null when absent
   * @returns The detected document mode
   */
  detectMode(doctype: string | null): html.DOCUMENT_MODE {
    // Trim first so that whitespace-only input counts as "no doctype".
    const declaration = doctype?.trim().toLowerCase() ?? '';
    if (declaration === '') {
      return html.DOCUMENT_MODE.QUIRKS;
    }

    // HTML5 doctype
    if (declaration === '<!doctype html>') {
      return html.DOCUMENT_MODE.NO_QUIRKS;
    }

    const mentionsAny = (patterns: readonly string[]): boolean =>
      patterns.some(pattern => declaration.includes(pattern));

    // XHTML 1.0 Transitional/Frameset always select limited quirks.
    if (mentionsAny(['xhtml 1.0 transitional', 'xhtml 1.0 frameset'])) {
      return html.DOCUMENT_MODE.LIMITED_QUIRKS;
    }

    // HTML 4.01 Transitional/Frameset select limited quirks only when a
    // system identifier is present; without one they select full quirks.
    if (mentionsAny(['html 4.01 transitional', 'html 4.01 frameset'])) {
      // The public identifier is the first quoted literal, the system
      // identifier (if any) the second.
      const quotedLiterals = declaration.match(/"[^"]*"|'[^']*'/g) ?? [];
      return quotedLiterals.length >= 2
        ? html.DOCUMENT_MODE.LIMITED_QUIRKS
        : html.DOCUMENT_MODE.QUIRKS;
    }

    // Default to no-quirks for unrecognized doctypes
    return html.DOCUMENT_MODE.NO_QUIRKS;
  }

  /**
   * Log which mode is being applied.
   *
   * @param mode - Document mode to report; values outside the three known
   *               modes produce no output
   */
  applyQuirksMode(mode: html.DOCUMENT_MODE): void {
    switch (mode) {
      case html.DOCUMENT_MODE.QUIRKS:
        console.log('Applying full quirks mode');
        break;
      case html.DOCUMENT_MODE.LIMITED_QUIRKS:
        console.log('Applying limited quirks mode');
        break;
      case html.DOCUMENT_MODE.NO_QUIRKS:
        console.log('Applying standards mode');
        break;
    }
  }
}

Attribute Processing

import { html } from "parse5";

/**
 * Example helper for classifying, normalizing, and validating attributes.
 */
class AttributeProcessor {
  /**
   * Names treated as boolean attributes.
   *
   * Stored as plain strings so arbitrary attribute names can be looked up
   * without a type assertion.
   */
  private readonly booleanAttributes: ReadonlySet<string> = new Set<string>([
    html.ATTRS.CHECKED, html.ATTRS.SELECTED, html.ATTRS.DISABLED,
    html.ATTRS.READONLY, html.ATTRS.MULTIPLE, html.ATTRS.HIDDEN,
    html.ATTRS.REQUIRED, html.ATTRS.AUTOFOCUS, html.ATTRS.DEFER,
    // Written as a literal: the ATTRS enum as documented declares no ASYNC
    // member, so `html.ATTRS.ASYNC` would put `undefined` into the set.
    'async', html.ATTRS.CONTROLS, html.ATTRS.LOOP,
    html.ATTRS.MUTED, html.ATTRS.OPEN, html.ATTRS.REVERSED
  ]);

  /**
   * @param name - Attribute name (compared exactly, case-sensitively)
   * @returns True when `name` is in the boolean-attribute set
   */
  isBooleanAttribute(name: string): boolean {
    return this.booleanAttributes.has(name);
  }

  /**
   * Normalize an attribute value.
   *
   * @param name - Attribute name
   * @param value - Attribute value as written
   * @returns For boolean attributes, whether a value is present; otherwise
   *          the value unchanged
   */
  normalizeAttributeValue(name: string, value: string): string | boolean {
    if (this.isBooleanAttribute(name)) {
      // Boolean attributes: presence = true, absence = false.
      // With `value` typed as `string` this is always true (even for '');
      // the null check only matters for untyped callers that pass null.
      return value !== null;
    }

    // Standard attributes
    return value;
  }

  /**
   * Apply a few example validation rules keyed on the tag.
   *
   * - `img`: `src` must be non-empty; `alt` may be empty.
   * - `a`: `href` must be non-empty.
   * - `input`: `type` must be one of a fixed list of type names.
   * - Every other tag/attribute combination is accepted.
   *
   * @param tagName - Tag name, resolved with `html.getTagID`
   * @param attrName - Attribute name
   * @param attrValue - Attribute value
   * @returns True when the attribute passes the rules above
   */
  validateAttribute(tagName: string, attrName: string, attrValue: string): boolean {
    const tagId = html.getTagID(tagName);

    // Example validation rules
    switch (tagId) {
      case html.TAG_ID.IMG:
        if (attrName === html.ATTRS.SRC) {
          return attrValue.length > 0;
        }
        if (attrName === html.ATTRS.ALT) {
          return true; // Alt can be empty
        }
        break;

      case html.TAG_ID.A:
        if (attrName === html.ATTRS.HREF) {
          return attrValue.length > 0;
        }
        break;

      case html.TAG_ID.INPUT:
        if (attrName === html.ATTRS.TYPE) {
          const validTypes = ['text', 'password', 'email', 'number', 'checkbox', 'radio', 'submit', 'button'];
          return validTypes.includes(attrValue);
        }
        break;
    }

    return true; // Default to valid
  }
}

Install with Tessl CLI

npx tessl i tessl/npm-parse5

docs

error-handling.md

html-utilities.md

index.md

parsing.md

serialization.md

tokenization.md

tree-adapters.md

tile.json