HTML parser and serializer that is fully compliant with the WHATWG HTML Living Standard.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
HTML specification constants, enumerations, and utility functions that provide access to standardized HTML element names, namespace URIs, document modes, and other HTML5 specification details.
Complete HTML constants and utilities namespace providing access to all HTML specification details.
/**
* HTML namespace containing constants and utilities.
*
* The usage examples in this document import it with
* `import { html } from "parse5"` and reach every member through it:
* `html.NS`, `html.TAG_NAMES`, `html.TAG_ID`, `html.DOCUMENT_MODE`,
* `html.ATTRS`, `html.getTagID` and `html.hasUnescapedText`.
*/
namespace html {
// Core namespace and utility functions
}HTML namespace URIs defined by web standards.
/**
* Namespace URI constants.
*
* String enum: each member's value is the namespace URI itself, so a member
* can be compared directly against a DOM `namespaceURI` value or passed to
* `document.createElementNS`, as the usage examples do.
*/
enum NS {
/** HTML namespace URI */
HTML = 'http://www.w3.org/1999/xhtml',
/** MathML namespace URI */
MATHML = 'http://www.w3.org/1998/Math/MathML',
/** SVG namespace URI */
SVG = 'http://www.w3.org/2000/svg',
/** XLink namespace URI */
XLINK = 'http://www.w3.org/1999/xlink',
/** XML namespace URI */
XML = 'http://www.w3.org/XML/1998/namespace',
/** XMLNS namespace URI */
XMLNS = 'http://www.w3.org/2000/xmlns/'
}Usage Examples:
import { html } from "parse5";
// Check element namespace
/**
 * Reports whether an element belongs to the HTML namespace.
 *
 * @param element - Element whose `namespaceURI` is inspected.
 * @returns `true` when the element's namespace URI equals `html.NS.HTML`.
 */
function isHTMLElement(element: Element): boolean {
  const { namespaceURI } = element;
  return html.NS.HTML === namespaceURI;
}
/**
 * Reports whether an element belongs to the SVG namespace.
 *
 * @param element - Element whose `namespaceURI` is inspected.
 * @returns `true` when the element's namespace URI equals `html.NS.SVG`.
 */
function isSVGElement(element: Element): boolean {
  const { namespaceURI } = element;
  return html.NS.SVG === namespaceURI;
}
/**
 * Reports whether an element belongs to the MathML namespace.
 *
 * @param element - Element whose `namespaceURI` is inspected.
 * @returns `true` when the element's namespace URI equals `html.NS.MATHML`.
 */
function isMathMLElement(element: Element): boolean {
  const { namespaceURI } = element;
  return html.NS.MATHML === namespaceURI;
}
Standardized HTML tag name constants.
/**
* HTML tag name constants.
*
* String enum: each member's value is the tag name as a string, so a member
* can be compared directly against a tag-name string.
* Values are lowercase except FOREIGN_OBJECT, whose value is the camelCase
* 'foreignObject'; ANNOTATION_XML keeps its hyphen ('annotation-xml').
* Members are listed in the same order as the numeric IDs in TAG_ID.
*/
enum TAG_NAMES {
A = 'a',
ADDRESS = 'address',
ANNOTATION_XML = 'annotation-xml',
APPLET = 'applet',
AREA = 'area',
ARTICLE = 'article',
ASIDE = 'aside',
B = 'b',
BASE = 'base',
BASEFONT = 'basefont',
BGSOUND = 'bgsound',
BIG = 'big',
BLOCKQUOTE = 'blockquote',
BODY = 'body',
BR = 'br',
BUTTON = 'button',
CAPTION = 'caption',
CENTER = 'center',
CODE = 'code',
COL = 'col',
COLGROUP = 'colgroup',
DD = 'dd',
DESC = 'desc',
DETAILS = 'details',
DIALOG = 'dialog',
DIR = 'dir',
DIV = 'div',
DL = 'dl',
DT = 'dt',
EM = 'em',
EMBED = 'embed',
FIELDSET = 'fieldset',
FIGCAPTION = 'figcaption',
FIGURE = 'figure',
FONT = 'font',
FOOTER = 'footer',
// The only member whose value is not all-lowercase
FOREIGN_OBJECT = 'foreignObject',
FORM = 'form',
FRAME = 'frame',
FRAMESET = 'frameset',
H1 = 'h1',
H2 = 'h2',
H3 = 'h3',
H4 = 'h4',
H5 = 'h5',
H6 = 'h6',
HEAD = 'head',
HEADER = 'header',
HGROUP = 'hgroup',
HR = 'hr',
HTML = 'html',
I = 'i',
IMG = 'img',
IMAGE = 'image',
INPUT = 'input',
IFRAME = 'iframe',
KEYGEN = 'keygen',
LABEL = 'label',
LI = 'li',
LINK = 'link',
LISTING = 'listing',
MAIN = 'main',
MALIGNMARK = 'malignmark',
MARQUEE = 'marquee',
MATH = 'math',
MENU = 'menu',
META = 'meta',
MGLYPH = 'mglyph',
MI = 'mi',
MO = 'mo',
MN = 'mn',
MS = 'ms',
MTEXT = 'mtext',
NAV = 'nav',
NOBR = 'nobr',
NOFRAMES = 'noframes',
NOEMBED = 'noembed',
NOSCRIPT = 'noscript',
OBJECT = 'object',
OL = 'ol',
OPTGROUP = 'optgroup',
OPTION = 'option',
P = 'p',
PARAM = 'param',
PICTURE = 'picture',
PLAINTEXT = 'plaintext',
PRE = 'pre',
RB = 'rb',
RP = 'rp',
RT = 'rt',
RTC = 'rtc',
RUBY = 'ruby',
S = 's',
SCRIPT = 'script',
SEARCH = 'search',
SECTION = 'section',
SELECT = 'select',
SOURCE = 'source',
SMALL = 'small',
SPAN = 'span',
STRIKE = 'strike',
STRONG = 'strong',
STYLE = 'style',
SUB = 'sub',
SUMMARY = 'summary',
SUP = 'sup',
TABLE = 'table',
TBODY = 'tbody',
TEMPLATE = 'template',
TEXTAREA = 'textarea',
TFOOT = 'tfoot',
TD = 'td',
TH = 'th',
THEAD = 'thead',
TITLE = 'title',
TR = 'tr',
TRACK = 'track',
TT = 'tt',
U = 'u',
UL = 'ul',
SVG = 'svg',
VAR = 'var',
WBR = 'wbr',
XMP = 'xmp'
}Numerical tag identifiers for efficient tag comparison.
/**
* HTML tag ID enumeration for efficient comparison.
*
* Numeric enum: UNKNOWN is 0 and the named tags are numbered 1 through 124
* with no gaps, in the same member order as TAG_NAMES.
* getTagID is documented as returning UNKNOWN when a tag name has no
* corresponding ID.
*/
enum TAG_ID {
// Sentinel for tag names without an entry in this enum
UNKNOWN = 0,
A = 1,
ADDRESS = 2,
ANNOTATION_XML = 3,
APPLET = 4,
AREA = 5,
ARTICLE = 6,
ASIDE = 7,
B = 8,
BASE = 9,
BASEFONT = 10,
BGSOUND = 11,
BIG = 12,
BLOCKQUOTE = 13,
BODY = 14,
BR = 15,
BUTTON = 16,
CAPTION = 17,
CENTER = 18,
CODE = 19,
COL = 20,
COLGROUP = 21,
DD = 22,
DESC = 23,
DETAILS = 24,
DIALOG = 25,
DIR = 26,
DIV = 27,
DL = 28,
DT = 29,
EM = 30,
EMBED = 31,
FIELDSET = 32,
FIGCAPTION = 33,
FIGURE = 34,
FONT = 35,
FOOTER = 36,
FOREIGN_OBJECT = 37,
FORM = 38,
FRAME = 39,
FRAMESET = 40,
H1 = 41,
H2 = 42,
H3 = 43,
H4 = 44,
H5 = 45,
H6 = 46,
HEAD = 47,
HEADER = 48,
HGROUP = 49,
HR = 50,
HTML = 51,
I = 52,
IMG = 53,
IMAGE = 54,
INPUT = 55,
IFRAME = 56,
KEYGEN = 57,
LABEL = 58,
LI = 59,
LINK = 60,
LISTING = 61,
MAIN = 62,
MALIGNMARK = 63,
MARQUEE = 64,
MATH = 65,
MENU = 66,
META = 67,
MGLYPH = 68,
MI = 69,
MO = 70,
MN = 71,
MS = 72,
MTEXT = 73,
NAV = 74,
NOBR = 75,
NOFRAMES = 76,
NOEMBED = 77,
NOSCRIPT = 78,
OBJECT = 79,
OL = 80,
OPTGROUP = 81,
OPTION = 82,
P = 83,
PARAM = 84,
PICTURE = 85,
PLAINTEXT = 86,
PRE = 87,
RB = 88,
RP = 89,
RT = 90,
RTC = 91,
RUBY = 92,
S = 93,
SCRIPT = 94,
SEARCH = 95,
SECTION = 96,
SELECT = 97,
SOURCE = 98,
SMALL = 99,
SPAN = 100,
STRIKE = 101,
STRONG = 102,
STYLE = 103,
SUB = 104,
SUMMARY = 105,
SUP = 106,
TABLE = 107,
TBODY = 108,
TEMPLATE = 109,
TEXTAREA = 110,
TFOOT = 111,
TD = 112,
TH = 113,
THEAD = 114,
TITLE = 115,
TR = 116,
TRACK = 117,
TT = 118,
U = 119,
UL = 120,
SVG = 121,
VAR = 122,
WBR = 123,
XMP = 124
}HTML document mode enumeration for quirks mode handling.
/**
* Document mode constants for quirks mode handling.
*
* String enum with exactly three members; each value is a lowercase,
* hyphen-separated string ('no-quirks', 'quirks', 'limited-quirks').
*/
enum DOCUMENT_MODE {
/** Standards mode (no quirks) */
NO_QUIRKS = 'no-quirks',
/** Quirks mode */
QUIRKS = 'quirks',
/** Limited quirks mode */
LIMITED_QUIRKS = 'limited-quirks'
}Common HTML attribute name constants.
/**
* Common HTML attribute name constants.
*
* String enum: each member's value is the attribute name as a lowercase
* string, so a member can be passed where an attribute name is expected
* (the usage examples pass `html.ATTRS.CLASS` to `getAttribute`).
* Member names use an underscore where the attribute name has a hyphen
* (HTTP_EQUIV = 'http-equiv', ACCEPT_CHARSET = 'accept-charset').
*/
enum ATTRS {
TYPE = 'type',
ACTION = 'action',
ENCODING = 'encoding',
PROMPT = 'prompt',
NAME = 'name',
COLOR = 'color',
FACE = 'face',
SIZE = 'size',
CLASS = 'class',
ID = 'id',
STYLE = 'style',
LANG = 'lang',
DIR = 'dir',
TITLE = 'title',
ALT = 'alt',
SRC = 'src',
HREF = 'href',
REL = 'rel',
MEDIA = 'media',
CHARSET = 'charset',
CONTENT = 'content',
HTTP_EQUIV = 'http-equiv',
VALUE = 'value',
CHECKED = 'checked',
SELECTED = 'selected',
DISABLED = 'disabled',
READONLY = 'readonly',
MULTIPLE = 'multiple',
HIDDEN = 'hidden',
REQUIRED = 'required',
AUTOFOCUS = 'autofocus',
PLACEHOLDER = 'placeholder',
PATTERN = 'pattern',
MIN = 'min',
MAX = 'max',
STEP = 'step',
MAXLENGTH = 'maxlength',
MINLENGTH = 'minlength',
COLS = 'cols',
ROWS = 'rows',
WRAP = 'wrap',
ACCEPT = 'accept',
ACCEPT_CHARSET = 'accept-charset',
ACCESSKEY = 'accesskey',
CONTENTEDITABLE = 'contenteditable',
CONTEXTMENU = 'contextmenu',
CONTROLS = 'controls',
COORDS = 'coords',
CROSSORIGIN = 'crossorigin',
DATA = 'data',
DATETIME = 'datetime',
DEFAULT = 'default',
DEFER = 'defer',
DRAGGABLE = 'draggable',
DROPZONE = 'dropzone',
ENCTYPE = 'enctype',
FOR = 'for',
FORM = 'form',
FORMACTION = 'formaction',
FORMENCTYPE = 'formenctype',
FORMMETHOD = 'formmethod',
FORMNOVALIDATE = 'formnovalidate',
FORMTARGET = 'formtarget',
FRAMEBORDER = 'frameborder',
HEADERS = 'headers',
HEIGHT = 'height',
HIGH = 'high',
HREFLANG = 'hreflang',
ICON = 'icon',
ISMAP = 'ismap',
ITEMID = 'itemid',
ITEMPROP = 'itemprop',
ITEMREF = 'itemref',
ITEMSCOPE = 'itemscope',
ITEMTYPE = 'itemtype',
KIND = 'kind',
LABEL = 'label',
LIST = 'list',
LOOP = 'loop',
LOW = 'low',
MANIFEST = 'manifest',
MARGINHEIGHT = 'marginheight',
MARGINWIDTH = 'marginwidth',
METHOD = 'method',
MUTED = 'muted',
NOVALIDATE = 'novalidate',
OPEN = 'open',
OPTIMUM = 'optimum',
PING = 'ping',
POSTER = 'poster',
PRELOAD = 'preload',
RADIOGROUP = 'radiogroup',
REVERSED = 'reversed',
ROLE = 'role',
SANDBOX = 'sandbox',
SCOPE = 'scope',
SCOPED = 'scoped',
SEAMLESS = 'seamless',
SHAPE = 'shape',
SIZES = 'sizes',
SPAN = 'span',
SPELLCHECK = 'spellcheck',
SRCDOC = 'srcdoc',
SRCLANG = 'srclang',
SRCSET = 'srcset',
START = 'start',
TABINDEX = 'tabindex',
TARGET = 'target',
TRANSLATE = 'translate',
USEMAP = 'usemap',
WIDTH = 'width'
}Utility functions for HTML processing and tag identification.
/**
* Get numeric tag ID from tag name string.
*
* Lets callers compare tags as numbers (for example
* `html.getTagID('div') === html.TAG_ID.DIV`) instead of comparing strings.
* NOTE(review): the usage examples only pass lowercase names; whether the
* lookup is case-sensitive is not shown here — confirm before relying on it.
*
* @param tagName - HTML tag name
* @returns Corresponding TAG_ID or TAG_ID.UNKNOWN
*/
function getTagID(tagName: string): TAG_ID;
/**
* Check if a tag contains unescaped text content.
*
* The usage example expects `true` for `('script', true)`.
* NOTE(review): which other tags qualify, and how `scriptingEnabled` changes
* the answer, is not shown in this document — confirm against the implementation.
*
* @param tagName - HTML tag name
* @param scriptingEnabled - Whether scripting is enabled
* @returns True if tag contains unescaped text
*/
function hasUnescapedText(tagName: string, scriptingEnabled: boolean): boolean;Usage Examples:
import { html } from "parse5";
// Resolve tag names to numeric IDs up front so that later checks are
// plain number comparisons rather than string comparisons
const divTagId = html.getTagID('div'),
  spanTagId = html.getTagID('span');
if (html.TAG_ID.DIV === divTagId) {
  console.log('Found div tag');
}
// Ask whether a tag's text content is left unescaped (scripting enabled here)
const hasUnescaped = html.hasUnescapedText('script', true);
console.log('Script has unescaped text:', hasUnescaped); // true
// Use tag name constants
/**
 * Reports whether a tag name is one of the six heading tags (`h1`–`h6`).
 *
 * @param tagName - Tag name to test. It is compared by exact match against
 *   the `html.TAG_NAMES` values, which are lowercase.
 * @returns `true` for `h1` through `h6`, `false` for anything else.
 */
function isHeadingTag(tagName: string): boolean {
  // Typing the list as `readonly string[]` lets `includes` accept any string,
  // so the lookup stays type-checked without an `as any` assertion.
  const headingTags: readonly string[] = [
    html.TAG_NAMES.H1,
    html.TAG_NAMES.H2,
    html.TAG_NAMES.H3,
    html.TAG_NAMES.H4,
    html.TAG_NAMES.H5,
    html.TAG_NAMES.H6,
  ];
  return headingTags.includes(tagName);
}
// Use attribute constants
/**
 * Reports whether an element carries the class `required`.
 *
 * The class attribute is a whitespace-separated list of tokens, so the value
 * is split into tokens and matched exactly. A plain substring search would
 * also accept unrelated classes such as `not-required` or `required-field`.
 *
 * @param element - Element whose `class` attribute is read.
 * @returns `true` when `required` is one of the class tokens; `false` when it
 *   is not, or when the element has no class attribute.
 */
function hasRequiredClass(element: Element): boolean {
  const className = element.getAttribute(html.ATTRS.CLASS);
  if (className == null) {
    return false;
  }
  // Split on ASCII whitespace, the separator HTML uses for class tokens
  return className.split(/[ \t\n\f\r]+/).includes('required');
}
// Use namespace constants
/**
 * Creates a DOM element in a given namespace.
 *
 * @param tagName - Name of the element to create.
 * @param namespace - Namespace for the new element; defaults to `html.NS.HTML`.
 * @returns The element produced by `document.createElementNS`.
 */
function createElement(tagName: string, namespace = html.NS.HTML): Element {
  const created = document.createElementNS(namespace, tagName);
  return created;
}
import { html } from "parse5";
/**
 * Classifies tag names using numeric tag IDs.
 *
 * Each lookup set is built once per instance, so every query costs one
 * `html.getTagID` call and one `Set` membership test.
 */
class HTMLValidator {
  /** Tag IDs this example treats as void elements. */
  private readonly voidElements = new Set([
    html.TAG_ID.AREA, html.TAG_ID.BASE, html.TAG_ID.BR,
    html.TAG_ID.COL, html.TAG_ID.EMBED, html.TAG_ID.HR,
    html.TAG_ID.IMG, html.TAG_ID.INPUT, html.TAG_ID.LINK,
    html.TAG_ID.META, html.TAG_ID.PARAM, html.TAG_ID.SOURCE,
    html.TAG_ID.TRACK, html.TAG_ID.WBR
  ]);

  /** Tag IDs this example treats as block elements. */
  private readonly blockElements = new Set([
    html.TAG_ID.DIV, html.TAG_ID.P, html.TAG_ID.H1,
    html.TAG_ID.H2, html.TAG_ID.H3, html.TAG_ID.H4,
    html.TAG_ID.H5, html.TAG_ID.H6, html.TAG_ID.HEADER,
    html.TAG_ID.FOOTER, html.TAG_ID.SECTION, html.TAG_ID.ARTICLE,
    html.TAG_ID.ASIDE, html.TAG_ID.NAV, html.TAG_ID.MAIN
  ]);

  /** Tag IDs this example treats as inline elements. */
  private readonly inlineElements = new Set([
    html.TAG_ID.SPAN, html.TAG_ID.A, html.TAG_ID.STRONG,
    html.TAG_ID.EM, html.TAG_ID.CODE, html.TAG_ID.B,
    html.TAG_ID.I, html.TAG_ID.U, html.TAG_ID.S
  ]);

  /**
   * @param tagName - HTML tag name.
   * @returns `true` when the tag's ID is in the void-element set.
   */
  isVoidElement(tagName: string): boolean {
    return this.voidElements.has(html.getTagID(tagName));
  }

  /**
   * @param tagName - HTML tag name.
   * @returns `true` when the tag's ID is in the block-element set.
   */
  isBlockElement(tagName: string): boolean {
    // The set is a field rather than a local so it is not rebuilt per call
    return this.blockElements.has(html.getTagID(tagName));
  }

  /**
   * @param tagName - HTML tag name.
   * @returns `true` when the tag's ID is in the inline-element set.
   */
  isInlineElement(tagName: string): boolean {
    // The set is a field rather than a local so it is not rebuilt per call
    return this.inlineElements.has(html.getTagID(tagName));
  }
}
import { html } from "parse5";
/**
 * Builds DOM elements, choosing a namespace from the tag name when the
 * caller does not supply one.
 */
class NamespaceAwareBuilder {
  /**
   * Creates an element, inferring the namespace if none is given.
   *
   * @param tagName - Name of the element to create.
   * @param namespace - Namespace URI; when omitted or empty, `svg` maps to
   *   the SVG namespace, `math` to MathML, and everything else to HTML.
   * @returns The element produced by `document.createElementNS`.
   */
  createElement(tagName: string, namespace?: string): Element {
    const resolved = namespace ? namespace : this.inferNamespace(tagName);
    return document.createElementNS(resolved, tagName);
  }

  /** Creates an element in the SVG namespace. */
  createSVGElement(tagName: string): Element {
    const svgNamespace = html.NS.SVG;
    return this.createElement(tagName, svgNamespace);
  }

  /** Creates an element in the MathML namespace. */
  createMathMLElement(tagName: string): Element {
    const mathNamespace = html.NS.MATHML;
    return this.createElement(tagName, mathNamespace);
  }

  /** Reports whether an element lies outside the HTML namespace. */
  isForeignElement(element: Element): boolean {
    return !(element.namespaceURI === html.NS.HTML);
  }

  /** Maps a root tag name to the namespace it implies. */
  private inferNamespace(tagName: string): string {
    switch (tagName) {
      case html.TAG_NAMES.SVG:
        return html.NS.SVG;
      case html.TAG_NAMES.MATH:
        return html.NS.MATHML;
      default:
        return html.NS.HTML;
    }
  }
}
import { html } from "parse5";
/**
 * Derives a document mode from a raw doctype string and reports it.
 *
 * This is a simplified, string-based approximation of the doctype rules in
 * the HTML standard; it recognises only the doctypes listed in `detectMode`.
 */
class DocumentModeDetector {
  /**
   * Picks the document mode implied by a doctype declaration.
   *
   * Rules, in order:
   * 1. Missing, empty or whitespace-only doctype gives quirks mode.
   * 2. `<!DOCTYPE html>` (any letter case, surrounding whitespace ignored)
   *    gives no-quirks mode.
   * 3. XHTML 1.0 Transitional/Frameset gives limited-quirks mode.
   * 4. HTML 4.01 Transitional/Frameset gives limited-quirks mode when a
   *    system identifier is present and quirks mode when it is missing.
   * 5. Anything else gives no-quirks mode.
   *
   * @param doctype - Full doctype declaration text, or `null` when the
   *   document has none.
   * @returns The detected document mode.
   */
  detectMode(doctype: string | null): html.DOCUMENT_MODE {
    const doctypeLower = doctype?.trim().toLowerCase();
    if (!doctypeLower) {
      return html.DOCUMENT_MODE.QUIRKS;
    }
    // HTML5 doctype
    if (doctypeLower === '<!doctype html>') {
      return html.DOCUMENT_MODE.NO_QUIRKS;
    }
    // Legacy doctypes that always trigger limited-quirks mode
    const limitedQuirksPatterns = [
      'xhtml 1.0 transitional',
      'xhtml 1.0 frameset'
    ];
    if (limitedQuirksPatterns.some(pattern => doctypeLower.includes(pattern))) {
      return html.DOCUMENT_MODE.LIMITED_QUIRKS;
    }
    // Legacy doctypes whose mode depends on the system identifier:
    // limited quirks when one is present, full quirks when it is missing
    const systemIdDependentPatterns = [
      'html 4.01 transitional',
      'html 4.01 frameset'
    ];
    if (systemIdDependentPatterns.some(pattern => doctypeLower.includes(pattern))) {
      return this.hasSystemIdentifier(doctypeLower)
        ? html.DOCUMENT_MODE.LIMITED_QUIRKS
        : html.DOCUMENT_MODE.QUIRKS;
    }
    // Default to no-quirks for unrecognized doctypes
    return html.DOCUMENT_MODE.NO_QUIRKS;
  }

  /**
   * Logs which mode is being applied.
   *
   * @param mode - Document mode to report.
   */
  applyQuirksMode(mode: html.DOCUMENT_MODE): void {
    switch (mode) {
      case html.DOCUMENT_MODE.QUIRKS:
        console.log('Applying full quirks mode');
        break;
      case html.DOCUMENT_MODE.LIMITED_QUIRKS:
        console.log('Applying limited quirks mode');
        break;
      case html.DOCUMENT_MODE.NO_QUIRKS:
        console.log('Applying standards mode');
        break;
    }
  }

  /**
   * Reports whether a doctype with a public identifier also has a system one.
   * The public identifier is the first quoted literal, so a second quoted
   * literal (even an empty one) is the system identifier.
   */
  private hasSystemIdentifier(doctype: string): boolean {
    const quotedLiterals = doctype.match(/"[^"]*"|'[^']*'/g) ?? [];
    return quotedLiterals.length >= 2;
  }
}
import { html } from "parse5";
/**
 * Example helpers for classifying, normalizing and validating attributes.
 */
class AttributeProcessor {
  /**
   * Names of the attributes this example treats as boolean.
   * `async` is written as a literal because the `ATTRS` enum has no member
   * for it.
   */
  private readonly booleanAttributes: ReadonlySet<string> = new Set<string>([
    html.ATTRS.CHECKED, html.ATTRS.SELECTED, html.ATTRS.DISABLED,
    html.ATTRS.READONLY, html.ATTRS.MULTIPLE, html.ATTRS.HIDDEN,
    html.ATTRS.REQUIRED, html.ATTRS.AUTOFOCUS, html.ATTRS.DEFER,
    'async', html.ATTRS.CONTROLS, html.ATTRS.LOOP,
    html.ATTRS.MUTED, html.ATTRS.OPEN, html.ATTRS.REVERSED
  ]);

  /** Keywords accepted for the `type` attribute of `input`. */
  private readonly validInputTypes: ReadonlySet<string> = new Set<string>([
    'hidden', 'text', 'search', 'tel', 'url', 'email', 'password',
    'date', 'month', 'week', 'time', 'datetime-local', 'number', 'range',
    'color', 'checkbox', 'radio', 'file', 'submit', 'image', 'reset', 'button'
  ]);

  /**
   * @param name - Attribute name, compared by exact match.
   * @returns `true` when the name is in the boolean-attribute set.
   */
  isBooleanAttribute(name: string): boolean {
    return this.booleanAttributes.has(name);
  }

  /**
   * Converts a raw attribute value to the form callers work with.
   *
   * @param name - Attribute name.
   * @param value - Raw attribute value, or `null` when the attribute is
   *   absent (the shape `getAttribute` returns).
   * @returns For boolean attributes, `true` when present and `false` when
   *   absent, regardless of the value's text. For other attributes, the
   *   value itself, with an absent attribute reported as `''`.
   */
  normalizeAttributeValue(name: string, value: string | null): string | boolean {
    if (this.isBooleanAttribute(name)) {
      // Boolean attributes: presence = true, absence = false
      return value !== null;
    }
    // Standard attributes
    return value ?? '';
  }

  /**
   * Applies a few example validation rules.
   *
   * @param tagName - Tag the attribute belongs to.
   * @param attrName - Attribute name.
   * @param attrValue - Attribute value.
   * @returns `false` for an empty `img` `src`, an empty `a` `href`, or an
   *   `input` `type` that is not a known keyword; `true` in every other case,
   *   including combinations that have no rule.
   */
  validateAttribute(tagName: string, attrName: string, attrValue: string): boolean {
    const tagId = html.getTagID(tagName);
    // Example validation rules
    switch (tagId) {
      case html.TAG_ID.IMG:
        if (attrName === html.ATTRS.SRC) {
          return attrValue.length > 0;
        }
        if (attrName === html.ATTRS.ALT) {
          return true; // Alt can be empty
        }
        break;
      case html.TAG_ID.A:
        if (attrName === html.ATTRS.HREF) {
          return attrValue.length > 0;
        }
        break;
      case html.TAG_ID.INPUT:
        if (attrName === html.ATTRS.TYPE) {
          // The type keyword is matched ASCII case-insensitively
          return this.validInputTypes.has(attrValue.toLowerCase());
        }
        break;
    }
    return true; // Default to valid
  }
}
Install with Tessl CLI
npx tessl i tessl/npm-parse5