HTML parser and serializer that is fully compliant with the WHATWG HTML Living Standard.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Comprehensive error handling system that provides detailed parsing error information with source code locations and standardized error codes following the WHATWG HTML specification.
Detailed error information including location data for precise error reporting.
/**
* A single parse error as delivered to the `onParseError` callback: an error
* code plus the source span the error applies to.
*
* The six position fields have the same names and types as those of `Location`.
*/
interface ParserError {
/** Error code following WHATWG HTML specification (a kebab-case string such as 'eof-in-tag') */
code: string;
/** One-based line index of the error start */
startLine: number;
/** One-based column index of the error start */
startCol: number;
/** Zero-based character offset of the error start */
startOffset: number;
/** One-based line index of the error end */
endLine: number;
/** One-based column index of the error end (presumably just past the last character, as for `Location.endCol` — TODO confirm) */
endCol: number;
/** Zero-based character offset of the error end (presumably just past the last character, as for `Location.endOffset` — TODO confirm) */
endOffset: number;
}
/**
* Error handler callback function
*/
type ParserErrorHandler = (error: ParserError) => void;

Usage Examples:
import { parse } from "parse5";
const errors: ParserError[] = [];
const document = parse('<div><span></div>', {
onParseError: (error) => {
errors.push(error);
console.log(`Error ${error.code} at line ${error.startLine}, column ${error.startCol}`);
}
});
console.log(errors.length); // 1
console.log(errors[0].code); // e.g. 'missing-doctype' (this input has no DOCTYPE); the exact codes depend on the input

Comprehensive enumeration of all HTML parsing error codes defined by the WHATWG specification.
/**
* HTML parsing error codes following WHATWG HTML Living Standard.
*
* Each member name is the camelCase form of its value; the value is the
* kebab-case code string that is compared against `ParserError.code`.
*
* NOTE(review): every code listed here is a tokenizer / input-stream level
* error. Confirm against the installed parse5 version whether further codes
* (for example tree-construction errors) exist that are not listed here.
*/
enum ErrorCodes {
// Input stream errors (problems with individual code points of the raw input)
controlCharacterInInputStream = 'control-character-in-input-stream',
noncharacterInInputStream = 'noncharacter-in-input-stream',
surrogateInInputStream = 'surrogate-in-input-stream',
// Tag-related errors (malformed start/end tags)
nonVoidHtmlElementStartTagWithTrailingSolidus = 'non-void-html-element-start-tag-with-trailing-solidus',
endTagWithAttributes = 'end-tag-with-attributes',
endTagWithTrailingSolidus = 'end-tag-with-trailing-solidus',
unexpectedSolidusInTag = 'unexpected-solidus-in-tag',
unexpectedQuestionMarkInsteadOfTagName = 'unexpected-question-mark-instead-of-tag-name',
invalidFirstCharacterOfTagName = 'invalid-first-character-of-tag-name',
missingEndTagName = 'missing-end-tag-name',
// Character and entity errors (NUL characters and character references)
unexpectedNullCharacter = 'unexpected-null-character',
unknownNamedCharacterReference = 'unknown-named-character-reference',
missingSemicolonAfterCharacterReference = 'missing-semicolon-after-character-reference',
nullCharacterReference = 'null-character-reference',
characterReferenceOutsideUnicodeRange = 'character-reference-outside-unicode-range',
surrogateCharacterReference = 'surrogate-character-reference',
noncharacterCharacterReference = 'noncharacter-character-reference',
controlCharacterReference = 'control-character-reference',
// Attribute errors (malformed attribute names and values inside a tag)
unexpectedEqualsSignBeforeAttributeName = 'unexpected-equals-sign-before-attribute-name',
unexpectedCharacterInAttributeName = 'unexpected-character-in-attribute-name',
missingAttributeValue = 'missing-attribute-value',
missingWhitespaceBetweenAttributes = 'missing-whitespace-between-attributes',
unexpectedCharacterInUnquotedAttributeValue = 'unexpected-character-in-unquoted-attribute-value',
// DOCTYPE errors (malformed PUBLIC / SYSTEM keywords and identifiers)
missingWhitespaceAfterDoctypePublicKeyword = 'missing-whitespace-after-doctype-public-keyword',
missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers = 'missing-whitespace-between-doctype-public-and-system-identifiers',
missingWhitespaceAfterDoctypeSystemKeyword = 'missing-whitespace-after-doctype-system-keyword',
missingQuoteBeforeDoctypePublicIdentifier = 'missing-quote-before-doctype-public-identifier',
missingQuoteBeforeDoctypeSystemIdentifier = 'missing-quote-before-doctype-system-identifier',
missingDoctypePublicIdentifier = 'missing-doctype-public-identifier',
missingDoctypeSystemIdentifier = 'missing-doctype-system-identifier',
abruptDoctypePublicIdentifier = 'abrupt-doctype-public-identifier',
abruptDoctypeSystemIdentifier = 'abrupt-doctype-system-identifier',
unexpectedCharacterAfterDoctypeSystemIdentifier = 'unexpected-character-after-doctype-system-identifier',
// End-of-file errors (input ended in the middle of a construct)
eofBeforeTagName = 'eof-before-tag-name',
eofInTag = 'eof-in-tag',
eofInScriptHtmlCommentLikeText = 'eof-in-script-html-comment-like-text',
eofInDoctype = 'eof-in-doctype',
eofInComment = 'eof-in-comment',
eofInCdata = 'eof-in-cdata',
// Comment errors (malformed comments and misplaced CDATA sections)
cdataInHtmlContent = 'cdata-in-html-content',
incorrectlyOpenedComment = 'incorrectly-opened-comment',
nestedComment = 'nested-comment',
abruptClosingOfEmptyComment = 'abrupt-closing-of-empty-comment',
incorrectlyClosedComment = 'incorrectly-closed-comment',
// Numeric character reference errors
absenceOfDigitsInNumericCharacterReference = 'absence-of-digits-in-numeric-character-reference',
}
/**
* Export alias for error codes enum
*/
const ERR = ErrorCodes;

Usage Examples:
import { parse, ErrorCodes } from "parse5";
const document = parse('<div><span></div>', {
onParseError: (error) => {
switch (error.code) {
case ErrorCodes.endTagWithAttributes:
console.log('End tag has attributes');
break;
case ErrorCodes.unexpectedNullCharacter:
console.log('Unexpected null character found');
break;
default:
console.log(`Unknown error: ${error.code}`);
}
}
});

Detailed location tracking for precise error positioning.
/**
* Location information interface used in errors and nodes.
*
* Lines and columns are one-based, offsets are zero-based. The start fields
* point at the first character; `endCol` and `endOffset` point just past the
* last character.
*/
interface Location {
/** One-based line index of the first character */
startLine: number;
/** One-based column index of the first character */
startCol: number;
/** Zero-based first character index */
startOffset: number;
/** One-based line index of the last character */
endLine: number;
/** One-based column index of the last character (after the character) */
endCol: number;
/** Zero-based last character index (after the character) */
endOffset: number;
}
/**
* Location information with attribute positions: a `Location` that may
* additionally carry one `Location` per start-tag attribute.
*/
interface LocationWithAttributes extends Location {
/** Start tag attributes' location info (presumably keyed by attribute name — TODO confirm) */
attrs?: Record<string, Location>;
}
/**
* Element location with start and end tag positions
*/
interface ElementLocation extends LocationWithAttributes {
/** Element's start tag location info */
startTag?: Location;
/** Element's end tag location info (undefined if no closing tag) */
endTag?: Location;
}

Usage Examples:
import { parse } from "parse5";
const html = `<div class="container">
<span>Content</span>
</div>`;
let parseErrors: ParserError[] = [];
const document = parse(html, {
sourceCodeLocationInfo: true,
onParseError: (error) => {
parseErrors.push(error);
console.log(`Error at line ${error.startLine}: ${error.code}`);
console.log(`Position: ${error.startOffset}-${error.endOffset}`);
}
});

import { parse, parseFragment } from "parse5";
import type { ParserError } from "parse5";
/**
 * Collects the parse errors reported while parsing a document and can
 * summarise them per error code.
 *
 * Only the errors of the most recent `parseWithErrorCollection` call are
 * retained; each call starts from a fresh list.
 */
class HTMLErrorCollector {
  private errors: ParserError[] = [];

  /**
   * Parses `html`, recording every error the parser reports.
   *
   * @param html - Markup to parse.
   * @returns The parsed document, the errors recorded for this call, and a
   *   flag telling whether any error was recorded.
   */
  parseWithErrorCollection(html: string) {
    // Start a fresh list so results of earlier calls are left untouched.
    const collected: ParserError[] = [];
    this.errors = collected;

    const document = parse(html, {
      onParseError: (error) => {
        collected.push(error);
      }
    });

    return { document, errors: collected, hasErrors: collected.length > 0 };
  }

  /**
   * Counts the recorded errors per error code.
   *
   * @returns One `{ code, count }` entry per distinct code, in order of first
   *   occurrence.
   */
  getErrorSummary() {
    const tally = new Map<string, number>();
    for (const { code } of this.errors) {
      tally.set(code, (tally.get(code) || 0) + 1);
    }
    return [...tally].map(([code, count]) => ({ code, count }));
  }
}

import { ErrorCodes } from "parse5";
import type { ParserError } from "parse5";
/**
 * Severity levels assigned to parse errors by `classifyError`.
 * The numeric values increase with severity (LOW = 1 up to CRITICAL = 4).
 */
enum ErrorSeverity {
LOW = 1,
MEDIUM = 2,
HIGH = 3,
CRITICAL = 4
}
/**
 * Assigns a severity level to a parse error based solely on its error code.
 *
 * @param error - The parse error to classify.
 * @returns The severity for the error's code; codes without an explicit rule
 *   are classified as MEDIUM.
 */
function classifyError(error: ParserError): ErrorSeverity {
  const { code } = error;

  // Critical structural errors
  if (code === ErrorCodes.eofInTag || code === ErrorCodes.eofBeforeTagName) {
    return ErrorSeverity.CRITICAL;
  }

  // High priority errors affecting parsing
  if (code === ErrorCodes.missingEndTagName || code === ErrorCodes.unexpectedCharacterInAttributeName) {
    return ErrorSeverity.HIGH;
  }

  // Medium priority formatting issues
  if (code === ErrorCodes.missingWhitespaceBetweenAttributes || code === ErrorCodes.endTagWithAttributes) {
    return ErrorSeverity.MEDIUM;
  }

  // Low priority cosmetic issues
  if (code === ErrorCodes.unexpectedNullCharacter || code === ErrorCodes.controlCharacterInInputStream) {
    return ErrorSeverity.LOW;
  }

  // Everything else falls back to the middle of the scale.
  return ErrorSeverity.MEDIUM;
}

import { parse, serialize } from "parse5";
import type { ParserError } from "parse5";
/**
 * Outcome of `validateAndCleanHTML`.
 */
interface ValidationResult {
/** True when parsing reported no errors */
isValid: boolean;
/** Every parse error reported, in the order reported */
errors: ParserError[];
/** Re-serialized document; only set when at least one error was reported */
cleanedHTML?: string;
/** Number of entries in `errors` */
errorCount: number;
}
/**
 * Parses `html`, recording every parse error, and reports whether the input
 * parsed without errors.
 *
 * @param html - Markup to validate.
 * @returns The validation outcome. `cleanedHTML` (the parsed document
 *   serialized back to a string) is present only when errors were reported.
 */
function validateAndCleanHTML(html: string): ValidationResult {
  const collected: ParserError[] = [];
  const parsedDocument = parse(html, {
    onParseError: (parseError) => {
      collected.push(parseError);
    }
  });

  const errorCount = collected.length;
  if (errorCount === 0) {
    // Clean input: there is nothing to repair, so no cleaned HTML is attached.
    return { isValid: true, errors: collected, errorCount };
  }

  // If there were errors, provide cleaned HTML
  return {
    isValid: false,
    errors: collected,
    errorCount,
    cleanedHTML: serialize(parsedDocument)
  };
}
// Usage
const validation = validateAndCleanHTML('<div><span></div>');
if (!validation.isValid) {
console.log(`Found ${validation.errorCount} errors`);
console.log('Cleaned HTML:', validation.cleanedHTML);
}

import { parse } from "parse5";
import type { ParserError } from "parse5";
/**
 * A parse error enriched by `parseWithEnhancedErrors` with a source snippet
 * and an optional fix suggestion.
 */
interface EnhancedError extends ParserError {
/** Excerpt of the source line on which the error starts, around the error span */
context: string;
/** Human-readable hint for fixing the error */
suggestion?: string;
}
/**
 * Parses `html` and returns every reported parse error enriched with an
 * excerpt of the offending source line and a fix suggestion.
 *
 * The excerpt spans the error itself plus up to `contextRadius` characters on
 * each side, clipped to the line on which the error starts.
 *
 * @param html - Markup to parse.
 * @param contextRadius - Number of characters of context to keep on each side
 *   of the error span. Defaults to 10.
 * @returns The enhanced errors, in the order the parser reported them.
 */
function parseWithEnhancedErrors(html: string, contextRadius = 10): EnhancedError[] {
  // HTML input preprocessing treats CRLF, a lone CR and LF each as a single
  // line break, so split on all three to stay aligned with the reported line
  // numbers and to keep stray '\r' characters out of the excerpt.
  const lines = html.split(/\r\n|\r|\n/);
  const enhancedErrors: EnhancedError[] = [];

  parse(html, {
    onParseError: (error) => {
      const line = lines[error.startLine - 1] ?? '';

      // Columns are one-based while substring() indices are zero-based.
      const startIndex = error.startCol - 1;
      // endCol points just past the last character, so endCol - 1 is the
      // exclusive zero-based end. When the span ends on a later line its
      // endCol refers to that other line; run to the end of this one instead.
      const endIndex = error.endLine === error.startLine ? error.endCol - 1 : line.length;

      const contextStart = Math.max(0, startIndex - contextRadius);
      const contextEnd = Math.min(line.length, endIndex + contextRadius);
      const context = line.substring(contextStart, contextEnd);

      const enhanced: EnhancedError = {
        ...error,
        context,
        suggestion: getSuggestionForError(error.code)
      };
      enhancedErrors.push(enhanced);
    }
  });

  return enhancedErrors;
}
/**
 * Returns a short, human-readable hint for fixing the given parse error.
 *
 * @param code - Parse error code, e.g. 'missing-end-tag-name'.
 * @returns A dedicated hint for the codes listed below, otherwise a generic
 *   pointer to the HTML specification.
 */
function getSuggestionForError(code: string): string {
  // A Map (not a plain object) so that codes such as 'constructor' cannot
  // accidentally hit inherited properties.
  const suggestionByCode = new Map<string, string>([
    ['end-tag-with-attributes', 'Remove attributes from closing tags'],
    ['missing-end-tag-name', 'Add tag name after </ in closing tag'],
    ['unexpected-null-character', 'Remove or replace null characters'],
  ]);

  const suggestion = suggestionByCode.get(code);
  return suggestion === undefined ? 'Check HTML specification for this error' : suggestion;
}

import { parse } from "parse5";
// Missing closing tag name
const html1 = '<div>Content</>';
parse(html1, {
onParseError: (error) => {
console.log(error.code); // 'missing-end-tag-name'
}
});
// Attributes in closing tag
const html2 = '<div>Content</div class="error">';
parse(html2, {
onParseError: (error) => {
console.log(error.code); // 'end-tag-with-attributes'
}
});

import { parse } from "parse5";
// Null characters in input
const htmlWithNull = '<div>Content\0here</div>';
parse(htmlWithNull, {
onParseError: (error) => {
console.log(error.code); // 'unexpected-null-character'
}
});
// Invalid character references
const htmlWithBadEntity = '<div>Content &invalid; here</div>';
parse(htmlWithBadEntity, {
onParseError: (error) => {
console.log(error.code); // 'unknown-named-character-reference'
}
});

import { parse } from "parse5";
// Malformed DOCTYPE
const htmlWithBadDoctype = '<!DOCTYPE html SYSTEM>';
parse(htmlWithBadDoctype, {
onParseError: (error) => {
console.log(error.code); // 'missing-doctype-system-identifier'
}
});

Install with Tessl CLI
npx tessl i tessl/npm-parse5