# Error Handling

A comprehensive error-handling system that provides detailed parse error information, including source code locations and standardized error codes that follow the WHATWG HTML specification.

## Capabilities

### Parser Error Interface

Detailed error information, including location data for precise error reporting.

```typescript { .api }
/**
 * Parse error reported to the `onParseError` callback, together with the
 * source position at which it was detected.
 */
interface ParserError {
  /** Error code following the WHATWG HTML specification. */
  code: string;
  /** One-based line index of the error start. */
  startLine: number;
  /** One-based column index of the error start. */
  startCol: number;
  /** Zero-based character offset of the error start. */
  startOffset: number;
  /** One-based line index of the error end. */
  endLine: number;
  /** One-based column index of the error end. */
  endCol: number;
  /** Zero-based character offset of the error end. */
  endOffset: number;
}

/**
 * Callback invoked with each parse error; its return value is ignored.
 */
type ParserErrorHandler = (error: ParserError) => void;
```

**Usage Examples:**

```typescript
import { parse } from "parse5";
import type { ParserError } from "parse5";

// Collect every error the parser reports while building the document.
const errors: ParserError[] = [];

const document = parse('<div><span></div>', {
  onParseError: (error) => {
    errors.push(error);
    console.log(`Error ${error.code} at line ${error.startLine}, column ${error.startCol}`);
  },
});

console.log(errors.length); // number of parse errors reported for this input
// First reported code, e.g. 'missing-doctype' (the input has no DOCTYPE).
// Optional chaining guards against an input that produces no errors.
console.log(errors[0]?.code);
```

### Error Codes Enumeration

Enumeration of the HTML parsing error codes that can appear in `ParserError.code`. Code values follow the parse error names defined by the WHATWG HTML specification.

```typescript { .api }
/**
 * HTML parsing error codes following the WHATWG HTML Living Standard.
 *
 * Each member's value is the kebab-case code string that appears in
 * `ParserError.code`.
 */
enum ErrorCodes {
  // Input stream errors
  controlCharacterInInputStream = 'control-character-in-input-stream',
  noncharacterInInputStream = 'noncharacter-in-input-stream',
  surrogateInInputStream = 'surrogate-in-input-stream',

  // Tag-related errors
  nonVoidHtmlElementStartTagWithTrailingSolidus = 'non-void-html-element-start-tag-with-trailing-solidus',
  endTagWithAttributes = 'end-tag-with-attributes',
  endTagWithTrailingSolidus = 'end-tag-with-trailing-solidus',
  unexpectedSolidusInTag = 'unexpected-solidus-in-tag',
  unexpectedQuestionMarkInsteadOfTagName = 'unexpected-question-mark-instead-of-tag-name',
  invalidFirstCharacterOfTagName = 'invalid-first-character-of-tag-name',
  missingEndTagName = 'missing-end-tag-name',

  // Character and entity errors
  unexpectedNullCharacter = 'unexpected-null-character',
  unknownNamedCharacterReference = 'unknown-named-character-reference',
  missingSemicolonAfterCharacterReference = 'missing-semicolon-after-character-reference',
  nullCharacterReference = 'null-character-reference',
  characterReferenceOutsideUnicodeRange = 'character-reference-outside-unicode-range',
  surrogateCharacterReference = 'surrogate-character-reference',
  noncharacterCharacterReference = 'noncharacter-character-reference',
  controlCharacterReference = 'control-character-reference',

  // Attribute errors
  unexpectedEqualsSignBeforeAttributeName = 'unexpected-equals-sign-before-attribute-name',
  unexpectedCharacterInAttributeName = 'unexpected-character-in-attribute-name',
  missingAttributeValue = 'missing-attribute-value',
  missingWhitespaceBetweenAttributes = 'missing-whitespace-between-attributes',
  unexpectedCharacterInUnquotedAttributeValue = 'unexpected-character-in-unquoted-attribute-value',
  duplicateAttribute = 'duplicate-attribute',

  // DOCTYPE errors
  missingWhitespaceBeforeDoctypeName = 'missing-whitespace-before-doctype-name',
  missingDoctypeName = 'missing-doctype-name',
  invalidCharacterSequenceAfterDoctypeName = 'invalid-character-sequence-after-doctype-name',
  missingWhitespaceAfterDoctypePublicKeyword = 'missing-whitespace-after-doctype-public-keyword',
  missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers = 'missing-whitespace-between-doctype-public-and-system-identifiers',
  missingWhitespaceAfterDoctypeSystemKeyword = 'missing-whitespace-after-doctype-system-keyword',
  missingQuoteBeforeDoctypePublicIdentifier = 'missing-quote-before-doctype-public-identifier',
  missingQuoteBeforeDoctypeSystemIdentifier = 'missing-quote-before-doctype-system-identifier',
  missingDoctypePublicIdentifier = 'missing-doctype-public-identifier',
  missingDoctypeSystemIdentifier = 'missing-doctype-system-identifier',
  abruptDoctypePublicIdentifier = 'abrupt-doctype-public-identifier',
  abruptDoctypeSystemIdentifier = 'abrupt-doctype-system-identifier',
  unexpectedCharacterAfterDoctypeSystemIdentifier = 'unexpected-character-after-doctype-system-identifier',

  // End-of-file errors
  eofBeforeTagName = 'eof-before-tag-name',
  eofInTag = 'eof-in-tag',
  eofInScriptHtmlCommentLikeText = 'eof-in-script-html-comment-like-text',
  eofInDoctype = 'eof-in-doctype',
  eofInComment = 'eof-in-comment',
  eofInCdata = 'eof-in-cdata',

  // Comment errors
  cdataInHtmlContent = 'cdata-in-html-content',
  incorrectlyOpenedComment = 'incorrectly-opened-comment',
  nestedComment = 'nested-comment',
  abruptClosingOfEmptyComment = 'abrupt-closing-of-empty-comment',
  incorrectlyClosedComment = 'incorrectly-closed-comment',

  // Numeric character reference errors
  absenceOfDigitsInNumericCharacterReference = 'absence-of-digits-in-numeric-character-reference',

  // Tree-construction errors.
  // NOTE(review): these codes are parse5-specific (the WHATWG spec only names
  // tokenizer/input-stream errors) — confirm against the installed parse5 version.
  nonConformingDoctype = 'non-conforming-doctype',
  missingDoctype = 'missing-doctype',
  misplacedDoctype = 'misplaced-doctype',
  endTagWithoutMatchingOpenElement = 'end-tag-without-matching-open-element',
  closingOfElementWithOpenChildElements = 'closing-of-element-with-open-child-elements',
  disallowedContentInNoscriptInHead = 'disallowed-content-in-noscript-in-head',
  openElementsLeftAfterEof = 'open-elements-left-after-eof',
  abandonedHeadElementChild = 'abandoned-head-element-child',
  misplacedStartTagForHeadElement = 'misplaced-start-tag-for-head-element',
  nestedNoscriptInHead = 'nested-noscript-in-head',
  eofInElementThatCanContainOnlyText = 'eof-in-element-that-can-contain-only-text',
}

/**
 * Short alias for the `ErrorCodes` enum.
 */
const ERR = ErrorCodes;
```

**Usage Examples:**

```typescript
import { parse, ErrorCodes } from "parse5";

// Branch on the reported code using the enum instead of raw strings.
const document = parse('<div><span></div>', {
  onParseError: (error) => {
    switch (error.code) {
      case ErrorCodes.endTagWithAttributes:
        console.log('End tag has attributes');
        break;
      case ErrorCodes.unexpectedNullCharacter:
        console.log('Unexpected null character found');
        break;
      default:
        // Any code without a dedicated branch ends up here.
        console.log(`Unknown error: ${error.code}`);
    }
  },
});
```

### Location Information

Detailed location tracking for precise error positioning.

```typescript { .api }
/**
 * Source span used by both errors and nodes.
 *
 * Lines and columns are one-based; offsets are zero-based. The end column
 * and end offset point just past the last character of the span.
 */
interface Location {
  /** One-based line index of the first character. */
  startLine: number;
  /** One-based column index of the first character. */
  startCol: number;
  /** Zero-based first character index. */
  startOffset: number;
  /** One-based line index of the last character. */
  endLine: number;
  /** One-based column index of the last character (after the character). */
  endCol: number;
  /** Zero-based last character index (after the character). */
  endOffset: number;
}

/**
 * Location of a tag that additionally records where each attribute sits.
 */
interface LocationWithAttributes extends Location {
  /** Start tag attributes' location info, keyed by string (presumably the attribute name — confirm). */
  attrs?: Record<string, Location>;
}

/**
 * Location of a whole element, with separate spans for its start and end tags.
 */
interface ElementLocation extends LocationWithAttributes {
  /** Element's start tag location info. */
  startTag?: Location;
  /** Element's end tag location info (undefined if there is no closing tag). */
  endTag?: Location;
}
```

**Usage Examples:**

```typescript
import { parse } from "parse5";
import type { ParserError } from "parse5";

const html = `<div class="container">
  <span>Content</span>
</div>`;

// The array is only mutated, never reassigned, so `const` is sufficient.
const parseErrors: ParserError[] = [];

const document = parse(html, {
  sourceCodeLocationInfo: true,
  onParseError: (error) => {
    parseErrors.push(error);
    console.log(`Error at line ${error.startLine}: ${error.code}`);
    console.log(`Position: ${error.startOffset}-${error.endOffset}`);
  },
});
```

## Error Handling Patterns

### Error Collection and Reporting

```typescript
import { parse, parseFragment } from "parse5";
import type { ParserError } from "parse5";

/**
 * Collects the parse errors reported while parsing a document and
 * summarizes them by error code.
 */
class HTMLErrorCollector {
  /** Errors reported during the most recent `parseWithErrorCollection` call. */
  private errors: ParserError[] = [];

  /**
   * Parses `html` as a full document and records every reported parse error.
   *
   * Each call replaces the previously collected errors with a fresh array,
   * so results returned by earlier calls are not modified.
   *
   * @param html - HTML source to parse.
   * @returns The parsed document, the collected errors, and whether any were reported.
   */
  parseWithErrorCollection(html: string) {
    const collected: ParserError[] = [];
    this.errors = collected;

    const document = parse(html, {
      onParseError: (error) => {
        collected.push(error);
      },
    });

    return {
      document,
      errors: collected,
      hasErrors: collected.length > 0,
    };
  }

  /**
   * Counts the errors of the most recent parse by error code.
   *
   * @returns One `{ code, count }` entry per distinct code, in order of first occurrence.
   */
  getErrorSummary(): Array<{ code: string; count: number }> {
    const countsByCode = new Map<string, number>();

    for (const { code } of this.errors) {
      countsByCode.set(code, (countsByCode.get(code) ?? 0) + 1);
    }

    return Array.from(countsByCode, ([code, count]) => ({ code, count }));
  }
}
```

### Error Severity Classification

```typescript
import { ErrorCodes } from "parse5";
import type { ParserError } from "parse5";

/**
 * Severity buckets for parse errors. Numeric values increase with severity,
 * so members can be compared with `<` / `>`.
 */
enum ErrorSeverity {
  LOW = 1,
  MEDIUM = 2,
  HIGH = 3,
  CRITICAL = 4,
}

/**
 * Maps a parse error to a severity bucket based on its error code.
 *
 * @param error - Parse error to classify.
 * @returns The severity assigned to the error's code; codes without an
 *   explicit mapping are treated as `ErrorSeverity.MEDIUM`.
 */
function classifyError(error: ParserError): ErrorSeverity {
  switch (error.code) {
    // Critical structural errors
    case ErrorCodes.eofInTag:
    case ErrorCodes.eofBeforeTagName:
      return ErrorSeverity.CRITICAL;

    // High priority errors affecting parsing
    case ErrorCodes.missingEndTagName:
    case ErrorCodes.unexpectedCharacterInAttributeName:
      return ErrorSeverity.HIGH;

    // Medium priority formatting issues
    case ErrorCodes.missingWhitespaceBetweenAttributes:
    case ErrorCodes.endTagWithAttributes:
      return ErrorSeverity.MEDIUM;

    // Low priority cosmetic issues
    case ErrorCodes.unexpectedNullCharacter:
    case ErrorCodes.controlCharacterInInputStream:
      return ErrorSeverity.LOW;

    default:
      return ErrorSeverity.MEDIUM;
  }
}
```

### Validation and Cleanup

```typescript
import { parse, serialize } from "parse5";
import type { ParserError } from "parse5";

/**
 * Outcome of validating an HTML string.
 */
interface ValidationResult {
  /** True when no parse errors were reported. */
  isValid: boolean;
  /** Every parse error reported for the input. */
  errors: ParserError[];
  /** Re-serialized HTML; only set when at least one error was reported. */
  cleanedHTML?: string;
  /** Number of entries in `errors`. */
  errorCount: number;
}

/**
 * Parses `html`, collects its parse errors and, if there were any, returns
 * the re-serialized parse result as "cleaned" HTML.
 *
 * @param html - HTML source to validate.
 * @returns Validation outcome; `cleanedHTML` is only present for invalid input.
 */
function validateAndCleanHTML(html: string): ValidationResult {
  const errors: ParserError[] = [];

  const document = parse(html, {
    onParseError: (error) => {
      errors.push(error);
    },
  });

  const result: ValidationResult = {
    isValid: errors.length === 0,
    errors,
    errorCount: errors.length,
  };

  // The cleaned HTML is the serialization of the tree the parser built from
  // the faulty input, so it is only worth producing when errors occurred.
  if (errors.length > 0) {
    result.cleanedHTML = serialize(document);
  }

  return result;
}

// Usage
const validation = validateAndCleanHTML('<div><span></div>');
if (!validation.isValid) {
  console.log(`Found ${validation.errorCount} errors`);
  console.log('Cleaned HTML:', validation.cleanedHTML);
}
```

### Error Context Enhancement

```typescript
import { parse } from "parse5";
import type { ParserError } from "parse5";

/**
 * Parse error enriched with a snippet of the surrounding source and an
 * optional hint on how to fix it.
 */
interface EnhancedError extends ParserError {
  /** Excerpt of the source line around the error position. */
  context: string;
  /** Human-readable fix suggestion, if one is available. */
  suggestion?: string;
}

/** Number of characters of surrounding source shown on each side of an error. */
const ERROR_CONTEXT_RADIUS = 10;

/** Returns the excerpt of the error's first line that surrounds the error span. */
function extractErrorContext(lines: readonly string[], error: ParserError): string {
  const line = lines[error.startLine - 1] ?? '';

  // Columns are one-based while string indices are zero-based.
  const spanStart = error.startCol - 1;
  // The end column only refers to this line when the error does not span
  // lines; otherwise show context up to the end of the first line.
  // NOTE(review): assumes the end column points just past the last
  // character, as documented for Location — confirm for ParserError.
  const spanEnd = error.endLine === error.startLine ? error.endCol - 1 : line.length;

  const from = Math.max(0, spanStart - ERROR_CONTEXT_RADIUS);
  const to = Math.min(line.length, Math.max(spanStart, spanEnd) + ERROR_CONTEXT_RADIUS);
  return line.slice(from, to);
}

/**
 * Parses `html` and returns its parse errors, each enriched with a snippet
 * of the surrounding source line and a fix suggestion.
 *
 * @param html - HTML source to parse.
 * @returns The enhanced errors in the order they were reported.
 */
function parseWithEnhancedErrors(html: string): EnhancedError[] {
  // Split on CRLF, CR and LF alike so Windows line endings neither leave a
  // stray '\r' in the context nor get counted twice.
  const lines = html.split(/\r\n?|\n/);
  const enhancedErrors: EnhancedError[] = [];

  parse(html, {
    onParseError: (error) => {
      enhancedErrors.push({
        ...error,
        context: extractErrorContext(lines, error),
        suggestion: getSuggestionForError(error.code),
      });
    },
  });

  return enhancedErrors;
}

/**
 * Returns a short, human-readable hint on how to fix a parse error.
 *
 * @param code - Error code as found in `ParserError.code`.
 * @returns A specific hint for the codes handled below, otherwise a generic
 *   pointer to the HTML specification.
 */
function getSuggestionForError(code: string): string {
  switch (code) {
    case 'end-tag-with-attributes':
      return 'Remove attributes from closing tags';
    case 'missing-end-tag-name':
      return 'Add tag name after </ in closing tag';
    case 'unexpected-null-character':
      return 'Remove or replace null characters';
    default:
      return 'Check HTML specification for this error';
  }
}
```

## Common Error Scenarios

### Malformed Tags

```typescript
import { parse } from "parse5";

// Missing closing tag name
const html1 = '<div>Content</>';
parse(html1, {
  onParseError: (error) => {
    // Called once per error: logs 'missing-end-tag-name'; other codes
    // (e.g. for the missing DOCTYPE) may be logged as well.
    console.log(error.code);
  },
});

// Attributes in closing tag
const html2 = '<div>Content</div class="error">';
parse(html2, {
  onParseError: (error) => {
    // Logs 'end-tag-with-attributes', possibly alongside other codes.
    console.log(error.code);
  },
});
```

### Character Encoding Issues

```typescript
import { parse } from "parse5";

// Null characters in input ('\0' is a literal U+0000 in the string)
const htmlWithNull = '<div>Content\0here</div>';
parse(htmlWithNull, {
  onParseError: (error) => {
    // Logs 'unexpected-null-character', possibly alongside other codes.
    console.log(error.code);
  },
});

// Invalid character references
const htmlWithBadEntity = '<div>Content &invalid; here</div>';
parse(htmlWithBadEntity, {
  onParseError: (error) => {
    // Logs 'unknown-named-character-reference', possibly alongside other codes.
    console.log(error.code);
  },
});
```

### DOCTYPE Errors

```typescript
import { parse } from "parse5";

// Malformed DOCTYPE: the SYSTEM keyword is not followed by an identifier
const htmlWithBadDoctype = '<!DOCTYPE html SYSTEM>';
parse(htmlWithBadDoctype, {
  onParseError: (error) => {
    // DOCTYPE-related error; per the WHATWG tokenizer rules a '>' directly
    // after the SYSTEM keyword is 'missing-doctype-system-identifier'.
    console.log(error.code);
  },
});
```