or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

error-handling.md, html-utilities.md, index.md, parsing.md, serialization.md, tokenization.md, tree-adapters.md

error-handling.mddocs/

0

# Error Handling

1

2

Comprehensive error handling system that provides detailed parsing error information with source code locations and standardized error codes following the WHATWG HTML specification.

3

4

## Capabilities

5

6

### Parser Error Interface

7

8

Detailed error information including location data for precise error reporting.

9

10

```typescript { .api }

11

/**
 * Parser error with location information.
 *
 * Line and column fields are one-based; offset fields are zero-based.
 */
interface ParserError {
  /** Error code following WHATWG HTML specification */
  code: string;
  /** One-based line index of the error start */
  startLine: number;
  /** One-based column index of the error start */
  startCol: number;
  /** Zero-based character offset of the error start */
  startOffset: number;
  /** One-based line index of the error end */
  endLine: number;
  /** One-based column index of the error end */
  endCol: number;
  /** Zero-based character offset of the error end */
  endOffset: number;
}

30

31

/**
 * Error handler callback function.
 *
 * Receives one `ParserError` per call; its return value is ignored.
 */
type ParserErrorHandler = (error: ParserError) => void;

35

```

36

37

**Usage Examples:**

38

39

```typescript

40

import { parse } from "parse5";
import type { ParserError } from "parse5";

// Every parse error reported while the document is parsed is appended here.
const errors: ParserError[] = [];

const document = parse('<div><span></div>', {
  onParseError: (error) => {
    errors.push(error);
    console.log(`Error ${error.code} at line ${error.startLine}, column ${error.startCol}`);
  }
});

console.log(errors.length); // count of reported errors
// Code of the first reported error, e.g. 'missing-doctype' (the input has no DOCTYPE).
console.log(errors[0]?.code);

53

```

54

55

### Error Codes Enumeration

56

57

Comprehensive enumeration of all HTML parsing error codes defined by the WHATWG specification.

58

59

```typescript { .api }

60

/**
 * HTML parsing error codes following WHATWG HTML Living Standard.
 *
 * Each member's value is the kebab-case code string reported in `ParserError.code`.
 */
enum ErrorCodes {
  // Input stream errors
  controlCharacterInInputStream = 'control-character-in-input-stream',
  noncharacterInInputStream = 'noncharacter-in-input-stream',
  surrogateInInputStream = 'surrogate-in-input-stream',

  // Tag-related errors
  nonVoidHtmlElementStartTagWithTrailingSolidus = 'non-void-html-element-start-tag-with-trailing-solidus',
  endTagWithAttributes = 'end-tag-with-attributes',
  endTagWithTrailingSolidus = 'end-tag-with-trailing-solidus',
  unexpectedSolidusInTag = 'unexpected-solidus-in-tag',
  unexpectedQuestionMarkInsteadOfTagName = 'unexpected-question-mark-instead-of-tag-name',
  invalidFirstCharacterOfTagName = 'invalid-first-character-of-tag-name',
  missingEndTagName = 'missing-end-tag-name',

  // Character and entity errors
  unexpectedNullCharacter = 'unexpected-null-character',
  unknownNamedCharacterReference = 'unknown-named-character-reference',
  missingSemicolonAfterCharacterReference = 'missing-semicolon-after-character-reference',
  nullCharacterReference = 'null-character-reference',
  characterReferenceOutsideUnicodeRange = 'character-reference-outside-unicode-range',
  surrogateCharacterReference = 'surrogate-character-reference',
  noncharacterCharacterReference = 'noncharacter-character-reference',
  controlCharacterReference = 'control-character-reference',

  // Attribute errors
  unexpectedEqualsSignBeforeAttributeName = 'unexpected-equals-sign-before-attribute-name',
  unexpectedCharacterInAttributeName = 'unexpected-character-in-attribute-name',
  missingAttributeValue = 'missing-attribute-value',
  missingWhitespaceBetweenAttributes = 'missing-whitespace-between-attributes',
  unexpectedCharacterInUnquotedAttributeValue = 'unexpected-character-in-unquoted-attribute-value',

  // DOCTYPE errors
  missingWhitespaceAfterDoctypePublicKeyword = 'missing-whitespace-after-doctype-public-keyword',
  missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers = 'missing-whitespace-between-doctype-public-and-system-identifiers',
  missingWhitespaceAfterDoctypeSystemKeyword = 'missing-whitespace-after-doctype-system-keyword',
  missingQuoteBeforeDoctypePublicIdentifier = 'missing-quote-before-doctype-public-identifier',
  missingQuoteBeforeDoctypeSystemIdentifier = 'missing-quote-before-doctype-system-identifier',
  missingDoctypePublicIdentifier = 'missing-doctype-public-identifier',
  missingDoctypeSystemIdentifier = 'missing-doctype-system-identifier',
  abruptDoctypePublicIdentifier = 'abrupt-doctype-public-identifier',
  abruptDoctypeSystemIdentifier = 'abrupt-doctype-system-identifier',
  unexpectedCharacterAfterDoctypeSystemIdentifier = 'unexpected-character-after-doctype-system-identifier',

  // End-of-file errors
  eofBeforeTagName = 'eof-before-tag-name',
  eofInTag = 'eof-in-tag',
  eofInScriptHtmlCommentLikeText = 'eof-in-script-html-comment-like-text',
  eofInDoctype = 'eof-in-doctype',
  eofInComment = 'eof-in-comment',
  eofInCdata = 'eof-in-cdata',

  // Comment errors
  cdataInHtmlContent = 'cdata-in-html-content',
  incorrectlyOpenedComment = 'incorrectly-opened-comment',
  nestedComment = 'nested-comment',
  abruptClosingOfEmptyComment = 'abrupt-closing-of-empty-comment',
  incorrectlyClosedComment = 'incorrectly-closed-comment',

  // Numeric character reference errors
  absenceOfDigitsInNumericCharacterReference = 'absence-of-digits-in-numeric-character-reference',
}

125

126

/**
 * Export alias for error codes enum
 */
const ERR = ErrorCodes;

130

```

131

132

**Usage Examples:**

133

134

```typescript

135

import { parse, ErrorCodes } from "parse5";

const document = parse('<div><span></div>', {
  // Dispatch on the reported code; codes without a dedicated case reach `default`.
  onParseError: (error) => {
    switch (error.code) {
      case ErrorCodes.endTagWithAttributes:
        console.log('End tag has attributes');
        break;
      case ErrorCodes.unexpectedNullCharacter:
        console.log('Unexpected null character found');
        break;
      default:
        console.log(`Unknown error: ${error.code}`);
    }
  }
});

151

```

152

153

### Location Information

154

155

Detailed location tracking for precise error positioning.

156

157

```typescript { .api }

158

/**
 * Location information interface used in errors and nodes.
 *
 * Line and column fields are one-based; offset fields are zero-based.
 * The end fields point just past the last character.
 */
interface Location {
  /** One-based line index of the first character */
  startLine: number;
  /** One-based column index of the first character */
  startCol: number;
  /** Zero-based first character index */
  startOffset: number;
  /** One-based line index of the last character */
  endLine: number;
  /** One-based column index of the last character (after the character) */
  endCol: number;
  /** Zero-based last character index (after the character) */
  endOffset: number;
}

175

176

/**
 * Location information with attribute positions
 */
interface LocationWithAttributes extends Location {
  /** Start tag attributes' location info */
  attrs?: Record<string, Location>;
}

183

184

/**
 * Element location with start and end tag positions
 */
interface ElementLocation extends LocationWithAttributes {
  /** Element's start tag location info */
  startTag?: Location;
  /** Element's end tag location info (undefined if no closing tag) */
  endTag?: Location;
}

193

```

194

195

**Usage Examples:**

196

197

```typescript

198

import { parse } from "parse5";
import type { ParserError } from "parse5";

const html = `<div class="container">
<span>Content</span>
</div>`;

// Only mutated via push(), never reassigned, so `const` is sufficient.
const parseErrors: ParserError[] = [];

const document = parse(html, {
  sourceCodeLocationInfo: true,
  onParseError: (error) => {
    parseErrors.push(error);
    console.log(`Error at line ${error.startLine}: ${error.code}`);
    console.log(`Position: ${error.startOffset}-${error.endOffset}`);
  }
});

214

```

215

216

## Error Handling Patterns

217

218

### Error Collection and Reporting

219

220

```typescript

221

import { parse, parseFragment } from "parse5";

222

import type { ParserError } from "parse5";

223

224

/**
 * Parses HTML documents while recording the parse errors reported through
 * `onParseError`, and summarises the recorded errors by code.
 */
class HTMLErrorCollector {
  /** Errors recorded during the most recent `parseWithErrorCollection` call. */
  private errors: ParserError[] = [];

  /**
   * Parses `html` and records every error the parser reports.
   *
   * @param html - Markup to parse as a full document.
   * @returns The parsed document, the recorded errors, and whether any were recorded.
   */
  parseWithErrorCollection(html: string) {
    // Assign a fresh array rather than clearing in place, so arrays handed out
    // by earlier calls are left untouched.
    this.errors = [];

    const document = parse(html, {
      onParseError: (error) => {
        this.errors.push(error);
      }
    });

    return {
      document,
      errors: this.errors,
      hasErrors: this.errors.length > 0
    };
  }

  /**
   * Counts the recorded errors per error code.
   *
   * @returns One `{ code, count }` entry per distinct code, in first-seen order.
   */
  getErrorSummary(): Array<{ code: string; count: number }> {
    const errorCounts = new Map<string, number>();

    for (const error of this.errors) {
      errorCounts.set(error.code, (errorCounts.get(error.code) ?? 0) + 1);
    }

    return Array.from(errorCounts, ([code, count]) => ({ code, count }));
  }
}

257

```

258

259

### Error Severity Classification

260

261

```typescript

262

import { ErrorCodes } from "parse5";

263

import type { ParserError } from "parse5";

264

265

/** Severity levels for parse errors; a larger number means more severe. */
enum ErrorSeverity {
  LOW = 1,
  MEDIUM = 2,
  HIGH = 3,
  CRITICAL = 4
}

271

272

/**
 * Maps a parse error to a severity level based on its code.
 *
 * @param error - The error reported by the parser.
 * @returns The severity assigned to `error.code`; codes without an explicit
 *   case are classified as `ErrorSeverity.MEDIUM`.
 */
function classifyError(error: ParserError): ErrorSeverity {
  switch (error.code) {
    // Critical structural errors
    case ErrorCodes.eofInTag:
    case ErrorCodes.eofBeforeTagName:
      return ErrorSeverity.CRITICAL;

    // High priority errors affecting parsing
    case ErrorCodes.missingEndTagName:
    case ErrorCodes.unexpectedCharacterInAttributeName:
      return ErrorSeverity.HIGH;

    // Medium priority formatting issues
    case ErrorCodes.missingWhitespaceBetweenAttributes:
    case ErrorCodes.endTagWithAttributes:
      return ErrorSeverity.MEDIUM;

    // Low priority cosmetic issues
    case ErrorCodes.unexpectedNullCharacter:
    case ErrorCodes.controlCharacterInInputStream:
      return ErrorSeverity.LOW;

    default:
      return ErrorSeverity.MEDIUM;
  }
}

298

```

299

300

### Validation and Cleanup

301

302

```typescript

303

import { parse, serialize } from "parse5";

304

import type { ParserError } from "parse5";

305

306

/** Outcome of validating a piece of HTML. */
interface ValidationResult {
  /** True when no parse errors were reported. */
  isValid: boolean;
  /** Every parse error reported for the input. */
  errors: ParserError[];
  /** Re-serialized markup; only set when at least one error was reported. */
  cleanedHTML?: string;
  /** Number of entries in `errors`. */
  errorCount: number;
}

312

313

/**
 * Parses `html`, collecting parse errors, and re-serializes the parsed tree
 * when any error was reported.
 *
 * @param html - Markup to validate.
 * @returns The collected errors plus, if there were any, the serialized document.
 */
function validateAndCleanHTML(html: string): ValidationResult {
  const errors: ParserError[] = [];

  const document = parse(html, {
    onParseError: (error) => {
      errors.push(error);
    }
  });

  const result: ValidationResult = {
    isValid: errors.length === 0,
    errors,
    errorCount: errors.length
  };

  // If there were errors, provide cleaned HTML
  if (errors.length > 0) {
    result.cleanedHTML = serialize(document);
  }

  return result;
}

335

336

// Usage
const validation = validateAndCleanHTML('<div><span></div>');
if (!validation.isValid) {
  console.log(`Found ${validation.errorCount} errors`);
  console.log('Cleaned HTML:', validation.cleanedHTML);
}

342

```

343

344

### Error Context Enhancement

345

346

```typescript

347

import { parse } from "parse5";

348

import type { ParserError } from "parse5";

349

350

/** A parser error augmented with surrounding source text and a fix hint. */
interface EnhancedError extends ParserError {
  /** Excerpt of the source line around the error. */
  context: string;
  /** Human-readable hint for resolving the error, when available. */
  suggestion?: string;
}

354

355

/**
 * Parses `html` and returns every reported parse error together with an
 * excerpt of the source line it starts on and a suggested fix.
 *
 * @param html - Markup to parse.
 * @returns One enhanced error per error reported by the parser, in report order.
 */
function parseWithEnhancedErrors(html: string): EnhancedError[] {
  // Number of characters of context to keep on each side of the error.
  const contextRadius = 10;
  // Split on CRLF, lone CR and LF so that Windows line endings do not leave a
  // trailing '\r' on each line or miscount lines.
  // NOTE(review): assumes the parser counts lines after normalizing newlines — confirm.
  const lines = html.split(/\r\n?|\n/);
  const enhancedErrors: EnhancedError[] = [];

  parse(html, {
    onParseError: (error) => {
      const line = lines[error.startLine - 1] ?? '';

      // Columns are one-based while substring() takes zero-based indices.
      const startIndex = error.startCol - 1;
      // An error that ends on a later line runs to the end of its start line.
      const endIndex = error.endLine === error.startLine ? error.endCol - 1 : line.length;

      const contextStart = Math.max(0, startIndex - contextRadius);
      const contextEnd = Math.min(line.length, endIndex + contextRadius);
      const context = line.substring(contextStart, contextEnd);

      const enhanced: EnhancedError = {
        ...error,
        context,
        suggestion: getSuggestionForError(error.code)
      };

      enhancedErrors.push(enhanced);
    }
  });

  return enhancedErrors;
}

378

379

/**
 * Returns a short, human-readable hint for fixing a parse error.
 *
 * @param code - Error code string, e.g. `'missing-end-tag-name'`.
 * @returns A specific hint for the codes handled below, otherwise a generic
 *   pointer to the HTML specification.
 */
function getSuggestionForError(code: string): string {
  switch (code) {
    case 'end-tag-with-attributes':
      return 'Remove attributes from closing tags';
    case 'missing-end-tag-name':
      return 'Add tag name after </ in closing tag';
    case 'unexpected-null-character':
      return 'Remove or replace null characters';
    default:
      return 'Check HTML specification for this error';
  }
}

391

```

392

393

## Common Error Scenarios

394

395

### Malformed Tags

396

397

```typescript

398

import { parse } from "parse5";

// Missing closing tag name
const html1 = '<div>Content</>';
parse(html1, {
  onParseError: (error) => {
    console.log(error.code); // 'missing-end-tag-name'
  }
});

// Attributes in closing tag
const html2 = '<div>Content</div class="error">';
parse(html2, {
  onParseError: (error) => {
    console.log(error.code); // 'end-tag-with-attributes'
  }
});

415

```

416

417

### Character Encoding Issues

418

419

```typescript

420

import { parse } from "parse5";

// Null characters in input
const htmlWithNull = '<div>Content\0here</div>';
parse(htmlWithNull, {
  onParseError: (error) => {
    console.log(error.code); // 'unexpected-null-character'
  }
});

// Invalid character references
const htmlWithBadEntity = '<div>Content &invalid; here</div>';
parse(htmlWithBadEntity, {
  onParseError: (error) => {
    console.log(error.code); // 'unknown-named-character-reference'
  }
});

437

```

438

439

### DOCTYPE Errors

440

441

```typescript

442

import { parse } from "parse5";

// Malformed DOCTYPE
const htmlWithBadDoctype = '<!DOCTYPE html SYSTEM>';
parse(htmlWithBadDoctype, {
  onParseError: (error) => {
    console.log(error.code); // DOCTYPE-related error
  }
});

451

```