or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

conversion.md index.md options.md translators.md

translators.md docs/

0

# Custom Translators

1

2

Advanced customization system allowing element-specific translation rules with configurable prefix/postfix, content transformation, recursion control, and post-processing hooks.

3

4

## Capabilities

5

6

### TranslatorConfig Interface

7

8

Configuration interface for defining how HTML elements should be converted to Markdown.

9

10

```typescript { .api }

11

/**

12

* Configuration for element translation behavior

13

*/

14

interface TranslatorConfig {

15

/**

16

* Content prefix (added before element content, after surroundingNewlines)

17

*/

18

prefix?: string;

19

20

/**

21

* Content postfix (added after element content, before surroundingNewlines)

22

*/

23

postfix?: string;

24

25

/**

26

* Fixed output content (replaces element content entirely)

27

*/

28

content?: string;

29

30

/**

31

* Post-process content after inner nodes have been rendered

32

* Return PostProcessResult.NoChange to leave content unchanged (undefined is not part of the declared return type)

33

* Return string to replace content

34

* Return PostProcessResult.RemoveNode to remove element entirely

35

*/

36

postprocess?: (ctx: TranslatorContext & { content: string }) => string | PostProcessResult;

37

38

/**

39

* Whether to process child elements

40

* @default true

41

*/

42

recurse?: boolean;

43

44

/**

45

* Add newlines before and after element (true/false, or a number giving how many newlines to add)

46

* @default false

47

*/

48

surroundingNewlines?: boolean | number;

49

50

/**

51

* Ignore element entirely (skip processing)

52

*/

53

ignore?: boolean;

54

55

/**

56

* Do not escape markdown special characters in content

57

*/

58

noEscape?: boolean;

59

60

/**

61

* Add space if first character matches end of previous content

62

* Prevents markdown formatting conflicts

63

*/

64

spaceIfRepeatingChar?: boolean;

65

66

/**

67

* Ensure translator is visited even if element is empty

68

* Useful for self-closing elements or custom behavior

69

*/

70

preserveIfEmpty?: boolean;

71

72

/**

73

* Preserve whitespace exactly as it appears in HTML

74

*/

75

preserveWhitespace?: boolean;

76

77

/**

78

* Custom translator collection for child elements

79

*/

80

childTranslators?: TranslatorCollection;

81

}

82

```

83

84

### TranslatorConfigFactory

85

86

Factory function type for creating dynamic translator configurations based on context.

87

88

```typescript { .api }

89

/**

90

* Factory function for creating translator configurations dynamically

91

* @param ctx - Translation context with element, options, and metadata

92

* @returns TranslatorConfig for the current element

93

*/

94

type TranslatorConfigFactory = {

95

(ctx: TranslatorContext): TranslatorConfig;

96

/** Optional base configuration to merge with factory result */

97

base?: TranslatorConfig;

98

};

99

100

/**

101

* Context passed to translator factory functions and post-processors

102

* Extends Partial<NodeMetadata> (every metadata field is optional) with additional context properties

103

*/

104

interface TranslatorContext extends Partial<NodeMetadata> {

105

/** Current HTML element being translated */

106

node: ElementNode;

107

/** Conversion options */

108

options: NodeHtmlMarkdownOptions;

109

/** Parent HTML element */

110

parent?: ElementNode;

111

/** Metadata map for all nodes */

112

nodeMetadata: NodeMetadataMap;

113

/** AST visitor instance */

114

visitor: Visitor;

115

/** Base translator configuration */

116

base?: TranslatorConfig;

117

}

118

```

119

120

### TranslatorCollection Class

121

122

Collection class for managing element translators with key-based access and merging support.

123

124

```typescript { .api }

125

/**

126

* Collection for managing element translators

127

*/

128

class TranslatorCollection {

129

/** Number of translators in collection */

130

readonly size: number;

131

132

/**

133

* Add or update translator config for one or more element tags

134

* @param keys - Comma-separated element tag names (e.g., "h1,h2,h3")

135

* @param config - Translator configuration or factory function

136

* @param preserveBase - Internal parameter for merging configurations

137

*/

138

set(

139

keys: string,

140

config: TranslatorConfig | TranslatorConfigFactory,

141

preserveBase?: boolean

142

): void;

143

144

/**

145

* Get translator config for element tag

146

* @param key - Element tag name

147

* @returns Translator configuration or factory function

148

*/

149

get(key: string): TranslatorConfig | TranslatorConfigFactory;

150

151

/**

152

* Get all translator entries

153

* @returns Array of [elementName, config] pairs

154

*/

155

entries(): [elementName: string, config: TranslatorConfig | TranslatorConfigFactory][];

156

157

/**

158

* Remove translator config for one or more element tags

159

* @param keys - Comma-separated element tag names to remove

160

*/

161

remove(keys: string): void;

162

}

163

```

164

165

### Supporting Types

166

167

```typescript { .api }

168

/**

169

* Map of element tags to translator configurations

170

*/

171

type TranslatorConfigObject = {

172

[tags: string]: TranslatorConfig | TranslatorConfigFactory

173

};

174

175

/**

176

* Result codes for post-processing functions

177

*/

178

enum PostProcessResult {

179

/** No changes made to content */

180

NoChange,

181

/** Remove the entire node from output */

182

RemoveNode

183

}

184

```

185

186

### Visitor Class

187

188

Internal AST traversal class that manages the conversion process. While typically not used directly, it's available for advanced customization scenarios.

189

190

```typescript { .api }

191

/**

192

* Internal AST visitor for HTML to Markdown conversion

193

* Properties & methods marked public may be used for middleware/transformer support

194

*/

195

class Visitor {

196

/** NodeHtmlMarkdown instance */

197

readonly instance: NodeHtmlMarkdown;

198

/** Root HTML node being processed */

199

readonly rootNode: HtmlNode;

200

/** Optional filename for context */

201

readonly fileName?: string;

202

/** Conversion result and statistics */

203

result: VisitorResult;

204

/** Metadata map for all nodes */

205

nodeMetadata: NodeMetadataMap;

206

/** URL definitions for reference-style links */

207

urlDefinitions: string[];

208

209

constructor(instance: NodeHtmlMarkdown, rootNode: HtmlNode, fileName?: string);

210

211

/** Add or get URL definition for reference-style links */

212

addOrGetUrlDefinition(url: string): number;

213

/** Append content to result */

214

appendResult(s: string, startPos?: number, spaceIfRepeatingChar?: boolean): void;

215

/** Append newlines to result */

216

appendNewlines(count: number): void;

217

/** Visit and process HTML node */

218

visitNode(node: HtmlNode, textOnly?: boolean, metadata?: NodeMetadata): void;

219

}

220

221

interface VisitorResult {

222

text: string;

223

trailingNewlineStats: {

224

whitespace: number;

225

newLines: number;

226

};

227

}

228

```

229

230

## Usage Examples

231

232

### Basic Static Translators

233

234

```typescript

235

import { NodeHtmlMarkdown, TranslatorConfigObject } from "node-html-markdown";

236

237

// Define custom translators

238

const customTranslators: TranslatorConfigObject = {

239

// Custom emphasis using different delimiters

240

"em": { prefix: "*", postfix: "*" },

241

"strong": { prefix: "__", postfix: "__" },

242

243

// Custom handling for spans

244

"span": { prefix: "`", postfix: "`" },

245

246

// Ignore certain elements

247

"script,style": { ignore: true },

248

249

// Add surrounding newlines to custom block elements

250

"section": { surroundingNewlines: 2 },

251

252

// Fixed content replacement

253

"br": { content: " \n", recurse: false }

254

};

255

256

const nhm = new NodeHtmlMarkdown({}, customTranslators);

257

258

const html = `

259

<section>

260

<strong>Bold</strong> and <em>italic</em> text.

261

<span>Code-like span</span>

262

<script>alert("ignored");</script>

263

</section>

264

`;

265

266

const result = nhm.translate(html);

267

console.log(result);

268

// Output:

269

// __Bold__ and *italic* text. `Code-like span`

270

```

271

272

### Dynamic Translator Factories

273

274

```typescript

275

import { NodeHtmlMarkdown, TranslatorConfigFactory, TranslatorConfigObject } from "node-html-markdown";

276

277

// Factory for headings with dynamic prefix based on level

278

const headingTranslator: TranslatorConfigFactory = ({ node }) => ({

279

prefix: '#'.repeat(parseInt(node.tagName.charAt(1))) + ' ',

280

surroundingNewlines: 2

281

});

282

283

// Factory for list items with proper indentation

284

const listItemTranslator: TranslatorConfigFactory = ({ indentLevel = 0, listKind, listItemNumber }) => {

285

const indent = ' '.repeat(indentLevel);

286

const marker = listKind === 'OL' ? `${listItemNumber}. ` : '* ';

287

return {

288

prefix: indent + marker,

289

surroundingNewlines: false

290

};

291

};

292

293

const customTranslators: TranslatorConfigObject = {

294

"h1,h2,h3,h4,h5,h6": headingTranslator,

295

"li": listItemTranslator

296

};

297

298

const nhm = new NodeHtmlMarkdown({}, customTranslators);

299

300

const html = `

301

<h2>Subtitle</h2>

302

<ol>

303

<li>First item</li>

304

<li>Second item</li>

305

</ol>

306

`;

307

308

const result = nhm.translate(html);

309

console.log(result);

310

// Output:

311

// ## Subtitle

312

//

313

// 1. First item

314

// 2. Second item

315

```

316

317

### Post-Processing Functions

318

319

```typescript

320

import { NodeHtmlMarkdown, PostProcessResult } from "node-html-markdown";

321

322

const customTranslators = {

323

// Remove empty paragraphs

324

"p": {

325

postprocess: ({ content }) => {

326

if (!content.trim()) {

327

return PostProcessResult.RemoveNode;

328

}

329

return content;

330

}

331

},

332

333

// Transform links with specific class

334

"a": {

335

postprocess: ({ node, content }) => {

336

const href = node.getAttribute('href') || '';

337

const className = node.getAttribute('class') || '';

338

339

if (className.includes('button')) {

340

return `[${content}](${href}){:.button}`;

341

}

342

343

return `[${content}](${href})`;

344

}

345

},

346

347

// Custom code block formatting

348

"pre": {

349

postprocess: ({ node, content }) => {

350

const code = node.querySelector('code');

351

const language = code?.getAttribute('class')?.replace('language-', '') || '';

352

353

if (language) {

354

return `\`\`\`${language}\n${content}\n\`\`\``;

355

}

356

357

return `\`\`\`\n${content}\n\`\`\``;

358

},

359

noEscape: true,

360

preserveWhitespace: true

361

}

362

};

363

364

const nhm = new NodeHtmlMarkdown({}, customTranslators);

365

```

366

367

### Conditional Logic in Factories

368

369

```typescript

370

import { NodeHtmlMarkdown } from "node-html-markdown";

371

372

// Complex factory with conditional logic

373

const imageTranslator = ({ node, options }) => {

374

const src = node.getAttribute('src') || '';

375

const alt = node.getAttribute('alt') || '';

376

const title = node.getAttribute('title');

377

378

// Skip data URIs unless explicitly enabled

379

if (src.startsWith('data:') && !options.keepDataImages) {

380

return { content: `![${alt}]()`, recurse: false };

381

}

382

383

// Format with title if present

384

const titlePart = title ? ` "${title}"` : '';

385

return {

386

content: `![${alt}](${src}${titlePart})`,

387

recurse: false

388

};

389

};

390

391

// Table cell alignment based on class

392

const tableCellTranslator = ({ node }) => {

393

const className = node.getAttribute('class') || '';

394

const isHeader = node.tagName === 'TH';

395

396

let content = isHeader ? '**' : '';

397

let postfix = isHeader ? '**' : '';

398

399

if (className.includes('center')) {

400

content += ' ';

401

postfix = ' ' + postfix;

402

}

403

404

return { prefix: content, postfix };

405

};

406

407

const customTranslators = {

408

"img": imageTranslator,

409

"td,th": tableCellTranslator

410

};

411

412

const nhm = new NodeHtmlMarkdown({}, customTranslators);

413

```

414

415

### Child Translator Collections

416

417

```typescript

418

import { NodeHtmlMarkdown, TranslatorCollection } from "node-html-markdown";

419

420

// Create custom translator collection for code blocks

421

const codeBlockTranslators = new TranslatorCollection();

422

codeBlockTranslators.set('strong,b', { ignore: true }); // Ignore formatting in code

423

codeBlockTranslators.set('em,i', { ignore: true });

424

425

// Create custom translator collection for tables

426

const tableTranslators = new TranslatorCollection();

427

tableTranslators.set('p', { prefix: '', postfix: '' }); // Remove paragraph formatting in table cells

428

429

const customTranslators = {

430

"pre": {

431

childTranslators: codeBlockTranslators,

432

noEscape: true,

433

preserveWhitespace: true

434

},

435

436

"table": {

437

childTranslators: tableTranslators,

438

surroundingNewlines: 2

439

}

440

};

441

442

const nhm = new NodeHtmlMarkdown({}, customTranslators);

443

444

const html = `

445

<pre><code>const <strong>bold</strong> = true;</code></pre>

446

<table>

447

<tr>

448

<td><p>Cell content</p></td>

449

</tr>

450

</table>

451

`;

452

453

const result = nhm.translate(html);

454

// Code blocks won't have bold formatting

455

// Table cells won't have paragraph spacing

456

```

457

458

### Accessing Instance Translators

459

460

```typescript

461

import { NodeHtmlMarkdown } from "node-html-markdown";

462

463

const nhm = new NodeHtmlMarkdown();

464

465

// Access and modify existing translators

466

console.log("Current translators:", nhm.translators.size);

467

468

// Add new translator

469

nhm.translators.set("mark", { prefix: "==", postfix: "==" });

470

471

// Modify existing translator

472

nhm.translators.set("blockquote", { prefix: "> ", surroundingNewlines: 1 });

473

474

// Remove translator

475

nhm.translators.remove("hr");

476

477

// Access specific translator collections

478

nhm.codeBlockTranslators.set("span", { ignore: true });

479

nhm.tableTranslators.set("br", { content: " ", recurse: false });

480

481

const html = '<mark>Highlighted text</mark>';

482

const result = nhm.translate(html);

483

console.log(result); // "==Highlighted text=="

484

```

485

486

### Complete Custom Translator Example

487

488

```typescript

489

import { NodeHtmlMarkdown, TranslatorConfigObject, PostProcessResult } from "node-html-markdown";

490

491

const customTranslators: TranslatorConfigObject = {

492

// Custom article wrapper

493

"article": {

494

prefix: "---\n",

495

postfix: "\n---",

496

surroundingNewlines: 2

497

},

498

499

// Custom figure handling

500

"figure": {

501

surroundingNewlines: 2,

502

postprocess: ({ node, content }) => {

503

const caption = node.querySelector('figcaption')?.textContent || '';

504

if (caption) {

505

return `${content}\n\n*${caption}*`;

506

}

507

return content;

508

}

509

},

510

511

// Skip figcaption (handled by figure post-processor)

512

"figcaption": { ignore: true },

513

514

// Custom code with language detection

515

"code": ({ node, parent }) => {

516

if (parent?.tagName === 'PRE') {

517

return { noEscape: true, preserveWhitespace: true };

518

}

519

return { prefix: "`", postfix: "`", spaceIfRepeatingChar: true };

520

},

521

522

// Custom abbreviation handling

523

"abbr": {

524

postprocess: ({ node, content }) => {

525

const title = node.getAttribute('title');

526

return title ? `${content} (${title})` : content;

527

}

528

}

529

};

530

531

const nhm = new NodeHtmlMarkdown({}, customTranslators);

532

533

const html = `

534

<article>

535

<h1>Title</h1>

536

<p>Content with <abbr title="HyperText Markup Language">HTML</abbr></p>

537

<figure>

538

<img src="image.jpg" alt="Description">

539

<figcaption>Image caption</figcaption>

540

</figure>

541

</article>

542

`;

543

544

const result = nhm.translate(html);

545

console.log(result);

546

// Output includes custom article wrapper, abbreviation expansion, and figure caption handling

547

```