0
# Custom Translators
1
2
Advanced customization system allowing element-specific translation rules with configurable prefix/postfix, content transformation, recursion control, and post-processing hooks.
3
4
## Capabilities
5
6
### TranslatorConfig Interface
7
8
Configuration interface for defining how HTML elements should be converted to Markdown.
9
10
```typescript { .api }
11
/**
12
* Configuration for element translation behavior
13
*/
14
interface TranslatorConfig {
15
/**
16
* Content prefix (added before element content, after surroundingNewlines)
17
*/
18
prefix?: string;
19
20
/**
21
* Content postfix (added after element content, before surroundingNewlines)
22
*/
23
postfix?: string;
24
25
/**
26
* Fixed output content (replaces element content entirely)
27
*/
28
content?: string;
29
30
/**
31
* Post-process content after inner nodes have been rendered
32
* Return undefined or PostProcessResult.NoChange to leave content unchanged
33
* Return string to replace content
34
* Return PostProcessResult.RemoveNode to remove element entirely
35
*/
36
postprocess?: (ctx: TranslatorContext & { content: string }) => string | PostProcessResult;
37
38
/**
39
* Whether to process child elements
40
* @default true
41
*/
42
recurse?: boolean;
43
44
/**
45
* Add newlines before and after element (true, or a number giving how many newlines)
46
* @default false
47
*/
48
surroundingNewlines?: boolean | number;
49
50
/**
51
* Ignore element entirely (skip processing)
52
*/
53
ignore?: boolean;
54
55
/**
56
* Do not escape markdown special characters in content
57
*/
58
noEscape?: boolean;
59
60
/**
61
* Add space if first character matches end of previous content
62
* Prevents markdown formatting conflicts
63
*/
64
spaceIfRepeatingChar?: boolean;
65
66
/**
67
* Ensure translator is visited even if element is empty
68
* Useful for self-closing elements or custom behavior
69
*/
70
preserveIfEmpty?: boolean;
71
72
/**
73
* Preserve whitespace exactly as it appears in HTML
74
*/
75
preserveWhitespace?: boolean;
76
77
/**
78
* Custom translator collection for child elements
79
*/
80
childTranslators?: TranslatorCollection;
81
}
82
```
83
84
### TranslatorConfigFactory
85
86
Factory function type for creating dynamic translator configurations based on context.
87
88
```typescript { .api }
89
/**
90
* Factory function for creating translator configurations dynamically
91
* @param ctx - Translation context with element, options, and metadata
92
* @returns TranslatorConfig for the current element
93
*/
94
type TranslatorConfigFactory = {
95
(ctx: TranslatorContext): TranslatorConfig;
96
/** Optional base configuration to merge with factory result */
97
base?: TranslatorConfig;
98
};
99
100
/**
101
* Context passed to translator factory functions and post-processors
102
* Extends NodeMetadata with additional context properties
103
*/
104
interface TranslatorContext extends Partial<NodeMetadata> {
105
/** Current HTML element being translated */
106
node: ElementNode;
107
/** Conversion options */
108
options: NodeHtmlMarkdownOptions;
109
/** Parent HTML element */
110
parent?: ElementNode;
111
/** Metadata map for all nodes */
112
nodeMetadata: NodeMetadataMap;
113
/** AST visitor instance */
114
visitor: Visitor;
115
/** Base translator configuration */
116
base?: TranslatorConfig;
117
}
118
```
119
120
### TranslatorCollection Class
121
122
Collection class for managing element translators with key-based access and merging support.
123
124
```typescript { .api }
125
/**
126
* Collection for managing element translators
127
*/
128
class TranslatorCollection {
129
/** Number of translators in collection */
130
readonly size: number;
131
132
/**
133
* Add or update translator config for one or more element tags
134
* @param keys - Comma-separated element tag names (e.g., "h1,h2,h3")
135
* @param config - Translator configuration or factory function
136
* @param preserveBase - Internal parameter for merging configurations
137
*/
138
set(
139
keys: string,
140
config: TranslatorConfig | TranslatorConfigFactory,
141
preserveBase?: boolean
142
): void;
143
144
/**
145
* Get translator config for element tag
146
* @param key - Element tag name
147
* @returns Translator configuration or factory function
148
*/
149
get(key: string): TranslatorConfig | TranslatorConfigFactory;
150
151
/**
152
* Get all translator entries
153
* @returns Array of [elementName, config] pairs
154
*/
155
entries(): [elementName: string, config: TranslatorConfig | TranslatorConfigFactory][];
156
157
/**
158
* Remove translator config for one or more element tags
159
* @param keys - Comma-separated element tag names to remove
160
*/
161
remove(keys: string): void;
162
}
163
```
164
165
### Supporting Types
166
167
```typescript { .api }
168
/**
169
* Map of element tags to translator configurations
170
*/
171
type TranslatorConfigObject = {
172
[tags: string]: TranslatorConfig | TranslatorConfigFactory
173
};
174
175
/**
176
* Result codes for post-processing functions
177
*/
178
enum PostProcessResult {
179
/** No changes made to content */
180
NoChange,
181
/** Remove the entire node from output */
182
RemoveNode
183
}
184
```
185
186
### Visitor Class
187
188
Internal AST traversal class that manages the conversion process. While typically not used directly, it's available for advanced customization scenarios.
189
190
```typescript { .api }
191
/**
192
* Internal AST visitor for HTML to Markdown conversion
193
* Properties & methods marked public may be used for middleware/transformer support
194
*/
195
class Visitor {
196
/** NodeHtmlMarkdown instance */
197
readonly instance: NodeHtmlMarkdown;
198
/** Root HTML node being processed */
199
readonly rootNode: HtmlNode;
200
/** Optional filename for context */
201
readonly fileName?: string;
202
/** Conversion result and statistics */
203
result: VisitorResult;
204
/** Metadata map for all nodes */
205
nodeMetadata: NodeMetadataMap;
206
/** URL definitions for reference-style links */
207
urlDefinitions: string[];
208
209
constructor(instance: NodeHtmlMarkdown, rootNode: HtmlNode, fileName?: string);
210
211
/** Add or get URL definition for reference-style links */
212
addOrGetUrlDefinition(url: string): number;
213
/** Append content to result */
214
appendResult(s: string, startPos?: number, spaceIfRepeatingChar?: boolean): void;
215
/** Append newlines to result */
216
appendNewlines(count: number): void;
217
/** Visit and process HTML node */
218
visitNode(node: HtmlNode, textOnly?: boolean, metadata?: NodeMetadata): void;
219
}
220
221
interface VisitorResult {
222
text: string;
223
trailingNewlineStats: {
224
whitespace: number;
225
newLines: number;
226
};
227
}
228
```
229
230
## Usage Examples
231
232
### Basic Static Translators
233
234
```typescript
235
import { NodeHtmlMarkdown, TranslatorConfigObject } from "node-html-markdown";
236
237
// Define custom translators
238
const customTranslators: TranslatorConfigObject = {
239
// Custom emphasis using different delimiters
240
"em": { prefix: "*", postfix: "*" },
241
"strong": { prefix: "__", postfix: "__" },
242
243
// Custom handling for spans
244
"span": { prefix: "`", postfix: "`" },
245
246
// Ignore certain elements
247
"script,style": { ignore: true },
248
249
// Add surrounding newlines to custom block elements
250
"section": { surroundingNewlines: 2 },
251
252
// Fixed content replacement (a Markdown hard break needs two trailing spaces before the newline)
252
"br": { content: "  \n", recurse: false }
254
};
255
256
const nhm = new NodeHtmlMarkdown({}, customTranslators);
257
258
const html = `
259
<section>
260
<strong>Bold</strong> and <em>italic</em> text.
261
<span>Code-like span</span>
262
<script>alert("ignored");</script>
263
</section>
264
`;
265
266
const result = nhm.translate(html);
267
console.log(result);
268
// Output:
269
// __Bold__ and *italic* text. `Code-like span`
270
```
271
272
### Dynamic Translator Factories
273
274
```typescript
275
import { NodeHtmlMarkdown, TranslatorConfigFactory, TranslatorConfigObject } from "node-html-markdown";
276
277
// Factory for headings with dynamic prefix based on level
278
const headingTranslator: TranslatorConfigFactory = ({ node }) => ({
279
prefix: '#'.repeat(parseInt(node.tagName.charAt(1))) + ' ',
280
surroundingNewlines: 2
281
});
282
283
// Factory for list items with proper indentation
284
const listItemTranslator: TranslatorConfigFactory = ({ indentLevel = 0, listKind, listItemNumber }) => {
285
const indent = ' '.repeat(indentLevel);
286
const marker = listKind === 'OL' ? `${listItemNumber}. ` : '* ';
287
return {
288
prefix: indent + marker,
289
surroundingNewlines: false
290
};
291
};
292
293
const customTranslators: TranslatorConfigObject = {
294
"h1,h2,h3,h4,h5,h6": headingTranslator,
295
"li": listItemTranslator
296
};
297
298
const nhm = new NodeHtmlMarkdown({}, customTranslators);
299
300
const html = `
301
<h2>Subtitle</h2>
302
<ol>
303
<li>First item</li>
304
<li>Second item</li>
305
</ol>
306
`;
307
308
const result = nhm.translate(html);
309
console.log(result);
310
// Output:
311
// ## Subtitle
312
//
313
// 1. First item
314
// 2. Second item
315
```
316
317
### Post-Processing Functions
318
319
```typescript
320
import { NodeHtmlMarkdown, PostProcessResult } from "node-html-markdown";
321
322
const customTranslators = {
323
// Remove empty paragraphs
324
"p": {
325
postprocess: ({ content }) => {
326
if (!content.trim()) {
327
return PostProcessResult.RemoveNode;
328
}
329
return content;
330
}
331
},
332
333
// Transform links with specific class
334
"a": {
335
postprocess: ({ node, content }) => {
336
const href = node.getAttribute('href') || '';
337
const className = node.getAttribute('class') || '';
338
339
if (className.includes('button')) {
340
return `[${content}](${href}){:.button}`;
341
}
342
343
return `[${content}](${href})`;
344
}
345
},
346
347
// Custom code block formatting
348
"pre": {
349
postprocess: ({ node, content }) => {
350
const code = node.querySelector('code');
351
const language = code?.getAttribute('class')?.replace('language-', '') || '';
352
353
if (language) {
354
return `\`\`\`${language}\n${content}\n\`\`\``;
355
}
356
357
return `\`\`\`\n${content}\n\`\`\``;
358
},
359
noEscape: true,
360
preserveWhitespace: true
361
}
362
};
363
364
const nhm = new NodeHtmlMarkdown({}, customTranslators);
365
```
366
367
### Conditional Logic in Factories
368
369
```typescript
370
import { NodeHtmlMarkdown } from "node-html-markdown";
371
372
// Complex factory with conditional logic
373
const imageTranslator = ({ node, options }) => {
374
const src = node.getAttribute('src') || '';
375
const alt = node.getAttribute('alt') || '';
376
const title = node.getAttribute('title');
377
378
// Skip data URIs unless explicitly enabled
379
if (src.startsWith('data:') && !options.keepDataImages) {
380
return { content: `![${alt}]()`, recurse: false };
381
}
382
383
// Format with title if present
384
const titlePart = title ? ` "${title}"` : '';
385
return {
386
content: `![${alt}](${src}${titlePart})`,
387
recurse: false
388
};
389
};
390
391
// Table cell alignment based on class
392
const tableCellTranslator = ({ node }) => {
393
const className = node.getAttribute('class') || '';
394
const isHeader = node.tagName === 'TH';
395
396
let content = isHeader ? '**' : '';
397
let postfix = isHeader ? '**' : '';
398
399
if (className.includes('center')) {
400
content += ' ';
401
postfix = ' ' + postfix;
402
}
403
404
return { prefix: content, postfix };
405
};
406
407
const customTranslators = {
408
"img": imageTranslator,
409
"td,th": tableCellTranslator
410
};
411
412
const nhm = new NodeHtmlMarkdown({}, customTranslators);
413
```
414
415
### Child Translator Collections
416
417
```typescript
418
import { NodeHtmlMarkdown, TranslatorCollection } from "node-html-markdown";
419
420
// Create custom translator collection for code blocks
421
const codeBlockTranslators = new TranslatorCollection();
422
codeBlockTranslators.set('strong,b', { prefix: '', postfix: '' }); // Drop bold markers in code but keep the text
422
codeBlockTranslators.set('em,i', { prefix: '', postfix: '' }); // Drop emphasis markers in code but keep the text
424
425
// Create custom translator collection for tables
426
const tableTranslators = new TranslatorCollection();
427
tableTranslators.set('p', { prefix: '', postfix: '' }); // Remove paragraph formatting in table cells
428
429
const customTranslators = {
430
"pre": {
431
childTranslators: codeBlockTranslators,
432
noEscape: true,
433
preserveWhitespace: true
434
},
435
436
"table": {
437
childTranslators: tableTranslators,
438
surroundingNewlines: 2
439
}
440
};
441
442
const nhm = new NodeHtmlMarkdown({}, customTranslators);
443
444
const html = `
445
<pre><code>const <strong>bold</strong> = true;</code></pre>
446
<table>
447
<tr>
448
<td><p>Cell content</p></td>
449
</tr>
450
</table>
451
`;
452
453
const result = nhm.translate(html);
454
// Code blocks won't have bold formatting
455
// Table cells won't have paragraph spacing
456
```
457
458
### Accessing Instance Translators
459
460
```typescript
461
import { NodeHtmlMarkdown } from "node-html-markdown";
462
463
const nhm = new NodeHtmlMarkdown();
464
465
// Access and modify existing translators
466
console.log("Current translators:", nhm.translators.size);
467
468
// Add new translator
469
nhm.translators.set("mark", { prefix: "==", postfix: "==" });
470
471
// Modify existing translator
472
nhm.translators.set("blockquote", { prefix: "> ", surroundingNewlines: 1 });
473
474
// Remove translator
475
nhm.translators.remove("hr");
476
477
// Access specific translator collections
478
nhm.codeBlockTranslators.set("span", { ignore: true });
479
nhm.tableTranslators.set("br", { content: " ", recurse: false });
480
481
const html = '<mark>Highlighted text</mark>';
482
const result = nhm.translate(html);
483
console.log(result); // "==Highlighted text=="
484
```
485
486
### Complete Custom Translator Example
487
488
```typescript
489
import { NodeHtmlMarkdown, TranslatorConfigObject, PostProcessResult } from "node-html-markdown";
490
491
const customTranslators: TranslatorConfigObject = {
492
// Custom article wrapper
493
"article": {
494
prefix: "---\n",
495
postfix: "\n---",
496
surroundingNewlines: 2
497
},
498
499
// Custom figure handling
500
"figure": {
501
surroundingNewlines: 2,
502
postprocess: ({ node, content }) => {
503
const caption = node.querySelector('figcaption')?.textContent || '';
504
if (caption) {
505
return `${content}\n\n*${caption}*`;
506
}
507
return content;
508
}
509
},
510
511
// Skip figcaption (handled by figure post-processor)
512
"figcaption": { ignore: true },
513
514
// Custom code with language detection
515
"code": ({ node, parent }) => {
516
if (parent?.tagName === 'PRE') {
517
return { noEscape: true, preserveWhitespace: true };
518
}
519
return { prefix: "`", postfix: "`", spaceIfRepeatingChar: true };
520
},
521
522
// Custom abbreviation handling
523
"abbr": {
524
postprocess: ({ node, content }) => {
525
const title = node.getAttribute('title');
526
return title ? `${content} (${title})` : content;
527
}
528
}
529
};
530
531
const nhm = new NodeHtmlMarkdown({}, customTranslators);
532
533
const html = `
534
<article>
535
<h1>Title</h1>
536
<p>Content with <abbr title="HyperText Markup Language">HTML</abbr></p>
537
<figure>
538
<img src="image.jpg" alt="Description">
539
<figcaption>Image caption</figcaption>
540
</figure>
541
</article>
542
`;
543
544
const result = nhm.translate(html);
545
console.log(result);
546
// Output includes custom article wrapper, abbreviation expansion, and figure caption handling
547
```