0
# Configuration Options
1
2
Comprehensive configuration system for customizing HTML to Markdown conversion behavior, output formatting, delimiters, escape patterns, and processing rules.
3
4
## Capabilities
5
6
### NodeHtmlMarkdownOptions Interface
7
8
Complete configuration interface with all available options for customizing conversion behavior.
9
10
```typescript { .api }
11
/**
12
* Configuration options for HTML to Markdown conversion
13
*/
14
interface NodeHtmlMarkdownOptions {
15
/**
16
* Use native window DOMParser when available (browser environment)
17
* @default false
18
*/
19
preferNativeParser: boolean;
20
21
/**
22
* Code block fence delimiter
23
* @default "```"
24
*/
25
codeFence: string;
26
27
/**
28
* Bullet marker for unordered lists
29
* @default "*"
30
*/
31
bulletMarker: string;
32
33
/**
34
* Style for code blocks
35
* @default "fenced"
36
*/
37
codeBlockStyle: "indented" | "fenced";
38
39
/**
40
* Emphasis (italic) delimiter
41
* @default "_"
42
*/
43
emDelimiter: string;
44
45
/**
46
* Strong (bold) delimiter
47
* @default "**"
48
*/
49
strongDelimiter: string;
50
51
/**
52
* Strikethrough delimiter
53
* @default "~~"
54
*/
55
strikeDelimiter: string;
56
57
/**
58
* HTML elements to ignore (ignores inner text, does not parse children)
59
*/
60
readonly ignore?: string[];
61
62
/**
63
* HTML elements to treat as blocks (surrounded with blank lines)
64
*/
65
readonly blockElements?: string[];
66
67
/**
68
* Maximum consecutive newlines allowed in output
69
* @default 3
70
*/
71
maxConsecutiveNewlines: number;
72
73
/**
74
* Line start escape pattern for markdown special characters
75
* Overrides default escape settings - consider using textReplace instead
76
*/
77
lineStartEscape: readonly [pattern: RegExp, replacement: string];
78
79
/**
80
* Global escape pattern for markdown special characters
81
* Overrides default escape settings - consider using textReplace instead
82
*/
83
globalEscape: readonly [pattern: RegExp, replacement: string];
84
85
/**
86
* User-defined text replacement patterns
87
* Applied to text content retrieved from HTML nodes
88
*/
89
textReplace?: (readonly [pattern: RegExp, replacement: string])[];
90
91
/**
92
* Keep images with data: URI (can be up to 1MB each)
93
* @default false
94
* @example <img src="data:image/gif;base64,R0lGODlhEAAQAMQAAORHHOVSK......0o/">
95
*/
96
keepDataImages?: boolean;
97
98
/**
99
* Use link reference definitions at bottom of document
100
* @default undefined
101
* @example
102
* Input: Click <a href="/url1">here</a>. Or <a href="/url2">here</a>.
103
* Output: Click [here][1]. Or [here][2].
104
*
105
* [1]: /url1
106
* [2]: /url2
107
*/
108
useLinkReferenceDefinitions?: boolean;
109
110
/**
111
* Wrap URL text in < > instead of []() syntax when link text matches URL
112
* @default true
113
* @example
114
* Input: <a href="https://google.com">https://google.com</a>
115
* Output: <https://google.com> (when true)
116
* Output: [https://google.com](https://google.com) (when false)
117
*/
118
useInlineLinks?: boolean;
119
}
120
```
121
122
## Usage Examples
123
124
### Basic Options Configuration
125
126
```typescript
127
import { NodeHtmlMarkdown, NodeHtmlMarkdownOptions } from "node-html-markdown";
128
129
// Configure basic formatting options
130
const options: Partial<NodeHtmlMarkdownOptions> = {
131
bulletMarker: "-",
132
strongDelimiter: "__",
133
emDelimiter: "*",
134
codeFence: "~~~"
135
};
136
137
const nhm = new NodeHtmlMarkdown(options);
138
139
const html = `
140
<ul>
141
<li><strong>Bold</strong> and <em>italic</em> text</li>
142
</ul>
143
<pre><code class="language-javascript">console.log("hello");</code></pre>
144
`;
145
146
const markdown = nhm.translate(html);
147
console.log(markdown);
148
// Output:
149
// - __Bold__ and *italic* text
150
//
151
// ~~~javascript
152
// console.log("hello");
153
// ~~~
154
```
155
156
### Code Block Style Options
157
158
```typescript
159
import { NodeHtmlMarkdown } from "node-html-markdown";
160
161
// Fenced code blocks (default)
162
const fencedNhm = new NodeHtmlMarkdown({
163
codeBlockStyle: "fenced",
164
codeFence: "```"
165
});
166
167
// Indented code blocks
168
const indentedNhm = new NodeHtmlMarkdown({
169
codeBlockStyle: "indented"
170
});
171
172
const codeHtml = '<pre><code>const x = 42;</code></pre>';
173
174
console.log("Fenced:", fencedNhm.translate(codeHtml));
175
// Output: ```\nconst x = 42;\n```
176
177
console.log("Indented:", indentedNhm.translate(codeHtml));
178
// Output: "    const x = 42;" (code line indented by four spaces)
179
```
180
181
### Ignore and Block Elements
182
183
```typescript
184
import { NodeHtmlMarkdown } from "node-html-markdown";
185
186
const options = {
187
// Ignore script and style elements completely
188
ignore: ["script", "style", "nav"],
189
// Treat custom elements as block elements
190
blockElements: ["section", "article", "aside"]
191
};
192
193
const nhm = new NodeHtmlMarkdown(options);
194
195
const html = `
196
<article>
197
<h1>Title</h1>
198
<nav>Navigation menu</nav>
199
<script>alert("hello");</script>
200
<p>Content</p>
201
</article>
202
`;
203
204
const result = nhm.translate(html);
205
console.log(result);
206
// Output:
207
// # Title
208
//
209
// Content
210
// (nav and script elements are ignored)
211
```
212
213
### Text Replacement Patterns
214
215
```typescript
216
import { NodeHtmlMarkdown, NodeHtmlMarkdownOptions } from "node-html-markdown";
216
217
const options: Partial<NodeHtmlMarkdownOptions> = {
219
textReplace: [
220
// Replace smart quotes with regular quotes
221
[/[\u201C\u201D]/g, '"'],
222
[/[\u2018\u2019]/g, "'"],
223
// Replace em dashes with regular dashes
224
[/—/g, "--"],
225
// Remove excessive whitespace
226
[/\s{3,}/g, " "]
227
]
228
};
229
230
const nhm = new NodeHtmlMarkdown(options);
231
232
const html = '<p>“Smart quotes” and em—dash here    with spaces</p>';
233
const result = nhm.translate(html);
234
console.log(result);
235
// Output: "Smart quotes" and em--dash here with spaces
236
```
237
238
### Link Reference Definitions
239
240
```typescript
241
import { NodeHtmlMarkdown } from "node-html-markdown";
242
243
const options = {
244
useLinkReferenceDefinitions: true
245
};
246
247
const nhm = new NodeHtmlMarkdown(options);
248
249
const html = `
250
<p>Visit <a href="https://example.com">our website</a> or
251
<a href="https://github.com">GitHub</a>. Also check
252
<a href="https://example.com">our website</a> again.</p>
253
`;
254
255
const result = nhm.translate(html);
256
console.log(result);
257
// Output:
258
// Visit [our website][1] or [GitHub][2]. Also check [our website][1] again.
259
//
260
// [1]: https://example.com
261
// [2]: https://github.com
262
```
263
264
### Data URI Image Handling
265
266
```typescript
267
import { NodeHtmlMarkdown } from "node-html-markdown";
268
269
// Keep data URI images (default: false)
270
const keepDataNhm = new NodeHtmlMarkdown({
271
keepDataImages: true
272
});
273
274
// Remove data URI images (default behavior)
275
const removeDataNhm = new NodeHtmlMarkdown({
276
keepDataImages: false
277
});
278
279
const html = '<img src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7" alt="1x1 pixel">';
280
281
console.log("Keep data images:", keepDataNhm.translate(html));
282
// Output: ![1x1 pixel](data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7)
283
284
console.log("Remove data images:", removeDataNhm.translate(html));
285
// Output: ![1x1 pixel]()
286
```
287
288
### Newline Control
289
290
```typescript
291
import { NodeHtmlMarkdown } from "node-html-markdown";
292
293
const options = {
294
maxConsecutiveNewlines: 2 // Limit to 2 consecutive newlines
295
};
296
297
const nhm = new NodeHtmlMarkdown(options);
298
299
const html = `
300
<h1>Title</h1>
301
302
303
304
305
<p>Paragraph after many line breaks</p>
306
`;
307
308
const result = nhm.translate(html);
309
console.log(result);
310
// Output:
311
// # Title
312
//
313
// Paragraph after many line breaks
314
// (excessive newlines are reduced to maxConsecutiveNewlines)
315
```
316
317
### Complete Custom Configuration
318
319
```typescript
320
import { NodeHtmlMarkdown, NodeHtmlMarkdownOptions } from "node-html-markdown";
321
322
const customOptions: Partial<NodeHtmlMarkdownOptions> = {
323
// Formatting preferences
324
bulletMarker: "+",
325
strongDelimiter: "__",
326
emDelimiter: "*",
327
strikeDelimiter: "~",
328
codeFence: "```",
329
codeBlockStyle: "fenced",
330
331
// Content control
332
maxConsecutiveNewlines: 2,
333
keepDataImages: false,
334
useInlineLinks: false,
335
useLinkReferenceDefinitions: true,
336
337
// Element behavior
338
ignore: ["script", "style", "noscript"],
339
blockElements: ["section", "article", "aside", "details"],
340
341
// Text processing
342
textReplace: [
343
[/\u00A0/g, " "], // Replace non-breaking spaces
344
[/[\u201C\u201D]/g, '"'], // Replace smart quotes
345
[/[\u2018\u2019]/g, "'"], // Replace smart apostrophes
346
]
347
};
348
349
const nhm = new NodeHtmlMarkdown(customOptions);
350
351
// Now use with all custom settings applied
352
const result = nhm.translate('<strong>Bold</strong> text with “smart quotes”');
353
console.log(result); // __Bold__ text with "smart quotes"
354
```