Fast HTML to markdown converter, compatible with both node and the browser
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
Comprehensive configuration system for customizing HTML to Markdown conversion behavior, output formatting, delimiters, escape patterns, and processing rules.
Complete configuration interface with all available options for customizing conversion behavior.
/**
 * Options that control how HTML is converted to Markdown.
 */
interface NodeHtmlMarkdownOptions {
  /**
   * When running in a browser, use the native window DOMParser if it is available.
   * @default false
   */
  preferNativeParser: boolean;

  /**
   * Delimiter used to open and close fenced code blocks.
   * @default "```"
   */
  codeFence: string;

  /**
   * Marker placed in front of each unordered list item.
   * @default "*"
   */
  bulletMarker: string;

  /**
   * How code blocks are rendered.
   * @default "fenced"
   */
  codeBlockStyle: "fenced" | "indented";

  /**
   * Delimiter wrapped around emphasized (italic) text.
   * @default "_"
   */
  emDelimiter: string;

  /**
   * Delimiter wrapped around strong (bold) text.
   * @default "**"
   */
  strongDelimiter: string;

  /**
   * Delimiter wrapped around struck-through text.
   * @default "~~"
   */
  strikeDelimiter: string;

  /**
   * Names of HTML elements to skip entirely: their inner text is dropped
   * and their children are not parsed.
   */
  readonly ignore?: string[];

  /**
   * Names of HTML elements to treat as blocks (output is surrounded with blank lines).
   */
  readonly blockElements?: string[];

  /**
   * Upper bound on the number of consecutive newlines in the output.
   * @default 3
   */
  maxConsecutiveNewlines: number;

  /**
   * Escape pattern applied at the start of a line for markdown special characters.
   * Setting this overrides the default escape settings - consider using textReplace instead.
   */
  lineStartEscape: readonly [pattern: RegExp, replacement: string];

  /**
   * Escape pattern applied globally for markdown special characters.
   * Setting this overrides the default escape settings - consider using textReplace instead.
   */
  globalEscape: readonly [pattern: RegExp, replacement: string];

  /**
   * User-defined replacement patterns.
   * They are applied to the text content retrieved from HTML nodes.
   */
  textReplace?: Array<readonly [pattern: RegExp, replacement: string]>;

  /**
   * Keep images whose source is a data: URI (each can be up to 1MB).
   * @default false
   * @example <img src="data:image/gif;base64,R0lGODlhEAAQAMQAAORHHOVSK......0o/">
   */
  keepDataImages?: boolean;

  /**
   * Emit link reference definitions at the bottom of the document.
   * @default undefined
   * @example
   * Input: Click <a href="/url1">here</a>. Or <a href="/url2">here</a>.
   * Output: Click [here][1]. Or [here][2].
   *
   * [1]: /url1
   * [2]: /url2
   */
  useLinkReferenceDefinitions?: boolean;

  /**
   * When the link text matches the URL, wrap the URL in < > instead of using []() syntax.
   * @default true
   * @example
   * Input: <a href="https://google.com">https://google.com</a>
   * Output: <https://google.com> (when true)
   * Output: [https://google.com](https://google.com) (when false)
   */
  useInlineLinks?: boolean;
}

import { NodeHtmlMarkdown, NodeHtmlMarkdownOptions } from "node-html-markdown";
// Example: override the list, emphasis, and code-fence markers.
// Configure basic formatting options
const options: Partial<NodeHtmlMarkdownOptions> = {
  bulletMarker: "-",
  strongDelimiter: "__",
  emDelimiter: "*",
  codeFence: "~~~"
};
const nhm = new NodeHtmlMarkdown(options);
// The template literal is passed to translate() verbatim, so its lines stay unindented.
const html = `
<ul>
<li><strong>Bold</strong> and <em>italic</em> text</li>
</ul>
<pre><code class="language-javascript">console.log("hello");</code></pre>
`;
const markdown = nhm.translate(html);
console.log(markdown);
// Output:
// - __Bold__ and *italic* text
//
// ~~~javascript
// console.log("hello");
// ~~~

import { NodeHtmlMarkdown } from "node-html-markdown";
// Example: compare the two code block styles on the same input.
// Fenced code blocks (default)
const fencedNhm = new NodeHtmlMarkdown({
  codeBlockStyle: "fenced",
  codeFence: "```"
});
// Indented code blocks
const indentedNhm = new NodeHtmlMarkdown({
  codeBlockStyle: "indented"
});
const codeHtml = '<pre><code>const x = 42;</code></pre>';
console.log("Fenced:", fencedNhm.translate(codeHtml));
// Output: ```\nconst x = 42;\n```
console.log("Indented:", indentedNhm.translate(codeHtml));
// Output:     const x = 42;
// (an indented code block is prefixed with four spaces)

import { NodeHtmlMarkdown } from "node-html-markdown";
// Example: drop some elements entirely and treat others as blocks.
const options = {
  // Ignore script, style, and nav elements completely
  ignore: ["script", "style", "nav"],
  // Treat custom elements as block elements
  blockElements: ["section", "article", "aside"]
};
const nhm = new NodeHtmlMarkdown(options);
// The template literal is passed to translate() verbatim, so its lines stay unindented.
const html = `
<article>
<h1>Title</h1>
<nav>Navigation menu</nav>
<script>alert("hello");</script>
<p>Content</p>
</article>
`;
const result = nhm.translate(html);
console.log(result);
// Output:
// # Title
//
// Content
// (nav and script elements are ignored)

import { NodeHtmlMarkdown } from "node-html-markdown";
// Example: normalize typographic characters with textReplace.

// Each entry is a [pattern, replacement] pair. The explicit tuple type keeps
// TypeScript from widening the entries to (string | RegExp)[], which the
// textReplace option would reject.
type TextReplacement = readonly [pattern: RegExp, replacement: string];

const textReplace: TextReplacement[] = [
  // Replace smart (curly) double quotes with regular quotes.
  // Unicode escapes are used so the pattern survives encoding-lossy copies.
  [/[\u201C\u201D]/g, '"'],
  // Replace smart (curly) single quotes with regular apostrophes
  [/[\u2018\u2019]/g, "'"],
  // Replace em dashes with regular dashes
  [/\u2014/g, "--"],
  // Remove excessive whitespace
  [/\s{3,}/g, " "]
];
const options = { textReplace };
const nhm = new NodeHtmlMarkdown(options);
// The input contains curly quotes, an em dash, and a run of spaces so that
// every replacement above has something to match.
const html = '<p>\u201CSmart quotes\u201D and em\u2014dash here with    spaces</p>';
const result = nhm.translate(html);
console.log(result);
// Output: "Smart quotes" and em--dash here with spaces

import { NodeHtmlMarkdown } from "node-html-markdown";
// Example: collect link targets into numbered reference definitions.
const options = {
  useLinkReferenceDefinitions: true
};
const nhm = new NodeHtmlMarkdown(options);
// The template literal is passed to translate() verbatim, so its lines stay unindented.
const html = `
<p>Visit <a href="https://example.com">our website</a> or
<a href="https://github.com">GitHub</a>. Also check
<a href="https://example.com">our website</a> again.</p>
`;
const result = nhm.translate(html);
console.log(result);
// Output (the repeated URL reuses reference 1):
// Visit [our website][1] or [GitHub][2]. Also check [our website][1] again.
//
// [1]: https://example.com
// [2]: https://github.com

import { NodeHtmlMarkdown } from "node-html-markdown";
// Example: control whether images with data: URIs are kept.
// Keep data URI images (default: false)
const keepDataNhm = new NodeHtmlMarkdown({
  keepDataImages: true
});
// Remove data URI images (default behavior)
const removeDataNhm = new NodeHtmlMarkdown({
  keepDataImages: false
});
const html = '<img src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7" alt="1x1 pixel">';
console.log("Keep data images:", keepDataNhm.translate(html));
// Output: ![1x1 pixel](data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7)
console.log("Remove data images:", removeDataNhm.translate(html));
// Output: ![1x1 pixel]()
// NOTE(review): confirm against the library - the image may be omitted
// entirely rather than emitted with an empty URL.

import { NodeHtmlMarkdown } from "node-html-markdown";
// Example: cap the number of consecutive newlines in the output.
const options = {
  maxConsecutiveNewlines: 2 // Limit to 2 consecutive newlines
};
const nhm = new NodeHtmlMarkdown(options);
// NOTE(review): the sample markup presumably contained extra line breaks
// between the heading and the paragraph that were lost when this page was
// extracted - restore them from the upstream docs if available.
const html = `
<h1>Title</h1>
<p>Paragraph after many line breaks</p>
`;
const result = nhm.translate(html);
console.log(result);
// Output:
// # Title
//
// Paragraph after many line breaks
// (excessive newlines are reduced to maxConsecutiveNewlines)

// The next example annotates its options object, so the options type is imported too.
import { NodeHtmlMarkdown, NodeHtmlMarkdownOptions } from "node-html-markdown";
// Example: a complete configuration combining formatting, content,
// element, and text-processing options.
const customOptions: Partial<NodeHtmlMarkdownOptions> = {
  // Formatting preferences
  bulletMarker: "+",
  strongDelimiter: "__",
  emDelimiter: "*",
  strikeDelimiter: "~",
  codeFence: "```",
  codeBlockStyle: "fenced",
  // Content control
  maxConsecutiveNewlines: 2,
  keepDataImages: false,
  useInlineLinks: false,
  useLinkReferenceDefinitions: true,
  // Element behavior
  ignore: ["script", "style", "noscript"],
  blockElements: ["section", "article", "aside", "details"],
  // Text processing
  // Unicode escapes are used so the patterns survive encoding-lossy copies.
  textReplace: [
    [/\u00A0/g, " "], // Replace non-breaking spaces
    [/[\u201C\u201D]/g, '"'], // Replace smart quotes
    [/[\u2018\u2019]/g, "'"], // Replace smart apostrophes
  ]
};
const nhm = new NodeHtmlMarkdown(customOptions);
// Now use with all custom settings applied.
// The input uses curly quotes (written as escapes) so the textReplace rules
// have something to match and the string literal stays well-formed.
const result = nhm.translate("<strong>Bold</strong> text with \u201Csmart quotes\u201D");
console.log(result); // __Bold__ text with "smart quotes"