Fast HTML to markdown converter, compatible with both node and the browser
npx @tessl/cli install tessl/npm-node-html-markdown@1.3.0
node-html-markdown is a fast HTML to markdown converter, compatible with both Node.js and the browser. It was built with two primary goals: speed and human readability. The library provides clean, concise output with consistent spacing rules while offering extensive customization through options and translators.
npm install node-html-markdown
import {
NodeHtmlMarkdown,
NodeHtmlMarkdownOptions,
FileCollection,
TranslatorConfig,
TranslatorConfigFactory,
TranslatorCollection,
TranslatorConfigObject,
PostProcessResult,
NodeMetadata,
NodeMetadataMap,
HtmlNode,
ElementNode,
TextNode,
CommentNode,
NodeType,
isTextNode,
isCommentNode,
isElementNode
} from "node-html-markdown";
For CommonJS:
const {
NodeHtmlMarkdown,
NodeHtmlMarkdownOptions,
TranslatorCollection
} = require("node-html-markdown");
import { NodeHtmlMarkdown } from "node-html-markdown";
// Single-use static method: converts one HTML string without constructing an instance yourself
const markdown = NodeHtmlMarkdown.translate("<strong>hello</strong>");
console.log(markdown); // "**hello**"
// Reusable instance (better performance for multiple conversions)
const nhm = new NodeHtmlMarkdown();
const result = nhm.translate("<em>world</em>");
console.log(result); // "_world_"
// Multiple files: pass an object mapping file names to HTML strings;
// the result is an object with the same file names mapped to the converted output
const files = NodeHtmlMarkdown.translate({
"file1.html": "<h1>Title</h1>",
"file2.html": "<p>Content</p>"
});
// Returns: { "file1.html": "# Title", "file2.html": "Content" }
node-html-markdown is built around several key components:
Core conversion functionality providing static and instance methods for translating HTML to Markdown with full customization support.
/**
 * HTML-to-Markdown converter, usable through static `translate` calls or
 * through an instance.
 *
 * The usage example in this document recommends a reusable instance for
 * multiple conversions (better performance) and the static method for
 * single-use conversions.
 */
class NodeHtmlMarkdown {
/**
 * @param options - Any subset of {@link NodeHtmlMarkdownOptions}.
 *   NOTE(review): omitted options presumably fall back to library defaults — confirm.
 * @param customTranslators - Translator configs (or factories) keyed by tag string.
 * @param customCodeBlockTranslators - Translator configs (or factories) keyed by tag string;
 *   presumably applied to code-block content — confirm against the library.
 */
constructor(
options?: Partial<NodeHtmlMarkdownOptions>,
customTranslators?: TranslatorConfigObject,
customCodeBlockTranslators?: TranslatorConfigObject
);
/** Main element translators collection */
translators: TranslatorCollection;
/** Anchor tag specific translators */
aTagTranslators: TranslatorCollection;
/** Code block translators collection */
codeBlockTranslators: TranslatorCollection;
/** Table translators collection */
tableTranslators: TranslatorCollection;
/** Table row translators collection */
tableRowTranslators: TranslatorCollection;
/** Table cell translators collection */
tableCellTranslators: TranslatorCollection;
/** Configuration options (read-only) */
readonly options: NodeHtmlMarkdownOptions;
/**
 * Static conversion. Takes the same optional arguments as the constructor.
 * A string input yields a Markdown string; a {@link FileCollection} input
 * yields a {@link FileCollection} keyed by the same file names.
 */
static translate(html: string, options?: Partial<NodeHtmlMarkdownOptions>, customTranslators?: TranslatorConfigObject, customCodeBlockTranslators?: TranslatorConfigObject): string;
static translate(files: FileCollection, options?: Partial<NodeHtmlMarkdownOptions>, customTranslators?: TranslatorConfigObject, customCodeBlockTranslators?: TranslatorConfigObject): FileCollection;
/**
 * Instance conversion.
 * A string input yields a Markdown string; a {@link FileCollection} input
 * yields a {@link FileCollection} keyed by the same file names.
 */
translate(html: string): string;
translate(files: FileCollection): FileCollection;
}
type FileCollection = { [fileName: string]: string };
Extensive configuration system for customizing output formatting, delimiters, escape patterns, and processing behavior.
interface NodeHtmlMarkdownOptions {
preferNativeParser: boolean;
codeFence: string;
bulletMarker: string;
codeBlockStyle: "indented" | "fenced";
emDelimiter: string;
strongDelimiter: string;
strikeDelimiter: string;
ignore?: string[];
blockElements?: string[];
maxConsecutiveNewlines: number;
lineStartEscape: readonly [pattern: RegExp, replacement: string];
globalEscape: readonly [pattern: RegExp, replacement: string];
textReplace?: (readonly [pattern: RegExp, replacement: string])[];
keepDataImages?: boolean;
useLinkReferenceDefinitions?: boolean;
useInlineLinks?: boolean;
}
Advanced customization system allowing element-specific translation rules with prefix/postfix, content transformation, and post-processing hooks.
/**
 * Element-specific translation rule: prefix/postfix, content, a post-processing
 * hook, and behavior flags. Every field is optional.
 *
 * NOTE(review): the exact runtime effect of each flag is not shown in this
 * listing — confirm against the library before relying on the notes below.
 */
interface TranslatorConfig {
/** Prefix string for the translated element. */
prefix?: string;
/** Postfix string for the translated element. */
postfix?: string;
/** Content string for the element (presumably a fixed replacement — confirm). */
content?: string;
/**
 * Post-processing hook. Receives the translator context extended with a
 * `content` string and returns either a string or a PostProcessResult value.
 */
postprocess?: (ctx: TranslatorContext & { content: string }) => string | PostProcessResult;
/** Presumably controls whether child nodes are processed — confirm. */
recurse?: boolean;
/** Accepts either a flag or a number (presumably a newline count — confirm). */
surroundingNewlines?: boolean | number;
/** Presumably skips the element entirely — confirm. */
ignore?: boolean;
/** Presumably disables escaping for the element's text — confirm. */
noEscape?: boolean;
spaceIfRepeatingChar?: boolean;
preserveIfEmpty?: boolean;
preserveWhitespace?: boolean;
/** Translator collection to use for this element's children (per the field name — confirm). */
childTranslators?: TranslatorCollection;
}
/**
 * Collection of translator configs (or config factories) addressed by string key.
 *
 * NOTE(review): `set` and `remove` take a parameter named `keys` (plural) while
 * `get` takes `key`; presumably one call can address several element names —
 * confirm the accepted key format against the library.
 */
class TranslatorCollection {
  /** Entry count (read-only). */
  readonly size: number;
  /** Registers `config` under `keys`. */
  set(keys: string, config: TranslatorConfig | TranslatorConfigFactory): void;
  /** Looks up the config or factory registered under `key`. */
  get(key: string): TranslatorConfig | TranslatorConfigFactory;
  /** Lists the collection as `[elementName, config]` tuples. */
  entries(): [elementName: string, config: TranslatorConfig | TranslatorConfigFactory][];
  /** Removes the entries registered under `keys`. */
  remove(keys: string): void;
}

/**
 * Callable alternative to a static TranslatorConfig: invoked with a
 * TranslatorContext and returns the TranslatorConfig to use.
 * May also carry an optional `base` config as a property.
 */
type TranslatorConfigFactory = {
  (ctx: TranslatorContext): TranslatorConfig;
  base?: TranslatorConfig;
};
/**
 * Plain-object map from a tag string to a translator config or config factory.
 * NOTE(review): the index name `tags` suggests a key may name more than one
 * tag — confirm the accepted key format.
 */
type TranslatorConfigObject = {
[tags: string]: TranslatorConfig | TranslatorConfigFactory
};
/**
 * Context object handed to translator config factories and (extended with
 * `content`) to `postprocess` hooks.
 *
 * The optional fields from `indentLevel` through `tableMeta` have the same
 * names and types as the fields of NodeMetadata.
 */
interface TranslatorContext {
/** Element currently being translated. */
node: ElementNode;
/** Options in effect for the conversion. */
options: NodeHtmlMarkdownOptions;
/** Parent element, when present. */
parent?: ElementNode;
/** Map from element nodes to their metadata. */
nodeMetadata: NodeMetadataMap;
/** NOTE(review): the `Visitor` type is not declared in this listing. */
visitor: Visitor;
base?: TranslatorConfig;
indentLevel?: number;
listKind?: "OL" | "UL";
listItemNumber?: number;
noEscape?: boolean;
preserveWhitespace?: boolean;
translators?: TranslatorConfigObject;
tableMeta?: {
node: ElementNode;
caption?: string;
};
}
/**
 * Values a `postprocess` hook may return in place of a string.
 * Numeric enum: `NoChange` is 0 and `RemoveNode` is 1.
 */
enum PostProcessResult {
  NoChange = 0,
  RemoveNode = 1,
}
/**
 * Optional per-element metadata; NodeMetadataMap associates one of these with
 * an ElementNode. Every field is optional.
 */
interface NodeMetadata {
indentLevel?: number;
/** List kind, restricted to "OL" or "UL". */
listKind?: "OL" | "UL";
listItemNumber?: number;
noEscape?: boolean;
preserveWhitespace?: boolean;
translators?: TranslatorConfigObject;
/** Table information: the table's element node and an optional caption string. */
tableMeta?: {
node: ElementNode;
caption?: string;
};
}
/** Map from an element node to its metadata. */
type NodeMetadataMap = Map<ElementNode, NodeMetadata>;
/**
 * Any node: either an `NHParser.Node` or a DOM `Node`, with an optional `preserve` flag.
 * NOTE(review): `NHParser` is presumably the node-html-parser namespace — confirm.
 */
type HtmlNode = (NHParser.Node | Node) & { preserve?: boolean };
/** Element node: either an `NHParser.HTMLElement` or a DOM `HTMLElement`, with an optional `preserve` flag. */
type ElementNode = (NHParser.HTMLElement | HTMLElement) & { preserve?: boolean };
/** Text node: `NHParser.TextNode` only (no DOM counterpart in this alias), with an optional `preserve` flag. */
type TextNode = (NHParser.TextNode) & { preserve?: boolean };
// Node type guards: each narrows an HtmlNode to the more specific node type
function isTextNode(node: HtmlNode): node is TextNode;
function isCommentNode(node: HtmlNode): node is CommentNode;
function isElementNode(node: HtmlNode): node is ElementNode;
// Node types from node-html-parser
type CommentNode = import('node-html-parser').CommentNode;
/**
 * Numeric node-kind constants. The values (1, 3, 8) are the same numbers the
 * DOM uses for `Node.ELEMENT_NODE`, `Node.TEXT_NODE` and `Node.COMMENT_NODE`.
 */
enum NodeType {
TEXT_NODE = 3,
COMMENT_NODE = 8,
ELEMENT_NODE = 1
}