Fast HTML to markdown converter, compatible with both node and the browser
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
Advanced customization system allowing element-specific translation rules with configurable prefix/postfix, content transformation, recursion control, and post-processing hooks.
Configuration interface for defining how HTML elements should be converted to Markdown.
/**
* Configuration for element translation behavior
*/
interface TranslatorConfig {
/**
* Content prefix (added before element content, after surroundingNewlines)
*/
prefix?: string;
/**
* Content postfix (added after element content, before surroundingNewlines)
*/
postfix?: string;
/**
* Fixed output content (replaces element content entirely)
*/
content?: string;
/**
* Post-process content after inner nodes have been rendered
* Return PostProcessResult.NoChange to leave content unchanged
* (the declared return type does not include undefined)
* Return string to replace content
* Return PostProcessResult.RemoveNode to remove element entirely
*/
postprocess?: (ctx: TranslatorContext & { content: string }) => string | PostProcessResult;
/**
* Whether to process child elements
* @default true
*/
recurse?: boolean;
/**
* Add newlines before and after element
* @default false
*/
surroundingNewlines?: boolean | number;
/**
* Ignore element entirely (skip processing)
*/
ignore?: boolean;
/**
* Do not escape markdown special characters in content
*/
noEscape?: boolean;
/**
* Add space if first character matches end of previous content
* Prevents markdown formatting conflicts
*/
spaceIfRepeatingChar?: boolean;
/**
* Ensure translator is visited even if element is empty
* Useful for self-closing elements or custom behavior
*/
preserveIfEmpty?: boolean;
/**
* Preserve whitespace exactly as it appears in HTML
*/
preserveWhitespace?: boolean;
/**
* Custom translator collection for child elements
*/
childTranslators?: TranslatorCollection;
}
Factory function type for creating dynamic translator configurations based on context.
/**
* Factory function for creating translator configurations dynamically
*
* This is an object type with a call signature, so a value of this type is a
* function that may additionally carry an optional `base` property.
*
* @param ctx - Translation context with element, options, and metadata
* @returns TranslatorConfig for the current element
*/
type TranslatorConfigFactory = {
/** Call signature: receives the TranslatorContext and returns the TranslatorConfig to use */
(ctx: TranslatorContext): TranslatorConfig;
/** Optional base configuration to merge with factory result */
base?: TranslatorConfig;
};
/**
* Context passed to translator factory functions and post-processors
* Extends NodeMetadata with additional context properties
*/
interface TranslatorContext extends Partial<NodeMetadata> {
/** Current HTML element being translated */
node: ElementNode;
/** Conversion options */
options: NodeHtmlMarkdownOptions;
/** Parent HTML element */
parent?: ElementNode;
/** Metadata map for all nodes */
nodeMetadata: NodeMetadataMap;
/** AST visitor instance */
visitor: Visitor;
/** Base translator configuration */
base?: TranslatorConfig;
}
Collection class for managing element translators with key-based access and merging support.
/**
* Collection for managing element translators
*/
class TranslatorCollection {
/** Number of translators in collection */
readonly size: number;
/**
* Add or update translator config for one or more element tags
* @param keys - Comma-separated element tag names (e.g., "h1,h2,h3")
* @param config - Translator configuration or factory function
* @param preserveBase - Internal parameter for merging configurations
*/
set(
keys: string,
config: TranslatorConfig | TranslatorConfigFactory,
preserveBase?: boolean
): void;
/**
* Get translator config for element tag
* @param key - Element tag name
* @returns Translator configuration or factory function
*/
get(key: string): TranslatorConfig | TranslatorConfigFactory;
/**
* Get all translator entries
* @returns Array of [elementName, config] pairs
*/
entries(): [elementName: string, config: TranslatorConfig | TranslatorConfigFactory][];
/**
* Remove translator config for one or more element tags
* @param keys - Comma-separated element tag names to remove
*/
remove(keys: string): void;
}
/**
* Map of element tags to translator configurations
*/
type TranslatorConfigObject = {
[tags: string]: TranslatorConfig | TranslatorConfigFactory
};
/**
* Result codes for post-processing functions
*/
enum PostProcessResult {
/** No changes made to content */
NoChange,
/** Remove the entire node from output */
RemoveNode
}
Internal AST traversal class that manages the conversion process. While typically not used directly, it's available for advanced customization scenarios.
/**
* Internal AST visitor for HTML to Markdown conversion
* Properties & methods marked public may be used for middleware/transformer support
*
* NOTE(review): this is a declaration-style summary; method bodies are not shown,
* so the member notes below describe the signatures only.
*/
class Visitor {
/** NodeHtmlMarkdown instance */
readonly instance: NodeHtmlMarkdown;
/** Root HTML node being processed */
readonly rootNode: HtmlNode;
/** Optional filename for context */
readonly fileName?: string;
/** Conversion result and statistics (see VisitorResult: output text plus trailing-newline counters) */
result: VisitorResult;
/** Metadata map for all nodes */
nodeMetadata: NodeMetadataMap;
/** URL definitions for reference-style links */
urlDefinitions: string[];
/**
* @param instance - NodeHtmlMarkdown instance
* @param rootNode - Root HTML node being processed
* @param fileName - Optional filename for context
*/
constructor(instance: NodeHtmlMarkdown, rootNode: HtmlNode, fileName?: string);
/**
* Add or get URL definition for reference-style links
* @param url - URL to add or look up
* @returns A number identifying the definition (presumably its position in `urlDefinitions` - TODO confirm)
*/
addOrGetUrlDefinition(url: string): number;
/**
* Append content to result
* @param s - Text to append
* @param startPos - Optional position (presumably an offset into `result.text` - TODO confirm)
* @param spaceIfRepeatingChar - Optional flag (shares its name with the TranslatorConfig option; exact behavior not shown here)
*/
appendResult(s: string, startPos?: number, spaceIfRepeatingChar?: boolean): void;
/**
* Append newlines to result
* @param count - Number of newlines
*/
appendNewlines(count: number): void;
/**
* Visit and process HTML node
* @param node - Node to visit
* @param textOnly - Optional flag (presumably restricts output to text content - TODO confirm)
* @param metadata - Optional metadata for the node
*/
visitNode(node: HtmlNode, textOnly?: boolean, metadata?: NodeMetadata): void;
}
interface VisitorResult {
text: string;
trailingNewlineStats: {
whitespace: number;
newLines: number;
};
}
import { NodeHtmlMarkdown, TranslatorConfigObject } from "node-html-markdown";
/**
 * Custom translators, keyed by comma-separated tag names.
 * Every entry here is a static TranslatorConfig (no factories).
 */
const customTranslators: TranslatorConfigObject = {
  // Custom emphasis using different delimiters
  "em": { prefix: "*", postfix: "*" },
  "strong": { prefix: "__", postfix: "__" },
  // Custom handling for spans
  "span": { prefix: "`", postfix: "`" },
  // Ignore certain elements entirely
  "script,style": { ignore: true },
  // Add surrounding newlines to custom block elements
  "section": { surroundingNewlines: 2 },
  // Fixed content replacement. A Markdown hard line break needs two trailing
  // spaces before the newline; a single space only produces a soft break.
  "br": { content: "  \n", recurse: false }
};
const nhm = new NodeHtmlMarkdown({}, customTranslators);
const html = `
<section>
<strong>Bold</strong> and <em>italic</em> text.
<span>Code-like span</span>
<script>alert("ignored");</script>
</section>
`;
const result = nhm.translate(html);
console.log(result);
// Output:
// __Bold__ and *italic* text. `Code-like span`
import { NodeHtmlMarkdown, TranslatorConfigFactory, TranslatorConfigObject } from "node-html-markdown";
/**
 * Heading factory: the number of leading '#' characters comes from the
 * second character of the tag name (e.g. "H2" -> "## ").
 */
const headingTranslator: TranslatorConfigFactory = (ctx) => {
  const levelDigit = ctx.node.tagName.charAt(1);
  const hashes = '#'.repeat(parseInt(levelDigit, 10));
  return {
    prefix: `${hashes} `,
    surroundingNewlines: 2
  };
};
/**
 * List-item factory: indents by `indentLevel` (default 0) and picks the marker.
 * Ordered lists use "<n>. " when an item number is available; every other
 * case uses "* ".
 */
const listItemTranslator: TranslatorConfigFactory = ({ indentLevel = 0, listKind, listItemNumber }) => {
  const indent = ' '.repeat(indentLevel);
  // Only number the item when a number is actually present; otherwise the
  // template literal would render the text "undefined. ".
  const isNumbered = listKind === 'OL' && listItemNumber != null;
  const marker = isNumbered ? `${listItemNumber}. ` : '* ';
  return {
    prefix: indent + marker,
    surroundingNewlines: false
  };
};
// Register the factories: all six heading levels share one factory,
// and list items get their own.
const customTranslators: TranslatorConfigObject = {
  "h1,h2,h3,h4,h5,h6": headingTranslator,
  li: listItemTranslator
};
const nhm = new NodeHtmlMarkdown({}, customTranslators);
const html = `
<h2>Subtitle</h2>
<ol>
<li>First item</li>
<li>Second item</li>
</ol>
`;
const result = nhm.translate(html);
console.log(result);
// Output:
// ## Subtitle
//
// 1. First item
// 2. Second item
import { NodeHtmlMarkdown, PostProcessResult } from "node-html-markdown";
/**
 * Translators that rewrite already-rendered content in `postprocess` hooks.
 * Each hook receives the element (`node`) and its rendered `content`.
 */
const customTranslators = {
  // Remove empty paragraphs
  "p": {
    postprocess: ({ content }) => {
      // Whitespace-only content counts as empty
      if (!content.trim()) {
        return PostProcessResult.RemoveNode;
      }
      return content;
    }
  },
  // Transform links with specific class
  "a": {
    postprocess: ({ node, content }) => {
      const href = node.getAttribute('href') || '';
      const className = node.getAttribute('class') || '';
      if (className.includes('button')) {
        return `[${content}](${href}){:.button}`;
      }
      return `[${content}](${href})`;
    }
  },
  // Custom code block formatting
  "pre": {
    postprocess: ({ node, content }) => {
      const code = node.querySelector('code');
      const className = code?.getAttribute('class') || '';
      // Prefer a whole `language-xxx` class token so that extra classes
      // (e.g. "hljs language-ts") do not leak into the fence info string.
      // Without such a token, fall back to stripping the first "language-".
      const match = /(?:^|\s)language-(\S+)/.exec(className);
      const language = match ? match[1] : className.replace('language-', '');
      // An empty language yields a plain fence, so no separate branch is needed
      return `\`\`\`${language}\n${content}\n\`\`\``;
    },
    noEscape: true,
    preserveWhitespace: true
  }
};
const nhm = new NodeHtmlMarkdown({}, customTranslators);
import { NodeHtmlMarkdown } from "node-html-markdown";
/**
 * Image factory with conditional logic.
 *
 * Renders `![alt](src "title")`, leaving out the quoted title when the
 * element has no `title` attribute. Data-URI sources are emitted with an
 * empty link target unless `options.keepDataImages` is truthy.
 * Child nodes are never visited (`recurse: false`).
 */
const imageTranslator = ({ node, options }) => {
  const src = node.getAttribute('src') || '';
  const alt = node.getAttribute('alt') || '';
  const title = node.getAttribute('title');
  // Skip data URIs unless explicitly enabled
  if (src.startsWith('data:') && !options.keepDataImages) {
    return { content: `![${alt}]()`, recurse: false };
  }
  // Format with title if present
  const titlePart = title ? ` "${title}"` : '';
  return {
    content: `![${alt}](${src}${titlePart})`,
    recurse: false
  };
};
/**
 * Table-cell factory driven by tag name and class.
 * Header cells (`TH`) are wrapped in `**`; cells whose class contains
 * "center" get one space of padding between the content and the wrapper.
 */
const tableCellTranslator = ({ node }) => {
  const classAttr = node.getAttribute('class') || '';
  const bold = node.tagName === 'TH' ? '**' : '';
  const pad = classAttr.includes('center') ? ' ' : '';
  return { prefix: bold + pad, postfix: pad + bold };
};
// Wire the factories to their tags; "td,th" applies one factory to both cell kinds.
const customTranslators = {
  img: imageTranslator,
  "td,th": tableCellTranslator
};
const nhm = new NodeHtmlMarkdown({}, customTranslators);
import { NodeHtmlMarkdown, TranslatorCollection } from "node-html-markdown";
// Create custom translator collection for code blocks
const codeBlockTranslators = new TranslatorCollection();
// Drop the bold/italic markers inside code but keep the text itself.
// `ignore: true` is not used here because it skips the element entirely,
// which would remove its text from the code block.
codeBlockTranslators.set('strong,b', { prefix: '', postfix: '' });
codeBlockTranslators.set('em,i', { prefix: '', postfix: '' });
// Create custom translator collection for tables
const tableTranslators = new TranslatorCollection();
tableTranslators.set('p', { prefix: '', postfix: '' }); // Remove paragraph formatting in table cells
// Attach each collection to the element whose children it should govern
const customTranslators = {
  "pre": {
    childTranslators: codeBlockTranslators,
    noEscape: true,
    preserveWhitespace: true
  },
  "table": {
    childTranslators: tableTranslators,
    surroundingNewlines: 2
  }
};
const nhm = new NodeHtmlMarkdown({}, customTranslators);
const html = `
<pre><code>const <strong>bold</strong> = true;</code></pre>
<table>
<tr>
<td><p>Cell content</p></td>
</tr>
</table>
`;
const result = nhm.translate(html);
// Code blocks won't have bold formatting
// Table cells won't have paragraph spacing
import { NodeHtmlMarkdown } from "node-html-markdown";
const nhm = new NodeHtmlMarkdown();
// Access and modify existing translators
console.log("Current translators:", nhm.translators.size);
// Add new translator
nhm.translators.set("mark", { prefix: "==", postfix: "==" });
// Modify existing translator
nhm.translators.set("blockquote", { prefix: "> ", surroundingNewlines: 1 });
// Remove translator
nhm.translators.remove("hr");
// Access specific translator collections
nhm.codeBlockTranslators.set("span", { ignore: true });
nhm.tableTranslators.set("br", { content: " ", recurse: false });
const html = '<mark>Highlighted text</mark>';
const result = nhm.translate(html);
console.log(result); // "==Highlighted text=="
import { NodeHtmlMarkdown, TranslatorConfigObject, PostProcessResult } from "node-html-markdown";
/**
 * Combined example: static configs, a context-dependent factory, and
 * `postprocess` hooks that read attributes or descendants of the element.
 */
const customTranslators: TranslatorConfigObject = {
  // Custom article wrapper
  "article": {
    prefix: "---\n",
    postfix: "\n---",
    surroundingNewlines: 2
  },
  // Custom figure handling
  "figure": {
    surroundingNewlines: 2,
    postprocess: ({ node, content }) => {
      // Trim the caption: leading/trailing whitespace (common with indented
      // markup) would otherwise sit inside the `*...*` markers and break the
      // emphasis, and a whitespace-only caption would render as `*   *`.
      const caption = (node.querySelector('figcaption')?.textContent || '').trim();
      if (caption) {
        return `${content}\n\n*${caption}*`;
      }
      return content;
    }
  },
  // Skip figcaption (handled by figure post-processor)
  "figcaption": { ignore: true },
  // Custom code handling: inside <pre> keep the text raw, otherwise wrap in backticks
  "code": ({ node, parent }) => {
    if (parent?.tagName === 'PRE') {
      return { noEscape: true, preserveWhitespace: true };
    }
    return { prefix: "`", postfix: "`", spaceIfRepeatingChar: true };
  },
  // Custom abbreviation handling: append the title in parentheses when present
  "abbr": {
    postprocess: ({ node, content }) => {
      const title = node.getAttribute('title');
      return title ? `${content} (${title})` : content;
    }
  }
};
const nhm = new NodeHtmlMarkdown({}, customTranslators);
const html = `
<article>
<h1>Title</h1>
<p>Content with <abbr title="HyperText Markup Language">HTML</abbr></p>
<figure>
<img src="image.jpg" alt="Description">
<figcaption>Image caption</figcaption>
</figure>
</article>
`;
const result = nhm.translate(html);
console.log(result);
// Output includes custom article wrapper, abbreviation expansion, and figure caption handling