A library that converts HTML to Markdown
—
Turndown's rule system provides fine-grained control over how HTML elements are converted to Markdown. The system uses a flexible filter-and-replacement pattern that allows both built-in and custom conversion logic.
Rules define how specific HTML elements should be converted to Markdown using a filter and replacement function.
/**
 * Shape of a conversion rule: a filter that selects elements, plus the
 * function that turns each selected element into Markdown.
 */
interface Rule {
  /** Decides which HTML elements this rule applies to. */
  filter: string | Array<string> | Function;

  /** Converts an element matched by `filter` into Markdown. */
  replacement: Function;

  /** Optional hook that appends content after processing (used internally). */
  append?: Function;
}
/**
 * Signature of a rule's replacement callback.
 *
 * @param content - The inner content of the element
 * @param node - The DOM node being converted
 * @param options - TurndownService options
 * @returns Markdown representation
 */
type ReplacementFunction = (
  content: string,
  node: HTMLElement,
  options: TurndownOptions
) => string;

Add custom conversion rules to handle specific HTML elements or patterns.
/**
* Add a custom conversion rule.
*
* Invoked on a TurndownService instance, e.g. `turndownService.addRule('strikethrough', { ... })`.
* The instance is returned, so calls can be chained.
* @param {string} key - Unique identifier for the rule
* @param {Rule} rule - Rule object with filter and replacement
* @returns {TurndownService} TurndownService instance for chaining
*/
addRule(key, rule)
Usage Examples:
const turndownService = new TurndownService();

// Simple element conversion: wrap the content of <del>, <s> and <strike> in ~~ markers
turndownService.addRule('strikethrough', {
  filter: ['del', 's', 'strike'],
  replacement: (content) => `~~${content}~~`
});
// Conditional rule with function filter: only anchors that carry both a
// non-empty href and a non-empty data-custom attribute are handled.
turndownService.addRule('customLink', {
  filter: function(node, options) {
    // Coerce to a real boolean so the filter never returns an attribute string or null.
    return (
      node.nodeName === 'A' &&
      Boolean(node.getAttribute('href')) &&
      Boolean(node.getAttribute('data-custom'))
    );
  },
  replacement: function(content, node) {
    const href = node.getAttribute('href');
    // Escape double quotes so the value cannot end the quoted link title early.
    const custom = node.getAttribute('data-custom').replace(/"/g, '\\"');
    return `[${content}](${href} "${custom}")`;
  }
});
// Complex content processing
turndownService.addRule('highlight', {
  filter: 'mark',
  replacement: (content, node, options) => {
    // The highlightStyle option chooses between raw HTML and ==...== markers
    const asHtml = options.highlightStyle === 'html';
    return asHtml ? `<mark>${content}</mark>` : `==${content}==`;
  }
});

Rules use filters to select which HTML elements they should handle.
/**
 * Predicate form of a filter: receives the node and the options and
 * reports whether the rule applies to that node.
 */
type FilterFunction = (
  node: HTMLElement,
  options: TurndownOptions
) => boolean;

/**
 * Accepted filter forms for selecting HTML elements: a single string,
 * an array of strings, or a predicate function.
 */
type RuleFilter = string | Array<string> | FilterFunction;
// Examples:
filter: 'p' // String filter - matches <p> elements
filter: ['em', 'i'] // Array filter - matches <em> or <i> elements
// Function filter - custom logic for matching elements.
// classList.contains() tests for the exact class token, so a class such as
// "specialized" does not match the way a className substring search would.
filter: function(node, options) {
  return node.nodeName === 'DIV' && node.classList.contains('special');
}

Turndown includes comprehensive built-in rules for standard HTML elements.
/**
 * Partial list of the built-in CommonMark rules and the filter each one uses.
 * Function filters are shown with their bodies elided.
 */
const BuiltInRules = {
  paragraph: { filter: 'p' },
  lineBreak: { filter: 'br' },
  heading: {
    filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
  },
  blockquote: { filter: 'blockquote' },
  list: { filter: ['ul', 'ol'] },
  listItem: { filter: 'li' },
  indentedCodeBlock: {
    filter: (node, options) => { /* ... */ }
  },
  fencedCodeBlock: {
    filter: (node, options) => { /* ... */ }
  },
  horizontalRule: { filter: 'hr' },
  inlineLink: {
    filter: (node, options) => { /* ... */ }
  },
  referenceLink: {
    filter: (node, options) => { /* ... */ }
  },
  emphasis: { filter: ['em', 'i'] },
  strong: { filter: ['strong', 'b'] },
  code: {
    filter: (node) => { /* ... */ }
  },
  image: { filter: 'img' }
};

Rules are applied in a specific order of precedence:
1. Custom rules added with addRule()
2. Keep rules added with keep()
3. Remove rules added with remove()

Turndown uses special internal rules for edge cases and element control.
/**
 * Rules used internally for edge cases and element control.
 */
interface SpecialRules {
  /** Applied to elements that contain only whitespace. */
  blankRule: {
    replacement: (content: string, node: HTMLElement) => string;
  };

  /** Replacement for elements marked to be kept as HTML. */
  keepReplacement: (content: string, node: HTMLElement) => string;

  /** Fallback for unrecognized elements. */
  defaultRule: {
    replacement: (content: string, node: HTMLElement) => string;
  };
}

Control element processing with keep and remove operations.
/**
* Keep elements as HTML in the output.
*
* Invoked on a TurndownService instance; the instance is returned so calls can be chained.
* @param {string|string[]|Function} filter - Elements to keep
* @returns {TurndownService} Instance for chaining
*/
keep(filter)
/**
* Remove elements entirely from output.
*
* Invoked on a TurndownService instance; the instance is returned so calls can be chained.
* @param {string|string[]|Function} filter - Elements to remove
* @returns {TurndownService} Instance for chaining
*/
remove(filter)
Usage Examples:
const turndownService = new TurndownService();

// Keep specific elements as HTML
turndownService.keep(['del', 'ins', 'sub', 'sup']);
const html1 = '<p>H<sub>2</sub>O and E=mc<sup>2</sup></p>';
const result1 = turndownService.turndown(html1);
// Result: "H<sub>2</sub>O and E=mc<sup>2</sup>"

// Remove unwanted elements
turndownService.remove(['script', 'style', 'noscript']);
const html2 = '<p>Content</p><script>alert("bad")</script><style>body{}</style>';
const result2 = turndownService.turndown(html2);
// Result: "Content"

// Function-based keep/remove.
// classList.contains() matches the exact class token, so classes such as
// "preserved" or "no-preserve" are not kept by accident.
turndownService.keep(function(node) {
  return node.nodeName === 'SPAN' && node.classList.contains('preserve');
});
turndownService.remove(function(node) {
  return node.hasAttribute('data-remove');
});

Complex rule implementations for specialized conversion needs.
Content Transformation:
// Converts <pre><code class="language-xyz"> blocks into fenced code blocks
// that carry the language tag taken from the class attribute.
turndownService.addRule('codeWithLanguage', {
  filter: function(node) {
    // Only <pre> elements whose first child is a <code> element with a
    // non-empty class; coerced so the filter returns a real boolean.
    return Boolean(
      node.nodeName === 'PRE' &&
      node.firstChild &&
      node.firstChild.nodeName === 'CODE' &&
      node.firstChild.className
    );
  },
  replacement: function(content, node, options) {
    const codeNode = node.firstChild;
    const className = codeNode.getAttribute('class') || '';
    const language = (className.match(/language-(\S+)/) || [null, ''])[1];
    // Drop one trailing newline so no blank line appears before the closing fence.
    const code = (codeNode.textContent || '').replace(/\n$/, '');

    // Make the fence longer than any backtick run that starts a line of the
    // code, otherwise such a line would close the block early.
    let fenceSize = 3;
    const fenceInCode = /^`{3,}/gm;
    let match;
    while ((match = fenceInCode.exec(code)) !== null) {
      if (match[0].length >= fenceSize) {
        fenceSize = match[0].length + 1;
      }
    }
    const fence = '`'.repeat(fenceSize);

    return '\n\n' + fence + language + '\n' + code + '\n' + fence + '\n\n';
  }
});

Attribute Processing:
// Renders anchors that have both an href and a title as inline links with a quoted title.
turndownService.addRule('linkWithTitle', {
  filter: (node) => {
    if (node.nodeName !== 'A') {
      return false;
    }
    return node.getAttribute('href') && node.getAttribute('title');
  },
  replacement: (content, node) => {
    // Double quotes in the title are backslash-escaped because the title is emitted inside "..."
    const escapedTitle = node.getAttribute('title').replace(/"/g, '\\"');
    const target = node.getAttribute('href');
    return '[' + content + '](' + target + ' "' + escapedTitle + '")';
  }
});

Nested Content Handling:
// Renders a <dl> as bold terms followed by ": definition" lines.
turndownService.addRule('definition', {
  filter: 'dl',
  replacement: function(content, node) {
    // Walk the children once, grouping each run of <dt> terms with the <dd>
    // definitions that follow it. Stepping through the children two at a time
    // would misalign on a <dt> with several <dd>s (or any stray child) and
    // silently drop every later entry.
    const items = [];
    let terms = [];
    let definitions = [];

    // Emit the current group if it is complete, then start a fresh one.
    const flush = () => {
      if (terms.length > 0 && definitions.length > 0) {
        const termLines = terms.map(term => `**${term}**`).join('\n');
        const definitionLines = definitions.map(def => `: ${def}`).join('\n');
        items.push(termLines + '\n' + definitionLines);
      }
      terms = [];
      definitions = [];
    };

    for (const child of Array.from(node.children)) {
      if (child.nodeName === 'DT') {
        // A term that follows a definition begins a new group.
        if (definitions.length > 0) {
          flush();
        }
        terms.push(child.textContent);
      } else if (child.nodeName === 'DD' && terms.length > 0) {
        // A <dd> with no preceding <dt> has nothing to attach to and is skipped.
        definitions.push(child.textContent);
      }
    }
    flush();

    return '\n\n' + items.join('\n\n') + '\n\n';
  }
});
// Test rule with various inputs
const turndownService = new TurndownService();
turndownService.addRule('testRule', myRule);

// Sample inputs: a div with class "special" and content, a plain div,
// and an empty div with class "special".
const testCases = [
  '<div class="special">Content</div>',
  '<div>Regular content</div>',
  '<div class="special"></div>',
];

// Print each input next to its converted output
for (const html of testCases) {
  console.log('Input:', html);
  console.log('Output:', turndownService.turndown(html));
}