CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/npm-node-html-markdown

Fast HTML to markdown converter, compatible with both node and the browser

Pending
Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

SecuritybySnyk

Pending

The risk profile of this skill

Overview
Eval results
Files

options.mddocs/

Configuration Options

Comprehensive configuration system for customizing HTML to Markdown conversion behavior, output formatting, delimiters, escape patterns, and processing rules.

Capabilities

NodeHtmlMarkdownOptions Interface

Complete configuration interface with all available options for customizing conversion behavior.

/**
 * Configuration options for HTML to Markdown conversion.
 *
 * The usage examples pass a `Partial<NodeHtmlMarkdownOptions>` to the
 * `NodeHtmlMarkdown` constructor, so callers set only the fields they want
 * to change. The `@default` tags below give the documented value used when
 * a field is omitted.
 */
interface NodeHtmlMarkdownOptions {
  /**
   * Use the native `window` DOMParser when available (browser environment).
   * @default false
   */
  preferNativeParser: boolean;

  /**
   * Delimiter used to open and close fenced code blocks.
   * @default "```"
   */
  codeFence: string;

  /**
   * Bullet marker for unordered list items.
   * @default "*"
   */
  bulletMarker: string;

  /**
   * Style used for code blocks: fenced with `codeFence`, or indented.
   * @default "fenced"
   */
  codeBlockStyle: "indented" | "fenced";

  /**
   * Emphasis (italic) delimiter.
   * @default "_"
   */
  emDelimiter: string;

  /**
   * Strong (bold) delimiter.
   * @default "**"
   */
  strongDelimiter: string;

  /**
   * Strikethrough delimiter.
   * @default "~~"
   */
  strikeDelimiter: string;

  /**
   * HTML elements to ignore. Their inner text is dropped and their children
   * are not parsed.
   */
  readonly ignore?: string[];

  /**
   * HTML elements to treat as blocks (surrounded with blank lines).
   */
  readonly blockElements?: string[];

  /**
   * Maximum number of consecutive newlines allowed in the output.
   * @default 3
   */
  maxConsecutiveNewlines: number;

  /**
   * Escape pattern applied at line starts for markdown special characters,
   * given as a `[pattern, replacement]` pair.
   * Setting this overrides the default escape settings - consider using
   * `textReplace` instead.
   */
  lineStartEscape: readonly [pattern: RegExp, replacement: string];

  /**
   * Global escape pattern for markdown special characters, given as a
   * `[pattern, replacement]` pair.
   * Setting this overrides the default escape settings - consider using
   * `textReplace` instead.
   */
  globalEscape: readonly [pattern: RegExp, replacement: string];

  /**
   * User-defined text replacement patterns, each a `[pattern, replacement]`
   * pair. Applied to text content retrieved from HTML nodes.
   */
  textReplace?: (readonly [pattern: RegExp, replacement: string])[];

  /**
   * Keep images whose source is a `data:` URI (such images can be up to
   * 1MB each).
   * @default false
   * @example <img src="data:image/gif;base64,R0lGODlhEAAQAMQAAORHHOVSK......0o/">
   */
  keepDataImages?: boolean;

  /**
   * Emit links as numbered references and place the link reference
   * definitions at the bottom of the document.
   * @default undefined
   * @example
   * Input: Click <a href="/url1">here</a>. Or <a href="/url2">here</a>.
   * Output: Click [here][1]. Or [here][2].
   *
   * [1]: /url1
   * [2]: /url2
   */
  useLinkReferenceDefinitions?: boolean;

  /**
   * Wrap URL text in < > instead of []() syntax when the link text matches
   * the URL.
   * @default true
   * @example
   * Input: <a href="https://google.com">https://google.com</a>
   * Output: <https://google.com> (when true)
   * Output: [https://google.com](https://google.com) (when false)
   */
  useInlineLinks?: boolean;
}

Usage Examples

Basic Options Configuration

import { NodeHtmlMarkdown, NodeHtmlMarkdownOptions } from "node-html-markdown";

// Sample document: a one-item list with inline formatting, followed by a
// JavaScript code block.
const sampleHtml = `
  <ul>
    <li><strong>Bold</strong> and <em>italic</em> text</li>
  </ul>
  <pre><code class="language-javascript">console.log("hello");</code></pre>
`;

// Override only the delimiters of interest; every other option is left unset.
const formatting: Partial<NodeHtmlMarkdownOptions> = {
  codeFence: "~~~",
  emDelimiter: "*",
  strongDelimiter: "__",
  bulletMarker: "-"
};

const converter = new NodeHtmlMarkdown(formatting);
console.log(converter.translate(sampleHtml));
// Output:
// - __Bold__ and *italic* text
//
// ~~~javascript
// console.log("hello");
// ~~~

Code Block Style Options

import { NodeHtmlMarkdown } from "node-html-markdown";

// One converter per code block style. "fenced" is the documented default;
// it is spelled out here together with its fence so the contrast is explicit.
const withFences = new NodeHtmlMarkdown({ codeBlockStyle: "fenced", codeFence: "```" });
const withIndent = new NodeHtmlMarkdown({ codeBlockStyle: "indented" });

// The same snippet is rendered once per style.
const snippet = '<pre><code>const x = 42;</code></pre>';

const fencedOutput = withFences.translate(snippet);
console.log("Fenced:", fencedOutput);
// Output: ```\nconst x = 42;\n```

const indentedOutput = withIndent.translate(snippet);
console.log("Indented:", indentedOutput);
// Output:     const x = 42;

Ignore and Block Elements

import { NodeHtmlMarkdown } from "node-html-markdown";

// Elements whose content should be dropped from the output altogether.
const ignoredTags = ["script", "style", "nav"];
// Elements that should be surrounded with blank lines, like other blocks.
const blockTags = ["section", "article", "aside"];

const converter = new NodeHtmlMarkdown({
  ignore: ignoredTags,
  blockElements: blockTags
});

const page = `
  <article>
    <h1>Title</h1>
    <nav>Navigation menu</nav>
    <script>alert("hello");</script>
    <p>Content</p>
  </article>
`;

console.log(converter.translate(page));
// Output:
// # Title
//
// Content
// (the nav and script elements do not appear because they are ignored)

Text Replacement Patterns

import { NodeHtmlMarkdown, NodeHtmlMarkdownOptions } from "node-html-markdown";

// The explicit annotation matters: without it TypeScript infers each entry as
// `(string | RegExp)[]`, which is not assignable to the
// `readonly [pattern: RegExp, replacement: string]` tuples that textReplace expects.
const options: Partial<NodeHtmlMarkdownOptions> = {
  textReplace: [
    // Replace smart (curly) double quotes “ ” with a straight double quote.
    // Unicode escapes are used so the pattern survives copy/paste and re-encoding.
    [/[\u201C\u201D]/g, '"'],
    // Replace smart (curly) single quotes ‘ ’ with a straight apostrophe.
    [/[\u2018\u2019]/g, "'"],
    // Replace em dashes with regular dashes
    [/—/g, "--"],
    // Remove excessive whitespace
    [/\s{3,}/g, " "]
  ]
};

const nhm = new NodeHtmlMarkdown(options);

// The input deliberately contains curly quotes, an em dash and a run of spaces
// so that every replacement pattern above has something to match.
const html = '<p>“Smart quotes” and em—dash here   with spaces</p>';
const result = nhm.translate(html);
console.log(result);
// Output: "Smart quotes" and em--dash here with spaces

Link Reference Definitions

import { NodeHtmlMarkdown } from "node-html-markdown";

// Turn on reference-style links: each link becomes [text][n] and the
// definitions are collected at the bottom of the output.
const converter = new NodeHtmlMarkdown({ useLinkReferenceDefinitions: true });

// Note that https://example.com is linked twice in this paragraph.
const paragraph = `
  <p>Visit <a href="https://example.com">our website</a> or 
  <a href="https://github.com">GitHub</a>. Also check 
  <a href="https://example.com">our website</a> again.</p>
`;

const markdown = converter.translate(paragraph);
console.log(markdown);
// Output:
// Visit [our website][1] or [GitHub][2]. Also check [our website][1] again.
//
// [1]: https://example.com
// [2]: https://github.com

Data URI Image Handling

import { NodeHtmlMarkdown } from "node-html-markdown";

// Keep data URI images (opt-in; keepDataImages defaults to false)
const keepDataNhm = new NodeHtmlMarkdown({
  keepDataImages: true
});

// Remove data URI images (default behavior, set explicitly for the comparison)
const removeDataNhm = new NodeHtmlMarkdown({
  keepDataImages: false
});

const html = '<img src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7" alt="1x1 pixel">';

console.log("Keep data images:", keepDataNhm.translate(html));
// Output: ![1x1 pixel](data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7)

console.log("Remove data images:", removeDataNhm.translate(html));
// Output: (empty string)
// With keepDataImages disabled the library's image translator ignores an
// <img> whose src is a data: URI, so the whole image is dropped, alt text
// included. It does not emit an image with an empty URL.

Newline Control

import { NodeHtmlMarkdown } from "node-html-markdown";

// Cap runs of newlines in the output at two (the documented default is 3).
const converter = new NodeHtmlMarkdown({ maxConsecutiveNewlines: 2 });

// A heading and a paragraph separated by several blank source lines.
const spacedHtml = `
  <h1>Title</h1>
  
  
  
  
  <p>Paragraph after many line breaks</p>
`;

const markdown = converter.translate(spacedHtml);
console.log(markdown);
// Output:
// # Title
//
// Paragraph after many line breaks
// (excessive newlines are reduced to maxConsecutiveNewlines)

Complete Custom Configuration

// NodeHtmlMarkdownOptions must be imported as well: it is used below to type
// the options object.
import { NodeHtmlMarkdown, NodeHtmlMarkdownOptions } from "node-html-markdown";

const customOptions: Partial<NodeHtmlMarkdownOptions> = {
  // Formatting preferences
  bulletMarker: "+",
  strongDelimiter: "__",
  emDelimiter: "*",
  strikeDelimiter: "~",
  codeFence: "```",
  codeBlockStyle: "fenced",

  // Content control
  maxConsecutiveNewlines: 2,
  keepDataImages: false,
  useInlineLinks: false,
  useLinkReferenceDefinitions: true,

  // Element behavior
  ignore: ["script", "style", "noscript"],
  blockElements: ["section", "article", "aside", "details"],

  // Text processing. Unicode escapes keep the patterns unambiguous even if
  // the file is re-encoded or the curly characters are flattened to ASCII.
  textReplace: [
    [/\u00A0/g, " "],          // Replace non-breaking spaces
    [/[\u201C\u201D]/g, '"'],  // Replace smart double quotes (“ ”)
    [/[\u2018\u2019]/g, "'"],  // Replace smart apostrophes (‘ ’)
  ]
};

const nhm = new NodeHtmlMarkdown(customOptions);

// Now use with all custom settings applied. The input contains curly quotes,
// which the textReplace rules above turn into straight quotes.
const result = nhm.translate("<strong>Bold</strong> text with “smart quotes”");
console.log(result); // __Bold__ text with "smart quotes"

docs

conversion.md

index.md

options.md

translators.md

tile.json