CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/npm-node-html-markdown

Fast HTML to markdown converter, compatible with both node and the browser

Pending
Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Security by Snyk

Pending

The risk profile of this skill

Overview
Eval results
Files

translators.mddocs/

Custom Translators

Advanced customization system allowing element-specific translation rules with configurable prefix/postfix, content transformation, recursion control, and post-processing hooks.

Capabilities

TranslatorConfig Interface

Configuration interface for defining how HTML elements should be converted to Markdown.

/**
 * Configuration for element translation behavior.
 * Every field is optional; a translator sets only the fields it needs.
 */
interface TranslatorConfig {
  /**
   * Content prefix (added before element content, after surroundingNewlines)
   */
  prefix?: string;

  /**
   * Content postfix (added after element content, before surroundingNewlines)
   */
  postfix?: string;

  /**
   * Fixed output content (replaces element content entirely)
   */
  content?: string;

  /**
   * Post-process content after inner nodes have been rendered.
   * The callback receives the translator context plus the rendered `content` string.
   * Return a string to replace the content.
   * Return PostProcessResult.RemoveNode to remove the element entirely.
   * Return PostProcessResult.NoChange to leave the content unchanged.
   * NOTE(review): returning undefined is also described as "leave content unchanged",
   * but the declared return type does not include undefined, so a strictly typed
   * callback cannot return it without a cast — confirm which form is intended.
   */
  postprocess?: (ctx: TranslatorContext & { content: string }) => string | PostProcessResult;

  /**
   * Whether to process child elements
   * @default true
   */
  recurse?: boolean;

  /**
   * Add newlines before and after element.
   * Accepts a boolean or a number (examples on this page pass 2);
   * presumably the number is the count of newlines added per side — confirm.
   * @default false
   */
  surroundingNewlines?: boolean | number;

  /**
   * Ignore element entirely (skip processing)
   */
  ignore?: boolean;

  /**
   * Do not escape markdown special characters in content
   */
  noEscape?: boolean;

  /**
   * Add space if first character matches end of previous content
   * Prevents markdown formatting conflicts
   */
  spaceIfRepeatingChar?: boolean;

  /**
   * Ensure translator is visited even if element is empty
   * Useful for self-closing elements or custom behavior
   */
  preserveIfEmpty?: boolean;

  /**
   * Preserve whitespace exactly as it appears in HTML
   */
  preserveWhitespace?: boolean;

  /**
   * Custom translator collection for child elements
   * (used instead of the default translators while inside this element —
   * TODO confirm exact lookup/fallback rules against the library)
   */
  childTranslators?: TranslatorCollection;
}

TranslatorConfigFactory

Factory function type for creating dynamic translator configurations based on context.

/**
 * Factory function for creating translator configurations dynamically.
 * Declared as a callable object type: the call signature produces the config,
 * and the optional `base` property lets a factory carry a static configuration
 * alongside the function itself.
 * @param ctx - Translation context with element, options, and metadata
 * @returns TranslatorConfig for the current element
 */
type TranslatorConfigFactory = {
  (ctx: TranslatorContext): TranslatorConfig;
  /** Optional base configuration to merge with factory result */
  base?: TranslatorConfig;
};

/**
 * Context passed to translator factory functions and post-processors.
 * Extends Partial<NodeMetadata>, so every metadata field is optional here;
 * the examples on this page destructure indentLevel, listKind and
 * listItemNumber from it and must tolerate them being undefined.
 */
interface TranslatorContext extends Partial<NodeMetadata> {
  /** Current HTML element being translated */
  node: ElementNode;
  /** Conversion options */
  options: NodeHtmlMarkdownOptions;
  /** Parent HTML element (optional — may be absent) */
  parent?: ElementNode;
  /** Metadata map for all nodes */
  nodeMetadata: NodeMetadataMap;
  /** AST visitor instance */
  visitor: Visitor;
  /** Base translator configuration (optional) */
  base?: TranslatorConfig;
}

TranslatorCollection Class

Collection class for managing element translators with key-based access and merging support.

/**
 * Collection for managing element translators.
 * Translators are registered and removed by tag name; several tags can be
 * addressed at once with a comma-separated key string.
 */
class TranslatorCollection {
  /** Number of translators in collection */
  readonly size: number;

  /**
   * Add or update translator config for one or more element tags
   * @param keys - Comma-separated element tag names (e.g., "h1,h2,h3")
   * @param config - Translator configuration or factory function
   * @param preserveBase - Internal parameter for merging configurations;
   *   callers on this page never pass it
   */
  set(
    keys: string,
    config: TranslatorConfig | TranslatorConfigFactory,
    preserveBase?: boolean
  ): void;

  /**
   * Get translator config for element tag
   * @param key - Element tag name
   * @returns Translator configuration or factory function
   * NOTE(review): the result for a tag with no registered translator is not
   * shown here — confirm whether undefined can be returned.
   */
  get(key: string): TranslatorConfig | TranslatorConfigFactory;

  /**
   * Get all translator entries
   * @returns Array of [elementName, config] pairs
   */
  entries(): [elementName: string, config: TranslatorConfig | TranslatorConfigFactory][];

  /**
   * Remove translator config for one or more element tags
   * @param keys - Comma-separated element tag names to remove
   */
  remove(keys: string): void;
}

Supporting Types

/**
 * Map of element tags to translator configurations.
 * A key may name a single tag ("em") or several comma-separated tags
 * ("script,style", "h1,h2,h3,h4,h5,h6"), as the usage examples show.
 * Each value is either a static TranslatorConfig or a TranslatorConfigFactory.
 */
type TranslatorConfigObject = { 
  [tags: string]: TranslatorConfig | TranslatorConfigFactory 
};

/**
 * Result codes for post-processing functions.
 * A `postprocess` callback may return one of these instead of a replacement string.
 * Numeric enum: members take the implicit values 0 and 1 in declaration order.
 */
enum PostProcessResult {
  /** No changes made to content */
  NoChange,
  /** Remove the entire node from output */
  RemoveNode
}

Visitor Class

Internal AST traversal class that manages the conversion process. While typically not used directly, it's available for advanced customization scenarios.

/**
 * Internal AST visitor for HTML to Markdown conversion
 * Properties & methods marked public may be used for middleware/transformer support
 * An instance is exposed to translators through TranslatorContext.visitor.
 */
class Visitor {
  /** NodeHtmlMarkdown instance */
  readonly instance: NodeHtmlMarkdown;
  /** Root HTML node being processed */
  readonly rootNode: HtmlNode;
  /** Optional filename for context */
  readonly fileName?: string;
  /** Conversion result and statistics */
  result: VisitorResult;
  /** Metadata map for all nodes */
  nodeMetadata: NodeMetadataMap;
  /** URL definitions for reference-style links */
  urlDefinitions: string[];

  /**
   * @param instance - NodeHtmlMarkdown instance
   * @param rootNode - Root HTML node to process
   * @param fileName - Optional filename for context
   */
  constructor(instance: NodeHtmlMarkdown, rootNode: HtmlNode, fileName?: string);

  /**
   * Add or get URL definition for reference-style links
   * @returns a number identifying the definition (presumably its reference id
   *   within urlDefinitions — confirm whether it is 0- or 1-based)
   */
  addOrGetUrlDefinition(url: string): number;
  /**
   * Append content to result
   * @param s - Text to append
   * @param startPos - Optional position argument (semantics not shown here — confirm)
   * @param spaceIfRepeatingChar - Optional flag; see TranslatorConfig.spaceIfRepeatingChar
   */
  appendResult(s: string, startPos?: number, spaceIfRepeatingChar?: boolean): void;
  /** Append newlines to result */
  appendNewlines(count: number): void;
  /**
   * Visit and process HTML node
   * @param node - Node to visit
   * @param textOnly - Optional flag (presumably restricts output to text — confirm)
   * @param metadata - Optional metadata for the node
   */
  visitNode(node: HtmlNode, textOnly?: boolean, metadata?: NodeMetadata): void;
}

/**
 * Shape of Visitor.result ("conversion result and statistics").
 */
interface VisitorResult {
  /** Output text (presumably the markdown accumulated so far — confirm) */
  text: string;
  /** Counters describing the trailing end of `text` (exact semantics not shown here — confirm) */
  trailingNewlineStats: {
    /** Whitespace counter */
    whitespace: number;
    /** Newline counter */
    newLines: number;
  };
}

Usage Examples

Basic Static Translators

import { NodeHtmlMarkdown, TranslatorConfigObject } from "node-html-markdown";

// Static translator table: each key is a tag name or a comma-separated list of tag names
const translatorOverrides: TranslatorConfigObject = {
  // Emphasis rendered with alternative delimiters
  em: { prefix: "*", postfix: "*" },
  strong: { prefix: "__", postfix: "__" },

  // Spans are wrapped in backticks
  span: { prefix: "`", postfix: "`" },

  // Script and style elements are skipped entirely
  "script,style": { ignore: true },

  // Sections behave as blocks surrounded by newlines
  section: { surroundingNewlines: 2 },

  // Line breaks emit fixed content and never visit children
  br: { content: "  \n", recurse: false }
};

// Sample input exercising every override above
const sampleHtml = `
  <section>
    <strong>Bold</strong> and <em>italic</em> text.
    <span>Code-like span</span>
    <script>alert("ignored");</script>
  </section>
`;

// Overrides are passed as the second constructor argument
const converter = new NodeHtmlMarkdown({}, translatorOverrides);

console.log(converter.translate(sampleHtml));
// Output:
// __Bold__ and *italic* text. `Code-like span`

Dynamic Translator Factories

import {
  NodeHtmlMarkdown,
  TranslatorConfigFactory,
  TranslatorConfigObject
} from "node-html-markdown";

// Factory for headings with dynamic prefix based on level.
// The level is the digit in the tag name (h1..h6); an explicit radix keeps
// parseInt unambiguous.
const headingTranslator: TranslatorConfigFactory = ({ node }) => ({
  prefix: '#'.repeat(parseInt(node.tagName.charAt(1), 10)) + ' ',
  surroundingNewlines: 2
});

// Factory for list items with proper indentation.
// indentLevel, listKind and listItemNumber come from the (partial) node
// metadata on the context, so indentLevel is defaulted to 0.
const listItemTranslator: TranslatorConfigFactory = ({ indentLevel = 0, listKind, listItemNumber }) => {
  const indent = '  '.repeat(indentLevel);
  const marker = listKind === 'OL' ? `${listItemNumber}. ` : '* ';
  return {
    prefix: indent + marker,
    surroundingNewlines: false
  };
};

// One factory can serve several tags through a comma-separated key
const customTranslators: TranslatorConfigObject = {
  "h1,h2,h3,h4,h5,h6": headingTranslator,
  "li": listItemTranslator
};

const nhm = new NodeHtmlMarkdown({}, customTranslators);

const html = `
  <h2>Subtitle</h2>
  <ol>
    <li>First item</li>
    <li>Second item</li>
  </ol>
`;

const result = nhm.translate(html);
console.log(result);
// Output:
// ## Subtitle
//
// 1. First item
// 2. Second item

Post-Processing Functions

import {
  NodeHtmlMarkdown,
  PostProcessResult,
  TranslatorConfigObject
} from "node-html-markdown";

// Annotating the table gives every postprocess callback a contextual type,
// so the destructured parameters are not implicitly `any` under strict mode.
const customTranslators: TranslatorConfigObject = {
  // Remove empty paragraphs
  "p": {
    postprocess: ({ content }) => {
      if (!content.trim()) {
        return PostProcessResult.RemoveNode;
      }
      return content;
    }
  },

  // Transform links with specific class
  "a": {
    postprocess: ({ node, content }) => {
      const href = node.getAttribute('href') || '';
      const className = node.getAttribute('class') || '';

      if (className.includes('button')) {
        return `[${content}](${href}){:.button}`;
      }

      return `[${content}](${href})`;
    }
  },

  // Custom code block formatting
  "pre": {
    postprocess: ({ node, content }) => {
      const code = node.querySelector('code');
      // Empty string when there is no <code> child or no class attribute
      const language = code?.getAttribute('class')?.replace('language-', '') || '';

      // With an empty language this yields a plain fence, so one template
      // covers both the tagged and untagged cases.
      return `\`\`\`${language}\n${content}\n\`\`\``;
    },
    noEscape: true,
    preserveWhitespace: true
  }
};

const nhm = new NodeHtmlMarkdown({}, customTranslators);

Conditional Logic in Factories

import {
  NodeHtmlMarkdown,
  TranslatorConfigFactory,
  TranslatorConfigObject
} from "node-html-markdown";

// Complex factory with conditional logic.
// The TranslatorConfigFactory annotation types `node` and `options`, which
// would otherwise be implicitly `any` under strict mode.
const imageTranslator: TranslatorConfigFactory = ({ node, options }) => {
  const src = node.getAttribute('src') || '';
  const alt = node.getAttribute('alt') || '';
  const title = node.getAttribute('title');

  // Skip data URIs unless explicitly enabled
  if (src.startsWith('data:') && !options.keepDataImages) {
    return { content: `![${alt}]()`, recurse: false };
  }

  // Format with title if present
  const titlePart = title ? ` "${title}"` : '';
  return {
    content: `![${alt}](${src}${titlePart})`,
    recurse: false
  };
};

// Table cell alignment based on class
const tableCellTranslator: TranslatorConfigFactory = ({ node }) => {
  const className = node.getAttribute('class') || '';
  const isHeader = node.tagName === 'TH';

  // Header cells are wrapped in bold markers
  let prefix = isHeader ? '**' : '';
  let postfix = isHeader ? '**' : '';

  // Centered cells get one space of padding on each side, outside the bold markers
  // on the left and before them on the right
  if (className.includes('center')) {
    prefix += ' ';
    postfix = ' ' + postfix;
  }

  return { prefix, postfix };
};

const customTranslators: TranslatorConfigObject = {
  "img": imageTranslator,
  "td,th": tableCellTranslator
};

const nhm = new NodeHtmlMarkdown({}, customTranslators);

Child Translator Collections

import { NodeHtmlMarkdown, TranslatorCollection } from "node-html-markdown";

// Translators applied to children of <pre>: inline formatting tags are ignored
const preChildren = new TranslatorCollection();
preChildren.set('strong,b', { ignore: true });
preChildren.set('em,i', { ignore: true });

// Translators applied to children of <table>: paragraphs get no prefix/postfix
const tableChildren = new TranslatorCollection();
tableChildren.set('p', { prefix: '', postfix: '' });

// Attach each child collection to its parent element's translator
const converter = new NodeHtmlMarkdown({}, {
  pre: {
    childTranslators: preChildren,
    noEscape: true,
    preserveWhitespace: true
  },

  table: {
    childTranslators: tableChildren,
    surroundingNewlines: 2
  }
});

const sampleHtml = `
  <pre><code>const <strong>bold</strong> = true;</code></pre>
  <table>
    <tr>
      <td><p>Cell content</p></td>
    </tr>
  </table>
`;

const markdown = converter.translate(sampleHtml);
// Code blocks won't have bold formatting
// Table cells won't have paragraph spacing

Accessing Instance Translators

import { NodeHtmlMarkdown } from "node-html-markdown";

const converter = new NodeHtmlMarkdown();

// The instance exposes its translator collections; pull them out once
const { translators, codeBlockTranslators, tableTranslators } = converter;

// Inspect how many translators are registered
console.log("Current translators:", translators.size);

// Register a translator for a new tag
translators.set("mark", { prefix: "==", postfix: "==" });

// Replace the translator of an existing tag
translators.set("blockquote", { prefix: "> ", surroundingNewlines: 1 });

// Drop a translator
translators.remove("hr");

// The specialised collections can be adjusted the same way
codeBlockTranslators.set("span", { ignore: true });
tableTranslators.set("br", { content: " ", recurse: false });

const markdown = converter.translate('<mark>Highlighted text</mark>');
console.log(markdown); // "==Highlighted text=="

Complete Custom Translator Example

import { NodeHtmlMarkdown, TranslatorConfigObject, PostProcessResult } from "node-html-markdown";

const translatorOverrides: TranslatorConfigObject = {
  // Articles are fenced with horizontal-rule style markers
  article: {
    prefix: "---\n",
    postfix: "\n---",
    surroundingNewlines: 2
  },

  // Figures append their caption (if any) in italics after the rendered content
  figure: {
    surroundingNewlines: 2,
    postprocess: ({ node, content }) => {
      const captionText = node.querySelector('figcaption')?.textContent;
      return captionText ? `${content}\n\n*${captionText}*` : content;
    }
  },

  // The caption itself is skipped; the figure post-processor emits it
  figcaption: { ignore: true },

  // Code inside <pre> is left raw; anywhere else it becomes inline code
  code: ({ parent }) =>
    parent?.tagName === 'PRE'
      ? { noEscape: true, preserveWhitespace: true }
      : { prefix: "`", postfix: "`", spaceIfRepeatingChar: true },

  // Abbreviations are followed by their expansion when a title is present
  abbr: {
    postprocess: ({ node, content }) => {
      const expansion = node.getAttribute('title');
      if (!expansion) {
        return content;
      }
      return `${content} (${expansion})`;
    }
  }
};

const sampleHtml = `
  <article>
    <h1>Title</h1>
    <p>Content with <abbr title="HyperText Markup Language">HTML</abbr></p>
    <figure>
      <img src="image.jpg" alt="Description">
      <figcaption>Image caption</figcaption>
    </figure>
  </article>
`;

const converter = new NodeHtmlMarkdown({}, translatorOverrides);

console.log(converter.translate(sampleHtml));
// Output includes custom article wrapper, abbreviation expansion, and figure caption handling

docs

conversion.md

index.md

options.md

translators.md

tile.json