or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

tessl/npm-node-html-markdown

Fast HTML-to-Markdown converter, compatible with both Node.js and the browser

Workspace
tessl
Visibility
Public
Created
Last updated
Describes
npmpkg:npm/node-html-markdown@1.3.x

To install, run

npx @tessl/cli install tessl/npm-node-html-markdown@1.3.0

0

# node-html-markdown

1

2

node-html-markdown is a fast HTML-to-Markdown converter, compatible with both Node.js and the browser. It was built with two primary goals: speed and human readability. The library provides clean, concise output with consistent spacing rules while offering extensive customization through options and translators.

3

4

## Package Information

5

6

- **Package Name**: node-html-markdown

7

- **Package Type**: npm

8

- **Language**: TypeScript

9

- **Installation**: `npm install node-html-markdown`

10

11

## Core Imports

12

13

```typescript

14

import {

15

NodeHtmlMarkdown,

16

NodeHtmlMarkdownOptions,

17

FileCollection,

18

TranslatorConfig,

19

TranslatorConfigFactory,

20

TranslatorCollection,

21

TranslatorConfigObject,

22

PostProcessResult,

23

NodeMetadata,

24

NodeMetadataMap,

25

HtmlNode,

26

ElementNode,

27

TextNode,

28

CommentNode,

29

NodeType,

30

isTextNode,

31

isCommentNode,

32

isElementNode

33

} from "node-html-markdown";

34

```

35

36

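For CommonJS (note: `NodeHtmlMarkdownOptions` is declared as an interface, so it exists only as a TypeScript type and is `undefined` when destructured from `require` at runtime):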

37

38

```javascript

39

const {

40

NodeHtmlMarkdown,

41

NodeHtmlMarkdownOptions,

42

TranslatorCollection

43

} = require("node-html-markdown");

44

```

45

46

## Basic Usage

47

48

```typescript

49

import { NodeHtmlMarkdown } from "node-html-markdown";

50

51

// Single-use static method

52

const markdown = NodeHtmlMarkdown.translate("<strong>hello</strong>");

53

console.log(markdown); // "**hello**"

54

55

// Reusable instance (better performance for multiple conversions)

56

const nhm = new NodeHtmlMarkdown();

57

const result = nhm.translate("<em>world</em>");

58

console.log(result); // "_world_"

59

60

// Multiple files

61

const files = NodeHtmlMarkdown.translate({

62

"file1.html": "<h1>Title</h1>",

63

"file2.html": "<p>Content</p>"

64

});

65

// Returns: { "file1.html": "# Title", "file2.html": "Content" }

66

```

67

68

## Architecture

69

70

node-html-markdown is built around several key components:

71

72

- **NodeHtmlMarkdown Class**: Main converter with static and instance methods

73

- **Options System**: Comprehensive configuration for output formatting and behavior

74

- **Translator System**: Pluggable element-specific conversion rules with customization support

75

- **Visitor Pattern**: Internal AST traversal for HTML node processing

76

- **Node Types**: Type-safe wrappers around HTML parser nodes

77

- **Performance Focus**: Optimized for high-speed conversion of large HTML documents

78

79

## Capabilities

80

81

### HTML to Markdown Conversion

82

83

Core conversion functionality providing static and instance methods for translating HTML to Markdown with full customization support.

84

85

```typescript { .api }

86

class NodeHtmlMarkdown {

87

constructor(

88

options?: Partial<NodeHtmlMarkdownOptions>,

89

customTranslators?: TranslatorConfigObject,

90

customCodeBlockTranslators?: TranslatorConfigObject

91

);

92

93

/** Main element translators collection */

94

translators: TranslatorCollection;

95

/** Anchor tag specific translators */

96

aTagTranslators: TranslatorCollection;

97

/** Code block translators collection */

98

codeBlockTranslators: TranslatorCollection;

99

/** Table translators collection */

100

tableTranslators: TranslatorCollection;

101

/** Table row translators collection */

102

tableRowTranslators: TranslatorCollection;

103

/** Table cell translators collection */

104

tableCellTranslators: TranslatorCollection;

105

/** Configuration options (read-only) */

106

readonly options: NodeHtmlMarkdownOptions;

107

108

static translate(html: string, options?: Partial<NodeHtmlMarkdownOptions>, customTranslators?: TranslatorConfigObject, customCodeBlockTranslators?: TranslatorConfigObject): string;

109

static translate(files: FileCollection, options?: Partial<NodeHtmlMarkdownOptions>, customTranslators?: TranslatorConfigObject, customCodeBlockTranslators?: TranslatorConfigObject): FileCollection;

110

111

translate(html: string): string;

112

translate(files: FileCollection): FileCollection;

113

}

114

115

type FileCollection = { [fileName: string]: string };

116

```

117

118

[HTML to Markdown Conversion](./conversion.md)

119

120

### Configuration Options

121

122

Extensive configuration system for customizing output formatting, delimiters, escape patterns, and processing behavior.

123

124

```typescript { .api }

125

interface NodeHtmlMarkdownOptions {

126

preferNativeParser: boolean;

127

codeFence: string;

128

bulletMarker: string;

129

codeBlockStyle: "indented" | "fenced";

130

emDelimiter: string;

131

strongDelimiter: string;

132

strikeDelimiter: string;

133

ignore?: string[];

134

blockElements?: string[];

135

maxConsecutiveNewlines: number;

136

lineStartEscape: readonly [pattern: RegExp, replacement: string];

137

globalEscape: readonly [pattern: RegExp, replacement: string];

138

textReplace?: (readonly [pattern: RegExp, replacement: string])[];

139

keepDataImages?: boolean;

140

useLinkReferenceDefinitions?: boolean;

141

useInlineLinks?: boolean;

142

}

143

```

144

145

[Configuration Options](./options.md)

146

147

### Custom Translators

148

149

Advanced customization system allowing element-specific translation rules with prefix/postfix, content transformation, and post-processing hooks.

150

151

```typescript { .api }

152

interface TranslatorConfig {

153

prefix?: string;

154

postfix?: string;

155

content?: string;

156

postprocess?: (ctx: TranslatorContext & { content: string }) => string | PostProcessResult;

157

recurse?: boolean;

158

surroundingNewlines?: boolean | number;

159

ignore?: boolean;

160

noEscape?: boolean;

161

spaceIfRepeatingChar?: boolean;

162

preserveIfEmpty?: boolean;

163

preserveWhitespace?: boolean;

164

childTranslators?: TranslatorCollection;

165

}

166

167

class TranslatorCollection {

168

readonly size: number;

169

set(keys: string, config: TranslatorConfig | TranslatorConfigFactory): void;

170

get(key: string): TranslatorConfig | TranslatorConfigFactory;

171

entries(): [elementName: string, config: TranslatorConfig | TranslatorConfigFactory][];

172

remove(keys: string): void;

173

}

174

```

175

176

[Custom Translators](./translators.md)

177

178

## Types

179

180

```typescript { .api }

181

type TranslatorConfigFactory = {

182

(ctx: TranslatorContext): TranslatorConfig;

183

base?: TranslatorConfig;

184

};

185

186

type TranslatorConfigObject = {

187

[tags: string]: TranslatorConfig | TranslatorConfigFactory

188

};

189

190

interface TranslatorContext {

191

node: ElementNode;

192

options: NodeHtmlMarkdownOptions;

193

parent?: ElementNode;

194

nodeMetadata: NodeMetadataMap;

195

visitor: Visitor;

196

base?: TranslatorConfig;

197

indentLevel?: number;

198

listKind?: "OL" | "UL";

199

listItemNumber?: number;

200

noEscape?: boolean;

201

preserveWhitespace?: boolean;

202

translators?: TranslatorConfigObject;

203

tableMeta?: {

204

node: ElementNode;

205

caption?: string;

206

};

207

}

208

209

enum PostProcessResult {

210

NoChange,

211

RemoveNode

212

}

213

214

interface NodeMetadata {

215

indentLevel?: number;

216

listKind?: "OL" | "UL";

217

listItemNumber?: number;

218

noEscape?: boolean;

219

preserveWhitespace?: boolean;

220

translators?: TranslatorConfigObject;

221

tableMeta?: {

222

node: ElementNode;

223

caption?: string;

224

};

225

}

226

227

type NodeMetadataMap = Map<ElementNode, NodeMetadata>;

228

229

type HtmlNode = (NHParser.Node | Node) & { preserve?: boolean };

230

type ElementNode = (NHParser.HTMLElement | HTMLElement) & { preserve?: boolean };

231

type TextNode = (NHParser.TextNode) & { preserve?: boolean };

232

233

// Node type guards

234

function isTextNode(node: HtmlNode): node is TextNode;

235

function isCommentNode(node: HtmlNode): node is CommentNode;

236

function isElementNode(node: HtmlNode): node is ElementNode;

237

238

// Node types from node-html-parser

239

type CommentNode = import('node-html-parser').CommentNode;

240

enum NodeType {

241

TEXT_NODE = 3,

242

COMMENT_NODE = 8,

243

ELEMENT_NODE = 1

244

}

245

```