tessl install tessl/npm-marked@17.0.0

A markdown parser built for speed
The Tokenizer class provides low-level methods for recognizing and creating tokens from markdown syntax patterns. It is used by the Lexer during tokenization.
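In normal use these methods are not called directly; the Lexer walks the source string and tries each tokenizer method in turn, consuming whatever matches. A quick way to see the result of that process (Lexer#lex is the public entry point):

import { Lexer } from "marked";
const tokens = new Lexer().lex('# Hello\n\nSome *emphasis* here.');
console.log(tokens.map(t => t.type)); // e.g. [ 'heading', 'space', 'paragraph' ]

The full method surface the Lexer relies on: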
class Tokenizer {
constructor(options?: MarkedOptions);
/**
* Tokenizer options
* Affects tokenization behavior
*/
options: MarkedOptions;
/**
* Regular expression rules for tokenization (set by lexer)
* Contains block and inline rule sets
*/
rules: Rules;
/**
* Reference to the lexer instance (set by lexer)
* Use for recursive tokenization of nested content
*/
lexer: Lexer;
// Block-level tokenizer methods
/**
* Tokenize whitespace/newlines
* @param src - Source starting with potential space
* @returns Space token or undefined if no match
*/
space(src: string): Tokens.Space | undefined;
/**
* Tokenize indented code block (4+ spaces)
* @param src - Source starting with potential indented code
* @returns Code token or undefined if no match
*/
code(src: string): Tokens.Code | undefined;
/**
* Tokenize fenced code block (``` or ~~~)
* @param src - Source starting with potential fenced code
* @returns Code token or undefined if no match
*/
fences(src: string): Tokens.Code | undefined;
/**
* Tokenize ATX heading (# Heading)
* @param src - Source starting with potential heading
* @returns Heading token or undefined if no match
*/
heading(src: string): Tokens.Heading | undefined;
/**
* Tokenize horizontal rule (---, ***, ___)
* Requires at least 3 characters
* @param src - Source starting with potential hr
* @returns Hr token or undefined if no match
*/
hr(src: string): Tokens.Hr | undefined;
/**
* Tokenize blockquote (> quote)
* @param src - Source starting with potential blockquote
* @returns Blockquote token or undefined if no match
*/
blockquote(src: string): Tokens.Blockquote | undefined;
/**
* Tokenize list (ordered or unordered)
* Handles nested lists and task lists
* @param src - Source starting with potential list
* @returns List token or undefined if no match
*/
list(src: string): Tokens.List | undefined;
/**
* Tokenize block-level HTML
* @param src - Source starting with potential HTML block
* @returns HTML token or undefined if no match
*/
html(src: string): Tokens.HTML | undefined;
/**
* Tokenize link definition ([label]: url "title")
* @param src - Source starting with potential definition
* @returns Def token or undefined if no match
*/
def(src: string): Tokens.Def | undefined;
/**
* Tokenize table (GFM)
* Requires header row and separator row
* @param src - Source starting with potential table
* @returns Table token or undefined if no match
*/
table(src: string): Tokens.Table | undefined;
/**
* Tokenize setext heading (underlined heading)
* Text followed by === or ---
* @param src - Source starting with potential lheading
* @returns Heading token or undefined if no match
*/
lheading(src: string): Tokens.Heading | undefined;
/**
* Tokenize paragraph
* @param src - Source starting with potential paragraph
* @returns Paragraph token or undefined if no match
*/
paragraph(src: string): Tokens.Paragraph | undefined;
/**
* Tokenize block-level text (fallback)
* @param src - Source to tokenize as text
* @returns Text token or undefined if no match
*/
text(src: string): Tokens.Text | undefined;
// Inline-level tokenizer methods
/**
* Tokenize escape sequence (\*)
* @param src - Source starting with potential escape
* @returns Escape token or undefined if no match
*/
escape(src: string): Tokens.Escape | undefined;
/**
* Tokenize inline HTML tag
* @param src - Source starting with potential tag
* @returns Tag token or undefined if no match
*/
tag(src: string): Tokens.Tag | undefined;
/**
* Tokenize link or image ([text](url) or ![text](url))
* @param src - Source starting with potential link/image
* @returns Link or Image token or undefined if no match
*/
link(src: string): Tokens.Link | Tokens.Image | undefined;
/**
* Tokenize reference link or image ([text][ref])
* @param src - Source starting with potential reflink
* @param links - Map of link references
* @returns Link, Image, or Text token, or undefined if no match
*/
reflink(src: string, links: Links): Tokens.Link | Tokens.Image | Tokens.Text | undefined;
/**
* Tokenize emphasis or strong (*em* or **strong**)
* Handles complex nesting and precedence
* @param src - Source starting with potential em/strong
* @param maskedSrc - Source with masked regions (from emStrongMask hook)
* @param prevChar - Previous character for context (affects delimiter rules)
* @returns Em or Strong token or undefined if no match
*/
emStrong(src: string, maskedSrc: string, prevChar: string): Tokens.Em | Tokens.Strong | undefined;
/**
* Tokenize inline code (`code`)
* Supports multiple backticks
* @param src - Source starting with potential codespan
* @returns Codespan token or undefined if no match
*/
codespan(src: string): Tokens.Codespan | undefined;
/**
* Tokenize line break (two spaces + \n or just \n with breaks: true)
* @param src - Source starting with potential br
* @returns Br token or undefined if no match
*/
br(src: string): Tokens.Br | undefined;
/**
* Tokenize strikethrough (~~deleted~~) (GFM)
* Requires gfm: true
* @param src - Source starting with potential strikethrough
* @returns Del token or undefined if no match
*/
del(src: string): Tokens.Del | undefined;
/**
* Tokenize autolink (<url> or <email>)
* @param src - Source starting with potential autolink
* @returns Link token or undefined if no match
*/
autolink(src: string): Tokens.Link | undefined;
/**
* Tokenize raw URL (GFM)
* Requires gfm: true
* @param src - Source starting with potential URL
* @returns Link token or undefined if no match
*/
url(src: string): Tokens.Link | undefined;
/**
* Tokenize inline text (fallback)
* Matches any text that doesn't match other inline patterns
* @param src - Source to tokenize as text
* @returns Text token or undefined if no match
*/
inlineText(src: string): Tokens.Text | undefined;
}

import { Tokenizer, Lexer } from "marked";
const tokenizer = new Tokenizer();
// Must set rules and lexer (normally done by lexer automatically)
const lexer = new Lexer();
tokenizer.rules = Lexer.rules;
tokenizer.lexer = lexer;
// Tokenize specific element
const headingToken = tokenizer.heading('# Hello World');
console.log(headingToken);
// {
// type: 'heading',
// raw: '# Hello World',
// depth: 1,
// text: 'Hello World',
// tokens: [...]
// }

Direct Usage Notes:
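One behavior to rely on when calling methods by hand: every tokenizer method returns undefined when the source does not match, so results always need a guard. A quick check, reusing the tokenizer set up above:

const miss = tokenizer.heading('plain paragraph text');
console.log(miss); // undefined (no heading at the start of the source)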
import { marked, Tokenizer } from "marked";
const tokenizer = new Tokenizer();
// Override heading tokenization to add custom metadata
tokenizer.heading = function(src) {
// Match heading with optional {#id} syntax
const match = src.match(/^(#{1,6})\s+(.+?)(?:\s+\{#([^}]+)\})?\s*$/);
if (match) {
const depth = match[1].length;
const text = match[2];
const customId = match[3];
return {
type: 'heading',
raw: match[0],
depth,
text,
tokens: this.lexer.inline(text),
customId // Custom property
};
}
// Return undefined if no match
};
marked.setOptions({ tokenizer });
// Now "# Title {#my-id}" will include customId in token
const html = marked.parse('# Title {#my-id}');

Override Best Practices:
- Always include the raw property in the returned token
- Use this.lexer for nested tokenization
- Match only at the start of the source (^ regex anchor)

import { marked, Tokenizer } from "marked";
const tokenizer = new Tokenizer();
// Add filename support to fenced code blocks
tokenizer.fences = function(src) {
// Match ```lang [filename]
const match = src.match(/^```(\w+)?(?:\s+\[([^\]]+)\])?\n([\s\S]*?)\n```/);
if (match) {
return {
type: 'code',
raw: match[0],
lang: match[1] || '',
filename: match[2] || null, // Custom property
text: match[3]
};
}
// Return undefined if no match
};
marked.setOptions({ tokenizer });
// Usage: ```js [example.js]
// Token will include filename property

Code Block Customization:
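The filename property is custom, so the default code renderer ignores it. Pairing the override with a renderer along these lines surfaces it (a sketch: the code-filename markup is illustrative, and HTML escaping is omitted for brevity):

marked.use({
  tokenizer,
  renderer: {
    code(token) {
      // token.text and token.lang come from the fences token built above;
      // token.filename is the custom property
      const header = token.filename ? `<div class="code-filename">${token.filename}</div>` : '';
      return `${header}<pre><code class="language-${token.lang}">${token.text}</code></pre>\n`;
    }
  }
});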
import { marked, Tokenizer } from "marked";
const tokenizer = new Tokenizer();
// Save original list tokenizer
const originalList = tokenizer.list.bind(tokenizer);
tokenizer.list = function(src) {
// Try original tokenizer first
const token = originalList(src);
if (token) {
return token;
}
// Try custom marker (e.g., ☐ for tasks)
const match = src.match(/^(☐|☑)\s+(.+)/);
if (match) {
const checked = match[1] === '☑';
const text = match[2];
return {
type: 'list',
raw: match[0],
ordered: false,
start: '',
loose: false,
items: [{
type: 'list_item',
raw: match[0],
task: true,
checked: checked,
loose: false,
text: text,
tokens: this.lexer.inline(text)
}]
};
}
return undefined;
};
marked.setOptions({ tokenizer });
// Now supports: ☐ Task item

List Customization:
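A usage sketch, assuming the override above is registered: the item carries task: true plus checked, so the default list renderer should emit a checkbox for it.

const html = marked.parse('☑ Write the changelog');
console.log(html); // expected: a <ul> whose single <li> renders a checked checkbox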
import { marked, Tokenizer } from "marked";
const tokenizer = new Tokenizer();
// Add support for ==highlighted text==
const originalEmStrong = tokenizer.emStrong.bind(tokenizer);
tokenizer.emStrong = function(src, maskedSrc, prevChar) {
// Check for highlight first
const highlightMatch = src.match(/^==([^=]+)==/);
if (highlightMatch) {
return {
type: 'highlight', // Custom token type
raw: highlightMatch[0],
text: highlightMatch[1],
tokens: this.lexer.inlineTokens(highlightMatch[1])
};
}
// Fall back to original em/strong handling
return originalEmStrong(src, maskedSrc, prevChar);
};
// Also need renderer for custom token type
marked.use({
tokenizer,
renderer: {
highlight(token) {
return `<mark>${this.parser.parseInline(token.tokens)}</mark>`;
}
}
});

Inline Customization:
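An input/output sketch for the override, assuming the custom highlight token is dispatched to the matching renderer as registered above:

const html = marked.parse('This is ==really important== text.');
// expected shape: <p>This is <mark>really important</mark> text.</p>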
import { marked, Tokenizer } from "marked";
const tokenizer = new Tokenizer();
// Add support for inline math: $x^2$
const originalCodespan = tokenizer.codespan.bind(tokenizer);
tokenizer.codespan = function(src) {
// Check for math expression first
const mathMatch = src.match(/^\$([^$\n]+)\$/);
if (mathMatch) {
return {
type: 'inlineMath',
raw: mathMatch[0],
text: mathMatch[1]
};
}
// Fall back to regular codespan
return originalCodespan(src);
};
// Add support for block math: $$...$$
tokenizer.fences = function(src) {
const mathMatch = src.match(/^\$\$\n([\s\S]+?)\n\$\$/);
if (mathMatch) {
return {
type: 'blockMath',
raw: mathMatch[0],
text: mathMatch[1]
};
}
// Handle regular fenced code...
// (would need to implement or call original)
return undefined;
};
// Add renderers
marked.use({
tokenizer,
renderer: {
inlineMath(token) {
return `<span class="math-inline">\\(${token.text}\\)</span>`;
},
blockMath(token) {
return `<div class="math-block">\\[${token.text}\\]</div>\n`;
}
}
});

Math Support Patterns:
- Inline math: $...$
- Block math: $$...$$

When overriding tokenizers, you can chain to the default implementation:
import { marked, Tokenizer } from "marked";
const tokenizer = new Tokenizer();
// Save original method
const originalLink = tokenizer.link.bind(tokenizer);
tokenizer.link = function(src) {
// Custom handling for wiki links: [[Page]]
const wikiMatch = src.match(/^\[\[([^\]]+)\]\]/);
if (wikiMatch) {
const page = wikiMatch[1];
const slug = page.toLowerCase().replace(/\s+/g, '-');
return {
type: 'link',
raw: wikiMatch[0],
href: `/wiki/${slug}`,
title: null,
text: page,
tokens: this.lexer.inlineTokens(page)
};
}
// Fall back to original for standard links
return originalLink(src);
};
marked.setOptions({ tokenizer });
// Now supports: [[Wiki Page]]

Chaining Pattern:
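Because the wiki token reuses the standard link type, the default renderer handles it with no extra work; a usage sketch:

const html = marked.parse('See [[Getting Started]] for setup.');
// expected shape: <p>See <a href="/wiki/getting-started">Getting Started</a> for setup.</p>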
Tokenizer methods have access to this.lexer for recursive tokenization:
import { Tokenizer } from "marked";
const tokenizer = new Tokenizer();
tokenizer.blockquote = function(src) {
const match = src.match(/^> (.+)/);
if (match) {
const text = match[1];
return {
type: 'blockquote',
raw: match[0],
text,
// Use lexer to recursively tokenize blockquote content
tokens: this.lexer.blockTokens(text)
};
}
return undefined;
};

Context Properties:
- this.lexer: Lexer instance for nested tokenization
- this.options: Current marked options
- this.rules: Regular expression rules

Lexer Methods Available:
- this.lexer.inline(src): Queue inline tokens for processing after block tokenization completes
- this.lexer.inlineTokens(src): Process inline tokens immediately
- this.lexer.blockTokens(src): Process block tokens

import { marked, Tokenizer } from "marked";
const tokenizer = new Tokenizer();
// Only recognize GitHub-style task lists when GFM enabled
tokenizer.list = function(src) {
// Only when GFM is enabled
if (!this.options.gfm) {
return undefined; // Use default (non-GFM) handling
}
// Custom GFM task list parsing...
const match = src.match(/^- \[([ x])\] (.+)/);
if (match) {
const checked = match[1] === 'x';
const text = match[2];
return {
type: 'list',
raw: match[0],
ordered: false,
start: '',
loose: false,
items: [{
type: 'list_item',
raw: match[0],
task: true,
checked: checked,
loose: false,
text: text,
tokens: this.lexer.inline(text)
}]
};
}
return undefined;
};
marked.setOptions({ tokenizer, gfm: true });

Conditional Logic Uses:
- this.options.gfm for GFM features
- this.options.pedantic for strict mode
- this.options.breaks for line break handling

Tokenizers use regular expression rules stored in this.rules:
import { Tokenizer, Lexer } from "marked";
const tokenizer = new Tokenizer();
tokenizer.rules = Lexer.rules;
// Access rules
console.log(tokenizer.rules.block.heading); // Regex for headings
console.log(tokenizer.rules.inline.link); // Regex for links
// Rules vary by options (GFM, pedantic, breaks)
// Example: GFM rules include table patterns
console.log(tokenizer.rules.block.table); // Table regex (GFM only)

Rule Structure:
- rules.block: Block-level patterns
- rules.inline: Inline-level patterns

tokenizer.heading = function(src) {
const match = src.match(/^#{1,6}\s+(.+)/);
if (!match) {
return undefined; // Explicit undefined (not null or false)
}
return {
type: 'heading',
depth: match[0].match(/^#+/)[0].length, // count leading # characters (robust to tabs after the hashes)
text: match[1],
raw: match[0],
tokens: this.lexer.inline(match[1])
};
};

Return Value Rules:
- Return undefined if no match
- Never return null or false from a tokenizer (false is for renderer fallbacks only)

raw Property

Every token must include the raw property:
tokenizer.custom = function(src) {
const match = src.match(/^@(\w+)/);
if (match) {
return {
type: 'custom',
raw: match[0], // REQUIRED: original matched text
value: match[1]
};
}
return undefined;
};

raw Property Importance:
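The lexer advances through the source by the length of each token's raw text, so an inaccurate raw either re-scans characters or silently drops them. A simplified sketch of that consumption step (illustrative, not marked's actual source):

let src = '@alice said hi';
const token = tokenizer.custom(src); // from the example above
if (token) {
  // the lexer does the equivalent of this after every successful match
  src = src.substring(token.raw.length);
}
console.log(src); // ' said hi'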
tokenizer.blockquote = function(src) {
const match = src.match(/^> (.+)/);
if (match) {
return {
type: 'blockquote',
raw: match[0],
text: match[1],
// Use lexer to parse nested markdown
tokens: this.lexer.blockTokens(match[1])
};
}
return undefined;
};

Nested Content Handling:
- Block-level children: this.lexer.blockTokens()
- Inline children: this.lexer.inlineTokens() or this.lexer.inline()

// Bad: greedy, matches too much
const match = src.match(/^(.+)/);
// Good: specific, matches what's needed
const match = src.match(/^[^\n]+/); // Match to end of line
// Better: precise pattern
const match = src.match(/^@\[(\w+)\]\(([^)]+)\)/); // Specific syntax

Matching Guidelines:
- Avoid .+ or .* without constraints
- Use anchors (^, $)

tokenizer.heading = function(src) {
// Match heading with optional whitespace
const match = src.match(/^(#{1,6})(?:\s+(.+?))?(?:\n|$)/);
if (match) {
const depth = match[1].length;
const text = match[2] || ''; // Handle empty headings
return {
type: 'heading',
raw: match[0],
depth,
text,
tokens: text ? this.lexer.inline(text) : []
};
}
return undefined;
};

Common Edge Cases:
- Return undefined if no match
- Never return null or false from tokenizer methods
- false belongs only in renderer/extension overrides to fall back to default handling; tokenizers signal "no match" with undefined

// Correct
tokenizer.custom = function(src) {
const match = src.match(/^pattern/);
if (match) {
return { type: 'custom', raw: match[0], /* ... */ };
}
return undefined; // Correct
};
// Incorrect
tokenizer.custom = function(src) {
const match = src.match(/^pattern/);
if (match) {
return { type: 'custom', raw: match[0], /* ... */ };
}
return false; // Wrong! Use undefined
};

import { Tokenizer } from "marked";
// Cache regex outside method (compiled once)
const customRegex = /^@\[(\w+)\]\(([^)]+)\)/;
const tokenizer = new Tokenizer();
tokenizer.custom = function(src) {
const match = customRegex.exec(src); // Reuse compiled regex
if (match) {
return {
type: 'custom',
raw: match[0],
name: match[1],
value: match[2]
};
}
return undefined;
};

Performance Tips:
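Beyond caching compiled regexes as above, a cheap first-character guard can skip the regex entirely on most input. A sketch of the same custom tokenizer with a guard (the @ prefix check is specific to this example's syntax):

tokenizer.custom = function(src) {
  if (src[0] !== '@') return undefined; // bail out before touching the regex
  const match = customRegex.exec(src);
  if (match) {
    return { type: 'custom', raw: match[0], name: match[1], value: match[2] };
  }
  return undefined;
};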
import { marked, Tokenizer } from "marked";
const tokenizer = new Tokenizer();
tokenizer.container = function(src) {
// Match :::type ... ::: syntax
const match = src.match(/^:::(\w+)\n([\s\S]*?)\n:::/);
if (match) {
return {
type: 'container',
raw: match[0],
containerType: match[1],
text: match[2],
tokens: this.lexer.blockTokens(match[2])
};
}
return undefined;
};
// Register as extension
marked.use({
extensions: [{
name: 'container',
level: 'block',
tokenizer: tokenizer.container,
renderer(token) {
const body = this.parser.parse(token.tokens);
return `<div class="container-${token.containerType}">${body}</div>\n`;
}
}]
});

import { marked, Tokenizer } from "marked";
const tokenizer = new Tokenizer();
tokenizer.deflist = function(src) {
// Match term followed by : definition
const match = src.match(/^(.+)\n:\s+(.+)/);
if (match) {
return {
type: 'deflist',
raw: match[0],
term: match[1],
definition: match[2],
termTokens: this.lexer.inlineTokens(match[1]),
defTokens: this.lexer.inlineTokens(match[2])
};
}
return undefined;
};
marked.use({
extensions: [{
name: 'deflist',
level: 'block',
tokenizer: tokenizer.deflist,
renderer(token) {
const term = this.parser.parseInline(token.termTokens);
const def = this.parser.parseInline(token.defTokens);
return `<dl><dt>${term}</dt><dd>${def}</dd></dl>\n`;
}
}]
});

import { marked, Tokenizer } from "marked";
const tokenizer = new Tokenizer();
// Footnote reference: [^1]
tokenizer.footnoteRef = function(src) {
const match = src.match(/^\[\^(\w+)\]/);
if (match) {
return {
type: 'footnoteRef',
raw: match[0],
id: match[1]
};
}
return undefined;
};
// Footnote definition: [^1]: Text
tokenizer.footnoteDef = function(src) {
const match = src.match(/^\[\^(\w+)\]:\s+(.+)/);
if (match) {
return {
type: 'footnoteDef',
raw: match[0],
id: match[1],
text: match[2]
};
}
return undefined;
};
marked.use({
extensions: [
{
name: 'footnoteRef',
level: 'inline',
start: (src) => src.indexOf('[^'),
tokenizer: tokenizer.footnoteRef,
renderer(token) {
return `<sup><a href="#fn-${token.id}" id="fnref-${token.id}">${token.id}</a></sup>`;
}
},
{
name: 'footnoteDef',
level: 'block',
tokenizer: tokenizer.footnoteDef,
renderer() {
return ''; // Rendered separately in postprocess hook
}
}
]
});

import { marked, Tokenizer } from "marked";
const tokenizer = new Tokenizer();
// Override heading to support {.class #id} attributes
const originalHeading = tokenizer.heading.bind(tokenizer);
tokenizer.heading = function(src) {
const match = src.match(/^(#{1,6})\s+(.+?)(?:\s+\{([^}]+)\})?\s*$/);
if (match) {
const depth = match[1].length;
const text = match[2];
const attrs = match[3];
// Parse attributes
const classes = attrs ? [...attrs.matchAll(/\.([^\s#]+)/g)].map(m => m[1]) : [];
const id = attrs ? attrs.match(/#([^\s.]+)/)?.[1] : null;
return {
type: 'heading',
raw: match[0],
depth,
text,
tokens: this.lexer.inline(text),
classes,
id
};
}
// Fall back to original
return originalHeading(src);
};
marked.use({
tokenizer,
renderer: {
heading(token) {
const text = this.parser.parseInline(token.tokens);
const id = token.id ? ` id="${token.id}"` : '';
const classes = token.classes?.length ? ` class="${token.classes.join(' ')}"` : '';
return `<h${token.depth}${id}${classes}>${text}</h${token.depth}>\n`;
}
}
});
// Usage: # Heading {.my-class #my-id}
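Assuming the override and renderer above are registered, output along these lines is expected:

const html = marked.parse('# Release Notes {.title #notes}');
// expected shape: <h1 id="notes" class="title">Release Notes</h1>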