CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/npm-prismjs

Lightweight, robust, elegant syntax highlighting library with support for 280+ languages and multiple themes

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

token-system.mddocs/

Token System

The token system provides structured representation of parsed code through Token objects and token streams. This system converts raw code strings into hierarchical structures that preserve both content and semantic meaning for rendering highlighted markup.

Capabilities

Token Constructor

The main constructor for creating token objects that represent parsed code elements.

/**
 * Constructor for token objects representing parsed code elements
 * @param {string} type - Token type identifier (keyword, string, number, etc.)
 * @param {string|TokenStream} content - Token content or nested token stream
 * @param {string|string[]} [alias] - Additional CSS class names for styling
 * @param {string} [matchedStr] - Original matched string for length calculation
 * @constructor
 */
function Token(type, content, alias, matchedStr);

Properties:

/**
 * Token object properties
 * @typedef {Object} Token
 * @property {string} type - Token type for CSS class generation
 * @property {string|TokenStream} content - Token content or nested tokens
 * @property {string|string[]} alias - Additional CSS classes
 * @property {number} length - Length of original matched string
 */

Usage Examples:

// Create simple tokens (type + content only; alias and matchedStr are optional)
const keywordToken = new Prism.Token('keyword', 'function');
const stringToken = new Prism.Token('string', '"Hello World"');
const numberToken = new Prism.Token('number', '42');

// Token with a single alias — rendered as an extra CSS class
const classToken = new Prism.Token('class-name', 'MyClass', 'important');

// Token with multiple aliases — each becomes its own CSS class
const operatorToken = new Prism.Token('operator', '===', ['equality', 'strict']);

// Complex token whose content is a nested token stream
// (mixed raw strings and Token objects)
const functionToken = new Prism.Token('function', [
    'myFunction',
    new Prism.Token('punctuation', '('),
    'param',
    new Prism.Token('punctuation', ')')
]);

// Access token properties
console.log(keywordToken.type);     // 'keyword'
console.log(keywordToken.content);  // 'function'
console.log(keywordToken.length);   // 0 — length reflects the optional matchedStr argument, which was omitted here (pass 'function' as the 4th argument to get 8)

Token Stream

Array structure containing strings and Token objects representing parsed code.

/**
 * Array of strings and Token objects representing parsed code
 * @typedef {Array<string|Token>} TokenStream
 */

Properties:

  • No adjacent strings (consolidated during parsing)
  • No empty strings (except single empty string case)
  • Mixed content of raw strings and Token objects

Usage Examples:

// Example token stream from tokenization
const code = 'const message = "Hello";';
const tokens = Prism.tokenize(code, Prism.languages.javascript);

// Typical token stream structure:
// [
//   Token { type: 'keyword', content: 'const' },
//   ' ',
//   Token { type: 'variable', content: 'message' },
//   ' ',
//   Token { type: 'operator', content: '=' },
//   ' ',
//   Token { type: 'string', content: '"Hello"' },
//   Token { type: 'punctuation', content: ';' }
// ]

// Process token stream
// Summarize a token stream: how many Token objects and raw strings it
// holds, plus a per-type tally. Only the top level is inspected — nested
// token streams inside Token.content are not descended into.
function analyzeTokens(tokenStream) {
    const summary = { tokens: 0, strings: 0, types: {} };

    for (const entry of tokenStream) {
        if (entry instanceof Prism.Token) {
            summary.tokens += 1;
            summary.types[entry.type] = (summary.types[entry.type] || 0) + 1;
        } else if (typeof entry === 'string') {
            summary.strings += 1;
        }
    }

    return summary;
}

Token Stringification

stringify

Convert tokens and token streams to HTML markup strings.

/**
 * Convert tokens or token streams to HTML string representation
 * @param {string|Token|TokenStream} o - Token, token stream, or string to stringify
 * @param {string} language - Language identifier for CSS class generation
 * @returns {string} HTML markup string with syntax highlighting
 */
Token.stringify(o, language);

Usage Examples:

// Stringify simple token
const token = new Prism.Token('keyword', 'function');
const html = Prism.Token.stringify(token, 'javascript');
console.log(html);
// Output: '<span class="token keyword">function</span>'

// Stringify token with alias
const classToken = new Prism.Token('class-name', 'MyClass', 'important'); 
const classHtml = Prism.Token.stringify(classToken, 'javascript');
console.log(classHtml);
// Output: '<span class="token class-name important">MyClass</span>'

// Stringify token stream
const tokens = [
    new Prism.Token('keyword', 'const'),
    ' ',
    new Prism.Token('variable', 'x'),
    ' ',
    new Prism.Token('operator', '='),
    ' ',
    new Prism.Token('number', '42')
];

const streamHtml = Prism.Token.stringify(tokens, 'javascript');
console.log(streamHtml);
// Output: '<span class="token keyword">const</span> <span class="token variable">x</span> <span class="token operator">=</span> <span class="token number">42</span>'

// Stringify nested tokens
const nestedToken = new Prism.Token('function-call', [
    new Prism.Token('function', 'console'),
    new Prism.Token('punctuation', '.'),
    new Prism.Token('function', 'log'),
    new Prism.Token('punctuation', '('),
    new Prism.Token('string', '"Hello"'),
    new Prism.Token('punctuation', ')')
]);

const nestedHtml = Prism.Token.stringify(nestedToken, 'javascript');
console.log(nestedHtml);
// Output: '<span class="token function-call"><span class="token function">console</span><span class="token punctuation">.</span><span class="token function">log</span><span class="token punctuation">(</span><span class="token string">"Hello"</span><span class="token punctuation">)</span></span>'

Token Processing

Token Manipulation

Working with tokens after tokenization for analysis or modification.

// Extract specific token types
// Collect the content of every token whose type matches targetType,
// walking nested token streams depth-first in document order.
// Raw strings in the stream are skipped.
function extractTokensByType(tokenStream, targetType) {
    const collected = [];
    // Explicit work list instead of recursion; unshifting children
    // preserves depth-first pre-order traversal.
    const pending = [...tokenStream];

    while (pending.length > 0) {
        const item = pending.shift();
        if (!(item instanceof Prism.Token)) {
            continue;
        }
        if (item.type === targetType) {
            collected.push(item.content);
        }
        if (Array.isArray(item.content)) {
            pending.unshift(...item.content);
        }
    }

    return collected;
}

// Usage example
const code = 'function getName() { return "John"; }';
const tokens = Prism.tokenize(code, Prism.languages.javascript);
const functions = extractTokensByType(tokens, 'function');
const strings = extractTokensByType(tokens, 'string');

console.log('Functions:', functions); // ['getName']
console.log('Strings:', strings);    // ['"John"']

Token Filtering

// Filter token stream based on criteria
// Return a copy of tokenStream in which Token objects are kept only when
// predicate(token) is truthy; raw strings (and any other non-Token
// entries) are always retained. The input stream is not modified.
function filterTokens(tokenStream, predicate) {
    const kept = [];
    for (const entry of tokenStream) {
        if (entry instanceof Prism.Token && !predicate(entry)) {
            continue; // drop rejected tokens
        }
        kept.push(entry);
    }
    return kept;
}

// Remove comment tokens
const withoutComments = filterTokens(tokens, token => token.type !== 'comment');

// Keep only specific token types
const keywordsOnly = filterTokens(tokens, token => token.type === 'keyword');

Token Transformation

// Transform tokens while preserving structure
// Apply transformer to every Token in the stream, recursing into nested
// token streams so the hierarchical structure is preserved. Raw strings
// pass through untouched. Note: when the transformed token's content is
// a nested stream, that content property is reassigned in place.
function transformTokens(tokenStream, transformer) {
    const result = [];

    for (const entry of tokenStream) {
        if (!(entry instanceof Prism.Token)) {
            result.push(entry); // strings (and other values) unchanged
            continue;
        }

        const replacement = transformer(entry);
        if (Array.isArray(replacement.content)) {
            replacement.content = transformTokens(replacement.content, transformer);
        }
        result.push(replacement);
    }

    return result;
}

// Example: Add line information to tokens
let lineNumber = 1;
const withLineNumbers = transformTokens(tokens, token => {
    const newToken = new Prism.Token(token.type, token.content, token.alias);
    newToken.line = lineNumber;
    if (typeof token.content === 'string' && token.content.includes('\n')) {
        lineNumber += (token.content.match(/\n/g) || []).length;
    }
    return newToken;
});

Hook Integration

The token system integrates with the hook system for customization:

// Modify tokens after tokenization
// Modify tokens after tokenization: flag long string literals with an
// extra "long-string" alias so themes can style them differently.
Prism.hooks.add('after-tokenize', function(env) {
    // env.tokens contains the token stream produced by Prism.tokenize
    env.tokens = env.tokens.map(token => {
        if (token instanceof Prism.Token && token.type === 'string') {
            // Only measure plain string content: a nested TokenStream is an
            // array whose .length counts tokens, not characters.
            if (typeof token.content === 'string' && token.content.length > 20) {
                // alias may be undefined, a string, or an array of strings;
                // [].concat normalizes all three (string .concat would have
                // glued class names together into one invalid class).
                token.alias = [].concat(token.alias || [], 'long-string');
            }
        }
        return token;
    });
});

// Custom token processing before HTML generation
// Customize the generated element just before HTML is emitted:
// give every keyword token a tooltip via the title attribute.
// env exposes type, content, tag, attributes, and language.
Prism.hooks.add('wrap', (env) => {
    if (env.type !== 'keyword') {
        return;
    }
    env.attributes.title = 'This is a keyword';
});

Advanced Token Patterns

Nested Token Structures

// Complex nested token example (JSX-like)
const jsxToken = new Prism.Token('tag', [
    new Prism.Token('punctuation', '<'),
    new Prism.Token('tag-name', 'Component'),
    ' ',
    new Prism.Token('attr-name', 'prop'),
    new Prism.Token('punctuation', '='),
    new Prism.Token('attr-value', [
        new Prism.Token('punctuation', '{'),
        new Prism.Token('string', '"value"'),
        new Prism.Token('punctuation', '}')
    ]),
    new Prism.Token('punctuation', '>')
]);

Token Analysis

// Analyze token complexity and nesting depth
// Measure how deeply Token.content arrays nest within a token stream.
// A flat stream reports 0; each nested token stream adds one level.
function analyzeTokenDepth(tokenStream) {
    // Pure recursive maximum — no shared mutable accumulator.
    const deepestBelow = (tokens, level) => {
        let deepest = level;
        for (const item of tokens) {
            if (item instanceof Prism.Token && Array.isArray(item.content)) {
                deepest = Math.max(deepest, deepestBelow(item.content, level + 1));
            }
        }
        return deepest;
    };

    return deepestBelow(tokenStream, 0);
}

// Count token statistics
// Gather aggregate statistics for a token stream:
//   totalTokens  — count of Token objects at every nesting level
//   tokenTypes   — histogram of token types
//   maxNesting   — deepest nesting level reached (flat stream = 0)
//   totalContent — combined character length of plain-string contents
// Raw strings between tokens are not counted toward any statistic.
function getTokenStats(tokenStream) {
    const stats = {
        totalTokens: 0,
        tokenTypes: {},
        maxNesting: 0,
        totalContent: 0
    };

    const walk = (tokens, level) => {
        if (level > stats.maxNesting) {
            stats.maxNesting = level;
        }

        for (const item of tokens) {
            if (!(item instanceof Prism.Token)) {
                continue;
            }
            stats.totalTokens += 1;
            stats.tokenTypes[item.type] = (stats.tokenTypes[item.type] || 0) + 1;

            if (typeof item.content === 'string') {
                stats.totalContent += item.content.length;
            } else if (Array.isArray(item.content)) {
                walk(item.content, level + 1);
            }
        }
    };

    walk(tokenStream, 0);
    return stats;
}

Performance Considerations

// Efficient token processing for large streams
// Apply processor to every entry of a (possibly very large) token
// stream, working in fixed-size slices so no single spread or call ever
// receives more than chunkSize arguments. Results keep input order.
function processLargeTokenStream(tokenStream, processor) {
    const chunkSize = 1000;
    const results = [];
    let offset = 0;

    while (offset < tokenStream.length) {
        const slice = tokenStream.slice(offset, offset + chunkSize);
        for (const entry of slice) {
            results.push(processor(entry));
        }
        offset += chunkSize;
    }

    return results;
}

// Memory-efficient token streaming
// Hand out tokens one at a time for incremental processing.
// NOTE(review): Prism.tokenize still materializes the full token stream
// up front — only consumption is lazy, not tokenization itself.
function* tokenGenerator(code, grammar) {
    yield* Prism.tokenize(code, grammar);
}

// Usage with generator
const tokenGen = tokenGenerator(largeCodeString, Prism.languages.javascript);
for (const token of tokenGen) {
    // Process one token at a time
    console.log(token);
}

Install with Tessl CLI

npx tessl i tessl/npm-prismjs

docs

core-highlighting.md

index.md

language-system.md

plugin-system.md

token-system.md

utilities.md

tile.json