Low-level CSS tokenizer that breaks CSS source code into tokens, following the W3C CSS Syntax Module Level 3 specification.
/**
 * Tokenizes CSS source code into tokens following the W3C CSS Syntax
 * Module Level 3 tokenization algorithm.
 * @param source - CSS source code to tokenize
 * @param onToken - Callback invoked once per token with the token's numeric
 *                  type constant and its [start, end) offsets into `source`
 */
function tokenize(source: string, onToken: (type: number, start: number, end: number) => void): void;

Usage Examples:
import { tokenize, tokenTypes, tokenNames } from 'css-tree/tokenizer';
// Basic tokenization
tokenize('.example { color: red; }', (type, start, end) => {
console.log(`${tokenNames[type]}: ${source.substring(start, end)}`);
});
// Collect all tokens
const tokens = [];
tokenize('color: red', (type, start, end) => {
tokens.push({
type: tokenNames[type],
value: source.substring(start, end),
start,
end
});
});CSS Tree provides comprehensive token type constants matching W3C specifications:
const tokenTypes: {
  /** End of file marker */
  EOF: 0;
  /** CSS identifier (e.g., property names, keywords) */
  Ident: 1;
  /** Function token (e.g., "url(", "calc(") */
  Function: 2;
  /** At-rule keyword (e.g., "@media", "@import") */
  AtKeyword: 3;
  /** Hash token (e.g., "#id", "#ff0000") */
  Hash: 4;
  /** String literal (e.g., "text", 'text') */
  String: 5;
  /** Bad string (unclosed string) */
  BadString: 6;
  /** URL token (e.g., url(image.png)) */
  Url: 7;
  /** Bad URL (malformed URL) */
  BadUrl: 8;
  /** Delimiter character (e.g., "+", ">", "~") */
  Delim: 9;
  /** Numeric value (e.g., "42", "3.14") */
  Number: 10;
  /** Percentage value (e.g., "50%") */
  Percentage: 11;
  /** Dimension value (e.g., "10px", "2em") */
  Dimension: 12;
  /** Whitespace characters */
  WhiteSpace: 13;
  /** HTML-comment open delimiter token ("<!--") — not a CSS comment */
  CDO: 14;
  /** HTML-comment close delimiter token ("-->") — not a CSS comment */
  CDC: 15;
  /** Colon character (:) */
  Colon: 16;
  /** Semicolon character (;) */
  Semicolon: 17;
  /** Comma character (,) */
  Comma: 18;
  /** Left square bracket ([) */
  LeftSquareBracket: 19;
  /** Right square bracket (]) */
  RightSquareBracket: 20;
  /** Left parenthesis "(" */
  LeftParenthesis: 21;
  /** Right parenthesis ")" */
  RightParenthesis: 22;
  /** Left curly bracket ({) */
  LeftCurlyBracket: 23;
  /** Right curly bracket (}) */
  RightCurlyBracket: 24;
  /** CSS comment token (slash-asterisk ... asterisk-slash) */
  Comment: 25;
};

Mapping of token type numbers to human-readable names:
const tokenNames: string[];
// tokenNames[tokenTypes.Ident] === "Ident"
// tokenNames[tokenTypes.Number] === "Number"Advanced token stream for CSS parsing with lookahead and navigation capabilities:
class TokenStream {
  /**
   * Creates a new token stream.
   * The parameter is named `tokenizer` (rather than `tokenize`) so that the
   * `typeof tokenize` annotation resolves to the module-level `tokenize`
   * function instead of referencing the parameter itself, which is a
   * compile error in TypeScript. Parameter renames are call-compatible.
   * @param source - CSS source code
   * @param tokenizer - Tokenization function (the exported `tokenize`)
   */
  constructor(source: string, tokenizer: typeof tokenize);
  /** Current token type */
  tokenType: number;
  /** Current token start position */
  tokenStart: number;
  /** Current token end position */
  tokenEnd: number;
  /** Current token index */
  tokenIndex: number;
  /** Reset stream to beginning */
  reset(): void;
  /** Set new source and reset */
  setSource(source: string, tokenizer: typeof tokenize): void;
  /** Look ahead at token type without consuming */
  lookupType(offset: number): number;
  /** Look ahead at token type, skipping whitespace and comments */
  lookupTypeNonSC(offset: number): number;
  /** Look ahead at token start position */
  lookupOffset(offset: number): number;
  /** Look ahead and compare token value */
  lookupValue(offset: number, referenceStr: string): boolean;
  /** Skip specified number of tokens */
  skip(tokenCount: number): void;
  /** Advance to next token */
  next(): number;
  /** Skip whitespace and comment tokens */
  skipSC(): void;
  /** Skip to balanced closing bracket/parenthesis */
  skipUntilBalanced(startToken: number, stopConsume: number): void;
  /** Get start position of token at index */
  getTokenStart(tokenIndex: number): number;
  /** Get substring from start to current position */
  substrToCursor(start: number): string;
  /** Check if position is at balance edge */
  isBalanceEdge(pos: number): boolean;
  /** Check if token is delimiter with specific code */
  isDelim(code: number, offset?: number): boolean;
  /** Iterate over all tokens */
  forEachToken(fn: (type: number, start: number, end: number) => void): void;
  /** Debug: dump all tokens */
  dump(): Array<{ type: string; chunk: string; balance: number }>;
}

TokenStream Usage Examples:
import { TokenStream, tokenize, tokenTypes } from 'css-tree/tokenizer';
const stream = new TokenStream('color: red', tokenize);
// Navigate tokens
while (stream.tokenType !== tokenTypes.EOF) {
console.log(`Token: ${tokenNames[stream.tokenType]}`);
stream.next();
}
// Lookahead
stream.reset();
if (stream.lookupType(0) === tokenTypes.Ident) {
const nextTokenType = stream.lookupType(1);
if (nextTokenType === tokenTypes.Colon) {
console.log('Found property declaration');
}
}
// Skip whitespace
stream.skipSC();
// Check specific delimiters
if (stream.isDelim(0x003E)) { // '>' character
console.log('Found child combinator');
}Converts character offsets to line/column positions for error reporting and source maps:
class OffsetToLocation {
  /**
   * Creates offset-to-location converter
   * @param source - Source code
   * @param startOffset - Starting offset
   * @param startLine - Starting line number
   * @param startColumn - Starting column number
   */
  constructor(source: string, startOffset?: number, startLine?: number, startColumn?: number);
  /** Update source and reset positions */
  setSource(source: string, startOffset?: number, startLine?: number, startColumn?: number): void;
  /** Convert offset to location */
  getLocation(offset: number, filename?: string): Location;
  /** Convert offset range to location range */
  getLocationRange(start: number, end: number, filename?: string): LocationRange;
}

/** A single resolved position within the source. */
interface Location {
  offset: number;
  line: number;
  column: number;
}

/** A resolved [start, end] span, optionally tagged with a source filename. */
interface LocationRange {
  source?: string;
  start: Location;
  end: Location;
}

OffsetToLocation Usage Examples:
import { OffsetToLocation } from 'css-tree/tokenizer';
const source = `.example {\n color: red;\n}`;
const offsetToLocation = new OffsetToLocation(source);
// Convert offset to line/column
const location = offsetToLocation.getLocation(15);
console.log(`Line: ${location.line}, Column: ${location.column}`);
// Convert range
const range = offsetToLocation.getLocationRange(0, 10, 'styles.css');
console.log(range); // { source: 'styles.css', start: {...}, end: {...} }Low-level character classification utilities used by the tokenizer:
// Character classification functions
function isNewline(code: number): boolean;
function isName(code: number): boolean;
function isValidEscape(code1: number, code2: number): boolean;
function isNumberStart(code1: number, code2: number, code3: number): boolean;
function isIdentifierStart(code1: number, code2: number, code3: number): boolean;
function isBOM(code: number): boolean;

// Character category constants
const WhiteSpaceCategory: number;
const DigitCategory: number;
const NameStartCategory: number;
const NonPrintableCategory: number;

// Utility functions
function charCodeCategory(code: number): number;
function cmpStr(source: string, start: number, end: number, reference: string): boolean;
function getNewlineLength(source: string, offset: number, code: number): number;
function findWhiteSpaceEnd(source: string, offset: number): number;
function consumeEscaped(source: string, offset: number): number;
function consumeName(source: string, offset: number): number;
function consumeNumber(source: string, offset: number): number;
function consumeBadUrlRemnants(source: string, offset: number): number;

// Custom token processing
/**
 * Tokenizes `css` and partitions the results: every token is collected,
 * and Function / String token values are additionally bucketed.
 * @param {string} css - CSS source code
 * @returns {{ tokens: Array, functions: string[], strings: string[] }}
 */
function processTokens(css) {
  const tokens = [];
  const functions = [];
  const strings = [];

  tokenize(css, (type, start, end) => {
    const chunk = css.substring(start, end);
    tokens.push({ type, start, end, value: chunk });

    switch (type) {
      case tokenTypes.Function:
        functions.push(chunk);
        break;
      case tokenTypes.String:
        strings.push(chunk);
        break;
    }
  });

  return { tokens, functions, strings };
}
// Error-tolerant tokenization
/**
 * Tokenizes CSS while collecting diagnostics instead of failing outright.
 * BadString/BadUrl tokens (which the tokenizer emits for malformed input)
 * are recorded as recoverable errors; an exception from `tokenize` itself
 * is caught and reported as a TokenizeError.
 * @param {string} css - CSS source code
 * @returns {{ tokens: Array, errors: Array }}
 */
function safeTokenize(css) {
  const errors = [];
  const tokens = [];
  try {
    tokenize(css, (type, start, end) => {
      if (type === tokenTypes.BadString || type === tokenTypes.BadUrl) {
        errors.push({
          type: 'BadToken',
          message: `Invalid ${type === tokenTypes.BadString ? 'string' : 'URL'} at position ${start}`,
          start,
          end
        });
      }
      tokens.push({ type, start, end });
    });
  } catch (error) {
    // A thrown value is not guaranteed to be an Error instance; reading
    // `.message` unconditionally could yield undefined. Narrow first.
    const message = error instanceof Error ? error.message : String(error);
    errors.push({ type: 'TokenizeError', message });
  }
  return { tokens, errors };
}