or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

data-structures.md generation.md index.md parsing.md tokenization.md traversal.md utilities.md validation.md
tile.json

docs/tokenization.md

CSS Tokenization

Low-level CSS tokenization following W3C CSS Syntax specification for breaking CSS source into tokens.

Capabilities

Tokenize Function

Breaks CSS source code into tokens following W3C CSS Syntax Module Level 3 specification.

/**
 * Tokenizes CSS source code into tokens
 * @param source - CSS source code to tokenize
 * @param onToken - Callback function called for each token
 */
function tokenize(source: string, onToken: (type: number, start: number, end: number) => void): void;

Usage Examples:

import { tokenize, tokenTypes, tokenNames } from 'css-tree/tokenizer';

// Basic tokenization
const source = '.example { color: red; }';
tokenize(source, (type, start, end) => {
  console.log(`${tokenNames[type]}: ${source.substring(start, end)}`);
});

// Collect all tokens
const css = 'color: red';
const tokens = [];
tokenize(css, (type, start, end) => {
  tokens.push({
    type: tokenNames[type],
    value: css.substring(start, end),
    start,
    end
  });
});

Token Types

CSS Tree provides comprehensive token type constants matching W3C specifications:

const tokenTypes: {
  /** End of file marker */
  EOF: 0;
  /** CSS identifier (e.g., property names, keywords) */
  Ident: 1;
  /** Function token (e.g., "url(", "calc(") */
  Function: 2;
  /** At-rule keyword (e.g., "@media", "@import") */
  AtKeyword: 3;
  /** Hash token (e.g., "#id", "#ff0000") */
  Hash: 4;
  /** String literal (e.g., "text", 'text') */
  String: 5;
  /** Bad string (unclosed string) */
  BadString: 6;
  /** URL token (e.g., url(image.png)) */
  Url: 7;
  /** Bad URL (malformed URL) */
  BadUrl: 8;
  /** Delimiter character (e.g., "+", ">", "~") */
  Delim: 9;
  /** Numeric value (e.g., "42", "3.14") */
  Number: 10;
  /** Percentage value (e.g., "50%") */
  Percentage: 11;
  /** Dimension value (e.g., "10px", "2em") */
  Dimension: 12;
  /** Whitespace characters */
  WhiteSpace: 13;
  /** CDO token — HTML comment open delimiter (<!--) */
  CDO: 14;
  /** CDC token — HTML comment close delimiter (-->) */
  CDC: 15;
  /** Colon character (:) */
  Colon: 16;
  /** Semicolon character (;) */
  Semicolon: 17;
  /** Comma character (,) */
  Comma: 18;
  /** Left square bracket ([) */
  LeftSquareBracket: 19;
  /** Right square bracket (]) */
  RightSquareBracket: 20;
  /** Left parenthesis: ( */
  LeftParenthesis: 21;
  /** Right parenthesis: ) */
  RightParenthesis: 22;
  /** Left curly bracket ({) */
  LeftCurlyBracket: 23;
  /** Right curly bracket (}) */
  RightCurlyBracket: 24;
  /** CSS comment (slash-star ... star-slash) */
  Comment: 25;
};

Token Names

Mapping of token type numbers to human-readable names:

const tokenNames: string[];
// tokenNames[tokenTypes.Ident] === "Ident"
// tokenNames[tokenTypes.Number] === "Number"

TokenStream Class

Advanced token stream for CSS parsing with lookahead and navigation capabilities:

class TokenStream {
  /**
   * Creates a new token stream
   * @param source - CSS source code
   * @param tokenize - Tokenization function
   */
  constructor(source: string, tokenize: typeof tokenize);
  
  /** Current token type */
  tokenType: number;
  /** Current token start position */
  tokenStart: number;
  /** Current token end position */
  tokenEnd: number;
  /** Current token index */
  tokenIndex: number;
  
  /** Reset stream to beginning */
  reset(): void;
  
  /** Set new source and reset */
  setSource(source: string, tokenize: typeof tokenize): void;
  
  /** Look ahead at token type without consuming */
  lookupType(offset: number): number;
  
  /** Look ahead at token type, skipping whitespace and comments */
  lookupTypeNonSC(offset: number): number;
  
  /** Look ahead at token start position */
  lookupOffset(offset: number): number;
  
  /** Look ahead and compare token value */
  lookupValue(offset: number, referenceStr: string): boolean;
  
  /** Skip specified number of tokens */
  skip(tokenCount: number): void;
  
  /** Advance to next token */
  next(): number;
  
  /** Skip whitespace and comment tokens */
  skipSC(): void;
  
  /** Skip to balanced closing bracket/parenthesis; stopConsume decides per char code whether to stop/consume */
  skipUntilBalanced(startToken: number, stopConsume: (code: number) => number): void;
  
  /** Get start position of token at index */
  getTokenStart(tokenIndex: number): number;
  
  /** Get substring from start to current position */
  substrToCursor(start: number): string;
  
  /** Check if position is at balance edge */
  isBalanceEdge(pos: number): boolean;
  
  /** Check if token is delimiter with specific code */
  isDelim(code: number, offset?: number): boolean;
  
  /** Iterate over all tokens */
  forEachToken(fn: (type: number, start: number, end: number) => void): void;
  
  /** Debug: dump all tokens */
  dump(): Array<{ type: string; chunk: string; balance: number }>;
}

TokenStream Usage Examples:

import { TokenStream, tokenize, tokenTypes, tokenNames } from 'css-tree/tokenizer';

const stream = new TokenStream('color: red', tokenize);

// Navigate tokens
while (stream.tokenType !== tokenTypes.EOF) {
  console.log(`Token: ${tokenNames[stream.tokenType]}`);
  stream.next();
}

// Lookahead
stream.reset();
if (stream.lookupType(0) === tokenTypes.Ident) {
  const nextTokenType = stream.lookupType(1);
  if (nextTokenType === tokenTypes.Colon) {
    console.log('Found property declaration');
  }
}

// Skip whitespace
stream.skipSC();

// Check specific delimiters
if (stream.isDelim(0x003E)) { // '>' character
  console.log('Found child combinator');
}

OffsetToLocation Class

Converts character offsets to line/column positions for error reporting and source maps:

class OffsetToLocation {
  /**
   * Creates offset-to-location converter
   * @param source - Source code
   * @param startOffset - Starting offset
   * @param startLine - Starting line number
   * @param startColumn - Starting column number
   */
  constructor(source: string, startOffset?: number, startLine?: number, startColumn?: number);
  
  /** Update source and reset positions */
  setSource(source: string, startOffset?: number, startLine?: number, startColumn?: number): void;
  
  /** Convert offset to location */
  getLocation(offset: number, filename?: string): Location;
  
  /** Convert offset range to location range */
  getLocationRange(start: number, end: number, filename?: string): LocationRange;
}

interface Location {
  offset: number;
  line: number;
  column: number;
}

interface LocationRange {
  source?: string;
  start: Location;
  end: Location;
}

OffsetToLocation Usage Examples:

import { OffsetToLocation } from 'css-tree/tokenizer';

const source = `.example {\n  color: red;\n}`;
const offsetToLocation = new OffsetToLocation(source);

// Convert offset to line/column
const location = offsetToLocation.getLocation(15);
console.log(`Line: ${location.line}, Column: ${location.column}`);

// Convert range
const range = offsetToLocation.getLocationRange(0, 10, 'styles.css');
console.log(range); // { source: 'styles.css', start: {...}, end: {...} }

Character Code Utilities

Low-level character classification utilities used by the tokenizer:

// Character classification functions
function isNewline(code: number): boolean;
function isName(code: number): boolean;
function isValidEscape(code1: number, code2: number): boolean;
function isNumberStart(code1: number, code2: number, code3: number): boolean;
function isIdentifierStart(code1: number, code2: number, code3: number): boolean;
function isBOM(code: number): boolean;

// Character category constants
const WhiteSpaceCategory: number;
const DigitCategory: number;
const NameStartCategory: number;
const NonPrintableCategory: number;

// Utility functions
function charCodeCategory(code: number): number;
function cmpStr(source: string, start: number, end: number, reference: string): boolean;
function getNewlineLength(source: string, offset: number, code: number): number;
function findWhiteSpaceEnd(source: string, offset: number): number;
function consumeEscaped(source: string, offset: number): number;
function consumeName(source: string, offset: number): number;
function consumeNumber(source: string, offset: number): number;
function consumeBadUrlRemnants(source: string, offset: number): number;

Advanced Tokenization Patterns

// Custom token processing
function processTokens(css) {
  const tokens = [];
  const functions = [];
  const strings = [];
  
  tokenize(css, (type, start, end) => {
    const value = css.substring(start, end);
    
    tokens.push({ type, start, end, value });
    
    if (type === tokenTypes.Function) {
      functions.push(value);
    } else if (type === tokenTypes.String) {
      strings.push(value);
    }
  });
  
  return { tokens, functions, strings };
}

// Error-tolerant tokenization
function safeTokenize(css) {
  const errors = [];
  const tokens = [];
  
  try {
    tokenize(css, (type, start, end) => {
      if (type === tokenTypes.BadString || type === tokenTypes.BadUrl) {
        errors.push({
          type: 'BadToken',
          message: `Invalid ${type === tokenTypes.BadString ? 'string' : 'URL'} at position ${start}`,
          start,
          end
        });
      }
      tokens.push({ type, start, end });
    });
  } catch (error) {
    errors.push({ type: 'TokenizeError', message: error.message });
  }
  
  return { tokens, errors };
}