or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

configuration.md
dictionaries.md
index.md
parser.md
reporter.md
tile.json

docs/parser.md

Text Parser System

Text parsing interfaces for extracting and processing content from different file types with support for nested parsing and scope tracking.

Capabilities

Parser Interface

Core parser interface for extracting text content from files.

/**
 * Text parser interface for extracting content from files
 *
 * An implementation receives the full file content together with its filename
 * and returns a ParseResult describing the text segments to spell check.
 */
interface Parser {
  /** Unique parser name/identifier */
  readonly name: ParserName;
  
  /**
   * Parse file content and extract text segments for spell checking
   * @param content - Full content of the file
   * @param filename - Filename for context and parser selection
   * @returns Parse result with extracted text segments
   */
  parse(content: string, filename: string): ParseResult;
}

/**
 * Parser name/identifier string
 *
 * This is the type of `Parser.name`; it is a plain string alias, so any
 * string is accepted by the type checker.
 */
type ParserName = string;

/**
 * Parser configuration options
 *
 * An open-ended bag of string-keyed settings. Values are typed `unknown`,
 * so they must be narrowed before they are used.
 */
type ParserOptions = Record<string, unknown>;

Parse Result

Result of parsing operation containing extracted text segments.

/**
 * Result of parsing a file
 *
 * Returned by `Parser.parse`. All members are read-only.
 */
interface ParseResult {
  /** Original file content */
  readonly content: string;
  
  /** Filename that was parsed */
  readonly filename: string;
  
  /**
   * Iterable of parsed text segments
   * Typed as an Iterable, so consumers should not assume it is an array.
   */
  readonly parsedTexts: Iterable<ParsedText>;
}

Parsed Text Segments

Individual text segments extracted from parsed content.

/**
 * Individual parsed text segment with metadata
 *
 * Only `text` and `range` are required; every other member is optional and
 * may also be explicitly `undefined`.
 */
interface ParsedText {
  /**
   * The extracted and possibly transformed text content
   */
  readonly text: string;
  
  /**
   * The raw text before transformation (optional)
   */
  readonly rawText?: string | undefined;
  
  /**
   * Start and end offsets of the text in original content
   */
  readonly range: Range;
  
  /**
   * Scope annotation for semantic context
   * Used by spell checker to apply context-specific rules
   * May be either a space-separated string or a linked ScopeChain.
   */
  readonly scope?: Scope | undefined;
  
  /**
   * Source map for text transformations
   * Maps transformed text positions back to original positions
   */
  readonly map?: SourceMap | undefined;
  
  /**
   * Delegate parsing information for nested content
   * Used to delegate subsections to other parsers
   */
  readonly delegate?: DelegateInfo | undefined;
}

Text Ranges

Range definitions for text positions and spans.

/**
 * Text range as [start, end] offsets
 *
 * A read-only two-element tuple; `ParsedText.range` uses it for offsets into
 * the original content.
 * NOTE(review): the usage examples compute `end` as `start + text.length`,
 * which suggests `end` is exclusive — confirm against the library.
 */
type Range = readonly [start: number, end: number];

Source Maps

Source mapping for text transformations and position tracking.

/**
 * Source map for tracking text transformations
 * 
 * Maps transformed text positions back to original text positions.
 * Used to report correct locations of spelling issues after text transformation.
 * 
 * Format: Array of number pairs (even=source offset, odd=transformed offset)
 * Offsets are relative to the beginning of each string segment.
 * NOTE(review): the example below lists cumulative offsets from the start of
 * the text (9, 15, 18, 24 / 9, 10, 13, 14), which does not obviously match
 * "relative to each string segment" — confirm which convention applies.
 * 
 * Example transformation:
 * - Original: "Grand Caf\u00e9 Bj\u00f8rvika"
 * - Transformed: "Grand Café Bjørvika"
 * - Map: [9, 9, 15, 10, 18, 13, 24, 14]
 * 
 * Each pair marks a boundary between segments. In the table, the first
 * segment starts at offset 0 in both texts and the last segment runs from the
 * final pair to the end of the text; neither is stored in the map itself.
 * 
 * Mapping:
 * | Source Range | Original Text | Target Range | Transformed Text |
 * |--------------|---------------|--------------|------------------|
 * | 0-9          | "Grand Caf"   | 0-9          | "Grand Caf"      |
 * | 9-15         | "\u00e9"      | 9-10         | "é"              |
 * | 15-18        | " Bj"         | 10-13        | " Bj"            |
 * | 18-24        | "\u00f8"      | 13-14        | "ø"              |
 * | 24-29        | "rvika"       | 14-19        | "rvika"          |
 */
type SourceMap = number[];

Delegation

Parser delegation for nested content parsing.

/**
 * Information for delegating parsing to another parser
 * Used when a parser encounters content that should be handled by a different parser
 *
 * Attached to a segment through `ParsedText.delegate`.
 */
interface DelegateInfo {
  /**
   * Proposed virtual filename with extension
   * Format: `./${source_filename}/${block_number}.${ext}`
   * Example: `./README.md/1.js` for JavaScript block in README
   */
  readonly filename: string;
  
  /**
   * Original filename containing the delegated content
   * Example: `./README.md`
   */
  readonly originFilename: string;
  
  /**
   * File extension for parser selection
   * Includes the leading dot.
   * Example: `.js`, `.ts`, `.py`
   */
  readonly extension: string;
  
  /**
   * Explicit filetype override for parser selection
   * Optional; consumers must handle it being absent.
   * Example: `javascript`, `typescript`, `python`
   */
  readonly fileType?: string;
}

Scope System

Scope tracking for semantic context and rule application.

/**
 * Scope information (chain or string format)
 *
 * Consumers can tell the two forms apart with `typeof scope === "string"`:
 * a ScopeString is a plain string, a ScopeChain is an object.
 */
type Scope = ScopeChain | ScopeString;

/**
 * Hierarchical scope chain from local to global
 * 
 * A singly linked list: each node holds one scope value and an optional
 * link to its parent. The chain ends at the node that has no parent.
 * 
 * Example scope hierarchy:
 * `comment.block.documentation.ts` -> `meta.interface.ts` -> `source.ts`
 */
interface ScopeChain {
  /** Current scope value */
  readonly value: string;
  
  /** Parent scope in hierarchy */
  readonly parent?: ScopeChain | undefined;
}

/**
 * String representation of scope chain separated by spaces
 * 
 * In the example the scopes are listed in the same order as the ScopeChain
 * example hierarchy, starting with `comment.block.documentation.ts`.
 * 
 * Example: "comment.block.documentation.ts meta.interface.ts source.ts"
 */
type ScopeString = string;

Text Mapping

Text transformation and mapping utilities.

/**
 * Text with optional transformation mapping
 *
 * Unlike ParsedText, the members here are not declared `readonly`.
 */
interface MappedText {
  /** The text content */
  text: string;
  
  /** Optional source map for transformations */
  map?: SourceMap;
}

/**
 * Interface for objects with mapping arrays
 *
 * Here `map` is required and typed `number[]`, the same underlying type that
 * SourceMap aliases.
 */
interface Mapped {
  /** Source map array */
  map: number[];
}

Usage Examples

Custom Parser Implementation

import type { 
  Parser, 
  ParseResult, 
  ParsedText, 
  Range 
} from "@cspell/cspell-types/Parser";

/**
 * Example parser that extracts headings, paragraph lines, and fenced code
 * blocks from Markdown content.
 *
 * - Heading text is reported with scope `markup.heading.markdown`.
 * - Other non-blank lines are reported with scope `text.markdown`.
 * - Fenced code blocks are reported with scope `source.<language>` and carry
 *   a `delegate` entry so that another parser can process them.
 *
 * Lines inside a fenced code block are reported once only, as part of the
 * delegated block. Segments are returned in document order.
 */
class MarkdownParser implements Parser {
  readonly name = "markdown";

  /**
   * Parse Markdown content into text segments.
   * @param content - Full Markdown text
   * @param filename - Name of the file; also used to build the virtual
   *   filenames of delegated code blocks
   * @returns The original content and filename plus the extracted segments
   */
  parse(content: string, filename: string): ParseResult {
    const parsedTexts: ParsedText[] = [];

    // [start, end) spans of complete fenced blocks, fences included.
    const fencedSpans: Array<readonly [number, number]> = [];
    const isFenced = (offset: number): boolean =>
      fencedSpans.some(([from, to]) => offset >= from && offset < to);

    // Avoid a doubled "./" when the filename is already relative to ".".
    const virtualRoot = filename.startsWith("./") ? filename : `./${filename}`;

    let match: RegExpExecArray | null;

    // Extract and delegate code blocks first, so that the heading and
    // paragraph passes below can skip lines that belong to a code block.
    const codeBlockRegex = /```(\w+)?\n([\s\S]*?)\n```/g;
    let blockNumber = 0;

    while ((match = codeBlockRegex.exec(content)) !== null) {
      const infoWord = match[1] ?? "";
      const language = infoWord || "text";
      const codeContent = match[2] ?? "";
      // The code starts right after the opening fence: "```" + info word + "\n".
      // Searching for the code text inside the match would give a wrong offset
      // for empty blocks and for code that repeats the info word.
      const start = match.index + 3 + infoWord.length + 1;
      const end = start + codeContent.length;

      fencedSpans.push([match.index, match.index + match[0].length]);
      parsedTexts.push({
        text: codeContent,
        range: [start, end],
        scope: `source.${language}`,
        delegate: {
          filename: `${virtualRoot}/${blockNumber}.${language}`,
          originFilename: filename,
          extension: `.${language}`,
          fileType: language
        }
      });

      blockNumber++;
    }

    // Extract text from markdown headers. Only spaces and tabs may separate
    // the marker from the text, so the match cannot run onto a following line.
    const headerRegex = /^#+[ \t]+(.+)$/gm;

    while ((match = headerRegex.exec(content)) !== null) {
      if (isFenced(match.index)) {
        continue;
      }
      const text = match[1] ?? "";
      // The captured text is always the tail of the match.
      const start = match.index + match[0].length - text.length;
      const end = start + text.length;

      parsedTexts.push({
        text,
        range: [start, end],
        scope: "markup.heading.markdown"
      });
    }

    // Extract text from markdown paragraphs: every non-blank line that is
    // neither a heading nor a fence line.
    const paragraphRegex = /^(?!#|```|\s*$)(.+)$/gm;

    while ((match = paragraphRegex.exec(content)) !== null) {
      if (isFenced(match.index)) {
        continue;
      }
      const text = match[1] ?? "";
      const start = match.index;
      const end = start + text.length;

      parsedTexts.push({
        text,
        range: [start, end],
        scope: "text.markdown"
      });
    }

    // The three passes run independently; restore document order.
    parsedTexts.sort((a, b) => a.range[0] - b.range[0]);

    return {
      content,
      filename,
      parsedTexts
    };
  }
}

Using Parser Results

import type { ParseResult, ParsedText } from "@cspell/cspell-types/Parser";

/**
 * Renders a scope as a space-separated string.
 *
 * A string scope is returned unchanged. A chain is walked from the given node
 * through its parents, so the given (most local) scope comes first.
 */
function formatParsedTextScope(scope: NonNullable<ParsedText["scope"]>): string {
  if (typeof scope === "string") {
    return scope;
  }

  type ChainNode = Exclude<NonNullable<ParsedText["scope"]>, string>;
  const values: string[] = [];
  for (let node: ChainNode | undefined = scope; node; node = node.parent) {
    values.push(node.value);
  }
  return values.join(" ");
}

/**
 * Logs a human-readable summary of a parse result to the console.
 *
 * For every segment the text and range are printed, followed by the scope,
 * delegate information, and source-map size when those are present.
 *
 * @param result - Parse result to print
 */
function processParseResult(result: ParseResult): void {
  console.log(`Parsed ${result.filename}:`);

  for (const parsedText of result.parsedTexts) {
    const { text, range, scope, delegate, map } = parsedText;

    console.log(`  Text: "${text}"`);
    console.log(`  Range: [${range[0]}, ${range[1]}]`);

    if (scope) {
      // A scope may be a chain object; interpolating it directly would print
      // "[object Object]".
      console.log(`  Scope: ${formatParsedTextScope(scope)}`);
    }

    if (delegate) {
      console.log(`  Delegate to: ${delegate.filename}`);
      // fileType is optional; skip the line rather than print "undefined".
      if (delegate.fileType !== undefined) {
        console.log(`  File type: ${delegate.fileType}`);
      }
    }

    if (map) {
      console.log(`  Has source map with ${map.length / 2} mappings`);
    }

    console.log();
  }
}

Scope Chain Utilities

import type { Scope, ScopeChain, ScopeString } from "@cspell/cspell-types/Parser";

/**
 * Converts a scope chain into its space-separated string form.
 *
 * The chain is walked from the given node through its parents, and the values
 * are emitted in that same order, so the given (most local) scope comes first
 * and the outermost scope comes last, e.g.
 * "comment.block.documentation.ts meta.interface.ts source.ts".
 *
 * @param scope - Head of the scope chain
 * @returns The scope values joined by single spaces
 */
function scopeChainToString(scope: ScopeChain): ScopeString {
  const parts: string[] = [];
  let current: ScopeChain | undefined = scope;

  while (current) {
    // Append rather than prepend: prepending would reverse the order and
    // put the outermost scope first.
    parts.push(current.value);
    current = current.parent;
  }

  return parts.join(" ");
}

/**
 * Converts a space-separated scope string into a scope chain.
 *
 * The first scope in the string becomes the returned node and each following
 * scope becomes the parent of the one before it. Leading, trailing, and
 * repeated whitespace is ignored, so it never produces empty-valued nodes.
 *
 * @param scopeString - Scopes separated by whitespace
 * @returns Head of the chain; for an empty or blank string, a single node
 *   whose value is the empty string
 */
function stringToScopeChain(scopeString: ScopeString): ScopeChain {
  // split always yields at least one element, so `outermost` is always set;
  // the default only satisfies the type checker.
  const [outermost = "", ...inner] = scopeString.trim().split(/\s+/).reverse();

  let chain: ScopeChain = { value: outermost, parent: undefined };
  for (const value of inner) {
    chain = { value, parent: chain };
  }

  return chain;
}

/**
 * Reports whether `pattern` occurs in the string form of `scope`.
 *
 * A string scope is searched as-is; a scope chain is first flattened with
 * `scopeChainToString`. The check is a plain substring search.
 *
 * @param scope - Scope as a string or a chain
 * @param pattern - Substring to look for
 * @returns `true` when the pattern is found
 */
function matchesScope(scope: Scope, pattern: string): boolean {
  if (typeof scope === "string") {
    return scope.includes(pattern);
  }

  const flattened = scopeChainToString(scope);
  return flattened.includes(pattern);
}

Source Map Processing

import type { SourceMap } from "@cspell/cspell-types/Parser";
import type { MappedText } from "@cspell/cspell-types";

/**
 * Converts an offset in the transformed text back to an offset in the
 * original text.
 *
 * The map is read as consecutive (source offset, transformed offset) pairs
 * that mark segment boundaries, measured from the start of the text. The
 * first segment starts at 0/0 and the text after the last pair is treated as
 * unchanged, so it maps one-to-one.
 *
 * Inside a segment the distance from the segment start is carried over to the
 * source, capped at the source segment's length. An offset that falls exactly
 * on a boundary maps to the start of the segment that begins there.
 *
 * @param transformedOffset - Offset into the transformed text
 * @param sourceMap - Flat array of offset pairs; a trailing unpaired value is ignored
 * @returns The corresponding offset in the original text; the input offset
 *   itself when the map is empty
 */
function applySourceMap(
  transformedOffset: number, 
  sourceMap: SourceMap
): number {
  // Start of the segment currently being examined.
  let sourceStart = 0;
  let transformedStart = 0;

  for (let i = 0; i + 1 < sourceMap.length; i += 2) {
    const sourceEnd = sourceMap[i] ?? sourceStart;
    const transformedEnd = sourceMap[i + 1] ?? transformedStart;

    if (transformedOffset < transformedEnd) {
      const offset = transformedOffset - transformedStart;
      return sourceStart + Math.min(offset, sourceEnd - sourceStart);
    }

    sourceStart = sourceEnd;
    transformedStart = transformedEnd;
  }

  // At or past the last boundary: the remaining text is unchanged.
  return sourceStart + (transformedOffset - transformedStart);
}

/**
 * Builds a source map for text that was not transformed.
 *
 * The map holds two pairs: one at offset 0 and one at the end of the text,
 * each with identical source and transformed offsets.
 *
 * @param text - The unchanged text
 * @returns `[0, 0, length, length]`
 */
function createIdentityMap(text: string): SourceMap {
  const end = text.length;
  const identity: SourceMap = [0, 0, end, end];
  return identity;
}