Text parsing interfaces for extracting and processing content from different file types with support for nested parsing and scope tracking.
Core parser interface for extracting text content from files.
/**
* Text parser interface for extracting content from files.
*
* A parser is identified by its `name` and exposes a single `parse` method that
* turns the full text of a file into a `ParseResult`.
*/
interface Parser {
/** Unique parser name/identifier */
readonly name: ParserName;
/**
* Parse file content and extract text segments for spell checking
* @param content - Full content of the file
* @param filename - Filename for context and parser selection
* @returns Parse result with extracted text segments
*/
parse(content: string, filename: string): ParseResult;
}
/**
* Parser name/identifier string.
* Plain `string` alias used as the type of `Parser.name`.
*/
type ParserName = string;
/**
* Parser configuration options.
* Open-ended record of string keys with `unknown` values; no option names are
* defined by this type, so each value must be narrowed before it is used.
*/
type ParserOptions = Record<string, unknown>;Result of parsing operation containing extracted text segments.
/**
* Result of parsing a file.
*
* Carries the parsed content and filename alongside the extracted segments.
* The segments are typed as an `Iterable`, so consumers should iterate them
* rather than rely on array-only features such as indexing or `length`.
*/
interface ParseResult {
/** Original file content */
readonly content: string;
/** Filename that was parsed */
readonly filename: string;
/** Iterable of parsed text segments */
readonly parsedTexts: Iterable<ParsedText>;
}Individual text segments extracted from parsed content.
/**
* Individual parsed text segment with metadata.
*
* Only `text` and `range` are required. `rawText`, `scope`, `map` and
* `delegate` are optional and may be omitted or explicitly `undefined`.
*/
interface ParsedText {
/**
* The extracted and possibly transformed text content
*/
readonly text: string;
/**
* The raw text before transformation (optional)
*/
readonly rawText?: string | undefined;
/**
* Start and end offsets of the text in original content
*/
readonly range: Range;
/**
* Scope annotation for semantic context
* Used by spell checker to apply context-specific rules
*/
readonly scope?: Scope | undefined;
/**
* Source map for text transformations
* Maps transformed text positions back to original positions
*/
readonly map?: SourceMap | undefined;
/**
* Delegate parsing information for nested content
* Used to delegate subsections to other parsers
*/
readonly delegate?: DelegateInfo | undefined;
}Range definitions for text positions and spans.
/**
* Text range as [start, end] offsets.
* Readonly labeled tuple: element 0 is `start`, element 1 is `end`.
* NOTE(review): this type does not say whether `end` is inclusive or
* exclusive — confirm against the code that produces and consumes ranges.
*/
type Range = readonly [start: number, end: number];Source mapping for text transformations and position tracking.
/**
* Source map for tracking text transformations
*
* Maps transformed text positions back to original text positions.
* Used to report correct locations of spelling issues after text transformation.
*
* Format: Array of number pairs (even=source offset, odd=transformed offset)
* Offsets are relative to the beginning of each string segment.
*
* As the example below shows, each pair marks a boundary between two segments:
* the first segment starts at offset 0 in both texts without an explicit
* [0, 0] pair, and the last segment runs from the final pair to the end of the text.
*
* Example transformation:
* - Original: "Grand Caf\u00e9 Bj\u00f8rvika"
* - Transformed: "Grand Café Bjørvika"
* - Map: [9, 9, 15, 10, 18, 13, 24, 14]
*
* Mapping:
* | Source Range | Original Text | Target Range | Transformed Text |
* |--------------|---------------|--------------|------------------|
* | 0-9 | "Grand Caf" | 0-9 | "Grand Caf" |
* | 9-15 | "\u00e9" | 9-10 | "é" |
* | 15-18 | " Bj" | 10-13 | " Bj" |
* | 18-24 | "\u00f8" | 13-14 | "ø" |
* | 24-29 | "rvika" | 14-19 | "rvika" |
*/
type SourceMap = number[];Parser delegation for nested content parsing.
/**
* Information for delegating parsing to another parser
* Used when a parser encounters content that should be handled by a different parser
*
* `filename`, `originFilename` and `extension` are required; `fileType` is optional.
*/
interface DelegateInfo {
/**
* Proposed virtual filename with extension
* Format: `./${source_filename}/${block_number}.${ext}`
* Example: `./README.md/1.js` for JavaScript block in README
*/
readonly filename: string;
/**
* Original filename containing the delegated content
* Example: `./README.md`
*/
readonly originFilename: string;
/**
* File extension for parser selection
* Includes the leading dot, as in the examples.
* Example: `.js`, `.ts`, `.py`
*/
readonly extension: string;
/**
* Explicit filetype override for parser selection
* Example: `javascript`, `typescript`, `python`
*/
readonly fileType?: string;
}Scope tracking for semantic context and rule application.
/**
* Scope information (chain or string format).
* Because `ScopeString` is a plain string, a `typeof value === "string"` check
* is enough to tell the two forms apart.
*/
type Scope = ScopeChain | ScopeString;
/**
* Hierarchical scope chain from local to global
*
* Each node holds one scope value and links to its parent scope; a node
* without a `parent` is the end of the chain.
*
* Example scope hierarchy:
* `comment.block.documentation.ts` -> `meta.interface.ts` -> `source.ts`
*/
interface ScopeChain {
/** Current scope value */
readonly value: string;
/** Parent scope in hierarchy */
readonly parent?: ScopeChain | undefined;
}
/**
* String representation of scope chain separated by spaces
*
* The example lists the scopes in the same order as the `ScopeChain`
* hierarchy example: most local scope first, global scope last.
*
* Example: "comment.block.documentation.ts meta.interface.ts source.ts"
*/
type ScopeString = string;Text transformation and mapping utilities.
/**
* Text with optional transformation mapping.
* Unlike `ParsedText`, the properties here are not `readonly`.
*/
interface MappedText {
/** The text content */
text: string;
/** Optional source map for transformations */
map?: SourceMap;
}
/**
* Interface for objects with mapping arrays.
* Here `map` is required and typed as `number[]`, the same underlying type
* that `SourceMap` aliases.
*/
interface Mapped {
/** Source map array */
map: number[];
}import type {
Parser,
ParseResult,
ParsedText,
Range
} from "@cspell/cspell-types/Parser";
/**
 * Example parser that extracts spell-checkable text from Markdown.
 *
 * Segments are produced in three groups, in this order:
 * 1. heading text, scoped `markup.heading.markdown`;
 * 2. paragraph lines, scoped `text.markdown`;
 * 3. fenced code blocks, scoped `source.<language>`, each with a `delegate`
 *    entry so another parser can handle the code.
 *
 * Lines inside a fenced code block are reported only as part of that block,
 * never as headings or paragraphs.
 */
class MarkdownParser implements Parser {
  readonly name = "markdown";

  /**
   * Parse Markdown content into heading, paragraph and code-block segments.
   *
   * @param content - Full Markdown text
   * @param filename - Name of the file; used to build the virtual filename of
   *   delegated code blocks (`./<filename>/<blockNumber>.<language>`)
   * @returns The content and filename together with the extracted segments
   */
  parse(content: string, filename: string): ParseResult {
    let match: RegExpExecArray | null;

    // Locate fenced code blocks first so that heading/paragraph extraction
    // can skip every line that lives inside a fence.
    const fencedSpans: Array<readonly [number, number]> = [];
    const codeBlocks: ParsedText[] = [];
    const codeBlockRegex = /```(\w+)?\n([\s\S]*?)\n```/g;
    let blockNumber = 0;
    while ((match = codeBlockRegex.exec(content)) !== null) {
      const info = match[1] ?? "";
      const language = info || "text";
      const codeContent = match[2] ?? "";
      // The code starts right after the opening fence: three backticks, the
      // optional info string and one newline. Computing it from those lengths
      // (rather than searching for the content) stays correct when the code
      // is empty or repeats the info string.
      const start = match.index + "```".length + info.length + "\n".length;
      const end = start + codeContent.length;
      fencedSpans.push([match.index, match.index + match[0].length]);
      codeBlocks.push({
        text: codeContent,
        range: [start, end],
        scope: `source.${language}`,
        delegate: {
          filename: `./${filename}/${blockNumber}.${language}`,
          originFilename: filename,
          extension: `.${language}`,
          fileType: language
        }
      });
      blockNumber++;
    }
    const isInsideFence = (offset: number): boolean =>
      fencedSpans.some(([from, to]) => offset >= from && offset < to);

    const parsedTexts: ParsedText[] = [];

    // Headings: one or more '#', then spaces/tabs, then the heading text.
    // `[ \t]+` (not `\s+`) keeps the separator on the same line, so a bare
    // "#" line cannot swallow the following line as its heading text.
    const headerRegex = /^#+[ \t]+(.+)$/gm;
    while ((match = headerRegex.exec(content)) !== null) {
      if (isInsideFence(match.index)) continue;
      const text = match[1] ?? "";
      // The captured text is always the tail of the match.
      const start = match.index + match[0].length - text.length;
      parsedTexts.push({
        text,
        range: [start, start + text.length],
        scope: "markup.heading.markdown"
      });
    }

    // Paragraph lines: any non-blank line that does not start with '#' or a fence.
    const paragraphRegex = /^(?!#|```|\s*$)(.+)$/gm;
    while ((match = paragraphRegex.exec(content)) !== null) {
      if (isInsideFence(match.index)) continue;
      const text = match[1] ?? "";
      const start = match.index;
      parsedTexts.push({
        text,
        range: [start, start + text.length],
        scope: "text.markdown"
      });
    }

    // Code blocks come last, preserving the original grouping of the output.
    parsedTexts.push(...codeBlocks);

    return {
      content,
      filename,
      parsedTexts
    };
  }
}
import type { ParseResult, ParsedText } from "@cspell/cspell-types/Parser";
/**
 * Print a human-readable summary of a parse result to the console.
 *
 * For every segment the text and range are logged, followed by the scope,
 * delegate information and source-map size when they are present. A blank
 * line separates segments.
 *
 * @param result - Parse result whose segments should be printed
 */
function processParseResult(result: ParseResult): void {
  // Structural view of one scope-chain node: a value plus an optional parent.
  interface ChainNode {
    readonly value: string;
    readonly parent?: ChainNode | undefined;
  }

  // A scope is either a string or a linked chain. Interpolating a chain
  // directly would print "[object Object]", so flatten it to a
  // space-separated string, most local scope first.
  const describeScope = (scope: string | ChainNode): string => {
    if (typeof scope === "string") return scope;
    const values: string[] = [];
    for (let node: ChainNode | undefined = scope; node; node = node.parent) {
      values.push(node.value);
    }
    return values.join(" ");
  };

  console.log(`Parsed ${result.filename}:`);
  for (const { text, range, scope, delegate, map } of result.parsedTexts) {
    console.log(` Text: "${text}"`);
    console.log(` Range: [${range[0]}, ${range[1]}]`);
    if (scope) {
      console.log(` Scope: ${describeScope(scope)}`);
    }
    if (delegate) {
      console.log(` Delegate to: ${delegate.filename}`);
      // `fileType` is optional; skip the line instead of printing "undefined".
      if (delegate.fileType !== undefined) {
        console.log(` File type: ${delegate.fileType}`);
      }
    }
    if (map) {
      // The map is a flat list of offset pairs, so half its length is the pair count.
      console.log(` Has source map with ${map.length / 2} mappings`);
    }
    console.log();
  }
}
import type { ScopeChain, ScopeString } from "@cspell/cspell-types/Parser";
/**
 * Flatten a scope chain into its space-separated string form.
 *
 * Scopes are emitted from the given (most local) node outward to the global
 * scope, matching the documented `ScopeString` order, e.g.
 * "comment.block.documentation.ts meta.interface.ts source.ts". This makes
 * the function the inverse of `stringToScopeChain`.
 *
 * @param scope - Most local node of the chain
 * @returns Scope values joined by single spaces, local scope first
 */
function scopeChainToString(scope: ScopeChain): ScopeString {
  const parts: string[] = [];
  // Append (rather than prepend) while walking towards the root so the
  // local scope stays first.
  for (let node: ScopeChain | undefined = scope; node; node = node.parent) {
    parts.push(node.value);
  }
  return parts.join(" ");
}
/**
 * Build a scope chain from its space-separated string form.
 *
 * The first scope in the string becomes the returned (most local) node and
 * each following scope becomes the parent of the one before it. Runs of
 * whitespace, including leading and trailing whitespace, are treated as a
 * single separator so they never produce empty scope nodes.
 *
 * @param scopeString - Scopes separated by whitespace, local scope first
 * @returns The most local node of the chain; for a blank string, a single
 *   node with an empty value
 */
function stringToScopeChain(scopeString: ScopeString): ScopeChain {
  const names = scopeString.split(/\s+/).filter((name) => name !== "");
  // The last name is the outermost scope and therefore has no parent.
  const outermost = names.pop() ?? "";
  return names.reduceRight<ScopeChain>(
    (parent, value) => ({ value, parent }),
    { value: outermost, parent: undefined }
  );
}
/**
 * Report whether a scope contains the given pattern.
 *
 * A chain scope is first flattened with `scopeChainToString`; the test is a
 * plain substring search on the resulting string.
 *
 * @param scope - Scope as a string or as a chain
 * @param pattern - Text to look for in the scope string
 * @returns `true` when the pattern occurs anywhere in the scope string
 */
function matchesScope(scope: Scope, pattern: string): boolean {
  if (typeof scope === "string") {
    return scope.includes(pattern);
  }
  const flattened = scopeChainToString(scope);
  return flattened.includes(pattern);
}
import type { SourceMap } from "@cspell/cspell-types/Parser";
import type { MappedText } from "@cspell/cspell-types";
/**
 * Translate an offset in the transformed text back to the matching offset in
 * the original text.
 *
 * `sourceMap` is read as consecutive `[sourceOffset, transformedOffset]`
 * pairs, each marking the end of one segment and the start of the next. The
 * first segment implicitly starts at `[0, 0]`, and everything after the last
 * pair is treated as a final segment that maps one-to-one.
 *
 * @param transformedOffset - Offset into the transformed text
 * @param sourceMap - Flat list of boundary pairs; an empty map is the identity
 * @returns The corresponding offset into the original text
 */
function applySourceMap(
  transformedOffset: number,
  sourceMap: SourceMap
): number {
  let segmentSourceStart = 0;
  let segmentTransformedStart = 0;
  // `i + 1 < length` ignores a dangling value in a malformed odd-length map.
  for (let i = 0; i + 1 < sourceMap.length; i += 2) {
    const segmentSourceEnd = sourceMap[i];
    const segmentTransformedEnd = sourceMap[i + 1];
    if (transformedOffset < segmentTransformedEnd) {
      const delta = transformedOffset - segmentTransformedStart;
      // Never step past the end of the source segment; this matters when the
      // transformed segment is longer than its source.
      return segmentSourceStart + Math.min(delta, segmentSourceEnd - segmentSourceStart);
    }
    segmentSourceStart = segmentSourceEnd;
    segmentTransformedStart = segmentTransformedEnd;
  }
  // At or beyond the last boundary: the remaining text maps one-to-one.
  return segmentSourceStart + (transformedOffset - segmentTransformedStart);
}
/**
 * Build the source map for text that was not transformed.
 *
 * The map has two pairs: `[0, 0]` for the start and `[length, length]` for
 * the end, so every offset maps to itself.
 *
 * @param text - The unchanged text
 * @returns A four-element map covering the whole text one-to-one
 */
function createIdentityMap(text: string): SourceMap {
  const length = text.length;
  const identity: SourceMap = [0, 0, length, length];
  return identity;
}