or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

index.md
linguistic-analysis.md
pattern-matching.md
plugin-system.md
text-analysis.md
text-transformation.md
tile.json

docs/text-analysis.md

Text Analysis

Core text analysis functionality including tokenization, pattern matching, and basic transformations. This functionality is available in all module levels (one, two, three).

Capabilities

Main Constructor Function

Creates a View object with analyzed text that provides access to all text processing methods.

/**
 * Parse and analyze text, returning a View object with analysis methods
 * @param text - Input text to analyze
 * @param lexicon - Optional custom word definitions
 * @returns View object with text analysis methods
 */
function nlp(text: string, lexicon?: Lexicon): View;

Usage Examples:

import nlp from "compromise";

// Basic text analysis
const doc = nlp("The quick brown fox jumps over the lazy dog");
console.log(doc.length); // number of matches in the view (1 for this single sentence), not a character count
console.log(doc.found); // true (document is not empty)

// With custom lexicon: maps a word to the tag it should receive
const customLex = { "llama": "Animal" };
const doc2 = nlp("I saw a llama", customLex);

Tokenization Without Analysis

Interprets text without running the full part-of-speech tagger, providing faster processing for simple operations.

/**
 * Interpret text without POS tagging for faster processing.
 * Shown here as a bare signature; the usage example calls it through the
 * main function, as `nlp.tokenize(...)`.
 * @param text - Input text to tokenize
 * @param lexicon - Optional custom word definitions
 * @returns View object with tokenized text
 */
function tokenize(text: string, lexicon?: Lexicon): View;

Usage Examples:

// Faster tokenization without full analysis
const tokens = nlp.tokenize("Hello world how are you");
console.log(tokens.terms().out('array')); // ['Hello', 'world', 'how', 'are', 'you']

Lazy Analysis

Scan through text with minimal analysis for even faster processing than tokenization.

/**
 * Scan through text with minimal analysis.
 * Shown here as a bare signature; the usage example calls it through the
 * main function, as `nlp.lazy(...)`.
 * @param text - Input text to scan
 * @param match - Optional specific pattern to look for (e.g. "#Animal")
 * @returns View object with minimal processing
 */
function lazy(text: string, match?: string): View;

Usage Examples:

// Minimal text scanning (fastest option)
const quickScan = nlp.lazy("The quick brown fox jumps over the lazy dog");
console.log(quickScan.terms().length); // 9 terms with minimal analysis

// Lazy scan with specific pattern focus
const focused = nlp.lazy("Find animals like cats and dogs", "#Animal");
console.log(focused.out('array')); // ['cats', 'dogs'] (minimal processing focused on animals)

Pattern Matching

Find and extract specific patterns from text using flexible match syntax.

/**
 * Return matching patterns in the document
 * @param pattern - Pattern to match (match-syntax string or compiled Net)
 * @param group - Optional capture group to extract (name or index)
 * @param options - Matching options (fuzzy, caseSensitive)
 * @returns View containing matches
 */
match(pattern: string | Net, group?: string | number, options?: object): View;

/**
 * Return only the first match
 * Takes the same parameters as match.
 */
matchOne(pattern: string | Net, group?: string | number, options?: object): View;

/**
 * Test if pattern exists in the document
 * Takes the same parameters as match, but returns a boolean instead of a View.
 */
has(pattern: string | Net, group?: string | number, options?: object): boolean;

Usage Examples:

const doc = nlp("I love pizza and pasta");

// Simple pattern matching
const foods = doc.match("(pizza|pasta)");
console.log(foods.out('array')); // ['pizza', 'pasta']

// With capture groups
const loves = doc.match("I love [#Food]", 0);

// Test for existence
if (doc.has("pizza")) {
  console.log("Found pizza!");
}

// Case insensitive matching
const matches = doc.match("PIZZA", null, { caseSensitive: false });

Positional Matching

Find text before or after specific patterns.

/**
 * Return terms before each match
 * e.g. before("fox") on "The quick brown fox ..." yields "The quick brown"
 */
before(pattern: string | Net, group?: string | number, options?: object): View;

/**
 * Return terms after each match
 * e.g. after("jumps") on "... fox jumps over the lazy dog" yields "over the lazy dog"
 */
after(pattern: string | Net, group?: string | number, options?: object): View;

/**
 * Aliases for before/after
 * NOTE(review): going by the names, lookBehind pairs with before and
 * lookAhead pairs with after — confirm against the library.
 */
lookBehind(pattern: string | Net, group?: string | number, options?: object): View;
lookAhead(pattern: string | Net, group?: string | number, options?: object): View;

Usage Examples:

const doc = nlp("The quick brown fox jumps over the lazy dog");

// Get words before 'fox'
const beforeFox = doc.before("fox").out('text'); // "The quick brown"

// Get words after 'jumps'
const afterJumps = doc.after("jumps").out('text'); // "over the lazy dog"

Text Splitting and Joining

Split text on patterns or join neighboring terms.

/**
 * Split text on pattern boundaries
 */
split(pattern?: string | Net, group?: string | number): View;
splitOn(pattern?: string | Net, group?: string | number): View;
splitBefore(pattern?: string | Net, group?: string | number): View;
splitAfter(pattern?: string | Net, group?: string | number): View;

/**
 * Join neighboring terms
 */
join(): View;
joinIf(leftMatch?: string | Net, rightMatch?: string | Net): View;

Usage Examples:

const doc = nlp("I went to the store, then to the park");

// Split on commas
const parts = doc.split(",");
console.log(parts.out('array')); // ['I went to the store', 'then to the park']

// Join all terms
const joined = doc.terms().join().out('text');

Term and View Access

Access individual terms or navigate through the document structure.

/**
 * Split results by individual terms
 */
terms(n?: number): View;

/**
 * Access specific parts of results
 */
eq(n: number): View;
first(n?: number): View;
last(n?: number): View;
slice(start: number, end?: number): View;

/**
 * Return to full document scope
 */
all(): View;
none(): View;

Usage Examples:

const doc = nlp("The quick brown fox");

// Get individual terms
const terms = doc.terms();
console.log(terms.out('array')); // ['The', 'quick', 'brown', 'fox']

// Access specific terms.
// first/last/slice pick among the *results* of a view, so they are called on
// the per-term view; called on `doc` itself they would select whole phrases.
const firstTerm = terms.first().out('text'); // 'The'
const lastTwo = terms.last(2).out('text'); // 'brown fox'
const middle = terms.slice(1, 3).out('text'); // 'quick brown'

Iteration and Filtering

Iterate through matches and filter results.

/**
 * Run function on each phrase as individual document
 * @param fn - Callback that receives each phrase as its own View
 */
forEach(fn: (match: View) => void): View;

/**
 * Transform each phrase and collect the results
 * @param fn - Callback that receives each phrase as its own View
 * @param emptyResult - NOTE(review): presumably the value returned when there is nothing to map — confirm
 * @returns A View, or a plain array (the usage example maps terms to strings and logs an array)
 */
map(fn: (match: View) => any, emptyResult?: any): View | any[];

/**
 * Return only phrases that match condition
 * @param fn - Predicate called with each phrase as its own View
 */
filter(fn: (match: View) => boolean): View;

/**
 * Find first phrase matching condition
 * @returns The first phrase accepted by fn, or undefined
 */
find(fn: (match: View) => boolean): View | undefined;

/**
 * Test if any phrase matches condition
 */
some(fn: (match: View) => boolean): boolean;

Usage Examples:

const doc = nlp("The quick brown fox jumps over the lazy dog");

// Filter for long words
const longWords = doc.terms().filter(term => term.text().length > 4);
console.log(longWords.out('array')); // ['quick', 'brown', 'jumps']

// Transform each word
const uppercased = doc.terms().map(term => term.text().toUpperCase());
console.log(uppercased); // ['THE', 'QUICK', 'BROWN', ...]

Output Methods

Convert analyzed text to different output formats.

/**
 * Return document as text string
 */
text(options?: object): string;

/**
 * Extract metadata as JSON
 */
json(options?: object): any;

/**
 * Return formatted output
 */
out(format?: string): any;

/**
 * Pretty-print with tags for debugging
 */
debug(): View;

/**
 * Generate HTML output
 */
html(toHighlight?: object): string;
wrap(matches?: object): string;

Usage Examples:

const doc = nlp("Hello world");

// All supported output formats
console.log(doc.out('text')); // 'Hello world' - formatted text
console.log(doc.out('normal')); // 'hello world' - normalized text
console.log(doc.out('array')); // ['Hello world'] - array of matches
console.log(doc.out('terms')); // [{ text: 'Hello', ... }, { text: 'world', ... }] - term objects
console.log(doc.out('tags')); // [['Noun'], ['Noun']] - POS tags for each term
console.log(doc.out('json')); // { text: 'Hello world', terms: [...] } - JSON format
console.log(doc.out('offset')); // [{ text: 'Hello', offset: { start: 0, length: 5 } }] - character positions
console.log(doc.out('topk')); // Top-k most relevant terms
console.log(doc.out('debug')); // Tagged text with detailed analysis info

// JSON output with custom options
const jsonData = doc.json({
  text: true,
  terms: {
    text: true,
    tags: true,
    offset: true
  }
});

// Debug output
doc.debug(); // Shows tagged text with POS information

Set Operations

Combine and compare different View results using set operations.

/**
 * Combine results without duplicates (union)
 */
union(match: string | Net): View;
and(match: string | Net): View; // alias for union

/**
 * Return only overlapping matches (intersection)
 */
intersection(match: string | Net): View;

/**
 * Return all results except the specified match (difference)
 */
not(match: string | Net, options?: object): View;
difference(match: string | Net, options?: object): View; // alias for not

/**
 * Get everything that is not a match (complement)
 */
complement(match: string | Net): View;

/**
 * Remove overlaps in matches
 */
settle(match: string | Net): View;

Usage Examples:

const doc = nlp("I love pizza and pasta, but hate olives");

// Combine food terms
const allFood = doc.match("#Food").union("olives");
console.log(allFood.out('array')); // ['pizza', 'pasta', 'olives']

// Get non-food terms
const nonFood = doc.not("#Food");
console.log(nonFood.out('text')); // 'I love and but hate'

// Find intersection
const loveAndFood = doc.match("love").intersection("pizza");

Text Transformation

Transform text case, formatting, and structure.

/**
 * Case transformations
 */
toLowerCase(): View;
toUpperCase(): View;
toTitleCase(): View;
toCamelCase(): View;

/**
 * Whitespace and punctuation
 */
trim(): View;
hyphenate(): View;
dehyphenate(): View;
deHyphenate(): View; // alias
toQuotations(start?: string, end?: string): View;
toQuotation(start?: string, end?: string): View; // alias
toParentheses(start?: string, end?: string): View;

/**
 * Whitespace manipulation
 */
pre(str?: string, concat?: boolean): View;
post(str?: string, concat?: boolean): View;

Usage Examples:

const doc = nlp("hello world");

// Case transformations
console.log(doc.toTitleCase().out('text')); // 'Hello World'
console.log(doc.toUpperCase().out('text')); // 'HELLO WORLD'
console.log(doc.toCamelCase().out('text')); // 'helloWorld'

// Add formatting
console.log(doc.toQuotations().out('text')); // '"hello world"'
console.log(doc.toParentheses().out('text')); // '(hello world)'

// Whitespace manipulation
const withPrefix = doc.pre(">>> ");
console.log(withPrefix.out('text')); // '>>> hello world'

Content Modification

Insert, replace, and remove content from the document.

/**
 * Insert content
 */
concat(input: string | View): View;
insertBefore(input: string | View): View;
prepend(input: string | View): View; // alias
insertAfter(input: string | View): View;
append(input: string | View): View; // alias
insert(input: string | View): View; // alias for insertAfter

/**
 * Replace content
 */
replace(from: string | View, to?: string | Function, options?: object): View;
replaceWith(to: string | Function, options?: object): View;

/**
 * Remove content
 */
remove(match?: string | Net): View;
delete(match?: string | Net): View; // alias

Usage Examples:

let doc = nlp("I like cats");

// Insert content
doc = doc.append(" and dogs");
console.log(doc.out('text')); // 'I like cats and dogs'

// Replace content
doc = doc.replace("cats", "animals");
console.log(doc.out('text')); // 'I like animals and dogs'

// Remove content
doc = doc.remove("and dogs");
console.log(doc.out('text')); // 'I like animals'

Tagging and Classification

Assign and manipulate part-of-speech tags and custom classifications.

/**
 * Add or remove tags
 */
tag(tag: string, reason?: string): View;
tagSafe(tag: string, reason?: string): View;
unTag(tag: string, reason?: string): View;

/**
 * Filter by tag capability
 */
canBe(tag: string): View;

/**
 * Preserve tag state
 */
freeze(): View;
unfreeze(): View;

Usage Examples:

const doc = nlp("SpaceX is innovative");

// Add custom tags
doc.match("SpaceX").tag("Company");
doc.match("innovative").tag("PositiveAdjective");

// Check what can be tagged
const canBeCompany = doc.canBe("Company");
console.log(canBeCompany.out('array')); // ['SpaceX']

// Remove tags
doc.match("SpaceX").unTag("Company");

Performance and Caching

Optimize processing with caching and lookup operations.

/**
 * Performance optimization (caching)
 */
cache(options?: object): View;
uncache(options?: object): View;
// The next four are result utilities rather than performance helpers:
// random sampling, de-duplication, reversal and sorting of the results.
random(n?: number): View;
unique(): View;
reverse(): View;
sort(method?: string | Function): View;

/**
 * Fast lookup operations
 * lookup accepts either a list of strings or a pre-built trie object
 * (the usage example builds one with nlp.buildTrie).
 */
lookup(trie: object | string[], options?: object): View;
autoFill(): View;

/**
 * Advanced pattern matching
 * Takes a compiled Net and returns both the resulting view and the list of found entries.
 */
sweep(match: Net, options?: object): { view: View, found: object[] };

Usage Examples:

const doc = nlp("The quick brown fox jumps over the lazy dog");

// Caching, sampling and sorting
const cached = doc.cache(); // Cache the document for repeated operations (distinct from freeze(), which preserves tag state)
const randomTerms = doc.terms().random(3); // Get 3 random terms
const sorted = doc.terms().sort(); // Sort alphabetically

// Fast lookup with pre-built trie
const animals = ['fox', 'dog', 'cat', 'bird'];
const trie = nlp.buildTrie(animals);
const foundAnimals = doc.lookup(trie);
console.log(foundAnimals.out('array')); // ['fox', 'dog']

// Remove duplicates
const unique = doc.terms().unique();

Document Navigation

Navigate and analyze the document structure.

/**
 * Document structure
 */
fullSentences(): View;
firstTerms(): View;
lastTerms(): View;
wordCount(): number;
groups(name?: string): View | object;

/**
 * Document state
 */
isDoc(view?: View): boolean;
toView(pointer?: Pointer | null): View;

Usage Examples:

const doc = nlp("Hello world. How are you today?");

// Navigate structure
const sentences = doc.fullSentences();
console.log(sentences.length); // 2 sentences

const firstWords = sentences.firstTerms();
console.log(firstWords.out('array')); // ['Hello', 'How']

// Count analysis
console.log(doc.wordCount()); // 6 words total

Types

/**
 * Result object returned by nlp() and by every chainable method in this
 * reference: a selection of matches within an analyzed document.
 */
interface View {
  /** True when the view contains at least one match (i.e. it is not empty). */
  found: boolean;
  /** Term data backing this view. NOTE(review): relationship to `document` is not shown here — confirm. */
  docs: Document;
  document: Document;
  /** Pointers selecting this view's matches; may be null. */
  pointer: Pointer[] | null;
  /** Non-nullable counterpart of `pointer`. */
  fullPointer: Pointer[];
  methods: object;
  model: object;
  hooks: string[];
  /** Number of matches in the view (e.g. 2 for a two-sentence selection), not a character count. */
  length: number;
  isView: boolean;

  // Utility methods
  clone(shallow?: boolean): View;
  compute(method: string | string[]): View;
  update(pointer: Pointer | null): View;
  // `pointer` is optional so this declaration agrees with the
  // `toView(pointer?: Pointer | null)` signature documented under
  // "Document Navigation"; existing callers that pass a pointer are unaffected.
  toView(pointer?: Pointer | null): View;
  fromText(text: string): View;
  /** Returns the view's terms as a flat array of Term objects. */
  termList(): Term[];
}

/**
 * Underlying document data: an array of Term arrays.
 * NOTE(review): each inner array presumably holds the terms of one sentence — confirm.
 */
interface Document extends Array<Term[]> {}

/**
 * Array-shaped reference to a span of terms inside a Document.
 * Every slot is optional; slots 0-2 are numeric positions and slots 3-4 are term ids.
 */
interface Pointer extends Array<number | string | undefined> {
  0?: number; // document index
  1?: number; // start term index
  2?: number; // end term index
  3?: string; // start term id
  4?: string; // end term id
}

/**
 * A single term (word) of a Document.
 * Only text, pre, post and normal are always present; the remaining fields are optional.
 */
interface Term {
  /** The term's text. */
  text: string;
  /** NOTE(review): presumably the whitespace/punctuation preceding the term — confirm. */
  pre: string;
  /** NOTE(review): presumably the whitespace/punctuation following the term — confirm. */
  post: string;
  /** NOTE(review): presumably the normalized form of `text` — confirm. */
  normal: string;
  /** Tag names attached to the term, e.g. "Animal" or "Company". */
  tags?: Set<string>;
  /** NOTE(review): a pair of numeric positions; exact meaning not shown here — confirm. */
  index?: [number, number];
  id?: string;
  chunk?: string;
  dirty?: boolean;
  syllables?: string[];
}

/**
 * Custom word definitions passed to nlp() / tokenize():
 * maps a word to the tag it should receive, e.g. { "llama": "Animal" }.
 */
interface Lexicon {
  [key: string]: string;
}

/**
 * Flags controlling what is preserved from the original text during a replacement.
 * NOTE(review): presumably the shape of the `options` argument of replace() / replaceWith(),
 * which this reference declares as plain `object` — confirm.
 */
interface ReplaceWithProps {
  /** preserve the case of the original, ignoring the case of the replacement */
  case?: boolean;
  /** preserve whether the original was a possessive */
  possessives?: boolean;
  /** preserve all of the tags of the original, regardless of the tags of the replacement */
  tags?: boolean;
}

/**
 * Switches selecting which fields appear in JSON output.
 * The json() usage example passes an object of this shape:
 * { text: true, terms: { text: true, tags: true, offset: true } }.
 */
interface JsonProps {
  /** a perfect copy of the input text */
  text?: boolean;
  /** normalized whitespace, case, unicode, punctuation */
  normal?: boolean;
  /** lowercase, trimmed, contractions expanded */
  reduced?: boolean;
  /** cleanup whitespace */
  trim?: boolean;
  /** character-position where this begins */
  offset?: boolean;
  /** frequency of this match in the document */
  count?: boolean;
  /** remove duplicate results */
  unique?: boolean;
  /** starting term # in document */
  index?: boolean;
  /** options for each term */
  terms?: {
    text?: boolean;
    normal?: boolean;
    clean?: boolean;
    implicit?: boolean;
    tags?: boolean;
    whitespace?: boolean;
    id?: boolean;
    offset?: boolean;
    bestTag?: boolean;
  };
}

/** View of acronyms, adding helpers that remove or insert the periods between letters. */
interface Acronyms extends View {
  /** 'F.B.I.' -> 'FBI' */
  strip(): View;
  /** 'FBI' -> 'F.B.I.' */
  addPeriods(): View;
}

/** View of parenthesised text, adding a helper that drops the surrounding brackets. */
interface Parentheses extends View {
  /** remove ( and ) punctuation */
  strip(): View;
}

/** View of possessive forms, adding a helper that removes the possessive ending. */
interface Possessives extends View {
  /** "spencer's" -> "spencer" */
  strip(): View;
}

/** View of quoted text, adding a helper that removes the enclosing quotation marks. */
interface Quotations extends View {
  /** remove leading and trailing quotation marks */
  strip(): View;
}

/** View of slash-joined words, adding a helper that separates them into individual words. */
interface Slashes extends View {
  /** turn 'love/hate' into 'love hate' */
  split(): View;
}