Core text analysis functionality including tokenization, pattern matching, and basic transformations. This functionality is available in all module levels (one, two, three).
Creates a View object with analyzed text that provides access to all text processing methods.
/**
* Parse and analyze text, returning a View object with analysis methods
* @param text - Input text to analyze
* @param lexicon - Optional custom word definitions
* @returns View object with text analysis methods
*/
function nlp(text: string, lexicon?: Lexicon): View;
Usage Examples:
import nlp from "compromise";
// Basic text analysis
const doc = nlp("The quick brown fox jumps over the lazy dog");
console.log(doc.length); // number of matches in the view (here 1 sentence), not a character count
console.log(doc.found); // true (document is not empty)
// With custom lexicon
const customLex = { "llama": "Animal" };
const doc2 = nlp("I saw a llama", customLex);
Interprets text without running the full part-of-speech tagger, providing faster processing for simple operations.
/**
* Interpret text without POS tagging for faster processing
* @param text - Input text to tokenize
* @param lexicon - Optional custom word definitions
* @returns View object with tokenized text
*/
function tokenize(text: string, lexicon?: Lexicon): View;
Usage Examples:
// Faster tokenization without full analysis
const tokens = nlp.tokenize("Hello world how are you");
console.log(tokens.terms().out('array')); // ['Hello', 'world', 'how', 'are', 'you']
Scan through text with minimal analysis for even faster processing than tokenization.
/**
* Scan through text with minimal analysis
* @param text - Input text to scan
* @param match - Optional specific pattern to look for
* @returns View object with minimal processing
*/
function lazy(text: string, match?: string): View;
Usage Examples:
// Minimal text scanning (fastest option)
const quickScan = nlp.lazy("The quick brown fox jumps over the lazy dog");
console.log(quickScan.terms().length); // 9 terms with minimal analysis
// Lazy scan with specific pattern focus
const focused = nlp.lazy("Find animals like cats and dogs", "#Animal");
console.log(focused.out('array')); // ['cats', 'dogs'] (minimal processing focused on animals)
Find and extract specific patterns from text using flexible match syntax.
/**
* Return matching patterns in the document
* @param pattern - Pattern to match (string, regex, or compiled Net)
* @param group - Optional capture group to extract
* @param options - Matching options (fuzzy, caseSensitive)
* @returns View containing matches
*/
match(pattern: string | Net, group?: string | number, options?: object): View;
/**
* Return only the first match
*/
matchOne(pattern: string | Net, group?: string | number, options?: object): View;
/**
* Test if pattern exists in the document
*/
has(pattern: string | Net, group?: string | number, options?: object): boolean;
Usage Examples:
const doc = nlp("I love pizza and pasta");
// Simple pattern matching
const foods = doc.match("(pizza|pasta)");
console.log(foods.out('array')); // ['pizza', 'pasta']
// With capture groups
const loves = doc.match("I love [#Food]", 0);
// Test for existence
if (doc.has("pizza")) {
console.log("Found pizza!");
}
// Case insensitive matching
const matches = doc.match("PIZZA", null, { caseSensitive: false });
Find text before or after specific patterns.
/**
* Return terms before each match
*/
before(pattern: string | Net, group?: string | number, options?: object): View;
/**
* Return terms after each match
*/
after(pattern: string | Net, group?: string | number, options?: object): View;
/**
* Aliases for before/after
*/
lookBehind(pattern: string | Net, group?: string | number, options?: object): View;
lookAhead(pattern: string | Net, group?: string | number, options?: object): View;
Usage Examples:
const doc = nlp("The quick brown fox jumps over the lazy dog");
// Get words before 'fox'
const beforeFox = doc.before("fox").out('text'); // "The quick brown"
// Get words after 'jumps'
const afterJumps = doc.after("jumps").out('text'); // "over the lazy dog"
Split text on patterns or join neighboring terms.
/**
* Split text on pattern boundaries
*/
split(pattern?: string | Net, group?: string | number): View;
splitOn(pattern?: string | Net, group?: string | number): View;
splitBefore(pattern?: string | Net, group?: string | number): View;
splitAfter(pattern?: string | Net, group?: string | number): View;
/**
* Join neighboring terms
*/
join(): View;
joinIf(leftMatch?: string | Net, rightMatch?: string | Net): View;
Usage Examples:
const doc = nlp("I went to the store, then to the park");
// Split on commas
const parts = doc.split(",");
console.log(parts.out('array')); // ['I went to the store', 'then to the park']
// Join all terms
const joined = doc.terms().join().out('text');
Access individual terms or navigate through the document structure.
/**
* Split results by individual terms
*/
terms(n?: number): View;
/**
* Access specific parts of results
*/
eq(n: number): View;
first(n?: number): View;
last(n?: number): View;
slice(start: number, end?: number): View;
/**
* Return to full document scope
*/
all(): View;
none(): View;
Usage Examples:
const doc = nlp("The quick brown fox");
// Get individual terms
const terms = doc.terms();
console.log(terms.out('array')); // ['The', 'quick', 'brown', 'fox']
// Access specific terms
const firstTerm = terms.first().out('text'); // 'The'
const lastTwo = terms.last(2).out('text'); // 'brown fox'
const middle = terms.slice(1, 3).out('text'); // 'quick brown'
Iterate through matches and filter results.
/**
* Run function on each phrase as individual document
*/
forEach(fn: (match: View) => void): View;
/**
* Transform each phrase and create new document
*/
map(fn: (match: View) => any, emptyResult?: any): View | any[];
/**
* Return only phrases that match condition
*/
filter(fn: (match: View) => boolean): View;
/**
* Find first phrase matching condition
*/
find(fn: (match: View) => boolean): View | undefined;
/**
* Test if any phrase matches condition
*/
some(fn: (match: View) => boolean): boolean;
Usage Examples:
const doc = nlp("The quick brown fox jumps over the lazy dog");
// Filter for long words
const longWords = doc.terms().filter(term => term.text().length > 4);
console.log(longWords.out('array')); // ['quick', 'brown', 'jumps']
// Transform each word
const uppercased = doc.terms().map(term => term.text().toUpperCase());
console.log(uppercased); // ['THE', 'QUICK', 'BROWN', ...]
Convert analyzed text to different output formats.
/**
* Return document as text string
*/
text(options?: object): string;
/**
* Extract metadata as JSON
*/
json(options?: object): any;
/**
* Return formatted output
*/
out(format?: string): any;
/**
* Pretty-print with tags for debugging
*/
debug(): View;
/**
* Generate HTML output
*/
html(toHighlight?: object): string;
wrap(matches?: object): string;
Usage Examples:
const doc = nlp("Hello world");
// All supported output formats
console.log(doc.out('text')); // 'Hello world' - formatted text
console.log(doc.out('normal')); // 'hello world' - normalized text
console.log(doc.out('array')); // ['Hello world'] - array of matches
console.log(doc.out('terms')); // [{ text: 'Hello', ... }, { text: 'world', ... }] - term objects
console.log(doc.out('tags')); // [['Noun'], ['Noun']] - POS tags for each term
console.log(doc.out('json')); // { text: 'Hello world', terms: [...] } - JSON format
console.log(doc.out('offset')); // [{ text: 'Hello', offset: { start: 0, length: 5 } }] - character positions
console.log(doc.out('topk')); // Top-k most relevant terms
console.log(doc.out('debug')); // Tagged text with detailed analysis info
// JSON output with custom options
const jsonData = doc.json({
text: true,
terms: {
text: true,
tags: true,
offset: true
}
});
// Debug output
doc.debug(); // Shows tagged text with POS information
Combine and compare different View results using set operations.
/**
* Combine results without duplicates (union)
*/
union(match: string | Net): View;
and(match: string | Net): View; // alias for union
/**
* Return only overlapping matches (intersection)
*/
intersection(match: string | Net): View;
/**
* Return all results except the specified match (difference)
*/
not(match: string | Net, options?: object): View;
difference(match: string | Net, options?: object): View; // alias for not
/**
* Get everything that is not a match (complement)
*/
complement(match: string | Net): View;
/**
* Remove overlaps in matches
*/
settle(match: string | Net): View;
Usage Examples:
const doc = nlp("I love pizza and pasta, but hate olives");
// Combine food terms
const allFood = doc.match("#Food").union("olives");
console.log(allFood.out('array')); // ['pizza', 'pasta', 'olives']
// Get non-food terms
const nonFood = doc.not("#Food");
console.log(nonFood.out('text')); // 'I love and but hate'
// Find intersection
const loveAndFood = doc.match("love").intersection("pizza");
Transform text case, formatting, and structure.
/**
* Case transformations
*/
toLowerCase(): View;
toUpperCase(): View;
toTitleCase(): View;
toCamelCase(): View;
/**
* Whitespace and punctuation
*/
trim(): View;
hyphenate(): View;
dehyphenate(): View;
deHyphenate(): View; // alias
toQuotations(start?: string, end?: string): View;
toQuotation(start?: string, end?: string): View; // alias
toParentheses(start?: string, end?: string): View;
/**
* Whitespace manipulation
*/
pre(str?: string, concat?: boolean): View;
post(str?: string, concat?: boolean): View;
Usage Examples:
const doc = nlp("hello world");
// Case transformations
console.log(doc.toTitleCase().out('text')); // 'Hello World'
console.log(doc.toUpperCase().out('text')); // 'HELLO WORLD'
console.log(doc.toCamelCase().out('text')); // 'helloWorld'
// Add formatting
console.log(doc.toQuotations().out('text')); // '"hello world"'
console.log(doc.toParentheses().out('text')); // '(hello world)'
// Whitespace manipulation
const withPrefix = doc.pre(">>> ");
console.log(withPrefix.out('text')); // '>>> hello world'
Insert, replace, and remove content from the document.
/**
* Insert content
*/
concat(input: string | View): View;
insertBefore(input: string | View): View;
prepend(input: string | View): View; // alias
insertAfter(input: string | View): View;
append(input: string | View): View; // alias
insert(input: string | View): View; // alias for insertAfter
/**
* Replace content
*/
replace(from: string | View, to?: string | Function, options?: object): View;
replaceWith(to: string | Function, options?: object): View;
/**
* Remove content
*/
remove(match?: string | Net): View;
delete(match?: string | Net): View; // alias
Usage Examples:
let doc = nlp("I like cats");
// Insert content
doc = doc.append(" and dogs");
console.log(doc.out('text')); // 'I like cats and dogs'
// Replace content
doc = doc.replace("cats", "animals");
console.log(doc.out('text')); // 'I like animals and dogs'
// Remove content
doc = doc.remove("and dogs");
console.log(doc.out('text')); // 'I like animals'
Assign and manipulate part-of-speech tags and custom classifications.
/**
* Add or remove tags
*/
tag(tag: string, reason?: string): View;
tagSafe(tag: string, reason?: string): View;
unTag(tag: string, reason?: string): View;
/**
* Filter by tag capability
*/
canBe(tag: string): View;
/**
* Preserve tag state
*/
freeze(): View;
unfreeze(): View;
Usage Examples:
const doc = nlp("SpaceX is innovative");
// Add custom tags
doc.match("SpaceX").tag("Company");
doc.match("innovative").tag("PositiveAdjective");
// Check what can be tagged
const canBeCompany = doc.canBe("Company");
console.log(canBeCompany.out('array')); // ['SpaceX']
// Remove tags
doc.match("SpaceX").unTag("Company");
Optimize processing with caching and lookup operations.
/**
* Performance optimization
*/
cache(options?: object): View;
uncache(options?: object): View;
random(n?: number): View;
unique(): View;
reverse(): View;
sort(method?: string | Function): View;
/**
* Fast lookup operations
*/
lookup(trie: object | string[], options?: object): View;
autoFill(): View;
/**
* Advanced pattern matching
*/
sweep(match: Net, options?: object): { view: View, found: object[] };
Usage Examples:
const doc = nlp("The quick brown fox jumps over the lazy dog");
// Performance operations
const cached = doc.cache(); // Freeze state for repeated operations
const randomTerms = doc.terms().random(3); // Get 3 random terms
const sorted = doc.terms().sort(); // Sort alphabetically
// Fast lookup with pre-built trie
const animals = ['fox', 'dog', 'cat', 'bird'];
const trie = nlp.buildTrie(animals);
const foundAnimals = doc.lookup(trie);
console.log(foundAnimals.out('array')); // ['fox', 'dog']
// Remove duplicates
const unique = doc.terms().unique();
Navigate and analyze the document structure.
/**
* Document structure
*/
fullSentences(): View;
firstTerms(): View;
lastTerms(): View;
wordCount(): number;
groups(name?: string): View | object;
/**
* Document state
*/
isDoc(view?: View): boolean;
toView(pointer?: Pointer | null): View;
Usage Examples:
const doc = nlp("Hello world. How are you today?");
// Navigate structure
const sentences = doc.fullSentences();
console.log(sentences.length); // 2 sentences
const firstWords = sentences.firstTerms();
console.log(firstWords.out('array')); // ['Hello', 'How']
// Count analysis
console.log(doc.wordCount()); // 6 words total
/**
 * The object returned by nlp(), tokenize() and lazy(), and by the matching,
 * transformation and navigation methods listed in this reference.
 * Only the state properties and utility methods are declared here.
 */
interface View {
/** true when the view is not empty */
found: boolean;
/** Term data as a Document (an array of Term arrays) */
docs: Document;
/** NOTE(review): same type as `docs`; presumably the full underlying document — confirm */
document: Document;
/** pointers for this view's matches; NOTE(review): null presumably means "whole document" — confirm */
pointer: Pointer[] | null;
/** non-nullable counterpart of `pointer` */
fullPointer: Pointer[];
methods: object;
model: object;
hooks: string[];
/** number of matches in this view (e.g. 2 for a view holding two sentences) */
length: number;
isView: boolean;
// Utility methods
/** Copy this view; `shallow` optionally requests a shallow copy */
clone(shallow?: boolean): View;
/** Run one or more named compute methods and return a View */
compute(method: string | string[]): View;
/** Return a View for the given pointer */
update(pointer: Pointer | null): View;
/** Return a View for the given pointer */
toView(pointer: Pointer | null): View;
/** Build a View from a text string */
fromText(text: string): View;
/** Return the terms as a flat Term[] */
termList(): Term[];
}
/**
 * Parsed document data: an array whose elements are arrays of Term.
 * NOTE(review): presumably one inner Term[] per sentence — confirm.
 */
interface Document extends Array<Term[]> {}
/**
 * Array-shaped locator for a span of terms. The meaning of each position is
 * given on the indexed members below; every position is optional.
 */
interface Pointer extends Array<number | string | undefined> {
0?: number; // document index
1?: number; // start term index
2?: number; // end term index
3?: string; // start term id
4?: string; // end term id
}
/**
 * A single term of the parsed text, as stored in a Document and returned by
 * View.termList(). Only text, pre, post and normal are required.
 */
interface Term {
text: string;
// NOTE(review): presumably the characters before the word (whitespace/punctuation) — confirm
pre: string;
// NOTE(review): presumably the characters after the word (whitespace/punctuation) — confirm
post: string;
// NOTE(review): presumably a normalized form of `text` — confirm
normal: string;
// names of the tags on this term
tags?: Set<string>;
// NOTE(review): a pair of numeric positions; exact meaning not shown here — confirm
index?: [number, number];
id?: string;
chunk?: string;
dirty?: boolean;
syllables?: string[];
}
/**
 * Custom word definitions, accepted as the optional second argument of
 * nlp() and tokenize(). Keys and values are strings; the usage example maps
 * a word to a tag name, e.g. { "llama": "Animal" }.
 */
interface Lexicon {
[key: string]: string;
}
/**
 * Optional flags selecting which properties of the original text are kept
 * when it is replaced.
 * NOTE(review): presumably the `options` argument of replace()/replaceWith() — confirm.
 */
interface ReplaceWithProps {
/** preserve the case of the original, ignoring the case of the replacement */
case?: boolean;
/** preserve whether the original was a possessive */
possessives?: boolean;
/** preserve all of the tags of the original, regardless of the tags of the replacement */
tags?: boolean;
}
/**
 * Optional flags selecting which fields appear in JSON output.
 * The json() usage example passes an object of this shape
 * ({ text: true, terms: { text, tags, offset } }).
 */
interface JsonProps {
/** a perfect copy of the input text */
text?: boolean;
/** normalized whitespace, case, unicode, punctuation */
normal?: boolean;
/** lowercase, trimmed, contractions expanded */
reduced?: boolean;
/** cleanup whitespace */
trim?: boolean;
/** character-position where this begins */
offset?: boolean;
/** frequency of this match in the document */
count?: boolean;
/** remove duplicate results */
unique?: boolean;
/** starting term # in document */
index?: boolean;
/** options for each term */
terms?: {
// per-term flags; each one is optional and independently toggles a field
text?: boolean;
normal?: boolean;
clean?: boolean;
implicit?: boolean;
tags?: boolean;
whitespace?: boolean;
id?: boolean;
offset?: boolean;
bestTag?: boolean;
};
}
/** View extended with helpers for removing or adding the periods in acronyms. */
interface Acronyms extends View {
/** 'F.B.I.' -> 'FBI' */
strip(): View;
/** 'FBI' -> 'F.B.I.' */
addPeriods(): View;
}
/** View extended with a helper for removing parenthesis punctuation. */
interface Parentheses extends View {
/** remove ( and ) punctuation */
strip(): View;
}
/** View extended with a helper for removing the possessive ending from a word. */
interface Possessives extends View {
/** "spencer's" -> "spencer" */
strip(): View;
}
/** View extended with a helper for removing the surrounding quotation marks. */
interface Quotations extends View {
/** remove leading and trailing quotation marks */
strip(): View;
}
/** View extended with a helper for separating slash-joined words. */
interface Slashes extends View {
/** turn 'love/hate' into 'love hate' */
split(): View;
}