tessl/npm-compromise

Modest natural language processing library for JavaScript that enables text parsing, analysis, and manipulation in browsers and Node.js.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Security

Pending

The risk profile of this skill

Overview

Eval results

Files

Text Analysis

Name: tessl/npm-compromise
Author: tessl

Core text analysis functionality including tokenization, pattern matching, and basic transformations. This functionality is available in all module levels (one, two, three).

Capabilities

Main Constructor Function

Creates a View object with analyzed text that provides access to all text processing methods.

/**
 * Entry point of the library: parses `text` and returns a View over the analyzed document.
 * @param text - Input text to analyze
 * @param lexicon - Optional custom word definitions, given as a word-to-tag map (e.g. `{ "llama": "Animal" }`)
 * @returns View object exposing the text analysis methods
 */
function nlp(text: string, lexicon?: Lexicon): View;

Usage Examples:

import nlp from "compromise";

// Basic text analysis
const doc = nlp("The quick brown fox jumps over the lazy dog");
console.log(doc.length); // 1 — number of matches in the view (one sentence here), not a character count
console.log(doc.found); // true (document is not empty)

// With custom lexicon: each key is a word, each value is the tag to assign to it
const customLex = { "llama": "Animal" };
const doc2 = nlp("I saw a llama", customLex);

Tokenization Without Analysis

Interprets text without running the full part-of-speech tagger, providing faster processing for simple operations.

/**
 * Interpret text without POS tagging for faster processing.
 * Invoked as a property of the main function: `nlp.tokenize(...)`.
 * @param text - Input text to tokenize
 * @param lexicon - Optional custom word definitions (word-to-tag map)
 * @returns View object with tokenized text
 */
function tokenize(text: string, lexicon?: Lexicon): View;

Usage Examples:

// Faster tokenization without full analysis (called on the nlp function itself)
const tokens = nlp.tokenize("Hello world how are you");
console.log(tokens.terms().out('array')); // ['Hello', 'world', 'how', 'are', 'you'] — one entry per term

Lazy Analysis

Scan through text with minimal analysis for even faster processing than tokenization.

/**
 * Scan through text with minimal analysis.
 * Invoked as a property of the main function: `nlp.lazy(...)`.
 * @param text - Input text to scan
 * @param match - Optional specific pattern to look for, written in match syntax (e.g. "#Animal")
 * @returns View object with minimal processing
 */
function lazy(text: string, match?: string): View;

Usage Examples:

// Minimal text scanning (fastest option)
const quickScan = nlp.lazy("The quick brown fox jumps over the lazy dog");
console.log(quickScan.terms().length); // 9 terms with minimal analysis

// Lazy scan with specific pattern focus
// NOTE(review): assumes 'cats' and 'dogs' carry an Animal tag; the first example on this page
// had to supply that tag through a custom lexicon — confirm it exists by default.
const focused = nlp.lazy("Find animals like cats and dogs", "#Animal");
console.log(focused.out('array')); // ['cats', 'dogs'] (minimal processing focused on animals)

Pattern Matching

Find and extract specific patterns from text using flexible match syntax.

/**
 * Return matching patterns in the document
 * @param pattern - Pattern to match (match-syntax string or compiled Net)
 * @param group - Optional capture group to extract, by name or by index
 * @param options - Matching options (fuzzy, caseSensitive)
 * @returns View containing matches
 */
match(pattern: string | Net, group?: string | number, options?: object): View;

/**
 * Return only the first match (takes the same parameters as match)
 */
matchOne(pattern: string | Net, group?: string | number, options?: object): View;

/**
 * Test if pattern exists in the document; yields a boolean instead of a View
 */
has(pattern: string | Net, group?: string | number, options?: object): boolean;

Usage Examples:

const doc = nlp("I love pizza and pasta");

// Simple pattern matching: (a|b) matches either alternative
const foods = doc.match("(pizza|pasta)");
console.log(foods.out('array')); // ['pizza', 'pasta']

// With capture groups: [ ] marks the group and 0 selects the first one
const loves = doc.match("I love [#Food]", 0);

// Test for existence
if (doc.has("pizza")) {
  console.log("Found pizza!");
}

// Case insensitive matching.
// `group` is typed `string | number`, so it is skipped with `undefined` rather than `null`
// (null is rejected under strictNullChecks).
const matches = doc.match("PIZZA", undefined, { caseSensitive: false });

Positional Matching

Find text before or after specific patterns.

/**
 * Return terms before each match (parameters as for match)
 */
before(pattern: string | Net, group?: string | number, options?: object): View;

/**
 * Return terms after each match (parameters as for match)
 */
after(pattern: string | Net, group?: string | number, options?: object): View;

/**
 * Aliases for before/after (going by the names: lookBehind for before, lookAhead for after)
 */
lookBehind(pattern: string | Net, group?: string | number, options?: object): View;
lookAhead(pattern: string | Net, group?: string | number, options?: object): View;

Usage Examples:

const doc = nlp("The quick brown fox jumps over the lazy dog");

// Get words before 'fox' (the matched word itself is not included)
const beforeFox = doc.before("fox").out('text'); // "The quick brown"

// Get words after 'jumps' (the matched word itself is not included)
const afterJumps = doc.after("jumps").out('text'); // "over the lazy dog"

Text Splitting and Joining

Split text on patterns or join neighboring terms.

/**
 * Split text on pattern boundaries.
 * NOTE(review): the On/Before/After variants presumably differ in which side of the match the
 * boundary falls on — confirm against the library documentation.
 */
split(pattern?: string | Net, group?: string | number): View;
splitOn(pattern?: string | Net, group?: string | number): View;
splitBefore(pattern?: string | Net, group?: string | number): View;
splitAfter(pattern?: string | Net, group?: string | number): View;

/**
 * Join neighboring terms; joinIf takes optional patterns for the left and right neighbor
 */
join(): View;
joinIf(leftMatch?: string | Net, rightMatch?: string | Net): View;

Usage Examples:

const doc = nlp("I went to the store, then to the park");

// Split on commas
// NOTE(review): Term keeps surrounding characters in `pre`/`post`, so a comma may not be
// matchable as a term of its own — verify that split(",") really splits here; a
// punctuation-aware pattern (e.g. splitAfter("@hasComma")) may be required instead.
const parts = doc.split(",");
console.log(parts.out('array')); // ['I went to the store', 'then to the park']

// Join all terms
const joined = doc.terms().join().out('text');

Term and View Access

Access individual terms or navigate through the document structure.

/**
 * Split results by individual terms
 */
terms(n?: number): View;

/**
 * Access specific parts of results.
 * The index/count arguments address matches of the current view (see `length`).
 */
eq(n: number): View;
first(n?: number): View;
last(n?: number): View;
slice(start: number, end?: number): View;

/**
 * Return to full document scope
 */
all(): View;
// NOTE(review): going by its name, none() yields an empty view rather than the full document — confirm
none(): View;

Usage Examples:

const doc = nlp("The quick brown fox");

// Get individual terms
const terms = doc.terms();
console.log(terms.out('array')); // ['The', 'quick', 'brown', 'fox']

// Access specific terms.
// first/last/slice select among the matches of the view they are called on. `doc` itself
// holds a single match (the whole sentence), so the selection is made on the per-term view.
const firstTerm = terms.first().out('text'); // 'The'
const lastTwo = terms.last(2).out('text'); // 'brown fox'
const middle = terms.slice(1, 3).out('text'); // 'quick brown'

Iteration and Filtering

Iterate through matches and filter results.

/**
 * Run function on each phrase as individual document; the callback's return value is ignored
 */
forEach(fn: (match: View) => void): View;

/**
 * Transform each phrase and create new document.
 * The declared result is either a View or a plain array, depending on what `fn` returns
 * (the usage example returns strings and receives an array).
 */
map(fn: (match: View) => any, emptyResult?: any): View | any[];

/**
 * Return only phrases that match condition
 */
filter(fn: (match: View) => boolean): View;

/**
 * Find first phrase matching condition; undefined when no phrase satisfies it
 */
find(fn: (match: View) => boolean): View | undefined;

/**
 * Test if any phrase matches condition
 */
some(fn: (match: View) => boolean): boolean;

Usage Examples:

const doc = nlp("The quick brown fox jumps over the lazy dog");

// Filter for long words (more than 4 characters; each callback argument is a one-term View)
const longWords = doc.terms().filter(term => term.text().length > 4);
console.log(longWords.out('array')); // ['quick', 'brown', 'jumps']

// Transform each word; the callback returns strings, so the result is a plain array
const uppercased = doc.terms().map(term => term.text().toUpperCase());
console.log(uppercased); // ['THE', 'QUICK', 'BROWN', ...]

Output Methods

Convert analyzed text to different output formats.

/**
 * Return document as text string
 */
text(options?: object): string;

/**
 * Extract metadata as JSON
 * NOTE(review): the accepted option keys appear to be the ones declared by JsonProps — confirm
 */
json(options?: object): any;

/**
 * Return formatted output; `format` names the representation (e.g. 'text', 'normal', 'array')
 */
out(format?: string): any;

/**
 * Pretty-print with tags for debugging; returns a View
 */
debug(): View;

/**
 * Generate HTML output
 */
html(toHighlight?: object): string;
wrap(matches?: object): string;

Usage Examples:

const doc = nlp("Hello world");

// Output formats accepted by out()
// NOTE(review): the result comments below are illustrative shapes, not verified output —
// in particular the tag values and the 'json' shape should be confirmed against a real run.
console.log(doc.out('text')); // 'Hello world' - formatted text
console.log(doc.out('normal')); // 'hello world' - normalized text
console.log(doc.out('array')); // ['Hello world'] - array of matches
console.log(doc.out('terms')); // [{ text: 'Hello', ... }, { text: 'world', ... }] - term objects
console.log(doc.out('tags')); // [['Noun'], ['Noun']] - POS tags for each term
console.log(doc.out('json')); // { text: 'Hello world', terms: [...] } - JSON format
console.log(doc.out('offset')); // [{ text: 'Hello', offset: { start: 0, length: 5 } }] - character positions
console.log(doc.out('topk')); // Top-k most relevant terms
console.log(doc.out('debug')); // Tagged text with detailed analysis info

// JSON output with custom options (keys correspond to the JsonProps fields)
const jsonData = doc.json({
  text: true,
  terms: {
    text: true,
    tags: true,
    offset: true
  }
});

// Debug output
doc.debug(); // Shows tagged text with POS information

Set Operations

Combine and compare different View results using set operations.

/**
 * Combine results without duplicates (union).
 * In all set operations `match` is the other operand: a match-syntax string or a compiled Net.
 */
union(match: string | Net): View;
and(match: string | Net): View; // alias for union

/**
 * Return only overlapping matches (intersection)
 */
intersection(match: string | Net): View;

/**
 * Return all results except the specified match (difference)
 */
not(match: string | Net, options?: object): View;
difference(match: string | Net, options?: object): View; // alias for not

/**
 * Get everything that is not a match (complement)
 */
complement(match: string | Net): View;

/**
 * Remove overlaps in matches
 */
settle(match: string | Net): View;

Usage Examples:

const doc = nlp("I love pizza and pasta, but hate olives");

// Combine food terms: the #Food matches plus the literal word 'olives'
const allFood = doc.match("#Food").union("olives");
console.log(allFood.out('array')); // ['pizza', 'pasta', 'olives']

// Get non-food terms
const nonFood = doc.not("#Food");
console.log(nonFood.out('text')); // 'I love and but hate'

// Find intersection (what the 'love' match and the 'pizza' match have in common)
const loveAndFood = doc.match("love").intersection("pizza");

Text Transformation

Transform text case, formatting, and structure.

/**
 * Case transformations
 */
toLowerCase(): View;
toUpperCase(): View;
toTitleCase(): View;
toCamelCase(): View;

/**
 * Whitespace and punctuation
 */
trim(): View;
hyphenate(): View;
dehyphenate(): View;
deHyphenate(): View; // alias
toQuotations(start?: string, end?: string): View;
toQuotation(start?: string, end?: string): View; // alias
toParentheses(start?: string, end?: string): View;

/**
 * Text placed before (pre) or after (post) the match — e.g. pre(">>> ") prefixes the output.
 * NOTE(review): `concat` presumably appends to the existing value instead of replacing it — confirm
 */
pre(str?: string, concat?: boolean): View;
post(str?: string, concat?: boolean): View;

Usage Examples:

const doc = nlp("hello world");

// Transformations modify the view they are called on, so each example below works on its
// own clone() and therefore starts again from the original 'hello world'.

// Case transformations
console.log(doc.clone().toTitleCase().out('text')); // 'Hello World'
console.log(doc.clone().toUpperCase().out('text')); // 'HELLO WORLD'
console.log(doc.clone().toCamelCase().out('text')); // 'helloWorld'

// Add formatting
console.log(doc.clone().toQuotations().out('text')); // '"hello world"'
console.log(doc.clone().toParentheses().out('text')); // '(hello world)'

// Whitespace manipulation
const withPrefix = doc.clone().pre(">>> ");
console.log(withPrefix.out('text')); // '>>> hello world'

Content Modification

Insert, replace, and remove content from the document.

/**
 * Insert content; `input` is raw text or another View
 */
concat(input: string | View): View;
insertBefore(input: string | View): View;
prepend(input: string | View): View; // alias
insertAfter(input: string | View): View;
append(input: string | View): View; // alias
insert(input: string | View): View; // alias for insertAfter

/**
 * Replace content
 * NOTE(review): `options` presumably takes the ReplaceWithProps flags (case, possessives, tags) — confirm
 */
replace(from: string | View, to?: string | Function, options?: object): View;
replaceWith(to: string | Function, options?: object): View;

/**
 * Remove content; `match` is optional
 */
remove(match?: string | Net): View;
delete(match?: string | Net): View; // alias

Usage Examples:

// `doc` is declared with let because it is reassigned to the result of every step below
let doc = nlp("I like cats");

// Insert content
doc = doc.append(" and dogs");
console.log(doc.out('text')); // 'I like cats and dogs'

// Replace content
doc = doc.replace("cats", "animals");
console.log(doc.out('text')); // 'I like animals and dogs'

// Remove content
doc = doc.remove("and dogs");
console.log(doc.out('text')); // 'I like animals'

Tagging and Classification

Assign and manipulate part-of-speech tags and custom classifications.

/**
 * Add or remove tags; `reason` is an optional free-text note
 * NOTE(review): tagSafe presumably skips tagging when it would conflict with existing tags — confirm
 */
tag(tag: string, reason?: string): View;
tagSafe(tag: string, reason?: string): View;
unTag(tag: string, reason?: string): View;

/**
 * Filter by tag capability
 */
canBe(tag: string): View;

/**
 * Preserve tag state
 */
freeze(): View;
unfreeze(): View;

Usage Examples:

const doc = nlp("SpaceX is innovative");

// Add custom tags (tag() is applied to whatever the preceding match() selected)
doc.match("SpaceX").tag("Company");
doc.match("innovative").tag("PositiveAdjective");

// Check what can be tagged
const canBeCompany = doc.canBe("Company");
console.log(canBeCompany.out('array')); // ['SpaceX']

// Remove tags
doc.match("SpaceX").unTag("Company");

Performance and Caching

Optimize processing with caching and lookup operations.

/**
 * Performance optimization (caching)
 */
cache(options?: object): View;
uncache(options?: object): View;
/**
 * Sampling, de-duplication and ordering of results
 */
random(n?: number): View;
unique(): View;
reverse(): View;
sort(method?: string | Function): View;

/**
 * Fast lookup operations; lookup accepts a pre-built trie object or a plain string array
 */
lookup(trie: object | string[], options?: object): View;
autoFill(): View;

/**
 * Advanced pattern matching against a compiled Net; returns both the resulting view and the
 * list of found entries
 */
sweep(match: Net, options?: object): { view: View, found: object[] };

Usage Examples:

const doc = nlp("The quick brown fox jumps over the lazy dog");

// Performance operations
const cached = doc.cache(); // Cache the document for repeated operations (distinct from freeze())
const randomTerms = doc.terms().random(3); // Get 3 random terms
const sorted = doc.terms().sort(); // Sort alphabetically

// Fast lookup with pre-built trie
const animals = ['fox', 'dog', 'cat', 'bird'];
const trie = nlp.buildTrie(animals);
const foundAnimals = doc.lookup(trie);
console.log(foundAnimals.out('array')); // ['fox', 'dog']

// Remove duplicates
const unique = doc.terms().unique();

Document Navigation

Navigate and analyze the document structure.

/**
 * Document structure
 * fullSentences / firstTerms / lastTerms return Views; wordCount returns a plain number;
 * groups returns a View or an object depending on the call.
 */
fullSentences(): View;
firstTerms(): View;
lastTerms(): View;
wordCount(): number;
groups(name?: string): View | object;

/**
 * Document state
 */
isDoc(view?: View): boolean;
toView(pointer?: Pointer | null): View;

Usage Examples:

const doc = nlp("Hello world. How are you today?");

// Navigate structure
const sentences = doc.fullSentences();
console.log(sentences.length); // 2 sentences (length counts matches, here one per sentence)

// First term of each sentence
const firstWords = sentences.firstTerms();
console.log(firstWords.out('array')); // ['Hello', 'How']

// Count analysis
console.log(doc.wordCount()); // 6 words total

Types

/**
 * Result object returned by nlp() and by most methods documented on this page.
 * NOTE(review): only `found` and `length` are exercised by the usage examples; the other
 * fields look like library internals — confirm before relying on them.
 */
interface View {
  found: boolean; // true when the view is not empty
  docs: Document;
  document: Document;
  pointer: Pointer[] | null;
  fullPointer: Pointer[];
  methods: object;
  model: object;
  hooks: string[];
  length: number; // number of matches in the view (e.g. 2 for a view of two sentences)
  isView: boolean;
  
  // Utility methods
  clone(shallow?: boolean): View;
  compute(method: string | string[]): View;
  update(pointer: Pointer | null): View;
  toView(pointer: Pointer | null): View;
  fromText(text: string): View;
  termList(): Term[];
}

/** A document is an array of Term arrays (presumably one inner array per sentence — confirm). */
interface Document extends Array<Term[]> {}

/**
 * Positional reference into a Document, stored as a tuple-like array.
 * Every slot is optional; the meaning of each index is noted beside it.
 */
interface Pointer extends Array<number | string | undefined> {
  0?: number; // document index
  1?: number; // start term index
  2?: number; // end term index  
  3?: string; // start term id
  4?: string; // end term id
}

/**
 * A single term (word) of a Document.
 * `text`, `pre`, `post` and `normal` are always present; the remaining fields are optional.
 * NOTE(review): `pre`/`post` presumably hold the whitespace and punctuation around the word,
 * and `normal` its normalized form — confirm against the library documentation.
 */
interface Term {
  text: string;
  pre: string;
  post: string;
  normal: string;
  tags?: Set<string>;
  index?: [number, number];
  id?: string;
  chunk?: string;
  dirty?: boolean;
  syllables?: string[];
}

/** Custom word definitions: maps a word to the tag it should receive, e.g. `{ "llama": "Animal" }`. */
interface Lexicon {
  [key: string]: string;
}

/**
 * Flags controlling which properties of the original text survive a replacement.
 * NOTE(review): presumably the `options` argument of replace()/replaceWith() — confirm.
 */
interface ReplaceWithProps {
  /** preserve the case of the original, ignoring the case of the replacement */
  case?: boolean;
  /** preserve whether the original was a possessive */
  possessives?: boolean;
  /** preserve all of the tags of the original, regardless of the tags of the replacement */
  tags?: boolean;
}

/**
 * Flags selecting which fields appear in JSON output; every flag is optional.
 * NOTE(review): presumably the `options` argument of json() — the json() usage example passes
 * `text` and `terms` keys of this shape; confirm.
 */
interface JsonProps {
  /** a perfect copy of the input text */
  text?: boolean;
  /** normalized whitespace, case, unicode, punctuation */
  normal?: boolean;
  /** lowercase, trimmed, contractions expanded */
  reduced?: boolean;
  /** cleanup whitespace */
  trim?: boolean;
  /** character-position where this begins */
  offset?: boolean;
  /** frequency of this match in the document */
  count?: boolean;
  /** remove duplicate results */
  unique?: boolean;
  /** starting term # in document */
  index?: boolean;
  /** options for each term */
  terms?: {
    text?: boolean;
    normal?: boolean;
    clean?: boolean;
    implicit?: boolean;
    tags?: boolean;
    whitespace?: boolean;
    id?: boolean;
    offset?: boolean;
    bestTag?: boolean;
  };
}

/** A View with two extra methods for converting acronyms between dotted and undotted form. */
interface Acronyms extends View {
  /** 'F.B.I.' -> 'FBI' */
  strip(): View;
  /** 'FBI' -> 'F.B.I.' */
  addPeriods(): View;
}

/** A View with an extra strip() method for parenthesised text. */
interface Parentheses extends View {
  /** remove ( and ) punctuation */
  strip(): View;
}

/** A View with an extra strip() method for possessive forms. */
interface Possessives extends View {
  /** "spencer's" -> "spencer" */
  strip(): View;
}

/** A View with an extra strip() method for quoted text. */
interface Quotations extends View {
  /** remove leading and trailing quotation marks */
  strip(): View;
}

/** A View for slash-joined words; its split() takes no arguments, unlike the general split(pattern, group). */
interface Slashes extends View {
  /** turn 'love/hate' into 'love hate' */
  split(): View;
}