Plugin system for extending compromise with custom functionality, linguistic models, and additional processing capabilities. This system allows developers to add new methods, tags, words, and computational logic to the library.
Register plugins to extend compromise functionality.
/**
* Mix in a compromise plugin to extend functionality
* @param plugin - Plugin object with methods, models, etc
* @returns The nlp constructor for chaining
*/
plugin(plugin: Plugin): any;
/**
* Alias for plugin() method
* @param plugin - Plugin object to extend with
* @returns The nlp constructor for chaining
*/
extend(plugin: Plugin): any;
Usage Examples:
// Define a custom plugin
const myPlugin = {
methods: {
// Add custom methods to View objects
View: {
customMethod() {
return this.match('#CustomTag');
}
}
},
tags: {
CustomTag: {
isA: 'Noun'
}
},
words: {
'specialword': 'CustomTag'
}
};
// Register the plugin
nlp.plugin(myPlugin);
// Use the extended functionality
const doc = nlp("This is a specialword example");
const customMatches = doc.customMethod();
console.log(customMatches.out('array')); // ['specialword']
Add custom words and their linguistic properties to the internal lexicon.
/**
* Add new words to internal lexicon
* @param words - Object mapping words to their tags/properties
* @param isFrozen - Whether words are frozen (unchangeable)
* @returns The nlp constructor for chaining
*/
addWords(words: Lexicon, isFrozen?: boolean): any;
/**
* Add words to the autoFill dictionary for type-ahead
* @param words - Words to add for autocompletion
* @returns The nlp constructor for chaining
*/
typeahead(words: Lexicon): any;
Usage Examples:
// Add custom words with tags
const customWords = {
'bitcoin': 'Currency',
'ethereum': 'Currency',
'blockchain': 'Technology',
'kubernetes': 'Technology'
};
nlp.addWords(customWords);
// Now these words will be recognized
const doc = nlp("Bitcoin and ethereum use blockchain technology");
const currencies = doc.match('#Currency');
console.log(currencies.out('array')); // ['Bitcoin', 'ethereum']
// Add frozen words (can't be overridden)
nlp.addWords({ 'API': 'Acronym' }, true);
// Add words for typeahead/autocomplete
nlp.typeahead({
'javascript': 'Language',
'typescript': 'Language'
});
Extend the part-of-speech tagging system with custom tags and relationships.
/**
* Connect new tags to tagset graph
* @param tags - Object defining new tags and their relationships
* @returns The nlp constructor for chaining
*/
addTags(tags: object): any;
Usage Examples:
// Define custom tags with inheritance
const customTags = {
// Create new base tags
ProgrammingLanguage: {
isA: 'Noun'
},
// Create specific language tags
JavaScript: {
isA: 'ProgrammingLanguage'
},
Python: {
isA: 'ProgrammingLanguage'
},
// Add attributes to existing tags
Framework: {
isA: 'Noun',
not: 'ProgrammingLanguage'
}
};
nlp.addTags(customTags);
// Add words with these tags
nlp.addWords({
'javascript': 'JavaScript',
'python': 'Python',
'react': 'Framework',
'django': 'Framework'
});
// Use the new tag hierarchy
const doc = nlp("I love javascript and react for web development");
const languages = doc.match('#ProgrammingLanguage');
const frameworks = doc.match('#Framework');
console.log(languages.out('array')); // ['javascript']
console.log(frameworks.out('array')); // ['react']
Parse match expressions and create efficient text processing structures.
/**
* Parse match-string into JSON objects for internal use
* @param match - Match expression to parse (e.g., "#Noun #Verb")
* @param opts - Parsing options
* @returns Parsed match object array
*/
parseMatch(match: string, opts?: object): ParsedMatch;
/**
* Scan through text with minimal analysis (faster than full parsing)
* @param text - Input text to scan
* @param match - Optional pattern to look for
* @returns View object with lightweight analysis
*/
lazy(text: string, match?: string): View;
Usage Examples:
// Parse complex match expressions
const parsed = nlp.parseMatch('#Adjective+ #Noun');
console.log(parsed); // [{ tag: 'Adjective', ... }, { tag: 'Noun', ... }]
// Parse with options
const parsedWithOpts = nlp.parseMatch('(#Verb|#Adjective)', { caseSensitive: true });
// Use lazy parsing for performance
const quickScan = nlp.lazy("The quick brown fox", "#Animal");
console.log(quickScan.out('array')); // ['fox'] (minimal processing)
// Lazy scan without specific pattern
const minimal = nlp.lazy("Some text to scan quickly");
console.log(minimal.terms().length); // Number of terms with minimal analysis
Create optimized data structures for high-performance text processing.
/**
* Turn a list of words into a searchable trie structure
* @param words - Array of words/phrases to index
* @returns Trie object for efficient lookups
*/
buildTrie(words: string[]): object;
/**
* Compile a set of match objects to a more optimized form
* @param matches - Array of match objects to compile
* @returns Net object for efficient pattern matching
*/
buildNet(matches: Match[]): Net;
/**
* Quick find using trie-based lookup
* @param trie - Trie object or array of strings
* @param opts - Lookup options
* @returns View with matching terms
*/
lookup(trie: object | string[], opts?: object): View;
Usage Examples:
// Build trie for fast lookups
const techTerms = [
'machine learning',
'artificial intelligence',
'deep learning',
'neural network',
'data science'
];
const trie = nlp.buildTrie(techTerms);
// Build optimized net for complex pattern matching
const matchObjects = [
{ match: '#Adjective+ machine learning', tag: 'AdvancedML' },
{ match: 'artificial #Adjective* intelligence', tag: 'AI' },
{ match: '#Value+ (years|months) of experience', tag: 'Experience' }
];
const net = nlp.buildNet(matchObjects);
// Use trie for fast matching
const doc = nlp("I study machine learning and data science");
const matches = doc.lookup(trie);
console.log(matches.out('array')); // ['machine learning', 'data science']
// Use net for complex pattern matching
const complexDoc = nlp("I have 5 years of experience with advanced machine learning");
const netResults = complexDoc.sweep(net);
console.log(netResults.found); // Shows matches with tags
// Direct array lookup (less efficient but simpler)
const simpleMatches = doc.lookup(techTerms);
console.log(simpleMatches.out('array')); // ['machine learning', 'data science']
Access internal library data and methods for debugging and extension.
/**
* Access library internals
* @returns Internal world object with methods, models, etc
*/
world(): object;
/**
* Access library metadata and linguistic models
* @returns Model object with linguistic data
*/
model(): object;
/**
* Access exposed library methods
* @returns Methods object with available functions
*/
methods(): object;
/**
* Get which compute functions run automatically
* @returns Array of hook names
*/
hooks(): string[];
/**
* Get current library version
*/
version: string;
/**
* Enable debug logging for decision-making
* @param toLog - Whether to enable logging or specific log type
* @returns Current verbose setting
*/
verbose(toLog?: boolean | string): any;
Usage Examples:
// Inspect library internals
const world = nlp.world();
console.log(Object.keys(world)); // ['methods', 'model', 'compute', 'hooks']
const model = nlp.model();
console.log(Object.keys(model)); // Various linguistic models
const methods = nlp.methods();
console.log(Object.keys(methods)); // Available processing methods
// Check processing hooks
const hooks = nlp.hooks();
console.log(hooks); // ['alias', 'normalize', 'contractions', ...]
// Enable debug logging
nlp.verbose(true);
const doc = nlp("test text"); // Will log processing decisions
// Check version
console.log(nlp.version); // '14.14.4'
Complete plugin definition with all available extension points.
interface Plugin {
/** Add new methods to View objects */
methods?: {
[className: string]: {
[methodName: string]: Function;
};
};
/** Add linguistic models and data */
model?: {
[category: string]: any;
};
/** Add compute functions for text processing */
compute?: {
[functionName: string]: Function;
};
/** Add processing hooks that run automatically */
hooks?: string[];
/** Add new part-of-speech tags */
tags?: {
[tagName: string]: {
isA?: string;
not?: string;
[property: string]: any;
};
};
/** Add words to lexicon */
words?: {
[word: string]: string;
};
/** Add frozen (unchangeable) words */
frozen?: {
[word: string]: string;
};
/** Add static methods to nlp constructor */
lib?: {
[methodName: string]: Function;
};
/** Function to add instance methods to View class */
api?: (View: any) => void;
/** Function to mutate world object directly */
mutate?: (world: object, nlp: any) => void;
}
Usage Examples:
// Comprehensive plugin example
const comprehensivePlugin = {
// Add View methods
methods: {
View: {
// Find programming-related terms
programming() {
return this.match('#ProgrammingLanguage|#Framework|#Technology');
},
// Get technical difficulty score
techComplexity() {
const advanced = this.match('#AdvancedTech').length;
const basic = this.match('#BasicTech').length;
return { advanced, basic, score: advanced / (basic + advanced) };
}
}
},
// Add linguistic model data
model: {
techDifficulty: {
advanced: ['kubernetes', 'tensorflow', 'blockchain'],
basic: ['html', 'css', 'javascript']
}
},
// Add compute functions
compute: {
techAnalysis(doc) {
// Custom processing logic
doc.terms().forEach(term => {
if (this.model.techDifficulty.advanced.includes(term.normal)) {
term.tag('AdvancedTech');
}
});
}
},
// Add processing hooks
hooks: ['techAnalysis'],
// Add custom tags
tags: {
ProgrammingLanguage: { isA: 'Technology' },
Framework: { isA: 'Technology' },
Technology: { isA: 'Noun' },
AdvancedTech: { isA: 'Technology' },
BasicTech: { isA: 'Technology' }
},
// Add words
words: {
'react': 'Framework',
'vue': 'Framework',
'python': 'ProgrammingLanguage',
'docker': 'Technology'
},
// Add static methods
lib: {
getTechStats(text) {
const doc = nlp(text);
return doc.techComplexity();
}
},
// Add methods via API function
api(View) {
View.prototype.debugTech = function() {
console.log('Tech terms:', this.programming().out('array'));
return this;
};
},
// Mutate world object
mutate(world, nlp) {
// Add custom processing logic to world
world.techEnabled = true;
}
};
// Register the comprehensive plugin
nlp.plugin(comprehensivePlugin);
// Use the extended functionality
const doc = nlp("I use react and kubernetes for web development");
// Use new View methods
const techTerms = doc.programming();
console.log(techTerms.out('array')); // ['react', 'kubernetes']
const complexity = doc.techComplexity();
console.log(complexity); // { advanced: 1, basic: 1, score: 0.5 }
// Use static methods
const stats = nlp.getTechStats("Python and docker are useful tools");
console.log(stats);
// Use API methods
doc.debugTech(); // Logs tech terms to console
interface Plugin {
methods?: { [className: string]: { [methodName: string]: Function } };
model?: { [category: string]: any };
compute?: { [functionName: string]: Function };
hooks?: string[];
tags?: { [tagName: string]: TagDefinition };
words?: { [word: string]: string };
frozen?: { [word: string]: string };
lib?: { [methodName: string]: Function };
api?: (View: any) => void;
mutate?: (world: object, nlp: any) => void;
}
/**
 * Definition of a single custom tag; the value type of each entry in
 * `Plugin.tags`.
 */
interface TagDefinition {
/** Name of the parent tag this tag inherits from, e.g. 'Noun'. */
isA?: string;
/** Name of another tag; presumably marks the two as mutually exclusive — TODO confirm. */
not?: string;
/** Any additional properties are accepted and left untyped. */
[property: string]: any;
}
/**
 * Dictionary mapping a word to the name of its tag, e.g.
 * `{ bitcoin: 'Currency' }`. Accepted by `addWords()` and `typeahead()`.
 */
interface Lexicon {
[word: string]: string;
}
/**
 * One match object; `buildNet()` takes an array of these.
 */
interface Match {
/** Match expression, e.g. '#Adjective+ machine learning'. */
match: string;
/** Tag name(s) paired with the pattern, e.g. 'AdvancedML'; presumably applied to matched terms — TODO confirm. */
tag?: string | string[];
/** NOTE(review): presumably tag name(s) to remove from matched terms — confirm. */
unTag?: string | string[];
/** NOTE(review): looks like a capture-group name or index within `match` — confirm. */
group?: string | number;
/** NOTE(review): presumably a free-text label recorded for debugging — confirm. */
reason?: string;
/** NOTE(review): presumably prevents later changes to the applied tags — confirm. */
freeze?: boolean;
}
/**
 * Compiled form of a set of match objects, as returned by `buildNet()`.
 */
interface Net {
/** NOTE(review): internal lookup data; its exact shape is not visible here — confirm. */
hooks: object;
/** NOTE(review): purpose is not visible here — confirm. */
always?: any;
/** Flag identifying the object as a Net; presumably always true — TODO confirm. */
isNet: boolean;
}
/**
 * Shape of the library's internal world object. Its members mirror the
 * keys listed for `nlp.world()` in the usage example: methods, model,
 * compute and hooks.
 */
interface WorldObject {
  /** Library methods, grouped by category and then by method name. */
  methods: { [category: string]: { [method: string]: Function } };
  /** Linguistic models and data, keyed by category. */
  model: { [category: string]: any };
  /**
   * Compute functions, keyed by function name. The key is named
   * `functionName` because `function` is a reserved word and cannot be
   * used as an index-signature parameter name.
   */
  compute: { [functionName: string]: Function };
  /** Names of the compute functions that run automatically. */
  hooks: string[];
}