Comprehensive natural language processing library with tokenization, stemming, classification, sentiment analysis, phonetics, distance algorithms, and WordNet integration.
npx @tessl/cli install tessl/npm-natural@8.1.0
Natural is a comprehensive natural language processing library for Node.js that provides tokenization, stemming, classification, sentiment analysis, phonetics, string distance metrics, n-grams, TF-IDF calculations, and WordNet integration. It supports multiple languages and offers both functional and object-oriented APIs for text processing tasks.
npm install natural
const natural = require('natural');
// All functionality available on natural object
const { BayesClassifier, PorterStemmer, WordTokenizer } = natural;
For ES modules (when supported by your environment):
import natural from 'natural';
// Note: Named imports may not work in all environments
// Use: import natural from 'natural'; then access natural.BayesClassifier
Note: Natural.js primarily uses CommonJS exports. ES6 named imports may not work in all environments. For best compatibility, use the default import and access methods via the natural object.
const natural = require('natural');
// Text classification
const classifier = new natural.BayesClassifier();
classifier.addDocument('I love this movie', 'positive');
classifier.addDocument('This movie is terrible', 'negative');
classifier.train();
const sentiment = classifier.classify('This is amazing'); // 'positive'
// Text processing
const tokens = natural.WordTokenizer.tokenize('Hello world, how are you?');
// ['Hello', 'world', 'how', 'are', 'you']
const stemmed = natural.PorterStemmer.stem('running');
// 'run'
// Distance calculation
const distance = natural.JaroWinklerDistance('sitting', 'kitten');
// 0.746
// N-grams
const bigrams = natural.NGrams.bigrams('Hello world how are you');
// [['Hello', 'world'], ['world', 'how'], ['how', 'are'], ['are', 'you']]
Natural.js is organized into specialized modules that work independently or together:
Machine learning classifiers for categorizing text into predefined classes. Includes Naive Bayes, Logistic Regression, and Maximum Entropy classifiers with training, persistence, and evaluation capabilities.
class BayesClassifier {
constructor(stemmer?: object, smoothing?: number);
addDocument(text: string, classification: string): void;
train(): void;
classify(observation: string): string;
getClassifications(observation: string): Array<{label: string, value: number}>;
}
class LogisticRegressionClassifier {
constructor(stemmer?: object);
addDocument(text: string, classification: string): void;
train(): void;
classify(observation: string): string;
}
Comprehensive text preprocessing tools including tokenization, stemming, and normalization for multiple languages. Essential for preparing raw text for analysis.
// Tokenizers
class WordTokenizer {
static tokenize(text: string): string[];
}
class AggressiveTokenizer {
constructor(options?: object);
tokenize(text: string): string[];
}
// Stemmers
class PorterStemmer {
static stem(word: string): string;
}
class LancasterStemmer {
static stem(word: string): string;
}
// Normalizers
function normalize(tokens: string[]): string[];
function removeDiacritics(text: string): string;
Algorithms for calculating similarity between strings, useful for fuzzy matching, spell checking, and text comparison tasks.
function JaroWinklerDistance(s1: string, s2: string): number;
function LevenshteinDistance(s1: string, s2: string): number;
function DamerauLevenshteinDistance(s1: string, s2: string): number;
function DiceCoefficient(s1: string, s2: string): number;
function HammingDistance(s1: string, s2: string): number;
Multi-language sentiment analysis using various lexicons and methodologies for determining emotional tone in text.
class SentimentAnalyzer {
constructor(language: string, stemmer?: object, type: string);
getSentiment(words: string[]): number;
}
Statistical text analysis tools for creating n-grams and calculating term frequency-inverse document frequency scores.
// N-grams
function ngrams(sequence: string | string[], n: number, startSymbol?: string, endSymbol?: string): string[][];
function bigrams(sequence: string | string[]): string[][];
function trigrams(sequence: string | string[]): string[][];
// TF-IDF
class TfIdf {
constructor();
addDocument(document: string | string[], key?: string): void;
tfidf(terms: string, documentIndex: number): number;
listTerms(documentIndex: number): Array<{term: string, tfidf: number}>;
}
Brill tagger implementation for assigning grammatical parts of speech to words in sentences.
class BrillPOSTagger {
constructor(lexicon: object, ruleSet: object);
tag(sentence: string[]): object;
}
class Lexicon {
constructor();
addTaggedWord(word: string, tag: string): void;
}
Interface to WordNet lexical database for accessing word definitions, synonyms, and semantic relationships.
class WordNet {
constructor(dataDir?: string);
lookup(word: string, callback: (results: object[]) => void): void;
get(synsetOffset: number, pos: string, callback: (result: object) => void): void;
}
Phonetic encoding algorithms for matching words by sound rather than spelling.
class SoundEx {
static process(word: string): string;
}
class Metaphone {
static process(word: string): string;
}
class DoubleMetaphone {
static process(word: string): string[];
}
Japanese text transliteration functionality for converting Hiragana and Katakana to romanized text using the modified Hepburn system.
class TransliterateJa {
static transliterate(text: string): string;
}
Supporting data structures and utility functions including tries, graph algorithms, storage backends, and spell checking functionality.
class Trie {
constructor();
addString(string: string): void;
contains(string: string): boolean;
findPrefix(prefix: string): string[];
}
class ShortestPathTree {
constructor(graph: EdgeWeightedDigraph, source: number);
distTo(vertex: number): number;
hasPathTo(vertex: number): boolean;
}
class Spellcheck {
constructor();
isCorrect(word: string): boolean;
getCorrections(word: string): string[];
}
// Classification result
interface ClassificationResult {
label: string;
value: number;
}
// N-gram statistics
interface NgramStatistics {
ngrams: string[][];
frequencies: {[key: string]: number};
Nr: {[key: string]: number};
numberOfNgrams: number;
}
// TF-IDF term
interface TfIdfTerm {
term: string;
tfidf: number;
}
// WordNet result
interface WordNetResult {
synsetOffset: number;
pos: string;
gloss: string;
synonyms: string[];
}