tessl/npm-natural

Comprehensive natural language processing library with tokenization, stemming, classification, sentiment analysis, phonetics, distance algorithms, and WordNet integration.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Security

Pending

The risk profile of this skill

Overview

Eval results

Files

Part-of-Speech Tagging

Name: tessl/npm-natural
Author: tessl

Brill tagger implementation for assigning grammatical parts of speech to words in sentences using transformation-based learning. The system combines lexical lookup with contextual transformation rules.

Capabilities

Brill POS Tagger

Main part-of-speech tagger using Eric Brill's transformation-based approach.

/**
 * Part-of-speech tagger using Eric Brill's transformation-based approach.
 *
 * Tagging is a two-stage process: a lexicon lookup assigns each word an
 * initial tag, then contextual transformation rules revise those tags.
 *
 * Declared as an ambient class (`declare`) because only signatures are
 * listed here; bodiless members are not valid in a regular class.
 *
 * @param lexicon - Lexicon instance used for the initial word tagging
 * @param ruleSet - RuleSet instance containing the transformation rules
 */
declare class BrillPOSTagger {
  constructor(lexicon: Lexicon, ruleSet: RuleSet);

  /**
   * Tag a sentence with part-of-speech tags.
   *
   * Presumably equivalent to `tagWithLexicon` followed by `applyRules` —
   * confirm against the implementation.
   *
   * @param sentence - Array of word strings, one entry per word
   * @returns Sentence object with tagged words
   */
  tag(sentence: string[]): Sentence;

  /**
   * Apply the initial, lexicon-based tagging stage only.
   *
   * @param sentence - Array of words
   * @returns Initially tagged sentence (no transformation rules applied yet)
   */
  tagWithLexicon(sentence: string[]): Sentence;

  /**
   * Apply the transformation rules to improve an initial tagging.
   *
   * @param taggedSentence - Sentence that already carries initial tags
   * @returns Sentence with improved tags
   *   (NOTE(review): whether the input is modified in place or copied is
   *   not stated here — confirm before reusing `taggedSentence`)
   */
  applyRules(taggedSentence: Sentence): Sentence;
}

Lexicon

Dictionary mapping words to their most likely part-of-speech tags.

/**
 * POS tagging lexicon: a dictionary mapping words to their most likely
 * part-of-speech tags.
 *
 * Declared as an ambient class (`declare`) because only signatures are
 * listed here; bodiless members are not valid in a regular class.
 */
declare class Lexicon {
  constructor();

  /**
   * Add a word-tag pair to the lexicon.
   *
   * @param word - Word to add
   * @param tag - POS tag for the word
   */
  addTaggedWord(word: string, tag: string): void;

  /**
   * Get the most likely tag for a word.
   *
   * @param word - Word to look up
   * @returns Most likely POS tag
   *   (NOTE(review): the result for a word that is not in the lexicon is
   *   not specified here — guard with `hasWord` or confirm the fallback)
   */
  tagWord(word: string): string;

  /**
   * Check whether a word exists in the lexicon.
   *
   * @param word - Word to check
   * @returns true if the word is in the lexicon
   */
  hasWord(word: string): boolean;
}

Rule Set

Collection of transformation rules for improving POS tag accuracy.

/**
 * Set of transformation rules for POS tagging, used to improve the
 * accuracy of an initial tagging.
 *
 * Declared as an ambient class (`declare`) because only signatures are
 * listed here; bodiless members are not valid in a regular class.
 */
declare class RuleSet {
  constructor();

  /**
   * Add a transformation rule to the set.
   *
   * @param rule - RuleTemplate instance
   */
  addRule(rule: RuleTemplate): void;

  /**
   * Apply all rules in the set to a sentence.
   *
   * @param sentence - Tagged sentence to transform
   * @returns Sentence with applied transformations
   *   (NOTE(review): rule application order and in-place vs. copy
   *   semantics are not stated here — confirm)
   */
  applyRules(sentence: Sentence): Sentence;
}

/**
 * Individual transformation rule template.
 *
 * Declared as an ambient class (`declare`) because only signatures are
 * listed here; bodiless members are not valid in a regular class.
 */
declare class RuleTemplate {
  constructor();

  /**
   * Apply this rule to a sentence.
   *
   * @param sentence - Sentence to apply the rule to
   * @returns Modified sentence
   */
  apply(sentence: Sentence): Sentence;
}

/**
 * Pre-defined rule templates, keyed by template name.
 */
declare const ruleTemplates: Record<string, RuleTemplate>;

Sentence and Corpus

Data structures for representing tagged sentences and training corpora.

/**
 * Represents a sentence with POS tags: an ordered list of tagged words.
 *
 * Declared as an ambient class (`declare`) because only signatures are
 * listed here; bodiless members are not valid in a regular class.
 */
declare class Sentence {
  constructor();

  /**
   * Add a tagged word to the sentence.
   *
   * @param word - Word text
   * @param tag - POS tag
   */
  addTaggedWord(word: string, tag: string): void;

  /**
   * Get all tagged words.
   *
   * @returns Array of tagged word objects
   */
  getTaggedWords(): TaggedWord[];

  /**
   * Get the word at a specific position.
   *
   * @param index - Position in the sentence (presumably zero-based — confirm)
   * @returns Tagged word at that position
   *   (NOTE(review): behavior for an out-of-range index is not stated here)
   */
  getWordAt(index: number): TaggedWord;

  /**
   * Get the sentence length.
   *
   * Note that this is a method, not a property: call `sentence.length()`.
   *
   * @returns Number of words in the sentence
   */
  length(): number;
}

/**
 * Tagged word representation: a single word paired with its POS tag
 */
interface TaggedWord {
  /** Word text */
  word: string;
  /** POS tag assigned to the word */
  tag: string;
}

/**
 * Training corpus for the POS tagger: a collection of tagged sentences.
 *
 * Declared as an ambient class (`declare`) because only signatures are
 * listed here; bodiless members are not valid in a regular class.
 */
declare class Corpus {
  constructor();

  /**
   * Add a sentence to the corpus.
   *
   * @param sentence - Sentence instance
   */
  addSentence(sentence: Sentence): void;

  /**
   * Get all sentences in the corpus.
   *
   * @returns Array of sentences
   */
  getSentences(): Sentence[];
}

Usage Examples:

const natural = require('natural');

// NOTE(review): method names follow the signatures documented on this page —
// confirm them against the installed `natural` version.

// Seed a lexicon with one tag per known word
const lexicon = new natural.Lexicon();
[
  ['the', 'DT'],
  ['cat', 'NN'],
  ['dog', 'NN'],
  ['runs', 'VBZ'],
  ['quickly', 'RB']
].forEach(([word, tag]) => lexicon.addTaggedWord(word, tag));

// No rules are added here; in practice you would load pre-trained
// transformation rules to improve tagging accuracy
const ruleSet = new natural.RuleSet();

// Combine the lexicon and the rule set into a tagger
const tagger = new natural.BrillPOSTagger(lexicon, ruleSet);

// Tag a sentence that is already split into words
const sentence = ['the', 'cat', 'runs', 'quickly'];
const taggedSentence = tagger.tag(sentence);

// Print each result as "word/TAG", one per line
console.log('Tagged sentence:');
taggedSentence.getTaggedWords().forEach(({word, tag}) => {
  console.log(`${word}/${tag}`);
});

Training Components

Trainer and tester for creating and evaluating custom models:

/**
 * Trainer for the Brill POS tagger.
 *
 * Declared as an ambient class (`declare`) because only signatures are
 * listed here; bodiless members are not valid in a regular class.
 */
declare class BrillPOSTrainer {
  constructor();

  /**
   * Train a tagger on a corpus.
   *
   * @param corpus - Training corpus
   * @returns Trained tagger components: the lexicon and rule set that can
   *   be passed to the `BrillPOSTagger` constructor
   */
  train(corpus: Corpus): {lexicon: Lexicon, ruleSet: RuleSet};
}

/**
 * Tester for evaluating tagger performance.
 *
 * Declared as an ambient class (`declare`) because only signatures are
 * listed here; bodiless members are not valid in a regular class.
 */
declare class BrillPOSTester {
  constructor();

  /**
   * Test tagger accuracy on a test corpus.
   *
   * @param tagger - Trained tagger
   * @param testCorpus - Test corpus
   * @returns Accuracy metrics (overall accuracy plus per-tag precision and recall)
   */
  test(tagger: BrillPOSTagger, testCorpus: Corpus): TestResults;
}

/**
 * Metrics reported when a tagger is evaluated against a test corpus
 */
interface TestResults {
  /**
   * Overall accuracy
   * (NOTE(review): scale — fraction vs. percentage — is not stated here; confirm)
   */
  accuracy: number;
  /** Precision per POS tag, keyed by tag */
  precision: Record<string, number>;
  /** Recall per POS tag, keyed by tag */
  recall: Record<string, number>;
}

Advanced Usage

Complete training and testing pipeline:

const natural = require('natural');

/**
 * Train a custom POS tagger
 */
function trainCustomTagger(trainingData) {
  // Create training corpus
  const corpus = new natural.Corpus();
  
  // Add training sentences
  trainingData.forEach(sentenceData => {
    const sentence = new natural.Sentence();
    sentenceData.forEach(({word, tag}) => {
      sentence.addTaggedWord(word, tag);
    });
    corpus.addSentence(sentence);
  });
  
  // Train the model
  const trainer = new natural.BrillPOSTrainer();
  const {lexicon, ruleSet} = trainer.train(corpus);
  
  // Create tagger
  const tagger = new natural.BrillPOSTagger(lexicon, ruleSet);
  
  return tagger;
}

// Example training data: one array of {word, tag} entries per sentence,
// written as compact [word, tag] pairs and expanded below
const trainingData = [
  [['the', 'DT'], ['quick', 'JJ'], ['brown', 'JJ'], ['fox', 'NN'], ['jumps', 'VBZ']],
  [['a', 'DT'], ['lazy', 'JJ'], ['dog', 'NN'], ['sleeps', 'VBZ']]
  // ... more training sentences
].map(pairs => pairs.map(([word, tag]) => ({word, tag})));

// Build a tagger from the examples above
const customTagger = trainCustomTagger(trainingData);

// Tag a new sentence with the trained tagger; of these words only 'the'
// occurs in the training data above
const testSentence = ['the', 'big', 'cat', 'runs'];
const result = customTagger.tag(testSentence);
console.log('Custom tagger results:', result.getTaggedWords());

Working with Pre-trained Models

const natural = require('natural');

/**
 * Load and use pre-trained POS tagger
 */
async function usePresentTrainedTagger() {
  // In practice, you would load pre-trained lexicon and rules
  // This example shows the structure for loading saved models
  
  try {
    // Load pre-trained lexicon (would be from file/database)
    const lexicon = new natural.Lexicon();
    
    // Load common English words with tags
    const commonWords = {
      'the': 'DT', 'a': 'DT', 'an': 'DT',
      'cat': 'NN', 'dog': 'NN', 'house': 'NN',
      'run': 'VB', 'runs': 'VBZ', 'running': 'VBG',
      'quick': 'JJ', 'slow': 'JJ', 'big': 'JJ',
      'quickly': 'RB', 'slowly': 'RB'
    };
    
    Object.entries(commonWords).forEach(([word, tag]) => {
      lexicon.addTaggedWord(word, tag);
    });
    
    // Load rule set (would be from trained model)
    const ruleSet = new natural.RuleSet();
    
    // Create tagger
    const tagger = new natural.BrillPOSTagger(lexicon, ruleSet);
    
    return tagger;
    
  } catch (error) {
    console.error('Error loading pre-trained model:', error);
    throw error;
  }
}

// Usage: tag a few sentences once the tagger is available. Some of these
// words ('brown', 'fox', 'sleeps', 'is') are not in the lexicon built by
// usePresentTrainedTagger.
usePresentTrainedTagger().then(tagger => {
  const sentences = [
    ['the', 'quick', 'brown', 'fox', 'runs'],
    ['a', 'big', 'dog', 'sleeps'],
    ['the', 'house', 'is', 'big']
  ];

  for (const words of sentences) {
    const tagged = tagger.tag(words);
    console.log('Sentence:', words.join(' '));

    // Render the result as space-separated "word/TAG" pairs
    const rendered = tagged
      .getTaggedWords()
      .map(({word, tag}) => `${word}/${tag}`)
      .join(' ');
    console.log('Tagged:', rendered);
    console.log('---');
  }
});

Common POS Tags

Natural.js typically uses the Penn Treebank POS tag set:

// Frequently used POS tags in Natural.js, mapped to a short description
const commonTags = {
  // --- Nouns ---
  NN: 'Noun, singular',
  NNS: 'Noun, plural',
  NNP: 'Proper noun, singular',
  NNPS: 'Proper noun, plural',

  // --- Verbs ---
  VB: 'Verb, base form',
  VBD: 'Verb, past tense',
  VBG: 'Verb, gerund/present participle',
  VBN: 'Verb, past participle',
  VBP: 'Verb, non-3rd person singular present',
  VBZ: 'Verb, 3rd person singular present',

  // --- Adjectives ---
  JJ: 'Adjective',
  JJR: 'Adjective, comparative',
  JJS: 'Adjective, superlative',

  // --- Adverbs ---
  RB: 'Adverb',
  RBR: 'Adverb, comparative',
  RBS: 'Adverb, superlative',

  // --- Determiners ---
  DT: 'Determiner',

  // --- Prepositions ---
  IN: 'Preposition or subordinating conjunction',

  // --- Pronouns ---
  PRP: 'Personal pronoun',
  PRP$: 'Possessive pronoun'
};