Sentence-level text comparison for natural language processing applications. Automatically detects sentence boundaries based on punctuation marks followed by whitespace.
Performs sentence-level diff between two strings, treating each sentence as a token.
/**
* Compare two strings at the sentence level
* @param oldStr - Original text with sentences
* @param newStr - New text to compare against
* @param options - Configuration options
* @returns Array of change objects representing the diff
*/
function diffSentences(oldStr, newStr, options);

Usage Examples:
import { diffSentences } from "diff";
// Basic sentence diff
const result = diffSentences(
"Hello world. This is a test. Good bye.",
"Hello world. This is modified. Good bye."
);
console.log(result);
// [
// { value: "Hello world. ", count: 1 },
// { value: "This is a test. ", removed: true, count: 1 },
// { value: "This is modified. ", added: true, count: 1 },
// { value: "Good bye.", count: 1 }
// ]
// Multiple sentence changes
const multiResult = diffSentences(
"First sentence. Second sentence. Third sentence.",
"First sentence. New second sentence. Third sentence. Added sentence!"
);

Pre-configured Diff instance for sentence-level comparisons with sentence-aware tokenization.
/**
* Pre-configured sentence diff instance
* Uses regex-based sentence boundary detection
*/
const sentenceDiff: Diff;

The sentence diff uses the following rules for sentence detection:
- Sentence endings: `.`, `!`, or `?` followed by whitespace or the end of the text
- Pattern: `/(\S.+?[.!?])(?=\s+|$)/`

import { diffSentences } from "diff";
// Various punctuation marks
const punctuation = diffSentences(
"Statement. Question? Exclamation!",
"New statement. Question? Different exclamation!"
);
// Sentences with complex punctuation
const complex = diffSentences(
"Dr. Smith said hello. Then he left.",
"Dr. Smith said goodbye. Then he left."
);
// Note: "Dr." is followed by whitespace, so the detector treats it as a sentence end (a known limitation with abbreviations)

import { diffSentences } from "diff";
function analyzeParagraphChanges(oldParagraph, newParagraph) {
const changes = diffSentences(oldParagraph, newParagraph);
const stats = {
unchanged: 0,
added: 0,
removed: 0,
modified: 0
};
changes.forEach(change => {
if (change.added) stats.added++;
else if (change.removed) stats.removed++;
else stats.unchanged++;
});
// Estimate modifications (adjacent add/remove pairs)
for (let i = 0; i < changes.length - 1; i++) {
if (changes[i].removed && changes[i + 1].added) {
stats.modified++;
stats.added--;
stats.removed--;
}
}
return stats;
}
const oldText = "The cat sat on the mat. It was comfortable. The end.";
const newText = "The dog sat on the rug. It was very comfortable. The end.";
const analysis = analyzeParagraphChanges(oldText, newText);

import { diffSentences } from "diff";
function compareDocuments(doc1, doc2) {
const sentences = diffSentences(doc1, doc2);
return sentences.map((change, index) => ({
sentenceNumber: index + 1,
content: change.value.trim(),
status: change.added ? 'added' :
change.removed ? 'removed' : 'unchanged',
wordCount: change.value.trim().split(/\s+/).length
}));
}
// Usage for document analysis
const original = "First sentence. Second sentence. Third sentence.";
const revised = "First sentence. Modified second sentence. Third sentence. New sentence.";
const comparison = compareDocuments(original, revised);

import { diffSentences } from "diff";
function diffLongDocument(oldDoc, newDoc, callback) {
diffSentences(oldDoc, newDoc, {
callback: callback,
maxEditLength: 1000, // Limit for very long documents
timeout: 15000 // 15 second timeout
});
}
// Usage
diffLongDocument(longDocument1, longDocument2, (result) => {
if (result) {
const sentenceCount = result.length;
const changes = result.filter(r => r.added || r.removed).length;
console.log(`Compared ${sentenceCount} sentences, ${changes} changes found`);
} else {
console.log("Document too complex to diff efficiently");
}
});

import { sentenceDiff } from "diff";
// Using the pre-configured instance directly
const directResult = sentenceDiff.diff(
"Old sentence. Another old sentence.",
"New sentence. Another old sentence."
);
// Access tokenization
const sentences = sentenceDiff.tokenize("First. Second! Third?");
console.log("Detected sentences:", sentences);
// ["First. ", "Second! ", "Third?"]

// The sentence detector has limitations with:
// Abbreviations
const abbrev = "Dr. Smith works at U.S.A. Corp.";
// May not handle all abbreviations correctly
// Decimal numbers
const numbers = "The price is $12.99. That's expensive.";
// Should work correctly as no whitespace after decimal
// Ellipses
const ellipses = "Well... I think so.";
// The dots inside an ellipsis are not boundaries, but an ellipsis followed by whitespace still ends a sentence

import { diffArrays, diffSentences } from "diff";
// For more sophisticated sentence detection, use Intl.Segmenter
function advancedSentenceDiff(oldText, newText) {
if (typeof Intl !== 'undefined' && Intl.Segmenter) {
const segmenter = new Intl.Segmenter('en', { granularity: 'sentence' });
const oldSentences = Array.from(segmenter.segment(oldText), s => s.segment);
const newSentences = Array.from(segmenter.segment(newText), s => s.segment);
return diffArrays(oldSentences, newSentences);
} else {
// Fallback to built-in sentence diff
return diffSentences(oldText, newText);
}
}