JavaScript Unicode normalization library providing NFC, NFD, NFKC, NFKD forms and String.prototype.normalize polyfill
npx @tessl/cli install tessl/npm-unorm@1.6.0

Unorm is a JavaScript Unicode normalization library that provides all four Unicode normalization forms (NFC, NFD, NFKC, NFKD) according to the Unicode 8.0 standard. It serves as both a standalone library and a polyfill for String.prototype.normalize() in environments that don't natively support it.
npm install unorm

const unorm = require('unorm');

For AMD (RequireJS):
define(['unorm'], function(unorm) {
// Use unorm functions
});

In browser (global):
// Available as global unorm object
unorm.nfc(string);

const unorm = require('unorm');
// Example text with mixed Unicode forms
const text = 'The \u212B symbol invented by A. J. \u00C5ngstr\u00F6m';
// Apply different normalization forms
const nfcText = unorm.nfc(text); // Canonical composition
const nfdText = unorm.nfd(text); // Canonical decomposition
const nfkcText = unorm.nfkc(text); // Compatibility composition
const nfkdText = unorm.nfkd(text); // Compatibility decomposition
console.log('Original:', text);
console.log('NFC:', nfcText);
console.log('NFD:', nfdText);
console.log('NFKC:', nfkcText);
console.log('NFKD:', nfkdText);
// Using as String.prototype.normalize polyfill
console.log('Polyfill:', text.normalize('NFC'));

Unorm implements Unicode normalization according to Unicode Standard Annex #15, providing a comprehensive solution for text normalization in JavaScript environments.
- Polyfill: supplies String.prototype.normalize() when native support is unavailable.

Unicode normalization addresses the fact that the same text can be represented in multiple ways using different combinations of base characters and combining marks.
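Canonical vs Compatibility:
- Canonical equivalence: sequences that represent the same abstract character, e.g. "é" as U+00E9 versus "e" followed by U+0301.
- Compatibility equivalence: a broader equivalence that also folds presentation variants, e.g. subscript "₂" versus "2".
Decomposition vs Composition:
- Decomposition: breaks composite characters down into base characters plus combining marks.
- Composition: combines base characters and combining marks into single codepoints when possible.
The Four Forms:
- NFD: canonical decomposition.
- NFC: canonical decomposition followed by canonical composition.
- NFKD: compatibility decomposition.
- NFKC: compatibility decomposition followed by canonical composition.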
The library automatically detects if String.prototype.normalize() is available in the current environment. If not present, it adds the method using Object.defineProperty() with proper error handling that matches the ECMAScript specification. The shimApplied property indicates whether the polyfill was activated.
Applies canonical decomposition followed by canonical composition to produce a composed form.
/**
* Normalize string using Canonical Decomposition followed by Canonical Composition
* @param {string} str - String to normalize
* @returns {string} NFC normalized string
*/
function nfc(str);

Usage Example:
const unorm = require('unorm');
// Combining characters are composed into single codepoints when possible
const result = unorm.nfc('a\u0308'); // ä (combining diaeresis) -> ä (single codepoint)
console.log(result); // "\u00e4"

Applies canonical decomposition to produce a decomposed form where composite characters are broken down into base characters plus combining marks.
/**
* Normalize string using Canonical Decomposition
* @param {string} str - String to normalize
* @returns {string} NFD normalized string
*/
function nfd(str);

Usage Example:
const unorm = require('unorm');
// Composite characters are decomposed into base + combining marks
const result = unorm.nfd('ä'); // ä (single codepoint) -> a + combining diaeresis
console.log(result); // "a\u0308"

Applies compatibility decomposition followed by canonical composition, replacing compatibility characters with their canonical equivalents.
/**
* Normalize string using Compatibility Decomposition followed by Canonical Composition
* @param {string} str - String to normalize
* @returns {string} NFKC normalized string
*/
function nfkc(str);

Usage Example:
const unorm = require('unorm');
// Compatibility characters like subscripts are replaced with normal equivalents
const result = unorm.nfkc('CO₂'); // Subscript 2 becomes normal 2
console.log(result); // "CO2"

Applies compatibility decomposition to replace compatibility characters with their canonical forms and decompose composite characters.
/**
* Normalize string using Compatibility Decomposition
* @param {string} str - String to normalize
* @returns {string} NFKD normalized string
*/
function nfkd(str);

Usage Example:
const unorm = require('unorm');
// Useful for search/indexing by removing combining marks
const text = 'Ångström';
const normalized = unorm.nfkd(text);
const withoutMarks = normalized.replace(/[\u0300-\u036F]/g, ''); // Remove combining marks
console.log(withoutMarks); // "Angstrom"

Automatically provides the String.prototype.normalize() method when it is not natively available in the JavaScript environment.
/**
* Polyfill for String.prototype.normalize method
* @param {string} [form="NFC"] - Normalization form: "NFC", "NFD", "NFKC", or "NFKD"
* @returns {string} Normalized string according to specified form
* @throws {TypeError} When called on null or undefined
* @throws {RangeError} When invalid normalization form provided
*/
String.prototype.normalize(form);

Usage Examples:
// When native normalize() isn't available, unorm provides it
require('unorm'); // Automatically adds polyfill if needed
const text = 'café';
console.log(text.normalize('NFC')); // Uses unorm's implementation
console.log(text.normalize('NFD')); // Decomposes é into e + combining accent
console.log(text.normalize('NFKC')); // Same as NFC for this example
console.log(text.normalize('NFKD')); // Same as NFD for this example
// Error handling
try {
text.normalize('INVALID'); // Throws RangeError
} catch (error) {
console.error(error.message); // "Invalid normalization form: INVALID"
}

Property to check whether the String.prototype.normalize polyfill was applied.
/**
* Boolean indicating whether String.prototype.normalize polyfill was applied
* @type {boolean}
*/
unorm.shimApplied;

Usage Example:
const unorm = require('unorm');
if (unorm.shimApplied) {
console.log('String.prototype.normalize polyfill was applied');
} else {
console.log('Native String.prototype.normalize is available');
}

/**
* Main unorm module interface
*/
interface UnormModule {
/** Canonical Decomposition followed by Canonical Composition */
nfc: (str: string) => string;
/** Canonical Decomposition */
nfd: (str: string) => string;
/** Compatibility Decomposition followed by Canonical Composition */
nfkc: (str: string) => string;
/** Compatibility Decomposition */
nfkd: (str: string) => string;
/** Whether String.prototype.normalize polyfill was applied */
shimApplied: boolean;
}
/**
* Valid normalization forms for String.prototype.normalize
*/
type NormalizationForm = "NFC" | "NFD" | "NFKC" | "NFKD";

const unorm = require('unorm');
/**
 * Build a search-friendly version of a string by stripping diacritics.
 *
 * The text is first run through compatibility decomposition (NFKD) so that
 * accents become separate combining marks; every character in the
 * U+0300-U+036F range is then removed.
 *
 * @param {string} text - Text to simplify
 * @returns {string} Decomposed text without U+0300-U+036F combining marks
 */
function normalizeForSearch(text) {
  const combiningMarks = /[\u0300-\u036F]/g;
  return unorm.nfkd(text).replace(combiningMarks, '');
}
// Normalize both the query and the text being searched so they compare on equal terms
const searchTerm = normalizeForSearch('café');
// Named documentText rather than document: a top-level `const document`
// collides with the browser's global `document` object
const documentText = normalizeForSearch('I love café au lait');
console.log(documentText.includes(searchTerm)); // true

const unorm = require('unorm');
/**
 * Test whether two strings are equal once both are in NFC form.
 *
 * @param {string} str1 - First string
 * @param {string} str2 - Second string
 * @returns {boolean} true when the NFC forms of both strings are identical
 */
function compareStrings(str1, str2) {
  const left = unorm.nfc(str1);
  const right = unorm.nfc(str2);
  return left === right;
}
// Escape sequences pin the exact code points; a literal 'é' could be stored
// in either precomposed or decomposed form depending on how the file was saved
const text1 = '\u00E9'; // Single codepoint (precomposed é)
const text2 = 'e\u0301'; // e + combining acute accent
console.log(compareStrings(text1, text2)); // true

const unorm = require('unorm');
/**
 * Tidy raw user input: strip surrounding whitespace, then normalize to NFC.
 *
 * @param {string} input - Raw text
 * @returns {string} Trimmed text in NFC form
 */
function cleanUserInput(input) {
  const trimmed = input.trim();
  return unorm.nfc(trimmed);
}
// Sample input padded with spaces; cleanUserInput trims it and applies NFC
const userInput = ' café '; // With inconsistent Unicode
const cleaned = cleanUserInput(userInput);
console.log(cleaned); // Normalized "café"