A well-tested UTF-8 encoder/decoder written in JavaScript
npx @tessl/cli install tessl/npm-utf8@3.0.0
UTF-8.js is a well-tested UTF-8 encoder/decoder written in JavaScript that provides proper UTF-8 encoding and decoding according to the WHATWG Encoding Standard. It handles Unicode scalar values correctly and provides comprehensive error handling for malformed input.
npm install utf8
const utf8 = require('utf8');
For ES modules:
import * as utf8 from 'utf8';
Browser usage:
<script src="utf8.js"></script>
<!-- Creates global utf8 object -->
const utf8 = require('utf8');
// Encode JavaScript string to UTF-8 byte string
const encoded = utf8.encode('Hello, 世界!');
console.log(encoded); // Output: UTF-8 encoded byte string
// Decode UTF-8 byte string back to JavaScript string
const decoded = utf8.decode(encoded);
console.log(decoded); // Output: 'Hello, 世界!'
// Check library version
console.log(utf8.version); // Output: '3.0.0'
Encodes JavaScript strings as UTF-8 byte strings with proper Unicode scalar value handling.
/**
* Encodes any given JavaScript string as UTF-8
* @param {string} string - JavaScript string to encode as UTF-8
* @returns {string} UTF-8-encoded byte string
* @throws {Error} When input contains non-scalar values (lone surrogates)
*/
utf8.encode(string);
Usage Examples:
// Basic ASCII encoding
utf8.encode('Hello');
// → 'Hello'
// Unicode characters
utf8.encode('\xA9'); // U+00A9 COPYRIGHT SIGN
// → '\xC2\xA9'
// Supplementary characters (surrogate pairs)
utf8.encode('\uD800\uDC01'); // U+10001 LINEAR B SYLLABLE B038 E
// → '\xF0\x90\x80\x81'
// Multi-byte Unicode
utf8.encode('世界'); // Chinese characters
// → '\xE4\xB8\x96\xE7\x95\x8C'
Error Handling:
try {
// This will throw an error due to lone surrogate
utf8.encode('\uD800'); // High surrogate without matching low surrogate
} catch (error) {
console.error(error.message); // "Lone surrogate U+D800 is not a scalar value"
}
Decodes UTF-8 byte strings back to JavaScript strings with malformed input detection.
/**
* Decodes any given UTF-8-encoded string as UTF-8
* @param {string} byteString - UTF-8 encoded byte string to decode
* @returns {string} JavaScript string (UTF-8 decoded)
* @throws {Error} When malformed UTF-8 is detected
*/
utf8.decode(byteString);
Usage Examples:
// Basic decoding
utf8.decode('\xC2\xA9');
// → '\xA9' (U+00A9 COPYRIGHT SIGN)
// Supplementary characters
utf8.decode('\xF0\x90\x80\x81');
// → '\uD800\uDC01' (U+10001 LINEAR B SYLLABLE B038 E)
// Multi-byte sequences
utf8.decode('\xE4\xB8\x96\xE7\x95\x8C');
// → '世界'
Error Handling:
try {
// This will throw an error due to malformed UTF-8
utf8.decode('\xFF\xFE'); // Invalid UTF-8 sequence
} catch (error) {
console.error(error.message); // "Invalid UTF-8 detected"
}
try {
// This will throw an error due to incomplete sequence
utf8.decode('\xC2'); // Incomplete 2-byte sequence
} catch (error) {
console.error(error.message); // "Invalid byte index"
}
Provides the semantic version number of the library.
/**
* Semantic version number of the utf8 library
* @type {string}
*/
utf8.version;
Usage Example:
console.log(`Using utf8.js version ${utf8.version}`);
// Output: "Using utf8.js version 3.0.0"
The library throws standard JavaScript Error objects with descriptive messages:
- when encode() encounters unpaired surrogate characters
- when decode() encounters malformed UTF-8 sequences
The library has been tested and works in: