A robust HTML entities encoder/decoder with full Unicode support.
npx @tessl/cli install tessl/npm-he@1.2.0

he (for "HTML entities") is a robust HTML entity encoder/decoder written in JavaScript. It supports all standardized named character references as per the HTML specification, handles ambiguous ampersands and other edge cases just like a browser would, and supports full Unicode, including astral symbols. It is well suited to HTML parsers, content management systems, and web applications requiring precise HTML entity handling.
npm install he

const he = require('he');

For ES modules (Node.js with type: "module" or bundlers):
import * as he from 'he';
// or
import he from 'he';

For AMD (RequireJS):
require(['he'], function(he) {
// use he
});

For browser global:
<script src="he.js"></script>
<script>
// window.he is available
</script>

const he = require('he');
// Encode text for safe HTML insertion
const encoded = he.encode('foo © bar ≠ baz 𝌆 qux');
// → 'foo &#xA9; bar &#x2260; baz &#x1D306; qux'
// Decode HTML entities back to text
const decoded = he.decode('foo &copy; bar &ne; baz &#x1D306; qux');
// → 'foo © bar ≠ baz 𝌆 qux'
// Escape unsafe characters for HTML contexts
const escaped = he.escape('<img src="x" onerror="alert(1)">');
// → '&lt;img src=&quot;x&quot; onerror=&quot;alert(1)&quot;&gt;'
// Using named references for better readability
const withNames = he.encode('foo © bar', { useNamedReferences: true });
// → 'foo &copy; bar'

he is built around several key components:
Converts Unicode text to HTML entities for safe insertion into HTML documents.
/**
* Encodes a string by converting symbols to character references
* @param {string} string - The input string to encode
* @param {object} [options] - Optional configuration object
* @param {boolean} [options.useNamedReferences=false] - Use named references like &copy; instead of &#xA9;
* @param {boolean} [options.decimal=false] - Use decimal escapes like &#169; instead of hex escapes like &#xA9;
* @param {boolean} [options.encodeEverything=false] - Encode all symbols including printable ASCII
* @param {boolean} [options.strict=false] - Throw errors on invalid code points
* @param {boolean} [options.allowUnsafeSymbols=false] - Don't encode unsafe HTML chars &<>"'`
* @returns {string} Encoded string safe for HTML insertion
*/
he.encode(string, options)

Usage Examples:
// Basic encoding (hex escapes, safe characters only)
he.encode('foo © bar ≠ baz');
// → 'foo &#xA9; bar &#x2260; baz'
// Using named references
he.encode('foo © bar ≠ baz', { useNamedReferences: true });
// → 'foo &copy; bar &ne; baz'
// Using decimal escapes
he.encode('foo © bar', { decimal: true });
// → 'foo &#169; bar'
// Encoding everything including ASCII
he.encode('hello', { encodeEverything: true });
// → '&#x68;&#x65;&#x6C;&#x6C;&#x6F;'
// Strict mode throws on invalid code points
he.encode('foo\x00bar', { strict: true });
// → Throws Parse error
// Allow unsafe symbols (don't escape HTML characters)
he.encode('foo & bar', { allowUnsafeSymbols: true });
// → 'foo & bar'

Decodes named and numerical character references back to Unicode text using the HTML specification algorithm.
/**
* Decodes HTML entities in a string
* @param {string} html - HTML string containing entities to decode
* @param {object} [options] - Optional configuration object
* @param {boolean} [options.isAttributeValue=false] - Treat input as HTML attribute value context
* @param {boolean} [options.strict=false] - Throw errors on malformed character references
* @returns {string} Decoded Unicode string
*/
he.decode(html, options)

Usage Examples:
// Basic decoding
he.decode('foo &copy; bar &ne; baz &#x1D306; qux');
// → 'foo © bar ≠ baz 𝌆 qux'
// Handles legacy references without a semicolon (text context)
he.decode('foo&ampbar');
// → 'foo&bar'
// Attribute value context (different parsing rules: the reference is left untouched)
he.decode('foo&ampbar', { isAttributeValue: true });
// → 'foo&ampbar'
// Strict mode throws on malformed entities
he.decode('foo&ampbar', { strict: true });
// → Throws Parse error
// Mixed entity types (named, decimal, hexadecimal)
he.decode('&copy; &#169; &#xA9;');
// → '© © ©'

Escapes unsafe characters for safe use in HTML text contexts.
/**
* Escapes unsafe HTML characters for text contexts
* @param {string} string - The input string to escape
* @returns {string} String with unsafe characters escaped
*/
he.escape(string)

Usage Examples:
// Escape HTML-unsafe characters
he.escape('<script>alert("xss")</script>');
// → '&lt;script&gt;alert(&quot;xss&quot;)&lt;/script&gt;'
// Escape attribute content
he.escape('value="malicious"');
// → 'value=&quot;malicious&quot;'
// Handles all unsafe characters: & < > " ' `
he.escape('&<>"\'`');
// → '&amp;&lt;&gt;&quot;&#x27;&#x60;'

Alias for the decode function, providing semantic clarity for unescaping operations.
/**
* Unescapes HTML entities (alias for decode)
* @param {string} html - HTML string containing entities to decode
* @param {object} [options] - Optional configuration object (same as decode)
* @returns {string} Decoded Unicode string
*/
he.unescape(html, options)

Usage Examples:
// Identical to he.decode()
he.unescape('&lt;script&gt;');
// → '<script>'
// Same options as decode
he.unescape('foo&ampbar', { isAttributeValue: true });
// → 'foo&ampbar'

Default options can be modified globally to avoid passing options repeatedly.
/** Global default options for encode function */
he.encode.options = {
allowUnsafeSymbols: false, // Don't encode unsafe HTML chars &<>"'`
encodeEverything: false, // Only encode necessary characters
strict: false, // Don't throw on invalid code points
useNamedReferences: false, // Use hex escapes instead of names
decimal: false // Use hex instead of decimal escapes
};
/** Global default options for decode function */
he.decode.options = {
isAttributeValue: false, // Treat input as HTML text context
strict: false // Don't throw on malformed entities
};

Usage Examples:
// Override global encode defaults
he.encode.options.useNamedReferences = true;
he.encode('foo © bar'); // Now uses named refs by default
// → 'foo &copy; bar'
// Override global decode defaults
he.decode.options.strict = true;
he.decode('foo&bar'); // Now throws on malformed entities
// → Parse error
// Read current defaults
console.log(he.encode.options.decimal); // → false
console.log(he.decode.options.isAttributeValue); // → false

Access to the library version for compatibility checks.
/** Semantic version string of the library */
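he.version // '1.2.0'

Usage Examples:
console.log(he.version); // → '1.2.0'
// Version-based feature detection
// Note: this is a lexicographic string comparison, so it misorders versions
// such as '1.10.0'; use a semver-aware comparison for robust checks.
if (he.version >= '1.2.0') {
// Use newer features
}

he provides a command-line interface for batch processing and shell integration.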
npm install -g he

# Encode text
he --encode 'föo ♥ bår 𝌆 baz'
# → f&#xF6;o &#x2665; b&#xE5;r &#x1D306; baz
# Encode with named references
he --encode --use-named-refs 'föo ♥ bår'
# → f&ouml;o &hearts; b&aring;r
# Decode entities
he --decode 'f&ouml;o &hearts; b&aring;r'
# → föo ♥ bår
# Escape HTML
he --escape '<img src="x" onerror="alert(1)">'
# → &lt;img src=&quot;x&quot; onerror=&quot;alert(1)&quot;&gt;

# Use named character references
he --encode --use-named-refs 'text © symbol'
# Encode everything including ASCII
he --encode --everything 'hello'
# Use decimal instead of hex escapes
he --encode --decimal 'text © symbol'
# Allow unsafe HTML characters
he --encode --allow-unsafe 'text & symbol'

# Treat as HTML attribute value
he --decode --attribute 'foo&bar'
# Enable strict parsing mode
he --decode --strict 'foo&bar'

# Process files with redirection
he --encode < input.txt > output.html
he --decode < input.html > output.txt
# Process remote content
curl -s "https://example.com/data.txt" | he --encode > encoded.html

# Show version
he --version
he -v
# Show help
he --help
he -h

he provides comprehensive error handling for various edge cases:
Invalid Code Points:
// In non-strict mode, invalid code points are preserved
he.encode('foo\x00bar');
// → 'foo\x00bar'
// In strict mode, throws Parse error
he.encode('foo\x00bar', { strict: true });
// → Parse error: forbidden code point

Malformed Entities:
// In non-strict mode, malformed entities are left as-is
he.decode('foo&bar');
// → 'foo&bar'
// In strict mode, throws Parse error
he.decode('foo&bar', { strict: true });
// → Parse error: named character reference was not terminated by a semicolon

Unicode Edge Cases:
// Handles astral symbols (4-byte Unicode) correctly
he.encode('𝌆'); // Mathematical symbol (U+1D306)
// → '&#x1D306;'
he.decode('&#x1D306;');
// → '𝌆'
// Handles surrogate pairs correctly
he.encode('\uD834\uDF06'); // Same symbol as above
// → '&#x1D306;'

/** Main library object */
const he = {
version: '1.2.0',
encode: function(string, options) { /* ... */ },
decode: function(html, options) { /* ... */ },
escape: function(string) { /* ... */ },
unescape: function(html, options) { /* ... */ } // alias for decode
};
/** Encode options object properties */
const encodeOptions = {
useNamedReferences: false, // Use named refs like &copy; instead of &#xA9;
decimal: false, // Use decimal &#169; instead of hex &#xA9;
encodeEverything: false, // Encode all symbols including ASCII
strict: false, // Throw on invalid code points
allowUnsafeSymbols: false // Don't encode &<>"'` characters
};
/** Decode options object properties */
const decodeOptions = {
isAttributeValue: false, // Treat as HTML attribute value context
strict: false // Throw on malformed character references
};
/** Error thrown in strict mode for invalid input */
throw new Error('Parse error: ' + message);