Character encoding detector for automatic detection of text encodings using occurrence analysis
npx @tessl/cli install tessl/npm-chardet@2.1.0

chardet is a character encoding detection library written in pure TypeScript. It uses occurrence analysis to determine the most probable character encoding of text data, supporting over 20 different encodings. The library works in all environments (Node.js, Browser, Native) with zero dependencies and provides both synchronous and asynchronous APIs.
npm install chardet

import chardet from "chardet";
import { detect, analyse, detectFile, detectFileSync, Match, EncodingName, Options } from "chardet";

For CommonJS:
const chardet = require("chardet");
const { detect, analyse, detectFile, detectFileSync } = require("chardet");

import chardet from "chardet";
// Detect encoding from a buffer
const buffer = Buffer.from("Hello, world!");
const encoding = chardet.detect(buffer);
console.log(encoding); // "UTF-8"
// Get all possible encodings with confidence scores
const results = chardet.analyse(buffer);
console.log(results);
// [
// { confidence: 100, name: "UTF-8" },
// { confidence: 32, name: "windows-1252", lang: "fr" },
// ...
// ]
// Detect encoding from a file
const fileEncoding = await chardet.detectFile("./example.txt");
console.log(fileEncoding); // "UTF-8"
// Synchronous file detection with options
const syncEncoding = chardet.detectFileSync("./example.txt", {
sampleSize: 1024,
offset: 0
});

Detects the most probable character encoding from byte data.
/**
* Detects character encoding and returns the most probable encoding name
* @param buffer - Input data to analyze (Buffer or Uint8Array)
* @returns The detected encoding name or null if no encoding detected
* @throws Error if input is not a valid byte array
*/
function detect(buffer: Uint8Array): string | null;

Analyzes byte data and returns all possible encodings with confidence scores.
/**
* Analyzes buffer and returns all possible encodings with confidence scores
* @param buffer - Input data to analyze (Buffer or Uint8Array)
* @returns Array of Match objects sorted by confidence (descending)
* @throws Error if input is not a valid byte array
*/
function analyse(buffer: Uint8Array): AnalyseResult;

Detects character encoding of a file asynchronously with optional sampling.
/**
* Detects encoding of a file asynchronously
* @param filepath - Path to the file to analyze
* @param opts - Optional detection options
* @returns Promise resolving to encoding name or null
* @throws Error for filesystem-related errors
*/
function detectFile(
filepath: string,
opts: Options = {}
): Promise<DetectResult>;

Usage Example:
// Basic file detection
const encoding = await chardet.detectFile("./document.txt");
// With sampling for large files (performance optimization)
const sampledEncoding = await chardet.detectFile("./large-file.txt", {
sampleSize: 2048, // Only read first 2KB
offset: 100 // Start reading from byte 100
});

Detects character encoding of a file synchronously with optional sampling.
/**
* Detects encoding of a file synchronously
* @param filepath - Path to the file to analyze
* @param opts - Optional detection options
* @returns Encoding name or null
* @throws Error for filesystem-related errors
*/
function detectFileSync(
filepath: string,
opts: Options = {}
): DetectResult;

Usage Example:
// Basic synchronous file detection
const encoding = chardet.detectFileSync("./document.txt");
// With sampling options
const sampledEncoding = chardet.detectFileSync("./large-file.txt", {
sampleSize: 1024,
offset: 0
});

The default export provides access to all main functions as an object.
/**
* Default export containing all main chardet functions
*/
interface ChardetDefault {
analyse: typeof analyse;
detect: typeof detect;
detectFileSync: typeof detectFileSync;
detectFile: typeof detectFile;
}

Usage Example:
import chardet from "chardet";
// All functions are available on the default export
const encoding = chardet.detect(buffer);
const results = chardet.analyse(buffer);
const fileEncoding = await chardet.detectFile("./file.txt");
const syncEncoding = chardet.detectFileSync("./file.txt");

Configuration options for file detection operations.
interface Options {
/** Number of bytes to sample from the file (for performance optimization) */
sampleSize?: number;
/** Byte offset to start reading from in the file */
offset?: number;
}

Represents a detection match result with confidence score and optional language.
interface Match {
/** Confidence score from 0-100 indicating detection certainty */
confidence: number;
/** Detected encoding name */
name: EncodingName;
/** Optional detected language code (e.g., 'en', 'fr', 'zh') */
lang?: string;
}

Union type of all supported character encoding names.
type EncodingName =
| "ASCII"
| "Big5"
| "EUC-JP"
| "EUC-KR"
| "GB18030"
| "ISO_2022"
| "ISO-2022-CN"
| "ISO-2022-JP"
| "ISO-2022-KR"
| "ISO-8859-1"
| "ISO-8859-2"
| "ISO-8859-5"
| "ISO-8859-6"
| "ISO-8859-7"
| "ISO-8859-8"
| "ISO-8859-9"
| "KOI8-R"
| "mbcs"
| "sbcs"
| "Shift_JIS"
| "UTF-16BE"
| "UTF-16LE"
| "UTF-32"
| "UTF-32BE"
| "UTF-32LE"
| "UTF-8"
| "windows-1251"
| "windows-1256";

Type aliases for function return values.
/** Result of analyse() function - array of detection matches */
type AnalyseResult = Match[];
/** Result of detect functions - encoding name or null */
type DetectResult = string | null;

In Node.js, full functionality is available, including file system access for detectFile and detectFileSync.
// All functions work in Node.js
import chardet from "chardet";
import fs from "fs";
const buffer = fs.readFileSync("./file.txt");
const encoding = chardet.detect(buffer);
const fileEncoding = await chardet.detectFile("./file.txt");

In browsers, buffer detection works with Uint8Array. The file functions throw errors because filesystem access is not available.
// Works in browsers
import chardet from "chardet";
const uint8Array = new Uint8Array([0x48, 0x65, 0x6c, 0x6c, 0x6f]);
const encoding = chardet.detect(uint8Array);
// File functions will throw Error: "File system is not available"
// await chardet.detectFile("./file.txt"); // ❌ Throws error

Functions throw errors for invalid input types:
// ❌ Throws: "Input must be a byte array, e.g. Buffer or Uint8Array"
chardet.detect("invalid string input" as any);
chardet.detect(123 as any);
chardet.detect(null as any);

File functions may throw or reject with filesystem-related errors:
try {
const encoding = await chardet.detectFile("./nonexistent.txt");
} catch (error) {
// Handle file not found, permission errors, etc.
console.error("File detection failed:", error.message);
}

File functions throw errors in browser environments:
try {
await chardet.detectFile("./file.txt");
} catch (error) {
// In browser: "File system is not available"
console.error(error.message);
}

chardet supports 24 character encodings across multiple categories:
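Unicode Encodings: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE
ISO Standards: ISO-2022-CN, ISO-2022-JP, ISO-2022-KR, ISO-8859-1, ISO-8859-2, ISO-8859-5, ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9
Windows Code Pages: windows-1251, windows-1256
Asian Encodings: Big5, EUC-JP, EUC-KR, GB18030, Shift_JIS
Other Encodings: ASCII, KOI8-R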
Each encoding includes language detection where applicable (e.g., 'zh' for Chinese, 'ja' for Japanese, 'ru' for Russian).