or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

index.md
tile.json

tessl/npm-tldjs

JavaScript API to work against complex domain names, subdomains and URIs.

Workspace
tessl
Visibility
Public
Created
Last updated
Describes
npmpkg:npm/tldjs@2.3.x

To install, run

npx @tessl/cli install tessl/npm-tldjs@2.3.0

docs/index.md

tldjs

tldjs is a JavaScript library for working with complex domain names, subdomains and well-known TLDs. It provides utilities to parse URLs/hostnames and extract domain components based on Mozilla's Public Suffix List, answering questions like "what is mail.google.com's domain?" and "is big.data's TLD well-known?".

Package Information

  • Package Name: tldjs
  • Package Type: npm
  • Language: JavaScript
  • Installation: npm install tldjs

Core Imports

const { parse, tldExists, getDomain, getSubdomain, getPublicSuffix, isValidHostname, extractHostname } = require('tldjs');

Or import the entire module:

const tldjs = require('tldjs');

For ES6 modules:

import { parse, tldExists, getDomain, getSubdomain, getPublicSuffix, isValidHostname, extractHostname } from 'tldjs';

Or import the entire module:

import tldjs from 'tldjs';

Basic Usage

const tldjs = require('tldjs');

// Parse a URL completely
const result = tldjs.parse('https://spark-public.s3.amazonaws.com/dataanalysis/loansData.csv');
console.log(result);
// {
//   hostname: 'spark-public.s3.amazonaws.com',
//   isValid: true,
//   isIp: false,
//   tldExists: true,
//   publicSuffix: 's3.amazonaws.com',
//   domain: 'spark-public.s3.amazonaws.com',
//   subdomain: ''
// }

// Check if TLD exists
console.log(tldjs.tldExists('google.com'));      // true
console.log(tldjs.tldExists('google.local'));    // false

// Extract specific parts
console.log(tldjs.getDomain('fr.google.com'));     // 'google.com'
console.log(tldjs.getSubdomain('fr.google.com'));  // 'fr'
console.log(tldjs.getPublicSuffix('google.co.uk')); // 'co.uk'

Architecture

tldjs is built around several key components:

  • Public Suffix List: Uses Mozilla's Public Suffix List for accurate TLD recognition
  • Hostname Extraction: Robust URL parsing to extract hostnames from complex URLs
  • Validation Layer: RFC-compliant hostname validation
  • Trie Data Structure: Efficient suffix lookup using a trie for fast public suffix matching
  • Factory Pattern: Customizable instances with user-defined rules and validation hosts

Capabilities

URL/Hostname Parsing

Complete parsing of URLs or hostnames with all domain components extracted in a single operation.

/**
 * Parse URL/hostname and return complete information about domain components
 * @param {string} url - URL or hostname to parse
 * @param {number} [_step] - Internal step control for optimization
 * @returns {ParseResult} Complete parsing result
 */
function parse(url, _step);

interface ParseResult {
  hostname: string | null;     // Extracted hostname
  isValid: boolean;           // Whether hostname is valid per RFC
  isIp: boolean;              // Whether hostname is an IP address
  tldExists: boolean;         // Whether TLD is well-known
  publicSuffix: string | null; // Public suffix portion
  domain: string | null;      // Domain portion  
  subdomain: string | null;   // Subdomain portion
}

Usage Examples:

// Standard web URL
tldjs.parse('https://www.example.com/path');
// { hostname: 'www.example.com', isValid: true, isIp: false, 
//   tldExists: true, publicSuffix: 'com', domain: 'example.com', subdomain: 'www' }

// Complex AWS hostname
tldjs.parse('https://spark-public.s3.amazonaws.com/data.csv');
// { hostname: 'spark-public.s3.amazonaws.com', isValid: true, isIp: false,
//   tldExists: true, publicSuffix: 's3.amazonaws.com', 
//   domain: 'spark-public.s3.amazonaws.com', subdomain: '' }

// IP address
tldjs.parse('https://192.168.0.1/admin');
// { hostname: '192.168.0.1', isValid: true, isIp: true, 
//   tldExists: false, publicSuffix: null, domain: null, subdomain: null }

// Invalid/unknown TLD
tldjs.parse('domain.unknown');
// { hostname: 'domain.unknown', isValid: true, isIp: false,
//   tldExists: false, publicSuffix: 'unknown', domain: 'domain.unknown', subdomain: '' }

TLD Existence Checking

Validates whether a TLD is well-known according to the Public Suffix List.

/**
 * Check if TLD exists for given URL/hostname
 * @param {string} url - URL or hostname to check
 * @returns {boolean} True if TLD is well-known
 */
function tldExists(url);

Usage Examples:

tldjs.tldExists('google.com');        // true
tldjs.tldExists('google.local');      // false (not a registered TLD)
tldjs.tldExists('com');               // true  
tldjs.tldExists('uk');                // true
tldjs.tldExists('co.uk');             // true
tldjs.tldExists('amazon.co.uk');      // true (because 'uk' is a valid TLD)
tldjs.tldExists('https://user:password@example.co.uk:8080/path'); // true

Public Suffix Extraction

Extracts the public suffix (effective TLD) from URLs or hostnames.

/**
 * Extract public suffix from URL/hostname
 * @param {string} url - URL or hostname to analyze
 * @returns {string | null} Public suffix or null if invalid
 */
function getPublicSuffix(url);

Usage Examples:

tldjs.getPublicSuffix('google.com');       // 'com'
tldjs.getPublicSuffix('fr.google.com');    // 'com'
tldjs.getPublicSuffix('google.co.uk');     // 'co.uk'
tldjs.getPublicSuffix('s3.amazonaws.com'); // 's3.amazonaws.com'
tldjs.getPublicSuffix('tld.is.unknown');   // 'unknown'

Domain Extraction

Extracts the domain (second-level domain + public suffix) from URLs or hostnames.

/**
 * Extract domain from URL/hostname
 * @param {string} url - URL or hostname to analyze
 * @returns {string | null} Domain or null if invalid
 */
function getDomain(url);

Usage Examples:

tldjs.getDomain('google.com');        // 'google.com'
tldjs.getDomain('fr.google.com');     // 'google.com'
tldjs.getDomain('fr.google.google');  // 'google.google'
tldjs.getDomain('foo.google.co.uk');  // 'google.co.uk'
tldjs.getDomain('t.co');              // 't.co'
tldjs.getDomain('fr.t.co');           // 't.co'
tldjs.getDomain('https://user:password@example.co.uk:8080/some/path?query#hash'); // 'example.co.uk'

Subdomain Extraction

Extracts the subdomain portion from URLs or hostnames.

/**
 * Extract subdomain from URL/hostname
 * @param {string} url - URL or hostname to analyze
 * @returns {string | null} Subdomain, empty string if none, or null if invalid
 */
function getSubdomain(url);

Usage Examples:

tldjs.getSubdomain('google.com');             // ''
tldjs.getSubdomain('fr.google.com');          // 'fr'
tldjs.getSubdomain('google.co.uk');           // ''
tldjs.getSubdomain('foo.google.co.uk');       // 'foo'
tldjs.getSubdomain('moar.foo.google.co.uk');  // 'moar.foo'
tldjs.getSubdomain('t.co');                   // ''
tldjs.getSubdomain('fr.t.co');                // 'fr'
tldjs.getSubdomain('https://secure.example.co.uk:443/path'); // 'secure'

Hostname Extraction

Extracts and validates hostname from URLs or validates existing hostnames.

/**
 * Extract hostname from URL or validate hostname
 * @param {string} url - URL or hostname to process
 * @returns {string | null} Clean hostname or null if invalid
 */
function extractHostname(url);

Usage Examples:

tldjs.extractHostname(' example.CO.uk ');        // 'example.co.uk'
tldjs.extractHostname('example.co.uk/some/path'); // 'example.co.uk'
tldjs.extractHostname('user:password@example.co.uk:8080/path'); // 'example.co.uk'
tldjs.extractHostname('https://www.example.com/'); // 'www.example.com'
tldjs.extractHostname('台灣');                     // 'xn--kpry57d' (punycode)
tldjs.extractHostname(42);                        // '42' (non-string input is converted to a string)

Hostname Validation

Validates hostnames according to RFC 1035 standards.

/**
 * Validate hostname according to RFC 1035
 * @param {string} hostname - Hostname to validate
 * @returns {boolean} True if hostname is valid per RFC
 */
function isValidHostname(hostname);

Usage Examples:

tldjs.isValidHostname('google.com');      // true
tldjs.isValidHostname('.google.com');     // false
tldjs.isValidHostname('my.fake.domain');  // true
tldjs.isValidHostname('localhost');       // false
tldjs.isValidHostname('192.168.0.0');     // true
tldjs.isValidHostname('https://example.com'); // false (full URL, not hostname)

Deprecated: isValid

Legacy hostname validation function (use isValidHostname instead).

/**
 * @deprecated Use isValidHostname instead
 * Validate hostname according to RFC 1035
 * @param {string} hostname - Hostname to validate  
 * @returns {boolean} True if hostname is valid per RFC
 */
function isValid(hostname);

Custom Configuration Factory

Creates customized tldjs instances with user-defined settings for specialized use cases.

/**
 * Create customized tldjs instance with user settings
 * @param {FactoryOptions} options - Configuration options
 * @returns {tldjs} Customized tldjs instance with same API
 */
function fromUserSettings(options);

interface FactoryOptions {
  rules?: SuffixTrie;                           // Custom suffix trie for lookups
  validHosts?: string[];                        // Additional hosts to treat as valid domains
  extractHostname?: (url: string) => string | null; // Custom hostname extraction function
}

Usage Examples:

// Default behavior - localhost is not recognized
tldjs.getDomain('localhost');           // null
tldjs.getSubdomain('vhost.localhost');  // null

// Custom instance with localhost support
const myTldjs = tldjs.fromUserSettings({
  validHosts: ['localhost']
});

myTldjs.getDomain('localhost');           // 'localhost'
myTldjs.getSubdomain('vhost.localhost');  // 'vhost'
myTldjs.getDomain('api.localhost');       // 'localhost'
myTldjs.getSubdomain('api.localhost');    // 'api'

Types

interface ParseResult {
  hostname: string | null;     // Extracted hostname from input
  isValid: boolean;           // Whether hostname follows RFC 1035
  isIp: boolean;              // Whether hostname is IPv4/IPv6 address
  tldExists: boolean;         // Whether TLD exists in Public Suffix List
  publicSuffix: string | null; // Public suffix (effective TLD)
  domain: string | null;      // Domain name (SLD + public suffix)
  subdomain: string | null;   // Subdomain portion
}

class SuffixTrie {
  constructor(rules?: PlainRules);              // Create trie with optional rules
  static fromJson(json: object): SuffixTrie;    // Create trie from JSON rules
  hasTld(value: string): boolean;               // Check if TLD exists in trie
  suffixLookup(hostname: string): string | null; // Find public suffix for hostname
  exceptions: object;                           // Exception rules trie
  rules: object;                                // Standard rules trie
}

type PlainRules = {
  parts: string[];                              // Domain parts in reverse order
  exception: boolean;                           // Whether this is an exception rule
}[];

Error Handling

All tldjs functions handle invalid input gracefully:

  • Invalid URLs return null for extracted components
  • Malformed hostnames are detected via isValid: false in parse results
  • IP addresses are properly identified and bypass TLD validation
  • Unknown TLDs are handled transparently (marked as tldExists: false)

Performance Notes

tldjs is optimized for performance with different input types:

  • Cleaned hostnames: ~850,000-8,700,000 ops/sec depending on the function
  • Full URLs: ~230,000-25,400,000 ops/sec depending on the function
  • Lazy evaluation: The parse() function uses early termination to avoid unnecessary processing
  • Custom hostname extraction: You can provide optimized extractHostname functions for specialized use cases

Browser Compatibility

tldjs works in browsers via bundlers like browserify, webpack, and others. The library has no Node.js-specific dependencies and uses only standard JavaScript features.

TLD List Updates

The library bundles Mozilla's Public Suffix List but supports updates:

# Update TLD rules during installation
npm install tldjs --tldjs-update-rules

# Update existing installation  
npm install --tldjs-update-rules