Extended regular expressions with augmented syntax, named capture groups, Unicode support, and cross-browser compatibility
Tools for extending XRegExp syntax and managing optional features to customize behavior and add new capabilities.
Extend XRegExp syntax with custom tokens and flags.
/**
* Extends XRegExp syntax and allows custom flags
* @param regex - Regex object that matches the new token
* @param handler - Function that returns new pattern string using native regex syntax
* @param options - Options object with optional properties
*/
function addToken(regex: RegExp, handler: (match: MatchArray, scope: TokenScope, flags: string) => string, options?: TokenOptions): void;
interface TokenOptions {
/** Scope where the token applies: 'default', 'class', or 'all' */
scope?: 'default' | 'class' | 'all';
/** Single-character flag that triggers the token */
flag?: string;
/** Custom flags checked within token handler (not required to trigger) */
optionalFlags?: string;
/** Whether handler output should be reparseable by other tokens */
reparse?: boolean;
/** Single character at beginning of successful matches (performance hint) */
leadChar?: string;
}
type TokenScope = 'default' | 'class';
type MatchArray = RegExpMatchArray & { [propName: string]: any };

Usage Examples:
// Basic usage: Add \\a for ALERT control code
XRegExp.addToken(
/\\\\a/,
() => '\\\\x07',
{ scope: 'all' }
);
XRegExp('\\\\a[\\\\a-\\\\n]+').test('\\x07\\n\\x07'); // true
// Add custom flag: U (ungreedy) reverses greedy and lazy quantifiers
XRegExp.addToken(
/([?*+]|{\\d+(?:,\\d*)?})(\\??)/,
(match) => \`\${match[1]}\${match[2] ? '' : '?'}\`,
{ flag: 'U' }
);
XRegExp('a+', 'U').exec('aaa')[0]; // 'a' (lazy)
XRegExp('a+?', 'U').exec('aaa')[0]; // 'aaa' (greedy)
// Token with reparse option for token chaining
XRegExp.addToken(
/\\\\macro{([^}]+)}/,
(match) => \`{{expanded_\${match[1]}}}\`,
{ reparse: true } // Output will be processed by other tokens
);
// Performance optimization with leadChar
XRegExp.addToken(
/\\\\custom\\d+/,
(match) => \`[0-9]{\${match[0].length - 7}}\`,
{
leadChar: '\\\\', // Only check positions starting with backslash
scope: 'default'
}
);

Install and manage optional XRegExp features.
/**
* Installs optional features according to specified options
* @param options - Options object or string specifying features to install
*/
function install(options: string | FeatureOptions): void;
/**
* Uninstalls optional features according to specified options
* @param options - Options object or string specifying features to uninstall
*/
function uninstall(options: string | FeatureOptions): void;
/**
* Checks whether an individual optional feature is installed
* @param feature - Name of feature to check: 'astral' or 'namespacing'
* @returns Whether the feature is installed
*/
function isInstalled(feature: 'astral' | 'namespacing'): boolean;
interface FeatureOptions {
/** Enables support for astral code points in Unicode addons */
astral?: boolean;
/** Adds named capture groups to the groups property of matches */
namespacing?: boolean;
}

Usage Examples:
// Install features with options object
XRegExp.install({
// Enables support for astral code points (implicitly sets flag A)
astral: true,
// Adds named capture groups to the groups property of matches
namespacing: true
});
// Install features with options string
XRegExp.install('astral namespacing');
// Check if features are installed
XRegExp.isInstalled('astral'); // true
XRegExp.isInstalled('namespacing'); // true
// Uninstall features
XRegExp.uninstall({
astral: true,
namespacing: true
});
// Check installation status
XRegExp.isInstalled('astral'); // false
XRegExp.isInstalled('namespacing'); // false

Access internal utility functions for addon development.
/**
* Internal utility functions exposed for testing and addons
*/
// Remove duplicate characters from string
function _clipDuplicates(str: string): string;
// Check if browser natively supports a regex flag
function _hasNativeFlag(flag: string): boolean;
// Convert hexadecimal to decimal
function _dec(hex: string): number;
// Convert decimal to hexadecimal
function _hex(dec: number | string): string;
// Add leading zeros to make 4-character hex string
function _pad4(str: string): string;

Usage Examples:
// These are primarily for internal use and addon development
XRegExp._clipDuplicates('aabbcc'); // 'abc'
XRegExp._hasNativeFlag('u'); // true (if browser supports Unicode flag)
XRegExp._dec('FF'); // 255
XRegExp._hex(255); // 'ff'
XRegExp._pad4('A'); // '000A'

The astral feature enables 21-bit Unicode support for characters beyond the Basic Multilingual Plane.
When Installed:
Flag A is automatically added to all new XRegExps.

Usage Examples:
// Before installing astral feature
const regex1 = XRegExp('\\\\p{Letter}'); // Only matches BMP characters
regex1.test('𝒜'); // false (mathematical script A is astral)
// Install astral feature
XRegExp.install('astral');
// After installing - flag A automatically added
const regex2 = XRegExp('\\\\p{Letter}'); // Automatically gets flag A
regex2.test('𝒜'); // true (now matches astral characters)
// Explicit flag A still works
const regex3 = XRegExp('\\\\p{Letter}', 'A');
regex3.test('𝒜'); // true

The namespacing feature controls where named capture groups appear in match results.
When Installed (Default in XRegExp 5+):
Named captures are available on the match.groups object (ES2018 standard).
When Uninstalled (Legacy Mode):
Named captures are stored directly on the match array.

Usage Examples:
const regex = XRegExp('(?<name>\\\\w+) (?<age>\\\\d+)');
const match = XRegExp.exec('John 25', regex);
// With namespacing installed (default)
XRegExp.install('namespacing');
console.log(match.groups.name); // 'John'
console.log(match.groups.age); // '25'
// With namespacing uninstalled (legacy)
XRegExp.uninstall('namespacing');
console.log(match.name); // 'John' (directly on match array)
console.log(match.age); // '25'

// Add \\R for generic line break
XRegExp.addToken(
/\\\\R/,
() => '(?:\\r\\n|[\\r\\n\\u2028\\u2029])',
{ scope: 'all' }
);
// Usage
XRegExp('line1\\\\Rline2').test('line1\\r\\nline2'); // true
XRegExp('line1\\\\Rline2').test('line1\\nline2'); // true

// Add \\Q...\\E for literal text (like Perl)
let inLiteral = false;
XRegExp.addToken(
/\\\\[QE]/,
function(match) {
if (match[0] === '\\\\Q') {
inLiteral = true;
return '(?:';
} else { // \\E
inLiteral = false;
return ')';
}
},
{ scope: 'default' }
);
// Escape literal content between \\Q and \\E
XRegExp.addToken(
/[^\\\\]+/,
function(match) {
return inLiteral ? XRegExp.escape(match[0]) : match[0];
},
{ scope: 'default' }
);

// Add \\w+ enhancement with flag W (extended word characters)
XRegExp.addToken(
/\\\\w/,
(match, scope, flags) => {
if (flags.includes('W')) {
return '[\\\\w\\u00C0-\\u017F]'; // Include Latin extended characters
}
return match[0]; // Standard \\w
},
{
flag: 'W',
scope: 'default'
}
);
// Usage
XRegExp('\\\\w+', 'W').test('café'); // true (includes é)
XRegExp('\\\\w+').test('café'); // also true: unanchored \\w+ still matches 'caf' (é is only matched with flag W)

// Token that expands to other tokens that need further processing
XRegExp.addToken(
/\\\\identifier/,
() => '\\\\p{ID_Start}\\\\p{ID_Continue}*',
{
reparse: true, // Allow Unicode tokens to be processed
optionalFlags: 'A' // Register flag A as optional
}
);
// Usage (requires Unicode Base addon)
XRegExp('\\\\identifier', 'A').test('變數名'); // true

Control pattern and regex caching for performance.
// Internal cache flush (used by addToken and addUnicodeData)
XRegExp.cache.flush('patterns'); // Flush pattern cache
XRegExp.cache.flush(); // Flush regex cache

Usage Examples:
// Cache is automatically managed, but can be manually controlled
const regex1 = XRegExp.cache('\\\\d+', 'g'); // Cached
const regex2 = XRegExp.cache('\\\\d+', 'g'); // Same cached instance
console.log(regex1 === regex2); // true
// Adding tokens automatically flushes pattern cache
XRegExp.addToken(/\\\\test/, () => 'tested');
// Pattern cache is flushed, new compilations use updated tokens

Proper error handling for invalid tokens and configurations.
// Invalid flag characters
try {
XRegExp.addToken(/test/, () => '', { flag: 'ab' }); // Multi-char flag
} catch (e) {
console.log(e.message); // "Flag must be a single character A-Za-z0-9_$"
}
// Unknown flags in patterns
try {
XRegExp('test', 'Q'); // Unknown flag
} catch (e) {
console.log(e.message); // "Unknown regex flag Q"
}
// Invalid token usage
try {
XRegExp('\\\\p{UnknownProperty}', 'A'); // Unregistered Unicode property
} catch (e) {
console.log(e.message); // "Unknown Unicode token \\p{UnknownProperty}"
}

Install with Tessl CLI
npx tessl i tessl/npm-xregexp