Extended regular expressions with augmented syntax, named capture groups, Unicode support, and cross-browser compatibility.
Specialized matching capabilities including recursive/balanced delimiters and chained matching for complex text processing scenarios.
Matches balanced delimiters with comprehensive configuration options.
/**
* Returns array of match strings between outermost left and right delimiters
* @param str - String to search
* @param left - Left delimiter as XRegExp pattern
* @param right - Right delimiter as XRegExp pattern
* @param flags - Any combination of XRegExp flags for delimiters
* @param options - Configuration options for matching behavior
* @returns Array of matches or detailed match objects
*/
function matchRecursive(str: string, left: string, right: string, flags?: string, options?: MatchRecursiveOptions): string[] | MatchRecursiveValueNameMatch[];
interface MatchRecursiveOptions {
/** Single char used to escape delimiters within the subject string */
escapeChar?: string;
/** Array of 4 strings naming the parts to return in extended mode */
valueNames?: [string | null, string | null, string | null, string | null];
/** Handling mode for unbalanced delimiters */
unbalanced?: 'error' | 'skip' | 'skip-lazy';
}
interface MatchRecursiveValueNameMatch {
name: string;
value: string;
start: number;
end: number;
}

Usage Examples:
// Basic balanced delimiter matching
const str1 = '(t((e))s)t()(ing)';
XRegExp.matchRecursive(str1, '\\\\(', '\\\\)', 'g');
// Result: ['t((e))s', '', 'ing']
// Extended information mode with valueNames
const str2 = 'Here is <div> <div>an</div></div> example';
XRegExp.matchRecursive(str2, '<div\\\\s*>', '</div>', 'gi', {
valueNames: ['between', 'left', 'match', 'right']
});
// Result: [
// {name: 'between', value: 'Here is ', start: 0, end: 8},
// {name: 'left', value: '<div>', start: 8, end: 13},
// {name: 'match', value: ' <div>an</div>', start: 13, end: 27},
// {name: 'right', value: '</div>', start: 27, end: 33},
// {name: 'between', value: ' example', start: 33, end: 41}
// ]
// Using escape characters
const str3 = '...{1}.\\\\{{function(x,y){return {y:x}}}';
XRegExp.matchRecursive(str3, '{', '}', 'g', {
valueNames: ['literal', null, 'value', null],
escapeChar: '\\\\'
});
// Result: [
// {name: 'literal', value: '...', start: 0, end: 3},
// {name: 'value', value: '1', start: 4, end: 5},
// {name: 'literal', value: '.\\\\{', start: 6, end: 9},
// {name: 'value', value: 'function(x,y){return {y:x}}', start: 10, end: 37}
// ]

Chains regexes for successive matching within previous results.
/**
* Retrieves matches from searching using a chain of regexes
* @param str - String to search
* @param chain - Array of regexes or objects with regex and backref properties
* @returns Matches by the last regex in the chain, or empty array
*/
function matchChain(str: string, chain: (RegExp | ChainArrayElement)[]): string[];
interface ChainArrayElement {
/** The regex to use */
regex: RegExp;
/** The named or numbered backreference to pass forward */
backref: number | string;
}

Usage Examples:
// Basic usage - matches numbers within <b> tags
XRegExp.matchChain('1 <b>2</b> 3 <b>4 a 56</b>', [
XRegExp('(?is)<b>.*?</b>'),
/\\d+/
]);
// Result: ['2', '4', '56']
// Passing forward and returning specific backreferences
const html = \`<a href="http://xregexp.com/api/">XRegExp</a>
<a href="http://www.google.com/">Google</a>\`;
XRegExp.matchChain(html, [
{regex: /<a href="([^"]+)">/i, backref: 1},
{regex: XRegExp('(?i)^https?://(?<domain>[^/?#]+)'), backref: 'domain'}
]);
// Result: ['xregexp.com', 'www.google.com']
// Multi-step extraction
const data = 'user:john@example.com, user:jane@test.org';
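XRegExp.matchChain(data, [
{regex: /user:([^,]+)/g, backref: 1}, // Extract user entries, forwarding the captured address
{regex: /@([^\\s]+)/, backref: 1}, // Extract domain from each entry
{regex: /([^.]+)\\./, backref: 1} // Extract domain name without TLD
]);
// Result: ['example', 'test']
// Note: a bare regex in the chain forwards the whole match (backreference 0);
// use {regex, backref} to forward or return a specific capture group.

Control what parts of matches are returned: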
// valueNames array maps to:
// [0] - String segments outside matches (before, between, after)
// [1] - Matched left delimiters
// [2] - Content between outermost delimiters
// [3] - Matched right delimiters
// Get only content between delimiters
XRegExp.matchRecursive(str, '{', '}', 'g', {
valueNames: [null, null, 'content', null]
});
// Get everything except right delimiters
XRegExp.matchRecursive(str, '{', '}', 'g', {
valueNames: ['between', 'left', 'content', null]
});

Handle escaped delimiters within content:
const code = 'var obj = {key: "value\\\\}", nested: {inner: true}};';
XRegExp.matchRecursive(code, '{', '}', 'g', {
escapeChar: '\\\\',
valueNames: [null, null, 'content', null]
});
// Properly handles escaped } in string literal

Configure behavior for unbalanced delimiters:
const unbalanced = 'Here is <div> <div>content</div> missing close';
// Error on unbalanced (default)
try {
XRegExp.matchRecursive(unbalanced, '<div>', '</div>', 'gi');
} catch (e) {
console.log('Unbalanced delimiter error');
}
// Skip unbalanced delimiters
XRegExp.matchRecursive(unbalanced, '<div>', '</div>', 'gi', {
unbalanced: 'skip'
});
// Result: ['content'] (skips unbalanced opening div)
// Skip lazily (minimal advancement)
XRegExp.matchRecursive(unbalanced, '<div>', '</div>', 'gi', {
unbalanced: 'skip-lazy'
});

Use flag y for sticky matching:
const str = '<1><<<2>>><3>4<5>';
XRegExp.matchRecursive(str, '<', '>', 'gy');
// Result: ['1', '<<2>>', '3']
// Stops at first non-match due to sticky mode

Pass specific capture groups between chain stages:
// Forward numbered backreferences
const chain1 = [
{regex: /(\\w+):(\\w+)/g, backref: 2}, // Capture key:value pairs, forward only the value part
/./ // Match each character of the forwarded value
];
// Forward named backreferences
const chain2 = [
{regex: XRegExp('(?<protocol>https?)://(?<domain>[^/]+)'), backref: 'domain'},
{regex: /([^.]+)/, backref: 1} // Extract each dot-separated label of the domain (all matches are returned)
];

Chain multiple processing stages:
const logData = \`
[INFO] 2021-01-15 Server started on port 8080
[ERROR] 2021-01-15 Database connection failed
[DEBUG] 2021-01-15 Cache initialized
\`;
// Extract error messages with timestamps
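XRegExp.matchChain(logData, [
{regex: /\\[ERROR\\]([^\\n]+)/g, backref: 1}, // Find error lines, forward the text after the tag
{regex: /\\d{4}-\\d{2}-\\d{2}(.+)/, backref: 1}, // Extract message after date
{regex: /\\s*(.+)/, backref: 1} // Trim leading whitespace
]);
// Result: ['Database connection failed']

Chain matching provides clear error messages for invalid backreferences: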
try {
XRegExp.matchChain('test', [
/(\\w+)/,
{regex: /\\w/, backref: 2} // Invalid - no group 2
]);
} catch (e) {
console.log(e.message); // "Backreference to undefined group: 2"
}
try {
XRegExp.matchChain('test', [
XRegExp('(?<word>\\\\w+)'),
{regex: /\\w/, backref: 'missing'} // Invalid - no 'missing' group
]);
} catch (e) {
console.log(e.message); // "Backreference to undefined group: missing"
}

Extract content from nested markup or code:
// Parse nested function calls
const code = 'outer(inner(a, b), middle(c), last)';
const args = XRegExp.matchRecursive(code, '\\\\(', '\\\\)', 'g');
// Result: ['inner(a, b), middle(c), last']
// Parse JSON-like structures
const json = '{name: "John", data: {age: 30, city: "NYC"}}';
const objects = XRegExp.matchRecursive(json, '{', '}', 'g');

Combine recursive and chain matching:
// Extract and process HTML attributes
const html = '<div class="highlight active" data-id="123">Content</div>';
// First extract tag attributes, then process each
XRegExp.matchChain(html, [
/<[^>]+>/, // Get the full tag
/\\s([^>]+)/, // Extract attributes portion
/([\\w-]+)="([^"]*)"/g // Extract name="value" pairs
]);

Parse template syntax with balanced delimiters:
const template = 'Hello {{user.name}}, you have {{#if messages}}{{messages.length}}{{/if}} messages';
// Extract template expressions
const expressions = XRegExp.matchRecursive(template, '{{', '}}', 'g');
// Result: ['user.name', '#if messages', 'messages.length', '/if']
// Process expressions further
const conditionals = expressions.filter(expr => expr.startsWith('#if'));

Install with Tessl CLI
npx tessl i tessl/npm-xregexp