0
# Extensibility and Configuration
1
2
Tools for extending XRegExp syntax and managing optional features to customize behavior and add new capabilities.
3
4
## Capabilities
5
6
### Token Extension System
7
8
Extend XRegExp syntax with custom tokens and flags.
9
10
```javascript { .api }
11
/**
12
* Extends XRegExp syntax and allows custom flags
13
* @param regex - Regex object that matches the new token
14
* @param handler - Function that returns new pattern string using native regex syntax
15
* @param options - Options object with optional properties
16
*/
17
function addToken(regex: RegExp, handler: (match: MatchArray, scope: TokenScope, flags: string) => string, options?: TokenOptions): void;
18
19
interface TokenOptions {
20
/** Scope where the token applies: 'default', 'class', or 'all' */
21
scope?: 'default' | 'class' | 'all';
22
/** Single-character flag that triggers the token */
23
flag?: string;
24
/** Custom flags checked within token handler (not required to trigger) */
25
optionalFlags?: string;
26
/** Whether handler output should be reparseable by other tokens */
27
reparse?: boolean;
28
/** Single character at beginning of successful matches (performance hint) */
29
leadChar?: string;
30
}
31
32
type TokenScope = 'default' | 'class';
33
type MatchArray = RegExpMatchArray & { [propName: string]: any };
34
```
35
36
**Usage Examples:**
37
38
```javascript
39
// Basic usage: Add \a for ALERT control code
40
XRegExp.addToken(
41
/\\a/,
42
() => '\\x07',
43
{ scope: 'all' }
44
);
45
XRegExp('\\a[\\a-\\n]+').test('\x07\n\x07'); // true
46
47
// Add custom flag: U (ungreedy) reverses greedy and lazy quantifiers
48
XRegExp.addToken(
49
/([?*+]|{\d+(?:,\d*)?})(\??)/,
50
(match) => `${match[1]}${match[2] ? '' : '?'}`,
51
{ flag: 'U' }
52
);
53
XRegExp('a+', 'U').exec('aaa')[0]; // 'a' (lazy)
54
XRegExp('a+?', 'U').exec('aaa')[0]; // 'aaa' (greedy)
55
56
// Token with reparse option for token chaining
57
XRegExp.addToken(
58
/\\macro{([^}]+)}/,
59
(match) => `{{expanded_${match[1]}}}`,
60
{ reparse: true } // Output will be processed by other tokens
61
);
62
63
// Performance optimization with leadChar
64
XRegExp.addToken(
65
/\\custom\d+/,
66
(match) => `[0-9]{${match[0].length - 7}}`,
67
{
68
leadChar: '\\', // Only check positions starting with backslash
69
scope: 'default'
70
}
71
);
72
```
73
74
### Feature Management
75
76
Install and manage optional XRegExp features.
77
78
```javascript { .api }
79
/**
80
* Installs optional features according to specified options
81
* @param options - Options object or string specifying features to install
82
*/
83
function install(options: string | FeatureOptions): void;
84
85
/**
86
* Uninstalls optional features according to specified options
87
* @param options - Options object or string specifying features to uninstall
88
*/
89
function uninstall(options: string | FeatureOptions): void;
90
91
/**
92
* Checks whether an individual optional feature is installed
93
* @param feature - Name of feature to check: 'astral' or 'namespacing'
94
* @returns Whether the feature is installed
95
*/
96
function isInstalled(feature: 'astral' | 'namespacing'): boolean;
97
98
interface FeatureOptions {
99
/** Enables support for astral code points in Unicode addons */
100
astral?: boolean;
101
/** Adds named capture groups to the groups property of matches */
102
namespacing?: boolean;
103
}
104
```
105
106
**Usage Examples:**
107
108
```javascript
109
// Install features with options object
110
XRegExp.install({
111
// Enables support for astral code points (implicitly sets flag A)
112
astral: true,
113
// Adds named capture groups to the groups property of matches
114
namespacing: true
115
});
116
117
// Install features with options string
118
XRegExp.install('astral namespacing');
119
120
// Check if features are installed
121
XRegExp.isInstalled('astral'); // true
122
XRegExp.isInstalled('namespacing'); // true
123
124
// Uninstall features
125
XRegExp.uninstall({
126
astral: true,
127
namespacing: true
128
});
129
130
// Check installation status
131
XRegExp.isInstalled('astral'); // false
132
XRegExp.isInstalled('namespacing'); // false
133
```
134
135
### Internal Utilities
136
137
Access internal utility functions for addon development.
138
139
```javascript { .api }
140
/**
141
* Internal utility functions exposed for testing and addons
142
*/
143
// Remove duplicate characters from string
144
function _clipDuplicates(str: string): string;
145
146
// Check if browser natively supports a regex flag
147
function _hasNativeFlag(flag: string): boolean;
148
149
// Convert hexadecimal to decimal
150
function _dec(hex: string): number;
151
152
// Convert decimal to hexadecimal
153
function _hex(dec: number | string): string;
154
155
// Add leading zeros to make 4-character hex string
156
function _pad4(str: string): string;
157
```
158
159
**Usage Examples:**
160
161
```javascript
162
// These are primarily for internal use and addon development
163
XRegExp._clipDuplicates('aabbcc'); // 'abc'
164
XRegExp._hasNativeFlag('u'); // true (if browser supports Unicode flag)
165
XRegExp._dec('FF'); // 255
166
XRegExp._hex(255); // 'ff'
167
XRegExp._pad4('A'); // '000A'
168
```
169
170
## Feature Details
171
172
### Astral Feature
173
174
Enables 21-bit Unicode support for characters beyond the Basic Multilingual Plane.
175
176
**When Installed:**
177
- Flag `A` is automatically added to all new XRegExps
178
- Unicode tokens can match astral code points (U+10000-U+10FFFF)
179
- Requires Unicode Base addon to be loaded
180
181
**Usage Examples:**
182
183
```javascript
184
// Before installing astral feature
185
const regex1 = XRegExp('\\p{Letter}'); // Only matches BMP characters
186
regex1.test('𝒜'); // false (mathematical script A is astral)
187
188
// Install astral feature
189
XRegExp.install('astral');
190
191
// After installing - flag A automatically added
192
const regex2 = XRegExp('\\p{Letter}'); // Automatically gets flag A
193
regex2.test('𝒜'); // true (now matches astral characters)
194
195
// Explicit flag A still works
196
const regex3 = XRegExp('\\p{Letter}', 'A');
197
regex3.test('𝒜'); // true
198
```
199
200
### Namespacing Feature
201
202
Controls where named capture groups appear in match results.
203
204
**When Installed (Default in XRegExp 5+):**
205
- Named captures appear on `match.groups` object (ES2018 standard)
206
- Follows modern JavaScript standards
207
208
**When Uninstalled (Legacy Mode):**
209
- Named captures appear directly on match array
210
- Backward compatible with XRegExp 4.x behavior
211
212
**Usage Examples:**
213
214
```javascript
215
const regex = XRegExp('(?<name>\\w+) (?<age>\\d+)');
216
const match = XRegExp.exec('John 25', regex);
217
218
// With namespacing installed (default)
219
XRegExp.install('namespacing');
220
console.log(match.groups.name); // 'John'
221
console.log(match.groups.age); // '25'
222
223
// With namespacing uninstalled (legacy)
224
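XRegExp.uninstall('namespacing');
225
const legacyMatch = XRegExp.exec('John 25', regex); // Re-run exec: where named groups are placed is decided when the match is made
console.log(legacyMatch.name); // 'John' (directly on match array)
226
console.log(legacyMatch.age); // '25'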
227
```
228
229
## Custom Token Examples
230
231
### Simple Token Replacement
232
233
```javascript
234
// Add \R for generic line break
235
XRegExp.addToken(
236
/\\R/,
237
() => '(?:\r\n|[\r\n\u2028\u2029])',
238
{ scope: 'all' }
239
);
240
241
// Usage
242
XRegExp('line1\\Rline2').test('line1\r\nline2'); // true
243
XRegExp('line1\\Rline2').test('line1\nline2'); // true
244
```
245
246
### Context-Aware Tokens
247
248
```javascript
249
// Add \Q...\E for literal text (like Perl)
250
let inLiteral = false;
251
252
XRegExp.addToken(
253
/\\[QE]/,
254
function(match) {
255
if (match[0] === '\\Q') {
256
inLiteral = true;
257
return '(?:';
258
} else { // \E
259
inLiteral = false;
260
return ')';
261
}
262
},
263
{ scope: 'default' }
264
);
265
266
// Escape literal content between \Q and \E
267
XRegExp.addToken(
268
/[^\\]+/,
269
function(match) {
270
return inLiteral ? XRegExp.escape(match[0]) : match[0];
271
},
272
{ scope: 'default' }
273
);
274
```
275
276
### Flag-Based Tokens
277
278
```javascript
279
// Add \w+ enhancement with flag W (extended word characters)
280
XRegExp.addToken(
281
/\\w/,
282
(match, scope, flags) => {
283
if (flags.includes('W')) {
284
return '[\\w\u00C0-\u017F]'; // Include Latin extended characters
285
}
286
return match[0]; // Standard \w
287
},
288
{
289
flag: 'W',
290
scope: 'default'
291
}
292
);
293
294
// Usage
295
XRegExp('^\\w+$', 'W').test('café'); // true (includes é)
296
XRegExp('^\\w+$').test('café'); // false (standard \w does not match é)
297
```
298
299
### Reparse Tokens
300
301
```javascript
302
// Token that expands to other tokens that need further processing
303
XRegExp.addToken(
304
/\\identifier/,
305
() => '\\p{ID_Start}\\p{ID_Continue}*',
306
{
307
reparse: true, // Allow Unicode tokens to be processed
308
optionalFlags: 'A' // Register flag A as optional
309
}
310
);
311
312
// Usage (requires Unicode Base addon)
313
XRegExp('\\identifier', 'A').test('變數名'); // true
314
```
315
316
## Cache Management
317
318
Control pattern and regex caching for performance.
319
320
```javascript { .api }
321
// Internal cache flush (used by addToken and addUnicodeData)
322
XRegExp.cache.flush('patterns'); // Flush pattern cache
323
XRegExp.cache.flush(); // Flush regex cache
324
```
325
326
**Usage Examples:**
327
328
```javascript
329
// Cache is automatically managed, but can be manually controlled
330
const regex1 = XRegExp.cache('\\d+', 'g'); // Cached
331
const regex2 = XRegExp.cache('\\d+', 'g'); // Same cached instance
332
console.log(regex1 === regex2); // true
333
334
// Adding tokens automatically flushes pattern cache
335
XRegExp.addToken(/\\test/, () => 'tested');
336
// Pattern cache is flushed, new compilations use updated tokens
337
```
338
339
## Error Handling
340
341
Proper error handling for invalid tokens and configurations.
342
343
```javascript
344
// Invalid flag characters
345
try {
346
XRegExp.addToken(/test/, () => '', { flag: 'ab' }); // Multi-char flag
347
} catch (e) {
348
console.log(e.message); // "Flag must be a single character A-Za-z0-9_$"
349
}
350
351
// Unknown flags in patterns
352
try {
353
XRegExp('test', 'Q'); // Unknown flag
354
} catch (e) {
355
console.log(e.message); // "Unknown regex flag Q"
356
}
357
358
// Invalid token usage
359
try {
360
XRegExp('\\p{UnknownProperty}', 'A'); // Unregistered Unicode property
361
} catch (e) {
362
console.log(e.message); // "Unknown Unicode token \p{UnknownProperty}"
363
}
364
```