or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-matching.md · construction.md · execution.md · extensibility.md · index.md · pattern-building.md · string-processing.md · unicode-support.md

extensibility.mddocs/

0

# Extensibility and Configuration

1

2

Tools for extending XRegExp syntax and managing optional features to customize behavior and add new capabilities.

3

4

## Capabilities

5

6

### Token Extension System

7

8

Extend XRegExp syntax with custom tokens and flags.

9

10

```javascript { .api }

11

/**

12

* Extends XRegExp syntax and allows custom flags

13

* @param regex - Regex object that matches the new token

14

* @param handler - Function that returns new pattern string using native regex syntax

15

* @param options - Options object with optional properties

16

*/

17

function addToken(regex: RegExp, handler: (match: MatchArray, scope: TokenScope, flags: string) => string, options?: TokenOptions): void;

18

19

interface TokenOptions {

20

/** Scope where the token applies: 'default', 'class', or 'all' */

21

scope?: 'default' | 'class' | 'all';

22

/** Single-character flag that triggers the token */

23

flag?: string;

24

/** Custom flags checked within token handler (not required to trigger) */

25

optionalFlags?: string;

26

/** Whether handler output should be reparseable by other tokens */

27

reparse?: boolean;

28

/** Single character at beginning of successful matches (performance hint) */

29

leadChar?: string;

30

}

31

32

type TokenScope = 'default' | 'class';

33

type MatchArray = RegExpMatchArray & { [propName: string]: any };

34

```

35

36

**Usage Examples:**

37

38

```javascript

39

// Basic usage: Add \a for ALERT control code

40

XRegExp.addToken(

41

/\\a/,

42

() => '\\x07',

43

{ scope: 'all' }

44

);

45

XRegExp('\\a[\\a-\\n]+').test('\x07\n\x07'); // true

46

47

// Add custom flag: U (ungreedy) reverses greedy and lazy quantifiers

48

XRegExp.addToken(

49

/([?*+]|{\d+(?:,\d*)?})(\??)/,

50

(match) => `${match[1]}${match[2] ? '' : '?'}`,

51

{ flag: 'U' }

52

);

53

XRegExp('a+', 'U').exec('aaa')[0]; // 'a' (lazy)

54

XRegExp('a+?', 'U').exec('aaa')[0]; // 'aaa' (greedy)

55

56

// Token with reparse option for token chaining

57

XRegExp.addToken(

58

/\\macro{([^}]+)}/,

59

(match) => `{{expanded_${match[1]}}}`,

60

{ reparse: true } // Output will be processed by other tokens

61

);

62

63

// Performance optimization with leadChar

64

XRegExp.addToken(

65

/\\custom\d+/,

66

(match) => `[0-9]{${match[0].length - 7}}`,

67

{

68

leadChar: '\\', // Only check positions starting with backslash

69

scope: 'default'

70

}

71

);

72

```

73

74

### Feature Management

75

76

Install and manage optional XRegExp features.

77

78

```javascript { .api }

79

/**

80

* Installs optional features according to specified options

81

* @param options - Options object or string specifying features to install

82

*/

83

function install(options: string | FeatureOptions): void;

84

85

/**

86

* Uninstalls optional features according to specified options

87

* @param options - Options object or string specifying features to uninstall

88

*/

89

function uninstall(options: string | FeatureOptions): void;

90

91

/**

92

* Checks whether an individual optional feature is installed

93

* @param feature - Name of feature to check: 'astral' or 'namespacing'

94

* @returns Whether the feature is installed

95

*/

96

function isInstalled(feature: 'astral' | 'namespacing'): boolean;

97

98

interface FeatureOptions {

99

/** Enables support for astral code points in Unicode addons */

100

astral?: boolean;

101

/** Adds named capture groups to the groups property of matches */

102

namespacing?: boolean;

103

}

104

```

105

106

**Usage Examples:**

107

108

```javascript

109

// Install features with options object

110

XRegExp.install({

111

// Enables support for astral code points (implicitly sets flag A)

112

astral: true,

113

// Adds named capture groups to the groups property of matches

114

namespacing: true

115

});

116

117

// Install features with options string

118

XRegExp.install('astral namespacing');

119

120

// Check if features are installed

121

XRegExp.isInstalled('astral'); // true

122

XRegExp.isInstalled('namespacing'); // true

123

124

// Uninstall features

125

XRegExp.uninstall({

126

astral: true,

127

namespacing: true

128

});

129

130

// Check installation status

131

XRegExp.isInstalled('astral'); // false

132

XRegExp.isInstalled('namespacing'); // false

133

```

134

135

### Internal Utilities

136

137

Access internal utility functions for addon development.

138

139

```javascript { .api }

140

/**

141

* Internal utility functions exposed for testing and addons

142

*/

143

// Remove duplicate characters from string

144

function _clipDuplicates(str: string): string;

145

146

// Check if browser natively supports a regex flag

147

function _hasNativeFlag(flag: string): boolean;

148

149

// Convert hexadecimal to decimal

150

function _dec(hex: string): number;

151

152

// Convert decimal to hexadecimal

153

function _hex(dec: number | string): string;

154

155

// Add leading zeros to make 4-character hex string

156

function _pad4(str: string): string;

157

```

158

159

**Usage Examples:**

160

161

```javascript

162

// These are primarily for internal use and addon development

163

XRegExp._clipDuplicates('aabbcc'); // 'abc'

164

XRegExp._hasNativeFlag('u'); // true (if browser supports Unicode flag)

165

XRegExp._dec('FF'); // 255

166

XRegExp._hex(255); // 'ff'

167

XRegExp._pad4('A'); // '000A'

168

```

169

170

## Feature Details

171

172

### Astral Feature

173

174

Enables 21-bit Unicode support for characters beyond the Basic Multilingual Plane.

175

176

**When Installed:**

177

- Flag `A` is automatically added to all new XRegExps

178

- Unicode tokens can match astral code points (U+10000-U+10FFFF)

179

- Requires Unicode Base addon to be loaded

180

181

**Usage Examples:**

182

183

```javascript

184

// Before installing astral feature

185

const regex1 = XRegExp('\\p{Letter}'); // Only matches BMP characters

186

regex1.test('𝒜'); // false (mathematical script A is astral)

187

188

// Install astral feature

189

XRegExp.install('astral');

190

191

// After installing - flag A automatically added

192

const regex2 = XRegExp('\\p{Letter}'); // Automatically gets flag A

193

regex2.test('𝒜'); // true (now matches astral characters)

194

195

// Explicit flag A still works

196

const regex3 = XRegExp('\\p{Letter}', 'A');

197

regex3.test('𝒜'); // true

198

```

199

200

### Namespacing Feature

201

202

Controls where named capture groups appear in match results.

203

204

**When Installed (Default in XRegExp 5+):**

205

- Named captures appear on `match.groups` object (ES2018 standard)

206

- Follows modern JavaScript standards

207

208

**When Uninstalled (Legacy Mode):**

209

- Named captures appear directly on match array

210

- Backward compatible with XRegExp 4.x behavior

211

212

**Usage Examples:**

213

214

```javascript

215

const regex = XRegExp('(?<name>\\w+) (?<age>\\d+)');

216

const match = XRegExp.exec('John 25', regex);

217

218

// With namespacing installed (default)

219

XRegExp.install('namespacing');

220

console.log(match.groups.name); // 'John'

221

console.log(match.groups.age); // '25'

222

223

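// With namespacing uninstalled (legacy). Note: named groups are attached when exec runs, so re-run XRegExp.exec after toggling; an existing match object is not updated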

224

XRegExp.uninstall('namespacing');

225

console.log(match.name); // 'John' (directly on match array)

226

console.log(match.age); // '25'

227

```

228

229

## Custom Token Examples

230

231

### Simple Token Replacement

232

233

```javascript

234

// Add \R for generic line break

235

XRegExp.addToken(

236

/\\R/,

237

() => '(?:\r\n|[\r\n\u2028\u2029])',

238

{ scope: 'all' }

239

);

240

241

// Usage

242

XRegExp('line1\\Rline2').test('line1\r\nline2'); // true

243

XRegExp('line1\\Rline2').test('line1\nline2'); // true

244

```

245

246

### Context-Aware Tokens

247

248

```javascript

249

// Add \Q...\E for literal text (like Perl)

250

let inLiteral = false;

251

252

XRegExp.addToken(

253

/\\[QE]/,

254

function(match) {

255

if (match[0] === '\\Q') {

256

inLiteral = true;

257

return '(?:';

258

} else { // \E

259

inLiteral = false;

260

return ')';

261

}

262

},

263

{ scope: 'default' }

264

);

265

266

// Escape literal content between \Q and \E

267

XRegExp.addToken(

268

/[^\\]+/,

269

function(match) {

270

return inLiteral ? XRegExp.escape(match[0]) : match[0];

271

},

272

{ scope: 'default' }

273

);

274

```

275

276

### Flag-Based Tokens

277

278

```javascript

279

// Add \w+ enhancement with flag W (extended word characters)

280

XRegExp.addToken(

281

/\\w/,

282

(match, scope, flags) => {

283

if (flags.includes('W')) {

284

return '[\\w\u00C0-\u017F]'; // Include Latin extended characters

285

}

286

return match[0]; // Standard \w

287

},

288

{

289

flag: 'W',

290

scope: 'default'

291

}

292

);

293

294

// Usage

295

XRegExp('^\\w+$', 'W').test('café'); // true (includes é)

296

XRegExp('^\\w+$').test('café'); // false (standard \w does not match é; anchors are needed because an unanchored \w+ would still match 'caf')

297

```

298

299

### Reparse Tokens

300

301

```javascript

302

// Token that expands to other tokens that need further processing

303

XRegExp.addToken(

304

/\\identifier/,

305

() => '\\p{ID_Start}\\p{ID_Continue}*',

306

{

307

reparse: true, // Allow Unicode tokens to be processed

308

optionalFlags: 'A' // Register flag A as optional

309

}

310

);

311

312

// Usage (requires Unicode Base addon)

313

XRegExp('\\identifier', 'A').test('變數名'); // true

314

```

315

316

## Cache Management

317

318

Control pattern and regex caching for performance.

319

320

```javascript { .api }

321

// Internal cache flush (used by addToken and addUnicodeData)

322

XRegExp.cache.flush('patterns'); // Flush pattern cache

323

XRegExp.cache.flush(); // Flush regex cache

324

```

325

326

**Usage Examples:**

327

328

```javascript

329

// Cache is automatically managed, but can be manually controlled

330

const regex1 = XRegExp.cache('\\d+', 'g'); // Cached

331

const regex2 = XRegExp.cache('\\d+', 'g'); // Same cached instance

332

console.log(regex1 === regex2); // true

333

334

// Adding tokens automatically flushes pattern cache

335

XRegExp.addToken(/\\test/, () => 'tested');

336

// Pattern cache is flushed, new compilations use updated tokens

337

```

338

339

## Error Handling

340

341

Proper error handling for invalid tokens and configurations.

342

343

```javascript

344

// Invalid flag characters

345

try {

346

XRegExp.addToken(/test/, () => '', { flag: 'ab' }); // Multi-char flag

347

} catch (e) {

348

console.log(e.message); // "Flag must be a single character A-Za-z0-9_$"

349

}

350

351

// Unknown flags in patterns

352

try {

353

XRegExp('test', 'Q'); // Unknown flag

354

} catch (e) {

355

console.log(e.message); // "Unknown regex flag Q"

356

}

357

358

// Invalid token usage

359

try {

360

XRegExp('\\p{UnknownProperty}', 'A'); // Unregistered Unicode property

361

} catch (e) {

362

console.log(e.message); // "Unknown Unicode token \p{UnknownProperty}"

363

}

364

```