or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

ast-types.md core-functions.md index.md parsing.md validation.md visitor.md

docs/parsing.md

0

# Regular Expression Parsing

1

2

The RegExpParser class provides advanced parsing functionality for converting regex strings into detailed Abstract Syntax Trees with full ECMAScript compliance.

3

4

## Capabilities

5

6

### RegExp Parser Class

7

8

Creates a parser instance with configurable options for parsing regular expressions into ASTs.

9

10

```typescript { .api }

11

/**

12

* Regular expression parser that converts regex strings to Abstract Syntax Trees

13

*/

14

class RegExpParser {

15

/**

16

* Initialize the parser with options

17

* @param options - Parser configuration options

18

*/

19

constructor(options?: RegExpParser.Options);

20

}

21

22

interface RegExpParser.Options {

23

/** The flag to disable Annex B syntax. Default is false */

24

strict?: boolean;

25

/**

26

* ECMAScript version. Default is 2022

27

* - 2015 added u and y flags

28

* - 2018 added s flag, Named Capturing Group, Lookbehind Assertion, and Unicode Property Escape

29

* - 2022 added d flag (hasIndices)

30

*/

31

ecmaVersion?: EcmaVersion;

32

}

33

```

34

35

**Usage Examples:**

36

37

```typescript

38

import { RegExpParser } from "regexpp";

39

40

// Create parser with default options (ES2022, non-strict)

41

const parser = new RegExpParser();

42

43

// Create parser for specific ECMAScript version

44

const es2018Parser = new RegExpParser({

45

ecmaVersion: 2018,

46

strict: false

47

});

48

49

// Create strict parser (disables Annex B features)

50

const strictParser = new RegExpParser({

51

strict: true,

52

ecmaVersion: 2022

53

});

54

```

55

56

### Parse Literal

57

58

Parses a complete regular expression literal including pattern and flags.

59

60

```typescript { .api }

61

/**

62

* Parse a regular expression literal

63

* @param source - The source code to parse (e.g., "/abc/g")

64

* @param start - The start index in the source code. Default is 0

65

* @param end - The end index in the source code. Default is source.length

66

* @returns The AST of the regular expression literal

67

*/

68

parseLiteral(source: string, start?: number, end?: number): AST.RegExpLiteral;

69

```

70

71

**Usage Examples:**

72

73

```typescript

74

import { RegExpParser } from "regexpp";

75

76

const parser = new RegExpParser();

77

78

// Parse complete literal

79

const ast1 = parser.parseLiteral("/[a-z]+/gi");

80

console.log(ast1.type); // "RegExpLiteral"

81

console.log(ast1.pattern.alternatives.length); // Number of alternatives

82

console.log(ast1.flags.global); // true

83

console.log(ast1.flags.ignoreCase); // true

84

85

// Parse substring of source

86

const source = "const pattern = /\\d{2,4}/g;";

87

const ast2 = parser.parseLiteral(source, 16, 26); // Just the "/\\d{2,4}/g" part

88

console.log(ast2.raw); // "/\\d{2,4}/g"

89

90

// Parse complex patterns

91

const ast3 = parser.parseLiteral("/(?<year>\\d{4})-(?<month>\\d{2})/");

92

console.log(ast3.pattern.alternatives[0].elements[0].name); // "year"

93

```

94

95

### Parse Pattern

96

97

Parses just the pattern part of a regular expression (without delimiters and flags).

98

99

```typescript { .api }

100

/**

101

* Parse a regular expression pattern

102

* @param source - The source code to parse (e.g., "abc")

103

* @param start - The start index in the source code. Default is 0

104

* @param end - The end index in the source code. Default is source.length

105

* @param uFlag - The flag to enable Unicode mode

106

* @returns The AST of the regular expression pattern

107

*/

108

parsePattern(source: string, start?: number, end?: number, uFlag?: boolean): AST.Pattern;

109

```

110

111

**Usage Examples:**

112

113

```typescript

114

import { RegExpParser } from "regexpp";

115

116

const parser = new RegExpParser();

117

118

// Parse simple pattern

119

const pattern1 = parser.parsePattern("abc");

120

console.log(pattern1.type); // "Pattern"

121

console.log(pattern1.alternatives[0].elements.length); // 3 (a, b, c)

122

123

// Parse pattern with Unicode flag

124

const pattern2 = parser.parsePattern("\\p{Letter}+", 0, undefined, true);

125

console.log(pattern2.alternatives[0].elements[0].type); // "Quantifier" (its .element is the "CharacterSet" for \p{Letter})

126

127

// Parse complex pattern

128

const pattern3 = parser.parsePattern("(\\d{2})|(\\w+)");

129

console.log(pattern3.alternatives.length); // 2 alternatives

130

console.log(pattern3.alternatives[0].elements[0].type); // "CapturingGroup"

131

132

// Parse substring

133

const source = "prefix-[a-z]+-suffix";

134

const pattern4 = parser.parsePattern(source, 7, 13); // Just "[a-z]+"

135

console.log(pattern4.alternatives[0].elements.length); // 1 (a Quantifier node wrapping the character class)

136

```

137

138

### Parse Flags

139

140

Parses just the flags part of a regular expression.

141

142

```typescript { .api }

143

/**

144

* Parse regular expression flags

145

* @param source - The source code to parse (e.g., "gim")

146

* @param start - The start index in the source code. Default is 0

147

* @param end - The end index in the source code. Default is source.length

148

* @returns The AST of the regular expression flags

149

*/

150

parseFlags(source: string, start?: number, end?: number): AST.Flags;

151

```

152

153

**Usage Examples:**

154

155

```typescript

156

import { RegExpParser } from "regexpp";

157

158

const parser = new RegExpParser();

159

160

// Parse flags

161

const flags1 = parser.parseFlags("gim");

162

console.log(flags1.type); // "Flags"

163

console.log(flags1.global); // true

164

console.log(flags1.ignoreCase); // true

165

console.log(flags1.multiline); // true

166

console.log(flags1.unicode); // false

167

168

// Parse all possible flags

169

const flags2 = parser.parseFlags("gimsuyd");

170

console.log(flags2.global); // true (g)

171

console.log(flags2.ignoreCase); // true (i)

172

console.log(flags2.multiline); // true (m)

173

console.log(flags2.dotAll); // true (s)

174

console.log(flags2.unicode); // true (u)

175

console.log(flags2.sticky); // true (y)

176

console.log(flags2.hasIndices); // true (d)

177

178

// Parse flags from substring

179

const source = "/pattern/gi;";

180

const flags3 = parser.parseFlags(source, 9, 11); // Just "gi"

181

console.log(flags3.global); // true

182

console.log(flags3.ignoreCase); // true

183

```

184

185

## Error Handling

186

187

All parsing methods may throw RegExpSyntaxError for invalid syntax:

188

189

```typescript

190

import { RegExpParser } from "regexpp";

191

192

const parser = new RegExpParser();

193

194

try {

195

parser.parseLiteral("/[z-a]/"); // Invalid character class range

196

} catch (error) {

197

console.log(error.name); // "SyntaxError" (RegExpSyntaxError extends SyntaxError)

198

console.log(error.message); // Detailed error message

199

console.log(error.index); // Position where error occurred

200

}

201

```

202

203

## Types

204

205

```typescript { .api }

206

type EcmaVersion = 5 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022;

207

208

// Return types from parsing methods

209

interface AST.RegExpLiteral extends AST.NodeBase {

210

type: "RegExpLiteral";

211

parent: null;

212

pattern: AST.Pattern;

213

flags: AST.Flags;

214

}

215

216

interface AST.Pattern extends AST.NodeBase {

217

type: "Pattern";

218

parent: AST.RegExpLiteral | null;

219

alternatives: AST.Alternative[];

220

}

221

222

interface AST.Flags extends AST.NodeBase {

223

type: "Flags";

224

parent: AST.RegExpLiteral | null;

225

dotAll: boolean; // s flag

226

global: boolean; // g flag

227

hasIndices: boolean; // d flag

228

ignoreCase: boolean; // i flag

229

multiline: boolean; // m flag

230

sticky: boolean; // y flag

231

unicode: boolean; // u flag

232

}

233

```