or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

character-sets.md index.md reconstruction.md tokenization.md

reconstruction.md docs/

0

# Token Reconstruction

1

2

Converts token structures back into valid regular expression strings, enabling regex transformation and analysis workflows. The reconstruction process handles proper escaping and formatting for all token types.

3

4

## Capabilities

5

6

### Reconstruct Function

7

8

Converts any token (or token tree) back into its regular expression string representation.

9

10

```typescript { .api }

11

/**

12

* Reconstructs a regular expression string from a token structure

13

* @param token - Any token (Root, Group, Character, etc.) to reconstruct

14

* @returns String representation of the regex component

15

* @throws Error for invalid token types

16

*/

17

function reconstruct(token: Tokens): string;

18

```

19

20

**Usage Examples:**

21

22

```typescript

23

import { tokenizer, reconstruct, types } from "ret";

24

25

// Reconstruct entire regex

26

const tokens = tokenizer("foo|bar");

27

const regex = reconstruct(tokens); // "foo|bar"

28

29

// Reconstruct individual tokens

30

const charToken = { type: types.CHAR, value: 102 };

31

reconstruct(charToken); // "f"

32

33

// Reconstruct complex structures

34

const setToken = {

35

type: types.SET,

36

set: [

37

{ type: types.CHAR, value: 97 }, // 'a'

38

{ type: types.CHAR, value: 98 }, // 'b'

39

{ type: types.CHAR, value: 99 } // 'c'

40

],

41

not: true

42

};

43

reconstruct(setToken); // "[^abc]"

44

45

// Reconstruct groups

46

const groupToken = {

47

type: types.GROUP,

48

remember: false,

49

stack: [

50

{ type: types.CHAR, value: 97 }, // 'a'

51

{ type: types.CHAR, value: 98 } // 'b'

52

]

53

};

54

reconstruct(groupToken); // "(?:ab)"

55

```

56

57

## Reconstruction Rules

58

59

### Character Reconstruction

60

61

Characters are converted to their string representation with proper escaping:

62

63

```typescript

64

// Special regex characters are escaped

65

reconstruct({ type: types.CHAR, value: 42 }); // "\\*" (asterisk)

66

reconstruct({ type: types.CHAR, value: 46 }); // "\\." (dot)

67

reconstruct({ type: types.CHAR, value: 91 }); // "\\[" (left bracket)

68

69

// Regular characters remain unescaped

70

reconstruct({ type: types.CHAR, value: 97 }); // "a"

71

reconstruct({ type: types.CHAR, value: 49 }); // "1"

72

```

73

74

### Position Reconstruction

75

76

Position tokens represent anchors and boundaries:

77

78

```typescript

79

reconstruct({ type: types.POSITION, value: "^" }); // "^"

80

reconstruct({ type: types.POSITION, value: "$" }); // "$"

81

reconstruct({ type: types.POSITION, value: "b" }); // "\\b"

82

reconstruct({ type: types.POSITION, value: "B" }); // "\\B"

83

```

84

85

### Reference Reconstruction

86

87

Backreferences are formatted with a backslash prefix:

88

89

```typescript

90

reconstruct({ type: types.REFERENCE, value: 1 }); // "\\1"

91

reconstruct({ type: types.REFERENCE, value: 9 }); // "\\9"

92

```

93

94

### Set Reconstruction

95

96

Character sets are reconstructed with proper bracket notation:

97

98

```typescript

99

// Regular character set

100

const regularSet = {

101

type: types.SET,

102

set: [{ type: types.CHAR, value: 97 }],

103

not: false

104

};

105

reconstruct(regularSet); // "[a]"

106

107

// Negated character set

108

const negatedSet = {

109

type: types.SET,

110

set: [{ type: types.CHAR, value: 97 }],

111

not: true

112

};

113

reconstruct(negatedSet); // "[^a]"

114

115

// Character range

116

const rangeSet = {

117

type: types.SET,

118

set: [{ type: types.RANGE, from: 97, to: 122 }],

119

not: false

120

};

121

reconstruct(rangeSet); // "[a-z]"

122

```

123

124

### Group Reconstruction

125

126

Groups are reconstructed with appropriate modifiers:

127

128

```typescript

129

// Capturing group

130

const capturingGroup = {

131

type: types.GROUP,

132

remember: true,

133

stack: [{ type: types.CHAR, value: 97 }]

134

};

135

reconstruct(capturingGroup); // "(a)"

136

137

// Non-capturing group

138

const nonCapturingGroup = {

139

type: types.GROUP,

140

remember: false,

141

stack: [{ type: types.CHAR, value: 97 }]

142

};

143

reconstruct(nonCapturingGroup); // "(?:a)"

144

145

// Named group

146

const namedGroup = {

147

type: types.GROUP,

148

remember: true,

149

name: "mygroup",

150

stack: [{ type: types.CHAR, value: 97 }]

151

};

152

reconstruct(namedGroup); // "(?<mygroup>a)"

153

154

// Positive lookahead

155

const lookahead = {

156

type: types.GROUP,

157

remember: false,

158

followedBy: true,

159

stack: [{ type: types.CHAR, value: 97 }]

160

};

161

reconstruct(lookahead); // "(?=a)"

162

163

// Negative lookahead

164

const negativeLookahead = {

165

type: types.GROUP,

166

remember: false,

167

notFollowedBy: true,

168

stack: [{ type: types.CHAR, value: 97 }]

169

};

170

reconstruct(negativeLookahead); // "(?!a)"

171

```

172

173

### Repetition Reconstruction

174

175

Quantifiers are reconstructed in their appropriate forms:

176

177

```typescript

178

// Optional (0 or 1)

179

const optional = {

180

type: types.REPETITION,

181

min: 0,

182

max: 1,

183

value: { type: types.CHAR, value: 97 }

184

};

185

reconstruct(optional); // "a?"

186

187

// One or more

188

const oneOrMore = {

189

type: types.REPETITION,

190

min: 1,

191

max: Infinity,

192

value: { type: types.CHAR, value: 97 }

193

};

194

reconstruct(oneOrMore); // "a+"

195

196

// Zero or more

197

const zeroOrMore = {

198

type: types.REPETITION,

199

min: 0,

200

max: Infinity,

201

value: { type: types.CHAR, value: 97 }

202

};

203

reconstruct(zeroOrMore); // "a*"

204

205

// Exact count

206

const exact = {

207

type: types.REPETITION,

208

min: 3,

209

max: 3,

210

value: { type: types.CHAR, value: 97 }

211

};

212

reconstruct(exact); // "a{3}"

213

214

// Range

215

const range = {

216

type: types.REPETITION,

217

min: 2,

218

max: 5,

219

value: { type: types.CHAR, value: 97 }

220

};

221

reconstruct(range); // "a{2,5}"

222

223

// Minimum with no maximum

224

const minimum = {

225

type: types.REPETITION,

226

min: 2,

227

max: Infinity,

228

value: { type: types.CHAR, value: 97 }

229

};

230

reconstruct(minimum); // "a{2,}"

231

```

232

233

### Root Reconstruction

234

235

Root tokens handle alternation and sequential patterns:

236

237

```typescript

238

// Sequential pattern

239

const sequential = {

240

type: types.ROOT,

241

stack: [

242

{ type: types.CHAR, value: 97 }, // 'a'

243

{ type: types.CHAR, value: 98 } // 'b'

244

]

245

};

246

reconstruct(sequential); // "ab"

247

248

// Alternation pattern

249

const alternation = {

250

type: types.ROOT,

251

options: [

252

[{ type: types.CHAR, value: 97 }], // 'a'

253

[{ type: types.CHAR, value: 98 }] // 'b'

254

]

255

};

256

reconstruct(alternation); // "a|b"

257

```

258

259

## Common Use Cases

260

261

### Regex Transformation

262

263

```typescript

264

import { tokenizer, reconstruct } from "ret";

265

266

// Parse, modify, and reconstruct

267

const tokens = tokenizer("a+");

268

const repetition = tokens.stack[0] as any;

269

repetition.min = 2; // Change from 1+ to 2+

270

repetition.max = 4; // Cap the maximum at 4, so the quantifier becomes {2,4}

271

const modified = reconstruct(tokens); // "a{2,4}"

272

```

273

274

### Regex Analysis

275

276

```typescript

277

import { tokenizer, reconstruct, types } from "ret";

278

279

function extractGroups(regexStr: string): string[] {

280

const tokens = tokenizer(regexStr);

281

const groups: string[] = [];

282

283

function walkTokens(token: any) {

284

if (token.type === types.GROUP && token.remember) {

285

groups.push(reconstruct(token));

286

}

287

if (token.stack) {

288

token.stack.forEach(walkTokens);

289

}

290

if (token.options) {

291

token.options.forEach((option: any) => option.forEach(walkTokens));

292

}

293

}

294

295

walkTokens(tokens);

296

return groups;

297

}

298

299

const groups = extractGroups("(foo)|(bar)"); // ["(foo)", "(bar)"]

300

```