or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

ast-compilation.md · cli.md · generated-parsers.md · grammar-parsing.md · index.md · parser-generation.md

docs/grammar-parsing.md

0

# Grammar Parsing

1

2

This page covers the low-level grammar parsing functionality that converts PEG grammar strings into Abstract Syntax Trees (ASTs). The `parser` namespace provides direct access to Peggy's grammar parser and its syntax error type.

3

4

## Capabilities

5

6

### Parse Grammar Function

7

8

Parses PEG grammar definitions into AST representations for further processing by the compiler.

9

10

```typescript { .api }

11

namespace parser {

12

/**

13

* Parses grammar and returns the grammar AST

14

* @param grammar - Source text of the PEG grammar

15

* @param options - Parser options for grammar parsing

16

* @returns Grammar AST ready for compilation

17

* @throws {parser.SyntaxError} If the grammar has incorrect syntax

18

*/

19

function parse(grammar: string, options?: Options): ast.Grammar;

20

21

/**

22

* Parser options for grammar parsing

23

*/

24

interface Options {

25

/** Source identifier attached to LocationRange objects */

26

grammarSource?: any;

27

/** Words not allowed as label names */

28

reservedWords: string[];

29

/** Start rule - must be "Grammar" */

30

startRule?: "Grammar";

31

}

32

}

33

```

34

35

**Usage Examples:**

36

37

```typescript

38

import { parser } from "peggy";

39

40

// Basic grammar parsing

41

const grammar = `

42

start = "hello" " " name:[a-z]+ { return "Hello, " + name.join(""); }

43

`;

44

45

try {

46

const ast = parser.parse(grammar, {

47

reservedWords: ["function", "class", "var"],

48

grammarSource: "my-grammar.peggy"

49

});

50

51

console.log(ast.rules.length); // Number of rules

52

console.log(ast.rules[0].name); // "start"

53

} catch (error) {

54

console.error("Parse error:", error instanceof Error ? error.message : error);

55

}

56

```

57

58

### Grammar Syntax Errors

59

60

Detailed syntax error reporting with location information and expectation details.

61

62

```typescript { .api }

63

namespace parser {

64

/**

65

* Grammar syntax error with detailed location and expectation info

66

*/

67

class SyntaxError extends globalThis.SyntaxError {

68

/** Location where error occurred */

69

location: LocationRange;

70

/** Possible tokens expected at error location */

71

expected: Expectation[] | null;

72

/** Character found at error location */

73

found: string | null;

74

75

constructor(

76

message: string,

77

expected: Expectation[] | null,

78

found: string | null,

79

location: LocationRange

80

);

81

82

/**

83

* Format error with source context

84

* @param sources - Source text mapping for context

85

* @returns Formatted error with source lines

86

*/

87

format(sources: SourceText[]): string;

88

89

/**

90

* Build human-readable message from expectations

91

* @param expected - Array of expected items

92

* @param found - Text found instead of expected

93

* @returns Human-readable error message

94

*/

95

static buildMessage(expected: Expectation[], found: string): string;

96

}

97

}

98

```

99

100

**Error Handling Example:**

101

102

```typescript

103

import { parser } from "peggy";

104

105

try {

106

const ast = parser.parse("start = (", {

107

reservedWords: [],

108

grammarSource: "test.peggy"

109

});

110

} catch (error) {

111

if (error instanceof parser.SyntaxError) {

112

console.log("Syntax error at line", error.location.start.line);

113

console.log("Expected:", error.expected);

114

console.log("Found:", error.found);

115

116

// Format with source context

117

const formatted = error.format([{

118

source: "test.peggy",

119

text: "start = ("

120

}]);

121

console.log(formatted);

122

}

123

}

124

```

125

126

### Expectation Types

127

128

Different types of expectations that can appear in syntax errors, describing what the parser was looking for.

129

130

```typescript { .api }

131

namespace parser {

132

/**

133

* Union of all expectation types

134

*/

135

type Expectation =

136

| LiteralExpectation

137

| ClassExpectation

138

| AnyExpectation

139

| EndExpectation

140

| OtherExpectation;

141

142

/**

143

* Expected literal string

144

*/

145

interface LiteralExpectation {

146

type: "literal";

147

/** Expected sequence of characters */

148

text: string;

149

/** If true, case-insensitive matching */

150

ignoreCase: boolean;

151

}

152

153

/**

154

* Expected character class

155

*/

156

interface ClassExpectation {

157

type: "class";

158

/** Character ranges and individual characters */

159

parts: (string[] | string)[];

160

/** If true, negated character class */

161

inverted: boolean;

162

/** If true, case-insensitive matching */

163

ignoreCase: boolean;

164

}

165

166

/**

167

* Expected any character

168

*/

169

interface AnyExpectation {

170

type: "any";

171

}

172

173

/**

174

* Expected end of input

175

*/

176

interface EndExpectation {

177

type: "end";

178

}

179

180

/**

181

* Expected other item (from expected() or rule names)

182

*/

183

interface OtherExpectation {

184

type: "other";

185

/** Human-readable description */

186

description: string;

187

}

188

}

189

```

190

191

## AST Structure

192

193

The Abstract Syntax Tree structure returned by the parser, representing the complete grammar definition.

194

195

### Grammar Node

196

197

The root node representing the entire grammar with all its components.

198

199

```typescript { .api }

200

namespace ast {

201

/**

202

* Root grammar AST node

203

*/

204

interface Grammar extends Node<"grammar"> {

205

/** Imported external grammars */

206

imports: GrammarImport[];

207

/** Code executed once when importing parser module */

208

topLevelInitializer?: TopLevelInitializer | TopLevelInitializer[];

209

/** Code executed each time parse() is called */

210

initializer?: Initializer | Initializer[];

211

/** All rules in the grammar */

212

rules: Rule[];

213

214

// Added by compiler passes:

215

/** Generated JavaScript code and source map */

216

code?: SourceNode;

217

/** String literals used in bytecode */

218

literals?: string[];

219

/** Character classes used in bytecode */

220

classes?: GrammarCharacterClass[];

221

/** Error expectations used in bytecode */

222

expectations?: GrammarExpectation[];

223

/** Imported names for library references */

224

importedNames?: string[];

225

/** User-defined functions (actions, predicates) */

226

functions?: FunctionConst[];

227

/** Location ranges used in bytecode */

228

locations?: LocationRange[];

229

}

230

231

/**

232

* Base AST node interface

233

*/

234

interface Node<T> {

235

/** Node type identifier */

236

type: T;

237

/** Source location of this node */

238

location: LocationRange;

239

}

240

}

241

```

242

243

### Rule and Expression Types

244

245

Grammar rules and the various expression types that can appear in rule definitions.

246

247

```typescript { .api }

248

namespace ast {

249

/**

250

* Grammar rule definition

251

*/

252

interface Rule extends Expr<"rule"> {

253

/** Rule identifier */

254

name: string;

255

/** Location of rule name for error reporting */

256

nameLocation: LocationRange;

257

/** Rule's parsing expression */

258

expression: Expression | Named;

259

/** Generated bytecode (added by compiler) */

260

bytecode?: number[];

261

}

262

263

/**

264

* Named rule expression

265

*/

266

interface Named extends Expr<"named"> {

267

/** Human-readable rule name for errors */

268

name: string;

269

/** Underlying expression */

270

expression: Expression;

271

}

272

273

/**

274

* Union of all expression types

275

*/

276

type Expression =

277

| Action

278

| Choice

279

| Labeled

280

| Prefixed

281

| Primary

282

| Repeated

283

| Sequence

284

| Suffixed;

285

286

/**

287

* Base expression interface

288

*/

289

interface Expr<T> extends Node<T> {

290

/** Match result estimation (added by compiler) */

291

match?: MatchResult;

292

}

293

294

/**

295

* Match result enumeration

296

*/

297

enum MatchResult {

298

ALWAYS = 1, // Always matches

299

SOMETIMES = 0, // May or may not match

300

NEVER = -1 // Never matches

301

}

302

}

303

```

304

305

### Primary Expression Types

306

307

The fundamental expression types that form the building blocks of PEG grammars.

308

309

```typescript { .api }

310

namespace ast {

311

/**

312

* Union of primary expression types

313

*/

314

type Primary =

315

| Any

316

| CharacterClass

317

| Group

318

| LibraryReference

319

| Literal

320

| RuleReference

321

| SemanticPredicate;

322

323

/**

324

* Reference to another rule

325

*/

326

interface RuleReference extends Expr<"rule_ref"> {

327

/** Name of referenced rule */

328

name: string;

329

}

330

331

/**

332

* Reference to rule in imported library

333

*/

334

interface LibraryReference extends Expr<"library_ref"> {

335

/** Rule name (undefined for default rule) */

336

name: string | undefined;

337

/** Library namespace name */

338

library: string;

339

/** Import statement index */

340

libraryNumber: number;

341

}

342

343

/**

344

* Literal string match

345

*/

346

interface Literal extends Expr<"literal"> {

347

/** String to match */

348

value: string;

349

/** Case-insensitive matching */

350

ignoreCase: boolean;

351

}

352

353

/**

354

* Character class match

355

*/

356

interface CharacterClass extends Expr<"class"> {

357

/** Character ranges and individual characters */

358

parts: (string[] | string | ClassEscape)[];

359

/** Negated character class */

360

inverted: boolean;

361

/** Case-insensitive matching */

362

ignoreCase: boolean;

363

/** Unicode mode */

364

unicode: boolean;

365

}

366

367

/**

368

* Match any character

369

*/

370

interface Any extends Expr<"any"> {}

371

372

/**

373

* Semantic predicate (code that returns boolean)

374

*/

375

interface SemanticPredicate extends CodeBlockExpr<"semantic_and" | "semantic_not"> {}

376

377

/**

378

* Grouping expression for label scoping

379

*/

380

interface Group extends Expr<"group"> {

381

expression: Labeled | Sequence;

382

}

383

}

384

```

385

386

**AST Usage Example:**

387

388

```typescript

389

import { parser } from "peggy";

390

391

const grammar = `

392

start = "hello" name:word { return "Hello, " + name; }

393

word = $[a-z]+

394

`;

395

396

const ast = parser.parse(grammar, { reservedWords: [] });

397

398

// Examine the AST structure

399

console.log("Number of rules:", ast.rules.length);

400

console.log("First rule name:", ast.rules[0].name);

401

console.log("First rule type:", ast.rules[0].expression.type);

402

403

// Walk through rules

404

ast.rules.forEach(rule => {

405

console.log(`Rule ${rule.name} at line ${rule.location.start.line}`);

406

});

407

```

408

409

## Complete AST Types

410

411

The `ast` namespace provides comprehensive type definitions for all AST node types in Peggy grammars.

412

413

### Core AST Interfaces

414

415

Base interfaces used throughout the AST type system.

416

417

```typescript { .api }

418

namespace ast {

419

/**

420

* Base AST node interface

421

* @template T - Node type identifier

422

*/

423

interface Node<T> {

424

/** Node type identifier */

425

type: T;

426

/** Source location of this node */

427

location: LocationRange;

428

}

429

430

/**

431

* Base expression node interface

432

* @template T - Expression type identifier

433

*/

434

interface Expr<T> extends Node<T> {

435

/** Match result estimation (added by compiler passes) */

436

match?: MatchResult;

437

}

438

439

/**

440

* Match result enumeration for expressions

441

*/

442

enum MatchResult {

443

ALWAYS = 1, // Expression always matches

444

SOMETIMES = 0, // Expression may match

445

NEVER = -1 // Expression never matches

446

}

447

}

448

```