Tessl Tile for maven/org.antlr/antlr-runtime@3.5.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

character-streams.md debug-support.md error-handling.md index.md lexical-analysis.md parsing.md token-streams.md tree-construction.md

parsing.mddocs/

0
# Parsing
1

2
Core parsing infrastructure including parser base classes, rule return scopes, and recognizer functionality. The parsing system processes token streams to build parse trees and abstract syntax trees.
3

4
## Capabilities
5

6
### Parser Base Class
7

8
Concrete parser class for processing token streams with error recovery and backtracking support.
9

10
```java { .api }
11
/**
12
 * A parser for TokenStreams. "parser grammars" result in a subclass of this.
13
 */
14
public class Parser extends BaseRecognizer {
15
    public TokenStream input;
16
    
17
    public Parser(TokenStream input);
18
    public Parser(TokenStream input, RecognizerSharedState state);
19
    
20
    public void reset();
21
    public void setTokenStream(TokenStream input);
22
    public TokenStream getTokenStream();
23
    
24
    protected Object getCurrentInputSymbol(IntStream input);
25
    protected Object getMissingSymbol(IntStream input, RecognitionException e, 
26
                                     int expectedTokenType, BitSet follow);
27
    
28
    /** Set the token stream and reset the parser */
29
    public void setTokenStream(TokenStream input);
30
    
31
    public void traceIn(String ruleName, int ruleIndex);
32
    public void traceOut(String ruleName, int ruleIndex);
33
}
34
```
35

36
**Usage Examples:**
37

38
```java
39
import org.antlr.runtime.*;
40

41
// Create parser with token stream
42
MyLexer lexer = new MyLexer(new ANTLRStringStream("x = 42;"));
43
CommonTokenStream tokens = new CommonTokenStream(lexer);
44
MyParser parser = new MyParser(tokens);
45

46
// Parse starting from rule
47
try {
48
    MyParser.program_return result = parser.program();
49
    System.out.println("Parse successful");
50
    
51
    // Get parse tree if available
52
    Object tree = result.getTree();
53
    if (tree != null) {
54
        System.out.println("Tree: " + tree.toString());
55
    }
56
} catch (RecognitionException e) {
57
    System.err.println("Parse error: " + e.getMessage());
58
}
59

60
// Reset parser for reuse
61
parser.reset();
62
parser.setTokenStream(new CommonTokenStream(new MyLexer(new ANTLRStringStream("y = 10;"))));
63
```
64

65
### Base Recognizer
66

67
Abstract base class providing common functionality for all ANTLR recognizers.
68

69
```java { .api }
70
/**
71
 * A generic recognizer that can handle recognizers generated from
72
 * lexer, parser, and tree grammars. This is all the parsing
73
 * support code essentially; most of it is error recovery stuff and
74
 * backtracking.
75
 */
76
public abstract class BaseRecognizer {
77
    public static final int MEMO_RULE_FAILED = -2;
78
    public static final int MEMO_RULE_UNKNOWN = -1;
79
    public static final int INITIAL_FOLLOW_STACK_SIZE = 100;
80
    public static final int DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL;
81
    public static final int HIDDEN = Token.HIDDEN_CHANNEL;
82
    public static final String NEXT_TOKEN_RULE_NAME = "nextToken";
83
    
84
    protected RecognizerSharedState state;
85
    
86
    public BaseRecognizer();
87
    public BaseRecognizer(RecognizerSharedState state);
88
    
89
    /** reset the parser's state.  Subclasses must rewinds the input stream. */
90
    public void reset();
91
    
92
    /** Match current input symbol against ttype.  Attempt
93
     *  single token insertion or deletion error recovery.  If
94
     *  that fails, throw MismatchedTokenException.
95
     */
96
    public Object match(IntStream input, int ttype, BitSet follow) throws RecognitionException;
97
    
98
    /** Match the wildcard: in a symbol */
99
    public void matchAny(IntStream input);
100
    
101
    public boolean mismatchIsUnwantedToken(IntStream input, int ttype);
102
    public boolean mismatchIsMissingToken(IntStream input, BitSet follow);
103
    
104
    /** Report a recognition problem.
105
     *  This method sets errorRecovery to indicate the parser is recovering
106
     *  not parsing.  Once in recovery mode, no errors are generated.
107
     *  To get out of recovery mode, the parser must successfully match
108
     *  a token (after a resync).  So it will go:
109
     *
110
     *      1. error occurs
111
     *      2. enter recovery mode, report error
112
     *      3. consume until token found in resynch set
113
     *      4. try to resume parsing
114
     *      5. next match() will reset errorRecovery mode
115
     */
116
    public void reportError(RecognitionException e);
117
    
118
    public void displayRecognitionError(String[] tokenNames, RecognitionException e);
119
    
120
    /** What error message should be generated for the various exception types? */
121
    public String getErrorMessage(RecognitionException e, String[] tokenNames);
122
    
123
    /** How should a token be displayed in an error message? The default
124
     *  is to display just the text, but during development you might
125
     *  want to have lots of information spit out.  Override in that case
126
     *  to use t.toString() (which, for CommonToken, dumps everything about
127
     *  the token). This is better than forcing you to override a method in
128
     *  your token objects because you don't have to go modify your lexer
129
     *  so that it creates a new Java type.
130
     */
131
    public String getTokenErrorDisplay(Token t);
132
    
133
    /** Override this method to change where error messages go */
134
    public void emitErrorMessage(String msg);
135
    
136
    /** Recover from an error found on the input stream.  This is for NoViableAlt and
137
     *  mismatched symbol exceptions.  If you enable single token insertion and deletion,
138
     *  this will usually not handle mismatched symbol exceptions but there could be
139
     *  a mismatched token that the match() routine could not recover from.
140
     */
141
    public void recover(IntStream input, RecognitionException re);
142
    
143
    /** A hook to listen in on the token consumption during error recovery.
144
     *  The DebugParser subclasses this to fire events to the listenter.
145
     */
146
    public void beginResync();
147
    public void endResync();
148
    
149
    /** Compute the error recovery set for the current rule.  During
150
     *  rule invocation, the parser pushes the set of tokens that can
151
     *  follow that rule reference on the stack; this amounts to
152
     *  computing FIRST of what follows the rule reference in the
153
     *  enclosing rule. This local follow set only includes tokens
154
     *  from within the rule; i.e., the FIRST computation done on
155
     *  FOLLOW(r) for r not in context.
156
     */
157
    protected BitSet computeErrorRecoverySet();
158
    
159
    /** Compute the context-sensitive FOLLOW set for current rule.
160
     *  This is set of token types that can follow a specific rule
161
     *  reference given a specific call chain.  You get the set of
162
     *  viable tokens that can possibly come next (lookahead depth 1)
163
     *  given the current call chain.  Contrast this with the
164
     *  definition of plain FOLLOW for rule r:
165
     *
166
     *   FOLLOW(r) = set of (local) tokens that can possibly follow
167
     *               references to r in *any* sentential form
168
     *               (r-->(alpha) beta, r->(alpha) gamma, ...)
169
     *
170
     *  Note that the parser pushes just the local FOLLOW sets and upon
171
     *  error, we want to combine these below all scope until we reach
172
     *  a recovery rule (typically the top-level rule).
173
     */
174
    protected BitSet computeContextSensitiveRuleFOLLOW();
175
    
176
    protected BitSet combineFollows(boolean exact);
177
    
178
    /** Attempt to recover from a single missing or extra token.
179
     *
180
     *  EXTRA TOKEN
181
     *
182
     *  LA(1) is not what we are looking for.  If LA(2) has the right token,
183
     *  however, then assume LA(1) is some extra token and delete it.
184
     *  LA(2) must be part of the follow set of the missing token.
185
     *  This can only happen if you are trying to match two tokens and
186
     *  the first one is wrong.  To match two tokens, either we backtrack
187
     *  (which is slow) or match the second token after seeing the first
188
     *  token is wrong. If we match second token, we have to delete the
189
     *  first token. This is hard to do so typically we backtrack or
190
     *  just do the default recovery which is to panic and skip until
191
     *  we find something we know about.
192
     *
193
     *  MISSING TOKEN
194
     *
195
     *  If current token is consistent with what could come after missing
196
     *  token, then it is ok to "insert" the missing token.  LA(1) is
197
     *  what we want to match next and if LA(1) is consistent with what
198
     *  could come after the expected token, then we assume the expected token
199
     *  is missing and we insert it.  For example, Input "i=(3;" is missing the
200
     *  ')'.  At ')' we have LA(1)==';' which is consistent with
201
     *  what could come after ')' so we generate().
202
     */
203
    protected Object recoverFromMismatchedToken(IntStream input, int ttype, BitSet follow) 
204
        throws RecognitionException;
205
    
206
    /** Not currently used */
207
    public Object recoverFromMismatchedSet(IntStream input, RecognitionException e, BitSet follow) 
208
        throws RecognitionException;
209
    
210
    protected Object getCurrentInputSymbol(IntStream input);
211
    protected Object getMissingSymbol(IntStream input, RecognitionException e, 
212
                                     int expectedTokenType, BitSet follow);
213
    
214
    /** Conjure up a missing token during error recovery.
215
     *  The recognizer attempts to recover from single missing
216
     *  symbols. But, actions might refer to that missing symbol.
217
     *  For example, x=ID {f($x);}. The f($x) refers to the missing ID.
218
     *  If you just return User.INVALID_TOKEN_TYPE, then f($x) will try to invoke
219
     *  you action with a null argument. It's better to conjure one up
220
     *  as follows:
221
     *
222
     *      ID(CommonToken(ID, "ID"));
223
     *
224
     *  where ID is the token type of the missing token. getMissingSymbol
225
     *  makes no assumptions about which Token object you use for
226
     *  the missing symbol.  
227
     *
228
     *  This method is used primarily for error recovery not on the
229
     *  critical path.
230
     */
231
    protected Token getMissingSymbol(IntStream input, RecognitionException e, 
232
                                   int expectedTokenType, BitSet follow);
233
    
234
    public void pushFollow(BitSet fset);
235
    protected void popFollow();
236
    
237
    /** Return List<String> of the rules in your parser instance
238
     *  leading up to a call to the current rule.  You could override if
239
     *  you want more details such as the file/line info of where
240
     *  in the parser java code a rule is invoked.
241
     *
242
     *  This is very useful for error messages.
243
     */
244
    public List<String> getRuleInvocationStack();
245
    public List<String> getRuleInvocationStack(Throwable e);
246
    
247
    public String getBacktrackingLevel();
248
    
249
    /** Used to print out token names like ID during debugging and
250
     *  error reporting.  The generated parsers implement a method
251
     *  that overrides this to point to their String[] tokenNames.
252
     */
253
    public String[] getTokenNames();
254
    
255
    /** For debugging and other purposes, might want the grammar name.
256
     *  Have ANTLR generate an implementation for this method.
257
     */
258
    public String getGrammarFileName();
259
    
260
    public String getSourceName();
261
    
262
    /** A convenience method for use most often with template rewrites.
263
     *  Convert a List<Token> to List<String>
264
     */
265
    public List<String> toStrings(List<Token> tokens);
266
    
267
    /** Given a rule number and a start token index number, return
268
     *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
269
     *  start index.  If this rule has parsed input starting from the
270
     *  start index before, then return where the rule stopped parsing.
271
     *  It returns the index of the last token matched by the rule.
272
     *
273
     *  For now we use a hashtable and just the slow Object-based one.
274
     *  Later, we can make a special one for ints and also one that
275
     *  tosses out entries after we commit past input position i.
276
     */
277
    public Integer getRuleMemoization(int ruleIndex, int ruleStartIndex);
278
    
279
    /** Has this rule already parsed input at the current index in the
280
     *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
281
     *  If we attempted but failed to parse properly before, return
282
     *  MEMO_RULE_FAILED.
283
     *
284
     *  This method has a side-effect: if we have seen this input for
285
     *  this rule before, then seek ahead to 1 past the stop token matched
286
     *  for this rule last time.
287
     */
288
    public boolean alreadyParsedRule(IntStream input, int ruleIndex);
289
    
290
    /** Record whether or not this rule parsed the input at this position
291
     *  successfully.  Use a standard java hashtable for now.
292
     */
293
    public void memoize(IntStream input, int ruleIndex, int ruleStartIndex);
294
}
295
```
296

297
**Usage Examples:**
298

299
```java
300
// Error handling in parser
301
public class MyParser extends Parser {
302
    public void displayRecognitionError(String[] tokenNames, RecognitionException e) {
303
        String hdr = getErrorHeader(e);
304
        String msg = getErrorMessage(e, tokenNames);
305
        System.err.println(hdr + " " + msg);
306
    }
307
    
308
    public String getErrorMessage(RecognitionException e, String[] tokenNames) {
309
        // Custom error message formatting
310
        return "Syntax error at line " + e.line + ": " + super.getErrorMessage(e, tokenNames);
311
    }
312
}
313

314
// Using follow sets for error recovery
315
BitSet follow = new BitSet();
316
follow.add(MyParser.SEMICOLON);
317
follow.add(MyParser.RBRACE);
318
Token recovered = (Token)recoverFromMismatchedToken(input, MyParser.IDENTIFIER, follow);
319
```
320

321
### Rule Return Scopes
322

323
Return value containers for parser rules supporting tokens, trees, and custom attributes.
324

325
```java { .api }
326
/**
327
 * Return value holder for rule invocations
328
 */
329
public class RuleReturnScope {
330
    /** Return the start token or tree */
331
    public Object getStart();
332
    public void setStart(Object start);
333
    
334
    /** Return the stop token or tree */
335
    public Object getStop();
336
    public void setStop(Object stop);
337
    
338
    /** Has a value potentially if output=AST; */
339
    public Object getTree();
340
    public void setTree(Object tree);
341
}
342

343
/**
344
 * Parser rule return scope
345
 */
346
public class ParserRuleReturnScope extends RuleReturnScope {
347
    /** First token matched by this rule */
348
    public Token start;
349
    /** Last token matched by this rule */
350
    public Token stop;
351
    
352
    public Object getStart();
353
    public Object getStop();
354
}
355
```
356

357
**Usage Examples:**
358

359
```java
360
// Generated parser rule return type
361
public static class expression_return extends ParserRuleReturnScope {
362
    public CommonTree tree;
363
    public Object getTree() { return tree; }
364
};
365

366
// Using rule return in parser
367
public final expression_return expression() throws RecognitionException {
368
    expression_return retval = new expression_return();
369
    retval.start = input.LT(1);
370
    
371
    try {
372
        // Rule implementation...
373
        
374
        retval.stop = input.LT(-1);
375
        // retval.tree = ...
376
        
377
    } catch (RecognitionException re) {
378
        reportError(re);
379
        recover(input, re);
380
    }
381
    
382
    return retval;
383
}
384

385
// Using returned values
386
MyParser.expression_return result = parser.expression();
387
Token startToken = result.start;
388
Token stopToken = result.stop;
389
CommonTree tree = (CommonTree) result.getTree();
390
```
391

392
### Recognizer Shared State
393

394
Shared state object for managing parser state across rule invocations.
395

396
```java { .api }
397
/**
398
 * Shared state between recognizer instances
399
 */
400
public class RecognizerSharedState {
401
    /** Track the set of token types that can follow any rule invocation.
402
     *  Stack grows upwards.
403
     */
404
    public BitSet[] following;
405
    public int _fsp = -1;
406
    
407
    /** This is true when we see an error and before having successfully
408
     *  matched a token.  Prevents generation of more than one error message
409
     *  per error.
410
     */
411
    public boolean errorRecovery = false;
412
    
413
    /** The index into the input stream where the last error occurred.
414
     *  This is used to prevent infinite loops where an error is found
415
     *  but no token is consumed during recovery...another error is found,
416
     *  ad nauseam.  This is a failsafe mechanism to guarantee that at least
417
     *  one token/tree node is consumed for two errors.
418
     */
419
    public int lastErrorIndex = -1;
420
    
421
    /** In lieu of a return value, this indicates that a rule or token
422
     *  has failed to match.  Reset to false upon valid token match.
423
     */
424
    public boolean failed = false;
425
    
426
    /** Did the recognizer encounter a syntax error?  Track how many. */
427
    public int syntaxErrors = 0;
428
    
429
    /** If 0, no backtracking is going on.  Safe to exec actions etc...
430
     *  If >0 then it's the level of backtracking.
431
     */
432
    public int backtracking = 0;
433
    
434
    /** An array[size num rules] of Map<Integer,Integer> that tracks
435
     *  the stop token index for each rule.  ruleMemo[ruleIndex] is
436
     *  the memoization table for ruleIndex.  For key ruleStartIndex, you
437
     *  get back the stop token for associated rule or MEMO_RULE_FAILED.
438
     *
439
     *  This is only used if rule memoization is on.
440
     */
441
    public Map<Integer, Integer>[] ruleMemo;
442
    
443
    // LEXER FIELDS (must be in same state object to avoid casting
444
    // constantly in generated code and Lexer object)
445
    
446
    /** The goal of all lexer rules/methods is to create a token object.
447
     *  This is an instance variable as multiple rules may collaborate to
448
     *  create a single token.  nextToken will return this object after
449
     *  matching lexer rule(s).  If you subclass to allow multiple token
450
     *  emissions, then set this to the last token to be matched or
451
     *  something nonnull so that the auto token emit mechanism will not
452
     *  emit another token.
453
     */
454
    public Token token;
455
    
456
    /** What character index in the stream did the current token start at? */
457
    public int tokenStartCharIndex = -1;
458
    
459
    /** The line on which the first character of the token resides */
460
    public int tokenStartLine;
461
    
462
    /** The character position of first character within the line */
463
    public int tokenStartCharPositionInLine;
464
    
465
    /** The channel number for the current token */
466
    public int channel;
467
    
468
    /** The token type for the current token */
469
    public int type;
470
    
471
    /** You can set the text for the current token to override what is in
472
     *  the input char buffer.  Use setText() or can set this instance var.
473
     */
474
    public String text;
475
}
476
```
477

478
## Types
479

480
### Parser State Management
481

482
```java { .api }
483
public class RecognizerSharedState {
484
    // Following stack for error recovery
485
    public BitSet[] following;
486
    public int _fsp = -1;
487
    
488
    // Error recovery state
489
    public boolean errorRecovery = false;
490
    public int lastErrorIndex = -1;
491
    public boolean failed = false;
492
    public int syntaxErrors = 0;
493
    
494
    // Backtracking state
495
    public int backtracking = 0;
496
    
497
    // Memoization tables
498
    public Map<Integer, Integer>[] ruleMemo;
499
    
500
    // Token creation state (for lexers)
501
    public Token token;
502
    public int tokenStartCharIndex = -1;
503
    public int tokenStartLine;
504
    public int tokenStartCharPositionInLine;
505
    public int channel;
506
    public int type;
507
    public String text;
508
}
509
```
510

511
## Common Patterns
512

513
### Custom Error Recovery
514

515
```java
516
public class MyParser extends Parser {
517
    @Override
518
    public void recover(IntStream input, RecognitionException re) {
519
        // Custom recovery logic
520
        if (re instanceof MismatchedTokenException) {
521
            MismatchedTokenException mte = (MismatchedTokenException) re;
522
            // Try to find a recovery token
523
            while (input.LA(1) != Token.EOF && 
524
                   input.LA(1) != SEMICOLON && 
525
                   input.LA(1) != RBRACE) {
526
                input.consume();
527
            }
528
        } else {
529
            super.recover(input, re);
530
        }
531
    }
532
}
533
```
534

535
### Rule Memoization
536

537
```java
538
// In generated parser with memoization
539
public final expression_return expression() throws RecognitionException {
540
    if (state.backtracking > 0 && alreadyParsedRule(input, 5)) {
541
        return null; // Already tried this rule
542
    }
543
    
544
    expression_return retval = new expression_return();
545
    // ... rule implementation ...
546
    
547
    if (state.backtracking > 0) {
548
        memoize(input, 5, retval.start);
549
    }
550
    
551
    return retval;
552
}
553
```
554

555
### Backtracking Support
556

557
```java
558
// Generated code with backtracking
559
int alt1 = 2;
560
state.backtracking++;
561
try {
562
    alt1 = dfa1.predict(input);
563
} catch (RecognitionException re) {
564
    state.failed = true;
565
    throw re;
566
} finally {
567
    state.backtracking--;
568
}
569

570
if (state.failed) return retval;
571

572
switch (alt1) {
573
    case 1 :
574
        // First alternative
575
        break;
576
    case 2 :
577
        // Second alternative  
578
        break;
579
}
580
```
581

582
### Follow Set Management
583

584
```java
585
// Push follow set before rule call
586
pushFollow(FOLLOW_expression_in_assignment123);
587
expression();
588
state._fsp--;
589

590
// Error recovery with follow sets
591
BitSet follow = computeErrorRecoverySet();
592
Token recovered = (Token)recoverFromMismatchedToken(input, IDENTIFIER, follow);
593
```

Version

Tile

Files

parsing.md.css-3qkkll{font-size:var(--chakra-font-sizes-sm);font-weight:var(--chakra-font-weights-normal);color:var(--chakra-colors-gray-300);}docs/

parsing.mddocs/