0
# Token Streams
1
2
Token stream implementations for managing sequences of tokens produced by lexers. Token streams provide lookahead, filtering, and rewriting capabilities for efficient parsing.
3
4
## Capabilities
5
6
### TokenStream Interface
7
8
Base interface for all token streams providing token access and manipulation.
9
10
```java { .api }
11
/**
12
* A stream of tokens accessing tokens from a TokenSource
13
*/
14
public interface TokenStream extends IntStream {
15
/**
16
* Get Token at current input pointer + i ahead where i=1 is next Token.
17
* i<0 indicates tokens in the past. So -1 is previous token and -2 is
18
* two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken.
19
* Return null for LT(0) and any index that results in an absolute address
20
* that is negative.
21
*/
22
public Token LT(int k);
23
24
/**
25
* How far ahead has the stream been asked to look? The return
26
* value is a valid index from 0..n-1.
27
*/
28
int range();
29
30
/**
31
* Get a token at an absolute index i; 0..n-1. This is really only
32
* needed for profiling and debugging and token stream rewriting.
33
* If you don't want to buffer up tokens, then this method makes no
34
* sense for you. Naturally you can't use the rewrite stream feature.
35
*/
36
public Token get(int i);
37
38
/**
39
* Where is this stream pulling tokens from? This is not the name, but
40
* the object that provides Token objects.
41
*/
42
public TokenSource getTokenSource();
43
44
/**
45
* Return the text of all tokens from start to stop, inclusive.
46
* If the stream does not buffer all the tokens then it can just
47
* return "" or null; Users should not access $ruleLabel.text in
48
* an action of course in that case.
49
*/
50
public String toString(int start, int stop);
51
52
/**
53
* Because the user is not required to use a token with an index stored
54
* in it, we must provide a means for two token objects themselves to
55
* indicate the start/end location. Most often this will just delegate
56
* to the other toString(int,int). This is also parallel with
57
* the TreeNodeStream.toString(Object,Object).
58
*/
59
public String toString(Token start, Token stop);
60
}
61
```
62
63
### Common Token Stream
64
65
Most commonly used token stream implementation with channel filtering support.
66
67
```java { .api }
68
/**
69
* Most common token stream implementation
70
*/
71
public class CommonTokenStream implements TokenStream {
72
protected TokenSource tokenSource;
73
protected List<Token> tokens;
74
protected int p;
75
protected int channel;
76
77
public CommonTokenStream();
78
public CommonTokenStream(TokenSource tokenSource);
79
public CommonTokenStream(TokenSource tokenSource, int channel);
80
81
public void setTokenSource(TokenSource tokenSource);
82
public TokenSource getTokenSource();
83
84
/** Reset this token stream so that reading starts over from the beginning of the buffered tokens */
85
public void reset();
86
87
/** Load all tokens from the token source and put in tokens list */
88
public void fillBuffer();
89
90
protected void sync(int i);
91
protected int fetch(int n);
92
public Token get(int i);
93
public Token LT(int k);
94
95
/** Look backwards k on-channel tokens */
96
public Token LB(int k);
97
98
public void consume();
99
100
/** Given a starting index, return the index of the next token on channel */
101
public int skipOffTokenChannels(int i);
102
public int skipOffTokenChannelsReverse(int i);
103
104
/** Given a starting index, return the index of the previous token on channel */
105
protected int skipOffTokenChannelsReverse(int i, int channel);
106
107
/** Reset this token stream so that reading starts over from the beginning of the buffered tokens */
108
public void reset();
109
110
/** Move the input pointer to the next incoming token */
111
public void consume();
112
113
public void seek(int index);
114
public int size();
115
public int index();
116
public int mark();
117
public void release(int marker);
118
public void rewind(int marker);
119
public void rewind();
120
121
public String toString(int start, int stop);
122
public String toString(Token start, Token stop);
123
public String toString();
124
125
/** Get buffered tokens: all of them, or only those from start..stop (inclusive), optionally filtered by token type(s) */
126
public List<Token> getTokens();
127
public List<Token> getTokens(int start, int stop);
128
public List<Token> getTokens(int start, int stop, BitSet types);
129
public List<Token> getTokens(int start, int stop, List<Integer> types);
130
public List<Token> getTokens(int start, int stop, int ttype);
131
132
public String getSourceName();
133
}
134
```
135
136
**Usage Examples:**
137
138
```java
139
import org.antlr.runtime.*;
140
141
// Create token stream from lexer
142
MyLexer lexer = new MyLexer(new ANTLRStringStream("hello world"));
143
CommonTokenStream tokens = new CommonTokenStream(lexer);
144
145
// Look ahead at tokens
146
Token nextToken = tokens.LT(1); // First token
147
Token secondToken = tokens.LT(2); // Second token
148
149
// Get all tokens
150
tokens.fillBuffer();
151
System.out.println("Total tokens: " + tokens.size());
152
153
// Get tokens by type
154
List<Token> identifiers = tokens.getTokens(0, tokens.size()-1, MyLexer.IDENTIFIER);
155
156
// Convert range to string
157
String text = tokens.toString(0, 2); // First three tokens as text
158
```
159
160
### Channel-Filtered Token Stream
161
162
```java
163
// Create token stream that only shows tokens on default channel
164
CommonTokenStream tokens = new CommonTokenStream(lexer, Token.DEFAULT_CHANNEL);
165
166
// Hidden tokens (like whitespace/comments) are filtered out automatically
167
Token visible = tokens.LT(1); // Only sees DEFAULT_CHANNEL tokens
168
```
169
170
### Buffered Token Stream
171
172
Base buffered implementation providing token buffering with random access.
173
174
```java { .api }
175
/**
176
* Buffer all tokens from token source for random access
177
*/
178
public class BufferedTokenStream implements TokenStream {
179
protected TokenSource tokenSource;
180
protected List<Token> tokens;
181
protected int p;
182
protected int range;
183
184
public BufferedTokenStream();
185
public BufferedTokenStream(TokenSource tokenSource);
186
187
public void setTokenSource(TokenSource tokenSource);
188
public TokenSource getTokenSource();
189
190
public Token get(int i);
191
public Token LT(int k);
192
193
/** Load all tokens from the token source and put in tokens list */
194
public void fillBuffer();
195
196
protected void sync(int i);
197
protected int fetch(int n);
198
199
public void consume();
200
public void seek(int index);
201
public int size();
202
public int index();
203
public int range();
204
public int mark();
205
public void release(int marker);
206
public void rewind(int marker);
207
public void rewind();
208
209
public String toString(int start, int stop);
210
public String toString(Token start, Token stop);
211
public String toString();
212
213
public String getSourceName();
214
}
215
```
216
217
### Token Rewrite Stream
218
219
Advanced token stream supporting in-place token rewriting and replacement.
220
221
```java { .api }
222
/**
223
* Token stream supporting rewrite operations
224
*/
225
public class TokenRewriteStream extends CommonTokenStream {
226
public static final String DEFAULT_PROGRAM_NAME = "default";
227
public static final int PROGRAM_INIT_SIZE = 100;
228
public static final int MIN_TOKEN_INDEX = 0;
229
230
protected Map<String, List<RewriteOperation>> programs;
231
protected Map<String, Integer> lastRewriteTokenIndexes;
232
233
public TokenRewriteStream();
234
public TokenRewriteStream(TokenSource tokenSource);
235
public TokenRewriteStream(TokenSource tokenSource, int channel);
236
237
/** Replace the text for tokens start..stop with the text */
238
public void replace(int start, int stop, Object text);
239
public void replace(String programName, int start, int stop, Object text);
240
public void replace(Token indexT, Object text);
241
public void replace(String programName, Token indexT, Object text);
242
public void replace(Token start, Token stop, Object text);
243
public void replace(String programName, Token start, Token stop, Object text);
244
245
/** Delete the text for tokens start..stop */
246
public void delete(int start, int stop);
247
public void delete(String programName, int start, int stop);
248
public void delete(Token indexT);
249
public void delete(String programName, Token indexT);
250
public void delete(Token start, Token stop);
251
public void delete(String programName, Token start, Token stop);
252
253
/** Insert text after the specified token index */
254
public void insertAfter(int index, Object text);
255
public void insertAfter(String programName, int index, Object text);
256
public void insertAfter(Token t, Object text);
257
public void insertAfter(String programName, Token t, Object text);
258
259
/** Insert text before the specified token index */
260
public void insertBefore(int index, Object text);
261
public void insertBefore(String programName, int index, Object text);
262
public void insertBefore(Token t, Object text);
263
public void insertBefore(String programName, Token t, Object text);
264
265
/** Return the text from the original tokens altered per the instructions given to this stream */
266
public String toString();
267
public String toString(String programName);
268
public String toString(int start, int stop);
269
public String toString(String programName, int start, int stop);
270
271
public String toOriginalString();
272
public String toOriginalString(int start, int stop);
273
}
274
```
275
276
**Usage Examples:**
277
278
```java
279
import org.antlr.runtime.*;
280
281
// Create rewrite stream
282
MyLexer lexer = new MyLexer(new ANTLRStringStream("hello world"));
283
TokenRewriteStream tokens = new TokenRewriteStream(lexer);
284
285
// Parse to identify tokens to modify
286
MyParser parser = new MyParser(tokens);
287
parser.program(); // Parse the input
288
289
// Rewrite operations
290
tokens.replace(0, "HELLO"); // Replace first token
291
tokens.insertAfter(1, " beautiful"); // Insert after second token
292
tokens.delete(2); // Delete third token
293
294
// Get rewritten text
295
String rewritten = tokens.toString(); // "HELLO beautiful"
296
297
// Multiple rewrite programs
298
tokens.replace("program1", 0, "Hi");
299
tokens.replace("program2", 0, "Hey");
300
301
String result1 = tokens.toString("program1");
302
String result2 = tokens.toString("program2");
303
```
304
305
### Unbuffered Token Stream
306
307
Memory-efficient token stream that doesn't buffer all tokens.
308
309
```java { .api }
310
/**
311
* Token stream that doesn't buffer all tokens
312
*/
313
public class UnbufferedTokenStream implements TokenStream {
314
protected TokenSource tokenSource;
315
protected Token[] lookahead;
316
protected int p;
317
protected int numMarkers;
318
protected int lastMarker;
319
protected int currentTokenIndex;
320
321
public UnbufferedTokenStream(TokenSource tokenSource);
322
public UnbufferedTokenStream(TokenSource tokenSource, int bufferSize);
323
324
public Token get(int i) throws UnsupportedOperationException;
325
public Token LT(int i);
326
protected void sync(int i);
327
protected void fill(int n);
328
protected void add(Token t);
329
public int mark();
330
public void release(int marker);
331
public int index();
332
public void seek(int index) throws UnsupportedOperationException;
333
public int size() throws UnsupportedOperationException;
334
public String getSourceName();
335
public String toString(int start, int stop);
336
public String toString(Token start, Token stop);
337
public void consume();
338
339
public TokenSource getTokenSource();
340
public String toString();
341
}
342
```
343
344
**Usage Examples:**
345
346
```java
347
import org.antlr.runtime.*;
348
349
// For large inputs where memory is a concern
350
MyLexer lexer = new MyLexer(new ANTLRFileStream("large-file.txt"));
351
UnbufferedTokenStream tokens = new UnbufferedTokenStream(lexer);
352
353
// Limited lookahead capability
354
Token next = tokens.LT(1);
355
Token nextNext = tokens.LT(2);
356
357
// Cannot access arbitrary tokens by index
358
// tokens.get(100); // Throws UnsupportedOperationException
359
```
360
361
## Types
362
363
### Token Interface
364
365
```java { .api }
366
public interface Token {
367
public static final int EOR_TOKEN_TYPE = 1;
368
public static final int DOWN = 2;
369
public static final int UP = 3;
370
public static final int MIN_TOKEN_TYPE = UP+1;
371
public static final int EOF = CharStream.EOF;
372
public static final int INVALID_TOKEN_TYPE = 0;
373
public static final Token INVALID_TOKEN = new CommonToken(INVALID_TOKEN_TYPE);
374
public static final Token SKIP_TOKEN = new CommonToken(INVALID_TOKEN_TYPE);
375
public static final int DEFAULT_CHANNEL = 0;
376
public static final int HIDDEN_CHANNEL = 99;
377
378
public String getText();
379
public void setText(String text);
380
public int getType();
381
public void setType(int ttype);
382
public int getLine();
383
public void setLine(int line);
384
public int getCharPositionInLine();
385
public void setCharPositionInLine(int pos);
386
public int getChannel();
387
public void setChannel(int channel);
388
public int getTokenIndex();
389
public void setTokenIndex(int index);
390
public CharStream getInputStream();
391
public void setInputStream(CharStream input);
392
}
393
```
394
395
### TokenSource Interface
396
397
```java { .api }
398
public interface TokenSource {
399
public Token nextToken();
400
public String getSourceName();
401
}
402
```
403
404
## Common Patterns
405
406
### Token Filtering by Channel
407
408
```java
409
// Get only tokens on default channel (skips whitespace, comments)
410
CommonTokenStream tokens = new CommonTokenStream(lexer, Token.DEFAULT_CHANNEL);
411
412
// Get all tokens including hidden ones
413
CommonTokenStream allTokens = new CommonTokenStream(lexer);
414
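allTokens.fillBuffer(); // buffer every token first so getTokens() sees the whole input
// Note: getTokens(start, stop, ttype) filters by token TYPE, not by channel,
// so hidden-channel tokens have to be selected by checking getChannel().
List<Token> hiddenTokens = new ArrayList<Token>();
for (Token t : allTokens.getTokens()) {
    if (t.getChannel() == Token.HIDDEN_CHANNEL) {
        hiddenTokens.add(t);
    }
}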
415
```
416
417
### Token Range Operations
418
419
```java
420
// Get text from token range
421
String text = tokens.toString(startToken, stopToken);
422
423
// Get specific token types in range
424
List<Token> keywords = tokens.getTokens(0, 10, MyLexer.KEYWORD);
425
426
// Get multiple token types
427
List<Integer> types = Arrays.asList(MyLexer.IDENTIFIER, MyLexer.NUMBER);
428
List<Token> filtered = tokens.getTokens(0, tokens.size()-1, types);
429
```
430
431
### Stream Positioning and Marking
432
433
```java
434
// Mark current position
435
int marker = tokens.mark();
436
437
// Consume some tokens
438
tokens.consume();
439
tokens.consume();
440
441
// Look ahead without consuming
442
Token next = tokens.LT(1);
443
444
// Return to marked position
445
tokens.rewind(marker);
446
447
// Or seek to specific position
448
tokens.seek(5);
449
```