or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

character-streams.md debug-support.md error-handling.md index.md lexical-analysis.md parsing.md token-streams.md tree-construction.md

docs/token-streams.md

0

# Token Streams

1

2

Token stream implementations for managing sequences of tokens produced by lexers. Token streams provide lookahead, filtering, and rewriting capabilities for efficient parsing.

3

4

## Capabilities

5

6

### TokenStream Interface

7

8

Base interface for all token streams providing token access and manipulation.

9

10

```java { .api }

11

/**

12

* A stream of tokens accessing tokens from a TokenSource

13

*/

14

public interface TokenStream extends IntStream {

15

/**

16

* Get Token at current input pointer + i ahead where i=1 is next Token.

17

* i<0 indicates tokens in the past. So -1 is previous token and -2 is

18

* two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken.

19

* Return null for LT(0) and any index that results in an absolute address

20

* that is negative.

21

*/

22

public Token LT(int k);

23

24

/**

25

* How far ahead has the stream been asked to look? The return

26

* value is a valid index from 0..n-1.

27

*/

28

int range();

29

30

/**

31

* Get a token at an absolute index i; 0..n-1. This is really only

32

* needed for profiling and debugging and token stream rewriting.

33

* If you don't want to buffer up tokens, then this method makes no

34

* sense for you. Naturally you can't use the rewrite stream feature.

35

*/

36

public Token get(int i);

37

38

/**

39

* Where is this stream pulling tokens from? This is not the name, but

40

* the object that provides Token objects.

41

*/

42

public TokenSource getTokenSource();

43

44

/**

45

* Return the text of all tokens from start to stop, inclusive.

46

* If the stream does not buffer all the tokens then it can just

47

* return "" or null; Users should not access $ruleLabel.text in

48

* an action of course in that case.

49

*/

50

public String toString(int start, int stop);

51

52

/**

53

* Because the user is not required to use a token with an index stored

54

* in it, we must provide a means for two token objects themselves to

55

* indicate the start/end location. Most often this will just delegate

56

* to the other toString(int,int). This is also parallel with

57

* the TreeNodeStream.toString(Object,Object).

58

*/

59

public String toString(Token start, Token stop);

60

}

61

```

62

63

### Common Token Stream

64

65

Most commonly used token stream implementation with channel filtering support.

66

67

```java { .api }

68

/**

69

* Most common token stream implementation

70

*/

71

public class CommonTokenStream extends BufferedTokenStream {

72

protected TokenSource tokenSource;

73

protected List<Token> tokens;

74

protected int p;

75

protected int channel;

76

77

public CommonTokenStream();

78

public CommonTokenStream(TokenSource tokenSource);

79

public CommonTokenStream(TokenSource tokenSource, int channel);

80

81

public void setTokenSource(TokenSource tokenSource);

82

public TokenSource getTokenSource();

83

84

/** Reset this token stream's read position back to the start of the token buffer */

85

public void reset();

86

87

/** Load all tokens from the token source and put in tokens list */

88

public void fillBuffer();

89

90

protected void sync(int i);

91

protected int fetch(int n);

92

public Token get(int i);

93

public Token LT(int k);

94

95

/** Look backwards k on-channel tokens */

96

public Token LB(int k);

97

98

public void consume();

99

100

/** Given a starting index, return the index of the next token on channel */

101

public int skipOffTokenChannels(int i);

102

public int skipOffTokenChannelsReverse(int i);

103

104

/** Given a starting index, return the index of the previous token on channel */

105

protected int skipOffTokenChannelsReverse(int i, int channel);

106

107

/** Reset this token stream's read position back to the start of the token buffer */

108

public void reset();

109

110

/** Move the input pointer to the next incoming token */

111

public void consume();

112

113

public void seek(int index);

114

public int size();

115

public int index();

116

public int mark();

117

public void release(int marker);

118

public void rewind(int marker);

119

public void rewind();

120

121

public String toString(int start, int stop);

122

public String toString(Token start, Token stop);

123

public String toString();

124

125

/** Get all tokens from start..stop inclusively */

126

public List<Token> getTokens();

127

public List<Token> getTokens(int start, int stop);

128

public List<Token> getTokens(int start, int stop, BitSet types);

129

public List<Token> getTokens(int start, int stop, List<Integer> types);

130

public List<Token> getTokens(int start, int stop, int ttype);

131

132

public String getSourceName();

133

}

134

```

135

136

**Usage Examples:**

137

138

```java

139

import org.antlr.runtime.*;

140

141

// Create token stream from lexer

142

MyLexer lexer = new MyLexer(new ANTLRStringStream("hello world"));

143

CommonTokenStream tokens = new CommonTokenStream(lexer);

144

145

// Look ahead at tokens

146

Token nextToken = tokens.LT(1); // First token

147

Token secondToken = tokens.LT(2); // Second token

148

149

// Get all tokens

150

tokens.fillBuffer();

151

System.out.println("Total tokens: " + tokens.size());

152

153

// Get tokens by type

154

List<Token> identifiers = tokens.getTokens(0, tokens.size()-1, MyLexer.IDENTIFIER);

155

156

// Convert range to string

157

String text = tokens.toString(0, 2); // First three tokens as text

158

```

159

160

### Channel-Filtered Token Stream

161

162

```java

163

// Create token stream that only shows tokens on default channel

164

CommonTokenStream tokens = new CommonTokenStream(lexer, Token.DEFAULT_CHANNEL);

165

166

// Hidden tokens (like whitespace/comments) are filtered out automatically

167

Token visible = tokens.LT(1); // Only sees DEFAULT_CHANNEL tokens

168

```

169

170

### Buffered Token Stream

171

172

Base buffered implementation providing token buffering with random access.

173

174

```java { .api }

175

/**

176

* Buffer all tokens from token source for random access

177

*/

178

public class BufferedTokenStream implements TokenStream {

179

protected TokenSource tokenSource;

180

protected List<Token> tokens;

181

protected int p;

182

protected int range;

183

184

public BufferedTokenStream();

185

public BufferedTokenStream(TokenSource tokenSource);

186

187

public void setTokenSource(TokenSource tokenSource);

188

public TokenSource getTokenSource();

189

190

public Token get(int i);

191

public Token LT(int k);

192

193

/** Load all tokens from the token source and put in tokens list */

194

public void fillBuffer();

195

196

protected void sync(int i);

197

protected int fetch(int n);

198

199

public void consume();

200

public void seek(int index);

201

public int size();

202

public int index();

203

public int range();

204

public int mark();

205

public void release(int marker);

206

public void rewind(int marker);

207

public void rewind();

208

209

public String toString(int start, int stop);

210

public String toString(Token start, Token stop);

211

public String toString();

212

213

public String getSourceName();

214

}

215

```

216

217

### Token Rewrite Stream

218

219

Advanced token stream supporting in-place token rewriting and replacement.

220

221

```java { .api }

222

/**

223

* Token stream supporting rewrite operations

224

*/

225

public class TokenRewriteStream extends CommonTokenStream {

226

public static final String DEFAULT_PROGRAM_NAME = "default";

227

public static final int PROGRAM_INIT_SIZE = 100;

228

public static final int MIN_TOKEN_INDEX = 0;

229

230

protected Map<String, List<RewriteOperation>> programs;

231

protected Map<String, Integer> lastRewriteTokenIndexes;

232

233

public TokenRewriteStream();

234

public TokenRewriteStream(TokenSource tokenSource);

235

public TokenRewriteStream(TokenSource tokenSource, int channel);

236

237

/** Replace the text for tokens start..stop with the text */

238

public void replace(int start, int stop, Object text);

239

public void replace(String programName, int start, int stop, Object text);

240

public void replace(Token indexT, Object text);

241

public void replace(String programName, Token indexT, Object text);

242

public void replace(Token start, Token stop, Object text);

243

public void replace(String programName, Token start, Token stop, Object text);

244

245

/** Delete the text for tokens start..stop */

246

public void delete(int start, int stop);

247

public void delete(String programName, int start, int stop);

248

public void delete(Token indexT);

249

public void delete(String programName, Token indexT);

250

public void delete(Token start, Token stop);

251

public void delete(String programName, Token start, Token stop);

252

253

/** Insert text after the specified token index */

254

public void insertAfter(int index, Object text);

255

public void insertAfter(String programName, int index, Object text);

256

public void insertAfter(Token t, Object text);

257

public void insertAfter(String programName, Token t, Object text);

258

259

/** Insert text before the specified token index */

260

public void insertBefore(int index, Object text);

261

public void insertBefore(String programName, int index, Object text);

262

public void insertBefore(Token t, Object text);

263

public void insertBefore(String programName, Token t, Object text);

264

265

/** Return the text from the original tokens altered per the instructions given to this stream */

266

public String toString();

267

public String toString(String programName);

268

public String toString(int start, int stop);

269

public String toString(String programName, int start, int stop);

270

271

public String toOriginalString();

272

public String toOriginalString(int start, int stop);

273

}

274

```

275

276

**Usage Examples:**

277

278

```java

279

import org.antlr.runtime.*;

280

281

// Create rewrite stream

282

MyLexer lexer = new MyLexer(new ANTLRStringStream("hello world"));

283

TokenRewriteStream tokens = new TokenRewriteStream(lexer);

284

285

// Parse to identify tokens to modify

286

MyParser parser = new MyParser(tokens);

287

parser.program(); // Parse the input

288

289

// Rewrite operations

290

tokens.replace(0, "HELLO"); // Replace first token

291

tokens.insertAfter(1, " beautiful"); // Insert after second token

292

tokens.delete(2); // Delete third token

293

294

// Get rewritten text

295

String rewritten = tokens.toString(); // "HELLO beautiful"

296

297

// Multiple rewrite programs

298

tokens.replace("program1", 0, 0, "Hi");

299

tokens.replace("program2", 0, 0, "Hey");

300

301

String result1 = tokens.toString("program1");

302

String result2 = tokens.toString("program2");

303

```

304

305

### Unbuffered Token Stream

306

307

Memory-efficient token stream that doesn't buffer all tokens.

308

309

```java { .api }

310

/**

311

* Token stream that doesn't buffer all tokens

312

*/

313

public class UnbufferedTokenStream implements TokenStream {

314

protected TokenSource tokenSource;

315

protected Token[] lookahead;

316

protected int p;

317

protected int numMarkers;

318

protected int lastMarker;

319

protected int currentTokenIndex;

320

321

public UnbufferedTokenStream(TokenSource tokenSource);

322

public UnbufferedTokenStream(TokenSource tokenSource, int bufferSize);

323

324

public Token get(int i) throws UnsupportedOperationException;

325

public Token LT(int i);

326

protected void sync(int i);

327

protected void fill(int n);

328

protected void add(Token t);

329

public int mark();

330

public void release(int marker);

331

public int index();

332

public void seek(int index) throws UnsupportedOperationException;

333

public int size() throws UnsupportedOperationException;

334

public String getSourceName();

335

public String toString(int start, int stop);

336

public String toString(Token start, Token stop);

337

public void consume();

338

339

public TokenSource getTokenSource();

340

public String toString();

341

}

342

```

343

344

**Usage Examples:**

345

346

```java

347

import org.antlr.runtime.*;

348

349

// For large inputs where memory is a concern

350

MyLexer lexer = new MyLexer(new ANTLRFileStream("large-file.txt"));

351

UnbufferedTokenStream tokens = new UnbufferedTokenStream(lexer);

352

353

// Limited lookahead capability

354

Token next = tokens.LT(1);

355

Token nextNext = tokens.LT(2);

356

357

// Cannot access arbitrary tokens by index

358

// tokens.get(100); // Throws UnsupportedOperationException

359

```

360

361

## Types

362

363

### Token Interface

364

365

```java { .api }

366

public interface Token {

367

public static final int EOR_TOKEN_TYPE = 1;

368

public static final int DOWN = 2;

369

public static final int UP = 3;

370

public static final int MIN_TOKEN_TYPE = UP+1;

371

public static final int EOF = CharStream.EOF;

372

public static final int INVALID_TOKEN_TYPE = 0;

373

public static final Token INVALID_TOKEN = new CommonToken(INVALID_TOKEN_TYPE);

374

public static final Token SKIP_TOKEN = new CommonToken(INVALID_TOKEN_TYPE);

375

public static final int DEFAULT_CHANNEL = 0;

376

public static final int HIDDEN_CHANNEL = 99;

377

378

public String getText();

379

public void setText(String text);

380

public int getType();

381

public void setType(int ttype);

382

public int getLine();

383

public void setLine(int line);

384

public int getCharPositionInLine();

385

public void setCharPositionInLine(int pos);

386

public int getChannel();

387

public void setChannel(int channel);

388

public int getTokenIndex();

389

public void setTokenIndex(int index);

390

public CharStream getInputStream();

391

public void setInputStream(CharStream input);

392

}

393

```

394

395

### TokenSource Interface

396

397

```java { .api }

398

public interface TokenSource {

399

public Token nextToken();

400

public String getSourceName();

401

}

402

```

403

404

## Common Patterns

405

406

### Token Filtering by Channel

407

408

```java

409

// Get only tokens on default channel (skips whitespace, comments)

410

CommonTokenStream tokens = new CommonTokenStream(lexer, Token.DEFAULT_CHANNEL);

411

412

// Get all tokens including hidden ones

413

CommonTokenStream allTokens = new CommonTokenStream(lexer);

414

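allTokens.fillBuffer();
List<Token> hiddenTokens = new ArrayList<Token>();
for (Token t : allTokens.getTokens()) { // getTokens(start, stop, int) filters by token type, not by channel
if (t.getChannel() == Token.HIDDEN_CHANNEL) hiddenTokens.add(t);
}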

415

```

416

417

### Token Range Operations

418

419

```java

420

// Get text from token range

421

String text = tokens.toString(startToken, stopToken);

422

423

// Get specific token types in range

424

List<Token> keywords = tokens.getTokens(0, 10, MyLexer.KEYWORD);

425

426

// Get multiple token types

427

List<Integer> types = Arrays.asList(MyLexer.IDENTIFIER, MyLexer.NUMBER);

428

List<Token> filtered = tokens.getTokens(0, tokens.size()-1, types);

429

```

430

431

### Stream Positioning and Marking

432

433

```java

434

// Mark current position

435

int marker = tokens.mark();

436

437

// Consume some tokens

438

tokens.consume();

439

tokens.consume();

440

441

// Look ahead without consuming

442

Token next = tokens.LT(1);

443

444

// Return to marked position

445

tokens.rewind(marker);

446

447

// Or seek to specific position

448

tokens.seek(5);

449

```