CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/maven-org-antlr--antlr-runtime

Java runtime library for ANTLR v3 - a framework for constructing recognizers, interpreters, compilers, and translators from grammatical descriptions.

Pending
Overview
Eval results
Files

token-streams.mddocs/

Token Streams

Token stream implementations for managing sequences of tokens produced by lexers. Token streams provide lookahead, filtering, and rewriting capabilities for efficient parsing.

Capabilities

TokenStream Interface

Base interface for all token streams providing token access and manipulation.

/**
 * A stream of tokens accessing tokens from a TokenSource
 */
public interface TokenStream extends IntStream {
    /**
     * Get Token at current input pointer + i ahead where i=1 is next Token.
     * i<0 indicates tokens in the past. So -1 is previous token and -2 is
     * two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken.
     * Return null for LT(0) and any index that results in an absolute address
     * that is negative.
     */
    public Token LT(int k);
    
    /**
     * How far ahead has the stream been asked to look? The return
     * value is a valid index from 0..n-1.
     */
    int range();
    
    /**
     * Get a token at an absolute index i; 0..n-1. This is really only
     * needed for profiling and debugging and token stream rewriting.
     * If you don't want to buffer up tokens, then this method makes no
     * sense for you. Naturally you can't use the rewrite stream feature.
     */
    public Token get(int i);
    
    /**
     * Where is this stream pulling tokens from? This is not the name, but
     * the object that provides Token objects.
     */
    public TokenSource getTokenSource();
    
    /**
     * Return the text of all tokens from start to stop, inclusive.
     * If the stream does not buffer all the tokens then it can just
     * return "" or null. Users should not access $ruleLabel.text in
     * an action in that case, of course.
     */
    public String toString(int start, int stop);
    
    /**
     * Because the user is not required to use a token with an index stored
     * in it, we must provide a means for two token objects themselves to
     * indicate the start/end location. Most often this will just delegate
     * to the other toString(int,int). This is also parallel with
     * the TreeNodeStream.toString(Object,Object).
     */
    public String toString(Token start, Token stop);
}

Common Token Stream

Most commonly used token stream implementation with channel filtering support.

/**
 * Most common token stream implementation
 */
public class CommonTokenStream implements TokenStream {
    protected TokenSource tokenSource;
    protected List<Token> tokens;
    protected int p;
    protected int channel;
    
    public CommonTokenStream();
    public CommonTokenStream(TokenSource tokenSource);
    public CommonTokenStream(TokenSource tokenSource, int channel);
    
    public void setTokenSource(TokenSource tokenSource);
    public TokenSource getTokenSource();
    
    /** Reset this token stream by setting its token source */
    public void reset();
    
    /** Load all tokens from the token source and put in tokens list */
    public void fillBuffer();
    
    protected void sync(int i);
    protected int fetch(int n);
    public Token get(int i);
    public Token LT(int k);
    
    /** Look backwards k on-channel tokens */
    public Token LB(int k);
    
    /** Move the input pointer to the next incoming on-channel token */
    public void consume();
    
    /** Given a starting index, return the index of the next token on channel */
    public int skipOffTokenChannels(int i);
    
    /** Given a starting index, return the index of the previous token on channel */
    public int skipOffTokenChannelsReverse(int i);
    
    public void seek(int index);
    public int size();
    public int index();
    public int mark();
    public void release(int marker);
    public void rewind(int marker);
    public void rewind();
    
    public String toString(int start, int stop);
    public String toString(Token start, Token stop);
    public String toString();
    
    /** Get all tokens in the buffer */
    public List<Token> getTokens();
    
    /** Get all tokens from start..stop inclusively */
    public List<Token> getTokens(int start, int stop);
    public List<Token> getTokens(int start, int stop, BitSet types);
    public List<Token> getTokens(int start, int stop, List<Integer> types);
    public List<Token> getTokens(int start, int stop, int ttype);
    
    public String getSourceName();
}

Usage Examples:

import org.antlr.runtime.*;

// Create token stream from lexer
MyLexer lexer = new MyLexer(new ANTLRStringStream("hello world"));
CommonTokenStream tokens = new CommonTokenStream(lexer);

// Look ahead at tokens
Token nextToken = tokens.LT(1);  // First token
Token secondToken = tokens.LT(2); // Second token

// Get all tokens
tokens.fillBuffer();
System.out.println("Total tokens: " + tokens.size());

// Get tokens by type
List<Token> identifiers = tokens.getTokens(0, tokens.size()-1, MyLexer.IDENTIFIER);

// Convert range to string
String text = tokens.toString(0, 2); // First three tokens as text

Channel-Filtered Token Stream

// Create token stream that only shows tokens on default channel
CommonTokenStream tokens = new CommonTokenStream(lexer, Token.DEFAULT_CHANNEL);

// Hidden tokens (like whitespace/comments) are filtered out automatically
Token visible = tokens.LT(1); // Only sees DEFAULT_CHANNEL tokens

Buffered Token Stream

Base buffered implementation providing token buffering with random access.

/**
 * Buffer all tokens from token source for random access
 */
public class BufferedTokenStream implements TokenStream {
    protected TokenSource tokenSource;
    protected List<Token> tokens;
    protected int p;
    protected int range;
    
    public BufferedTokenStream();
    public BufferedTokenStream(TokenSource tokenSource);
    
    public void setTokenSource(TokenSource tokenSource);
    public TokenSource getTokenSource();
    
    public Token get(int i);
    public Token LT(int k);
    
    /** Load all tokens from the token source and put in tokens list */
    public void fillBuffer();
    
    protected void sync(int i);
    protected int fetch(int n);
    
    public void consume();
    public void seek(int index);
    public int size();
    public int index();
    public int range();
    public int mark();
    public void release(int marker);
    public void rewind(int marker);
    public void rewind();
    
    public String toString(int start, int stop);
    public String toString(Token start, Token stop);
    public String toString();
    
    public String getSourceName();
}

Token Rewrite Stream

Advanced token stream supporting in-place token rewriting and replacement.

/**
 * Token stream supporting rewrite operations
 */
public class TokenRewriteStream extends CommonTokenStream {
    public static final String DEFAULT_PROGRAM_NAME = "default";
    public static final int PROGRAM_INIT_SIZE = 100;
    public static final int MIN_TOKEN_INDEX = 0;
    
    protected Map<String, List<RewriteOperation>> programs;
    protected Map<String, Integer> lastRewriteTokenIndexes;
    
    public TokenRewriteStream();
    public TokenRewriteStream(TokenSource tokenSource);
    public TokenRewriteStream(TokenSource tokenSource, int channel);
    
    /** Replace the text for tokens start..stop with the text */
    public void replace(int start, int stop, Object text);
    public void replace(String programName, int start, int stop, Object text);
    public void replace(Token indexT, Object text);
    public void replace(String programName, Token indexT, Object text);
    public void replace(Token start, Token stop, Object text);
    public void replace(String programName, Token start, Token stop, Object text);
    
    /** Delete the text for tokens start..stop */
    public void delete(int start, int stop);
    public void delete(String programName, int start, int stop);
    public void delete(Token indexT);
    public void delete(String programName, Token indexT);
    public void delete(Token start, Token stop);
    public void delete(String programName, Token start, Token stop);
    
    /** Insert text after the specified token index */
    public void insertAfter(int index, Object text);
    public void insertAfter(String programName, int index, Object text);
    public void insertAfter(Token t, Object text);
    public void insertAfter(String programName, Token t, Object text);
    
    /** Insert text before the specified token index */
    public void insertBefore(int index, Object text);
    public void insertBefore(String programName, int index, Object text);
    public void insertBefore(Token t, Object text);
    public void insertBefore(String programName, Token t, Object text);
    
    /** Return the text from the original tokens altered per the instructions given to this stream */
    public String toString();
    public String toString(String programName);
    public String toString(int start, int stop);
    public String toString(String programName, int start, int stop);
    
    public String toOriginalString();
    public String toOriginalString(int start, int stop);
}

Usage Examples:

import org.antlr.runtime.*;

// Create rewrite stream
MyLexer lexer = new MyLexer(new ANTLRStringStream("hello world"));
TokenRewriteStream tokens = new TokenRewriteStream(lexer);

// Parse to identify tokens to modify
MyParser parser = new MyParser(tokens);
parser.program(); // Parse the input

// Rewrite operations (using the start/stop overloads documented above)
tokens.replace(0, 0, "HELLO");        // Replace first token
tokens.insertAfter(1, " beautiful");  // Insert after second token
tokens.delete(2, 2);                  // Delete third token

// Get the text with all rewrite operations applied
String rewritten = tokens.toString();

// Multiple rewrite programs
tokens.replace("program1", 0, "Hi");
tokens.replace("program2", 0, "Hey");

String result1 = tokens.toString("program1");
String result2 = tokens.toString("program2");

Unbuffered Token Stream

Memory-efficient token stream that doesn't buffer all tokens.

/**
 * Token stream that doesn't buffer all tokens
 */
public class UnbufferedTokenStream implements TokenStream {
    protected TokenSource tokenSource;
    protected Token[] lookahead;
    protected int p;
    protected int numMarkers;
    protected int lastMarker;
    protected int currentTokenIndex;
    
    public UnbufferedTokenStream(TokenSource tokenSource);
    public UnbufferedTokenStream(TokenSource tokenSource, int bufferSize);
    
    public Token get(int i) throws UnsupportedOperationException;
    public Token LT(int i);
    protected void sync(int i);
    protected void fill(int n);
    protected void add(Token t);
    public int mark();
    public void release(int marker);
    public int index();
    public void seek(int index) throws UnsupportedOperationException;
    public int size() throws UnsupportedOperationException;
    public String getSourceName();
    public String toString(int start, int stop);
    public String toString(Token start, Token stop);
    public void consume();
    
    public TokenSource getTokenSource();
    public String toString();
}

Usage Examples:

import org.antlr.runtime.*;

// For large inputs where memory is a concern
MyLexer lexer = new MyLexer(new ANTLRFileStream("large-file.txt"));
UnbufferedTokenStream tokens = new UnbufferedTokenStream(lexer);

// Limited lookahead capability
Token next = tokens.LT(1);
Token nextNext = tokens.LT(2);

// Cannot access arbitrary tokens by index
// tokens.get(100); // Throws UnsupportedOperationException

Types

Token Interface

public interface Token {
    public static final int EOR_TOKEN_TYPE = 1;
    public static final int DOWN = 2;
    public static final int UP = 3;
    public static final int MIN_TOKEN_TYPE = UP+1;
    public static final int EOF = CharStream.EOF;
    public static final int INVALID_TOKEN_TYPE = 0;
    public static final Token INVALID_TOKEN = new CommonToken(INVALID_TOKEN_TYPE);
    public static final Token SKIP_TOKEN = new CommonToken(INVALID_TOKEN_TYPE);
    public static final int DEFAULT_CHANNEL = 0;
    public static final int HIDDEN_CHANNEL = 99;
    
    public String getText();
    public void setText(String text);
    public int getType();
    public void setType(int ttype);
    public int getLine();
    public void setLine(int line);
    public int getCharPositionInLine();
    public void setCharPositionInLine(int pos);
    public int getChannel();
    public void setChannel(int channel);
    public int getTokenIndex();
    public void setTokenIndex(int index);
    public CharStream getInputStream();
    public void setInputStream(CharStream input);
}

TokenSource Interface

public interface TokenSource {
    public Token nextToken();
    public String getSourceName();
}

Common Patterns

Token Filtering by Channel

// Get only tokens on default channel (skips whitespace, comments)
CommonTokenStream tokens = new CommonTokenStream(lexer, Token.DEFAULT_CHANNEL);

// Get all tokens including hidden ones
CommonTokenStream allTokens = new CommonTokenStream(lexer);
List<Token> hiddenTokens = allTokens.getTokens(0, allTokens.size()-1, Token.HIDDEN_CHANNEL);

Token Range Operations

// Get text from token range
String text = tokens.toString(startToken, stopToken);

// Get specific token types in range
List<Token> keywords = tokens.getTokens(0, 10, MyLexer.KEYWORD);

// Get multiple token types
List<Integer> types = Arrays.asList(MyLexer.IDENTIFIER, MyLexer.NUMBER);
List<Token> filtered = tokens.getTokens(0, tokens.size()-1, types);

Stream Positioning and Marking

// Mark current position
int marker = tokens.mark();

// Consume some tokens
tokens.consume();
tokens.consume();

// Look ahead without consuming
Token next = tokens.LT(1);

// Return to marked position
tokens.rewind(marker);

// Or seek to specific position
tokens.seek(5);

Install with Tessl CLI

npx tessl i tessl/maven-org-antlr--antlr-runtime

docs

character-streams.md

debug-support.md

error-handling.md

index.md

lexical-analysis.md

parsing.md

token-streams.md

tree-construction.md

tile.json