ANTLR is a powerful parser generator for reading, processing, executing, or translating structured text or binary files.
The ANTLR4 runtime API provides the core infrastructure for executing generated parsers and lexers, managing input streams, and handling tokens.
Base class for all generated parsers providing core parsing functionality.
/**
 * Base class for all generated parsers.
 *
 * <p>Declared as a {@code Recognizer} whose symbols are {@code Token}s and whose
 * interpreter is a {@code ParserATNSimulator}. This listing is an API summary: it
 * shows signatures only, without method bodies.
 */
public abstract class Parser extends Recognizer<Token, ParserATNSimulator> {
/** Reset parser state to initial conditions. */
public void reset();
/**
 * Match the expected token type, consuming the token if the match succeeds.
 *
 * @param ttype the token type expected at the current position
 * @throws RecognitionException declared by the signature; presumably raised when the
 *         current token does not match -- confirm against the runtime Javadoc
 */
public Token match(int ttype) throws RecognitionException;
/** Consume the current input token and return it. */
public Token consume();
/**
 * Match and consume a wildcard token.
 *
 * @throws RecognitionException declared by the signature
 */
public Token matchWildcard() throws RecognitionException;
/**
 * Enter a parser rule.
 *
 * @param localctx context object for the rule being entered
 * @param state state number to associate with the rule entry
 * @param ruleIndex index of the rule being entered
 */
public void enterRule(ParserRuleContext localctx, int state, int ruleIndex);
/** Exit the current parser rule. */
public void exitRule();
/** Add a parse listener to this parser. */
public void addParseListener(ParseTreeListener listener);
/** Remove a single parse listener from this parser. */
public void removeParseListener(ParseTreeListener listener);
/** Get all registered parse listeners. */
public List<ParseTreeListener> getParseListeners();
/** Remove all parse listeners (no-argument counterpart of {@code removeParseListener}). */
public void removeParseListeners();
/** Get the current token stream. */
public TokenStream getTokenStream();
/** Set the token stream for this parser. */
public void setTokenStream(TokenStream input);
/** Get the current rule context. */
public ParserRuleContext getContext();
/** Get the error recovery strategy. */
public ANTLRErrorStrategy getErrorHandler();
/** Set the error recovery strategy. */
public void setErrorHandler(ANTLRErrorStrategy handler);
/** Get the number of syntax errors encountered. */
public int getNumberOfSyntaxErrors();
/** Notify all error listeners with a message only. */
public void notifyErrorListeners(String msg);
/** Notify error listeners with the offending token, a message, and the causing exception. */
public void notifyErrorListeners(Token offendingToken, String msg, RecognitionException e);
/** Create a terminal node for the parse tree from token {@code t} under {@code parent}. */
public TerminalNode createTerminalNode(ParserRuleContext parent, Token t);
/** Create an error node for the parse tree from token {@code t} under {@code parent}. */
public ErrorNode createErrorNode(ParserRuleContext parent, Token t);
/** Enter the outer alternative numbered {@code altNum} for the given context. */
public void enterOuterAlt(ParserRuleContext localctx, int altNum);
/** Get the current precedence level. */
public int getPrecedence();
/** Enter a recursion rule with the given precedence. */
public void enterRecursionRule(ParserRuleContext localctx, int state, int ruleIndex, int precedence);
/** Push a new recursion context. */
public void pushNewRecursionContext(ParserRuleContext localctx, int state, int ruleIndex);
/** Unroll recursion contexts. */
public void unrollRecursionContexts(ParserRuleContext _parentctx);
/** Get the invoking context for the rule with the given index. */
public ParserRuleContext getInvokingContext(int ruleIndex);
/** Set the current parser context. */
public void setContext(ParserRuleContext ctx);
/** Check whether the token type {@code symbol} is expected at the current position. */
public boolean isExpectedToken(int symbol);
/** Check whether EOF has been matched. */
public boolean isMatchedEOF();
/** Get the expected tokens within the current rule. */
public IntervalSet getExpectedTokensWithinCurrentRule();
/** Get a rule's index from its name. */
public int getRuleIndex(String ruleName);
/**
 * Get the current rule context.
 * NOTE(review): this summary is identical to the one on {@code getContext()}; both return
 * {@code ParserRuleContext}. Confirm whether the two accessors differ.
 */
public ParserRuleContext getRuleContext();
/** Get the rule invocation stack as a list of strings. */
public List<String> getRuleInvocationStack();
/** Get the rule invocation stack starting from the given context. */
public List<String> getRuleInvocationStack(RuleContext p);
/** Enable or disable profiling. */
public void setProfile(boolean profile);
/** Check whether trace is enabled (see {@code setTrace}). */
public boolean isTrace();
/** Set whether to build parse trees. */
public void setBuildParseTree(boolean buildParseTrees);
/** Check whether parse trees are being built. */
public boolean getBuildParseTree();
/** Set whether to trim parse trees. */
public void setTrimParseTree(boolean trimParseTrees);
/** Check whether parse trees are being trimmed. */
public boolean getTrimParseTree();
/** Get the expected tokens at the current position. */
public IntervalSet getExpectedTokens();
/** Get the current token. */
public Token getCurrentToken();
/** Enable or disable parse trace output. */
public void setTrace(boolean trace);
}
Usage Example:
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.*;
// Create parser instance: string -> CharStream -> lexer -> token stream -> parser.
// ExprLexer / ExprParser / ExprBaseListener are generated classes not shown on this page.
CharStream input = CharStreams.fromString("1 + 2 * 3");
ExprLexer lexer = new ExprLexer(input);
CommonTokenStream tokens = new CommonTokenStream(lexer);
ExprParser parser = new ExprParser(tokens);
// Configure parser
parser.setBuildParseTree(true);
// Register a listener that is invoked during parsing.
// NOTE(review): a listener added with addParseListener presumably receives the enter
// event before the rule's children are attached, so ctx.getText() may be empty here --
// confirm against the Parser.addParseListener Javadoc.
parser.addParseListener(new ExprBaseListener() {
@Override
public void enterExpr(ExprParser.ExprContext ctx) {
System.out.println("Entering expression: " + ctx.getText());
}
});
// Parse
ParseTree tree = parser.expr();
Base class for all generated lexers providing tokenization functionality.
/**
 * Base class for all generated lexers.
 *
 * <p>Declared as a {@code Recognizer} whose symbol type is {@code Integer} and whose
 * interpreter is a {@code LexerATNSimulator}. This listing is an API summary: it shows
 * signatures only, without method bodies.
 */
public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator> {
/** Get the next token from the input stream. */
public Token nextToken();
/** Skip the current token (it is not returned to the parser). */
public void skip();
/**
 * Request more text for the current token.
 * NOTE(review): presumably this keeps accumulating input into the token being built
 * instead of emitting it -- confirm against the Lexer Javadoc.
 */
public void more();
/** Switch to the lexer mode {@code m}. */
public void mode(int m);
/** Push the current mode onto the mode stack and switch to mode {@code m}. */
public void pushMode(int m);
/**
 * Pop a mode from the mode stack and switch to it.
 *
 * @return an {@code int}; presumably the mode that was switched to -- confirm
 */
public int popMode();
/** Get the current lexer mode. */
public int getCurrentMode();
/** Emit the given, already constructed token. */
public void emit(Token token);
/** Emit a token of the given type with the current text, and return it. */
public Token emit(int type);
/** Get the current input character stream. */
public CharStream getInputStream();
/** Set the input character stream. */
public void setInputStream(CharStream input);
/** Get the token source name for error reporting. */
public String getSourceName();
/** Get the current token factory. */
public TokenFactory<?> getTokenFactory();
/** Set the token factory used for creating tokens. */
public void setTokenFactory(TokenFactory<?> factory);
/** Reset lexer state. */
public void reset();
/** Get all tokens from the input as a list. */
public List<? extends Token> getAllTokens();
}
Usage Example:
import org.antlr.v4.runtime.*;
// Create lexer (MyLexer is a generated class not shown on this page)
CharStream input = CharStreams.fromString("hello world");
MyLexer lexer = new MyLexer(input);
// Tokenize input: pull tokens one at a time until the EOF token type is seen.
// The EOF token itself is not printed because the check runs before the loop body.
Token token;
while ((token = lexer.nextToken()).getType() != Token.EOF) {
System.out.println("Token: " + token.getText() + " Type: " + token.getType());
}
Token representation and management.
/**
 * Basic token interface exposing a token's type, text, channel, and position
 * information, plus the well-known type and channel constants.
 */
public interface Token {
/** End-of-file token type (value -1). */
int EOF = -1;
/** Invalid token type (value 0). */
int INVALID_TYPE = 0;
/** Default channel for tokens (value 0). */
int DEFAULT_CHANNEL = 0;
/** Hidden channel, used for whitespace/comments (value 1). */
int HIDDEN_CHANNEL = 1;
/** Get the token type. */
int getType();
/** Get the token text. */
String getText();
/** Get the line number (1-based). */
int getLine();
/** Get the character position within the line (0-based). */
int getCharPositionInLine();
/** Get the channel number (compare with {@code DEFAULT_CHANNEL} / {@code HIDDEN_CHANNEL}). */
int getChannel();
/** Get the token's index in its token stream. */
int getTokenIndex();
/** Get the start character index in the input. */
int getStartIndex();
/** Get the stop character index in the input. */
int getStopIndex();
/** Get the token source. */
TokenSource getTokenSource();
/** Get the input stream where the token originated. */
CharStream getInputStream();
}
/**
 * Standard token implementation.
 *
 * <p>Implements {@code WritableToken}; the listing pairs each getter with a setter for
 * text, type, line, character position, channel, and token index.
 */
public class CommonToken implements WritableToken {
/** Create a token with the given type and text. */
public CommonToken(int type, String text);
/** Create a token from another token. */
public CommonToken(Token oldToken);
/** Create a token with the given type only. */
public CommonToken(int type);
/**
 * Create a token from a source pair, type, channel, and start/stop indices.
 *
 * @param source pair of the token source and the character stream
 * @param type token type
 * @param channel channel number
 * @param start start index
 * @param stop stop index
 */
public CommonToken(Pair<TokenSource, CharStream> source, int type,
int channel, int start, int stop);
/** Get the token text. */
@Override
public String getText();
/** Set the token text. */
@Override
public void setText(String text);
/** Get the token type. */
@Override
public int getType();
/** Set the token type. */
@Override
public void setType(int type);
/** Get the line number. */
@Override
public int getLine();
/** Set the line number. */
@Override
public void setLine(int line);
/** Get the character position within the line. */
@Override
public int getCharPositionInLine();
/** Set the character position within the line. */
@Override
public void setCharPositionInLine(int pos);
/** Get the channel number. */
@Override
public int getChannel();
/** Set the channel number. */
@Override
public void setChannel(int channel);
/** Get the token's index in its stream. */
@Override
public int getTokenIndex();
/** Set the token's index in its stream. */
@Override
public void setTokenIndex(int index);
}
Token stream interfaces and implementations for feeding tokens to parsers.
/**
 * Stream of tokens with buffering and seeking capabilities.
 *
 * <p>NOTE(review): the {@code getTokens} overloads and {@code getText(ParseTree)} listed
 * here may belong to implementations rather than to this interface -- verify against the
 * ANTLR runtime Javadoc for TokenStream before relying on them.
 */
public interface TokenStream extends IntStream {
/** Get the token at the specified index. */
Token get(int index);
/** Get the token source. */
TokenSource getTokenSource();
/** Get all tokens from start to stop (inclusive). */
List<Token> getTokens(int start, int stop);
/** Get the tokens between start and stop whose type is {@code ttype}. */
List<Token> getTokens(int start, int stop, int ttype);
/** Get the tokens between start and stop whose type is in {@code types}. */
List<Token> getTokens(int start, int stop, Set<Integer> types);
/** Get the text covered by the given interval. */
String getText(Interval interval);
/** Get the text for the given rule context. */
String getText(RuleContext ctx);
/** Get the text for the given parse tree. */
String getText(ParseTree parseTree);
}
/**
 * Buffered token stream implementation of {@code TokenStream}.
 */
public class BufferedTokenStream implements TokenStream {
/** Create a buffered stream from a token source. */
public BufferedTokenStream(TokenSource tokenSource);
/** Fill the buffer up to the specified index. */
protected void sync(int i);
/** Fill the buffer completely. */
public void fill();
/** Get all tokens in the buffer. */
public List<Token> getTokens();
/** Get the tokens between the start and stop indices whose type is {@code ttype}. */
public List<Token> getTokens(int start, int stop, int ttype);
/** Get the tokens between the start and stop indices whose type is in {@code types}. */
public List<Token> getTokens(int start, int stop, Set<Integer> types);
/** Get the hidden tokens to the left of the token at {@code tokenIndex}. */
public List<Token> getHiddenTokensToLeft(int tokenIndex);
/** Get the tokens on the given channel to the left of the token at {@code tokenIndex}. */
public List<Token> getHiddenTokensToLeft(int tokenIndex, int channel);
/** Get the hidden tokens to the right of the token at {@code tokenIndex}. */
public List<Token> getHiddenTokensToRight(int tokenIndex);
/** Get the tokens on the given channel to the right of the token at {@code tokenIndex}. */
public List<Token> getHiddenTokensToRight(int tokenIndex, int channel);
}
/**
 * Common token stream that filters out hidden tokens; extends
 * {@code BufferedTokenStream}.
 */
public class CommonTokenStream extends BufferedTokenStream {
/** Create a stream from a token source. */
public CommonTokenStream(TokenSource tokenSource);
/** Create a stream from a token source that reads the specified channel. */
public CommonTokenStream(TokenSource tokenSource, int channel);
/** Adjust a seek index so that tokens on hidden channels are ignored. */
protected int adjustSeekIndex(int i);
/** Look backward {@code k} tokens for a token on the channel. */
protected Token LB(int k);
/** Look forward {@code k} tokens for a token on the channel. */
public Token LT(int k);
/**
 * Get the number of on-channel tokens.
 * NOTE(review): the original summary said "default channel"; since a channel can be
 * passed to the constructor, this presumably counts tokens on the stream's own channel
 * -- confirm.
 */
public int getNumberOfOnChannelTokens();
}
Input stream management for feeding characters to lexers.
/**
 * Static factory methods for creating {@code CharStream}s (4.7+).
 *
 * <p>Every factory except the two {@code fromString} overloads declares
 * {@code IOException}.
 */
public final class CharStreams {
/** Create a character stream from the file with the given name. */
public static CharStream fromFileName(String fileName) throws IOException;
/** Create a character stream from the named file using the given charset. */
public static CharStream fromFileName(String fileName, Charset encoding) throws IOException;
/** Create a character stream from a string. */
public static CharStream fromString(String s);
/** Create a character stream from a string, with an explicit source name. */
public static CharStream fromString(String s, String sourceName);
/** Create a character stream from a {@code Reader}. */
public static CharStream fromReader(Reader r) throws IOException;
/** Create a character stream from a {@code Reader}, with an explicit source name. */
public static CharStream fromReader(Reader r, String sourceName) throws IOException;
/** Create a character stream from an {@code InputStream}. */
public static CharStream fromStream(InputStream is) throws IOException;
/** Create a character stream from an {@code InputStream} using the given charset. */
public static CharStream fromStream(InputStream is, Charset encoding) throws IOException;
/** Create a character stream from a {@code ReadableByteChannel}. */
public static CharStream fromChannel(ReadableByteChannel channel) throws IOException;
/** Create a character stream from a {@code ReadableByteChannel} using the given charset. */
public static CharStream fromChannel(ReadableByteChannel channel, Charset encoding) throws IOException;
}
/**
 * Character stream interface; an {@code IntStream} that can also return text.
 */
public interface CharStream extends IntStream {
/** Get the substring covered by the interval, from start to stop (inclusive). */
String getText(Interval interval);
/** Convert the stream to a string. */
String toString();
}
Common base class for Parser and Lexer with shared functionality.
/**
 * Base class for Parser and Lexer with common functionality.
 *
 * <p>Type parameters: {@code Symbol} is the kind of symbol recognized and
 * {@code ATNInterpreter} is the {@code ATNSimulator} subtype used as the interpreter.
 *
 * <p>NOTE(review): several members below ({@code getErrorHandler}, {@code setErrorHandler},
 * {@code getNumberOfSyntaxErrors}, {@code getTokenFactory}, {@code setTokenFactory}) are
 * also listed on the Parser and/or Lexer summaries on this page. Confirm against the
 * ANTLR runtime Javadoc which class actually declares each of them.
 */
public abstract class Recognizer<Symbol, ATNInterpreter extends ATNSimulator> {
/** Get the rule names for this recognizer. */
public abstract String[] getRuleNames();
/** Get the vocabulary (token names) for this recognizer. */
public abstract Vocabulary getVocabulary();
/** Get the grammar file name. */
public abstract String getGrammarFileName();
/** Get the ATN used by this recognizer. */
public abstract ATN getATN();
/** Get the ATN interpreter/simulator. */
public ATNInterpreter getInterpreter();
/** Set the ATN interpreter/simulator. */
public void setInterpreter(ATNInterpreter interpreter);
/** Get the error recovery strategy. */
public ANTLRErrorStrategy getErrorHandler();
/** Set the error recovery strategy. */
public void setErrorHandler(ANTLRErrorStrategy handler);
/** Get the input stream. */
public abstract IntStream getInputStream();
/**
 * Set the input stream.
 * NOTE(review): the Lexer summary on this page lists {@code setInputStream(CharStream)},
 * a different parameter type from the {@code IntStream} declared here -- confirm.
 */
public abstract void setInputStream(IntStream input);
/** Get the current token factory. */
public TokenFactory<?> getTokenFactory();
/** Set the token factory. */
public void setTokenFactory(TokenFactory<?> factory);
/** Add an error listener. */
public void addErrorListener(ANTLRErrorListener listener);
/** Remove a single error listener. */
public void removeErrorListener(ANTLRErrorListener listener);
/** Remove all error listeners. */
public void removeErrorListeners();
/** Get all error listeners. */
public List<ANTLRErrorListener> getErrorListeners();
/** Get the syntax error count. */
public int getNumberOfSyntaxErrors();
/** Get the current state. */
public int getState();
/** Set the current state. */
public void setState(int atnState);
}
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.*;
// Basic pipeline: file -> CharStream -> lexer -> token stream -> parser.
// MyLexer / MyParser are generated classes not shown on this page.
// Create input stream (CharStreams.fromFileName declares IOException, so the
// enclosing code must handle or propagate it)
CharStream input = CharStreams.fromFileName("input.txt");
// Create lexer
MyLexer lexer = new MyLexer(input);
// Create token stream
CommonTokenStream tokens = new CommonTokenStream(lexer);
// Create parser
MyParser parser = new MyParser(tokens);
// Parse starting from rule (startRule stands in for the grammar's entry rule method)
ParseTree tree = parser.startRule();
import org.antlr.v4.runtime.*;
// Custom token factory
TokenFactory<CommonToken> factory = new CommonTokenFactory(true); // copyText=true
// Set on lexer.
// `input` is not declared in this snippet; it is assumed to be a CharStream created
// earlier (for example via CharStreams.fromString).
MyLexer lexer = new MyLexer(input);
lexer.setTokenFactory(factory);
import org.antlr.v4.runtime.*;
// Create lexer.
// `input` is not declared in this snippet; it is assumed to be a CharStream created earlier.
MyLexer lexer = new MyLexer(input);
CommonTokenStream tokens = new CommonTokenStream(lexer);
// Access all tokens: fill() loads the buffer completely before getTokens() reads it
tokens.fill();
List<Token> allTokens = tokens.getTokens();
// Get specific token types: tokens of type MyLexer.ID over the whole buffer
// (indices 0 through size()-1)
List<Token> identifiers = tokens.getTokens(0, tokens.size()-1, MyLexer.ID);
// Get hidden tokens (comments, whitespace) around the token at index 5, an index
// chosen purely for illustration.
// NOTE(review): confirm what is returned when there are no hidden tokens (possibly
// null rather than an empty list) before iterating over the result.
List<Token> hiddenLeft = tokens.getHiddenTokensToLeft(5, Token.HIDDEN_CHANNEL);
List<Token> hiddenRight = tokens.getHiddenTokensToRight(5, Token.HIDDEN_CHANNEL);
Install with Tessl CLI
npx tessl i tessl/maven-org-antlr--antlr4-master