org.antlr.v4.tool.GrammarParserInterpreter Maven / Gradle / Ivy
/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.tool;
import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.DefaultErrorStrategy;
import org.antlr.v4.runtime.InputMismatchException;
import org.antlr.v4.runtime.InterpreterRuleContext;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserInterpreter;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNDeserializer;
import org.antlr.v4.runtime.atn.ATNSerializer;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.DecisionState;
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.atn.RuleStartState;
import org.antlr.v4.runtime.atn.StarLoopEntryState;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.tree.Trees;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.List;
/** A heavier weight {@link ParserInterpreter} that creates parse trees
* that track alternative numbers for subtree roots.
*
* @since 4.5.1
*
*/
public class GrammarParserInterpreter extends ParserInterpreter {
/** The grammar associated with this interpreter. Unlike the
* {@link ParserInterpreter} from the standard distribution,
* this can reference Grammar, which is in the tools area not
* purely runtime.
*/
protected final Grammar g;
protected BitSet decisionStatesThatSetOuterAltNumInContext;
/** Cache {@link LeftRecursiveRule#getPrimaryAlts()} and
* {@link LeftRecursiveRule#getRecursiveOpAlts()} for states in
* {@link #decisionStatesThatSetOuterAltNumInContext}. It only
* caches decisions in left-recursive rules.
*/
protected int[][] stateToAltsMap;
public GrammarParserInterpreter(Grammar g,
String grammarFileName,
Vocabulary vocabulary,
Collection ruleNames,
ATN atn,
TokenStream input) {
super(grammarFileName, vocabulary, ruleNames, atn, input);
this.g = g;
}
public GrammarParserInterpreter(Grammar g, ATN atn, TokenStream input) {
super(g.fileName, g.getVocabulary(),
Arrays.asList(g.getRuleNames()),
atn, // must run ATN through serializer to set some state flags
input);
this.g = g;
decisionStatesThatSetOuterAltNumInContext = findOuterMostDecisionStates();
stateToAltsMap = new int[g.atn.states.size()][];
}
@Override
protected InterpreterRuleContext createInterpreterRuleContext(ParserRuleContext parent,
int invokingStateNumber,
int ruleIndex)
{
return new GrammarInterpreterRuleContext(parent, invokingStateNumber, ruleIndex);
}
@Override
public void reset() {
super.reset();
overrideDecisionRoot = null;
}
/** identify the ATN states where we need to set the outer alt number.
* For regular rules, that's the block at the target to rule start state.
* For left-recursive rules, we track the primary block, which looks just
* like a regular rule's outer block, and the star loop block (always
* there even if 1 alt).
*/
public BitSet findOuterMostDecisionStates() {
BitSet track = new BitSet(atn.states.size());
int numberOfDecisions = atn.getNumberOfDecisions();
for (int i = 0; i < numberOfDecisions; i++) {
DecisionState decisionState = atn.getDecisionState(i);
RuleStartState startState = atn.ruleToStartState[decisionState.ruleIndex];
// Look for StarLoopEntryState that is in any left recursive rule
if ( decisionState instanceof StarLoopEntryState) {
StarLoopEntryState loopEntry = (StarLoopEntryState)decisionState;
if ( loopEntry.isPrecedenceDecision ) {
// Recursive alts always result in a (...)* in the transformed
// left recursive rule and that always has a BasicBlockStartState
// even if just 1 recursive alt exists.
ATNState blockStart = loopEntry.transition(0).target;
// track the StarBlockStartState associated with the recursive alternatives
track.set(blockStart.stateNumber);
}
}
else if ( startState.transition(0).target == decisionState ) {
// always track outermost block for any rule if it exists
track.set(decisionState.stateNumber);
}
}
return track;
}
/** Override this method so that we can record which alternative
* was taken at each decision point. For non-left recursive rules,
* it's simple. Set decisionStatesThatSetOuterAltNumInContext
* indicates which decision states should set the outer alternative number.
*
* Left recursive rules are much more complicated to deal with:
* there is typically a decision for the primary alternatives and a
* decision to choose between the recursive operator alternatives.
* For example, the following left recursive rule has two primary and 2
* recursive alternatives.
*
e : e '*' e
| '-' INT
| e '+' e
| ID
;
* ANTLR rewrites that rule to be
e[int precedence]
: ('-' INT | ID)
( {...}? '*' e[5]
| {...}? '+' e[3]
)*
;
*
* So, there are two decisions associated with picking the outermost alt.
* This complicates our tracking significantly. The outermost alternative number
* is a function of the decision (ATN state) within a left recursive rule and the
* predicted alternative coming back from adaptivePredict().
*
* We use stateToAltsMap as a cache to avoid expensive calls to
* getRecursiveOpAlts().
*/
@Override
protected int visitDecisionState(DecisionState p) {
int predictedAlt = super.visitDecisionState(p);
if( p.getNumberOfTransitions() > 1) {
// System.out.println("decision "+p.decision+": "+predictedAlt);
if( p.decision == this.overrideDecision &&
this._input.index() == this.overrideDecisionInputIndex )
{
overrideDecisionRoot = (GrammarInterpreterRuleContext)getContext();
}
}
GrammarInterpreterRuleContext ctx = (GrammarInterpreterRuleContext)_ctx;
if ( decisionStatesThatSetOuterAltNumInContext.get(p.stateNumber) ) {
ctx.outerAltNum = predictedAlt;
Rule r = g.getRule(p.ruleIndex);
if ( atn.ruleToStartState[r.index].isLeftRecursiveRule ) {
int[] alts = stateToAltsMap[p.stateNumber];
LeftRecursiveRule lr = (LeftRecursiveRule) g.getRule(p.ruleIndex);
if (p.getStateType() == ATNState.BLOCK_START) {
if ( alts==null ) {
alts = lr.getPrimaryAlts();
stateToAltsMap[p.stateNumber] = alts; // cache it
}
}
else if ( p.getStateType() == ATNState.STAR_BLOCK_START ) {
if ( alts==null ) {
alts = lr.getRecursiveOpAlts();
stateToAltsMap[p.stateNumber] = alts; // cache it
}
}
ctx.outerAltNum = alts[predictedAlt];
}
}
return predictedAlt;
}
/** Given an ambiguous parse information, return the list of ambiguous parse trees.
* An ambiguity occurs when a specific token sequence can be recognized
* in more than one way by the grammar. These ambiguities are detected only
* at decision points.
*
* The list of trees includes the actual interpretation (that for
* the minimum alternative number) and all ambiguous alternatives.
* The actual interpretation is always first.
*
* This method reuses the same physical input token stream used to
* detect the ambiguity by the original parser in the first place.
* This method resets/seeks within but does not alter originalParser.
*
* The trees are rooted at the node whose start..stop token indices
* include the start and stop indices of this ambiguity event. That is,
* the trees returned will always include the complete ambiguous subphrase
* identified by the ambiguity event. The subtrees returned will
* also always contain the node associated with the overridden decision.
*
* Be aware that this method does NOT notify error or parse listeners as
* it would trigger duplicate or otherwise unwanted events.
*
* This uses a temporary ParserATNSimulator and a ParserInterpreter
* so we don't mess up any statistics, event lists, etc...
* The parse tree constructed while identifying/making ambiguityInfo is
* not affected by this method as it creates a new parser interp to
* get the ambiguous interpretations.
*
* Nodes in the returned ambig trees are independent of the original parse
* tree (constructed while identifying/creating ambiguityInfo).
*
* @since 4.5.1
*
* @param g From which grammar should we drive alternative
* numbers and alternative labels.
*
* @param originalParser The parser used to create ambiguityInfo; it
* is not modified by this routine and can be either
* a generated or interpreted parser. It's token
* stream *is* reset/seek()'d.
* @param tokens A stream of tokens to use with the temporary parser.
* This will often be just the token stream within the
* original parser but here it is for flexibility.
*
* @param decision Which decision to try different alternatives for.
*
* @param alts The set of alternatives to try while re-parsing.
*
* @param startIndex The index of the first token of the ambiguous
* input or other input of interest.
*
* @param stopIndex The index of the last token of the ambiguous input.
* The start and stop indexes are used primarily to
* identify how much of the resulting parse tree
* to return.
*
* @param startRuleIndex The start rule for the entire grammar, not
* the ambiguous decision. We re-parse the entire input
* and so we need the original start rule.
*
* @return The list of all possible interpretations of
* the input for the decision in ambiguityInfo.
* The actual interpretation chosen by the parser
* is always given first because this method
* retests the input in alternative order and
* ANTLR always resolves ambiguities by choosing
* the first alternative that matches the input.
* The subtree returned
*
* @throws RecognitionException Throws upon syntax error while matching
* ambig input.
*/
public static List getAllPossibleParseTrees(Grammar g,
Parser originalParser,
TokenStream tokens,
int decision,
BitSet alts,
int startIndex,
int stopIndex,
int startRuleIndex)
throws RecognitionException {
List trees = new ArrayList();
// Create a new parser interpreter to parse the ambiguous subphrase
ParserInterpreter parser = deriveTempParserInterpreter(g, originalParser, tokens);
if ( stopIndex>=(tokens.size()-1) ) { // if we are pointing at EOF token
// EOF is not in tree, so must be 1 less than last non-EOF token
stopIndex = tokens.size()-2;
}
// get ambig trees
int alt = alts.nextSetBit(0);
while ( alt>=0 ) {
// re-parse entire input for all ambiguous alternatives
// (don't have to do first as it's been parsed, but do again for simplicity
// using this temp parser.)
parser.reset();
parser.addDecisionOverride(decision, startIndex, alt);
ParserRuleContext t = parser.parse(startRuleIndex);
GrammarInterpreterRuleContext ambigSubTree =
(GrammarInterpreterRuleContext) Trees.getRootOfSubtreeEnclosingRegion(t, startIndex, stopIndex);
// Use higher of overridden decision tree or tree enclosing all tokens
if ( Trees.isAncestorOf(parser.getOverrideDecisionRoot(), ambigSubTree) ) {
ambigSubTree = (GrammarInterpreterRuleContext) parser.getOverrideDecisionRoot();
}
trees.add(ambigSubTree);
alt = alts.nextSetBit(alt+1);
}
return trees;
}
/** Return a list of parse trees, one for each alternative in a decision
* given the same input.
*
* Very similar to {@link #getAllPossibleParseTrees} except
* that it re-parses the input for every alternative in a decision,
* not just the ambiguous ones (there is no alts parameter here).
* This method also tries to reduce the size of the parse trees
* by stripping away children of the tree that are completely out of range
* of startIndex..stopIndex. Also, because errors are expected, we
* use a specialized error handler that more or less bails out
* but that also consumes the first erroneous token at least. This
* ensures that an error node will be in the parse tree for display.
*
* NOTES:
// we must parse the entire input now with decision overrides
// we cannot parse a subset because it could be that a decision
// above our decision of interest needs to read way past
// lookaheadInfo.stopIndex. It seems like there is no escaping
// the use of a full and complete token stream if we are
// resetting to token index 0 and re-parsing from the start symbol.
// It's not easy to restart parsing somewhere in the middle like a
// continuation because our call stack does not match the
// tree stack because of left recursive rule rewriting. grrrr!
*
* @since 4.5.1
*/
public static List getLookaheadParseTrees(Grammar g,
ParserInterpreter originalParser,
TokenStream tokens,
int startRuleIndex,
int decision,
int startIndex,
int stopIndex) {
List trees = new ArrayList();
// Create a new parser interpreter to parse the ambiguous subphrase
ParserInterpreter parser = deriveTempParserInterpreter(g, originalParser, tokens);
DecisionState decisionState = originalParser.getATN().decisionToState.get(decision);
for (int alt = 1; alt<=decisionState.getTransitions().length; alt++) {
// re-parse entire input for all ambiguous alternatives
// (don't have to do first as it's been parsed, but do again for simplicity
// using this temp parser.)
GrammarParserInterpreter.BailButConsumeErrorStrategy errorHandler =
new GrammarParserInterpreter.BailButConsumeErrorStrategy();
parser.setErrorHandler(errorHandler);
parser.reset();
parser.addDecisionOverride(decision, startIndex, alt);
ParserRuleContext tt = parser.parse(startRuleIndex);
int stopTreeAt = stopIndex;
if ( errorHandler.firstErrorTokenIndex>=0 ) {
stopTreeAt = errorHandler.firstErrorTokenIndex; // cut off rest at first error
}
Interval overallRange = tt.getSourceInterval();
if ( stopTreeAt>overallRange.b ) {
// If we try to look beyond range of tree, stopTreeAt must be EOF
// for which there is no EOF ref in grammar. That means tree
// will not have node for stopTreeAt; limit to overallRange.b
stopTreeAt = overallRange.b;
}
ParserRuleContext subtree =
Trees.getRootOfSubtreeEnclosingRegion(tt,
startIndex,
stopTreeAt);
// Use higher of overridden decision tree or tree enclosing all tokens
if ( Trees.isAncestorOf(parser.getOverrideDecisionRoot(), subtree) ) {
subtree = parser.getOverrideDecisionRoot();
}
Trees.stripChildrenOutOfRange(subtree, parser.getOverrideDecisionRoot(), startIndex, stopTreeAt);
trees.add(subtree);
}
return trees;
}
/** Derive a new parser from an old one that has knowledge of the grammar.
* The Grammar object is used to correctly compute outer alternative
* numbers for parse tree nodes. A parser of the same type is created
* for subclasses of {@link ParserInterpreter}.
*/
public static ParserInterpreter deriveTempParserInterpreter(Grammar g, Parser originalParser, TokenStream tokens) {
ParserInterpreter parser;
if (originalParser instanceof ParserInterpreter) {
Class extends ParserInterpreter> c = originalParser.getClass().asSubclass(ParserInterpreter.class);
try {
Constructor extends ParserInterpreter> ctor = c.getConstructor(Grammar.class, ATN.class, TokenStream.class);
parser = ctor.newInstance(g, originalParser.getATN(), originalParser.getTokenStream());
}
catch (Exception e) {
throw new IllegalArgumentException("can't create parser to match incoming "+originalParser.getClass().getSimpleName(), e);
}
}
else { // must've been a generated parser
char[] serializedAtn = ATNSerializer.getSerializedAsChars(originalParser.getATN());
ATN deserialized = new ATNDeserializer().deserialize(serializedAtn);
parser = new ParserInterpreter(originalParser.getGrammarFileName(),
originalParser.getVocabulary(),
Arrays.asList(originalParser.getRuleNames()),
deserialized,
tokens);
}
parser.setInputStream(tokens);
// Make sure that we don't get any error messages from using this temporary parser
parser.setErrorHandler(new BailErrorStrategy());
parser.removeErrorListeners();
parser.removeParseListeners();
parser.getInterpreter().setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);
return parser;
}
/** We want to stop and track the first error but we cannot bail out like
* {@link BailErrorStrategy} as consume() constructs trees. We make sure
* to create an error node during recovery with this strategy. We
* consume() 1 token during the "bail out of rule" mechanism in recover()
* and let it fall out of the rule to finish constructing trees. For
* recovery in line, we throw InputMismatchException to engage recover().
*/
public static class BailButConsumeErrorStrategy extends DefaultErrorStrategy {
public int firstErrorTokenIndex = -1;
@Override
public void recover(Parser recognizer, RecognitionException e) {
int errIndex = recognizer.getInputStream().index();
if ( firstErrorTokenIndex == -1 ) {
firstErrorTokenIndex = errIndex; // latch
}
// System.err.println("recover: error at " + errIndex);
TokenStream input = recognizer.getInputStream();
if ( input.index()