All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.antlr.v4.runtime.DefaultErrorStrategy Maven / Gradle / Ivy

There is a newer version: 2.12.15
Show newest version
/*
 * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

package org.antlr.v4.runtime;

import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.RuleTransition;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.Pair;

/**
 * This is the default implementation of {@link ANTLRErrorStrategy} used for
 * error reporting and recovery in ANTLR parsers.
 */
public class DefaultErrorStrategy implements ANTLRErrorStrategy {
	/**
	 * Indicates whether the error strategy is currently "recovering from an
	 * error". This is used to suppress reporting multiple error messages while
	 * attempting to recover from a detected syntax error.
	 *
	 * @see #inErrorRecoveryMode
	 */
	protected boolean errorRecoveryMode = false;

	/** The index into the input stream where the last error occurred.
	 * 	This is used to prevent infinite loops where an error is found
	 *  but no token is consumed during recovery...another error is found,
	 *  ad nauseum.  This is a failsafe mechanism to guarantee that at least
	 *  one token/tree node is consumed for two errors.
	 */
	protected int lastErrorIndex = -1;

	protected IntervalSet lastErrorStates;

	/**
	 * This field is used to propagate information about the lookahead following
	 * the previous match. Since prediction prefers completing the current rule
	 * to error recovery efforts, error reporting may occur later than the
	 * original point where it was discoverable. The original context is used to
	 * compute the true expected sets as though the reporting occurred as early
	 * as possible.
	 */
	protected ParserRuleContext nextTokensContext;

	/**
	 * @see #nextTokensContext
	 */
	protected int nextTokensState;

	/**
	 * {@inheritDoc}
	 *
	 * 

The default implementation simply calls {@link #endErrorCondition} to * ensure that the handler is not in error recovery mode.

*/ @Override public void reset(Parser recognizer) { endErrorCondition(recognizer); } /** * This method is called to enter error recovery mode when a recognition * exception is reported. * * @param recognizer the parser instance */ protected void beginErrorCondition(Parser recognizer) { errorRecoveryMode = true; } /** * {@inheritDoc} */ @Override public boolean inErrorRecoveryMode(Parser recognizer) { return errorRecoveryMode; } /** * This method is called to leave error recovery mode after recovering from * a recognition exception. * * @param recognizer */ protected void endErrorCondition(Parser recognizer) { errorRecoveryMode = false; lastErrorStates = null; lastErrorIndex = -1; } /** * {@inheritDoc} * *

The default implementation simply calls {@link #endErrorCondition}.

*/ @Override public void reportMatch(Parser recognizer) { endErrorCondition(recognizer); } /** * {@inheritDoc} * *

The default implementation returns immediately if the handler is already * in error recovery mode. Otherwise, it calls {@link #beginErrorCondition} * and dispatches the reporting task based on the runtime type of {@code e} * according to the following table.

* *
    *
  • {@link NoViableAltException}: Dispatches the call to * {@link #reportNoViableAlternative}
  • *
  • {@link InputMismatchException}: Dispatches the call to * {@link #reportInputMismatch}
  • *
  • {@link FailedPredicateException}: Dispatches the call to * {@link #reportFailedPredicate}
  • *
  • All other types: calls {@link Parser#notifyErrorListeners} to report * the exception
  • *
*/ @Override public void reportError(Parser recognizer, RecognitionException e) { // if we've already reported an error and have not matched a token // yet successfully, don't report any errors. if (inErrorRecoveryMode(recognizer)) { // System.err.print("[SPURIOUS] "); return; // don't report spurious errors } beginErrorCondition(recognizer); if ( e instanceof NoViableAltException ) { reportNoViableAlternative(recognizer, (NoViableAltException) e); } else if ( e instanceof InputMismatchException ) { reportInputMismatch(recognizer, (InputMismatchException)e); } else if ( e instanceof FailedPredicateException ) { reportFailedPredicate(recognizer, (FailedPredicateException)e); } else { System.err.println("unknown recognition error type: "+e.getClass().getName()); recognizer.notifyErrorListeners(e.getOffendingToken(), e.getMessage(), e); } } /** * {@inheritDoc} * *

The default implementation resynchronizes the parser by consuming tokens * until we find one in the resynchronization set--loosely the set of tokens * that can follow the current rule.

*/ @Override public void recover(Parser recognizer, RecognitionException e) { // System.out.println("recover in "+recognizer.getRuleInvocationStack()+ // " index="+recognizer.getInputStream().index()+ // ", lastErrorIndex="+ // lastErrorIndex+ // ", states="+lastErrorStates); if ( lastErrorIndex==recognizer.getInputStream().index() && lastErrorStates != null && lastErrorStates.contains(recognizer.getState()) ) { // uh oh, another error at same token index and previously-visited // state in ATN; must be a case where LT(1) is in the recovery // token set so nothing got consumed. Consume a single token // at least to prevent an infinite loop; this is a failsafe. // System.err.println("seen error condition before index="+ // lastErrorIndex+", states="+lastErrorStates); // System.err.println("FAILSAFE consumes "+recognizer.getTokenNames()[recognizer.getInputStream().LA(1)]); recognizer.consume(); } lastErrorIndex = recognizer.getInputStream().index(); if ( lastErrorStates==null ) lastErrorStates = new IntervalSet(); lastErrorStates.add(recognizer.getState()); IntervalSet followSet = getErrorRecoverySet(recognizer); consumeUntil(recognizer, followSet); } /** * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure * that the current lookahead symbol is consistent with what were expecting * at this point in the ATN. You can call this anytime but ANTLR only * generates code to check before subrules/loops and each iteration. * *

Implements Jim Idle's magic sync mechanism in closures and optional * subrules. E.g.,

* *
	 * a : sync ( stuff sync )* ;
	 * sync : {consume to what can follow sync} ;
	 * 
* * At the start of a sub rule upon error, {@link #sync} performs single * token deletion, if possible. If it can't do that, it bails on the current * rule and uses the default error recovery, which consumes until the * resynchronization set of the current rule. * *

If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block * with an empty alternative), then the expected set includes what follows * the subrule.

* *

During loop iteration, it consumes until it sees a token that can start a * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to * stay in the loop as long as possible.

* *

ORIGINS

* *

Previous versions of ANTLR did a poor job of their recovery within loops. * A single mismatch token or missing token would force the parser to bail * out of the entire rules surrounding the loop. So, for rule

* *
	 * classDef : 'class' ID '{' member* '}'
	 * 
* * input with an extra token between members would force the parser to * consume until it found the next class definition rather than the next * member definition of the current class. * *

This functionality cost a little bit of effort because the parser has to * compare token set at the start of the loop and at each iteration. If for * some reason speed is suffering for you, you can turn off this * functionality by simply overriding this method as a blank { }.

*/ @Override public void sync(Parser recognizer) throws RecognitionException { ATNState s = recognizer.getInterpreter().atn.states.get(recognizer.getState()); // System.err.println("sync @ "+s.stateNumber+"="+s.getClass().getSimpleName()); // If already recovering, don't try to sync if (inErrorRecoveryMode(recognizer)) { return; } TokenStream tokens = recognizer.getInputStream(); int la = tokens.LA(1); // try cheaper subset first; might get lucky. seems to shave a wee bit off IntervalSet nextTokens = recognizer.getATN().nextTokens(s); if (nextTokens.contains(la)) { // We are sure the token matches nextTokensContext = null; nextTokensState = ATNState.INVALID_STATE_NUMBER; return; } if (nextTokens.contains(Token.EPSILON)) { if (nextTokensContext == null) { // It's possible the next token won't match; information tracked // by sync is restricted for performance. nextTokensContext = recognizer.getContext(); nextTokensState = recognizer.getState(); } return; } switch (s.getStateType()) { case ATNState.BLOCK_START: case ATNState.STAR_BLOCK_START: case ATNState.PLUS_BLOCK_START: case ATNState.STAR_LOOP_ENTRY: // report error and recover if possible if ( singleTokenDeletion(recognizer)!=null ) { return; } throw new InputMismatchException(recognizer); case ATNState.PLUS_LOOP_BACK: case ATNState.STAR_LOOP_BACK: // System.err.println("at loop back: "+s.getClass().getSimpleName()); reportUnwantedToken(recognizer); IntervalSet expecting = recognizer.getExpectedTokens(); IntervalSet whatFollowsLoopIterationOrRule = expecting.or(getErrorRecoverySet(recognizer)); consumeUntil(recognizer, whatFollowsLoopIterationOrRule); break; default: // do nothing if we can't identify the exact kind of ATN state break; } } /** * This is called by {@link #reportError} when the exception is a * {@link NoViableAltException}. * * @see #reportError * * @param recognizer the parser instance * @param e the recognition exception */ protected void reportNoViableAlternative(Parser recognizer, NoViableAltException e) { TokenStream tokens = recognizer.getInputStream(); String input; if ( tokens!=null ) { if ( e.getStartToken().getType()==Token.EOF ) input = ""; else input = tokens.getText(e.getStartToken(), e.getOffendingToken()); } else { input = ""; } String msg = "no viable alternative at input "+escapeWSAndQuote(input); recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e); } /** * This is called by {@link #reportError} when the exception is an * {@link InputMismatchException}. * * @see #reportError * * @param recognizer the parser instance * @param e the recognition exception */ protected void reportInputMismatch(Parser recognizer, InputMismatchException e) { String msg = "mismatched input "+getTokenErrorDisplay(e.getOffendingToken())+ " expecting "+e.getExpectedTokens().toString(recognizer.getVocabulary()); recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e); } /** * This is called by {@link #reportError} when the exception is a * {@link FailedPredicateException}. * * @see #reportError * * @param recognizer the parser instance * @param e the recognition exception */ protected void reportFailedPredicate(Parser recognizer, FailedPredicateException e) { String ruleName = recognizer.getRuleNames()[recognizer._ctx.getRuleIndex()]; String msg = "rule "+ruleName+" "+e.getMessage(); recognizer.notifyErrorListeners(e.getOffendingToken(), msg, e); } /** * This method is called to report a syntax error which requires the removal * of a token from the input stream. At the time this method is called, the * erroneous symbol is current {@code LT(1)} symbol and has not yet been * removed from the input stream. When this method returns, * {@code recognizer} is in error recovery mode. * *

This method is called when {@link #singleTokenDeletion} identifies * single-token deletion as a viable recovery strategy for a mismatched * input error.

* *

The default implementation simply returns if the handler is already in * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to * enter error recovery mode, followed by calling * {@link Parser#notifyErrorListeners}.

* * @param recognizer the parser instance */ protected void reportUnwantedToken(Parser recognizer) { if (inErrorRecoveryMode(recognizer)) { return; } beginErrorCondition(recognizer); Token t = recognizer.getCurrentToken(); String tokenName = getTokenErrorDisplay(t); IntervalSet expecting = getExpectedTokens(recognizer); String msg = "extraneous input "+tokenName+" expecting "+ expecting.toString(recognizer.getVocabulary()); recognizer.notifyErrorListeners(t, msg, null); } /** * This method is called to report a syntax error which requires the * insertion of a missing token into the input stream. At the time this * method is called, the missing token has not yet been inserted. When this * method returns, {@code recognizer} is in error recovery mode. * *

This method is called when {@link #singleTokenInsertion} identifies * single-token insertion as a viable recovery strategy for a mismatched * input error.

* *

The default implementation simply returns if the handler is already in * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to * enter error recovery mode, followed by calling * {@link Parser#notifyErrorListeners}.

* * @param recognizer the parser instance */ protected void reportMissingToken(Parser recognizer) { if (inErrorRecoveryMode(recognizer)) { return; } beginErrorCondition(recognizer); Token t = recognizer.getCurrentToken(); IntervalSet expecting = getExpectedTokens(recognizer); String msg = "missing "+expecting.toString(recognizer.getVocabulary())+ " at "+getTokenErrorDisplay(t); recognizer.notifyErrorListeners(t, msg, null); } /** * {@inheritDoc} * *

The default implementation attempts to recover from the mismatched input * by using single token insertion and deletion as described below. If the * recovery attempt fails, this method throws an * {@link InputMismatchException}.

* *

EXTRA TOKEN (single token deletion)

* *

{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the * right token, however, then assume {@code LA(1)} is some extra spurious * token and delete it. Then consume and return the next token (which was * the {@code LA(2)} token) as the successful result of the match operation.

* *

This recovery strategy is implemented by {@link #singleTokenDeletion}.

* *

MISSING TOKEN (single token insertion)

* *

If current token (at {@code LA(1)}) is consistent with what could come * after the expected {@code LA(1)} token, then assume the token is missing * and use the parser's {@link TokenFactory} to create it on the fly. The * "insertion" is performed by returning the created token as the successful * result of the match operation.

* *

This recovery strategy is implemented by {@link #singleTokenInsertion}.

* *

EXAMPLE

* *

For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When * the parser returns from the nested call to {@code expr}, it will have * call chain:

* *
	 * stat → expr → atom
	 * 
* * and it will be trying to match the {@code ')'} at this point in the * derivation: * *
	 * => ID '=' '(' INT ')' ('+' atom)* ';'
	 *                    ^
	 * 
* * The attempt to match {@code ')'} will fail when it sees {@code ';'} and * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} * is in the set of tokens that can follow the {@code ')'} token reference * in rule {@code atom}. It can assume that you forgot the {@code ')'}. */ @Override public Token recoverInline(Parser recognizer) throws RecognitionException { // SINGLE TOKEN DELETION Token matchedSymbol = singleTokenDeletion(recognizer); if ( matchedSymbol!=null ) { // we have deleted the extra token. // now, move past ttype token as if all were ok recognizer.consume(); return matchedSymbol; } // SINGLE TOKEN INSERTION if ( singleTokenInsertion(recognizer) ) { return getMissingSymbol(recognizer); } // even that didn't work; must throw the exception InputMismatchException e; if (nextTokensContext == null) { e = new InputMismatchException(recognizer); } else { e = new InputMismatchException(recognizer, nextTokensState, nextTokensContext); } throw e; } /** * This method implements the single-token insertion inline error recovery * strategy. It is called by {@link #recoverInline} if the single-token * deletion strategy fails to recover from the mismatched input. If this * method returns {@code true}, {@code recognizer} will be in error recovery * mode. * *

This method determines whether or not single-token insertion is viable by * checking if the {@code LA(1)} input symbol could be successfully matched * if it were instead the {@code LA(2)} symbol. If this method returns * {@code true}, the caller is responsible for creating and inserting a * token with the correct type to produce this behavior.

* * @param recognizer the parser instance * @return {@code true} if single-token insertion is a viable recovery * strategy for the current mismatched input, otherwise {@code false} */ protected boolean singleTokenInsertion(Parser recognizer) { int currentSymbolType = recognizer.getInputStream().LA(1); // if current token is consistent with what could come after current // ATN state, then we know we're missing a token; error recovery // is free to conjure up and insert the missing token ATNState currentState = recognizer.getInterpreter().atn.states.get(recognizer.getState()); ATNState next = currentState.transition(0).target; ATN atn = recognizer.getInterpreter().atn; IntervalSet expectingAtLL2 = atn.nextTokens(next, recognizer._ctx); // System.out.println("LT(2) set="+expectingAtLL2.toString(recognizer.getTokenNames())); if ( expectingAtLL2.contains(currentSymbolType) ) { reportMissingToken(recognizer); return true; } return false; } /** * This method implements the single-token deletion inline error recovery * strategy. It is called by {@link #recoverInline} to attempt to recover * from mismatched input. If this method returns null, the parser and error * handler state will not have changed. If this method returns non-null, * {@code recognizer} will not be in error recovery mode since the * returned token was a successful match. * *

If the single-token deletion is successful, this method calls * {@link #reportUnwantedToken} to report the error, followed by * {@link Parser#consume} to actually "delete" the extraneous token. Then, * before returning {@link #reportMatch} is called to signal a successful * match.

* * @param recognizer the parser instance * @return the successfully matched {@link Token} instance if single-token * deletion successfully recovers from the mismatched input, otherwise * {@code null} */ protected Token singleTokenDeletion(Parser recognizer) { int nextTokenType = recognizer.getInputStream().LA(2); IntervalSet expecting = getExpectedTokens(recognizer); if ( expecting.contains(nextTokenType) ) { reportUnwantedToken(recognizer); /* System.err.println("recoverFromMismatchedToken deleting "+ ((TokenStream)recognizer.getInputStream()).LT(1)+ " since "+((TokenStream)recognizer.getInputStream()).LT(2)+ " is what we want"); */ recognizer.consume(); // simply delete extra token // we want to return the token we're actually matching Token matchedSymbol = recognizer.getCurrentToken(); reportMatch(recognizer); // we know current token is correct return matchedSymbol; } return null; } /** Conjure up a missing token during error recovery. * * The recognizer attempts to recover from single missing * symbols. But, actions might refer to that missing symbol. * For example, x=ID {f($x);}. The action clearly assumes * that there has been an identifier matched previously and that * $x points at that token. If that token is missing, but * the next token in the stream is what we want we assume that * this token is missing and we keep going. Because we * have to return some token to replace the missing token, * we have to conjure one up. This method gives the user control * over the tokens returned for missing tokens. Mostly, * you will want to create something special for identifier * tokens. For literals such as '{' and ',', the default * action in the parser or tree parser works. It simply creates * a CommonToken of the appropriate type. The text will be the token. * If you change what tokens must be created by the lexer, * override this method to create the appropriate tokens. */ protected Token getMissingSymbol(Parser recognizer) { Token currentSymbol = recognizer.getCurrentToken(); IntervalSet expecting = getExpectedTokens(recognizer); int expectedTokenType = Token.INVALID_TYPE; if ( !expecting.isNil() ) { expectedTokenType = expecting.getMinElement(); // get any element } String tokenText; if ( expectedTokenType== Token.EOF ) tokenText = ""; else tokenText = ""; Token current = currentSymbol; Token lookback = recognizer.getInputStream().LT(-1); if ( current.getType() == Token.EOF && lookback!=null ) { current = lookback; } return recognizer.getTokenFactory().create(new Pair(current.getTokenSource(), current.getTokenSource().getInputStream()), expectedTokenType, tokenText, Token.DEFAULT_CHANNEL, -1, -1, current.getLine(), current.getCharPositionInLine()); } protected IntervalSet getExpectedTokens(Parser recognizer) { return recognizer.getExpectedTokens(); } /** How should a token be displayed in an error message? The default * is to display just the text, but during development you might * want to have a lot of information spit out. Override in that case * to use t.toString() (which, for CommonToken, dumps everything about * the token). This is better than forcing you to override a method in * your token objects because you don't have to go modify your lexer * so that it creates a new Java type. */ protected String getTokenErrorDisplay(Token t) { if ( t==null ) return ""; String s = getSymbolText(t); if ( s==null ) { if ( getSymbolType(t)==Token.EOF ) { s = ""; } else { s = "<"+getSymbolType(t)+">"; } } return escapeWSAndQuote(s); } protected String getSymbolText(Token symbol) { return symbol.getText(); } protected int getSymbolType(Token symbol) { return symbol.getType(); } protected String escapeWSAndQuote(String s) { // if ( s==null ) return s; s = s.replace("\n","\\n"); s = s.replace("\r","\\r"); s = s.replace("\t","\\t"); return "'"+s+"'"; } /* Compute the error recovery set for the current rule. During * rule invocation, the parser pushes the set of tokens that can * follow that rule reference on the stack; this amounts to * computing FIRST of what follows the rule reference in the * enclosing rule. See LinearApproximator.FIRST(). * This local follow set only includes tokens * from within the rule; i.e., the FIRST computation done by * ANTLR stops at the end of a rule. * * EXAMPLE * * When you find a "no viable alt exception", the input is not * consistent with any of the alternatives for rule r. The best * thing to do is to consume tokens until you see something that * can legally follow a call to r *or* any rule that called r. * You don't want the exact set of viable next tokens because the * input might just be missing a token--you might consume the * rest of the input looking for one of the missing tokens. * * Consider grammar: * * a : '[' b ']' * | '(' b ')' * ; * b : c '^' INT ; * c : ID * | INT * ; * * At each rule invocation, the set of tokens that could follow * that rule is pushed on a stack. Here are the various * context-sensitive follow sets: * * FOLLOW(b1_in_a) = FIRST(']') = ']' * FOLLOW(b2_in_a) = FIRST(')') = ')' * FOLLOW(c_in_b) = FIRST('^') = '^' * * Upon erroneous input "[]", the call chain is * * a -> b -> c * * and, hence, the follow context stack is: * * depth follow set start of rule execution * 0 a (from main()) * 1 ']' b * 2 '^' c * * Notice that ')' is not included, because b would have to have * been called from a different context in rule a for ')' to be * included. * * For error recovery, we cannot consider FOLLOW(c) * (context-sensitive or otherwise). We need the combined set of * all context-sensitive FOLLOW sets--the set of all tokens that * could follow any reference in the call chain. We need to * resync to one of those tokens. Note that FOLLOW(c)='^' and if * we resync'd to that token, we'd consume until EOF. We need to * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. * In this case, for input "[]", LA(1) is ']' and in the set, so we would * not consume anything. After printing an error, rule c would * return normally. Rule b would not find the required '^' though. * At this point, it gets a mismatched token error and throws an * exception (since LA(1) is not in the viable following token * set). The rule exception handler tries to recover, but finds * the same recovery set and doesn't consume anything. Rule b * exits normally returning to rule a. Now it finds the ']' (and * with the successful match exits errorRecovery mode). * * So, you can see that the parser walks up the call chain looking * for the token that was a member of the recovery set. * * Errors are not generated in errorRecovery mode. * * ANTLR's error recovery mechanism is based upon original ideas: * * "Algorithms + Data Structures = Programs" by Niklaus Wirth * * and * * "A note on error recovery in recursive descent parsers": * http://portal.acm.org/citation.cfm?id=947902.947905 * * Later, Josef Grosch had some good ideas: * * "Efficient and Comfortable Error Recovery in Recursive Descent * Parsers": * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip * * Like Grosch I implement context-sensitive FOLLOW sets that are combined * at run-time upon error to avoid overhead during parsing. */ protected IntervalSet getErrorRecoverySet(Parser recognizer) { ATN atn = recognizer.getInterpreter().atn; RuleContext ctx = recognizer._ctx; IntervalSet recoverSet = new IntervalSet(); while ( ctx!=null && ctx.invokingState>=0 ) { // compute what follows who invoked us ATNState invokingState = atn.states.get(ctx.invokingState); RuleTransition rt = (RuleTransition)invokingState.transition(0); IntervalSet follow = atn.nextTokens(rt.followState); recoverSet.addAll(follow); ctx = ctx.parent; } recoverSet.remove(Token.EPSILON); // System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames())); return recoverSet; } /** Consume tokens until one matches the given token set. */ protected void consumeUntil(Parser recognizer, IntervalSet set) { // System.err.println("consumeUntil("+set.toString(recognizer.getTokenNames())+")"); int ttype = recognizer.getInputStream().LA(1); while (ttype != Token.EOF && !set.contains(ttype) ) { //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]); // recognizer.getInputStream().consume(); recognizer.consume(); ttype = recognizer.getInputStream().LA(1); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy