![JAR search and dependency download from the Maven repository](/logo.png)
org.parboiled.parserunners.RecoveringParseRunner Maven / Gradle / Ivy
/*
* Copyright (C) 2009-2011 Mathias Doenitz
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.parboiled.parserunners;
import org.parboiled.MatchHandler;
import org.parboiled.MatcherContext;
import org.parboiled.Rule;
import org.parboiled.buffers.InputBuffer;
import org.parboiled.buffers.MutableInputBuffer;
import org.parboiled.common.ImmutableLinkedList;
import org.parboiled.common.ImmutableList;
import org.parboiled.common.Preconditions;
import org.parboiled.errors.InvalidInputError;
import org.parboiled.matchers.*;
import org.parboiled.matchervisitors.*;
import org.parboiled.support.Checks;
import org.parboiled.support.MatcherPath;
import org.parboiled.support.ParsingResult;
import java.util.ArrayList;
import java.util.List;
import static org.parboiled.common.Preconditions.checkArgNotNull;
import static org.parboiled.common.Preconditions.checkState;
import static org.parboiled.matchers.MatcherUtils.unwrap;
import static org.parboiled.support.Chars.*;
/**
* A {@link org.parboiled.parserunners.ParseRunner} implementation that is able to recover from {@link org.parboiled.errors.InvalidInputError}s in the input and therefore
* report more than just the first {@link org.parboiled.errors.InvalidInputError} if the input does not conform to the rule grammar.
* Error recovery is done by attempting to either delete an error character, insert a potentially missing character
* or do both at once (which is equivalent to a one char replace) whereby this implementation is able to determine
* itself which of these options is the best strategy.
* If the parse error cannot be overcome by either deleting, inserting or replacing one character a resynchronization
* rule is determined and the parsing process resynchronized, so that parsing can still continue.
* In this way the RecoveringParseRunner is able to completely parse all input texts (This ParseRunner never returns
* an unmatched {@link org.parboiled.support.ParsingResult}).
* If the input is error free this {@link org.parboiled.parserunners.ParseRunner} implementation will only perform one parsing run, with the same
* speed as the {@link org.parboiled.parserunners.BasicParseRunner}. However, if there are {@link org.parboiled.errors.InvalidInputError}s in the input potentially
* many more runs are performed to properly report all errors and test the various recovery strategies.
*/
public class RecoveringParseRunner extends AbstractParseRunner {
public static class TimeoutException extends RuntimeException {
public final Rule rule;
public final InputBuffer inputBuffer;
public final ParsingResult> lastParsingResult;
public TimeoutException(Rule rule, InputBuffer inputBuffer, ParsingResult> lastParsingResult) {
this.rule = rule;
this.inputBuffer = inputBuffer;
this.lastParsingResult = lastParsingResult;
}
}
private final long timeout;
private long startTimeStamp;
private int errorIndex;
private InvalidInputError currentError;
private MutableInputBuffer buffer;
private ParsingResult lastParsingResult;
private Matcher rootMatcherWithoutPTB; // the root matcher with parse tree building disabled
/**
* Create a new RecoveringParseRunner instance with the given rule and input text and returns the result of
* its {@link #run(String)} method invocation.
*
* @param rule the parser rule to run
* @param input the input text to run on
* @return the ParsingResult for the parsing run
* @deprecated As of 0.11.0 you should use the "regular" constructor and one of the "run" methods rather than
* this static method. This method will be removed in one of the coming releases.
*/
@Deprecated
public static ParsingResult run(Rule rule, String input) {
checkArgNotNull(rule, "rule");
checkArgNotNull(input, "input");
return new RecoveringParseRunner(rule).run(input);
}
/**
* Creates a new RecoveringParseRunner instance for the given rule.
*
* @param rule the parser rule
*/
public RecoveringParseRunner(Rule rule) {
this(rule, Long.MAX_VALUE);
}
/**
* Creates a new RecoveringParseRunner instance for the given rule.
* A parsing run will throw a TimeoutException if it takes longer than the given number if milliseconds.
*
* @param rule the parser rule
* @param timeout the timeout value in milliseconds
*/
public RecoveringParseRunner(Rule rule, long timeout) {
super(rule);
this.timeout = timeout;
}
public ParsingResult run(InputBuffer inputBuffer) {
checkArgNotNull(inputBuffer, "inputBuffer");
startTimeStamp = System.currentTimeMillis();
resetValueStack();
// first, run a basic match
ParseRunner basicRunner = new BasicParseRunner(getRootMatcher())
.withParseErrors(getParseErrors())
.withValueStack(getValueStack());
lastParsingResult = basicRunner.run(inputBuffer);
if (!lastParsingResult.matched) {
// for better performance disable parse tree building during the recovery runs
rootMatcherWithoutPTB = (Matcher) getRootMatcher().suppressNode();
// locate first error
performLocatingRun(inputBuffer);
checkState(errorIndex >= 0); // we failed before so we must fail again
// in order to be able to apply fixes we need to wrap the input buffer with a mutability wrapper
buffer = new MutableInputBuffer(inputBuffer);
// report first error
performReportingRun();
// fix and report until done
while (!fixError(errorIndex)) {
performReportingRun();
}
// rerun once more with parse tree building enabled to create a parse tree for the fixed input
if (!getRootMatcher().isNodeSuppressed()) {
performFinalRun();
checkState(lastParsingResult.matched);
}
}
return lastParsingResult;
}
private boolean performLocatingRun(InputBuffer inputBuffer) {
resetValueStack();
ParseRunner locatingRunner = new ErrorLocatingParseRunner(rootMatcherWithoutPTB, getInnerHandler())
.withParseErrors(getParseErrors())
.withValueStack(getValueStack());
lastParsingResult = locatingRunner.run(inputBuffer);
errorIndex = lastParsingResult.matched ? -1 :
getParseErrors().remove(getParseErrors().size() - 1).getStartIndex();
return lastParsingResult.matched;
}
private void performReportingRun() {
resetValueStack();
ParseRunner reportingRunner = new ErrorReportingParseRunner(rootMatcherWithoutPTB, errorIndex,
getInnerHandler())
.withParseErrors(getParseErrors())
.withValueStack(getValueStack());
ParsingResult result = reportingRunner.run(buffer);
Preconditions.checkState(!result.matched); // we failed before so we should really be failing again
currentError = (InvalidInputError) getParseErrors().get(getParseErrors().size() - 1);
}
private void performFinalRun() {
resetValueStack();
Handler handler = new Handler();
MatcherContext rootContext = createRootContext(buffer, handler, false);
boolean matched = handler.match(rootContext);
lastParsingResult = createParsingResult(matched, rootContext);
}
private MatchHandler getInnerHandler() {
return errorIndex >= 0 ? new Handler() : null;
}
private boolean fixError(int fixIndex) {
if (tryFixBySingleCharDeletion(fixIndex)) return true;
int nextErrorAfterDeletion = errorIndex;
Character bestInsertionCharacter = findBestSingleCharInsertion(fixIndex);
if (bestInsertionCharacter == null) return true;
int nextErrorAfterBestInsertion = errorIndex;
Character bestReplacementCharacter = findBestSingleCharReplacement(fixIndex);
if (bestReplacementCharacter == null) return true;
int nextErrorAfterBestReplacement = errorIndex;
int nextErrorAfterBestSingleCharFix =
Math.max(Math.max(nextErrorAfterDeletion, nextErrorAfterBestInsertion), nextErrorAfterBestReplacement);
if (nextErrorAfterBestSingleCharFix > fixIndex) {
// we are able to overcome the error with a single char fix, so apply the best one found
if (nextErrorAfterBestSingleCharFix == nextErrorAfterDeletion) {
buffer.insertChar(fixIndex, DEL_ERROR);
errorIndex = nextErrorAfterDeletion + 1;
currentError.shiftIndexDeltaBy(1);
} else if (nextErrorAfterBestSingleCharFix == nextErrorAfterBestInsertion) {
// we need to insert the characters in reverse order, since we insert twice at the same location
buffer.insertChar(fixIndex, bestInsertionCharacter);
buffer.insertChar(fixIndex, INS_ERROR);
errorIndex = nextErrorAfterBestInsertion + 2;
currentError.shiftIndexDeltaBy(2);
} else {
// we need to insert the characters in reverse order, since we insert three times at the same location
buffer.insertChar(fixIndex + 1, bestReplacementCharacter);
buffer.insertChar(fixIndex + 1, INS_ERROR);
buffer.insertChar(fixIndex, DEL_ERROR);
errorIndex = nextErrorAfterBestReplacement + 5;
currentError.shiftIndexDeltaBy(1);
}
} else {
// we can't fix the error with a single char fix, so fall back to resynchronization
if (buffer.charAt(fixIndex) == EOI) {
buffer.insertChar(fixIndex, RESYNC_EOI);
currentError.shiftIndexDeltaBy(1);
return true;
}
buffer.insertChar(fixIndex, RESYNC);
currentError.shiftIndexDeltaBy(1);
performLocatingRun(buffer); // find the next parse error
}
return errorIndex == -1;
}
private boolean tryFixBySingleCharDeletion(int fixIndex) {
buffer.insertChar(fixIndex, DEL_ERROR);
boolean nowErrorFree = performLocatingRun(buffer);
if (nowErrorFree) {
currentError.shiftIndexDeltaBy(1); // compensate for the inserted DEL_ERROR char
} else {
buffer.undoCharInsertion(fixIndex);
errorIndex = Math.max(errorIndex - 1, 0);
}
return nowErrorFree;
}
@SuppressWarnings( {"ConstantConditions"})
private Character findBestSingleCharInsertion(int fixIndex) {
GetStarterCharVisitor getStarterCharVisitor = new GetStarterCharVisitor();
int bestNextErrorIndex = -1;
Character bestChar = '\u0000'; // non-null default
for (MatcherPath failedMatcherPath : currentError.getFailedMatchers()) {
Character starterChar = failedMatcherPath.element.matcher.accept(getStarterCharVisitor);
checkState(starterChar != null); // we should only have single character matchers
if (starterChar == EOI) {
continue; // we should never conjure up an EOI character (that would be cheating :)
}
buffer.insertChar(fixIndex, starterChar);
buffer.insertChar(fixIndex, INS_ERROR);
if (performLocatingRun(buffer)) {
currentError.shiftIndexDeltaBy(2); // compensate for the inserted chars
return null; // success, exit immediately
}
buffer.undoCharInsertion(fixIndex);
buffer.undoCharInsertion(fixIndex);
errorIndex = Math.max(errorIndex - 2, 0);
if (bestNextErrorIndex < errorIndex) {
bestNextErrorIndex = errorIndex;
bestChar = starterChar;
}
}
errorIndex = bestNextErrorIndex;
return bestChar;
}
private Character findBestSingleCharReplacement(int fixIndex) {
buffer.insertChar(fixIndex, DEL_ERROR);
Character bestChar = findBestSingleCharInsertion(fixIndex + 2);
if (bestChar == null) { // success, we found a fix that renders the complete input error free
currentError
.shiftIndexDeltaBy(-1); // delta from DEL_ERROR char insertion and index shift by insertion method
} else {
buffer.undoCharInsertion(fixIndex);
errorIndex = Math.max(errorIndex - 3, 0);
}
return bestChar;
}
/**
* A {@link org.parboiled.MatchHandler} implementation that recognizes the special
* {@link org.parboiled.support.Chars#RESYNC} character to overcome {@link InvalidInputError}s at the respective
* error indices.
*/
private class Handler implements MatchHandler {
private final IsSingleCharMatcherVisitor isSingleCharMatcherVisitor = new IsSingleCharMatcherVisitor();
private int fringeIndex;
private MatcherPath lastMatchPath;
public boolean match(MatcherContext> context) {
Matcher matcher = context.getMatcher();
if (matcher.accept(isSingleCharMatcherVisitor)) {
if (prepareErrorLocation(context) && matcher.match(context)) {
if (fringeIndex < context.getCurrentIndex()) {
fringeIndex = context.getCurrentIndex();
lastMatchPath = context.getPath();
}
return true;
}
return false;
}
if (matcher.match(context)) {
return true;
}
// if we didn't match we might have to resynchronize
if (matcher instanceof SequenceMatcher) {
switch(context.getCurrentChar()) {
case RESYNC:
case RESYNC_START:
case RESYNC_EOI:
// however we only resynchronize if we are at a RESYNC location and the matcher is a SequenceMatcher
// that has already matched at least one character and that is a parent of the last match
return qualifiesForResync(context) && resynchronize(context);
}
// check for timeout only on failures of sequences so as to not add too much overhead
if (System.currentTimeMillis() - startTimeStamp > timeout) {
throw new TimeoutException(getRootMatcher(), buffer, lastParsingResult);
}
}
return false;
}
private boolean qualifiesForResync(MatcherContext context) {
if (context.getCurrentIndex() == context.getStartIndex() || !context.getPath().isPrefixOf(lastMatchPath)) {
// if we have a sequence that hasn't match anything yet or is not a prefix we might still have to
// resync on it if there is no other sequence parent anymore
MatcherContext parent = context.getParent();
while (parent != null) {
if (parent.getMatcher() instanceof SequenceMatcher) return false;
parent = parent.getParent();
}
}
return true;
}
private boolean prepareErrorLocation(MatcherContext context) {
switch (context.getCurrentChar()) {
case DEL_ERROR:
return willMatchDelError(context);
case INS_ERROR:
return willMatchInsError(context);
case RESYNC:
case RESYNC_START:
case RESYNC_EOI:
return false;
default:
return true;
}
}
private boolean willMatchDelError(MatcherContext context) {
int preSkipIndex = context.getCurrentIndex();
context.advanceIndex(2); // skip del marker char and illegal char
if (!runTestMatch(context)) {
// if we wouldn't succeed with the match do not swallow the ERROR char & Co
context.setCurrentIndex(preSkipIndex);
return false;
}
context.setStartIndex(context.getCurrentIndex());
if (context.getParent() != null) context.getParent().markError();
return true;
}
private boolean willMatchInsError(MatcherContext context) {
int preSkipIndex = context.getCurrentIndex();
context.advanceIndex(1); // skip ins marker char
if (!runTestMatch(context)) {
// if we wouldn't succeed with the match do not swallow the ERROR char
context.setCurrentIndex(preSkipIndex);
return false;
}
context.setStartIndex(context.getCurrentIndex());
context.markError();
return true;
}
private boolean runTestMatch(MatcherContext context) {
TestMatcher testMatcher = new TestMatcher(context.getMatcher());
MatcherContext testContext = testMatcher.getSubContext(context);
return prepareErrorLocation(testContext) && testContext.runMatcher();
}
private boolean resynchronize(MatcherContext context) {
context.markError();
// create a node for the failed Sequence, taking ownership of all sub nodes created so far
context.createNode();
// by resyncing we flip an unmatched sequence to a matched one, so in order to keep the value stack
// consistent we go into a special "error action mode" and execute the minimal set of actions underneath
// the resync sequence
rerunAndExecuteErrorActions(context);
// skip over all characters that are not legal followers of the failed Sequence
switch (context.getCurrentChar()) {
case RESYNC:
// this RESYNC error is the last error, we establish the length of the bad sequence and
// change this RESYNC marker to a RESYNC_START / RESYNC_END block
context.advanceIndex(1); // gobble RESYNC marker
List followMatchers = new FollowMatchersVisitor().getFollowMatchers(context);
int endIndex = gobbleIllegalCharacters(context, followMatchers);
currentError.setEndIndex(endIndex);
buffer.replaceInsertedChar(currentError.getStartIndex() - 1, RESYNC_START);
buffer.insertChar(endIndex, RESYNC_END);
context.advanceIndex(1); // gobble RESYNC_END marker
break;
case RESYNC_START:
// a RESYNC error we have already recovered from before
context.advanceIndex(1); // gobble RESYNC_START
while (context.getCurrentChar() != RESYNC_END) {
context.advanceIndex(1); // skip all characters up to the RESYNC_END
checkState(context.getCurrentChar() != EOI); // we MUST find a RESYNC_END before EOI
}
context.advanceIndex(1); // gobble RESYNC_END marker
break;
case RESYNC_EOI:
// if we are resyncing on EOI we don't swallow anything
// we also do not have to update the currentError since we only hit this code here
// in the final run
break;
default:
throw new IllegalStateException();
}
return true;
}
@SuppressWarnings( {"ConstantConditions"})
private void rerunAndExecuteErrorActions(MatcherContext context) {
// the context is for the resync action, which at this point has FAILED, i.e. ALL its sub actions haven't
// had a chance to change the value stack, even the ones having run before the actual parse error matcher
// so we need to rerun all sub matchers of the resync sequence up to the point of the parse error
// and then run the minimal set of action in "error action mode"
int savedCurrentIndex = context.getCurrentIndex();
context.setCurrentIndex(context.getStartIndex()); // restart matching the resync sequence
boolean preError = true;
for (Matcher child : context.getMatcher().getChildren()) {
if (preError && !child.getSubContext(context).runMatcher()) {
// run what will be the preceding matcher of all error actions
new EmptyMatcher().getSubContext(context).runMatcher();
context.setIntTag(1); // signal that at least one rule has run before the error actions
preError = false;
}
if (!preError) {
context.setInErrorRecovery(true);
List errorActions = child.accept(new CollectResyncActionsVisitor());
checkState(errorActions != null);
for (ActionMatcher errorAction : errorActions) {
// execute the error actions without looking at their boolean results !!!
errorAction.getSubContext(context).runMatcher();
}
context.setInErrorRecovery(false);
}
}
context.setCurrentIndex(savedCurrentIndex);
}
private int gobbleIllegalCharacters(MatcherContext context, List followMatchers) {
while_loop:
while (true) {
char currentChar = context.getCurrentChar();
if (currentChar == EOI) break;
for (Matcher followMatcher : followMatchers) {
if (followMatcher.accept(new IsStarterCharVisitor(currentChar))) {
break while_loop;
}
}
context.advanceIndex(1);
}
return context.getCurrentIndex();
}
}
/**
* This MatcherVisitor collects the minimal set of actions that has to run underneath a resyncronization sequence
* in order to maintain a consistent Value Stack state.
*/
private static class CollectResyncActionsVisitor extends DefaultMatcherVisitor> {
private ImmutableLinkedList path = ImmutableLinkedList.nil();
@Override
public List visit(ActionMatcher matcher) {
return ImmutableList.of(matcher);
}
@Override
public List visit(FirstOfMatcher matcher) {
for (Matcher child : matcher.getChildren()) {
List actions = child.accept(this);
if (actions != null) return actions;
}
return null;
}
@Override
public List visit(OneOrMoreMatcher matcher) {
return matcher.subMatcher.accept(this);
}
@Override
public List visit(SequenceMatcher matcher) {
if (path.contains(matcher)) {
return null;
}
ImmutableLinkedList previousPath = path;
path = path.prepend(matcher);
List actions = new ArrayList();
for (Matcher sub : matcher.getChildren()) {
List subActions = sub.accept(this);
if (subActions == null) return null;
actions.addAll(subActions);
}
path = previousPath;
return actions;
}
@Override
public List defaultValue(AbstractMatcher matcher) {
return ImmutableList.of();
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy