// Java.src.org.antlr.v4.runtime.atn.PredictionMode Maven / Gradle / Ivy
// Show all versions of antlr4-perf-testsuite Show documentation
/*
* Copyright (c) 2012 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD-3-Clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.misc.AbstractEqualityComparator;
import org.antlr.v4.runtime.misc.FlexibleHashMap;
import org.antlr.v4.runtime.misc.MurmurHash;
import org.antlr.v4.runtime.misc.NotNull;
import java.util.BitSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
/**
* This enumeration defines the prediction modes available in ANTLR 4 along with
* utility methods for analyzing configuration sets for conflicts and/or
* ambiguities.
*/
public enum PredictionMode {
/**
* The SLL(*) prediction mode. This prediction mode ignores the current
* parser context when making predictions. This is the fastest prediction
* mode, and provides correct results for many grammars. This prediction
* mode is more powerful than the prediction mode provided by ANTLR 3, but
* may result in syntax errors for grammar and input combinations which are
* not SLL.
*
*
* When using this prediction mode, the parser will either return a correct
* parse tree (i.e. the same parse tree that would be returned with the
* {@link #LL} prediction mode), or it will report a syntax error. If a
* syntax error is encountered when using the {@link #SLL} prediction mode,
* it may be due to either an actual syntax error in the input or indicate
* that the particular combination of grammar and input requires the more
* powerful {@link #LL} prediction abilities to complete successfully.
*
*
* This prediction mode does not provide any guarantees for prediction
* behavior for syntactically-incorrect inputs.
*/
SLL,
/**
* The LL(*) prediction mode. This prediction mode allows the current parser
* context to be used for resolving SLL conflicts that occur during
* prediction. This is the fastest prediction mode that guarantees correct
* parse results for all combinations of grammars with syntactically correct
* inputs.
*
*
* When using this prediction mode, the parser will make correct decisions
* for all syntactically-correct grammar and input combinations. However, in
* cases where the grammar is truly ambiguous this prediction mode might not
* report a precise answer for exactly which alternatives are
* ambiguous.
*
*
* This prediction mode does not provide any guarantees for prediction
* behavior for syntactically-incorrect inputs.
*/
LL,
/**
* The LL(*) prediction mode with exact ambiguity detection. In addition to
* the correctness guarantees provided by the {@link #LL} prediction mode,
* this prediction mode instructs the prediction algorithm to determine the
* complete and exact set of ambiguous alternatives for every ambiguous
* decision encountered while parsing.
*
*
* This prediction mode may be used for diagnosing ambiguities during
* grammar development. Due to the performance overhead of calculating sets
* of ambiguous alternatives, this prediction mode should be avoided when
* the exact results are not necessary.
*
*
* This prediction mode does not provide any guarantees for prediction
* behavior for syntactically-incorrect inputs.
*/
LL_EXACT_AMBIG_DETECTION;
/**
 * A map from {@link ATNConfig} to the {@link BitSet} of alternatives seen for
 * it, where two configurations count as the same key when they agree on
 * state number and stack context. The alternative and any semantic predicate
 * are deliberately not part of the key.
 */
static class AltAndContextMap extends FlexibleHashMap<ATNConfig, BitSet> {
    public AltAndContextMap() {
        super(AltAndContextConfigEqualityComparator.INSTANCE);
    }
}

/**
 * Equality/hash strategy used by {@link AltAndContextMap}: compares
 * configurations by state number and context only.
 */
private static final class AltAndContextConfigEqualityComparator extends AbstractEqualityComparator<ATNConfig> {
    public static final AltAndContextConfigEqualityComparator INSTANCE = new AltAndContextConfigEqualityComparator();

    private AltAndContextConfigEqualityComparator() {
    }

    /**
     * The hash code is only a function of the {@link ATNState#stateNumber}
     * and the configuration's context, so it stays consistent with
     * {@link #equals(ATNConfig, ATNConfig)}.
     */
    @Override
    public int hashCode(ATNConfig o) {
        int hashCode = MurmurHash.initialize(7);
        hashCode = MurmurHash.update(hashCode, o.getState().stateNumber);
        hashCode = MurmurHash.update(hashCode, o.getContext());
        hashCode = MurmurHash.finish(hashCode, 2);
        return hashCode;
    }

    /**
     * Two configurations are equal when they are the same instance, or when
     * both are non-null and share state number and an equal context.
     */
    @Override
    public boolean equals(ATNConfig a, ATNConfig b) {
        if (a == b) {
            return true;
        }
        if (a == null || b == null) {
            return false;
        }
        return a.getState().stateNumber == b.getState().stateNumber
            && a.getContext().equals(b.getContext());
    }
}
/**
* Computes the SLL prediction termination condition.
*
*
* This method computes the SLL prediction termination condition for both of
* the following cases.
*
*
* - The usual SLL+LL fallback upon SLL conflict
* - Pure SLL without LL fallback
*
*
* COMBINED SLL+LL PARSING
*
* When LL-fallback is enabled upon SLL conflict, correct predictions are
* ensured regardless of how the termination condition is computed by this
* method. Due to the substantially higher cost of LL prediction, the
* prediction should only fall back to LL when the additional lookahead
* cannot lead to a unique SLL prediction.
*
* Assuming combined SLL+LL parsing, an SLL configuration set with only
* conflicting subsets should fall back to full LL, even if the
* configuration sets don't resolve to the same alternative (e.g.
* {@code {1,2}} and {@code {3,4}}). If there is at least one non-conflicting
* configuration, SLL could continue with the hopes that more lookahead will
* resolve via one of those non-conflicting configurations.
*
* Here's the prediction termination rule then: SLL (for SLL+LL parsing)
* stops when it sees only conflicting configuration subsets. In contrast,
* full LL keeps going when there is uncertainty.
*
* HEURISTIC
*
* As a heuristic, we stop prediction when we see any conflicting subset
* unless we see a state that only has one alternative associated with it.
* The single-alt-state thing lets prediction continue upon rules like
* (otherwise, it would admit defeat too soon):
*
* {@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}
*
* When the ATN simulation reaches the state before {@code ';'}, it has a
* DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
* {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
* processing this node because alternative two has another way to continue,
* via {@code [6|2|[]]}.
*
* It also lets us continue for this rule:
*
* {@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}
*
* After matching input A, we reach the stop state for rule {@code a}, state 1.
* State 8 is the state right before B. Clearly alternatives 1 and 2
* conflict and no amount of further lookahead will separate the two.
* However, alternative 3 will be able to continue and so we do not stop
* working on this state. In the previous example, we're concerned with
* states associated with the conflicting alternatives. Here alt 3 is not
* associated with the conflicting configs, but since we can continue
* looking for input reasonably, don't declare the state done.
*
* PURE SLL PARSING
*
* To handle pure SLL parsing, all we have to do is make sure that we
* combine stack contexts for configurations that differ only by semantic
* predicate. From there, we can do the usual SLL termination heuristic.
*
* PREDICATES IN SLL+LL PARSING
*
* SLL decisions don't evaluate predicates until after they reach DFA stop
* states because they need to create the DFA cache that works in all
* semantic situations. In contrast, full LL evaluates predicates collected
* during start state computation so it can ignore predicates thereafter.
* This means that SLL termination detection can totally ignore semantic
* predicates.
*
* Implementation-wise, {@link ATNConfigSet} combines stack contexts but not
* semantic predicate contexts so we might see two configurations like the
* following.
*
* {@code (s, 1, x, {}), (s, 1, x', {p})}
*
* Before testing these configurations against others, we have to merge
* {@code x} and {@code x'} (without modifying the existing configurations).
* For example, we test {@code (x+x')==x''} when looking for conflicts in
* the following configurations.
*
* {@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}
*
* If the configuration set has predicates (as indicated by
* {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of
* the configurations to strip out all of the predicates so that a standard
* {@link ATNConfigSet} will merge everything ignoring predicates.
*/
public static boolean hasSLLConflictTerminatingPrediction(PredictionMode mode, @NotNull ATNConfigSet configs) {
    /* Configs in rule stop states indicate reaching the end of the decision
     * rule (local context) or end of start rule (full context). If all
     * configs meet this condition, then none of the configurations is able
     * to match additional input so we terminate prediction.
     */
    if (allConfigsInRuleStopStates(configs)) {
        return true;
    }

    // Pure SLL mode parsing. The predicate stripping below is only done
    // here: when we can fail over to full LL we don't bother combining
    // configs from different semantic contexts, since that costs more time
    // and we'll often fail over anyway.
    if (mode == PredictionMode.SLL) {
        if (configs.hasSemanticContext()) {
            // Duplicate the configs with their semantic predicates tossed
            // out; the original set is left unmodified.
            ATNConfigSet dup = new ATNConfigSet();
            for (ATNConfig c : configs) {
                dup.add(c.transform(c.getState(), SemanticContext.NONE, false));
            }
            configs = dup;
        }
        // now we have combined contexts for configs with dissimilar preds
    }

    // Pure SLL or combined SLL+LL mode parsing: stop when some subset
    // conflicts, unless a state is still tied to exactly one alternative.
    Collection<BitSet> altsets = getConflictingAltSubsets(configs);
    return hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs);
}
/**
 * Tests whether at least one configuration in {@code configs} sits in a
 * {@link RuleStopState}. Such a configuration has reached the end of the
 * decision rule (local context) or the end of the start rule (full
 * context).
 *
 * @param configs the configuration set to scan
 * @return {@code true} as soon as one configuration is found whose state is
 * a {@link RuleStopState}; {@code false} when none is
 */
public static boolean hasConfigInRuleStopState(ATNConfigSet configs) {
    boolean sawRuleStop = false;
    for (ATNConfig config : configs) {
        if (config.getState() instanceof RuleStopState) {
            sawRuleStop = true;
            break;
        }
    }
    return sawRuleStop;
}
/**
 * Tests whether every configuration in {@code configs} sits in a
 * {@link RuleStopState}. Such configurations have reached the end of the
 * decision rule (local context) or the end of the start rule (full
 * context). A set with no configurations yields {@code true}.
 *
 * @param configs the configuration set to scan
 * @return {@code false} as soon as one configuration is found whose state is
 * not a {@link RuleStopState}; {@code true} otherwise
 */
public static boolean allConfigsInRuleStopStates(@NotNull ATNConfigSet configs) {
    boolean everyConfigStopped = true;
    for (ATNConfig c : configs) {
        boolean atRuleStop = c.getState() instanceof RuleStopState;
        if (!atRuleStop) {
            everyConfigStopped = false;
            break;
        }
    }
    return everyConfigStopped;
}
/**
* Full LL prediction termination.
*
* Can we stop looking ahead during ATN simulation or is there some
* uncertainty as to which alternative we will ultimately pick, after
* consuming more input? Even if there are partial conflicts, we might know
* that everything is going to resolve to the same minimum alternative. That
* means we can stop since no more lookahead will change that fact. On the
* other hand, there might be multiple conflicts that resolve to different
* minimums. That means we need more look ahead to decide which of those
* alternatives we should predict.
*
* The basic idea is to split the set of configurations {@code C}, into
* conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
* non-conflicting configurations. Two configurations conflict if they have
* identical {@link ATNConfig#state} and {@link ATNConfig#context} values
* but different {@link ATNConfig#getAlt} value, e.g. {@code (s, i, ctx, _)}
* and {@code (s, j, ctx, _)} for {@code i!=j}.
*
*
*
* Reduce these configuration subsets to the set of possible alternatives.
* You can compute the alternative subsets in one pass as follows:
*
*
*
* {@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
* {@code C} holding {@code s} and {@code ctx} fixed.
*
*
*
* Or in pseudo-code, for each configuration {@code c} in {@code C}:
*
*
* map[c] U= c.{@link ATNConfig#getAlt getAlt()} # map hash/equals uses s and x, not
* alt and not pred
*
*
* The values in {@code map} are the set of {@code A_s,ctx} sets.
*
* If {@code |A_s,ctx|=1} then there is no conflict associated with
* {@code s} and {@code ctx}.
*
* Reduce the subsets to singletons by choosing a minimum of each subset. If
* the union of these alternative subsets is a singleton, then no amount of
* more lookahead will help us. We will always pick that alternative. If,
* however, there is more than one alternative, then we are uncertain which
* alternative to predict and must continue looking for resolution. We may
* or may not discover an ambiguity in the future, even if there are no
* conflicting subsets this round.
*
* The biggest sin is to terminate early because it means we've made a
* decision but were uncertain as to the eventual outcome. We haven't used
* enough lookahead. On the other hand, announcing a conflict too late is no
* big deal; you will still have the conflict. It's just inefficient. It
* might even look until the end of file.
*
* No special consideration for semantic predicates is required because
* predicates are evaluated on-the-fly for full LL prediction, ensuring that
* no configuration contains a semantic context during the termination
* check.
*
* CONFLICTING CONFIGS
*
* Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict
* when {@code i!=j} but {@code x=x'}. Because we merge all
* {@code (s, i, _)} configurations together, that means that there are at
* most {@code n} configurations associated with state {@code s} for
* {@code n} possible alternatives in the decision. The merged stacks
* complicate the comparison of configuration contexts {@code x} and
* {@code x'}. Sam checks to see if one is a subset of the other by calling
* merge and checking to see if the merged result is either {@code x} or
* {@code x'}. If the {@code x} associated with lowest alternative {@code i}
* is the superset, then {@code i} is the only possible prediction since the
* others resolve to {@code min(i)} as well. However, if {@code x} is
* associated with {@code j>i} then at least one stack configuration for
* {@code j} is not in conflict with alternative {@code i}. The algorithm
* should keep going, looking for more lookahead due to the uncertainty.
*
* For simplicity, I'm doing an equality check between {@code x} and
* {@code x'} that lets the algorithm continue to consume lookahead longer
* than necessary. The reason I like the equality is of course the
* simplicity but also because that is the test you need to detect the
* alternatives that are actually in conflict.
*
* CONTINUE/STOP RULE
*
* Continue if union of resolved alternative sets from non-conflicting and
* conflicting alternative subsets has more than one alternative. We are
* uncertain about which alternative to predict.
*
* The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
* alternatives are still in the running for the amount of input we've
* consumed at this point. The conflicting sets let us strip away
* configurations that won't lead to more states because we resolve
* conflicts to the configuration with a minimum alternative for the
* conflicting set.
*
* CASES
*
*
*
* - no conflicts and more than 1 alternative in set => continue
*
* - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
* {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
* {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
* {@code {1,3}} => continue
*
*
* - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
* {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
* {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} =
* {@code {1}} => stop and predict 1
*
* - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
* {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
* {@code {1}} = {@code {1}} => stop and predict 1, can announce
* ambiguity {@code {1,2}}
*
* - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
* {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
* {@code {2}} = {@code {1,2}} => continue
*
* - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
* {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
* {@code {3}} = {@code {1,3}} => continue
*
*
*
* EXACT AMBIGUITY DETECTION
*
* If all states report the same conflicting set of alternatives, then we
* know we have the exact ambiguity set.
*
* {@code |A_i|>1} and {@code A_i = A_j} for all {@code i}, {@code j}.
*
* In other words, we continue examining lookahead until all {@code A_i}
* have more than one alternative and all {@code A_i} are the same. If
* {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
* because the resolved set is {@code {1}}. To determine what the real
* ambiguity is, we have to know whether the ambiguity is between one and
* two or one and three so we keep going. We can only stop prediction when
* we need exact ambiguity detection when the sets look like
* {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...
*/
public static int resolvesToJustOneViableAlt(@NotNull Collection<BitSet> altsets) {
    // The full-LL stop test is exactly the "single viable alternative"
    // computation, so delegate and return its result unchanged.
    return getSingleViableAlt(altsets);
}
/**
 * Determines if every alternative subset in {@code altsets} conflicts, i.e.
 * none of them is a single-alternative subset.
 *
 * This is implemented as the negation of
 * {@link #hasNonConflictingAltSet}, so the precise condition is "no
 * {@link BitSet} in {@code altsets} has cardinality exactly 1".
 *
 * @param altsets a collection of alternative subsets
 * @return {@code true} if no {@link BitSet} in {@code altsets} has
 * {@link BitSet#cardinality cardinality} 1, otherwise {@code false}
 */
public static boolean allSubsetsConflict(@NotNull Collection<BitSet> altsets) {
    return !hasNonConflictingAltSet(altsets);
}
/**
 * Determines if any single alternative subset in {@code altsets} contains
 * exactly one alternative.
 *
 * @param altsets a collection of alternative subsets
 * @return {@code true} if {@code altsets} contains a {@link BitSet} with
 * {@link BitSet#cardinality cardinality} 1, otherwise {@code false}
 * (including when {@code altsets} is empty)
 */
public static boolean hasNonConflictingAltSet(@NotNull Collection<BitSet> altsets) {
    for (BitSet alts : altsets) {
        if (alts.cardinality() == 1) {
            return true;
        }
    }
    return false;
}
/**
 * Determines if any single alternative subset in {@code altsets} contains
 * more than one alternative.
 *
 * @param altsets a collection of alternative subsets
 * @return {@code true} if {@code altsets} contains a {@link BitSet} with
 * {@link BitSet#cardinality cardinality} greater than 1, otherwise
 * {@code false} (including when {@code altsets} is empty)
 */
public static boolean hasConflictingAltSet(@NotNull Collection<BitSet> altsets) {
    for (BitSet alts : altsets) {
        if (alts.cardinality() > 1) {
            return true;
        }
    }
    return false;
}
/**
 * Determines if every alternative subset in {@code altsets} is equivalent.
 *
 * An empty collection is treated as trivially uniform and yields
 * {@code true} rather than failing on the first {@code next()} call.
 *
 * @param altsets a collection of alternative subsets
 * @return {@code true} if every member of {@code altsets} is equal to the
 * others (or there are no members), otherwise {@code false}
 */
public static boolean allSubsetsEqual(@NotNull Collection<BitSet> altsets) {
    Iterator<BitSet> it = altsets.iterator();
    if (!it.hasNext()) {
        return true;
    }

    // Comparing everything against the first subset is enough, since
    // BitSet equality is transitive.
    BitSet first = it.next();
    while (it.hasNext()) {
        if (!it.next().equals(first)) {
            return false;
        }
    }
    return true;
}
/**
 * Returns the unique alternative predicted by all alternative subsets in
 * {@code altsets}. If no such alternative exists, this method returns
 * {@link ATN#INVALID_ALT_NUMBER}.
 *
 * The subsets are first unioned via {@link #getAlts(Collection)}; the
 * result is unique only when that union holds exactly one alternative.
 *
 * @param altsets a collection of alternative subsets
 * @return the single alternative present across all subsets, or
 * {@link ATN#INVALID_ALT_NUMBER} when the union holds zero or several
 */
public static int getUniqueAlt(@NotNull Collection<BitSet> altsets) {
    BitSet all = getAlts(altsets);
    if (all.cardinality() == 1) {
        return all.nextSetBit(0);
    }
    return ATN.INVALID_ALT_NUMBER;
}
/**
 * Gets the complete set of represented alternatives for a collection of
 * alternative subsets. This method returns the union of each {@link BitSet}
 * in {@code altsets}.
 *
 * The result is a freshly allocated {@link BitSet}; the subsets passed in
 * are only read, never modified.
 *
 * @param altsets a collection of alternative subsets
 * @return the set of represented alternatives in {@code altsets}; empty
 * when {@code altsets} is empty
 */
public static BitSet getAlts(@NotNull Collection<BitSet> altsets) {
    BitSet all = new BitSet();
    for (BitSet alts : altsets) {
        all.or(alts);
    }
    return all;
}
/**
 * Collects the alternative number of every configuration in
 * {@code configs} into a single {@link BitSet}.
 *
 * @param configs the configuration set to read alternatives from
 * @return a new {@link BitSet} with one bit set per distinct
 * {@link ATNConfig#getAlt getAlt()} value found in {@code configs}
 * @since 4.5
 */
@NotNull
public static BitSet getAlts(@NotNull ATNConfigSet configs) {
    BitSet union = new BitSet();
    for (ATNConfig c : configs) {
        int alt = c.getAlt();
        union.set(alt);
    }
    return union;
}
/**
 * This function gets the conflicting alt subsets from a configuration set.
 * For each configuration {@code c} in {@code configs}:
 *
 * <pre>
 * map[c] U= c.{@link ATNConfig#getAlt getAlt()} # map hash/equals uses s and x, not
 * alt and not pred
 * </pre>
 *
 * Configurations are grouped with an {@link AltAndContextMap}, and each
 * group accumulates the alternatives of its members.
 *
 * @param configs the configuration set to partition
 * @return the alternative subsets, one {@link BitSet} per distinct key in
 * the grouping map
 */
@NotNull
public static Collection<BitSet> getConflictingAltSubsets(@NotNull ATNConfigSet configs) {
    // Typed through the Map interface so the BitSet values are read back
    // without casts.
    Map<ATNConfig, BitSet> configToAlts = new AltAndContextMap();
    for (ATNConfig c : configs) {
        BitSet alts = configToAlts.get(c);
        if (alts == null) {
            // First configuration seen for this key.
            alts = new BitSet();
            configToAlts.put(c, alts);
        }
        alts.set(c.getAlt());
    }
    return configToAlts.values();
}
/**
 * Get a map from state to alt subset from a configuration set. For each
 * configuration {@code c} in {@code configs}:
 *
 * <pre>
 * map[c.{@link ATNConfig#getState getState()}] U= c.{@link ATNConfig#getAlt getAlt()}
 * </pre>
 *
 * @param configs the configuration set to index by state
 * @return a new map holding, for every state appearing in {@code configs},
 * the set of alternatives of the configurations in that state
 */
@NotNull
public static Map<ATNState, BitSet> getStateToAltMap(@NotNull ATNConfigSet configs) {
    Map<ATNState, BitSet> m = new HashMap<ATNState, BitSet>();
    for (ATNConfig c : configs) {
        BitSet alts = m.get(c.getState());
        if (alts == null) {
            // First configuration seen for this state.
            alts = new BitSet();
            m.put(c.getState(), alts);
        }
        alts.set(c.getAlt());
    }
    return m;
}
/**
 * Determines if some state in {@code configs} is associated with exactly
 * one alternative, using the per-state alternative sets produced by
 * {@link #getStateToAltMap}.
 *
 * @param configs the configuration set to inspect
 * @return {@code true} if at least one state's alternative set has
 * {@link BitSet#cardinality cardinality} 1, otherwise {@code false}
 */
public static boolean hasStateAssociatedWithOneAlt(@NotNull ATNConfigSet configs) {
    Collection<BitSet> altsPerState = getStateToAltMap(configs).values();
    for (BitSet alts : altsPerState) {
        if (alts.cardinality() == 1) {
            return true;
        }
    }
    return false;
}
/**
 * Reduces every alternative subset to its minimum alternative and reports
 * that alternative if all subsets reduce to the same one.
 *
 * Each subset is expected to hold at least one alternative: for an empty
 * {@link BitSet}, {@link BitSet#nextSetBit nextSetBit(0)} yields -1, which
 * {@link BitSet#set(int)} rejects. When {@code altsets} itself is empty the
 * loop never runs and the value returned is {@code nextSetBit(0)} of an
 * empty set, not {@link ATN#INVALID_ALT_NUMBER}.
 *
 * @param altsets a collection of alternative subsets
 * @return the common minimum alternative, or {@link ATN#INVALID_ALT_NUMBER}
 * as soon as two subsets are found to have different minimums
 */
public static int getSingleViableAlt(@NotNull Collection<BitSet> altsets) {
    BitSet viableAlts = new BitSet();
    for (BitSet alts : altsets) {
        int minAlt = alts.nextSetBit(0);
        viableAlts.set(minAlt);
        if (viableAlts.cardinality() > 1) { // more than 1 viable alt
            return ATN.INVALID_ALT_NUMBER;
        }
    }
    return viableAlts.nextSetBit(0);
}
}