All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.antlr.analysis.DFA Maven / Gradle / Ivy

There is a newer version: 3.5.3
Show newest version
/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.analysis;

import org.antlr.codegen.CodeGenerator;
import org.antlr.misc.IntSet;
import org.antlr.misc.IntervalSet;
import org.antlr.misc.Utils;
import org.antlr.runtime.IntStream;
import org.stringtemplate.v4.ST;
import org.antlr.tool.*;

import java.util.*;

/** A DFA (converted from a grammar's NFA).
 *  DFAs are used as prediction machine for alternative blocks in all kinds
 *  of recognizers (lexers, parsers, tree walkers).
 */
public class DFA {
	public static final int REACHABLE_UNKNOWN = -2;
	public static final int REACHABLE_BUSY = -1; // in process of computing
	public static final int REACHABLE_NO = 0;
	public static final int REACHABLE_YES = 1;

	public static final int CYCLIC_UNKNOWN = -2;
	public static final int CYCLIC_BUSY = -1; // in process of computing
	public static final int CYCLIC_DONE = 0;
	
	/** Prevent explosion of DFA states during conversion. The max number
	 *  of states per alt in a single decision's DFA.
	public static final int MAX_STATES_PER_ALT_IN_DFA = 450;
	 */

	/** Set to 0 to not terminate early (time in ms) */
	public static int MAX_TIME_PER_DFA_CREATION = 1*1000;

	/** How many edges can each DFA state have before a "special" state
	 *  is created that uses IF expressions instead of a table?
	 */
	public static int MAX_STATE_TRANSITIONS_FOR_TABLE = 65534;

	/** What's the start state for this DFA? */
    public DFAState startState;

	/** This DFA is being built for which decision? */
	public int decisionNumber = 0;

    /** From what NFAState did we create the DFA? */
    public NFAState decisionNFAStartState;

	/** The printable grammar fragment associated with this DFA */
	public String description;

	/** A set of all uniquely-numbered DFA states.  Maps hash of DFAState
     *  to the actual DFAState object.  We use this to detect
     *  existing DFA states.  Map.  Use Map so
	 *  we can get old state back (Set only allows you to see if it's there).
	 *  Not used during fixed k lookahead as it's a waste to fill it with
	 *  a dup of states array.
     */
    protected Map uniqueStates = new HashMap();

	/** Maps the state number to the actual DFAState.  Use a Vector as it
	 *  grows automatically when I set the ith element.  This contains all
	 *  states, but the states are not unique.  s3 might be same as s1 so
	 *  s3 -> s1 in this table.  This is how cycles occur.  If fixed k,
	 *  then these states will all be unique as states[i] always points
	 *  at state i when no cycles exist.
	 *
	 *  This is managed in parallel with uniqueStates and simply provides
	 *  a way to go from state number to DFAState rather than via a
	 *  hash lookup.
	 */
	protected Vector states = new Vector();

	/** Unique state numbers per DFA */
	protected int stateCounter = 0;

	/** count only new states not states that were rejected as already present */
	protected int numberOfStates = 0;

	/** User specified max fixed lookahead.  If 0, nothing specified.  -1
	 *  implies we have not looked at the options table yet to set k.
	 */
	protected int user_k = -1;

	/** While building the DFA, track max lookahead depth if not cyclic */
	protected int max_k = -1;

    /** Is this DFA reduced?  I.e., can all states lead to an accept state? */
    protected boolean reduced = true;

    /** Are there any loops in this DFA?
	 *  Computed by doesStateReachAcceptState()
	 */
    protected boolean cyclic = false;

	/** Track whether this DFA has at least one sem/syn pred encountered
	 *  during a closure operation.  This is useful for deciding whether
	 *  to retry a non-LL(*) with k=1.  If no pred, it will not work w/o
	 *  a pred so don't bother.  It would just give another error message.
	 */
	public boolean predicateVisible = false;

	public boolean hasPredicateBlockedByAction = false;

	/** Each alt in an NFA derived from a grammar must have a DFA state that
     *  predicts it lest the parser not know what to do.  Nondeterminisms can
     *  lead to this situation (assuming no semantic predicates can resolve
     *  the problem) and when for some reason, I cannot compute the lookahead
     *  (which might arise from an error in the algorithm or from
     *  left-recursion etc...).  This list starts out with all alts contained
     *  and then in method doesStateReachAcceptState() I remove the alts I
     *  know to be uniquely predicted.
     */
    protected List unreachableAlts;

	protected int nAlts = 0;

	/** We only want one accept state per predicted alt; track here */
	protected DFAState[] altToAcceptState;

	/** Track whether an alt discovers recursion for each alt during
	 *  NFA to DFA conversion; >1 alt with recursion implies nonregular.
	 */
	public IntSet recursiveAltSet = new IntervalSet();

	/** Which NFA are we converting (well, which piece of the NFA)? */
    public NFA nfa;

	protected NFAToDFAConverter nfaConverter;

	/** This probe tells you a lot about a decision and is useful even
	 *  when there is no error such as when a syntactic nondeterminism
	 *  is solved via semantic predicates.  Perhaps a GUI would want
	 *  the ability to show that.
	 */
	public DecisionProbe probe = new DecisionProbe(this);

	/** Track absolute time of the conversion so we can have a failsafe:
	 *  if it takes too long, then terminate.  Assume bugs are in the
	 *  analysis engine.
	 */
	//protected long conversionStartTime;

	/** Map an edge transition table to a unique set number; ordered so
	 *  we can push into the output template as an ordered list of sets
	 *  and then ref them from within the transition[][] table.  Like this
	 *  for C# target:
	 *     public static readonly DFA30_transition0 =
	 *     	new short[] { 46, 46, -1, 46, 46, -1, -1, -1, -1, -1, -1, -1,...};
	 *         public static readonly DFA30_transition1 =
	 *     	new short[] { 21 };
	 *      public static readonly short[][] DFA30_transition = {
	 *     	  DFA30_transition0,
	 *     	  DFA30_transition0,
	 *     	  DFA30_transition1,
	 *     	  ...
	 *      };
	 */
	public Map, Integer> edgeTransitionClassMap = new LinkedHashMap, Integer>();

	/** The unique edge transition class number; every time we see a new
	 *  set of edges emanating from a state, we number it so we can reuse
	 *  if it's every seen again for another state.  For Java grammar,
	 *  some of the big edge transition tables are seen about 57 times.
	 */
	protected int edgeTransitionClass =0;

	/* This DFA can be converted to a transition[state][char] table and
	 * the following tables are filled by createStateTables upon request.
	 * These are injected into the templates for code generation.
	 * See March 25, 2006 entry for description:
	 *   http://www.antlr.org/blog/antlr3/codegen.tml
	 * Often using Vector as can't set ith position in a List and have
	 * it extend list size; bizarre.
	 */

	/** List of special DFAState objects */
	public List specialStates;
	/** List of ST for special states. */
	public List specialStateSTs;
	public Vector accept;
	public Vector eot;
	public Vector eof;
	public Vector min;
	public Vector max;
	public Vector special;
	public Vector> transition;
	/** just the Vector indicating which unique edge table is at
	 *  position i.
	 */
	public Vector transitionEdgeTables; // not used by java yet
	protected int uniqueCompressedSpecialStateNum = 0;

	/** Which generator to use if we're building state tables */
	protected CodeGenerator generator = null;

	protected DFA() {}

	public DFA(int decisionNumber, NFAState decisionStartState) {
		this.decisionNumber = decisionNumber;
        this.decisionNFAStartState = decisionStartState;
        nfa = decisionStartState.nfa;
        nAlts = nfa.grammar.getNumberOfAltsForDecisionNFA(decisionStartState);
        //setOptions( nfa.grammar.getDecisionOptions(getDecisionNumber()) );
        initAltRelatedInfo();

		//long start = System.currentTimeMillis();
        nfaConverter = new NFAToDFAConverter(this);
		try {
			nfaConverter.convert();

			// figure out if there are problems with decision
			verify();

			if ( !probe.isDeterministic() || probe.analysisOverflowed() ) {
				probe.issueWarnings();
			}

			// must be after verify as it computes cyclic, needed by this routine
			// should be after warnings because early termination or something
			// will not allow the reset to operate properly in some cases.
			resetStateNumbersToBeContiguous();

			//long stop = System.currentTimeMillis();
			//System.out.println("verify cost: "+(int)(stop-start)+" ms");
		}
//		catch (AnalysisTimeoutException at) {
//			probe.reportAnalysisTimeout();
//			if ( !okToRetryDFAWithK1() ) {
//				probe.issueWarnings();
//			}
//		}
		catch (NonLLStarDecisionException nonLL) {
			probe.reportNonLLStarDecision(this);
			// >1 alt recurses, k=* and no auto backtrack nor manual sem/syn
			if ( !okToRetryDFAWithK1() ) {
				probe.issueWarnings();
			}
		}
    }

	/** Walk all states and reset their numbers to be a contiguous sequence
	 *  of integers starting from 0.  Only cyclic DFA can have unused positions
	 *  in states list.  State i might be identical to a previous state j and
	 *  will result in states[i] == states[j].  We don't want to waste a state
	 *  number on this.  Useful mostly for code generation in tables.
	 *
	 *  At the start of this routine, states[i].stateNumber <= i by definition.
	 *  If states[50].stateNumber is 50 then a cycle during conversion may
	 *  try to add state 103, but we find that an identical DFA state, named
	 *  50, already exists, hence, states[103]==states[50] and both have
	 *  stateNumber 50 as they point at same object.  Afterwards, the set
	 *  of state numbers from all states should represent a contiguous range
	 *  from 0..n-1 where n is the number of unique states.
	 */
	public void resetStateNumbersToBeContiguous() {
		if ( getUserMaxLookahead()>0 ) {
			// all numbers are unique already; no states are thrown out.
			return;
		}

        // walk list of DFAState objects by state number,
		// setting state numbers to 0..n-1
		int snum=0;
		for (int i = 0; i <= getMaxStateNumber(); i++) {
			DFAState s = getState(i);
            // some states are unused after creation most commonly due to cycles
            // or conflict resolution.
            if ( s==null ) {
                continue;
            }
			// state i is mapped to DFAState with state number set to i originally
			// so if it's less than i, then we renumbered it already; that
			// happens when states have been merged or cycles occurred I think.
			// states[50] will point to DFAState with s50 in it but
			// states[103] might also point at this same DFAState.  Since
			// 50 < 103 then it's already been renumbered as it points downwards.
			boolean alreadyRenumbered = s.stateNumber getJavaCompressedAccept() { return getRunLengthEncoding(accept); }
	public List getJavaCompressedEOT() { return getRunLengthEncoding(eot); }
	public List getJavaCompressedEOF() { return getRunLengthEncoding(eof); }
	public List getJavaCompressedMin() { return getRunLengthEncoding(min); }
	public List getJavaCompressedMax() { return getRunLengthEncoding(max); }
	public List getJavaCompressedSpecial() { return getRunLengthEncoding(special); }
	public List> getJavaCompressedTransition() {
		if ( transition==null || transition.isEmpty() ) {
			return null;
		}
		List> encoded = new ArrayList>(transition.size());
		// walk Vector> which is the transition[][] table
		for (int i = 0; i < transition.size(); i++) {
			Vector transitionsForState = transition.elementAt(i);
			encoded.add(getRunLengthEncoding(transitionsForState));
		}
		return encoded;
	}

	/** Compress the incoming data list so that runs of same number are
	 *  encoded as number,value pair sequences.  3 -1 -1 -1 28 is encoded
	 *  as 1 3 3 -1 1 28.  I am pretty sure this is the lossless compression
	 *  that GIF files use.  Transition tables are heavily compressed by
	 *  this technique.  I got the idea from JFlex http://jflex.de/
	 *
	 *  Return List where each string is either \xyz for 8bit char
	 *  and \uFFFF for 16bit.  Hideous and specific to Java, but it is the
	 *  only target bad enough to need it.
	 */
	public List getRunLengthEncoding(List data) {
		if ( data==null || data.isEmpty() ) {
			// for states with no transitions we want an empty string ""
			// to hold its place in the transitions array.
			List empty = new ArrayList();
			empty.add("");
			return empty;
		}
		int size = Math.max(2,data.size()/2);
		List encoded = new ArrayList(size); // guess at size
		// scan values looking for runs
		int i = 0;
		Integer emptyValue = Utils.integer(-1);
		while ( i < data.size() ) {
			Integer I = data.get(i);
			if ( I==null ) {
				I = emptyValue;
			}
			// count how many v there are?
			int n = 0;
			for (int j = i; j < data.size(); j++) {
				Integer v = data.get(j);
				if ( v==null ) {
					v = emptyValue;
				}
				if ( I.equals(v) ) {
					n++;
				}
				else {
					break;
				}
			}
			encoded.add(generator.target.encodeIntAsCharEscape((char)n));
			encoded.add(generator.target.encodeIntAsCharEscape((char)I.intValue()));
			i+=n;
		}
		return encoded;
	}

	public void createStateTables(CodeGenerator generator) {
		//System.out.println("createTables:\n"+this);
		this.generator = generator;
		description = getNFADecisionStartState().getDescription();
		description =
			generator.target.getTargetStringLiteralFromString(description);

		// create all the tables
		special = new Vector(this.getNumberOfStates()); // Vector
		special.setSize(this.getNumberOfStates());
		specialStates = new ArrayList();				// List
		specialStateSTs = new ArrayList();				// List
		accept = new Vector(this.getNumberOfStates()); // Vector
		accept.setSize(this.getNumberOfStates());
		eot = new Vector(this.getNumberOfStates()); // Vector
		eot.setSize(this.getNumberOfStates());
		eof = new Vector(this.getNumberOfStates()); // Vector
		eof.setSize(this.getNumberOfStates());
		min = new Vector(this.getNumberOfStates()); // Vector
		min.setSize(this.getNumberOfStates());
		max = new Vector(this.getNumberOfStates()); // Vector
		max.setSize(this.getNumberOfStates());
		transition = new Vector>(this.getNumberOfStates()); // Vector>
		transition.setSize(this.getNumberOfStates());
		transitionEdgeTables = new Vector(this.getNumberOfStates()); // Vector
		transitionEdgeTables.setSize(this.getNumberOfStates());

		// for each state in the DFA, fill relevant tables.
		Iterator it;
		if ( getUserMaxLookahead()>0 ) {
			it = states.iterator();
		}
		else {
			it = getUniqueStates().values().iterator();
		}
		while ( it.hasNext() ) {
			DFAState s = it.next();
			if ( s==null ) {
				// ignore null states; some acylic DFA see this condition
				// when inlining DFA (due to lacking of exit branch pruning?)
				continue;
			}
			if ( s.isAcceptState() ) {
				// can't compute min,max,special,transition on accepts
				accept.set(s.stateNumber,
						   Utils.integer(s.getUniquelyPredictedAlt()));
			}
			else {
				createMinMaxTables(s);
				createTransitionTableEntryForState(s);
				createSpecialTable(s);
				createEOTAndEOFTables(s);
			}
		}

		// now that we have computed list of specialStates, gen code for 'em
		for (int i = 0; i < specialStates.size(); i++) {
			DFAState ss = specialStates.get(i);
			ST stateST =
				generator.generateSpecialState(ss);
			specialStateSTs.add(stateST);
		}

		// check that the tables are not messed up by encode/decode
		/*
		testEncodeDecode(min);
		testEncodeDecode(max);
		testEncodeDecode(accept);
		testEncodeDecode(special);
		System.out.println("min="+min);
		System.out.println("max="+max);
		System.out.println("eot="+eot);
		System.out.println("eof="+eof);
		System.out.println("accept="+accept);
		System.out.println("special="+special);
		System.out.println("transition="+transition);
		*/
	}

	/*
	private void testEncodeDecode(List data) {
		System.out.println("data="+data);
		List encoded = getRunLengthEncoding(data);
		StringBuffer buf = new StringBuffer();
		for (int i = 0; i < encoded.size(); i++) {
			String I = (String)encoded.get(i);
			int v = 0;
			if ( I.startsWith("\\u") ) {
				v = Integer.parseInt(I.substring(2,I.length()), 16);
			}
			else {
				v = Integer.parseInt(I.substring(1,I.length()), 8);
			}
			buf.append((char)v);
		}
		String encodedS = buf.toString();
		short[] decoded = org.antlr.runtime.DFA.unpackEncodedString(encodedS);
		//System.out.println("decoded:");
		for (int i = 0; i < decoded.length; i++) {
			short x = decoded[i];
			if ( x!=((Integer)data.get(i)).intValue() ) {
				System.err.println("problem with encoding");
			}
			//System.out.print(", "+x);
		}
		//System.out.println();
	}
	*/

	protected void createMinMaxTables(DFAState s) {
		int smin = Label.MAX_CHAR_VALUE + 1;
		int smax = Label.MIN_ATOM_VALUE - 1;
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = s.transition(j);
			Label label = edge.label;
			if ( label.isAtom() ) {
				if ( label.getAtom()>=Label.MIN_CHAR_VALUE ) {
					if ( label.getAtom()smax ) {
						smax = label.getAtom();
					}
				}
			}
			else if ( label.isSet() ) {
				IntervalSet labels = (IntervalSet)label.getSet();
				int lmin = labels.getMinElement();
				// if valid char (don't do EOF) and less than current min
				if ( lmin=Label.MIN_CHAR_VALUE ) {
					smin = labels.getMinElement();
				}
				if ( labels.getMaxElement()>smax ) {
					smax = labels.getMaxElement();
				}
			}
		}

		if ( smax<0 ) {
			// must be predicates or pure EOT transition; just zero out min, max
			smin = Label.MIN_CHAR_VALUE;
			smax = Label.MIN_CHAR_VALUE;
		}

		min.set(s.stateNumber, Utils.integer((char)smin));
		max.set(s.stateNumber, Utils.integer((char)smax));

		if ( smax<0 || smin>Label.MAX_CHAR_VALUE || smin<0 ) {
			ErrorManager.internalError("messed up: min="+min+", max="+max);
		}
	}

	protected void createTransitionTableEntryForState(DFAState s) {
		/*
		System.out.println("createTransitionTableEntryForState s"+s.stateNumber+
			" dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic());
			*/
		int smax = max.get(s.stateNumber);
		int smin = min.get(s.stateNumber);

		Vector stateTransitions = new Vector(smax-smin+1);
		stateTransitions.setSize(smax-smin+1);
		transition.set(s.stateNumber, stateTransitions);
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = s.transition(j);
			Label label = edge.label;
			if ( label.isAtom() && label.getAtom()>=Label.MIN_CHAR_VALUE ) {
				int labelIndex = label.getAtom()-smin; // offset from 0
				stateTransitions.set(labelIndex,
									 Utils.integer(edge.target.stateNumber));
			}
			else if ( label.isSet() ) {
				IntervalSet labels = (IntervalSet)label.getSet();
				int[] atoms = labels.toArray();
				for (int a = 0; a < atoms.length; a++) {
					// set the transition if the label is valid (don't do EOF)
					if ( atoms[a]>=Label.MIN_CHAR_VALUE ) {
						int labelIndex = atoms[a]-smin; // offset from 0
						stateTransitions.set(labelIndex,
											 Utils.integer(edge.target.stateNumber));
					}
				}
			}
		}
		// track unique state transition tables so we can reuse
		Integer edgeClass = edgeTransitionClassMap.get(stateTransitions);
		if ( edgeClass!=null ) {
			//System.out.println("we've seen this array before; size="+stateTransitions.size());
			transitionEdgeTables.set(s.stateNumber, edgeClass);
		}
		else {
			edgeClass = Utils.integer(edgeTransitionClass);
			transitionEdgeTables.set(s.stateNumber, edgeClass);
			edgeTransitionClassMap.put(stateTransitions, edgeClass);
			edgeTransitionClass++;
		}
	}

	/** Set up the EOT and EOF tables; we cannot put -1 min/max values so
	 *  we need another way to test that in the DFA transition function.
	 */
	protected void createEOTAndEOFTables(DFAState s) {
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = s.transition(j);
			Label label = edge.label;
			if ( label.isAtom() ) {
				if ( label.getAtom()==Label.EOT ) {
					// eot[s] points to accept state
					eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
				}
				else if ( label.getAtom()==Label.EOF ) {
					// eof[s] points to accept state
					eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
				}
			}
			else if ( label.isSet() ) {
				IntervalSet labels = (IntervalSet)label.getSet();
				int[] atoms = labels.toArray();
				for (int a = 0; a < atoms.length; a++) {
					if ( atoms[a]==Label.EOT ) {
						// eot[s] points to accept state
						eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
					}
					else if ( atoms[a]==Label.EOF ) {
						eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
					}
				}
			}
		}
	}

	protected void createSpecialTable(DFAState s) {
		// number all special states from 0...n-1 instead of their usual numbers
		boolean hasSemPred = false;

		// TODO this code is very similar to canGenerateSwitch.  Refactor to share
		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
			Transition edge = s.transition(j);
			Label label = edge.label;
			// can't do a switch if the edges have preds or are going to
			// require gated predicates
			if ( label.isSemanticPredicate() ||
				 ((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null)
			{
				hasSemPred = true;
				break;
			}
		}
		// if has pred or too big for table, make it special
		int smax = max.get(s.stateNumber);
		int smin = min.get(s.stateNumber);
		if ( hasSemPred || smax-smin>MAX_STATE_TRANSITIONS_FOR_TABLE ) {
			special.set(s.stateNumber,
						Utils.integer(uniqueCompressedSpecialStateNum));
			uniqueCompressedSpecialStateNum++;
			specialStates.add(s);
		}
		else {
			special.set(s.stateNumber, Utils.integer(-1)); // not special
		}
	}

	public int predict(IntStream input) {
		Interpreter interp = new Interpreter(nfa.grammar, input);
		return interp.predict(this);
	}

	/** Add a new DFA state to this DFA if not already present.
     *  To force an acyclic, fixed maximum depth DFA, just always
	 *  return the incoming state.  By not reusing old states,
	 *  no cycles can be created.  If we're doing fixed k lookahead
	 *  don't updated uniqueStates, just return incoming state, which
	 *  indicates it's a new state.
     */
    protected DFAState addState(DFAState d) {
		if ( getUserMaxLookahead()>0 ) {
			return d;
		}
		// does a DFA state exist already with everything the same
		// except its state number?
		DFAState existing = uniqueStates.get(d);
		if ( existing != null ) {
            /*
            System.out.println("state "+d.stateNumber+" exists as state "+
                existing.stateNumber);
                */
            // already there...get the existing DFA state
			return existing;
		}

		// if not there, then add new state.
		uniqueStates.put(d,d);
        numberOfStates++;
		return d;
	}

	public void removeState(DFAState d) {
		DFAState it = uniqueStates.remove(d);
		if ( it!=null ) {
			numberOfStates--;
		}
	}

	public Map getUniqueStates() {
		return uniqueStates;
	}

	/** What is the max state number ever created?  This may be beyond
	 *  getNumberOfStates().
	 */
	public int getMaxStateNumber() {
		return states.size()-1;
	}

	public DFAState getState(int stateNumber) {
		return states.get(stateNumber);
	}

	public void setState(int stateNumber, DFAState d) {
		states.set(stateNumber, d);
	}

	/** Is the DFA reduced?  I.e., does every state have a path to an accept
     *  state?  If not, don't delete as we need to generate an error indicating
     *  which paths are "dead ends".  Also tracks list of alts with no accept
     *  state in the DFA.  Must call verify() first before this makes sense.
     */
    public boolean isReduced() {
        return reduced;
    }

    /** Is this DFA cyclic?  That is, are there any loops?  If not, then
     *  the DFA is essentially an LL(k) predictor for some fixed, max k value.
     *  We can build a series of nested IF statements to match this.  In the
     *  presence of cycles, we need to build a general DFA and interpret it
     *  to distinguish between alternatives.
     */
    public boolean isCyclic() {
        return cyclic && getUserMaxLookahead()==0;
    }

	public boolean isClassicDFA() {
		return !isCyclic() &&
			   !nfa.grammar.decisionsWhoseDFAsUsesSemPreds.contains(this) &&
			   !nfa.grammar.decisionsWhoseDFAsUsesSynPreds.contains(this);
	}

	public boolean canInlineDecision() {
		return !isCyclic() &&
		    !probe.isNonLLStarDecision() &&
			getNumberOfStates() < CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE;
	}

	/** Is this DFA derived from the NFA for the Tokens rule? */
	public boolean isTokensRuleDecision() {
		if ( nfa.grammar.type!=Grammar.LEXER ) {
			return false;
		}
		NFAState nfaStart = getNFADecisionStartState();
		Rule r = nfa.grammar.getLocallyDefinedRule(Grammar.ARTIFICIAL_TOKENS_RULENAME);
		NFAState TokensRuleStart = r.startState;
		NFAState TokensDecisionStart =
			(NFAState)TokensRuleStart.transition[0].target;
		return nfaStart == TokensDecisionStart;
	}

	/** The user may specify a max, acyclic lookahead for any decision.  No
	 *  DFA cycles are created when this value, k, is greater than 0.
	 *  If this decision has no k lookahead specified, then try the grammar.
	 */
	public int getUserMaxLookahead() {
		if ( user_k>=0 ) { // cache for speed
			return user_k;
		}
		user_k = nfa.grammar.getUserMaxLookahead(decisionNumber);
		return user_k;
	}

	public boolean getAutoBacktrackMode() {
		return nfa.grammar.getAutoBacktrackMode(decisionNumber);
	}

	public void setUserMaxLookahead(int k) {
		this.user_k = k;
	}

	/** Return k if decision is LL(k) for some k else return max int
     */
	public int getMaxLookaheadDepth() {
		if ( hasCycle() ) return Integer.MAX_VALUE;
		// compute to be sure
		return _getMaxLookaheadDepth(startState, 0);
	}

	int _getMaxLookaheadDepth(DFAState d, int depth) {
		// not cyclic; don't worry about termination
		// fail if pred edge.
		int max = depth;
		for (int i=0; i (X|Y)\n" +
	 *     | X\n" +
	 *     ;
	 *
	 *  gives
	 * 
	 * .s0-X->.s1
	 * .s0-Y&&{synpred1_t}?->:s2=>1
	 * .s1-{synpred1_t}?->:s2=>1
	 * .s1-{true}?->:s3=>2
	 */
	public boolean hasSynPred() {
		boolean has = _hasSynPred(startState, new HashSet());
//		if ( !has ) {
//			System.out.println("no synpred in dec "+decisionNumber);
//			FASerializer serializer = new FASerializer(nfa.grammar);
//			String result = serializer.serialize(startState);
//			System.out.println(result);
//		}
		return has;
	}

	public boolean getHasSynPred() { return hasSynPred(); } // for ST	

	boolean _hasSynPred(DFAState d, Set busy) {
		busy.add(d);
		for (int i=0; i=0 ) {
//					System.out.println("has pred "+ctx.toString()+" "+ctx.isSyntacticPredicate());
//					System.out.println(((SemanticContext.Predicate)ctx).predicateAST.token);
//				}
				if ( ctx.isSyntacticPredicate() ) return true;
			}
			DFAState edgeTarget = (DFAState)t.target;
			if ( !busy.contains(edgeTarget) && _hasSynPred(edgeTarget, busy) ) return true;
		}

		return false;
	}

	public boolean hasSemPred() { // has user-defined sempred
		boolean has = _hasSemPred(startState, new HashSet());
		return has;
	}

	boolean _hasSemPred(DFAState d, Set busy) {
		busy.add(d);
		for (int i=0; i());
		return cyclic;
	}

	boolean _hasCycle(DFAState d, Map busy) {
		busy.put(d, CYCLIC_BUSY);
		for (int i=0; i getUnreachableAlts() {
        return unreachableAlts;
    }

	/** Once this DFA has been built, need to verify that:
	 *
	 *  1. it's reduced
	 *  2. all alts have an accept state
	 *
	 *  Elsewhere, in the NFA converter, we need to verify that:
	 *
	 *  3. alts i and j have disjoint lookahead if no sem preds
	 *  4. if sem preds, nondeterministic alts must be sufficiently covered
	 *
	 *  This is avoided if analysis bails out for any reason.
	 */
	public void verify() {
		doesStateReachAcceptState(startState);
	}

    /** figure out if this state eventually reaches an accept state and
     *  modify the instance variable 'reduced' to indicate if we find
     *  at least one state that cannot reach an accept state.  This implies
     *  that the overall DFA is not reduced.  This algorithm should be
     *  linear in the number of DFA states.
     *
     *  The algorithm also tracks which alternatives have no accept state,
     *  indicating a nondeterminism.
	 *
	 *  Also computes whether the DFA is cyclic.
	 *
     *  TODO: I call getUniquelyPredicatedAlt too much; cache predicted alt
     */
    protected boolean doesStateReachAcceptState(DFAState d) {
		if ( d.isAcceptState() ) {
            // accept states have no edges emanating from them so we can return
            d.setAcceptStateReachable(REACHABLE_YES);
            // this alt is uniquely predicted, remove from nondeterministic list
            int predicts = d.getUniquelyPredictedAlt();
            unreachableAlts.remove(Utils.integer(predicts));
            return true;
        }

        // avoid infinite loops
        d.setAcceptStateReachable(REACHABLE_BUSY);

        boolean anEdgeReachesAcceptState = false;
        // Visit every transition, track if at least one edge reaches stop state
		// Cannot terminate when we know this state reaches stop state since
		// all transitions must be traversed to set status of each DFA state.
		for (int i=0; i synpreds = a.getGatedSyntacticPredicatesInNFAConfigurations();
				if ( synpreds!=null ) {
					// add all the predicates we find (should be just one, right?)
					for (SemanticContext semctx : synpreds) {
						// System.out.println("synpreds: "+semctx);
						nfa.grammar.synPredUsedInDFA(this, semctx);
					}
				}
			}
		}
	}

	public NFAState getNFADecisionStartState() {
        return decisionNFAStartState;
    }

	public DFAState getAcceptState(int alt) {
		return altToAcceptState[alt];
	}

	public void setAcceptState(int alt, DFAState acceptState) {
		altToAcceptState[alt] = acceptState;
	}

	public String getDescription() {
		return description;
	}

	public int getDecisionNumber() {
        return decisionNFAStartState.getDecisionNumber();
    }

	/** If this DFA failed to finish during construction, we might be
	 *  able to retry with k=1 but we need to know whether it will
	 *  potentially succeed.  Can only succeed if there is a predicate
	 *  to resolve the issue.  Don't try if k=1 already as it would
	 *  cycle forever.  Timeout can retry with k=1 even if no predicate
	 *  if k!=1.
	 */
	public boolean okToRetryDFAWithK1() {
		boolean nonLLStarOrOverflowAndPredicateVisible =
			(probe.isNonLLStarDecision()||probe.analysisOverflowed()) &&
		    predicateVisible; // auto backtrack or manual sem/syn
		return getUserMaxLookahead()!=1 &&
			 nonLLStarOrOverflowAndPredicateVisible;
	}

	public String getReasonForFailure() {
		StringBuilder buf = new StringBuilder();
		if ( probe.isNonLLStarDecision() ) {
			buf.append("non-LL(*)");
			if ( predicateVisible ) {
				buf.append(" && predicate visible");
			}
		}
		if ( probe.analysisOverflowed() ) {
			buf.append("recursion overflow");
			if ( predicateVisible ) {
				buf.append(" && predicate visible");
			}
		}
		buf.append("\n");
		return buf.toString();
	}

	/** What GrammarAST node (derived from the grammar) is this DFA
     *  associated with?  It will point to the start of a block or
     *  the loop back of a (...)+ block etc...
     */
    public GrammarAST getDecisionASTNode() {
        return decisionNFAStartState.associatedASTNode;
    }

    public boolean isGreedy() {
		GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decisionNumber);
		Object v = nfa.grammar.getBlockOption(blockAST,"greedy");
		if ( v!=null && v.equals("false") ) {
			return false;
		}
        return true;

	}

    public DFAState newState() {
        DFAState n = new DFAState(this);
        n.stateNumber = stateCounter;
        stateCounter++;
		states.setSize(n.stateNumber+1);
		states.set(n.stateNumber, n); // track state num to state
        return n;
    }

	public int getNumberOfStates() {
		if ( getUserMaxLookahead()>0 ) {
			// if using fixed lookahead then uniqueSets not set
			return states.size();
		}
		return numberOfStates;
	}

	public int getNumberOfAlts() {
		return nAlts;
	}

//	public boolean analysisTimedOut() {
//		return probe.analysisTimedOut();
//	}

    protected void initAltRelatedInfo() {
        unreachableAlts = new LinkedList();
        for (int i = 1; i <= nAlts; i++) {
            unreachableAlts.add(Utils.integer(i));
        }
		altToAcceptState = new DFAState[nAlts+1];
    }

	@Override
	public String toString() {
		FASerializer serializer = new FASerializer(nfa.grammar);
		if ( startState==null ) {
			return "";
		}
		return serializer.serialize(startState, false);
	}

	/** EOT (end of token) is a label that indicates when the DFA conversion
	 *  algorithm would "fall off the end of a lexer rule".  It normally
	 *  means the default clause.  So for ('a'..'z')+ you would see a DFA
	 *  with a state that has a..z and EOT emanating from it.  a..z would
	 *  jump to a state predicting alt 1 and EOT would jump to a state
	 *  predicting alt 2 (the exit loop branch).  EOT implies anything other
	 *  than a..z.  If for some reason, the set is "all char" such as with
	 *  the wildcard '.', then EOT cannot match anything.  For example,
	 *
	 *     BLOCK : '{' (.)* '}'
	 *
	 *  consumes all char until EOF when greedy=true.  When all edges are
	 *  combined for the DFA state after matching '}', you will find that
	 *  it is all char.  The EOT transition has nothing to match and is
	 *  unreachable.  The findNewDFAStatesAndAddDFATransitions() method
	 *  must know to ignore the EOT, so we simply remove it from the
	 *  reachable labels.  Later analysis will find that the exit branch
	 *  is not predicted by anything.  For greedy=false, we leave only
	 *  the EOT label indicating that the DFA should stop immediately
	 *  and predict the exit branch. The reachable labels are often a
	 *  set of disjoint values like: [, 42, {0..41, 43..65534}]
	 *  due to DFA conversion so must construct a pure set to see if
	 *  it is same as Label.ALLCHAR.
	 *
	 *  Only do this for Lexers.
	 *
	 *  If EOT coexists with ALLCHAR:
	 *  1. If not greedy, modify the labels parameter to be EOT
	 *  2. If greedy, remove EOT from the labels set
	protected boolean reachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels)
	{
		Label eot = new Label(Label.EOT);
		if ( !labels.containsKey(eot) ) {
			return false;
		}
		System.out.println("### contains EOT");
		boolean containsAllChar = false;
		IntervalSet completeVocab = new IntervalSet();
		int n = labels.size();
		for (int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy