All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.antlr.v4.automata.ParserATNFactory Maven / Gradle / Ivy

There is a newer version: 4.13.2
Show newest version
/*
 * [The "BSD license"]
 *  Copyright (c) 2012 Terence Parr
 *  Copyright (c) 2012 Sam Harwell
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.antlr.v4.automata;


import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.Tree;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.ATNBuilder;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.ActionTransition;
import org.antlr.v4.runtime.atn.AtomTransition;
import org.antlr.v4.runtime.atn.BasicBlockStartState;
import org.antlr.v4.runtime.atn.BasicState;
import org.antlr.v4.runtime.atn.BlockEndState;
import org.antlr.v4.runtime.atn.BlockStartState;
import org.antlr.v4.runtime.atn.DecisionState;
import org.antlr.v4.runtime.atn.EpsilonTransition;
import org.antlr.v4.runtime.atn.LL1Analyzer;
import org.antlr.v4.runtime.atn.LoopEndState;
import org.antlr.v4.runtime.atn.NotSetTransition;
import org.antlr.v4.runtime.atn.PlusBlockStartState;
import org.antlr.v4.runtime.atn.PlusLoopbackState;
import org.antlr.v4.runtime.atn.PredicateTransition;
import org.antlr.v4.runtime.atn.RuleStartState;
import org.antlr.v4.runtime.atn.RuleStopState;
import org.antlr.v4.runtime.atn.RuleTransition;
import org.antlr.v4.runtime.atn.SetTransition;
import org.antlr.v4.runtime.atn.StarBlockStartState;
import org.antlr.v4.runtime.atn.StarLoopEntryState;
import org.antlr.v4.runtime.atn.StarLoopbackState;
import org.antlr.v4.runtime.atn.Transition;
import org.antlr.v4.runtime.atn.WildcardTransition;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import org.antlr.v4.runtime.misc.Pair;
import org.antlr.v4.semantics.UseDefAnalyzer;
import org.antlr.v4.tool.ErrorManager;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LeftRecursiveRule;
import org.antlr.v4.tool.Rule;
import org.antlr.v4.tool.ast.ActionAST;
import org.antlr.v4.tool.ast.AltAST;
import org.antlr.v4.tool.ast.BlockAST;
import org.antlr.v4.tool.ast.GrammarAST;
import org.antlr.v4.tool.ast.PredAST;
import org.antlr.v4.tool.ast.QuantifierAST;
import org.antlr.v4.tool.ast.TerminalAST;

import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/** ATN construction routines triggered by ATNBuilder.g.
 *
 *  No side-effects. It builds an ATN object and returns it.
 */
public class ParserATNFactory implements ATNFactory {
	@NotNull
	public final Grammar g;

	@NotNull
	public final ATN atn;

	public Rule currentRule;

	public int currentOuterAlt;

	protected final List> preventEpsilonDecisions =
		new ArrayList>();

	public ParserATNFactory(@NotNull Grammar g) {
		if (g == null) {
			throw new NullPointerException("g");
		}

		this.g = g;
		this.atn = new ATN();
	}

	@Override
	public ATN createATN() {
		_createATN(g.rules.values());
		atn.maxTokenType = g.getMaxTokenType();
        addRuleFollowLinks();
		addEOFTransitionToStartRules();
		ATNOptimizer.optimize(g, atn);

		for (Pair pair : preventEpsilonDecisions) {
			LL1Analyzer analyzer = new LL1Analyzer(atn);
			if (analyzer.LOOK(pair.b, null).contains(org.antlr.v4.runtime.Token.EPSILON)) {
				LeftRecursiveRule r;
				g.tool.errMgr.grammarError(ErrorType.EPSILON_LR_FOLLOW, g.fileName, ((GrammarAST)pair.a.ast.getChild(0)).getToken(), pair.a.name);
			}
		}

		return atn;
	}

	protected void _createATN(Collection rules) {
		createRuleStartAndStopATNStates();

		GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
		for (Rule r : rules) {
			// find rule's block
			GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
			CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
			ATNBuilder b = new ATNBuilder(nodes,this);
			try {
				setCurrentRuleName(r.name);
				Handle h = b.ruleBlock(null);
				rule(r.ast, r.name, h);
			}
			catch (RecognitionException re) {
				ErrorManager.fatalInternalError("bad grammar AST structure", re);
			}
		}
	}

	@Override
	public void setCurrentRuleName(String name) {
		this.currentRule = g.getRule(name);
	}

	@Override
	public void setCurrentOuterAlt(int alt) {
		currentOuterAlt = alt;
	}

	/* start->ruleblock->end */
	@Override
	public Handle rule(GrammarAST ruleAST, String name, Handle blk) {
		Rule r = g.getRule(name);
		RuleStartState start = atn.ruleToStartState[r.index];
		epsilon(start, blk.left);
		RuleStopState stop = atn.ruleToStopState[r.index];
		epsilon(blk.right, stop);
		Handle h = new Handle(start, stop);
//		ATNPrinter ser = new ATNPrinter(g, h.left);
//		System.out.println(ruleAST.toStringTree()+":\n"+ser.asString());
		ruleAST.atnState = start;
		return h;
	}

	/** From label {@code A} build graph {@code o-A->o}. */
	@Override
	public Handle tokenRef(TerminalAST node) {
		ATNState left = newState(node);
		ATNState right = newState(node);
		int ttype = g.getTokenType(node.getText());
		left.addTransition(new AtomTransition(right, ttype));
		node.atnState = left;
		return new Handle(left, right);
	}

	/** From set build single edge graph {@code o->o-set->o}.  To conform to
     *  what an alt block looks like, must have extra state on left.
	 *  This also handles {@code ~A}, converted to {@code ~{A}} set.
     */
	@Override
	public Handle set(GrammarAST associatedAST, List terminals, boolean invert) {
		ATNState left = newState(associatedAST);
		ATNState right = newState(associatedAST);
		IntervalSet set = new IntervalSet();
		for (GrammarAST t : terminals) {
			int ttype = g.getTokenType(t.getText());
			set.add(ttype);
		}
		if ( invert ) {
			left.addTransition(new NotSetTransition(right, set));
		}
		else {
			left.addTransition(new SetTransition(right, set));
		}
		associatedAST.atnState = left;
		return new Handle(left, right);
	}

	/** Not valid for non-lexers. */
	@Override
	public Handle range(GrammarAST a, GrammarAST b) {
		throw new UnsupportedOperationException();
	}

	protected int getTokenType(GrammarAST atom) {
		int ttype;
		if ( g.isLexer() ) {
			ttype = CharSupport.getCharValueFromGrammarCharLiteral(atom.getText());
		}
		else {
			ttype = g.getTokenType(atom.getText());
		}
		return ttype;
	}

	/** For a non-lexer, just build a simple token reference atom. */
	@Override
	public Handle stringLiteral(TerminalAST stringLiteralAST) {
		return tokenRef(stringLiteralAST);
	}

	/** {@code [Aa]} char sets not allowed in parser */
	@Override
	public Handle charSetLiteral(GrammarAST charSetAST) {
		return null;
	}

	/**
	 * For reference to rule {@code r}, build
	 *
	 * 
	 *  o->(r)  o
	 * 
* * where {@code (r)} is the start of rule {@code r} and the trailing * {@code o} is not linked to from rule ref state directly (uses * {@link RuleTransition#followState}). */ @Override public Handle ruleRef(GrammarAST node) { Handle h = _ruleRef(node); return h; } public Handle _ruleRef(GrammarAST node) { Rule r = g.getRule(node.getText()); if ( r==null ) { g.tool.errMgr.toolError(ErrorType.INTERNAL_ERROR, "Rule "+node.getText()+" undefined"); return null; } RuleStartState start = atn.ruleToStartState[r.index]; ATNState left = newState(node); ATNState right = newState(node); RuleTransition call = new RuleTransition(start, r.index, right); left.addTransition(call); node.atnState = left; return new Handle(left, right); } public void addFollowLink(int ruleIndex, ATNState right) { // add follow edge from end of invoked rule RuleStopState stop = atn.ruleToStopState[ruleIndex]; // System.out.println("add follow link from "+ruleIndex+" to "+right); epsilon(stop, right); } /** From an empty alternative build {@code o-e->o}. */ @Override public Handle epsilon(GrammarAST node) { ATNState left = newState(node); ATNState right = newState(node); epsilon(left, right); node.atnState = left; return new Handle(left, right); } /** Build what amounts to an epsilon transition with a semantic * predicate action. The {@code pred} is a pointer into the AST of * the {@link ANTLRParser#SEMPRED} token. */ @Override public Handle sempred(PredAST pred) { //System.out.println("sempred: "+ pred); ATNState left = newState(pred); ATNState right = newState(pred); boolean isCtxDependent = UseDefAnalyzer.actionIsContextDependent(pred); PredicateTransition p = new PredicateTransition(right, currentRule.index, g.sempreds.get(pred), isCtxDependent); left.addTransition(p); pred.atnState = left; return new Handle(left, right); } /** Build what amounts to an epsilon transition with an action. * The action goes into ATN though it is ignored during prediction * if {@link ActionTransition#actionIndex actionIndex}{@code <0}. */ @Override public Handle action(ActionAST action) { //System.out.println("action: "+action); ATNState left = newState(action); ATNState right = newState(action); ActionTransition a = new ActionTransition(right, currentRule.index); left.addTransition(a); action.atnState = left; return new Handle(left, right); } @Override public Handle action(String action) { return null; } /** * From {@code A|B|..|Z} alternative block build * *
	 *  o->o-A->o->o (last ATNState is BlockEndState pointed to by all alts)
	 *  |          ^
	 *  |->o-B->o--|
	 *  |          |
	 *  ...        |
	 *  |          |
	 *  |->o-Z->o--|
	 * 
* * So start node points at every alternative with epsilon transition and * every alt right side points at a block end ATNState. *

* Special case: only one alternative: don't make a block with alt * begin/end. *

* Special case: if just a list of tokens/chars/sets, then collapse to a * single edged o-set->o graph. *

* TODO: Set alt number (1..n) in the states? */ @Override public Handle block(BlockAST blkAST, GrammarAST ebnfRoot, List alts) { if ( ebnfRoot==null ) { if ( alts.size()==1 ) { Handle h = alts.get(0); blkAST.atnState = h.left; return h; } BlockStartState start = newState(BasicBlockStartState.class, blkAST); if ( alts.size()>1 ) atn.defineDecisionState(start); return makeBlock(start, blkAST, alts); } switch ( ebnfRoot.getType() ) { case ANTLRParser.OPTIONAL : BlockStartState start = newState(BasicBlockStartState.class, blkAST); atn.defineDecisionState(start); Handle h = makeBlock(start, blkAST, alts); return optional(ebnfRoot, h); case ANTLRParser.CLOSURE : BlockStartState star = newState(StarBlockStartState.class, ebnfRoot); if ( alts.size()>1 ) atn.defineDecisionState(star); h = makeBlock(star, blkAST, alts); return star(ebnfRoot, h); case ANTLRParser.POSITIVE_CLOSURE : PlusBlockStartState plus = newState(PlusBlockStartState.class, ebnfRoot); if ( alts.size()>1 ) atn.defineDecisionState(plus); h = makeBlock(plus, blkAST, alts); return plus(ebnfRoot, h); } return null; } protected Handle makeBlock(BlockStartState start, BlockAST blkAST, List alts) { BlockEndState end = newState(BlockEndState.class, blkAST); start.endState = end; for (Handle alt : alts) { // hook alts up to decision block epsilon(start, alt.left); epsilon(alt.right, end); // no back link in ATN so must walk entire alt to see if we can // strip out the epsilon to 'end' state TailEpsilonRemover opt = new TailEpsilonRemover(atn); opt.visit(alt.left); } Handle h = new Handle(start, end); // FASerializer ser = new FASerializer(g, h.left); // System.out.println(blkAST.toStringTree()+":\n"+ser); blkAST.atnState = start; if (Boolean.valueOf(blkAST.getOptionString("preventepsilon"))) { preventEpsilonDecisions.add(new Pair(currentRule, start)); } return h; } @NotNull @Override public Handle alt(@NotNull List els) { return elemList(els); } @NotNull public Handle elemList(@NotNull List els) { int n = els.size(); for (int i = 0; i < n - 1; i++) { // hook up elements (visit all but last) Handle el = els.get(i); // if el is of form o-x->o for x in {rule, action, pred, token, ...} // and not last in alt Transition tr = null; if ( el.left.getNumberOfTransitions()==1 ) tr = el.left.transition(0); boolean isRuleTrans = tr instanceof RuleTransition; if ( el.left.getStateType() == ATNState.BASIC && el.right.getStateType()== ATNState.BASIC && tr!=null && (isRuleTrans || tr.target == el.right) ) { // we can avoid epsilon edge to next el if ( isRuleTrans ) ((RuleTransition)tr).followState = els.get(i+1).left; else tr.target = els.get(i+1).left; atn.removeState(el.right); // we skipped over this state } else { // need epsilon if previous block's right end node is complicated epsilon(el.right, els.get(i+1).left); } } Handle first = els.get(0); Handle last = els.get(n -1); if ( first==null || last==null ) { g.tool.errMgr.toolError(ErrorType.INTERNAL_ERROR, "element list has first|last == null"); } return new Handle(first.left, last.right); } /** * From {@code (A)?} build either: * *

	 *  o--A->o
	 *  |     ^
	 *  o---->|
	 * 
* * or, if {@code A} is a block, just add an empty alt to the end of the * block */ @NotNull @Override public Handle optional(@NotNull GrammarAST optAST, @NotNull Handle blk) { BlockStartState blkStart = (BlockStartState)blk.left; blkStart.nonGreedy = !((QuantifierAST)optAST).isGreedy(); if (((QuantifierAST)optAST).isGreedy()) { epsilon(blkStart, blk.right); } else { Transition existing = blkStart.removeTransition(0); epsilon(blkStart, blk.right); blkStart.addTransition(existing); } optAST.atnState = blk.left; return blk; } /** * From {@code (blk)+} build * *
	 *   |---------|
	 *   v         |
	 *  [o-blk-o]->o->o
	 * 
* * We add a decision for loop back node to the existing one at {@code blk} * start. */ @NotNull @Override public Handle plus(@NotNull GrammarAST plusAST, @NotNull Handle blk) { PlusBlockStartState blkStart = (PlusBlockStartState)blk.left; BlockEndState blkEnd = (BlockEndState)blk.right; PlusLoopbackState loop = newState(PlusLoopbackState.class, plusAST); loop.nonGreedy = !((QuantifierAST)plusAST).isGreedy(); atn.defineDecisionState(loop); LoopEndState end = newState(LoopEndState.class, plusAST); blkStart.loopBackState = loop; end.loopBackState = loop; plusAST.atnState = blkStart; epsilon(blkEnd, loop); // blk can see loop back BlockAST blkAST = (BlockAST)plusAST.getChild(0); if ( ((QuantifierAST)plusAST).isGreedy() ) { if (expectNonGreedy(blkAST)) { g.tool.errMgr.grammarError(ErrorType.EXPECTED_NON_GREEDY_WILDCARD_BLOCK, g.fileName, plusAST.getToken(), plusAST.getToken().getText()); } epsilon(loop, blkStart); // loop back to start epsilon(loop, end); // or exit } else { // if not greedy, priority to exit branch; make it first epsilon(loop, end); // exit epsilon(loop, blkStart); // loop back to start } return new Handle(blkStart, end); } /** * From {@code (blk)*} build {@code ( blk+ )?} with *two* decisions, one for * entry and one for choosing alts of {@code blk}. * *
	 *   |-------------|
	 *   v             |
	 *   o--[o-blk-o]->o  o
	 *   |                ^
	 *   -----------------|
	 * 
* * Note that the optional bypass must jump outside the loop as * {@code (A|B)*} is not the same thing as {@code (A|B|)+}. */ @NotNull @Override public Handle star(@NotNull GrammarAST starAST, @NotNull Handle elem) { StarBlockStartState blkStart = (StarBlockStartState)elem.left; BlockEndState blkEnd = (BlockEndState)elem.right; StarLoopEntryState entry = newState(StarLoopEntryState.class, starAST); entry.nonGreedy = !((QuantifierAST)starAST).isGreedy(); atn.defineDecisionState(entry); LoopEndState end = newState(LoopEndState.class, starAST); StarLoopbackState loop = newState(StarLoopbackState.class, starAST); entry.loopBackState = loop; end.loopBackState = loop; BlockAST blkAST = (BlockAST)starAST.getChild(0); if ( ((QuantifierAST)starAST).isGreedy() ) { if (expectNonGreedy(blkAST)) { g.tool.errMgr.grammarError(ErrorType.EXPECTED_NON_GREEDY_WILDCARD_BLOCK, g.fileName, starAST.getToken(), starAST.getToken().getText()); } epsilon(entry, blkStart); // loop enter edge (alt 1) epsilon(entry, end); // bypass loop edge (alt 2) } else { // if not greedy, priority to exit branch; make it first epsilon(entry, end); // bypass loop edge (alt 1) epsilon(entry, blkStart); // loop enter edge (alt 2) } epsilon(blkEnd, loop); // block end hits loop back epsilon(loop, entry); // loop back to entry/exit decision starAST.atnState = entry; // decision is to enter/exit; blk is its own decision return new Handle(entry, end); } /** Build an atom with all possible values in its label. */ @NotNull @Override public Handle wildcard(GrammarAST node) { ATNState left = newState(node); ATNState right = newState(node); left.addTransition(new WildcardTransition(right)); node.atnState = left; return new Handle(left, right); } void epsilon(ATNState a, @NotNull ATNState b) { if ( a!=null ) a.addTransition(new EpsilonTransition(b)); } /** Define all the rule begin/end ATNStates to solve forward reference * issues. */ void createRuleStartAndStopATNStates() { atn.ruleToStartState = new RuleStartState[g.rules.size()]; atn.ruleToStopState = new RuleStopState[g.rules.size()]; for (Rule r : g.rules.values()) { RuleStartState start = newState(RuleStartState.class, r.ast); RuleStopState stop = newState(RuleStopState.class, r.ast); start.stopState = stop; start.setRuleIndex(r.index); stop.setRuleIndex(r.index); atn.ruleToStartState[r.index] = start; atn.ruleToStopState[r.index] = stop; } } public void addRuleFollowLinks() { for (ATNState p : atn.states) { if ( p!=null && p.getStateType() == ATNState.BASIC && p.getNumberOfTransitions()==1 && p.transition(0) instanceof RuleTransition ) { RuleTransition rt = (RuleTransition) p.transition(0); addFollowLink(rt.ruleIndex, rt.followState); } } } /** Add an EOF transition to any rule end ATNState that points to nothing * (i.e., for all those rules not invoked by another rule). These * are start symbols then. * * Return the number of grammar entry points; i.e., how many rules are * not invoked by another rule (they can only be invoked from outside). * These are the start rules. */ public int addEOFTransitionToStartRules() { int n = 0; ATNState eofTarget = newState(null); // one unique EOF target for all rules for (Rule r : g.rules.values()) { ATNState stop = atn.ruleToStopState[r.index]; if ( stop.getNumberOfTransitions()>0 ) continue; n++; Transition t = new AtomTransition(eofTarget, Token.EOF); stop.addTransition(t); } return n; } @Override public Handle label(Handle t) { return t; } @Override public Handle listLabel(Handle t) { return t; } @NotNull public T newState(@NotNull Class nodeType, GrammarAST node) { Exception cause; try { Constructor ctor = nodeType.getConstructor(); T s = ctor.newInstance(); if ( currentRule==null ) s.setRuleIndex(-1); else s.setRuleIndex(currentRule.index); atn.addState(s); return s; } catch (InstantiationException ex) { cause = ex; } catch (IllegalAccessException ex) { cause = ex; } catch (IllegalArgumentException ex) { cause = ex; } catch (InvocationTargetException ex) { cause = ex; } catch (NoSuchMethodException ex) { cause = ex; } catch (SecurityException ex) { cause = ex; } String message = String.format("Could not create %s of type %s.", ATNState.class.getName(), nodeType.getName()); throw new UnsupportedOperationException(message, cause); } @NotNull public ATNState newState(@Nullable GrammarAST node) { ATNState n = new BasicState(); n.setRuleIndex(currentRule.index); atn.addState(n); return n; } @NotNull @Override public ATNState newState() { return newState(null); } public boolean expectNonGreedy(@NotNull BlockAST blkAST) { if ( blockHasWildcardAlt(blkAST) ) { return true; } return false; } /** * {@code (BLOCK (ALT .))} or {@code (BLOCK (ALT 'a') (ALT .))}. */ public static boolean blockHasWildcardAlt(@NotNull GrammarAST block) { for (Object alt : block.getChildren()) { if ( !(alt instanceof AltAST) ) continue; AltAST altAST = (AltAST)alt; if ( altAST.getChildCount()==1 ) { Tree e = altAST.getChild(0); if ( e.getType()==ANTLRParser.WILDCARD ) { return true; } } } return false; } @Override public Handle lexerAltCommands(Handle alt, Handle cmds) { return null; } @Override public String lexerCallCommand(GrammarAST ID, GrammarAST arg) { return null; } @Override public String lexerCommand(GrammarAST ID) { return null; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy