/*
* [The "BSD license"]
* Copyright (c) 2010 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.codegen;
import org.antlr.Tool;
import org.antlr.analysis.DFA;
import org.antlr.analysis.*;
import org.antlr.grammar.v3.ANTLRLexer;
import org.antlr.grammar.v3.ANTLRParser;
import org.antlr.grammar.v3.ActionTranslator;
import org.antlr.grammar.v3.CodeGenTreeWalker;
import org.antlr.misc.BitSet;
import org.antlr.misc.*;
import org.antlr.runtime.*;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.tool.*;
import org.stringtemplate.v4.*;
import java.io.IOException;
import java.io.Writer;
import java.util.*;
/** ANTLR's code generator.
*
* Generate recognizers derived from grammars. Language independence
* is achieved through the use of STGroup objects. All output
* strings are completely encapsulated in the group files such as Java.stg.
* Some computations are done that are unused by a particular language.
* This generator just computes and sets the values into the templates;
* the templates are free to use or not use the information.
*
* To make a new code generation target, define X.stg for language X
* by copying from the existing Y.stg most closely related to your language;
* e.g., to do CSharp.stg copy Java.stg. The template group file has a
* bunch of templates that are needed by the code generator. You can add
* a new target w/o even recompiling ANTLR itself. The language=X option
* in a grammar file dictates which templates get loaded/used.
*
* Some languages, like C, need both parser files and header files. Java needs
* a separate file for the cyclic DFAs as ANTLR generates bytecodes
* directly (which cannot be in the generated parser Java file). To facilitate
* this, the cyclic DFAs may live in the same file as the recognizer, but the
* header and output files must be separate; the recognizer itself goes in
* the output file.
*/
public class CodeGenerator {
/** When generating SWITCH statements, some targets might need to limit
* the size (based upon the number of case labels). Generally, this
* limit will be hit only for lexers, where a wildcard in a Unicode
* vocabulary environment would generate a SWITCH with 65000 labels.
*/
public final static int MSCL_DEFAULT = 300;
public static int MAX_SWITCH_CASE_LABELS = MSCL_DEFAULT;
public final static int MSA_DEFAULT = 3;
public static int MIN_SWITCH_ALTS = MSA_DEFAULT;
public boolean GENERATE_SWITCHES_WHEN_POSSIBLE = true;
public static boolean LAUNCH_ST_INSPECTOR = false;
public final static int MADSI_DEFAULT = 60; // do lots of states inline (needed for expression rules)
public static int MAX_ACYCLIC_DFA_STATES_INLINE = MADSI_DEFAULT;
public static String classpathTemplateRootDirectoryName =
"org/antlr/codegen/templates";
/** Which grammar are we generating code for? Each generator
* is attached to a specific grammar.
*/
public Grammar grammar;
/** What language are we generating? */
protected String language;
/** The target specifies how to write out files and do other language
* specific actions.
*/
public Target target = null;
/** Where are the templates this generator should use to generate code? */
protected STGroup templates;
/** The basic output templates without the AST or template-output additions;
* these are the templates loaded for the language, such as Java.stg, *and*
* the Dbg templates if debugging is turned on. Used for generating syntactic predicates.
*/
protected STGroup baseTemplates;
protected ST recognizerST;
protected ST outputFileST;
protected ST headerFileST;
/** Used to create unique labels */
protected int uniqueLabelNumber = 1;
/** A reference to the ANTLR tool so we can learn about output directories
* and such.
*/
protected Tool tool;
/** Generate debugging event method calls */
protected boolean debug;
/** Create a Tracer object and make the recognizer invoke this. */
protected boolean trace;
/** Track runtime parsing information about decisions etc...
* This requires the debugging event mechanism to work.
*/
protected boolean profile;
protected int lineWidth = 72;
/** I have factored out the generation of acyclic DFAs to a separate class. */
public ACyclicDFACodeGenerator acyclicDFAGenerator =
new ACyclicDFACodeGenerator(this);
/** I have factored out the generation of cyclic DFAs to a separate class. */
/*
public CyclicDFACodeGenerator cyclicDFAGenerator =
new CyclicDFACodeGenerator(this);
*/
public static final String VOCAB_FILE_EXTENSION = ".tokens";
protected final static String vocabFilePattern =
"<tokens:{it|<it.name>=<it.type>\n}>" +
"<literals:{it|<it.name>=<it.type>\n}>";
public CodeGenerator(Tool tool, Grammar grammar, String language) {
this.tool = tool;
this.grammar = grammar;
this.language = language;
target = loadLanguageTarget(language);
}
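// A minimal sketch of how this class is typically driven (the real driver
// lives in org.antlr.Tool; the sequence below is illustrative and assumes
// the debug/trace/profile setters defined elsewhere in this class):
//
//   CodeGenerator generator = new CodeGenerator(tool, grammar, "Java");
//   grammar.setCodeGenerator(generator);
//   generator.setDebug(tool.isDebug());
//   generator.setProfile(tool.isProfile());
//   generator.setTrace(tool.isTrace());
//   ST outputFile = generator.genRecognizer();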
public static Target loadLanguageTarget(String language) {
Target target = null;
String targetName = "org.antlr.codegen."+language+"Target";
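// e.g., language "Java" resolves to org.antlr.codegen.JavaTarget;
// unknown languages fall through to the generic Target default below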
try {
Class<? extends Target> c = Class.forName(targetName).asSubclass(Target.class);
target = (Target)c.newInstance();
}
catch (ClassNotFoundException cnfe) {
target = new Target(); // use default
}
catch (InstantiationException ie) {
ErrorManager.error(ErrorManager.MSG_CANNOT_CREATE_TARGET_GENERATOR,
targetName,
ie);
}
catch (IllegalAccessException cnfe) {
ErrorManager.error(ErrorManager.MSG_CANNOT_CREATE_TARGET_GENERATOR,
targetName,
cnfe);
}
return target;
}
/** load the main language.stg template group file */
public void loadTemplates(String language) {
String langDir = classpathTemplateRootDirectoryName+"/"+language;
STGroup coreTemplates = new ToolSTGroupFile(langDir+"/"+language+".stg");
baseTemplates = coreTemplates;
// dynamically add subgroups that act like filters to apply to
// their supergroup. E.g., Java:Dbg:AST:ASTParser::ASTDbg.
String outputOption = (String)grammar.getOption("output");
if ( outputOption!=null && outputOption.equals("AST") ) {
if ( debug && grammar.type!=Grammar.LEXER ) {
STGroup dbgTemplates = new ToolSTGroupFile(langDir+"/Dbg.stg");
dbgTemplates.importTemplates(coreTemplates);
baseTemplates = dbgTemplates;
STGroup astTemplates = new ToolSTGroupFile(langDir+"/AST.stg");
astTemplates.importTemplates(dbgTemplates);
STGroup astParserTemplates;
if ( grammar.type==Grammar.TREE_PARSER ) {
astParserTemplates = new ToolSTGroupFile(langDir+"/ASTTreeParser.stg");
astParserTemplates.importTemplates(astTemplates);
}
else {
astParserTemplates = new ToolSTGroupFile(langDir+"/ASTParser.stg");
astParserTemplates.importTemplates(astTemplates);
}
STGroup astDbgTemplates = new ToolSTGroupFile(langDir+"/ASTDbg.stg");
astDbgTemplates.importTemplates(astParserTemplates);
templates = astDbgTemplates;
dbgTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps
astDbgTemplates.iterateAcrossValues = true;
astParserTemplates.iterateAcrossValues = true;
}
else {
STGroup astTemplates = new ToolSTGroupFile(langDir+"/AST.stg");
astTemplates.importTemplates(coreTemplates);
STGroup astParserTemplates;
if ( grammar.type==Grammar.TREE_PARSER ) {
astParserTemplates = new ToolSTGroupFile(langDir+"/ASTTreeParser.stg");
astParserTemplates.importTemplates(astTemplates);
}
else {
astParserTemplates = new ToolSTGroupFile(langDir+"/ASTParser.stg");
astParserTemplates.importTemplates(astTemplates);
}
templates = astParserTemplates;
astTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps
astParserTemplates.iterateAcrossValues = true;
}
}
else if ( outputOption!=null && outputOption.equals("template") ) {
if ( debug && grammar.type!=Grammar.LEXER ) {
STGroup dbgTemplates = new ToolSTGroupFile(langDir+"/Dbg.stg");
dbgTemplates.importTemplates(coreTemplates);
baseTemplates = dbgTemplates;
STGroup stTemplates = new ToolSTGroupFile(langDir+"/ST.stg");
stTemplates.importTemplates(dbgTemplates);
templates = stTemplates;
dbgTemplates.iterateAcrossValues = true;
}
else {
STGroup stTemplates = new ToolSTGroupFile(langDir+"/ST.stg");
stTemplates.importTemplates(coreTemplates);
templates = stTemplates;
}
templates.iterateAcrossValues = true; // ST v3 compatibility with Maps
}
else if ( debug && grammar.type!=Grammar.LEXER ) {
STGroup dbgTemplates = new ToolSTGroupFile(langDir+"/Dbg.stg");
dbgTemplates.importTemplates(coreTemplates);
templates = dbgTemplates;
baseTemplates = templates;
baseTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps
}
else {
templates = coreTemplates;
coreTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps
}
}
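// For example, a parser grammar with output=AST and debug on yields the
// import chain ASTDbg -> ASTParser -> AST -> Dbg -> Java (most specific
// group first), so AST/debug templates override the core language templates.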
/** Given the grammar to which we are attached, walk the AST associated
* with that grammar to create NFAs. Then create the DFAs for all
* decision points in the grammar by converting the NFAs to DFAs.
* Finally, walk the AST again to generate code.
*
* Either 1 or 2 files are written:
*
* recognizer: the main parser/lexer/treewalker item
* header file: language like C/C++ need extern definitions
*
* The target, such as JavaTarget, dictates which files get written.
*/
public ST genRecognizer() {
//System.out.println("### generate "+grammar.name+" recognizer");
// LOAD OUTPUT TEMPLATES
loadTemplates(language);
if ( templates==null ) {
return null;
}
// CREATE NFA FROM GRAMMAR, CREATE DFA FROM NFA
if ( ErrorManager.doNotAttemptAnalysis() ) {
return null;
}
target.performGrammarAnalysis(this, grammar);
// some grammar analysis errors will not yield reliable DFA
if ( ErrorManager.doNotAttemptCodeGen() ) {
return null;
}
// OPTIMIZE DFA
DFAOptimizer optimizer = new DFAOptimizer(grammar);
optimizer.optimize();
// OUTPUT FILE (contains recognizerST)
outputFileST = templates.getInstanceOf("outputFile");
// HEADER FILE
if ( templates.isDefined("headerFile") ) {
headerFileST = templates.getInstanceOf("headerFile");
}
else {
// create a dummy to avoid null-checks all over code generator
headerFileST = new ST(templates,"xyz");
headerFileST.add("cyclicDFAs", (Object)null); // it normally sees this from outputFile
//headerFileST.impl.name = "dummy-header-file";
}
boolean filterMode = grammar.getOption("filter")!=null &&
grammar.getOption("filter").equals("true");
boolean canBacktrack = grammar.getSyntacticPredicates()!=null ||
grammar.composite.getRootGrammar().atLeastOneBacktrackOption ||
filterMode;
// TODO: move this down further because generating the recognizer
// alters the model with info on who uses predefined properties etc...
// The actions here might refer to something.
// The only two possible output files are available at this point.
// Verify action scopes are ok for target and dump actions into output
// Templates can say <actions.parser.header> for example.
Map<String, Map<String, Object>> actions = grammar.getActions();
verifyActionScopesOkForTarget(actions);
// translate $x::y references
translateActionAttributeReferences(actions);
ST gateST = templates.getInstanceOf("actionGate");
if ( filterMode ) {
// if filtering, we need to set actions to execute at backtracking
// level 1 not 0.
gateST = templates.getInstanceOf("filteringActionGate");
}
grammar.setSynPredGateIfNotAlready(gateST);
headerFileST.add("actions", actions);
outputFileST.add("actions", actions);
headerFileST.add("buildTemplate", grammar.buildTemplate());
outputFileST.add("buildTemplate", grammar.buildTemplate());
headerFileST.add("buildAST", grammar.buildAST());
outputFileST.add("buildAST", grammar.buildAST());
outputFileST.add("rewriteMode", grammar.rewriteMode());
headerFileST.add("rewriteMode", grammar.rewriteMode());
outputFileST.add("backtracking", canBacktrack);
headerFileST.add("backtracking", canBacktrack);
// turn on memoize attribute at grammar level so we can create ruleMemo.
// each rule has memoize attr that hides this one, indicating whether
// it needs to save results
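// (note: && binds tighter than ||, so the grammar-level option only takes
// effect when it is "true" and the grammar can backtrack)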
String memoize = (String)grammar.getOption("memoize");
outputFileST.add("memoize",
(grammar.atLeastOneRuleMemoizes ||
memoize != null && memoize.equals("true") &&
canBacktrack));
headerFileST.add("memoize",
(grammar.atLeastOneRuleMemoizes ||
memoize != null && memoize.equals("true") &&
canBacktrack));
outputFileST.add("trace", trace);
headerFileST.add("trace", trace);
outputFileST.add("profile", profile);
headerFileST.add("profile", profile);
// RECOGNIZER
if ( grammar.type==Grammar.LEXER ) {
recognizerST = templates.getInstanceOf("lexer");
outputFileST.add("LEXER", true);
headerFileST.add("LEXER", true);
recognizerST.add("filterMode",
filterMode);
}
else if ( grammar.type==Grammar.PARSER ||
grammar.type==Grammar.COMBINED )
{
recognizerST = templates.getInstanceOf("parser");
outputFileST.add("PARSER", true);
headerFileST.add("PARSER", true);
}
else {
recognizerST = templates.getInstanceOf("treeParser");
outputFileST.add("TREE_PARSER", true);
headerFileST.add("TREE_PARSER", true);
recognizerST.add("filterMode",
filterMode);
}
outputFileST.add("recognizer", recognizerST);
headerFileST.add("recognizer", recognizerST);
outputFileST.add("actionScope",
grammar.getDefaultActionScope(grammar.type));
headerFileST.add("actionScope",
grammar.getDefaultActionScope(grammar.type));
String targetAppropriateFileNameString =
target.getTargetStringLiteralFromString(grammar.getFileName());
outputFileST.add("fileName", targetAppropriateFileNameString);
headerFileST.add("fileName", targetAppropriateFileNameString);
outputFileST.add("ANTLRVersion", tool.VERSION);
headerFileST.add("ANTLRVersion", tool.VERSION);
outputFileST.add("generatedTimestamp", Tool.getCurrentTimeStamp());
headerFileST.add("generatedTimestamp", Tool.getCurrentTimeStamp());
// GENERATE RECOGNIZER
// Walk the AST holding the input grammar, this time generating code
// Decisions are generated by using the precomputed DFAs
// Fill in the various templates with data
CodeGenTreeWalker gen = new CodeGenTreeWalker(new CommonTreeNodeStream(grammar.getGrammarTree()));
try {
gen.grammar_(
grammar,
recognizerST,
outputFileST,
headerFileST);
}
catch (RecognitionException re) {
ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
re);
}
genTokenTypeConstants(recognizerST);
genTokenTypeConstants(outputFileST);
genTokenTypeConstants(headerFileST);
if ( grammar.type!=Grammar.LEXER ) {
genTokenTypeNames(recognizerST);
genTokenTypeNames(outputFileST);
genTokenTypeNames(headerFileST);
}
// Now that we know which synpreds are used, we can set them into the templates
Set<String> synpredNames = null;
if ( grammar.synPredNamesUsedInDFA.size()>0 ) {
synpredNames = grammar.synPredNamesUsedInDFA;
}
outputFileST.add("synpreds", synpredNames);
headerFileST.add("synpreds", synpredNames);
// all recognizers can see Grammar object
recognizerST.add("grammar", grammar);
// do not render templates to disk if errors occurred
if ( ErrorManager.getErrorState().errors > 0 ) {
return null;
}
if (LAUNCH_ST_INSPECTOR) {
outputFileST.inspect();
if ( templates.isDefined("headerFile") ) headerFileST.inspect();
}
// WRITE FILES
try {
target.genRecognizerFile(tool,this,grammar,outputFileST);
if ( templates.isDefined("headerFile") ) {
ST extST = templates.getInstanceOf("headerFileExtension");
target.genRecognizerHeaderFile(tool,this,grammar,headerFileST,extST.render());
}
// write out the vocab interchange file; used by antlr,
// does not change per target
ST tokenVocabSerialization = genTokenVocabOutput();
String vocabFileName = getVocabFileName();
if ( vocabFileName!=null ) {
write(tokenVocabSerialization, vocabFileName);
}
//System.out.println(outputFileST.getDOTForDependencyGraph(false));
}
catch (IOException ioe) {
ErrorManager.error(ErrorManager.MSG_CANNOT_WRITE_FILE, ioe);
}
/*
System.out.println("num obj.prop refs: "+ ASTExpr.totalObjPropRefs);
System.out.println("num reflection lookups: "+ ASTExpr.totalReflectionLookups);
*/
return outputFileST;
}
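// Net effect, using the Java target as an illustration: grammar T.g yields
// a recognizer file such as TParser.java or TLexer.java, an optional header
// file for targets that define one, and a T.tokens vocab interchange file.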
/** Some targets will have extra action scopes; C++, for example, may have
* '@headerfile:name {action}' or the like. Make sure the
* target accepts the scopes present in the action table.
*/
protected void verifyActionScopesOkForTarget(Map<String, Map<String, Object>> actions) {
for (Map.Entry<String, Map<String, Object>> entry : actions.entrySet()) {
String scope = entry.getKey();
if ( !target.isValidActionScope(grammar.type, scope) ) {
// get any action from the scope to get error location
Map<String, Object> scopeActions = entry.getValue();
GrammarAST actionAST =
(GrammarAST)scopeActions.values().iterator().next();
ErrorManager.grammarError(
ErrorManager.MSG_INVALID_ACTION_SCOPE,grammar,
actionAST.getToken(),scope,
grammar.getGrammarTypeString());
}
}
}
/** Actions may reference $x::y attributes, call translateAction on
* each action and replace that action in the Map.
*/
protected void translateActionAttributeReferences(Map<String, Map<String, Object>> actions) {
for (Map.Entry<String, Map<String, Object>> entry : actions.entrySet()) {
Map<String, Object> scopeActions = entry.getValue();
translateActionAttributeReferencesForSingleScope(null,scopeActions);
}
}
/** Use for translating rule @init{...} actions that have no scope */
public void translateActionAttributeReferencesForSingleScope(
Rule r,
Map<String, Object> scopeActions)
{
String ruleName=null;
if ( r!=null ) {
ruleName = r.name;
}
for (Map.Entry<String, Object> entry : scopeActions.entrySet()) {
String name = entry.getKey();
GrammarAST actionAST = (GrammarAST)entry.getValue();
List<? extends Object> chunks = translateAction(ruleName, actionAST);
scopeActions.put(name, chunks); // replace with translation
}
}
/** Error recovery in ANTLR recognizers.
*
* Based upon original ideas:
*
* Algorithms + Data Structures = Programs by Niklaus Wirth
*
* and
*
* A note on error recovery in recursive descent parsers:
* http://portal.acm.org/citation.cfm?id=947902.947905
*
* Later, Josef Grosch had some good ideas:
* Efficient and Comfortable Error Recovery in Recursive Descent Parsers:
* ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
*
* Like Grosch I implemented local FOLLOW sets that are combined at run-time
* upon error to avoid parsing overhead.
*/
public void generateLocalFOLLOW(GrammarAST referencedElementNode,
String referencedElementName,
String enclosingRuleName,
int elementIndex)
{
/*
System.out.println("compute FOLLOW "+grammar.name+"."+referencedElementNode.toString()+
" for "+referencedElementName+"#"+elementIndex +" in "+
enclosingRuleName+
" line="+referencedElementNode.getLine());
*/
NFAState followingNFAState = referencedElementNode.followingNFAState;
LookaheadSet follow = null;
if ( followingNFAState!=null ) {
// compute follow for this element and, as side-effect, track
// the rule LOOK sensitivity.
follow = grammar.FIRST(followingNFAState);
}
if ( follow==null ) {
ErrorManager.internalError("no follow state or cannot compute follow");
follow = new LookaheadSet();
}
if ( follow.member(Label.EOF) ) {
// TODO: can we just remove? Seems needed here:
// compilation_unit : global_statement* EOF
// Actually I guess we resync to EOF regardless
follow.remove(Label.EOF);
}
//System.out.println(" "+follow);
List<Integer> tokenTypeList;
long[] words;
if ( follow.tokenTypeSet==null ) {
words = new long[1];
tokenTypeList = new ArrayList<Integer>();
}
else {
BitSet bits = BitSet.of(follow.tokenTypeSet);
words = bits.toPackedArray();
tokenTypeList = follow.tokenTypeSet.toList();
}
// use the target to convert to hex strings (typically)
String[] wordStrings = new String[words.length];
for (int j = 0; j < words.length; j++) {
long w = words[j];
wordStrings[j] = target.getTarget64BitStringFromValue(w);
}
recognizerST.addAggr("bitsets.{name,inName,bits,tokenTypes,tokenIndex}",
referencedElementName,
enclosingRuleName,
wordStrings,
tokenTypeList,
Utils.integer(elementIndex));
outputFileST.addAggr("bitsets.{name,inName,bits,tokenTypes,tokenIndex}",
referencedElementName,
enclosingRuleName,
wordStrings,
tokenTypeList,
Utils.integer(elementIndex));
headerFileST.addAggr("bitsets.{name,inName,bits,tokenTypes,tokenIndex}",
referencedElementName,
enclosingRuleName,
wordStrings,
tokenTypeList,
Utils.integer(elementIndex));
}
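// In the Java target, for instance, each bitsets aggregate renders to a
// constant roughly like this (illustrative values):
//   public static final BitSet FOLLOW_ID_in_decl31 =
//       new BitSet(new long[]{0x0000000000000010L});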
// L O O K A H E A D D E C I S I O N G E N E R A T I O N
/** Generate code that computes the predicted alt given a DFA. The
* recognizerST can be either the main generated recognizerTemplate
* for storage in the main parser file or a separate file. It's up to
* the code that ultimately invokes the codegen.g grammar rule.
*
* Regardless, the output file and header file get a copy of the DFAs.
*/
public ST genLookaheadDecision(ST recognizerST,
DFA dfa)
{
ST decisionST;
// If we are doing inline DFA and this one is acyclic and LL(*)
// I have to check for is-non-LL(*) because if non-LL(*) the cyclic
// check is not done by DFA.verify(); that is, verify() avoids
// doesStateReachAcceptState() if non-LL(*)
if ( dfa.canInlineDecision() ) {
decisionST =
acyclicDFAGenerator.genFixedLookaheadDecision(getTemplates(), dfa);
}
else {
// generate any kind of DFA here (cyclic or acyclic)
dfa.createStateTables(this);
outputFileST.add("cyclicDFAs", dfa);
headerFileST.add("cyclicDFAs", dfa);
decisionST = templates.getInstanceOf("dfaDecision");
String description = dfa.getNFADecisionStartState().getDescription();
description = target.getTargetStringLiteralFromString(description);
if ( description!=null ) {
decisionST.add("description", description);
}
decisionST.add("decisionNumber",
Utils.integer(dfa.getDecisionNumber()));
}
return decisionST;
}
/** A special state is huge (too big for state tables) or has a predicated
* edge. Generate a simple if-then-else. Cannot be an accept state as
* they have no emanating edges. Don't worry about switch vs if-then-else
* because if you get here, the state is super complicated and needs an
* if-then-else. This is used by the new DFA scheme created June 2006.
*/
public ST generateSpecialState(DFAState s) {
ST stateST;
stateST = templates.getInstanceOf("cyclicDFAState");
stateST.add("needErrorClause", true);
stateST.add("semPredState",
s.isResolvedWithPredicates());
stateST.add("stateNumber", s.stateNumber);
stateST.add("decisionNumber", s.dfa.decisionNumber);
boolean foundGatedPred = false;
ST eotST = null;
for (int i = 0; i < s.getNumberOfTransitions(); i++) {
Transition edge = s.transition(i);
ST edgeST;
if ( edge.label.getAtom()==Label.EOT ) {
// this is the default clause; it has to be held until last
edgeST = templates.getInstanceOf("eotDFAEdge");
stateST.remove("needErrorClause");
eotST = edgeST;
}
else {
edgeST = templates.getInstanceOf("cyclicDFAEdge");
ST exprST =
genLabelExpr(templates,edge,1);
edgeST.add("labelExpr", exprST);
}
edgeST.add("edgeNumber", Utils.integer(i + 1));
edgeST.add("targetStateNumber",
Utils.integer(edge.target.stateNumber));
// stick in any gated predicates for any edge if not already a pred
if ( !edge.label.isSemanticPredicate() ) {
DFAState t = (DFAState)edge.target;
SemanticContext preds = t.getGatedPredicatesInNFAConfigurations();
if ( preds!=null ) {
foundGatedPred = true;
ST predST = preds.genExpr(this,
getTemplates(),
t.dfa);
edgeST.add("predicates", predST.render());
}
}
if ( edge.label.getAtom()!=Label.EOT ) {
stateST.add("edges", edgeST);
}
}
if ( foundGatedPred ) {
// state has >= 1 edge with a gated pred (syn or sem)
// must rewind input first, set flag.
stateST.add("semPredState", foundGatedPred);
}
if ( eotST!=null ) {
stateST.add("edges", eotST);
}
return stateST;
}
/** Generate an expression for traversing an edge. */
protected ST genLabelExpr(STGroup templates,
Transition edge,
int k)
{
Label label = edge.label;
if ( label.isSemanticPredicate() ) {
return genSemanticPredicateExpr(templates, edge);
}
if ( label.isSet() ) {
return genSetExpr(templates, label.getSet(), k, true);
}
// must be simple label
ST eST = templates.getInstanceOf("lookaheadTest");
eST.add("atom", getTokenTypeAsTargetLabel(label.getAtom()));
eST.add("atomAsInt", Utils.integer(label.getAtom()));
eST.add("k", Utils.integer(k));
return eST;
}
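// For a single atom such as token type ID at k=1, the lookaheadTest
// template typically renders to a comparison like LA(1)==ID
// (the exact form is target-dependent).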
protected ST genSemanticPredicateExpr(STGroup templates,
Transition edge)
{
DFA dfa = ((DFAState)edge.target).dfa; // which DFA are we in
Label label = edge.label;
SemanticContext semCtx = label.getSemanticContext();
return semCtx.genExpr(this,templates,dfa);
}
/** For intervals such as [3..3, 30..35], generate an expression that
* tests the lookahead similar to LA(1)==3 || (LA(1)>=30&&LA(1)<=35)
*/
public ST genSetExpr(STGroup templates,
IntSet set,
int k,
boolean partOfDFA)
{
if ( !(set instanceof IntervalSet) ) {
throw new IllegalArgumentException("unable to generate expressions for non IntervalSet objects");
}
IntervalSet iset = (IntervalSet)set;
if ( iset.getIntervals()==null || iset.getIntervals().isEmpty() ) {
ST emptyST = new ST(templates, "");
emptyST.impl.name = "empty-set-expr";
return emptyST;
}
String testSTName = "lookaheadTest";
String testRangeSTName = "lookaheadRangeTest";
String testSetSTName = "lookaheadSetTest";
String varSTName = "lookaheadVarName";
if ( !partOfDFA ) {
testSTName = "isolatedLookaheadTest";
testRangeSTName = "isolatedLookaheadRangeTest";
testSetSTName = "isolatedLookaheadSetTest";
varSTName = "isolatedLookaheadVarName";
}
ST setST = templates.getInstanceOf("setTest");
// If the SetTest template exists, separate the ranges:
// flatten the small ones into one list and make that a set test,
// and leave the others as they are.
if ( templates.isDefined(testSetSTName) ) {
// Flatten the IntervalSet into a list of integers.
ST sST = templates.getInstanceOf(testSetSTName);
Iterator<Interval> iter = iset.getIntervals().iterator();
int rangeNumber = 1;
while (iter.hasNext()) {
Interval I = iter.next();
int a = I.a;
int b = I.b;
// Not flattening the large ranges helps us avoid making a
// set that contains 90% of Unicode when we could just use
// a simple range like (LA(1)>=123 && LA(1)<=65535).
// This flattens all ranges of length 4 or less.
if (b - a < 4) {
for (int i = a; i <= b; i++) {
sST.add("values", getTokenTypeAsTargetLabel(i));
sST.add("valuesAsInt", Utils.integer(i));
}
} else {
ST eST = templates.getInstanceOf(testRangeSTName);
eST.add("lower", getTokenTypeAsTargetLabel(a));
eST.add("lowerAsInt", Utils.integer(a));
eST.add("upper", getTokenTypeAsTargetLabel(b));
eST.add("upperAsInt", Utils.integer(b));
eST.add("rangeNumber", Utils.integer(rangeNumber));
eST.add("k", Utils.integer(k));
setST.add("ranges", eST);
rangeNumber++;
}
}
sST.add("k", Utils.integer(k));
setST.add("ranges", sST);
return setST;
}
Iterator<Interval> iter = iset.getIntervals().iterator();
int rangeNumber = 1;
while (iter.hasNext()) {
Interval I = iter.next();
int a = I.a;
int b = I.b;
ST eST;
if ( a==b ) {
eST = templates.getInstanceOf(testSTName);
eST.add("atom", getTokenTypeAsTargetLabel(a));
eST.add("atomAsInt", Utils.integer(a));
//eST.add("k",Utils.integer(k));
}
else {
eST = templates.getInstanceOf(testRangeSTName);
eST.add("lower", getTokenTypeAsTargetLabel(a));
eST.add("lowerAsInt", Utils.integer(a));
eST.add("upper", getTokenTypeAsTargetLabel(b));
eST.add("upperAsInt", Utils.integer(b));
eST.add("rangeNumber", Utils.integer(rangeNumber));
}
eST.add("k", Utils.integer(k));
setST.add("ranges", eST);
rangeNumber++;
}
return setST;
}
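// Per the javadoc example above, the set [3..3, 30..35] at k=1 renders in a
// C-derived target to: LA(1)==3 || (LA(1)>=30 && LA(1)<=35)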
// T O K E N D E F I N I T I O N G E N E R A T I O N
/** Set the tokens and literals attributes in the incoming
* code template. This is not the token vocab interchange file, but
* rather a list of token type IDs needed by the recognizer.
*/
protected void genTokenTypeConstants(ST code) {
// make constants for the token types
for (String tokenID : grammar.getTokenIDs()) {
int tokenType = grammar.getTokenType(tokenID);
if ( tokenType==Label.EOF ||
tokenType>=Label.MIN_TOKEN_TYPE )
{
// don't do FAUX labels 'cept EOF
code.addAggr("tokens.{name,type}", tokenID, Utils.integer(tokenType));
}
}
}
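// In the Java target these entries render as constants such as
// "public static final int ID=7;" (illustrative).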
/** Generate a token names table that maps token type to a printable
* name: either the label like INT or the literal like "begin".
*/
protected void genTokenTypeNames(ST code) {
for (int t=Label.MIN_TOKEN_TYPE; t<=grammar.getMaxTokenType(); t++) {
String tokenName = grammar.getTokenDisplayName(t);
if ( tokenName!=null ) {
tokenName=target.getTargetStringLiteralFromString(tokenName, true);
code.add("tokenNames", tokenName);
}
}
}
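// The resulting tokenNames table lets the generated recognizer report
// symbolic names (e.g., INT or "begin") in error messages instead of bare
// token type numbers.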
/** Get a meaningful name for a token type useful during code generation.
* Literals without associated names are converted to the string equivalent
* of their integer values. Used to generate x==ID and x==34 type comparisons
* etc... Essentially we are looking for the most obvious way to refer
* to a token type in the generated code. If in the lexer, return the
* char literal translated to the target language. For example, ttype=10
* will yield '\n' from the getTokenDisplayName method. That must
* be converted to the target language's literals. For most C-derived
* languages no translation is needed.
*/
public String getTokenTypeAsTargetLabel(int ttype) {
if ( grammar.type==Grammar.LEXER ) {
String name = grammar.getTokenDisplayName(ttype);
return target.getTargetCharLiteralFromANTLRCharLiteral(this,name);
}
return target.getTokenTypeAsTargetLabel(this,ttype);
}
/** Generate a token vocab file with all the token names/types. For example:
* ID=7
* FOR=8
* 'for'=8
*
* This is independent of the target language; used by antlr internally
*/
protected ST genTokenVocabOutput() {
ST vocabFileST = new ST(vocabFilePattern);
vocabFileST.add("literals",(Object)null); // "define" literals arg
vocabFileST.add("tokens",(Object)null);
vocabFileST.impl.name = "vocab-file";
// make constants for the token names
for (String tokenID : grammar.getTokenIDs()) {
int tokenType = grammar.getTokenType(tokenID);
if ( tokenType>=Label.MIN_TOKEN_TYPE ) {
vocabFileST.addAggr("tokens.{name,type}", tokenID, Utils.integer(tokenType));
}
}
// now dump the strings
for (String literal : grammar.getStringLiterals()) {
int tokenType = grammar.getTokenType(literal);
if ( tokenType>=Label.MIN_TOKEN_TYPE ) {
vocabFileST.addAggr("tokens.{name,type}", literal, Utils.integer(tokenType));
}
}
return vocabFileST;
}
public List<? extends Object> translateAction(String ruleName,
GrammarAST actionTree)
{
if ( actionTree.getType()==ANTLRParser.ARG_ACTION ) {
return translateArgAction(ruleName, actionTree);
}
ActionTranslator translator = new ActionTranslator(this,ruleName,actionTree);
List<Object> chunks = translator.translateToChunks();
chunks = target.postProcessAction(chunks, actionTree.token);
return chunks;
}
}