/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.tool;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.Tree;
import org.antlr.runtime.tree.TreeVisitor;
import org.antlr.runtime.tree.TreeVisitorAction;
import org.antlr.v4.Tool;
import org.antlr.v4.analysis.LeftRecursiveRuleTransformer;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.BlockSetTransformer;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.parse.GrammarToken;
import org.antlr.v4.runtime.misc.DoubleKeyMap;
import org.antlr.v4.runtime.misc.Pair;
import org.antlr.v4.tool.ast.AltAST;
import org.antlr.v4.tool.ast.BlockAST;
import org.antlr.v4.tool.ast.GrammarAST;
import org.antlr.v4.tool.ast.GrammarASTWithOptions;
import org.antlr.v4.tool.ast.GrammarRootAST;
import org.antlr.v4.tool.ast.RuleAST;
import org.antlr.v4.tool.ast.TerminalAST;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/** Handle left-recursion and block-set transforms */
public class GrammarTransformPipeline {
public Grammar g;
public Tool tool;
public GrammarTransformPipeline(Grammar g, Tool tool) {
this.g = g;
this.tool = tool;
}
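// Illustrative usage (a minimal sketch, not code from the ANTLR Tool itself;
// assumes 'tool' is a configured org.antlr.v4.Tool and 'g' a loaded Grammar):
//   GrammarTransformPipeline pipeline = new GrammarTransformPipeline(g, tool);
//   pipeline.process(); // integrates imports, reduces blocks to sets, expands parameterized loops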
public void process() {
GrammarRootAST root = g.ast;
if ( root==null ) return;
tool.log("grammar", "before: "+root.toStringTree());
integrateImportedGrammars(g);
reduceBlocksToSets(root);
expandParameterizedLoops(root);
tool.log("grammar", "after: "+root.toStringTree());
}
public void reduceBlocksToSets(GrammarAST root) {
CommonTreeNodeStream nodes = new CommonTreeNodeStream(new GrammarASTAdaptor(), root);
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
BlockSetTransformer transformer = new BlockSetTransformer(nodes, g);
transformer.setTreeAdaptor(adaptor);
transformer.downup(root);
}
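// Conceptually, this pass lets a block of single-token alternatives such as
// ('+' | '-' | '*') be collapsed into one SET subtree instead of a
// multi-alternative BLOCK, so later phases can treat it as a single set match.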
/** Find and replace
* ID*[','] with ID (',' ID)*
* ID+[','] with ID (',' ID)+
* (x {action} y)+[','] with x {action} y (',' x {action} y)+
*
* Parameter must be a token.
* todo: do we want?
*/
public void expandParameterizedLoops(GrammarAST root) {
TreeVisitor v = new TreeVisitor(new GrammarASTAdaptor());
v.visit(root, new TreeVisitorAction() {
@Override
public Object pre(Object t) {
if ( ((GrammarAST)t).getType() == 3 ) {
return expandParameterizedLoop((GrammarAST)t);
}
return t;
}
@Override
public Object post(Object t) { return t; }
});
}
public GrammarAST expandParameterizedLoop(GrammarAST t) {
// todo: update grammar, alter AST
return t;
}
/** Utility visitor that sets grammar ptr in each node */
public static void setGrammarPtr(final Grammar g, GrammarAST tree) {
if ( tree==null ) return;
// ensure each node has pointer to surrounding grammar
TreeVisitor v = new TreeVisitor(new GrammarASTAdaptor());
v.visit(tree, new TreeVisitorAction() {
@Override
public Object pre(Object t) { ((GrammarAST)t).g = g; return t; }
@Override
public Object post(Object t) { return t; }
});
}
public static void augmentTokensWithOriginalPosition(final Grammar g, GrammarAST tree) {
if ( tree==null ) return;
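// Nodes synthesized by LeftRecursiveRuleTransformer carry a tokenIndex
// element option pointing back at the token they were derived from; this
// method restores that original position so later diagnostics and
// toTokenString() refer to the user's source rather than the rewritten tree.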
List<GrammarAST> optionsSubTrees = tree.getNodesWithType(ANTLRParser.ELEMENT_OPTIONS);
for (int i = 0; i < optionsSubTrees.size(); i++) {
GrammarAST t = optionsSubTrees.get(i);
CommonTree elWithOpt = t.parent;
if ( elWithOpt instanceof GrammarASTWithOptions ) {
Map<String, GrammarAST> options = ((GrammarASTWithOptions) elWithOpt).getOptions();
if ( options.containsKey(LeftRecursiveRuleTransformer.TOKENINDEX_OPTION_NAME) ) {
GrammarToken newTok = new GrammarToken(g, elWithOpt.getToken());
newTok.originalTokenIndex = Integer.valueOf(options.get(LeftRecursiveRuleTransformer.TOKENINDEX_OPTION_NAME).getText());
elWithOpt.token = newTok;
GrammarAST originalNode = g.ast.getNodeWithTokenIndex(newTok.getTokenIndex());
if (originalNode != null) {
// update the AST node start/stop index to match the values
// of the corresponding node in the original parse tree.
elWithOpt.setTokenStartIndex(originalNode.getTokenStartIndex());
elWithOpt.setTokenStopIndex(originalNode.getTokenStopIndex());
}
else {
// the original AST node could not be located by index;
// make sure to assign valid values for the start/stop
// index so toTokenString will not throw exceptions.
elWithOpt.setTokenStartIndex(newTok.getTokenIndex());
elWithOpt.setTokenStopIndex(newTok.getTokenIndex());
}
}
}
}
}
/** Merge all the rules, token definitions, and named actions from
imported grammars into the root grammar tree. Perform:
(tokens { X (= Y 'y')) + (tokens { Z ) -> (tokens { X (= Y 'y') Z)
(@ members {foo}) + (@ members {bar}) -> (@ members {foobar})
(RULES (RULE x y)) + (RULES (RULE z)) -> (RULES (RULE x y z))
Rules in root prevent same rule from being appended to RULES node.
The goal is a complete combined grammar so we can ignore subordinate
grammars.
*/
public void integrateImportedGrammars(Grammar rootGrammar) {
List<Grammar> imports = rootGrammar.getAllImportedGrammars();
if ( imports==null ) return;
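// For example (illustrative grammars): given
//   grammar Root; import Sub;   r : A ;
//   grammar Sub;                r : B ;   s : C ;
// only Sub's rule s is appended to Root's RULES node, because Root already
// defines r and root definitions take precedence over imported ones.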
GrammarAST root = rootGrammar.ast;
GrammarAST id = (GrammarAST) root.getChild(0);
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(id.token.getInputStream());
GrammarAST channelsRoot = (GrammarAST)root.getFirstChildWithType(ANTLRParser.CHANNELS);
GrammarAST tokensRoot = (GrammarAST)root.getFirstChildWithType(ANTLRParser.TOKENS_SPEC);
List<GrammarAST> actionRoots = root.getNodesWithType(ANTLRParser.AT);
// Compute list of rules in root grammar and ensure we have a RULES node
GrammarAST RULES = (GrammarAST)root.getFirstChildWithType(ANTLRParser.RULES);
Set<String> rootRuleNames = new HashSet<String>();
// make list of rules we have in root grammar
List<GrammarAST> rootRules = RULES.getNodesWithType(ANTLRParser.RULE);
for (GrammarAST r : rootRules) rootRuleNames.add(r.getChild(0).getText());
// make list of modes we have in root grammar
List<GrammarAST> rootModes = root.getNodesWithType(ANTLRParser.MODE);
Set<String> rootModeNames = new HashSet<String>();
for (GrammarAST m : rootModes) rootModeNames.add(m.getChild(0).getText());
List<GrammarAST> addedModes = new ArrayList<GrammarAST>();
for (Grammar imp : imports) {
// COPY CHANNELS
GrammarAST imp_channelRoot = (GrammarAST)imp.ast.getFirstChildWithType(ANTLRParser.CHANNELS);
if ( imp_channelRoot != null) {
rootGrammar.tool.log("grammar", "imported channels: "+imp_channelRoot.getChildren());
if (channelsRoot==null) {
channelsRoot = imp_channelRoot.dupTree();
channelsRoot.g = rootGrammar;
root.insertChild(1, channelsRoot); // ^(GRAMMAR ID TOKENS...)
} else {
for (int c = 0; c < imp_channelRoot.getChildCount(); ++c) {
String channel = imp_channelRoot.getChild(c).getText();
boolean channelIsInRootGrammar = false;
for (int rc = 0; rc < channelsRoot.getChildCount(); ++rc) {
String rootChannel = channelsRoot.getChild(rc).getText();
if (rootChannel.equals(channel)) {
channelIsInRootGrammar = true;
break;
}
}
if (!channelIsInRootGrammar) {
channelsRoot.addChild(imp_channelRoot.getChild(c).dupNode());
}
}
}
}
// COPY TOKENS
GrammarAST imp_tokensRoot = (GrammarAST)imp.ast.getFirstChildWithType(ANTLRParser.TOKENS_SPEC);
if ( imp_tokensRoot!=null ) {
rootGrammar.tool.log("grammar", "imported tokens: "+imp_tokensRoot.getChildren());
if ( tokensRoot==null ) {
tokensRoot = (GrammarAST)adaptor.create(ANTLRParser.TOKENS_SPEC, "TOKENS");
tokensRoot.g = rootGrammar;
root.insertChild(1, tokensRoot); // ^(GRAMMAR ID TOKENS...)
}
tokensRoot.addChildren(Arrays.asList(imp_tokensRoot.getChildren().toArray(new Tree[0])));
}
List<GrammarAST> all_actionRoots = new ArrayList<GrammarAST>();
List<GrammarAST> imp_actionRoots = imp.ast.getAllChildrenWithType(ANTLRParser.AT);
if ( actionRoots!=null ) all_actionRoots.addAll(actionRoots);
all_actionRoots.addAll(imp_actionRoots);
// COPY ACTIONS
if ( imp_actionRoots!=null ) {
DoubleKeyMap<String, String, GrammarAST> namedActions =
new DoubleKeyMap<String, String, GrammarAST>();
rootGrammar.tool.log("grammar", "imported actions: "+imp_actionRoots);
for (GrammarAST at : all_actionRoots) {
String scopeName = rootGrammar.getDefaultActionScope();
GrammarAST scope, name, action;
if ( at.getChildCount()>2 ) { // must have a scope
scope = (GrammarAST)at.getChild(0);
scopeName = scope.getText();
name = (GrammarAST)at.getChild(1);
action = (GrammarAST)at.getChild(2);
}
else {
name = (GrammarAST)at.getChild(0);
action = (GrammarAST)at.getChild(1);
}
GrammarAST prevAction = namedActions.get(scopeName, name.getText());
if ( prevAction==null ) {
namedActions.put(scopeName, name.getText(), action);
}
else {
if ( prevAction.g == at.g ) {
rootGrammar.tool.errMgr.grammarError(ErrorType.ACTION_REDEFINITION,
at.g.fileName, name.token, name.getText());
}
else {
String s1 = prevAction.getText();
s1 = s1.substring(1, s1.length()-1);
String s2 = action.getText();
s2 = s2.substring(1, s2.length()-1);
String combinedAction = "{"+s1 + '\n'+ s2+"}";
prevAction.token.setText(combinedAction);
}
}
}
// at this point, we have complete list of combined actions,
// some of which are already living in root grammar.
// Merge in any actions not in root grammar into root's tree.
for (String scopeName : namedActions.keySet()) {
for (String name : namedActions.keySet(scopeName)) {
GrammarAST action = namedActions.get(scopeName, name);
rootGrammar.tool.log("grammar", action.g.name+" "+scopeName+":"+name+"="+action.getText());
if ( action.g != rootGrammar ) {
root.insertChild(1, action.getParent());
}
}
}
}
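// e.g. (illustrative): @members {int i;} in the root combined with
// @members {int j;} from an import yields a single action whose text is
// "{int i;\nint j;}", matching the {foo}+{bar} -> {foobar} merge described above.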
// COPY MODES
// The strategy is to copy all the mode sections rules across to any
// mode section in the new grammar with the same name or a new
// mode section if no matching mode is resolved. Rules which are
// already in the new grammar are ignored for copy. If the mode
// section being added ends up empty it is not added to the merged
// grammar.
List<GrammarAST> modes = imp.ast.getNodesWithType(ANTLRParser.MODE);
if (modes != null) {
for (GrammarAST m : modes) {
rootGrammar.tool.log("grammar", "imported mode: " + m.toStringTree());
String name = m.getChild(0).getText();
boolean rootAlreadyHasMode = rootModeNames.contains(name);
GrammarAST destinationAST = null;
if (rootAlreadyHasMode) {
for (GrammarAST m2 : rootModes) {
if (m2.getChild(0).getText().equals(name)) {
destinationAST = m2;
break;
}
}
} else {
destinationAST = m.dupNode();
destinationAST.addChild(m.getChild(0).dupNode());
}
int addedRules = 0;
List<GrammarAST> modeRules = m.getAllChildrenWithType(ANTLRParser.RULE);
for (GrammarAST r : modeRules) {
rootGrammar.tool.log("grammar", "imported rule: "+r.toStringTree());
String ruleName = r.getChild(0).getText();
boolean rootAlreadyHasRule = rootRuleNames.contains(ruleName);
if (!rootAlreadyHasRule) {
destinationAST.addChild(r);
addedRules++;
rootRuleNames.add(ruleName);
}
}
if (!rootAlreadyHasMode && addedRules > 0) {
rootGrammar.ast.addChild(destinationAST);
rootModeNames.add(name);
rootModes.add(destinationAST);
}
}
}
// COPY RULES
// Rules copied in the mode copy phase are not copied again.
List<GrammarAST> rules = imp.ast.getNodesWithType(ANTLRParser.RULE);
if ( rules!=null ) {
for (GrammarAST r : rules) {
rootGrammar.tool.log("grammar", "imported rule: "+r.toStringTree());
String name = r.getChild(0).getText();
boolean rootAlreadyHasRule = rootRuleNames.contains(name);
if ( !rootAlreadyHasRule ) {
RULES.addChild(r); // merge in if not overridden
rootRuleNames.add(name);
}
}
}
GrammarAST optionsRoot = (GrammarAST)imp.ast.getFirstChildWithType(ANTLRParser.OPTIONS);
if ( optionsRoot!=null ) {
// suppress the warning if the options match the options specified
// in the root grammar
// https://github.com/antlr/antlr4/issues/707
boolean hasNewOption = false;
for (Map.Entry<String, GrammarAST> option : imp.ast.getOptions().entrySet()) {
String importOption = imp.ast.getOptionString(option.getKey());
if (importOption == null) {
continue;
}
String rootOption = rootGrammar.ast.getOptionString(option.getKey());
if (!importOption.equals(rootOption)) {
hasNewOption = true;
break;
}
}
if (hasNewOption) {
rootGrammar.tool.errMgr.grammarError(ErrorType.OPTIONS_IN_DELEGATE,
optionsRoot.g.fileName, optionsRoot.token, imp.name);
}
}
}
rootGrammar.tool.log("grammar", "Grammar: "+rootGrammar.ast.toStringTree());
}
/** Build lexer grammar from combined grammar that looks like:
*
* (COMBINED_GRAMMAR A
* (tokens { X (= Y 'y'))
* (OPTIONS (= x 'y'))
* (@ members {foo})
* (@ lexer header {package jj;})
* (RULES (RULE .+)))
*
* Move rules and actions to new tree, don't dup. Split AST apart.
* We'll have this Grammar share token symbols later; don't generate
* tokenVocab or tokens{} section. Copy over named actions.
*
* Side-effects: it removes children from GRAMMAR & RULES nodes
* in combined AST. Anything cut out is dup'd before
* adding to lexer to avoid "who's ur daddy" issues
*/
public GrammarRootAST extractImplicitLexer(Grammar combinedGrammar) {
GrammarRootAST combinedAST = combinedGrammar.ast;
//tool.log("grammar", "before="+combinedAST.toStringTree());
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.token.getInputStream());
GrammarAST[] elements = combinedAST.getChildren().toArray(new GrammarAST[0]);
// MAKE A GRAMMAR ROOT and ID
String lexerName = combinedAST.getChild(0).getText()+"Lexer";
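// e.g., a combined "grammar A;" yields an implicit lexer grammar named ALexer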
GrammarRootAST lexerAST =
new GrammarRootAST(new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR"), combinedGrammar.ast.tokenStream);
lexerAST.grammarType = ANTLRParser.LEXER;
lexerAST.token.setInputStream(combinedAST.token.getInputStream());
lexerAST.addChild((GrammarAST)adaptor.create(ANTLRParser.ID, lexerName));
// COPY OPTIONS
GrammarAST optionsRoot =
(GrammarAST)combinedAST.getFirstChildWithType(ANTLRParser.OPTIONS);
if ( optionsRoot!=null && optionsRoot.getChildCount()!=0 ) {
GrammarAST lexerOptionsRoot = (GrammarAST)adaptor.dupNode(optionsRoot);
lexerAST.addChild(lexerOptionsRoot);
GrammarAST[] options = optionsRoot.getChildren().toArray(new GrammarAST[0]);
for (GrammarAST o : options) {
String optionName = o.getChild(0).getText();
if ( Grammar.lexerOptions.contains(optionName) &&
!Grammar.doNotCopyOptionsToLexer.contains(optionName) )
{
GrammarAST optionTree = (GrammarAST)adaptor.dupTree(o);
lexerOptionsRoot.addChild(optionTree);
lexerAST.setOption(optionName, (GrammarAST)optionTree.getChild(1));
}
}
}
// COPY all named actions, but only move those with lexer:: scope
List<GrammarAST> actionsWeMoved = new ArrayList<GrammarAST>();
for (GrammarAST e : elements) {
if ( e.getType()==ANTLRParser.AT ) {
lexerAST.addChild((Tree)adaptor.dupTree(e));
if ( e.getChild(0).getText().equals("lexer") ) {
actionsWeMoved.add(e);
}
}
}
for (GrammarAST r : actionsWeMoved) {
combinedAST.deleteChild( r );
}
GrammarAST combinedRulesRoot =
(GrammarAST)combinedAST.getFirstChildWithType(ANTLRParser.RULES);
if ( combinedRulesRoot==null ) return lexerAST;
// MOVE lexer rules
GrammarAST lexerRulesRoot =
(GrammarAST)adaptor.create(ANTLRParser.RULES, "RULES");
lexerAST.addChild(lexerRulesRoot);
List<GrammarAST> rulesWeMoved = new ArrayList<GrammarAST>();
GrammarASTWithOptions[] rules;
if (combinedRulesRoot.getChildCount() > 0) {
rules = combinedRulesRoot.getChildren().toArray(new GrammarASTWithOptions[0]);
}
else {
rules = new GrammarASTWithOptions[0];
}
for (GrammarASTWithOptions r : rules) {
String ruleName = r.getChild(0).getText();
if (Grammar.isTokenName(ruleName)) {
lexerRulesRoot.addChild((Tree)adaptor.dupTree(r));
rulesWeMoved.add(r);
}
}
for (GrammarAST r : rulesWeMoved) {
combinedRulesRoot.deleteChild( r );
}
// Will track 'if' from IF : 'if' ; rules to avoid defining new token for 'if'
List<Pair<GrammarAST,GrammarAST>> litAliases =
Grammar.getStringLiteralAliasesFromLexerRules(lexerAST);
Set<String> stringLiterals = combinedGrammar.getStringLiterals();
// add strings from combined grammar (and imported grammars) into lexer
// put them first as they are keywords; must resolve ambigs to these rules
// tool.log("grammar", "strings from parser: "+stringLiterals);
int insertIndex = 0;
nextLit:
for (String lit : stringLiterals) {
// if lexer already has a rule for literal, continue
if ( litAliases!=null ) {
for (Pair<GrammarAST,GrammarAST> pair : litAliases) {
GrammarAST litAST = pair.b;
if ( lit.equals(litAST.getText()) ) continue nextLit;
}
}
// create for each literal: (RULE <uniquename> (BLOCK (ALT <lit>)))
String rname = combinedGrammar.getStringLiteralLexerRuleName(lit);
// can't use wizard; need special node types
GrammarAST litRule = new RuleAST(ANTLRParser.RULE);
BlockAST blk = new BlockAST(ANTLRParser.BLOCK);
AltAST alt = new AltAST(ANTLRParser.ALT);
TerminalAST slit = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit));
alt.addChild(slit);
blk.addChild(alt);
CommonToken idToken = new CommonToken(ANTLRParser.TOKEN_REF, rname);
litRule.addChild(new TerminalAST(idToken));
litRule.addChild(blk);
lexerRulesRoot.insertChild(insertIndex, litRule);
// lexerRulesRoot.getChildren().add(0, litRule);
lexerRulesRoot.freshenParentAndChildIndexes(); // reset indexes and set litRule parent
// next literal will be added after the one just added
insertIndex++;
}
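// After this loop every parser-side literal such as ';' or 'return' is
// covered by a lexer rule: either an existing alias like IF : 'if' ; or a
// synthesized rule whose name comes from getStringLiteralLexerRuleName
// (typically of the form T__<n>).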
// TODO: take out after stable if slow
lexerAST.sanityCheckParentAndChildIndexes();
combinedAST.sanityCheckParentAndChildIndexes();
// tool.log("grammar", combinedAST.toTokenString());
combinedGrammar.tool.log("grammar", "after extract implicit lexer ="+combinedAST.toStringTree());
combinedGrammar.tool.log("grammar", "lexer ="+lexerAST.toStringTree());
if ( lexerRulesRoot.getChildCount()==0 ) return null;
return lexerAST;
}
}