// org.antlr.v4.tool.Grammar -- Maven / Gradle / Ivy
/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.tool;
import org.antlr.v4.Tool;
import org.antlr.v4.analysis.LeftRecursiveRuleTransformer;
import org.antlr.v4.automata.ParserATNFactory;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.misc.OrderedHashMap;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.parse.GrammarASTAdaptor;
import org.antlr.v4.parse.GrammarTreeVisitor;
import org.antlr.v4.parse.TokenVocabParser;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.LexerInterpreter;
import org.antlr.v4.runtime.ParserInterpreter;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.VocabularyImpl;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNDeserializer;
import org.antlr.v4.runtime.atn.ATNSerializer;
import org.antlr.v4.runtime.atn.SemanticContext;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.misc.IntSet;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.Pair;
import org.antlr.v4.tool.ast.ActionAST;
import org.antlr.v4.tool.ast.GrammarAST;
import org.antlr.v4.tool.ast.GrammarASTWithOptions;
import org.antlr.v4.tool.ast.GrammarRootAST;
import org.antlr.v4.tool.ast.PredAST;
import org.antlr.v4.tool.ast.RuleAST;
import org.antlr.v4.tool.ast.TerminalAST;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class Grammar implements AttributeResolver {
/**
* File name passed along by the string-based constructors, which build a
* grammar from in-memory text rather than from a file.
*
* NOTE(review): this literal (and the two below) is empty here; other text
* between angle brackets has visibly been lost from this copy of the file, so
* the value may originally have been an angle-bracketed marker -- confirm
* against version control.
*/
public static final String GRAMMAR_FROM_STRING_NAME = "";
/**
* This value is used in the following situations to indicate that a token
* type does not have an associated name which can be directly referenced in
* a grammar.
*
* <ul>
* <li>This value is the name and display name for the token with type
* {@link Token#INVALID_TYPE}.</li>
* <li>This value is the name for tokens with a type not represented by a
* named token. The display name for these tokens is simply the string
* representation of the token type as an integer.</li>
* </ul>
*/
public static final String INVALID_TOKEN_NAME = "";
/**
* This value is used as the name for elements in the array returned by
* {@link #getRuleNames} for indexes not associated with a rule.
*/
public static final String INVALID_RULE_NAME = "";
/** Option names registered for parser grammars. */
public static final Set<String> parserOptions = new HashSet<>();
static {
    parserOptions.add("superClass");
    parserOptions.add("contextSuperClass");
    parserOptions.add("TokenLabelType");
    parserOptions.add("tokenVocab");
    parserOptions.add("language");
    parserOptions.add("accessLevel");
    parserOptions.add("exportMacro");
}
/** Option names for lexer grammars; this is the same set instance as {@link #parserOptions}. */
public static final Set<String> lexerOptions = parserOptions;
/** Option names for rules; nothing is registered here. */
public static final Set<String> ruleOptions = new HashSet<>();
/** Option names for parser blocks; nothing is registered here. */
public static final Set<String> ParserBlockOptions = new HashSet<>();
/** Option names for lexer blocks; nothing is registered here. */
public static final Set<String> LexerBlockOptions = new HashSet<>();
/** Legal options for rule refs like {@code id<key=value>} */
public static final Set<String> ruleRefOptions = new HashSet<>();
static {
    ruleRefOptions.add(LeftRecursiveRuleTransformer.PRECEDENCE_OPTION_NAME);
    ruleRefOptions.add(LeftRecursiveRuleTransformer.TOKENINDEX_OPTION_NAME);
}
/** Legal options for terminal refs like {@code ID<assoc=right>} */
public static final Set<String> tokenOptions = new HashSet<>();
static {
    tokenOptions.add("assoc");
    tokenOptions.add(LeftRecursiveRuleTransformer.TOKENINDEX_OPTION_NAME);
}
/** Option names for actions; nothing is registered here. */
public static final Set<String> actionOptions = new HashSet<>();
/** Option names for semantic predicates. */
public static final Set<String> semPredOptions = new HashSet<>();
static {
    semPredOptions.add(LeftRecursiveRuleTransformer.PRECEDENCE_OPTION_NAME);
    semPredOptions.add("fail");
}
/** Grammar-level option names listed as not to be copied to a lexer. */
public static final Set<String> doNotCopyOptionsToLexer = new HashSet<>();
static {
    doNotCopyOptionsToLexer.add("superClass");
    doNotCopyOptionsToLexer.add("TokenLabelType");
    doNotCopyOptionsToLexer.add("tokenVocab");
}
/**
* Maps a key of the form {@code grammarType:labelKind} (for example
* {@code parser:RULE_LABEL}) to a predefined attribute dictionary. Only
* parser and combined grammars have entries.
*
* NOTE(review): declared as a raw {@code Map}; the type arguments appear to
* have been lost from this copy of the file -- confirm the value type before
* restoring them.
*/
public static final Map grammarAndLabelRefTypeToScope =
new HashMap();
static {
grammarAndLabelRefTypeToScope.put("parser:RULE_LABEL", Rule.predefinedRulePropertiesDict);
grammarAndLabelRefTypeToScope.put("parser:TOKEN_LABEL", AttributeDict.predefinedTokenDict);
grammarAndLabelRefTypeToScope.put("combined:RULE_LABEL", Rule.predefinedRulePropertiesDict);
grammarAndLabelRefTypeToScope.put("combined:TOKEN_LABEL", AttributeDict.predefinedTokenDict);
}
/** Grammar name; {@link #Grammar(Tool, GrammarRootAST)} takes it from the text of the AST's first child. */
public String name;
/** Root of the grammar's AST. */
public GrammarRootAST ast;
/** Track token stream used to create this grammar */
public final org.antlr.runtime.TokenStream tokenStream;
/** If we transform grammar, track original unaltered token stream.
 *  This is set to the same value as tokenStream when tokenStream is
 *  initially set.
 *
 *  If this field differs from tokenStream, then we have transformed
 *  the grammar.
 */
public org.antlr.runtime.TokenStream originalTokenStream;
public String text; // testing only
public String fileName;
/** Was this parser grammar created from a COMBINED grammar?  If so,
 *  this is what we extracted.
 */
public LexerGrammar implicitLexer;
/** If this is an extracted/implicit lexer, we point at original grammar */
public Grammar originalGrammar;
/** If we're imported, who imported us? If null, implies grammar is root */
public Grammar parent;
/** Grammars directly imported by this one; stays {@code null} until
 *  {@link #loadImportedGrammars()} finds an import statement.
 */
public List<Grammar> importedGrammars;
/** All rules defined in this specific grammar, not imported. Also does
 *  not include lexical rules if combined.
 */
public OrderedHashMap<String, Rule> rules = new OrderedHashMap<>();
/** Rules by {@link Rule#index}; kept in step with {@link #rules} by
 *  {@link #defineRule} and {@link #undefineRule}.
 */
public List<Rule> indexToRule = new ArrayList<>();
int ruleNumber = 0; // used to get rule indexes (0..n-1)
int stringLiteralRuleNumber = 0; // used to invent rule names for 'keyword', ';', ... (0..n-1)
/** The ATN that represents the grammar with edges labelled with tokens
* or epsilon. It is more suitable to analysis than an AST representation.
* {@link #getATN()} creates it on first use when it is still {@code null}.
*/
public ATN atn;
// NOTE(review): the next three collections are declared with raw types; the
// type arguments appear to have been lost from this copy of the file and the
// element types cannot be determined from the code here -- confirm and restore.
public Map stateToGrammarRegionMap;
public Map decisionDFAs = new HashMap();
public List decisionLOOK;
/** Tool instance this grammar reports to and is processed by. */
public final Tool tool;
/** Token names and literal tokens like "void" are uniquely indexed.
 *  with -1 implying EOF.  Characters are different; they go from
 *  -1 (EOF) to \uFFFE.  For example, 0 could be a binary byte you
 *  want to lexer.  Labels of DFA/ATN transitions can be both tokens
 *  and characters.  I use negative numbers for bookkeeping labels
 *  like EPSILON. Char/String literals and token types overlap in the same
 *  space, however.
 */
int maxTokenType = Token.MIN_USER_TOKEN_TYPE -1;
/**
 * Map token like {@code ID} (but not literals like {@code 'while'}) to its
 * token type.
 */
public final Map<String, Integer> tokenNameToTypeMap = new LinkedHashMap<>();
/**
 * Map token literals like {@code 'while'} to its token type. It may be that
 * {@code WHILE="while"=35}, in which case both {@link #tokenNameToTypeMap}
 * and this field will have entries both mapped to 35.
 */
public final Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<>();
/**
 * Reverse index for {@link #stringLiteralToTypeMap}. Indexed with raw token
 * type. 0 is invalid.
 */
public final List<String> typeToStringLiteralList = new ArrayList<>();
/**
 * Map a token type to its token name. Indexed with raw token type. 0 is
 * invalid.
 */
public final List<String> typeToTokenList = new ArrayList<>();
/**
 * The maximum channel value which is assigned by this grammar. Values below
 * {@link Token#MIN_USER_CHANNEL_VALUE} are assumed to be predefined.
 */
int maxChannelType = Token.MIN_USER_CHANNEL_VALUE - 1;
/**
 * Map channel like {@code COMMENTS_CHANNEL} to its constant channel value.
 * Only user-defined channels are defined in this map.
 */
public final Map<String, Integer> channelNameToValueMap = new LinkedHashMap<>();
/**
 * Map a constant channel value to its name. Indexed with raw channel value.
 * The predefined channels {@link Token#DEFAULT_CHANNEL} and
 * {@link Token#HIDDEN_CHANNEL} are not stored in this list, so the values
 * at the corresponding indexes are {@code null}.
 */
public final List<String> channelValueToNameList = new ArrayList<>();
/** Map a name to an action.
 *  The code generator will use this to fill holes in the output files.
 *  I track the AST node for the action in case I need the line number
 *  for errors.
 */
public Map<String, ActionAST> namedActions = new HashMap<>();
/** Tracks all user lexer actions in all alternatives of all rules.
 *  Doesn't track sempreds.  maps tree node to action index (alt number 1..n).
 *
 *  NOTE(review): left as a raw type because nothing in this part of the file
 *  reads or writes it; confirm the key/value types before parameterizing.
 */
public LinkedHashMap lexerActions = new LinkedHashMap();
/** All sempreds found in grammar; maps tree node to sempred index;
 *  sempred index is 0..n-1
 */
public LinkedHashMap<PredAST, Integer> sempreds = new LinkedHashMap<>();
/** Map the other direction upon demand */
public LinkedHashMap<Integer, PredAST> indexToPredMap;
/** Prefix of the rule names invented by {@link #getStringLiteralLexerRuleName}. */
public static final String AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";
/**
 * Wrap an already-parsed grammar AST.
 *
 * @param tool the tool instance to associate with this grammar
 * @param ast  the grammar's root AST; must be non-null and carry a token stream
 * @throws NullPointerException     if {@code ast} is {@code null}
 * @throws IllegalArgumentException if {@code ast} has no token stream
 */
public Grammar(Tool tool, GrammarRootAST ast) {
    if (ast == null) {
        throw new NullPointerException("ast");
    }
    if (ast.tokenStream == null) {
        throw new IllegalArgumentException("ast must have a token stream");
    }

    this.ast = ast;
    this.tool = tool;
    // the grammar name is the text of the AST's first child
    this.name = ast.getChild(0).getText();
    // both stream fields start out pointing at the AST's token stream
    this.tokenStream = ast.tokenStream;
    this.originalTokenStream = this.tokenStream;

    initTokenSymbolTables();
}
/** For testing: build a grammar from text, using {@link #GRAMMAR_FROM_STRING_NAME} as the file name and no listener. */
public Grammar(String grammarText) throws org.antlr.runtime.RecognitionException {
this(GRAMMAR_FROM_STRING_NAME, grammarText, null);
}
/** Build a grammar from text, importing the vocabulary of {@code tokenVocabSource}; no listener is attached. */
public Grammar(String grammarText, LexerGrammar tokenVocabSource) throws org.antlr.runtime.RecognitionException {
this(GRAMMAR_FROM_STRING_NAME, grammarText, tokenVocabSource, null);
}
/** For testing: build a grammar from text and pass {@code listener} on to the full constructor. */
public Grammar(String grammarText, ANTLRToolListener listener)
throws org.antlr.runtime.RecognitionException
{
this(GRAMMAR_FROM_STRING_NAME, grammarText, listener);
}
/** For testing; builds trees, does sem anal. No listener is attached. */
public Grammar(String fileName, String grammarText)
throws org.antlr.runtime.RecognitionException
{
this(fileName, grammarText, null);
}
/** For testing; builds trees, does sem anal. No token vocabulary is imported. */
public Grammar(String fileName, String grammarText, ANTLRToolListener listener)
throws org.antlr.runtime.RecognitionException
{
this(fileName, grammarText, null, listener);
}
/**
* For testing; builds trees, does sem anal.
*
* Creates a private {@link Tool}, parses {@code grammarText}, points every
* AST node at this grammar, initializes the token tables, optionally imports
* a vocabulary, and finally hands the grammar to {@code tool.process}.
*
* @param fileName name given to the input stream and stored in {@link #fileName}
* @param grammarText the grammar source text
* @param tokenVocabSource grammar whose vocabulary is imported first, or {@code null}
* @param listener listener added to the private tool after the silent one
* @throws UnsupportedOperationException if parsing produced no AST
* @throws IllegalStateException if the parsed AST has no token stream
*/
public Grammar(String fileName, String grammarText, Grammar tokenVocabSource, ANTLRToolListener listener)
throws org.antlr.runtime.RecognitionException
{
this.text = grammarText;
this.fileName = fileName;
this.tool = new Tool();
// listener whose callbacks all do nothing
ANTLRToolListener hush = new ANTLRToolListener() {
@Override
public void info(String msg) { }
@Override
public void error(ANTLRMessage msg) { }
@Override
public void warning(ANTLRMessage msg) { }
};
tool.addListener(hush); // we want to hush errors/warnings
this.tool.addListener(listener);
org.antlr.runtime.ANTLRStringStream in = new org.antlr.runtime.ANTLRStringStream(grammarText);
in.name = fileName;
this.ast = tool.parse(fileName, in);
if ( ast==null ) {
throw new UnsupportedOperationException();
}
if (ast.tokenStream == null) {
throw new IllegalStateException("expected ast to have a token stream");
}
this.tokenStream = ast.tokenStream;
this.originalTokenStream = this.tokenStream;
// ensure each node has pointer to surrounding grammar
final Grammar thiz = this;
org.antlr.runtime.tree.TreeVisitor v = new org.antlr.runtime.tree.TreeVisitor(new GrammarASTAdaptor());
v.visit(ast, new org.antlr.runtime.tree.TreeVisitorAction() {
@Override
public Object pre(Object t) { ((GrammarAST)t).g = thiz; return t; }
@Override
public Object post(Object t) { return t; }
});
initTokenSymbolTables();
// import the external vocabulary before the tool processes this grammar
if (tokenVocabSource != null) {
importVocab(tokenVocabSource);
}
tool.process(this, false);
}
/**
 * Seed the token tables: slot 0 of {@link #typeToTokenList} is reserved
 * for the INVALID token and {@code EOF} is mapped to {@link Token#EOF}.
 */
protected void initTokenSymbolTables() {
    // index 0 is a placeholder for the INVALID token
    typeToTokenList.add(null);
    tokenNameToTypeMap.put("EOF", Token.EOF);
}
/**
 * Load every grammar named in this grammar's import statement, record it in
 * {@link #importedGrammars} with {@link #parent} set to this grammar, and
 * recurse into its own imports. Does nothing when there is no AST or no
 * import statement. An import that fails with an {@link IOException} is
 * reported through the tool's error manager and skipped.
 */
public void loadImportedGrammars() {
    if (ast == null) {
        return;
    }
    GrammarAST importNode = (GrammarAST) ast.getFirstChildWithType(ANTLRParser.IMPORT);
    if (importNode == null) {
        return;
    }

    // NOTE(review): only this grammar's own name is ever added, so the check
    // below catches a grammar importing itself and nothing more.
    Set<String> visited = new HashSet<>();
    visited.add(this.name);

    importedGrammars = new ArrayList<>();
    for (Object child : importNode.getChildren()) {
        GrammarAST t = (GrammarAST) child;
        String importedGrammarName = null;
        int nodeType = t.getType();
        if (nodeType == ANTLRParser.ASSIGN) {
            // for an ASSIGN node the grammar name is the second child
            t = (GrammarAST) t.getChild(1);
            importedGrammarName = t.getText();
        }
        else if (nodeType == ANTLRParser.ID) {
            importedGrammarName = t.getText();
        }

        if (visited.contains(importedGrammarName)) { // ignore circular refs
            continue;
        }

        Grammar g;
        try {
            g = tool.loadImportedGrammar(this, t);
        }
        catch (IOException ioe) {
            tool.errMgr.grammarError(ErrorType.ERROR_READING_IMPORTED_GRAMMAR,
                                     importedGrammarName,
                                     t.getToken(),
                                     importedGrammarName,
                                     name);
            continue;
        }

        // did it come back as error node or missing?
        if (g == null) {
            continue;
        }
        g.parent = this;
        importedGrammars.add(g);
        g.loadImportedGrammars(); // recursively pursue any imports in this import
    }
}
public void defineAction(GrammarAST atAST) {
if ( atAST.getChildCount()==2 ) {
String name = atAST.getChild(0).getText();
namedActions.put(name, (ActionAST)atAST.getChild(1));
}
else {
String scope = atAST.getChild(0).getText();
String gtype = getTypeString();
if ( scope.equals(gtype) || (scope.equals("parser")&>ype.equals("combined")) ) {
String name = atAST.getChild(1).getText();
namedActions.put(name, (ActionAST)atAST.getChild(2));
}
}
}
/**
 * Add a rule to this grammar. The rule receives the next free
 * {@link Rule#index} (taken from {@link #ruleNumber}) and is stored in both
 * {@link #rules} and {@link #indexToRule}.
 *
 * @param r The rule to define in the grammar.
 * @return {@code true} if the rule was added; {@code false} if a rule with
 * the same name was already present, in which case nothing changes.
 */
public boolean defineRule(Rule r) {
    boolean alreadyDefined = rules.get(r.name) != null;
    if (alreadyDefined) {
        return false;
    }

    r.index = ruleNumber++;
    rules.put(r.name, r);
    indexToRule.add(r);
    return true;
}
/**
 * Remove a rule from this grammar. The instance {@code r} is taken out of
 * {@link #rules} and {@link #indexToRule}, every rule that followed it has
 * its {@link Rule#index} lowered by one, and {@link #ruleNumber} is
 * decremented so the next defined rule gets the next free index.
 *
 * <p>Nothing happens if {@link #indexToRule} does not hold exactly the
 * instance {@code r} at position {@code r.index}.</p>
 *
 * @param r The rule to remove from the grammar.
 * @return {@code true} if the rule was removed; {@code false} if it was not
 * defined in this grammar.
 */
public boolean undefineRule(Rule r) {
    boolean indexInRange = r.index >= 0 && r.index < indexToRule.size();
    if (!indexInRange || indexToRule.get(r.index) != r) {
        return false;
    }

    assert rules.get(r.name) == r;

    rules.remove(r.name);
    indexToRule.remove(r.index);

    // everything after the removed slot moved down by one position
    int position = r.index;
    while (position < indexToRule.size()) {
        assert indexToRule.get(position).index == position + 1;
        indexToRule.get(position).index--;
        position++;
    }

    ruleNumber--;
    return true;
}
// public int getNumRules() {
// int n = rules.size();
// List imports = getAllImportedGrammars();
// if ( imports!=null ) {
// for (Grammar g : imports) n += g.getNumRules();
// }
// return n;
// }
/**
 * Look up a rule defined directly in this grammar; imported grammars are
 * not searched.
 *
 * @param name the rule name
 * @return the rule, or {@code null} if this grammar defines no such rule
 */
public Rule getRule(String name) {
    return rules.get(name);
}
/**
 * Return this grammar's ATN, building it with a {@link ParserATNFactory}
 * the first time it is requested.
 */
public ATN getATN() {
    if (atn != null) {
        return atn;
    }
    atn = new ParserATNFactory(this).createATN();
    return atn;
}
/** Return the rule stored at position {@code index} of {@link #indexToRule}. */
public Rule getRule(int index) {
    return indexToRule.get(index);
}
/**
 * Look up a rule, optionally scoped to a directly imported grammar.
 *
 * @param grammarName name of a directly imported grammar, or {@code null}
 *                    to search this grammar
 * @param ruleName    the rule name
 * @return the rule, or {@code null} if the grammar or the rule is unknown
 */
public Rule getRule(String grammarName, String ruleName) {
    if (grammarName == null) {
        return getRule(ruleName);
    }
    // scope override
    Grammar scoped = getImportedGrammar(grammarName);
    if (scoped != null) {
        return scoped.rules.get(ruleName);
    }
    return null;
}
/** Get list of all imports from all grammars in the delegate subtree of g.
 *  The grammars are in import tree preorder.  Don't include ourselves
 *  in list as we're not a delegate of ourselves.
 *
 *  Grammars are keyed by {@link #fileName}, so a file reached through more
 *  than one import path appears once.
 *
 *  @return the transitive imports, or {@code null} if this grammar has no
 *  imports loaded
 */
public List<Grammar> getAllImportedGrammars() {
    if (importedGrammars == null) {
        return null;
    }

    LinkedHashMap<String, Grammar> delegates = new LinkedHashMap<>();
    for (Grammar d : importedGrammars) {
        delegates.put(d.fileName, d);
        List<Grammar> ds = d.getAllImportedGrammars();
        if (ds != null) {
            for (Grammar imported : ds) {
                delegates.put(imported.fileName, imported);
            }
        }
    }

    return new ArrayList<>(delegates.values());
}
/** Return the directly imported grammars; {@code null} when no imports have been loaded. */
public List<Grammar> getImportedGrammars() {
    return importedGrammars;
}
/** Return the value of {@link #implicitLexer}; may be {@code null}. */
public LexerGrammar getImplicitLexer() {
return implicitLexer;
}
/**
 * Convenience wrapper: create a fresh {@link Tool} and ask it to load the
 * grammar in {@code fileName}.
 */
public static Grammar load(String fileName) {
    return new Tool().loadGrammar(fileName);
}
/** Return list of imported grammars from root down to our parent.
 *  Order is [root, ..., this.parent].  (us not included).
 *
 *  @return the ancestor chain, or {@code null} when this grammar is the
 *  outermost grammar
 */
public List<Grammar> getGrammarAncestors() {
    Grammar root = getOutermostGrammar();
    if (this == root) {
        return null;
    }

    List<Grammar> grammars = new ArrayList<>();
    // walk backwards to root, collecting grammars
    for (Grammar p = this.parent; p != null; p = p.parent) {
        grammars.add(0, p); // add to head so in order later
    }
    return grammars;
}
/**
 * Follow {@link #parent} links to the top of the import chain.
 *
 * @return the root grammar of the import chain; {@code this} when this
 * grammar has no parent
 */
public Grammar getOutermostGrammar() {
    return parent == null ? this : parent.getOutermostGrammar();
}
/** Get the name of the generated recognizer; may or may not be same
 *  as grammar name.
 *  Recognizer is TParser and TLexer from T if combined, else
 *  just use T regardless of grammar type.
 *
 *  NOTE(review): the ancestor list is requested from the outermost grammar,
 *  for which {@link #getGrammarAncestors()} returns {@code null}, so as
 *  written the qualifying prefix is never added. Behavior is left unchanged
 *  here; confirm whether {@code this.getGrammarAncestors()} was intended.
 */
public String getRecognizerName() {
    String suffix = "";
    List<Grammar> grammarsFromRootToMe = getOutermostGrammar().getGrammarAncestors();
    String qualifiedName = name;
    if (grammarsFromRootToMe != null) {
        StringBuilder buf = new StringBuilder();
        for (Grammar g : grammarsFromRootToMe) {
            buf.append(g.name);
            buf.append('_');
        }
        buf.append(name);
        qualifiedName = buf.toString();
    }

    if (isCombined() || (isLexer() && implicitLexer != null)) {
        suffix = Grammar.getGrammarTypeToFileNameSuffix(getType());
    }
    return qualifiedName + suffix;
}
/**
 * Invent the next rule name for a string literal:
 * {@link #AUTO_GENERATED_TOKEN_NAME_PREFIX} followed by a running counter.
 *
 * @param lit the literal; not used when building the name
 */
public String getStringLiteralLexerRuleName(String lit) {
    int sequence = stringLiteralRuleNumber;
    stringLiteralRuleNumber++;
    return AUTO_GENERATED_TOKEN_NAME_PREFIX + sequence;
}
/**
 * Return grammar directly imported by this grammar.
 *
 * @param name name of the imported grammar
 * @return the matching grammar, or {@code null} if there is none, including
 * when this grammar has no imports loaded
 */
public Grammar getImportedGrammar(String name) {
    // importedGrammars is only allocated once an import statement is loaded
    if (importedGrammars == null) {
        return null;
    }
    for (Grammar g : importedGrammars) {
        if (g.name.equals(name)) {
            return g;
        }
    }
    return null;
}
/**
 * Return the token type for a token reference. A reference starting with a
 * single quote is looked up as a string literal, anything else as a token
 * name.
 *
 * @param token the token name or quoted literal
 * @return the token type, or {@link Token#INVALID_TYPE} if unknown
 */
public int getTokenType(String token) {
    boolean isLiteral = token.charAt(0) == '\'';
    Integer type = isLiteral
        ? stringLiteralToTypeMap.get(token)
        : tokenNameToTypeMap.get(token); // must be a label like ID
    if (type == null) {
        return Token.INVALID_TYPE;
    }
    return type;
}
/**
 * Given a token type, return a readable name for it. In a lexer grammar a
 * type inside the char range is rendered as a char literal. Otherwise the
 * string literal is preferred, then the token name, and finally the
 * decimal value of the type itself.
 */
public String getTokenDisplayName(int ttype) {
    boolean inCharRange = ttype >= Lexer.MIN_CHAR_VALUE && ttype <= Lexer.MAX_CHAR_VALUE;
    if (isLexer() && inCharRange) {
        return CharSupport.getANTLRCharLiteralForChar(ttype);
    }
    if (ttype == Token.EOF) {
        return "EOF";
    }
    if (ttype == Token.INVALID_TYPE) {
        return INVALID_TOKEN_NAME;
    }

    if (ttype >= 0) {
        if (ttype < typeToStringLiteralList.size()) {
            String literal = typeToStringLiteralList.get(ttype);
            if (literal != null) {
                return literal;
            }
        }
        if (ttype < typeToTokenList.size()) {
            String symbolic = typeToTokenList.get(ttype);
            if (symbolic != null) {
                return symbolic;
            }
        }
    }
    return String.valueOf(ttype);
}
/**
 * Gets the name by which a token can be referenced in the generated code.
 * For tokens defined in a {@code tokens{}} block or via a lexer rule, this
 * is the declared name of the token. For token types generated by the use
 * of a string literal within a parser rule of a combined grammar, this is
 * the automatically generated token type which includes the
 * {@link #AUTO_GENERATED_TOKEN_NAME_PREFIX} prefix. For types which are not
 * associated with a defined token, this method returns
 * {@link #INVALID_TOKEN_NAME}.
 *
 * @param ttype The token type.
 * @return The name of the token with the specified type.
 */
public String getTokenName(int ttype) {
    // in a lexer grammar, types inside the char range are char literals
    boolean inCharRange = ttype >= Lexer.MIN_CHAR_VALUE && ttype <= Lexer.MAX_CHAR_VALUE;
    if (isLexer() && inCharRange) {
        return CharSupport.getANTLRCharLiteralForChar(ttype);
    }
    if (ttype == Token.EOF) {
        return "EOF";
    }

    if (ttype >= 0 && ttype < typeToTokenList.size()) {
        String declared = typeToTokenList.get(ttype);
        if (declared != null) {
            return declared;
        }
    }
    return INVALID_TOKEN_NAME;
}
/**
 * Gets the constant channel value for a user-defined channel.
 *
 * <p>
 * This method only returns channel values for user-defined channels. All
 * other channels, including the predefined channels
 * {@link Token#DEFAULT_CHANNEL} and {@link Token#HIDDEN_CHANNEL} along with
 * any channel defined in code (e.g. in a {@code @members{}} block), are
 * ignored.</p>
 *
 * @param channel The channel name.
 * @return The channel value, if {@code channel} is the name of a known
 * user-defined token channel; otherwise, -1.
 */
public int getChannelValue(String channel) {
    Integer value = channelNameToValueMap.get(channel);
    if (value == null) {
        return -1;
    }
    return value;
}
/**
 * Gets an array of rule names for the rules held in {@link #rules}. The
 * array index is the rule index, and the value is the name of the rule with
 * the corresponding {@link Rule#index}.
 *
 * <p>Any element that no rule claims keeps the value
 * {@link #INVALID_RULE_NAME}.</p>
 *
 * @return The names of all rules defined in the grammar.
 */
public String[] getRuleNames() {
    String[] names = new String[rules.size()];
    Arrays.fill(names, INVALID_RULE_NAME);
    for (Rule r : rules.values()) {
        names[r.index] = r.name;
    }
    return names;
}
/**
 * Gets an array of token names for tokens defined or imported by the
 * grammar. The array index is the token type, and the value is the result
 * of {@link #getTokenName} for the corresponding token type.
 *
 * @see #getTokenName
 * @return The token names of all tokens defined in the grammar.
 */
public String[] getTokenNames() {
    int count = getMaxTokenType() + 1;
    String[] names = new String[count];
    for (int ttype = 0; ttype < count; ttype++) {
        names[ttype] = getTokenName(ttype);
    }
    return names;
}
/**
 * Gets an array of display names for tokens defined or imported by the
 * grammar. The array index is the token type, and the value is the result
 * of {@link #getTokenDisplayName} for the corresponding token type.
 *
 * @see #getTokenDisplayName
 * @return The display names of all tokens defined in the grammar.
 */
public String[] getTokenDisplayNames() {
    int count = getMaxTokenType() + 1;
    String[] displayNames = new String[count];
    for (int ttype = 0; ttype < count; ttype++) {
        displayNames[ttype] = getTokenDisplayName(ttype);
    }
    return displayNames;
}
/**
 * Gets the literal names assigned to tokens in the grammar.
 *
 * <p>The array is indexed by token type. Entries come first from
 * {@link #typeToStringLiteralList}; any slot still empty is then filled
 * from {@link #stringLiteralToTypeMap}. Types without a literal stay
 * {@code null}.</p>
 */
public String[] getTokenLiteralNames() {
    String[] literalNames = new String[getMaxTokenType() + 1];

    int limit = Math.min(literalNames.length, typeToStringLiteralList.size());
    for (int i = 0; i < limit; i++) {
        literalNames[i] = typeToStringLiteralList.get(i);
    }

    for (Map.Entry<String, Integer> entry : stringLiteralToTypeMap.entrySet()) {
        int ttype = entry.getValue();
        if (ttype >= 0 && ttype < literalNames.length && literalNames[ttype] == null) {
            literalNames[ttype] = entry.getKey();
        }
    }

    return literalNames;
}
/**
 * Gets the symbolic names assigned to tokens in the grammar.
 *
 * <p>The array is indexed by token type. Types with no name, or whose name
 * starts with {@link #AUTO_GENERATED_TOKEN_NAME_PREFIX}, are left
 * {@code null}.</p>
 */
public String[] getTokenSymbolicNames() {
    String[] symbolicNames = new String[getMaxTokenType() + 1];
    int limit = Math.min(symbolicNames.length, typeToTokenList.size());
    for (int ttype = 0; ttype < limit; ttype++) {
        String candidate = typeToTokenList.get(ttype);
        boolean usable = candidate != null
            && !candidate.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX);
        if (usable) {
            symbolicNames[ttype] = candidate;
        }
    }
    return symbolicNames;
}
/**
 * Gets a {@link Vocabulary} instance describing the vocabulary used by the
 * grammar, built from {@link #getTokenLiteralNames()} and
 * {@link #getTokenSymbolicNames()}.
 */
public Vocabulary getVocabulary() {
    String[] literalNames = getTokenLiteralNames();
    String[] symbolicNames = getTokenSymbolicNames();
    return new VocabularyImpl(literalNames, symbolicNames);
}
/**
 * Render a semantic context for display. Single predicates are shown via
 * {@link #getPredicateDisplayString}; AND and OR contexts have their
 * operands joined with {@code " and "} and {@code " or "}; anything else
 * falls back to {@code toString()}.
 */
public String getSemanticContextDisplayString(SemanticContext semctx) {
    if (semctx instanceof SemanticContext.Predicate) {
        return getPredicateDisplayString((SemanticContext.Predicate) semctx);
    }
    if (semctx instanceof SemanticContext.AND) {
        return joinPredicateOperands((SemanticContext.AND) semctx, " and ");
    }
    if (semctx instanceof SemanticContext.OR) {
        return joinPredicateOperands((SemanticContext.OR) semctx, " or ");
    }
    return semctx.toString();
}
/**
 * Join the display strings of an operator's operands.
 *
 * @param op        the operator whose operands are rendered
 * @param separator text inserted before an operand whenever the output
 *                  built so far is non-empty
 * @return the joined display string
 */
public String joinPredicateOperands(SemanticContext.Operator op, String separator) {
    StringBuilder joined = new StringBuilder();
    for (SemanticContext operand : op.getOperands()) {
        String operandText = getSemanticContextDisplayString(operand);
        if (joined.length() > 0) {
            joined.append(separator);
        }
        joined.append(operandText);
    }
    return joined.toString();
}
/**
 * Build the reverse of {@link #sempreds}: predicate index to predicate
 * node, covering every {@link PredAST} among the actions of the rules in
 * {@link #rules}.
 *
 * @return a new map from predicate index to predicate node
 */
public LinkedHashMap<Integer, PredAST> getIndexToPredicateMap() {
    // named "result" so it does not shadow the indexToPredMap field
    LinkedHashMap<Integer, PredAST> result = new LinkedHashMap<>();
    for (Rule r : rules.values()) {
        for (ActionAST a : r.actions) {
            if (a instanceof PredAST) {
                PredAST p = (PredAST) a;
                result.put(sempreds.get(p), p);
            }
        }
    }
    return result;
}
/**
 * Return the grammar text of a predicate. The index-to-predicate map is
 * built on first use and kept in {@link #indexToPredMap}.
 *
 * @param pred the predicate to display
 * @return the text of the predicate's AST node
 */
public String getPredicateDisplayString(SemanticContext.Predicate pred) {
    if (indexToPredMap == null) {
        indexToPredMap = getIndexToPredicateMap();
    }
    ActionAST predNode = indexToPredMap.get(pred.predIndex);
    String display = predNode.getText();
    return display;
}
/**
 * Return the largest char value for this grammar. This is always
 * {@link Lexer#MAX_CHAR_VALUE}; no target is consulted.
 */
public int getMaxCharValue() {
    return Lexer.MAX_CHAR_VALUE;
}
/**
 * Return the set of all possible token or char types for this grammar: the
 * full char range for a lexer, otherwise the interval from
 * {@link Token#MIN_USER_TOKEN_TYPE} to {@link #getMaxTokenType()}.
 */
public IntSet getTokenTypes() {
    if (!isLexer()) {
        return IntervalSet.of(Token.MIN_USER_TOKEN_TYPE, getMaxTokenType());
    }
    return getAllCharValues();
}
/**
 * Return the interval from {@link Lexer#MIN_CHAR_VALUE} to
 * {@link #getMaxCharValue()}.
 */
public IntSet getAllCharValues() {
    int lowest = Lexer.MIN_CHAR_VALUE;
    int highest = getMaxCharValue();
    return IntervalSet.of(lowest, highest);
}
/**
 * Return the highest index present in {@link #typeToTokenList}, i.e. its
 * size minus one, so slot 0 (invalid) is not counted.
 */
public int getMaxTokenType() {
    int slots = typeToTokenList.size();
    return slots - 1; // don't count 0 (invalid)
}
/** Advance {@link #maxTokenType} by one and return the new value as a fresh token type. */
public int getNewTokenType() {
    return ++maxTokenType;
}
/** Advance {@link #maxChannelType} by one and return the new value as a fresh channel value. */
public int getNewChannelNumber() {
    return ++maxChannelType;
}
/**
 * If the {@code tokenVocab} option is set, load that vocabulary with a
 * {@link TokenVocabParser} and define each entry in this grammar. Entries
 * starting with a single quote become string literals; all others become
 * token names.
 */
public void importTokensFromTokensFile() {
    String vocab = getOptionString("tokenVocab");
    if (vocab == null) {
        return;
    }

    TokenVocabParser vparser = new TokenVocabParser(this);
    Map<String, Integer> tokens = vparser.load();
    tool.log("grammar", "tokens=" + tokens);
    for (Map.Entry<String, Integer> entry : tokens.entrySet()) {
        String t = entry.getKey();
        if (t.charAt(0) == '\'') {
            defineStringLiteral(t, entry.getValue());
        }
        else {
            defineTokenName(t, entry.getValue());
        }
    }
}
public void importVocab(Grammar importG) {
for (String tokenName: importG.tokenNameToTypeMap.keySet()) {
defineTokenName(tokenName, importG.tokenNameToTypeMap.get(tokenName));
}
for (String tokenName: importG.stringLiteralToTypeMap.keySet()) {
defineStringLiteral(tokenName, importG.stringLiteralToTypeMap.get(tokenName));
}
for (Map.Entry channel : importG.channelNameToValueMap.entrySet()) {
defineChannelName(channel.getKey(), channel.getValue());
}
// this.tokenNameToTypeMap.putAll( importG.tokenNameToTypeMap );
// this.stringLiteralToTypeMap.putAll( importG.stringLiteralToTypeMap );
int max = Math.max(this.typeToTokenList.size(), importG.typeToTokenList.size());
Utils.setSize(typeToTokenList, max);
for (int ttype=0; ttype=typeToStringLiteralList.size() ) {
Utils.setSize(typeToStringLiteralList, ttype+1);
}
typeToStringLiteralList.set(ttype, lit);
setTokenForType(ttype, lit);
return ttype;
}
return Token.INVALID_TYPE;
}
/**
 * Define {@code name} as a token and map the literal {@code lit} to the
 * same token type.
 *
 * @param name the token name
 * @param lit  the string literal to map to the token's type
 * @return the token type shared by the name and the literal
 */
public int defineTokenAlias(String name, String lit) {
    final int sharedType = defineTokenName(name);
    stringLiteralToTypeMap.put(lit, sharedType);
    setTokenForType(sharedType, name);
    return sharedType;
}
/**
 * Record {@code text} as the name for token type {@code ttype} in
 * {@link #typeToTokenList}, growing the list if needed. An existing entry
 * is only replaced when it is a quoted literal. {@link Token#EOF} is
 * ignored.
 *
 * @param ttype the token type
 * @param text  the token name or literal to record
 */
public void setTokenForType(int ttype, String text) {
    if (ttype == Token.EOF) {
        // ignore EOF, it will be reported as an error separately
        return;
    }

    if (typeToTokenList.size() <= ttype) {
        Utils.setSize(typeToTokenList, ttype + 1);
    }

    String current = typeToTokenList.get(ttype);
    // only record if nothing there before or if thing before was a literal
    boolean replaceable = current == null || current.charAt(0) == '\'';
    if (replaceable) {
        typeToTokenList.set(ttype, text);
    }
}
/**
 * Define a token channel with a specified name.
 *
 * <p>
 * If a channel with the specified name already exists, the previously
 * assigned channel value is returned; otherwise a new channel number is
 * allocated.</p>
 *
 * @param name The channel name.
 * @return The constant channel value assigned to the channel.
 */
public int defineChannelName(String name) {
    Integer existing = channelNameToValueMap.get(name);
    if (existing != null) {
        return existing;
    }
    return defineChannelName(name, getNewChannelNumber());
}
/**
 * Define a token channel with a specified name and value.
 *
 * <p>
 * If a channel with the specified name already exists, the previously
 * assigned channel value is not altered.</p>
 *
 * @param name The channel name.
 * @param value The constant value to assign when the name is new.
 * @return The constant channel value assigned to the channel.
 */
public int defineChannelName(String name, int value) {
    Integer existing = channelNameToValueMap.get(name);
    if (existing == null) {
        channelNameToValueMap.put(name, value);
        setChannelNameForValue(value, name);
        maxChannelType = Math.max(maxChannelType, value);
        return value;
    }
    return existing;
}
/**
 * Sets the channel name associated with a particular channel value.
 *
 * <p>
 * If a name has already been assigned to the channel with constant value
 * {@code channelValue}, this method does nothing.</p>
 *
 * @param channelValue The constant value for the channel.
 * @param name The channel name.
 */
public void setChannelNameForValue(int channelValue, String name) {
    // grow the list so that index channelValue exists
    if (channelValueToNameList.size() <= channelValue) {
        Utils.setSize(channelValueToNameList, channelValue + 1);
    }

    boolean unnamed = channelValueToNameList.get(channelValue) == null;
    if (unnamed) {
        channelValueToNameList.set(channelValue, name);
    }
}
// AttributeResolver implementation: at grammar level nothing resolves, so
// every lookup below reports "not found".

// no isolated attr at grammar action level
@Override
public Attribute resolveToAttribute(String x, ActionAST node) {
return null;
}
// no $x.y makes sense here
@Override
public Attribute resolveToAttribute(String x, String y, ActionAST node) {
return null;
}
// x is never a label here
@Override
public boolean resolvesToLabel(String x, ActionAST node) { return false; }
// x is never a list label here
@Override
public boolean resolvesToListLabel(String x, ActionAST node) { return false; }
// x is never a token here
@Override
public boolean resolvesToToken(String x, ActionAST node) { return false; }
// x is never an attribute dictionary here
@Override
public boolean resolvesToAttributeDict(String x, ActionAST node) {
return false;
}
/**
 * Return the default action scope for this grammar's type:
 * {@code "lexer"} for a lexer grammar, {@code "parser"} for parser and
 * combined grammars (so {@code @members} in a combined grammar goes to the
 * parser), and {@code null} for any other type.
 */
public String getDefaultActionScope() {
    switch (getType()) {
        case ANTLRParser.LEXER:
            return "lexer";
        case ANTLRParser.PARSER:
        case ANTLRParser.COMBINED:
            return "parser";
        default:
            return null;
    }
}
/** Return the grammar type recorded on the AST, or 0 when there is no AST. */
public int getType() {
    return ast != null ? ast.grammarType : 0;
}
/** Return the AST's token stream, or {@code null} when there is no AST. */
public org.antlr.runtime.TokenStream getTokenStream() {
    return ast != null ? ast.tokenStream : null;
}
/** True when {@link #getType()} is {@code ANTLRParser.LEXER}. */
public boolean isLexer() { return getType()==ANTLRParser.LEXER; }
/** True when {@link #getType()} is {@code ANTLRParser.PARSER}. */
public boolean isParser() { return getType()==ANTLRParser.PARSER; }
/** True when {@link #getType()} is {@code ANTLRParser.COMBINED}. */
public boolean isCombined() { return getType()==ANTLRParser.COMBINED; }
/**
 * Is id a valid token name? Does id start with an uppercase letter?
 *
 * @param id the identifier to test; may be {@code null} or empty
 * @return {@code true} if the first character is uppercase; {@code false}
 * otherwise, including for a {@code null} or empty id
 */
public static boolean isTokenName(String id) {
    // an identifier with no first character cannot be a token name
    if (id == null || id.isEmpty()) {
        return false;
    }
    return Character.isUpperCase(id.charAt(0));
}
/**
 * Lower-cased name of this grammar's type, looked up in
 * {@code ANTLRParser.tokenNames} at index {@link #getType()}.
 *
 * @return the lower-cased type name, or {@code null} when this grammar
 * has no AST
 */
public String getTypeString() {
	if ( ast==null ) return null;
	// Locale.ROOT keeps the result independent of the JVM default locale
	// (e.g. a Turkish locale would lower-case 'I' to a dotless 'ı').
	return ANTLRParser.tokenNames[getType()].toLowerCase(java.util.Locale.ROOT);
}
/**
 * Maps a grammar type constant to a file name suffix.
 *
 * @param type one of the {@code ANTLRParser} grammar type constants
 * @return "Lexer" for LEXER, "Parser" for PARSER and COMBINED, and the
 * empty string for anything else
 */
public static String getGrammarTypeToFileNameSuffix(int type) {
	if ( type==ANTLRParser.LEXER ) {
		return "Lexer";
	}
	// if combined grammar, gen Parser and Lexer will be done later
	// TODO: we are separate now right?
	if ( type==ANTLRParser.PARSER || type==ANTLRParser.COMBINED ) {
		return "Parser";
	}
	return "";
}
/** Looks up option {@code key} by delegating to the root AST's {@code getOptionString}. */
public String getOptionString(String key) {
	return ast.getOptionString(key);
}
/** Copy element options onto a node. Given
 *  ^(TOKEN_REF ^(OPTIONS ^(ELEMENT_OPTIONS (= assoc right))))
 *  this sets option assoc=right in TOKEN_REF.
 *
 *  Nothing happens when {@code options} is null, or when either the node
 *  or {@code options} has no children.
 *
 *  @param node the node receiving the options; cast to GrammarASTWithOptions
 *  @param options subtree whose children are the options to apply
 */
public static void setNodeOptions(GrammarAST node, GrammarAST options) {
	if ( options==null ) {
		return;
	}
	GrammarASTWithOptions target = (GrammarASTWithOptions)node;
	boolean nothingToDo = target.getChildCount()==0 || options.getChildCount()==0;
	if ( nothingToDo ) {
		return;
	}
	for (Object child : options.getChildren()) {
		GrammarAST option = (GrammarAST)child;
		if ( option.getType()!=ANTLRParser.ASSIGN ) {
			target.setOption(option.getText(), null); // no arg such as ID
			continue;
		}
		// ASSIGN node: child 0 carries the option name, child 1 its value
		target.setOption(option.getChild(0).getText(), (GrammarAST)option.getChild(1));
	}
}
/** Return list of (TOKEN_NAME node, 'literal' node) pairs */
public static List> getStringLiteralAliasesFromLexerRules(GrammarRootAST ast) {
String[] patterns = {
"(RULE %name:TOKEN_REF (BLOCK (ALT %lit:STRING_LITERAL)))",
"(RULE %name:TOKEN_REF (BLOCK (ALT %lit:STRING_LITERAL ACTION)))",
"(RULE %name:TOKEN_REF (BLOCK (ALT %lit:STRING_LITERAL SEMPRED)))",
"(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) .)))",
"(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) . .)))",
"(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) (LEXER_ACTION_CALL . .))))",
"(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) . (LEXER_ACTION_CALL . .))))",
"(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) (LEXER_ACTION_CALL . .) .)))",
// TODO: allow doc comment in there
};
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(ast.token.getInputStream());
org.antlr.runtime.tree.TreeWizard wiz = new org.antlr.runtime.tree.TreeWizard(adaptor,ANTLRParser.tokenNames);
List> lexerRuleToStringLiteral =
new ArrayList>();
List ruleNodes = ast.getNodesWithType(ANTLRParser.RULE);
if ( ruleNodes==null || ruleNodes.isEmpty() ) return null;
for (GrammarAST r : ruleNodes) {
//tool.log("grammar", r.toStringTree());
// System.out.println("chk: "+r.toStringTree());
org.antlr.runtime.tree.Tree name = r.getChild(0);
if ( name.getType()==ANTLRParser.TOKEN_REF ) {
// check rule against patterns
boolean isLitRule;
for (String pattern : patterns) {
isLitRule =
defAlias(r, pattern, wiz, lexerRuleToStringLiteral);
if ( isLitRule ) break;
}
// if ( !isLitRule ) System.out.println("no pattern matched");
}
}
return lexerRuleToStringLiteral;
}
protected static boolean defAlias(GrammarAST r, String pattern,
org.antlr.runtime.tree.TreeWizard wiz,
List> lexerRuleToStringLiteral)
{
HashMap nodes = new HashMap();
if ( wiz.parse(r, pattern, nodes) ) {
GrammarAST litNode = (GrammarAST)nodes.get("lit");
GrammarAST nameNode = (GrammarAST)nodes.get("name");
Pair pair =
new Pair(nameNode, litNode);
lexerRuleToStringLiteral.add(pair);
return true;
}
return false;
}
/**
 * Collects the text of every string reference the grammar tree visitor
 * reports while walking this grammar's AST.
 *
 * @return the literal texts, de-duplicated, in the order they were first
 * visited
 */
public Set<String> getStringLiterals() {
	// LinkedHashSet: drops duplicates while keeping first-seen order
	final Set<String> strings = new LinkedHashSet<String>();
	GrammarTreeVisitor collector = new GrammarTreeVisitor() {
		@Override
		public void stringRef(TerminalAST ref) {
			strings.add(ref.getText());
		}
		@Override
		public ErrorManager getErrorManager() { return tool.errMgr; }
	};
	collector.visitGrammar(ast);
	return strings;
}
/**
 * Records {@code lookaheadDFA} in {@code decisionDFAs} under key
 * {@code decision}.
 */
public void setLookaheadDFA(int decision, DFA lookaheadDFA) { decisionDFAs.put(decision, lookaheadDFA); }
/**
 * Builds a map from ATN state number to the token index interval of the
 * grammar node that carries that state.
 *
 * Only nodes with a non-null {@code atnState} contribute. For RULE nodes,
 * and for BLOCK/CLOSURE nodes nested in a RULE, whose rule is a
 * {@link LeftRecursiveRule}, the interval of the rule's original
 * (pre-transformation) AST is used instead of the node's own interval.
 *
 * @param ast root of the grammar; when {@code null} an empty map is returned
 * @param grammarTokenTypes node types to consider, passed straight to
 * {@code ast.getNodesWithType}
 * @return a new map of state number to token interval, never {@code null}
 */
public static Map<Integer, Interval> getStateToGrammarRegionMap(GrammarRootAST ast, IntervalSet grammarTokenTypes) {
	Map<Integer, Interval> stateToGrammarRegionMap = new HashMap<Integer, Interval>();
	if ( ast==null ) return stateToGrammarRegionMap;
	List<GrammarAST> nodes = ast.getNodesWithType(grammarTokenTypes);
	for (GrammarAST n : nodes) {
		if (n.atnState != null) {
			Interval tokenRegion = Interval.of(n.getTokenStartIndex(), n.getTokenStopIndex());
			org.antlr.runtime.tree.Tree ruleNode = null;
			// RULEs, BLOCKs of transformed recursive rules point to original token interval
			switch ( n.getType() ) {
				case ANTLRParser.RULE :
					ruleNode = n;
					break;
				case ANTLRParser.BLOCK :
				case ANTLRParser.CLOSURE :
					ruleNode = n.getAncestor(ANTLRParser.RULE);
					break;
			}
			if ( ruleNode instanceof RuleAST ) {
				String ruleName = ((RuleAST) ruleNode).getRuleName();
				Rule r = ast.g.getRule(ruleName);
				if ( r instanceof LeftRecursiveRule ) {
					RuleAST originalAST = ((LeftRecursiveRule) r).getOriginalAST();
					tokenRegion = Interval.of(originalAST.getTokenStartIndex(), originalAST.getTokenStopIndex());
				}
			}
			stateToGrammarRegionMap.put(n.atnState.stateNumber, tokenRegion);
		}
	}
	return stateToGrammarRegionMap;
}
/** Look up the token index range within the grammar from which the given
 *  ATN state was derived. The state-to-region map is built on first use
 *  and then kept in {@code stateToGrammarRegionMap}.
 *
 *  @param atnStateNumber the ATN state number to look up
 *  @return whatever the map holds for that state number, or
 *  {@code Interval.INVALID} if no map is available
 */
public Interval getStateToGrammarRegion(int atnStateNumber) {
	if ( stateToGrammarRegionMap==null ) {
		// map all nodes with non-null atn state ptr
		stateToGrammarRegionMap = getStateToGrammarRegionMap(ast, null);
	}
	if ( stateToGrammarRegionMap!=null ) {
		return stateToGrammarRegionMap.get(atnStateNumber);
	}
	return Interval.INVALID;
}
/**
 * Creates a lexer interpreter over {@code input} for this grammar.
 *
 * A combined grammar delegates to its {@code implicitLexer}. Otherwise
 * the interpreter is handed an ATN rebuilt from the serialized form of
 * this grammar's {@code atn}, and a channel list made of
 * "DEFAULT_TOKEN_CHANNEL", "HIDDEN" and then the entries of
 * {@code channelValueToNameList}.
 *
 * @param input the character stream to tokenize
 * @return a new lexer interpreter
 * @throws IllegalStateException if this is a parser grammar
 */
public LexerInterpreter createLexerInterpreter(CharStream input) {
	if (this.isParser()) {
		throw new IllegalStateException("A lexer interpreter can only be created for a lexer or combined grammar.");
	}
	if (this.isCombined()) {
		return implicitLexer.createLexerInterpreter(input);
	}
	char[] serializedAtn = ATNSerializer.getSerializedAsChars(atn);
	ATN deserialized = new ATNDeserializer().deserialize(serializedAtn);
	List<String> allChannels = new ArrayList<String>();
	allChannels.add("DEFAULT_TOKEN_CHANNEL");
	allChannels.add("HIDDEN");
	allChannels.addAll(channelValueToNameList);
	return new LexerInterpreter(fileName, getVocabulary(), Arrays.asList(getRuleNames()), allChannels, ((LexerGrammar)this).modes.keySet(), deserialized, input);
}
/**
 * Creates a {@link GrammarParserInterpreter} over {@code tokenStream}.
 * The interpreter is handed an ATN rebuilt from the serialized form of
 * this grammar's {@code atn}.
 *
 * @param tokenStream the tokens to parse
 * @return a new grammar parser interpreter
 * @throws IllegalStateException if this is a lexer grammar
 * @since 4.5.1
 */
public GrammarParserInterpreter createGrammarParserInterpreter(TokenStream tokenStream) {
	if (isLexer()) {
		throw new IllegalStateException("A parser interpreter can only be created for a parser or combined grammar.");
	}
	final char[] atnChars = ATNSerializer.getSerializedAsChars(atn);
	final ATNDeserializer deserializer = new ATNDeserializer();
	final ATN rebuiltAtn = deserializer.deserialize(atnChars);
	return new GrammarParserInterpreter(this, rebuiltAtn, tokenStream);
}
/**
 * Creates a {@link ParserInterpreter} over {@code tokenStream}.
 * The interpreter is handed an ATN rebuilt from the serialized form of
 * this grammar's {@code atn}, along with the grammar's file name,
 * vocabulary and rule names.
 *
 * @param tokenStream the tokens to parse
 * @return a new parser interpreter
 * @throws IllegalStateException if this is a lexer grammar
 */
public ParserInterpreter createParserInterpreter(TokenStream tokenStream) {
	if (isLexer()) {
		throw new IllegalStateException("A parser interpreter can only be created for a parser or combined grammar.");
	}
	final char[] atnChars = ATNSerializer.getSerializedAsChars(atn);
	final ATNDeserializer deserializer = new ATNDeserializer();
	final ATN rebuiltAtn = deserializer.deserialize(atnChars);
	return new ParserInterpreter(
		fileName,
		getVocabulary(),
		Arrays.asList(getRuleNames()),
		rebuiltAtn,
		tokenStream);
}
}