org.antlr.v4.semantics.SymbolChecks Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of antlr4 Show documentation
Show all versions of antlr4 Show documentation
The ANTLR 4 grammar compiler.
/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.semantics;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.Tree;
import org.antlr.v4.automata.LexerATNFactory;
import org.antlr.v4.parse.ANTLRLexer;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.tool.Alternative;
import org.antlr.v4.tool.Attribute;
import org.antlr.v4.tool.AttributeDict;
import org.antlr.v4.tool.ErrorManager;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LabelElementPair;
import org.antlr.v4.tool.LabelType;
import org.antlr.v4.tool.LeftRecursiveRule;
import org.antlr.v4.tool.LexerGrammar;
import org.antlr.v4.tool.Rule;
import org.antlr.v4.tool.ast.AltAST;
import org.antlr.v4.tool.ast.GrammarAST;
import org.antlr.v4.tool.ast.TerminalAST;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/** Check for symbol problems; no side-effects. Inefficient to walk rules
* and such multiple times, but I like isolating all error checking outside
* of code that actually defines symbols etc...
*
* Side-effect: strip away redef'd rules.
*/
public class SymbolChecks {
Grammar g;
SymbolCollector collector;
Map nameToRuleMap = new HashMap();
Set tokenIDs = new HashSet();
Map> actionScopeToActionNames = new HashMap>();
public ErrorManager errMgr;
protected final Set reservedNames = new HashSet();
{
reservedNames.addAll(LexerATNFactory.getCommonConstants());
}
public SymbolChecks(Grammar g, SymbolCollector collector) {
this.g = g;
this.collector = collector;
this.errMgr = g.tool.errMgr;
for (GrammarAST tokenId : collector.tokenIDRefs) {
tokenIDs.add(tokenId.getText());
}
}
public void process() {
// methods affect fields, but no side-effects outside this object
// So, call order sensitive
// First collect all rules for later use in checkForLabelConflict()
if (g.rules != null) {
for (Rule r : g.rules.values()) nameToRuleMap.put(r.name, r);
}
checkReservedNames(g.rules.values());
checkActionRedefinitions(collector.namedActions);
checkForLabelConflicts(g.rules.values());
}
public void checkActionRedefinitions(List actions) {
if (actions == null) return;
String scope = g.getDefaultActionScope();
String name;
GrammarAST nameNode;
for (GrammarAST ampersandAST : actions) {
nameNode = (GrammarAST) ampersandAST.getChild(0);
if (ampersandAST.getChildCount() == 2) {
name = nameNode.getText();
}
else {
scope = nameNode.getText();
name = ampersandAST.getChild(1).getText();
}
Set scopeActions = actionScopeToActionNames.get(scope);
if (scopeActions == null) { // init scope
scopeActions = new HashSet();
actionScopeToActionNames.put(scope, scopeActions);
}
if (!scopeActions.contains(name)) {
scopeActions.add(name);
}
else {
errMgr.grammarError(ErrorType.ACTION_REDEFINITION,
g.fileName, nameNode.token, name);
}
}
}
/**
* Make sure a label doesn't conflict with another symbol.
* Labels must not conflict with: rules, tokens, scope names,
* return values, parameters, and rule-scope dynamic attributes
* defined in surrounding rule. Also they must have same type
* for repeated defs.
*/
public void checkForLabelConflicts(Collection rules) {
for (Rule r : rules) {
checkForAttributeConflicts(r);
Map labelNameSpace = new HashMap<>();
for (int i = 1; i <= r.numberOfAlts; i++) {
Alternative a = r.alt[i];
for (List pairs : a.labelDefs.values()) {
if (r.hasAltSpecificContexts()) {
// Collect labelName-labeledRules map for rule with alternative labels.
Map> labelPairs = new HashMap<>();
for (LabelElementPair p : pairs) {
String labelName = findAltLabelName(p.label);
if (labelName != null) {
List list;
if (labelPairs.containsKey(labelName)) {
list = labelPairs.get(labelName);
}
else {
list = new ArrayList<>();
labelPairs.put(labelName, list);
}
list.add(p);
}
}
for (List internalPairs : labelPairs.values()) {
labelNameSpace.clear();
checkLabelPairs(r, labelNameSpace, internalPairs);
}
}
else {
checkLabelPairs(r, labelNameSpace, pairs);
}
}
}
}
}
private void checkLabelPairs(Rule r, Map labelNameSpace, List pairs) {
for (LabelElementPair p : pairs) {
checkForLabelConflict(r, p.label);
String name = p.label.getText();
LabelElementPair prev = labelNameSpace.get(name);
if (prev == null) {
labelNameSpace.put(name, p);
}
else {
checkForTypeMismatch(r, prev, p);
}
}
}
private String findAltLabelName(CommonTree label) {
if (label == null) {
return null;
}
else if (label instanceof AltAST) {
AltAST altAST = (AltAST) label;
if (altAST.altLabel != null) {
return altAST.altLabel.toString();
}
else if (altAST.leftRecursiveAltInfo != null) {
return altAST.leftRecursiveAltInfo.altLabel.toString();
}
else {
return findAltLabelName(label.parent);
}
}
else {
return findAltLabelName(label.parent);
}
}
private void checkForTypeMismatch(Rule r, LabelElementPair prevLabelPair, LabelElementPair labelPair) {
// label already defined; if same type, no problem
if (prevLabelPair.type != labelPair.type) {
// Current behavior: take a token of rule declaration in case of left-recursive rule
// Desired behavior: take a token of proper label declaration in case of left-recursive rule
// See https://github.com/antlr/antlr4/pull/1585
// Such behavior is referring to the fact that the warning is typically reported on the actual label redefinition,
// but for left-recursive rules the warning is reported on the enclosing rule.
org.antlr.runtime.Token token = r instanceof LeftRecursiveRule
? ((GrammarAST) r.ast.getChild(0)).getToken()
: labelPair.label.token;
errMgr.grammarError(
ErrorType.LABEL_TYPE_CONFLICT,
g.fileName,
token,
labelPair.label.getText(),
labelPair.type + "!=" + prevLabelPair.type);
}
if (!prevLabelPair.element.getText().equals(labelPair.element.getText()) &&
(prevLabelPair.type.equals(LabelType.RULE_LABEL) || prevLabelPair.type.equals(LabelType.RULE_LIST_LABEL)) &&
(labelPair.type.equals(LabelType.RULE_LABEL) || labelPair.type.equals(LabelType.RULE_LIST_LABEL))) {
org.antlr.runtime.Token token = r instanceof LeftRecursiveRule
? ((GrammarAST) r.ast.getChild(0)).getToken()
: labelPair.label.token;
String prevLabelOp = prevLabelPair.type.equals(LabelType.RULE_LIST_LABEL) ? "+=" : "=";
String labelOp = labelPair.type.equals(LabelType.RULE_LIST_LABEL) ? "+=" : "=";
errMgr.grammarError(
ErrorType.LABEL_TYPE_CONFLICT,
g.fileName,
token,
labelPair.label.getText() + labelOp + labelPair.element.getText(),
prevLabelPair.label.getText() + prevLabelOp + prevLabelPair.element.getText());
}
}
public void checkForLabelConflict(Rule r, GrammarAST labelID) {
String name = labelID.getText();
if (nameToRuleMap.containsKey(name)) {
ErrorType etype = ErrorType.LABEL_CONFLICTS_WITH_RULE;
errMgr.grammarError(etype, g.fileName, labelID.token, name, r.name);
}
if (tokenIDs.contains(name)) {
ErrorType etype = ErrorType.LABEL_CONFLICTS_WITH_TOKEN;
errMgr.grammarError(etype, g.fileName, labelID.token, name, r.name);
}
if (r.args != null && r.args.get(name) != null) {
ErrorType etype = ErrorType.LABEL_CONFLICTS_WITH_ARG;
errMgr.grammarError(etype, g.fileName, labelID.token, name, r.name);
}
if (r.retvals != null && r.retvals.get(name) != null) {
ErrorType etype = ErrorType.LABEL_CONFLICTS_WITH_RETVAL;
errMgr.grammarError(etype, g.fileName, labelID.token, name, r.name);
}
if (r.locals != null && r.locals.get(name) != null) {
ErrorType etype = ErrorType.LABEL_CONFLICTS_WITH_LOCAL;
errMgr.grammarError(etype, g.fileName, labelID.token, name, r.name);
}
}
public void checkForAttributeConflicts(Rule r) {
checkDeclarationRuleConflicts(r, r.args, nameToRuleMap.keySet(), ErrorType.ARG_CONFLICTS_WITH_RULE);
checkDeclarationRuleConflicts(r, r.args, tokenIDs, ErrorType.ARG_CONFLICTS_WITH_TOKEN);
checkDeclarationRuleConflicts(r, r.retvals, nameToRuleMap.keySet(), ErrorType.RETVAL_CONFLICTS_WITH_RULE);
checkDeclarationRuleConflicts(r, r.retvals, tokenIDs, ErrorType.RETVAL_CONFLICTS_WITH_TOKEN);
checkDeclarationRuleConflicts(r, r.locals, nameToRuleMap.keySet(), ErrorType.LOCAL_CONFLICTS_WITH_RULE);
checkDeclarationRuleConflicts(r, r.locals, tokenIDs, ErrorType.LOCAL_CONFLICTS_WITH_TOKEN);
checkLocalConflictingDeclarations(r, r.retvals, r.args, ErrorType.RETVAL_CONFLICTS_WITH_ARG);
checkLocalConflictingDeclarations(r, r.locals, r.args, ErrorType.LOCAL_CONFLICTS_WITH_ARG);
checkLocalConflictingDeclarations(r, r.locals, r.retvals, ErrorType.LOCAL_CONFLICTS_WITH_RETVAL);
}
protected void checkDeclarationRuleConflicts(Rule r, AttributeDict attributes, Set ruleNames, ErrorType errorType) {
if (attributes == null) {
return;
}
for (Attribute attribute : attributes.attributes.values()) {
if (ruleNames.contains(attribute.name)) {
errMgr.grammarError(
errorType,
g.fileName,
attribute.token != null ? attribute.token : ((GrammarAST) r.ast.getChild(0)).token,
attribute.name,
r.name);
}
}
}
protected void checkLocalConflictingDeclarations(Rule r, AttributeDict attributes, AttributeDict referenceAttributes, ErrorType errorType) {
if (attributes == null || referenceAttributes == null) {
return;
}
Set conflictingKeys = attributes.intersection(referenceAttributes);
for (String key : conflictingKeys) {
errMgr.grammarError(
errorType,
g.fileName,
attributes.get(key).token != null ? attributes.get(key).token : ((GrammarAST)r.ast.getChild(0)).token,
key,
r.name);
}
}
protected void checkReservedNames(Collection rules) {
for (Rule rule : rules) {
if (reservedNames.contains(rule.name)) {
errMgr.grammarError(ErrorType.RESERVED_RULE_NAME, g.fileName, ((GrammarAST)rule.ast.getChild(0)).getToken(), rule.name);
}
}
}
public void checkForModeConflicts(Grammar g) {
if (g.isLexer()) {
LexerGrammar lexerGrammar = (LexerGrammar)g;
for (String modeName : lexerGrammar.modes.keySet()) {
if (!modeName.equals("DEFAULT_MODE") && reservedNames.contains(modeName)) {
Rule rule = lexerGrammar.modes.get(modeName).iterator().next();
g.tool.errMgr.grammarError(ErrorType.MODE_CONFLICTS_WITH_COMMON_CONSTANTS, g.fileName, rule.ast.parent.getToken(), modeName);
}
if (g.getTokenType(modeName) != Token.INVALID_TYPE) {
Rule rule = lexerGrammar.modes.get(modeName).iterator().next();
g.tool.errMgr.grammarError(ErrorType.MODE_CONFLICTS_WITH_TOKEN, g.fileName, rule.ast.parent.getToken(), modeName);
}
}
}
}
/**
* Algorithm steps:
* 1. Collect all simple string literals (i.e. 'asdf', 'as' 'df', but not [a-z]+, 'a'..'z')
* for all lexer rules in each mode except of autogenerated tokens ({@link #getSingleTokenValues(Rule) getSingleTokenValues})
* 2. Compare every string literal with each other ({@link #checkForOverlap(Grammar, Rule, Rule, List, List) checkForOverlap})
* and throw TOKEN_UNREACHABLE warning if the same string found.
* Complexity: O(m * n^2 / 2), approximately equals to O(n^2)
* where m - number of modes, n - average number of lexer rules per mode.
* See also testUnreachableTokens unit test for details.
*/
public void checkForUnreachableTokens(Grammar g) {
if (g.isLexer()) {
LexerGrammar lexerGrammar = (LexerGrammar)g;
for (List rules : lexerGrammar.modes.values()) {
// Collect string literal lexer rules for each mode
List stringLiteralRules = new ArrayList<>();
List> stringLiteralValues = new ArrayList<>();
for (int i = 0; i < rules.size(); i++) {
Rule rule = rules.get(i);
List ruleStringAlts = getSingleTokenValues(rule);
if (ruleStringAlts != null && ruleStringAlts.size() > 0) {
stringLiteralRules.add(rule);
stringLiteralValues.add(ruleStringAlts);
}
}
// Check string sets intersection
for (int i = 0; i < stringLiteralRules.size(); i++) {
List firstTokenStringValues = stringLiteralValues.get(i);
Rule rule1 = stringLiteralRules.get(i);
checkForOverlap(g, rule1, rule1, firstTokenStringValues, stringLiteralValues.get(i));
// Check fragment rules only with themself
if (!rule1.isFragment()) {
for (int j = i + 1; j < stringLiteralRules.size(); j++) {
Rule rule2 = stringLiteralRules.get(j);
if (!rule2.isFragment()) {
checkForOverlap(g, rule1, stringLiteralRules.get(j), firstTokenStringValues, stringLiteralValues.get(j));
}
}
}
}
}
}
}
/**
* {@return} list of simple string literals for rule {@param rule}
*/
private List getSingleTokenValues(Rule rule)
{
List values = new ArrayList<>();
for (Alternative alt : rule.alt) {
if (alt != null) {
// select first alt if token has a command
Tree rootNode = alt.ast.getChildCount() == 2 &&
alt.ast.getChild(0) instanceof AltAST && alt.ast.getChild(1) instanceof GrammarAST
? alt.ast.getChild(0)
: alt.ast;
if (rootNode.getTokenStartIndex() == -1) {
continue; // ignore autogenerated tokens from combined grammars that start with T__
}
// Ignore alt if contains not only string literals (repetition, optional)
boolean ignore = false;
StringBuilder currentValue = new StringBuilder();
for (int i = 0; i < rootNode.getChildCount(); i++) {
Tree child = rootNode.getChild(i);
if (!(child instanceof TerminalAST)) {
ignore = true;
break;
}
TerminalAST terminalAST = (TerminalAST)child;
if (terminalAST.token.getType() != ANTLRLexer.STRING_LITERAL) {
ignore = true;
break;
}
else {
String text = terminalAST.token.getText();
currentValue.append(text.substring(1, text.length() - 1));
}
}
if (!ignore) {
values.add(currentValue.toString());
}
}
}
return values;
}
/**
* For same rule compare values from next index:
* TOKEN_WITH_SAME_VALUES: 'asdf' | 'asdf';
* For different rules compare from start value:
* TOKEN1: 'asdf';
* TOKEN2: 'asdf';
*/
private void checkForOverlap(Grammar g, Rule rule1, Rule rule2, List firstTokenStringValues, List secondTokenStringValues) {
for (int i = 0; i < firstTokenStringValues.size(); i++) {
int secondTokenInd = rule1 == rule2 ? i + 1 : 0;
String str1 = firstTokenStringValues.get(i);
for (int j = secondTokenInd; j < secondTokenStringValues.size(); j++) {
String str2 = secondTokenStringValues.get(j);
if (str1.equals(str2)) {
errMgr.grammarError(ErrorType.TOKEN_UNREACHABLE, g.fileName,
((GrammarAST) rule2.ast.getChild(0)).token, rule2.name, str2, rule1.name);
}
}
}
}
// CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline)
public void checkRuleArgs(Grammar g, List rulerefs) {
if ( rulerefs==null ) return;
for (GrammarAST ref : rulerefs) {
String ruleName = ref.getText();
Rule r = g.getRule(ruleName);
GrammarAST arg = (GrammarAST)ref.getFirstChildWithType(ANTLRParser.ARG_ACTION);
if ( arg!=null && (r==null || r.args==null) ) {
errMgr.grammarError(ErrorType.RULE_HAS_NO_ARGS,
g.fileName, ref.token, ruleName);
}
else if ( arg==null && (r!=null && r.args!=null) ) {
errMgr.grammarError(ErrorType.MISSING_RULE_ARGS,
g.fileName, ref.token, ruleName);
}
}
}
public void checkForQualifiedRuleIssues(Grammar g, List qualifiedRuleRefs) {
for (GrammarAST dot : qualifiedRuleRefs) {
GrammarAST grammar = (GrammarAST)dot.getChild(0);
GrammarAST rule = (GrammarAST)dot.getChild(1);
g.tool.log("semantics", grammar.getText()+"."+rule.getText());
Grammar delegate = g.getImportedGrammar(grammar.getText());
if ( delegate==null ) {
errMgr.grammarError(ErrorType.NO_SUCH_GRAMMAR_SCOPE,
g.fileName, grammar.token, grammar.getText(),
rule.getText());
}
else {
if ( g.getRule(grammar.getText(), rule.getText())==null ) {
errMgr.grammarError(ErrorType.NO_SUCH_RULE_IN_SCOPE,
g.fileName, rule.token, grammar.getText(),
rule.getText());
}
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy