org.antlr.v4.codegen.Target Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of antlr4 Show documentation
Show all versions of antlr4 Show documentation
The ANTLR 4 grammar compiler.
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.codegen;
import org.antlr.v4.codegen.model.RuleFunction;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.Rule;
import org.antlr.v4.tool.ast.GrammarAST;
import org.stringtemplate.v4.ST;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
/** */
public class Target {
/** For pure strings of Java 16-bit Unicode char, how can we display
* it in the target language as a literal. Useful for dumping
* predicates and such that may refer to chars that need to be escaped
* when represented as strings. Also, templates need to be escaped so
* that the target language can hold them as a string.
*
* I have defined (via the constructor) the set of typical escapes,
* but your {@link Target} subclass is free to alter the translated chars
* or add more definitions. This is non-static so each target can have
* a different set in memory at same time.
*/
protected String[] targetCharValueEscape = new String[255];
public CodeGenerator gen;
/** Avoid grammar symbols in this set to prevent conflicts in gen'd code. */
public Set badWords = new HashSet();
public static String[] javaKeywords = {
"abstract", "assert", "boolean", "break", "byte", "case", "catch",
"char", "class", "const", "continue", "default", "do", "double", "else",
"enum", "extends", "false", "final", "finally", "float", "for",
"if", "implements", "import", "instanceof", "int", "interface",
"long", "native", "new", "null", "package", "private", "protected",
"public", "return", "short", "static", "strictfp", "super", "switch",
"synchronized", "this", "throw", "throws", "transient", "true", "try",
"void", "volatile", "while"
};
public Target(CodeGenerator gen) {
targetCharValueEscape['\n'] = "\\n";
targetCharValueEscape['\r'] = "\\r";
targetCharValueEscape['\t'] = "\\t";
targetCharValueEscape['\b'] = "\\b";
targetCharValueEscape['\f'] = "\\f";
targetCharValueEscape['\\'] = "\\\\";
targetCharValueEscape['\''] = "\\'";
targetCharValueEscape['"'] = "\\\"";
this.gen = gen;
addBadWords();
}
public void addBadWords() {
badWords.addAll(Arrays.asList(javaKeywords));
badWords.add("rule");
badWords.add("parserRule");
}
protected void genFile(Grammar g,
ST outputFileST,
String fileName)
{
gen.write(outputFileST, fileName);
}
protected void genListenerFile(Grammar g,
ST outputFileST)
{
String fileName = gen.getListenerFileName();
gen.write(outputFileST, fileName);
}
protected void genRecognizerHeaderFile(Grammar g,
ST headerFileST,
String extName) // e.g., ".h"
{
// no header file by default
}
/** Get a meaningful name for a token type useful during code generation.
* Literals without associated names are converted to the string equivalent
* of their integer values. Used to generate x==ID and x==34 type comparisons
* etc... Essentially we are looking for the most obvious way to refer
* to a token type in the generated code.
*/
public String getTokenTypeAsTargetLabel(Grammar g, int ttype) {
String name = g.getTokenDisplayName(ttype);
// If name is a literal, return the token type instead
if ( name==null || name.charAt(0)=='\'' ) {
return String.valueOf(ttype);
}
return name;
}
public String[] getTokenTypesAsTargetLabels(Grammar g, int[] ttypes) {
String[] labels = new String[ttypes.length];
for (int i=0; i
* Note that we have decided to allow people to use '\"' without penalty, so
* we must build the target string in a loop as {@link Utils#replace} cannot
* handle both {@code \"} and {@code "} without a lot of messing around.
*/
public String getTargetStringLiteralFromANTLRStringLiteral(
CodeGenerator generator,
String literal, boolean addQuotes)
{
StringBuilder sb = new StringBuilder();
String is = literal;
if ( addQuotes ) sb.append('"');
for (int i = 1; i < is.length() -1; i++) {
if (is.charAt(i) == '\\') {
// Anything escaped is what it is! We assume that
// people know how to escape characters correctly. However
// we catch anything that does not need an escape in Java (which
// is what the default implementation is dealing with and remove
// the escape. The C target does this for instance.
//
switch (is.charAt(i+1)) {
// Pass through any escapes that Java also needs
//
case '"':
case 'n':
case 'r':
case 't':
case 'b':
case 'f':
case '\\':
// Pass the escape through
sb.append('\\');
break;
case 'u': // Assume unnnn
// Pass the escape through as double \\
// so that Java leaves as \u0000 string not char
sb.append('\\');
sb.append('\\');
break;
default:
// Remove the escape by virtue of not adding it here
// Thus \' becomes ' and so on
break;
}
// Go past the \ character
i++;
} else {
// Characters that don't need \ in ANTLR 'strings' but do in Java
if (is.charAt(i) == '"') {
// We need to escape " in Java
sb.append('\\');
}
}
// Add in the next character, which may have been escaped
sb.append(is.charAt(i));
}
if ( addQuotes ) sb.append('"');
return sb.toString();
}
/** Assume 16-bit char */
public String encodeIntAsCharEscape(int v) {
if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
}
if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) {
return targetCharValueEscape[v];
}
if (v >= 0x20 && v < 127 && (!Character.isDigit(v) || v == '8' || v == '9')) {
return String.valueOf((char)v);
}
if ( v>=0 && v<=127 ) {
String oct = Integer.toOctalString(v);
return "\\"+ oct;
}
String hex = Integer.toHexString(v|0x10000).substring(1,5);
return "\\u"+hex;
}
public String getLoopLabel(GrammarAST ast) {
return "loop"+ ast.token.getTokenIndex();
}
public String getLoopCounter(GrammarAST ast) {
return "cnt"+ ast.token.getTokenIndex();
}
public String getListLabel(String label) {
ST st = gen.templates.getInstanceOf("ListLabelName");
st.add("label", label);
return st.render();
}
public String getRuleFunctionContextStructName(Rule r) {
if ( r.g.isLexer() ) {
return gen.templates.getInstanceOf("LexerRuleContext").render();
}
return Utils.capitalize(r.name)+gen.templates.getInstanceOf("RuleContextNameSuffix").render();
}
public String getAltLabelContextStructName(String label) {
return Utils.capitalize(label)+gen.templates.getInstanceOf("RuleContextNameSuffix").render();
}
/** If we know which actual function, we can provide the actual ctx type.
* This will contain implicit labels etc... From outside, though, we
* see only ParserRuleContext unless there are externally visible stuff
* like args, locals, explicit labels, etc...
*/
public String getRuleFunctionContextStructName(RuleFunction function) {
Rule r = function.rule;
if ( r.g.isLexer() ) {
return gen.templates.getInstanceOf("LexerRuleContext").render();
}
return Utils.capitalize(r.name)+gen.templates.getInstanceOf("RuleContextNameSuffix").render();
}
// should be same for all refs to same token like ctx.ID within single rule function
// for literals like 'while', we gen _s
public String getImplicitTokenLabel(String tokenName) {
ST st = gen.templates.getInstanceOf("ImplicitTokenLabel");
int ttype = gen.g.getTokenType(tokenName);
if ( tokenName.startsWith("'") ) {
return "s"+ttype;
}
String text = getTokenTypeAsTargetLabel(gen.g, ttype);
st.add("tokenName", text);
return st.render();
}
// x=(A|B)
public String getImplicitSetLabel(String id) {
ST st = gen.templates.getInstanceOf("ImplicitSetLabel");
st.add("id", id);
return st.render();
}
public String getImplicitRuleLabel(String ruleName) {
ST st = gen.templates.getInstanceOf("ImplicitRuleLabel");
st.add("ruleName", ruleName);
return st.render();
}
public String getElementListName(String name) {
ST st = gen.templates.getInstanceOf("ElementListName");
st.add("elemName", getElementName(name));
return st.render();
}
public String getElementName(String name) {
if (".".equals(name)) {
return "_wild";
}
if ( gen.g.getRule(name)!=null ) return name;
int ttype = gen.g.getTokenType(name);
if ( ttype==Token.INVALID_TYPE ) return name;
return getTokenTypeAsTargetLabel(gen.g, ttype);
}
public boolean grammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) {
switch (idNode.getParent().getType()) {
case ANTLRParser.ASSIGN:
switch (idNode.getParent().getParent().getType()) {
case ANTLRParser.ELEMENT_OPTIONS:
case ANTLRParser.OPTIONS:
return false;
default:
break;
}
break;
case ANTLRParser.AT:
case ANTLRParser.ELEMENT_OPTIONS:
return false;
case ANTLRParser.LEXER_ACTION_CALL:
if (idNode.getChildIndex() == 0) {
// first child is the command name which is part of the ANTLR language
return false;
}
// arguments to the command should be checked
break;
default:
break;
}
return badWords.contains(idNode.getText());
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy