org.antlr.v4.codegen.Target Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of antlr4 Show documentation
Show all versions of antlr4 Show documentation
The ANTLR 4 grammar compiler.
The newest version!
/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.codegen;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.model.RuleFunction;
import org.antlr.v4.codegen.model.SerializedATN;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.runtime.RuntimeMetaData;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.Rule;
import org.antlr.v4.tool.ast.GrammarAST;
import org.stringtemplate.v4.*;
import org.stringtemplate.v4.misc.STMessage;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
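/**
 * Base class for ANTLR code-generation targets. It supplies the target
 * language's templates, identifier and literal escaping, and the naming
 * conventions used for generated files, contexts and labels.
 */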
public abstract class Target {
/** Cache of loaded template groups keyed by target language name; static, so shared by every target instance. */
private static final Map<String, STGroup> languageTemplates = new HashMap<>();

/** Code generator this target works for. */
protected final CodeGenerator gen;

/** Default character-to-escape-sequence table returned by {@link #getTargetCharValueEscape()}. */
protected static final Map<Character, String> defaultCharValueEscape;

static {
    // https://docs.oracle.com/javase/tutorial/java/data/characters.html
    HashMap<Character, String> map = new HashMap<>();
    addEscapedChar(map, '\t', 't');
    addEscapedChar(map, '\b', 'b');
    addEscapedChar(map, '\n', 'n');
    addEscapedChar(map, '\r', 'r');
    addEscapedChar(map, '\f', 'f');
    // These three escape to a backslash followed by the character itself.
    addEscapedChar(map, '\'');
    addEscapedChar(map, '\"');
    addEscapedChar(map, '\\');
    defaultCharValueEscape = map;
}
/**
 * Creates a target bound to the given code generator.
 *
 * @param gen the code generator stored in {@link #gen}
 */
protected Target(CodeGenerator gen) {
this.gen = gen;
}
/**
 * Returns the table that maps a character to the escape sequence used to
 * write it inside a string literal of the target language.
 *
 * <p>For pure strings of Unicode chars, this says how to display each one
 * as a literal in the target language. It is useful for dumping predicates
 * and similar text that may refer to chars that need escaping when
 * represented as strings. Templates also need to be escaped so that the
 * target language can hold them as a string. Each target can have a
 * different set in memory at the same time.
 *
 * @return the escape table; this implementation returns the shared
 *         {@code defaultCharValueEscape} map
 */
public Map<Character, String> getTargetCharValueEscape() {
    return defaultCharValueEscape;
}
/**
 * Registers {@code key} so that it escapes to a backslash followed by itself.
 *
 * @param map escape table to update
 * @param key character that needs escaping
 */
protected static void addEscapedChar(HashMap<Character, String> map, char key) {
    addEscapedChar(map, key, key);
}

/**
 * Registers {@code key} so that it escapes to a backslash followed by
 * {@code representation}.
 *
 * @param map            escape table to update
 * @param key            character that needs escaping
 * @param representation character written after the backslash
 */
protected static void addEscapedChar(HashMap<Character, String> map, char key, char representation) {
    map.put(key, "\\" + representation);
}
/**
 * Returns the target language name, read from the code generator's {@code language} field.
 */
public String getLanguage() { return gen.language; }
/**
 * Returns the code generator this target was constructed with.
 */
public CodeGenerator getCodeGenerator() {
return gen;
}
/**
 * Returns the tool version this target's templates were written for.
 *
 * <p>The ANTLR tool checks that the output templates / target are compatible
 * with the tool's code generation. For now this is a simple string match on
 * the x.y part of an x.y.z version. A method is used, rather than a template
 * called VERSION, to avoid mismatches. The value is checked against
 * {@code Tool.VERSION} when the templates are loaded.
 *
 * <p>This additional method forces all targets 4.3 and beyond to add this method.
 *
 * @return this implementation returns {@code Tool.VERSION}
 * @since 4.3
 */
public String getVersion() {
return Tool.VERSION;
}
/**
 * Returns the template group for this target's language, loading it on
 * first use and caching it in the static {@code languageTemplates} map.
 *
 * <p>Before loading, the major.minor part of {@link #getVersion()} is
 * compared with that of {@code Tool.VERSION}; a mismatch (or a null
 * version) is reported as {@code INCOMPATIBLE_TOOL_AND_TEMPLATES}, and
 * loading still proceeds.
 *
 * @return the cached or freshly loaded group; {@code null} if
 *         {@link #loadTemplates()} returned {@code null}, in which case the
 *         next call attempts the load again
 */
public synchronized STGroup getTemplates() {
    String language = getLanguage();
    // The cache is static, but a synchronized instance method only takes this
    // instance's monitor. Lock the shared map too, so that different Target
    // instances cannot read and modify the HashMap concurrently.
    synchronized (languageTemplates) {
        STGroup templates = languageTemplates.get(language);
        if (templates == null) {
            String version = getVersion();
            if (version == null ||
                !RuntimeMetaData.getMajorMinorVersion(version).equals(RuntimeMetaData.getMajorMinorVersion(Tool.VERSION))) {
                gen.tool.errMgr.toolError(ErrorType.INCOMPATIBLE_TOOL_AND_TEMPLATES, version, Tool.VERSION, language);
            }
            templates = loadTemplates();
            languageTemplates.put(language, templates);
        }
        return templates;
    }
}
/**
 * Returns the identifiers that must not appear as-is in generated code for
 * this target. The set is consulted by {@link #escapeIfNeeded(String)} and
 * by the grammar-symbol checks in this class.
 *
 * @return the target's reserved words
 */
protected abstract Set<String> getReservedWords();
/**
 * Returns {@code identifier} unchanged unless it is one of the target's
 * reserved words, in which case the result of {@link #escapeWord(String)}
 * is returned instead.
 *
 * @param identifier name about to be emitted into generated code
 * @return a name that does not collide with a reserved word
 */
public String escapeIfNeeded(String identifier) {
    if (getReservedWords().contains(identifier)) {
        return escapeWord(identifier);
    }
    return identifier;
}
/**
 * Escapes a reserved word; this implementation appends an underscore.
 *
 * @param word the word to escape
 * @return {@code word} followed by {@code "_"}
 */
protected String escapeWord(String word) {
return word + "_";
}
/**
 * Writes a rendered output template to a file through the code generator.
 *
 * @param g            grammar being generated; not used by this implementation
 * @param outputFileST template holding the file contents
 * @param fileName     name of the file to write
 */
protected void genFile(Grammar g, ST outputFileST, String fileName) {
    CodeGenerator generator = getCodeGenerator();
    generator.write(outputFileST, fileName);
}
/**
 * Gets a meaningful name for a token type, for use during code generation.
 *
 * <p>Literals without associated names are converted to the string form of
 * their integer value. The result is used to generate comparisons such as
 * {@code x==ID} and {@code x==34}; essentially this is the most obvious way
 * to refer to a token type in the generated code.
 *
 * @param g     grammar that owns the token type
 * @param ttype token type to name
 * @return the (possibly escaped) token name, or the decimal token type when
 *         the name equals {@code Grammar.INVALID_TOKEN_NAME}
 */
public String getTokenTypeAsTargetLabel(Grammar g, int ttype) {
    String label = escapeIfNeeded(g.getTokenName(ttype));
    // Fall back to the numeric type when the grammar has no valid name for it.
    return Grammar.INVALID_TOKEN_NAME.equals(label) ? String.valueOf(ttype) : label;
}
public String[] getTokenTypesAsTargetLabels(Grammar g, int[] ttypes) {
String[] labels = new String[ttypes.length];
for (int i=0; iConvert from an ANTLR string literal found in a grammar file to an
* equivalent string literal in the target language.
*
*
* For Java, this is the translation {@code 'a\n"'} → {@code "a\n\""}.
* Expect single quotes around the incoming literal. Just flip the quotes
* and replace double quotes with {@code \"}.
*
*
* Note that we have decided to allow people to use '\"' without penalty, so
* we must build the target string in a loop as {@link String#replace}
* cannot handle both {@code \"} and {@code "} without a lot of messing
* around.
*
*/
public String getTargetStringLiteralFromANTLRStringLiteral(
    CodeGenerator generator,
    String literal,
    boolean addQuotes,
    boolean escapeSpecial)
{
    // generator     - not used by this implementation.
    // literal       - grammar literal including its surrounding quotes; the first
    //                 and last chars are skipped.
    // addQuotes     - wrap the result in double quotes.
    // escapeSpecial - emit an extra backslash before the n/r/t/b/f escapes; also
    //                 forwarded to appendUnicodeEscapedCodePoint.
    StringBuilder sb = new StringBuilder();
    if ( addQuotes ) sb.append('"');

    final int length = literal.length();
    for (int i = 1; i < length - 1; ) {
        int codePoint = literal.codePointAt(i);
        int toAdvance = Character.charCount(codePoint);
        if (codePoint == '\\') {
            // Anything escaped is what it is! We assume that
            // people know how to escape characters correctly. However
            // we catch anything that does not need an escape in Java (which
            // is what the default implementation is dealing with) and remove
            // the escape. The C target does this for instance.
            //
            int escapedCodePoint = literal.codePointAt(i+toAdvance);
            // Advance by the real width of the escaped code point. A code point
            // outside the BMP takes two chars, and stepping by one would make the
            // next iteration process its low surrogate a second time.
            toAdvance += Character.charCount(escapedCodePoint);
            switch (escapedCodePoint) {
                // Pass through any escapes that Java also needs
                //
                case 'n':
                case 'r':
                case 't':
                case 'b':
                case 'f':
                case '\\':
                    // Pass the escape through
                    if (escapeSpecial && escapedCodePoint != '\\') {
                        sb.append('\\');
                    }
                    sb.append('\\');
                    sb.appendCodePoint(escapedCodePoint);
                    break;

                case 'u':    // Either unnnn or u{nnnnnn}
                    // Both index probes are bounds-checked so that a truncated or
                    // unterminated escape is skipped (like the short \\uAB case
                    // below) rather than throwing StringIndexOutOfBoundsException.
                    if (i+toAdvance < length && literal.charAt(i+toAdvance) == '{') {
                        while (i+toAdvance < length && literal.charAt(i+toAdvance) != '}') {
                            toAdvance++;
                        }
                        toAdvance++;
                    }
                    else {
                        toAdvance += 4;
                    }
                    if ( i+toAdvance <= length ) { // we might have an invalid \\uAB or something
                        String fullEscape = literal.substring(i, i+toAdvance);
                        appendUnicodeEscapedCodePoint(
                            CharSupport.getCharValueFromCharInGrammarLiteral(fullEscape),
                            sb,
                            escapeSpecial);
                    }
                    break;

                default:
                    if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(escapedCodePoint)) {
                        appendUnicodeEscapedCodePoint(escapedCodePoint, sb, escapeSpecial);
                    }
                    else {
                        sb.appendCodePoint(escapedCodePoint);
                    }
                    break;
            }
        }
        else {
            if (codePoint == 0x22) {
                // ANTLR doesn't escape " in literal strings,
                // but every other language needs to do so.
                sb.append("\\\"");
            }
            else if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(codePoint)) {
                appendUnicodeEscapedCodePoint(codePoint, sb, escapeSpecial);
            }
            else {
                sb.appendCodePoint(codePoint);
            }
        }
        i += toAdvance;
    }

    if ( addQuotes ) sb.append('"');
    return sb.toString();
}
/**
 * Decides whether a code point must be written as a Unicode escape inside a
 * double-quoted string literal.
 *
 * @param codePoint code point to test; must not be 0x0A or 0x22 (asserted)
 * @return {@code true} for control characters below space, for backslash,
 *         and for DEL and everything above it
 */
protected boolean shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(int codePoint) {
    // We don't want anyone passing 0x0A (newline) or 0x22
    // (double-quote) here because Java treats \\u000A as
    // a literal newline and \\u0022 as a literal
    // double-quote, so Unicode escaping doesn't help.
    assert codePoint != 0x0A && codePoint != 0x22;

    // Space (0x20) through tilde (0x7E) keeps the source 7-bit US-ASCII.
    boolean printableAscii = codePoint >= 0x20 && codePoint < 0x7F;
    boolean backslash = codePoint == 0x5C;
    return !printableAscii || backslash;
}
/**
 * Encodes a 16-bit value for embedding in generated code. Assumes 16-bit chars.
 *
 * <p>When {@link #isATNSerializedAsInts()} is true the value is returned as a
 * decimal string. Otherwise the target's escape table is consulted first;
 * failing that, control characters, line/paragraph separators and anything
 * above 127 go through {@link #escapeChar(int)}, and the remaining ASCII
 * characters are returned as-is.
 *
 * @param v value to encode
 * @return the encoded text
 * @throws IllegalArgumentException if {@code v} is outside the {@code char} range
 */
public String encodeInt16AsCharEscape(int v) {
    boolean inCharRange = v >= Character.MIN_VALUE && v <= Character.MAX_VALUE;
    if (!inCharRange) {
        throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
    }

    if ( isATNSerializedAsInts() ) {
        return Integer.toString(v);
    }

    char c = (char)v;
    String escaped = getTargetCharValueEscape().get(c);
    if (escaped != null) {
        return escaped;
    }

    int category = Character.getType(c);
    boolean controlOrSeparator =
        category == Character.CONTROL ||
        category == Character.LINE_SEPARATOR ||
        category == Character.PARAGRAPH_SEPARATOR;
    if (controlOrSeparator || v > 127) {
        // hex encoding ensures that only pure ascii chars are generated
        return escapeChar(v);
    }
    // ascii chars can be as-is, no encoding
    return String.valueOf(c);
}
/**
 * Formats a value as a backslash-u escape with at least four lowercase hex digits.
 *
 * @param v value to format
 * @return the escape text, e.g. {@code \}{@code u000a} for 10
 */
protected String escapeChar(int v) {
return String.format("\\u%04x", v);
}
/**
 * Builds a loop label name from the token index of the given AST node.
 *
 * @param ast node whose token index makes the label distinct
 * @return {@code "loop"} followed by the token index
 */
public String getLoopLabel(GrammarAST ast) {
    int tokenIndex = ast.token.getTokenIndex();
    return "loop" + tokenIndex;
}
/**
 * Builds a loop counter name from the token index of the given AST node.
 *
 * @param ast node whose token index makes the name distinct
 * @return {@code "cnt"} followed by the token index
 */
public String getLoopCounter(GrammarAST ast) {
    int tokenIndex = ast.token.getTokenIndex();
    return "cnt" + tokenIndex;
}
/**
 * Renders the {@code ListLabelName} template for a label.
 *
 * @param label label passed to the template as its {@code label} attribute
 * @return the rendered name
 */
public String getListLabel(String label) {
    return getTemplates()
        .getInstanceOf("ListLabelName")
        .add("label", label)
        .render();
}
/**
 * Returns the name of the context structure generated for a rule.
 *
 * @param r the rule
 * @return for lexer grammars, the rendered {@code LexerRuleContext} template;
 *         otherwise the capitalized rule name followed by the rendered
 *         {@code RuleContextNameSuffix} template
 */
public String getRuleFunctionContextStructName(Rule r) {
    boolean lexerRule = r.g.isLexer();
    if (lexerRule) {
        ST lexerContext = getTemplates().getInstanceOf("LexerRuleContext");
        return lexerContext.render();
    }
    String capitalized = Utils.capitalize(r.name);
    String suffix = getTemplates().getInstanceOf("RuleContextNameSuffix").render();
    return capitalized + suffix;
}
/**
 * Returns the context structure name for a labeled alternative.
 *
 * @param label the alternative label
 * @return the capitalized label followed by the rendered
 *         {@code RuleContextNameSuffix} template
 */
public String getAltLabelContextStructName(String label) {
    String capitalized = Utils.capitalize(label);
    String suffix = getTemplates().getInstanceOf("RuleContextNameSuffix").render();
    return capitalized + suffix;
}
/**
 * Returns the name of the context structure for a rule function.
 *
 * <p>If we know which actual function, we can provide the actual ctx type.
 * This will contain implicit labels etc. From outside, though, we see only
 * ParserRuleContext unless there is externally visible stuff such as args,
 * locals, explicit labels, etc.
 *
 * @param function the rule function; only its {@code rule} is consulted
 * @return for lexer grammars, the rendered {@code LexerRuleContext} template;
 *         otherwise the capitalized rule name followed by the rendered
 *         {@code RuleContextNameSuffix} template
 */
public String getRuleFunctionContextStructName(RuleFunction function) {
    Rule rule = function.rule;
    boolean lexerRule = rule.g.isLexer();
    if (lexerRule) {
        ST lexerContext = getTemplates().getInstanceOf("LexerRuleContext");
        return lexerContext.render();
    }
    String capitalized = Utils.capitalize(rule.name);
    String suffix = getTemplates().getInstanceOf("RuleContextNameSuffix").render();
    return capitalized + suffix;
}
/**
 * Returns the implicit label for references to a token.
 *
 * <p>The label should be the same for all refs to the same token, like
 * ctx.ID, within a single rule function. For literals like 'while' the
 * label is {@code "s"} followed by the token type.
 *
 * @param tokenName token name, or a literal starting with a single quote
 * @return the implicit label
 */
public String getImplicitTokenLabel(String tokenName) {
    ST labelTemplate = getTemplates().getInstanceOf("ImplicitTokenLabel");
    Grammar grammar = getCodeGenerator().g;
    int ttype = grammar.getTokenType(tokenName);

    boolean isLiteral = tokenName.startsWith("'");
    if (isLiteral) {
        return "s" + ttype;
    }

    labelTemplate.add("tokenName", getTokenTypeAsTargetLabel(grammar, ttype));
    return labelTemplate.render();
}
/**
 * Renders the {@code ImplicitSetLabel} template, used for a label on a set
 * such as {@code x=(A|B)}.
 *
 * @param id value passed to the template as its {@code id} attribute
 * @return the rendered label
 */
public String getImplicitSetLabel(String id) {
    return getTemplates()
        .getInstanceOf("ImplicitSetLabel")
        .add("id", id)
        .render();
}
/**
 * Renders the {@code ImplicitRuleLabel} template for a rule.
 *
 * @param ruleName value passed to the template as its {@code ruleName} attribute
 * @return the rendered label
 */
public String getImplicitRuleLabel(String ruleName) {
    return getTemplates()
        .getInstanceOf("ImplicitRuleLabel")
        .add("ruleName", ruleName)
        .render();
}
/**
 * Renders the {@code ElementListName} template for an element.
 *
 * @param name element name; it is first mapped through {@link #getElementName(String)}
 * @return the rendered list name
 */
public String getElementListName(String name) {
    return getTemplates()
        .getInstanceOf("ElementListName")
        .add("elemName", getElementName(name))
        .render();
}
/**
 * Maps an element reference to the name used for it in generated code.
 *
 * @param name element name as written in the grammar
 * @return {@code "_wild"} for the wildcard {@code "."}; the name itself when
 *         it is a rule or has no valid token type; otherwise the target
 *         label of its token type
 */
public String getElementName(String name) {
    if (".".equals(name)) {
        return "_wild";
    }

    Grammar grammar = getCodeGenerator().g;
    boolean isRule = grammar.getRule(name) != null;
    if (isRule) {
        return name;
    }

    int ttype = grammar.getTokenType(name);
    return ttype == Token.INVALID_TYPE
        ? name
        : getTokenTypeAsTargetLabel(grammar, ttype);
}
/**
 * Returns the file name for the generated recognizer: TParser.java and
 * TLexer.java from T.g4 if combined, else just T.java regardless of type.
 *
 * @param header not used by this implementation
 * @return the grammar's recognizer name followed by the rendered
 *         {@code codeFileExtension} template
 */
public String getRecognizerFileName(boolean header) {
    ST extensionTemplate = getTemplates().getInstanceOf("codeFileExtension");
    return gen.g.getRecognizerName() + extensionTemplate.render();
}
/**
 * Given a grammar T, returns the listener file name, such as
 * TListener.java if we're using the Java target.
 *
 * @param header not used by this implementation
 * @return the grammar name, {@code "Listener"} and the rendered
 *         {@code codeFileExtension} template
 */
public String getListenerFileName(boolean header) {
    assert gen.g.name != null;
    ST extensionTemplate = getTemplates().getInstanceOf("codeFileExtension");
    String baseName = gen.g.name + "Listener";
    return baseName + extensionTemplate.render();
}
/**
 * Given a grammar T, returns the visitor file name, such as
 * TVisitor.java if we're using the Java target.
 *
 * @param header not used by this implementation
 * @return the grammar name, {@code "Visitor"} and the rendered
 *         {@code codeFileExtension} template
 */
public String getVisitorFileName(boolean header) {
    assert gen.g.name != null;
    ST extensionTemplate = getTemplates().getInstanceOf("codeFileExtension");
    String baseName = gen.g.name + "Visitor";
    return baseName + extensionTemplate.render();
}
/**
 * Given a grammar T, returns the file name of the blank listener
 * implementation, such as TBaseListener.java if we're using the Java target.
 *
 * @param header not used by this implementation
 * @return the grammar name, {@code "BaseListener"} and the rendered
 *         {@code codeFileExtension} template
 */
public String getBaseListenerFileName(boolean header) {
    assert gen.g.name != null;
    ST extensionTemplate = getTemplates().getInstanceOf("codeFileExtension");
    String baseName = gen.g.name + "BaseListener";
    return baseName + extensionTemplate.render();
}
/**
 * Given a grammar T, returns the file name of the blank visitor
 * implementation, such as TBaseVisitor.java if we're using the Java target.
 *
 * @param header not used by this implementation
 * @return the grammar name, {@code "BaseVisitor"} and the rendered
 *         {@code codeFileExtension} template
 */
public String getBaseVisitorFileName(boolean header) {
    assert gen.g.name != null;
    ST extensionTemplate = getTemplates().getInstanceOf("codeFileExtension");
    String baseName = gen.g.name + "BaseVisitor";
    return baseName + extensionTemplate.render();
}
/**
 * Gets the maximum number of 16-bit unsigned integers that can be encoded
 * in a single segment (a declaration in target language) of the serialized ATN.
 * E.g., in C++, a small segment length results in multiple decls like:
 *
 * <pre>
 * static const int32_t serializedATNSegment1[] = {
 * 0x7, 0x12, 0x2, 0x13, 0x7, 0x13, 0x2, 0x14, 0x7, 0x14, 0x2, 0x15, 0x7,
 * 0x15, 0x2, 0x16, 0x7, 0x16, 0x2, 0x17, 0x7, 0x17, 0x2, 0x18, 0x7,
 * 0x18, 0x2, 0x19, 0x7, 0x19, 0x2, 0x1a, 0x7, 0x1a, 0x2, 0x1b, 0x7,
 * 0x1b, 0x2, 0x1c, 0x7, 0x1c, 0x2, 0x1d, 0x7, 0x1d, 0x2, 0x1e, 0x7,
 * 0x1e, 0x2, 0x1f, 0x7, 0x1f, 0x2, 0x20, 0x7, 0x20, 0x2, 0x21, 0x7,
 * 0x21, 0x2, 0x22, 0x7, 0x22, 0x2, 0x23, 0x7, 0x23, 0x2, 0x24, 0x7,
 * 0x24, 0x2, 0x25, 0x7, 0x25, 0x2, 0x26,
 * };
 * </pre>
 *
 * instead of one big one. Targets are free to ignore this like JavaScript does.
 *
 * <p>This is primarily needed by the Java target to limit the size of any
 * single ATN string to 65k length.
 *
 * @see SerializedATN#getSegments
 *
 * @return the serialized ATN segment limit; this implementation returns
 *         {@code Integer.MAX_VALUE}
 */
public int getSerializedATNSegmentLimit() {
return Integer.MAX_VALUE;
}
/**
 * How many bits should be used to do inline token type tests? Java assumes
 * a 64-bit word for bitsets. Must be a valid word size for your target, like
 * 8, 16, 32, 64, etc.
 *
 * @return the word size in bits; this implementation returns 64
 * @since 4.5
 */
public int getInlineTestSetWordSize() { return 64; }
/**
 * Tells whether an identifier in the grammar would clash with a reserved
 * word of the target in the generated code.
 *
 * <p>Identifiers in these positions are never reported: the child of an
 * {@code ASSIGN} that sits under {@code ELEMENT_OPTIONS} or {@code OPTIONS};
 * any child of {@code AT} or {@code ELEMENT_OPTIONS}; and the first child of
 * a {@code LEXER_ACTION_CALL}. Every other identifier is looked up in
 * {@link #getReservedWords()}.
 *
 * @param idNode identifier node to check
 * @return {@code true} if the identifier's text is a reserved word and its
 *         position is not exempt
 */
public boolean grammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) {
    int parentType = idNode.getParent().getType();

    if (parentType == ANTLRParser.ASSIGN) {
        int grandparentType = idNode.getParent().getParent().getType();
        if (grandparentType == ANTLRParser.ELEMENT_OPTIONS || grandparentType == ANTLRParser.OPTIONS) {
            return false;
        }
    }
    else if (parentType == ANTLRParser.AT || parentType == ANTLRParser.ELEMENT_OPTIONS) {
        return false;
    }
    else if (parentType == ANTLRParser.LEXER_ACTION_CALL && idNode.getChildIndex() == 0) {
        // first child is the command name which is part of the ANTLR language;
        // arguments to the command fall through and are checked
        return false;
    }

    return getReservedWords().contains(idNode.getText());
}
/**
 * Tells whether the node's text is one of the target's reserved words,
 * without looking at where the node sits in the tree.
 *
 * @param idNode identifier node to check
 * @return {@code true} if {@link #getReservedWords()} contains the node's text
 */
@Deprecated
protected boolean visibleGrammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) {
return getReservedWords().contains(idNode.getText());
}
/**
 * Tells whether the template group file for this target's language can be
 * loaded. A failure is not reported as an error.
 *
 * @return {@code true} if the group file loaded
 */
public boolean templatesExist() {
    STGroup group = loadTemplatesHelper(false);
    return group != null;
}
/**
 * Loads the template group for this target and configures it: registers
 * renderers for {@code Integer} and {@code String}, and installs an error
 * listener that forwards every StringTemplate message to the tool's error
 * manager as {@code STRING_TEMPLATE_WARNING}.
 *
 * @return the configured group, or {@code null} if the group file could not
 *         be loaded (that failure is reported by the helper)
 */
protected STGroup loadTemplates() {
    STGroup group = loadTemplatesHelper(true);
    if (group != null) {
        group.registerRenderer(Integer.class, new NumberRenderer());
        group.registerRenderer(String.class, new StringRenderer());
        group.setListener(new STErrorListener() {
            @Override
            public void compileTimeError(STMessage msg) {
                forward(msg);
            }

            @Override
            public void runTimeError(STMessage msg) {
                forward(msg);
            }

            @Override
            public void IOError(STMessage msg) {
                forward(msg);
            }

            @Override
            public void internalError(STMessage msg) {
                forward(msg);
            }

            /** All four callbacks are reported the same way. */
            private void forward(STMessage msg) {
                getCodeGenerator().tool.errMgr.toolError(ErrorType.STRING_TEMPLATE_WARNING, msg.cause, msg.toString());
            }
        });
    }
    return group;
}
/**
 * Opens the group file
 * {@code TEMPLATE_ROOT/<language>/<language><GROUP_FILE_EXTENSION>}.
 *
 * @param reportErrorIfFail whether a failure is reported to the tool's error
 *                          manager as {@code MISSING_CODE_GEN_TEMPLATES}
 * @return the group, or {@code null} if constructing it threw
 *         {@code IllegalArgumentException}
 */
private STGroup loadTemplatesHelper(boolean reportErrorIfFail) {
    String language = getLanguage();
    String groupFileName = CodeGenerator.TEMPLATE_ROOT + "/" + language + "/" + language + STGroup.GROUP_FILE_EXTENSION;

    STGroup group = null;
    try {
        group = new STGroupFile(groupFileName);
    }
    catch (IllegalArgumentException iae) {
        if (reportErrorIfFail) {
            gen.tool.errMgr.toolError(ErrorType.MISSING_CODE_GEN_TEMPLATES, iae, getLanguage());
        }
    }
    return group;
}
/**
 * Capability flag named for base-listener generation; this implementation
 * returns {@code true}.
 *
 * @since 4.3
 */
public boolean wantsBaseListener() {
return true;
}
/**
 * Capability flag named for base-visitor generation; this implementation
 * returns {@code true}.
 *
 * @since 4.3
 */
public boolean wantsBaseVisitor() {
return true;
}
/**
 * Capability flag named for method-overloading support in the target
 * language; this implementation returns {@code true}.
 *
 * @since 4.3
 */
public boolean supportsOverloadedMethods() {
return true;
}
/**
 * Tells whether serialized ATN values are emitted as integers. When this
 * returns {@code true}, {@link #encodeInt16AsCharEscape(int)} returns the
 * decimal string of the value instead of a character escape.
 *
 * @return this implementation returns {@code true}
 */
public boolean isATNSerializedAsInts() {
return true;
}
/**
 * Tells whether this target needs header files.
 *
 * @return this implementation returns {@code false}
 * @since 4.6
 */
public boolean needsHeader() { return false; } // Override in targets that need header files.
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy