org.antlr.v4.codegen.target.JavaTarget Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of antlr4 Show documentation
Show all versions of antlr4 Show documentation
The ANTLR 4 grammar compiler.
/*
* Copyright (c) 2012 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD-3-Clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.codegen.target;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.codegen.Target;
import org.antlr.v4.codegen.UnicodeEscapes;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.tool.ast.GrammarAST;
import org.stringtemplate.v4.STGroup;
import org.stringtemplate.v4.StringRenderer;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
/**
 * Code generation {@link Target} for the Java language.
 *
 * <p>This class supplies the Java-specific pieces of code generation that are
 * visible here: the set of identifiers grammar symbols must avoid, translation
 * of ANTLR string literals into Java string literals, encoding of single
 * {@code char} values as escapes, the serialized-ATN segment size limit, and a
 * per-thread cache of the loaded code generation templates.</p>
 *
 * @author Sam Harwell
 */
public class JavaTarget extends Target {

    /**
     * The Java target can cache the code generation templates. The cache is
     * held in a {@link ThreadLocal}, so each thread loads and keeps its own
     * {@link STGroup}.
     */
    private static final ThreadLocal<STGroup> targetTemplates = new ThreadLocal<STGroup>();

    /** Java keywords and literals that may not be used as identifiers. */
    protected static final String[] javaKeywords = {
        "abstract", "assert", "boolean", "break", "byte", "case", "catch",
        "char", "class", "const", "continue", "default", "do", "double", "else",
        "enum", "extends", "false", "final", "finally", "float", "for", "goto",
        "if", "implements", "import", "instanceof", "int", "interface",
        "long", "native", "new", "null", "package", "private", "protected",
        "public", "return", "short", "static", "strictfp", "super", "switch",
        "synchronized", "this", "throw", "throws", "transient", "true", "try",
        "void", "volatile", "while"
    };

    /** Avoid grammar symbols in this set to prevent conflicts in gen'd code. */
    protected final Set<String> badWords = new HashSet<String>();

    /**
     * Creates the Java target for the given code generator.
     *
     * @param gen the code generator this target belongs to
     */
    public JavaTarget(CodeGenerator gen) {
        super(gen, "Java");
    }

    /**
     * Returns the set of words grammar symbols must not use. The set is
     * populated on the first call (while it is still empty) via
     * {@link #addBadWords()}.
     *
     * @return the live, mutable set of bad words (not a copy)
     */
    public Set<String> getBadWords() {
        if (badWords.isEmpty()) {
            addBadWords();
        }
        return badWords;
    }

    /**
     * Fills {@link #badWords} with every entry of {@link #javaKeywords} plus
     * the names {@code rule} and {@code parserRule}.
     */
    protected void addBadWords() {
        badWords.addAll(Arrays.asList(javaKeywords));
        // NOTE(review): presumably these two clash with identifiers used by
        // the generated code or its templates -- confirm against the templates.
        badWords.add("rule");
        badWords.add("parserRule");
    }

    /**
     * {@inheritDoc}
     *
     * <p>For Java, this is the translation {@code 'a\n"'} &rarr; {@code "a\n\""}.
     * Expect single quotes around the incoming literal. Just flip the quotes
     * and replace double quotes with {@code \"}.</p>
     *
     * <p>Note that we have decided to allow people to use '\"' without penalty, so
     * we must build the target string in a loop as {@link String#replace}
     * cannot handle both {@code \"} and {@code "} without a lot of messing
     * around.</p>
     *
     * <p>Malformed Unicode escapes (too few hex digits, or a {@code \\u{}
     * without a closing brace) are dropped from the output rather than causing
     * an exception.</p>
     *
     * @param generator the code generator (not used by this implementation)
     * @param literal the ANTLR literal, including its surrounding single quotes
     * @param addQuotes whether to wrap the result in double quotes
     * @return the literal's content rewritten for use inside a Java string
     */
    @Override
    public String getTargetStringLiteralFromANTLRStringLiteral(
        CodeGenerator generator,
        String literal, boolean addQuotes)
    {
        StringBuilder sb = new StringBuilder();
        String is = literal;

        if ( addQuotes ) sb.append('"');

        // Index 0 and index length-1 hold the surrounding single quotes.
        for (int i = 1; i < is.length() - 1; ) {
            int codePoint = is.codePointAt(i);
            int toAdvance = Character.charCount(codePoint);
            if (codePoint == '\\') {
                // Anything escaped is what it is! We assume that
                // people know how to escape characters correctly. However
                // we catch anything that does not need an escape in Java (which
                // is what the default implementation is dealing with and remove
                // the escape. The C target does this for instance.
                //
                int escapedCodePoint = is.codePointAt(i + toAdvance);
                // Skip the whole escaped code point; a supplementary character
                // occupies two chars, so a fixed +1 would leave its low
                // surrogate behind to be emitted a second time.
                toAdvance += Character.charCount(escapedCodePoint);
                switch (escapedCodePoint) {
                    // Pass through any escapes that Java also needs
                    //
                    case 'n':
                    case 'r':
                    case 't':
                    case 'b':
                    case 'f':
                    case '\\':
                        // Pass the escape through
                        sb.append('\\');
                        sb.appendCodePoint(escapedCodePoint);
                        break;

                    case 'u':    // Either unnnn or u{nnnnnn}
                        if (i + toAdvance < is.length() && is.charAt(i + toAdvance) == '{') {
                            // Scan to the closing brace, but never past the end
                            // of the literal if the brace is missing.
                            while (i + toAdvance < is.length() && is.charAt(i + toAdvance) != '}') {
                                toAdvance++;
                            }
                            toAdvance++;
                        } else {
                            toAdvance += 4;
                        }
                        // A well-formed escape ends before the closing quote;
                        // anything reaching it (e.g. \\uAB, \\uABC, or an
                        // unterminated \\u{) is invalid and is skipped.
                        if ( i + toAdvance < is.length() ) {
                            String fullEscape = is.substring(i, i + toAdvance);
                            appendUnicodeEscapedCodePoint(
                                CharSupport.getCharValueFromCharInGrammarLiteral(fullEscape),
                                sb);
                        }
                        break;

                    default:
                        // Any other escaped character: drop the backslash.
                        if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(escapedCodePoint)) {
                            appendUnicodeEscapedCodePoint(escapedCodePoint, sb);
                        } else {
                            sb.appendCodePoint(escapedCodePoint);
                        }
                        break;
                }
            } else {
                if (codePoint == 0x22) {
                    // ANTLR doesn't escape " in literal strings,
                    // but every other language needs to do so.
                    sb.append("\\\"");
                } else if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(codePoint)) {
                    appendUnicodeEscapedCodePoint(codePoint, sb);
                } else {
                    sb.appendCodePoint(codePoint);
                }
            }
            i += toAdvance;
        }

        if ( addQuotes ) sb.append('"');

        return sb.toString();
    }

    /**
     * Encodes a single {@code char} value as text suitable for a Java
     * character or string literal.
     *
     * <p>In order of precedence: a non-null entry of
     * {@code targetCharValueEscape} for {@code v} is returned as is; printable
     * ASCII ({@code 0x20}..{@code 0x7E}) other than the digits {@code 0}-{@code 7}
     * is returned as the character itself; any remaining value up to
     * {@code 127} becomes an octal escape; everything else becomes a four-digit
     * {@code \\uXXXX} escape.</p>
     *
     * @param v the value to encode
     * @return the escaped (or literal) representation of {@code v}
     * @throws IllegalArgumentException if {@code v} is outside the range of
     *         {@code char}
     */
    @Override
    public String encodeIntAsCharEscape(int v) {
        if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
            throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
        }

        // targetCharValueEscape is not declared in this class; presumably it is
        // the escape table inherited from Target.
        if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) {
            return targetCharValueEscape[v];
        }

        // NOTE(review): digits 0-7 are excluded here, presumably so that a
        // literal digit cannot be absorbed into a preceding octal escape.
        if (v >= 0x20 && v < 127 && (!Character.isDigit(v) || v == '8' || v == '9')) {
            return String.valueOf((char)v);
        }

        if ( v>=0 && v<=127 ) {
            String oct = Integer.toOctalString(v);
            return "\\"+ oct;
        }

        // OR-ing in 0x10000 forces five hex digits, so dropping the first one
        // leaves exactly four, zero-padded.
        String hex = Integer.toHexString(v|0x10000).substring(1,5);
        return "\\u"+hex;
    }

    /** {@inheritDoc} */
    @Override
    public int getSerializedATNSegmentLimit() {
        // 65535 is the class file format byte limit for a UTF-8 encoded string literal
        // 3 is the maximum number of bytes it takes to encode a value in the range 0-0xFFFF
        return 65535 / 3;
    }

    /**
     * Reports a grammar symbol as problematic when its text is one of the
     * words returned by {@link #getBadWords()}.
     *
     * @param idNode the AST node holding the symbol's name
     * @return {@code true} if the symbol's text is a bad word
     */
    @Override
    protected boolean visibleGrammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) {
        return getBadWords().contains(idNode.getText());
    }

    /**
     * Returns the code generation templates for the current thread, loading
     * them through the superclass and registering a {@link JavaStringRenderer}
     * for {@link String} values the first time the thread asks for them.
     *
     * @return the cached template group for the calling thread
     */
    @Override
    protected STGroup loadTemplates() {
        STGroup result = targetTemplates.get();
        if (result == null) {
            result = super.loadTemplates();
            result.registerRenderer(String.class, new JavaStringRenderer(), true);
            targetTemplates.set(result);
        }

        return result;
    }

    /**
     * String renderer that adds the {@code java-escape} format to the formats
     * handled by {@link StringRenderer}.
     */
    protected static class JavaStringRenderer extends StringRenderer {

        /**
         * Renders {@code o}. With the format {@code java-escape}, every
         * occurrence of {@code \}{@code u} is rewritten to
         * {@code \}{@code u005Cu}; any other format is delegated to the
         * superclass.
         */
        @Override
        public String toString(Object o, String formatString, Locale locale) {
            if ("java-escape".equals(formatString)) {
                // 5C is the hex code for the \ itself
                return ((String)o).replace("\\u", "\\u005Cu");
            }

            return super.toString(o, formatString, locale);
        }
    }

    /**
     * Appends {@code codePoint} to {@code sb} using
     * {@link UnicodeEscapes#appendJavaStyleEscapedCodePoint}.
     *
     * @param codePoint the code point to escape
     * @param sb the builder receiving the escape
     */
    @Override
    protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
        UnicodeEscapes.appendJavaStyleEscapedCodePoint(codePoint, sb);
    }
}