
// org.antlr.v4.codegen.target.JavaTarget Maven / Gradle / Ivy
/*
* Copyright (c) 2012 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD-3-Clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.codegen.target;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.codegen.Target;
import org.antlr.v4.codegen.UnicodeEscapes;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.tool.ast.GrammarAST;
import org.stringtemplate.v4.STGroup;
import org.stringtemplate.v4.StringRenderer;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
/**
 * Code generation target for Java.
 *
 * <p>Handles the Java-specific parts of code generation visible in this
 * class: the set of identifiers that grammar symbols must avoid, translation
 * of ANTLR string literals into Java string literals, character escapes for
 * serialized data, and per-thread caching of the code generation templates.
 *
 * @author Sam Harwell
 */
public class JavaTarget extends Target {
    /**
     * The Java target can cache the code generation templates.
     * The cache is kept per thread, so each thread loads its own group once.
     */
    private static final ThreadLocal<STGroup> targetTemplates = new ThreadLocal<>();

    /** Reserved words and literals of the Java language. */
    protected static final String[] javaKeywords = {
        "abstract", "assert", "boolean", "break", "byte", "case", "catch",
        "char", "class", "const", "continue", "default", "do", "double", "else",
        "enum", "extends", "false", "final", "finally", "float", "for", "goto",
        "if", "implements", "import", "instanceof", "int", "interface",
        "long", "native", "new", "null", "package", "private", "protected",
        "public", "return", "short", "static", "strictfp", "super", "switch",
        "synchronized", "this", "throw", "throws", "transient", "true", "try",
        "void", "volatile", "while"
    };

    /** Avoid grammar symbols in this set to prevent conflicts in gen'd code. */
    protected final Set<String> badWords = new HashSet<>();

    /**
     * Creates the target for the language named {@code "Java"}.
     *
     * @param gen the code generator this target belongs to
     */
    public JavaTarget(CodeGenerator gen) {
        super(gen, "Java");
    }

    /**
     * Returns the set of identifiers that grammar symbols must not use.
     * The set is filled on first access through {@link #addBadWords()}.
     *
     * @return the (mutable, lazily populated) bad-word set of this target
     */
    public Set<String> getBadWords() {
        if (badWords.isEmpty()) {
            addBadWords();
        }

        return badWords;
    }

    /**
     * Populates {@link #badWords} with the Java keywords plus the names
     * {@code rule} and {@code parserRule}.
     */
    protected void addBadWords() {
        badWords.addAll(Arrays.asList(javaKeywords));
        badWords.add("rule");
        badWords.add("parserRule");
    }

    /**
     * {@inheritDoc}
     *
     * For Java, this is the translation {@code 'a\n"'} → {@code "a\n\""}.
     * Expect single quotes around the incoming literal. Just flip the quotes
     * and replace double quotes with {@code \"}.
     *
     * <p>Note that we have decided to allow people to use '\"' without penalty, so
     * we must build the target string in a loop as {@link String#replace}
     * cannot handle both {@code \"} and {@code "} without a lot of messing
     * around.
     *
     * <p>The first and last characters of {@code literal} are treated as the
     * surrounding quotes and are never copied. A Unicode escape that is
     * truncated or unterminated is dropped instead of raising an exception.
     *
     * @param generator the active code generator (not used by this implementation)
     * @param literal   the ANTLR literal, including its surrounding single quotes
     * @param addQuotes whether to wrap the result in double quotes
     * @return the literal content rewritten for use inside a Java string literal
     */
    @Override
    public String getTargetStringLiteralFromANTLRStringLiteral(
        CodeGenerator generator,
        String literal, boolean addQuotes)
    {
        StringBuilder sb = new StringBuilder();
        String is = literal;
        // Index of the closing quote; nothing at or beyond it is literal content.
        int end = is.length() - 1;

        if ( addQuotes ) sb.append('"');

        for (int i = 1; i < end; ) {
            int codePoint = is.codePointAt(i);
            int toAdvance = Character.charCount(codePoint);
            if (codePoint == '\\') {
                // Anything escaped is what it is! We assume that
                // people know how to escape characters correctly. However
                // we catch anything that does not need an escape in Java (which
                // is what the default implementation is dealing with and remove
                // the escape. The C target does this for instance.
                //
                int escapedCodePoint = is.codePointAt(i+toAdvance);
                // Advance by the full width of the escaped code point so that a
                // supplementary character does not leave us on its low surrogate.
                toAdvance += Character.charCount(escapedCodePoint);
                switch (escapedCodePoint) {
                    // Pass through any escapes that Java also needs
                    //
                    case 'n':
                    case 'r':
                    case 't':
                    case 'b':
                    case 'f':
                    case '\\':
                        // Pass the escape through
                        sb.append('\\');
                        sb.appendCodePoint(escapedCodePoint);
                        break;

                    case 'u':    // Either unnnn or u{nnnnnn}
                        if (i+toAdvance < is.length() && is.charAt(i+toAdvance) == '{') {
                            // Bounded scan: an unterminated brace must not run
                            // past the end of the string.
                            while (i+toAdvance < is.length() && is.charAt(i+toAdvance) != '}') {
                                toAdvance++;
                            }
                            toAdvance++;
                        } else {
                            toAdvance += 4;
                        }
                        // We might have an invalid \\uAB or something; a complete
                        // escape always ends at or before the closing quote.
                        if ( i+toAdvance <= end ) {
                            String fullEscape = is.substring(i, i + toAdvance);
                            appendUnicodeEscapedCodePoint(
                                CharSupport.getCharValueFromCharInGrammarLiteral(fullEscape),
                                sb);
                        }
                        break;

                    default:
                        // The escape is not needed in Java: emit the bare character,
                        // or its Unicode escape when the target requires one.
                        if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(escapedCodePoint)) {
                            appendUnicodeEscapedCodePoint(escapedCodePoint, sb);
                        } else {
                            sb.appendCodePoint(escapedCodePoint);
                        }
                        break;
                }
            } else {
                if (codePoint == 0x22) {
                    // ANTLR doesn't escape " in literal strings,
                    // but every other language needs to do so.
                    sb.append("\\\"");
                } else if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(codePoint)) {
                    appendUnicodeEscapedCodePoint(codePoint, sb);
                } else {
                    sb.appendCodePoint(codePoint);
                }
            }
            i += toAdvance;
        }

        if ( addQuotes ) sb.append('"');

        return sb.toString();
    }

    /**
     * Encodes a 16-bit value as text suitable for a Java string literal.
     *
     * <p>In order of precedence: an entry of the inherited
     * {@code targetCharValueEscape} table if one exists for {@code v}; the
     * character itself for values in {@code [0x20, 127)} that are not digits
     * (the digits {@code 8} and {@code 9} are also emitted as themselves); an
     * octal escape for the remaining values up to 127; otherwise a four-digit
     * hexadecimal Unicode escape.
     *
     * @param v the value to encode; must lie in the range of {@code char}
     * @return the encoded text
     * @throws IllegalArgumentException if {@code v} is outside the range of {@code char}
     */
    @Override
    public String encodeIntAsCharEscape(int v) {
        if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
            throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
        }

        if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) {
            return targetCharValueEscape[v];
        }

        // Digits 0-7 are deliberately excluded here and fall through to the
        // octal branch below.
        if (v >= 0x20 && v < 127 && (!Character.isDigit(v) || v == '8' || v == '9')) {
            return String.valueOf((char)v);
        }

        if ( v>=0 && v<=127 ) {
            String oct = Integer.toOctalString(v);
            return "\\"+ oct;
        }

        // OR-ing in 0x10000 forces five hex digits; dropping the first leaves
        // exactly four, zero-padded.
        String hex = Integer.toHexString(v|0x10000).substring(1,5);
        return "\\u"+hex;
    }

    /**
     * Returns the maximum number of values placed in one serialized ATN segment.
     *
     * @return {@code 65535 / 3}
     */
    @Override
    public int getSerializedATNSegmentLimit() {
        // 65535 is the class file format byte limit for a UTF-8 encoded string literal
        // 3 is the maximum number of bytes it takes to encode a value in the range 0-0xFFFF
        return 65535 / 3;
    }

    /**
     * Reports whether the text of {@code idNode} is one of the bad words of
     * this target.
     *
     * @param idNode the grammar symbol to check
     * @return {@code true} if the symbol's text is contained in {@link #getBadWords()}
     */
    @Override
    protected boolean visibleGrammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) {
        return getBadWords().contains(idNode.getText());
    }

    /**
     * Returns the template group cached for the current thread. On the first
     * call in a thread the group is loaded by the superclass, a
     * {@link JavaStringRenderer} is registered for {@link String}, and the
     * result is stored in the cache.
     *
     * @return the template group for the calling thread
     */
    @Override
    protected STGroup loadTemplates() {
        STGroup result = targetTemplates.get();
        if (result == null) {
            result = super.loadTemplates();
            result.registerRenderer(String.class, new JavaStringRenderer(), true);
            targetTemplates.set(result);
        }

        return result;
    }

    /**
     * String renderer that adds the {@code java-escape} format; every other
     * format is delegated to {@link StringRenderer}.
     */
    protected static class JavaStringRenderer extends StringRenderer {

        /**
         * Renders {@code o}. For the format {@code "java-escape"} the value is
         * cast to {@link String} and each backslash-u sequence has its
         * backslash rewritten as the Unicode escape of the backslash itself.
         *
         * @param o            the value to render
         * @param formatString the requested format, possibly {@code null}
         * @param locale       the locale passed on to the default renderer
         * @return the rendered text
         */
        @Override
        public String toString(Object o, String formatString, Locale locale) {

            if ("java-escape".equals(formatString)) {
                // 5C is the hex code for the \ itself
                return ((String)o).replace("\\u", "\\u005Cu");
            }

            return super.toString(o, formatString, locale);
        }

    }

    /**
     * Appends the Java-style escape of {@code codePoint} to {@code sb} by
     * delegating to {@link UnicodeEscapes#appendJavaStyleEscapedCodePoint}.
     *
     * @param codePoint the code point to escape
     * @param sb        the builder receiving the escape
     */
    @Override
    protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) {
        UnicodeEscapes.appendJavaStyleEscapedCodePoint(codePoint, sb);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy