All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.antlr.v4.codegen.Target Maven / Gradle / Ivy

There is a newer version: 4.13.2
Show newest version
/*
 * Copyright (c) 2012-2016 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

package org.antlr.v4.codegen;

import org.antlr.v4.Tool;
import org.antlr.v4.codegen.model.RuleFunction;
import org.antlr.v4.codegen.model.SerializedATN;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.runtime.RuntimeMetaData;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.Rule;
import org.antlr.v4.tool.ast.GrammarAST;
import org.stringtemplate.v4.NumberRenderer;
import org.stringtemplate.v4.ST;
import org.stringtemplate.v4.STErrorListener;
import org.stringtemplate.v4.STGroup;
import org.stringtemplate.v4.STGroupFile;
import org.stringtemplate.v4.StringRenderer;
import org.stringtemplate.v4.misc.STMessage;

/** */
public abstract class Target {
	/** For pure strings of Java 16-bit Unicode char, how can we display
	 *  it in the target language as a literal.  Useful for dumping
	 *  predicates and such that may refer to chars that need to be escaped
	 *  when represented as strings.  Also, templates need to be escaped so
	 *  that the target language can hold them as a string.
	 *  

* I have defined (via the constructor) the set of typical escapes, * but your {@link Target} subclass is free to alter the translated chars * or add more definitions. This is non-static so each target can have * a different set in memory at same time. */ protected String[] targetCharValueEscape = new String[255]; protected final CodeGenerator gen; private final String language; private STGroup templates; protected Target(CodeGenerator gen, String language) { targetCharValueEscape['\n'] = "\\n"; targetCharValueEscape['\r'] = "\\r"; targetCharValueEscape['\t'] = "\\t"; targetCharValueEscape['\b'] = "\\b"; targetCharValueEscape['\f'] = "\\f"; targetCharValueEscape['\\'] = "\\\\"; targetCharValueEscape['\''] = "\\'"; targetCharValueEscape['"'] = "\\\""; this.gen = gen; this.language = language; } public CodeGenerator getCodeGenerator() { return gen; } public String getLanguage() { return language; } /** ANTLR tool should check output templates / target are compatible with tool code generation. * For now, a simple string match used on x.y of x.y.z scheme. We use a method to avoid mismatches * between a template called VERSION. This value is checked against Tool.VERSION during load of templates. * * This additional method forces all targets 4.3 and beyond to add this method. * * @since 4.3 */ public abstract String getVersion(); public STGroup getTemplates() { if (templates == null) { String version = getVersion(); if ( version==null || !RuntimeMetaData.getMajorMinorVersion(version).equals(RuntimeMetaData.getMajorMinorVersion(Tool.VERSION))) { gen.tool.errMgr.toolError(ErrorType.INCOMPATIBLE_TOOL_AND_TEMPLATES, version, Tool.VERSION, language); } templates = loadTemplates(); } return templates; } protected void genFile(Grammar g, ST outputFileST, String fileName) { getCodeGenerator().write(outputFileST, fileName); } /** Get a meaningful name for a token type useful during code generation. * Literals without associated names are converted to the string equivalent * of their integer values. Used to generate x==ID and x==34 type comparisons * etc... Essentially we are looking for the most obvious way to refer * to a token type in the generated code. */ public String getTokenTypeAsTargetLabel(Grammar g, int ttype) { String name = g.getTokenName(ttype); // If name is not valid, return the token type instead if ( Grammar.INVALID_TOKEN_NAME.equals(name) ) { return String.valueOf(ttype); } return name; } public String[] getTokenTypesAsTargetLabels(Grammar g, int[] ttypes) { String[] labels = new String[ttypes.length]; for (int i=0; iConvert from an ANTLR string literal found in a grammar file to an * equivalent string literal in the target language. *

*

* For Java, this is the translation {@code 'a\n"'} → {@code "a\n\""}. * Expect single quotes around the incoming literal. Just flip the quotes * and replace double quotes with {@code \"}. *

*

* Note that we have decided to allow people to use '\"' without penalty, so * we must build the target string in a loop as {@link String#replace} * cannot handle both {@code \"} and {@code "} without a lot of messing * around. *

*/ public String getTargetStringLiteralFromANTLRStringLiteral( CodeGenerator generator, String literal, boolean addQuotes) { StringBuilder sb = new StringBuilder(); String is = literal; if ( addQuotes ) sb.append('"'); for (int i = 1; i < is.length() -1; i++) { if (is.charAt(i) == '\\') { // Anything escaped is what it is! We assume that // people know how to escape characters correctly. However // we catch anything that does not need an escape in Java (which // is what the default implementation is dealing with and remove // the escape. The C target does this for instance. // switch (is.charAt(i+1)) { // Pass through any escapes that Java also needs // case '"': case 'n': case 'r': case 't': case 'b': case 'f': case '\\': // Pass the escape through sb.append('\\'); break; case 'u': // Assume unnnn // Pass the escape through as double \\ // so that Java leaves as \u0000 string not char sb.append('\\'); sb.append('\\'); break; default: // Remove the escape by virtue of not adding it here // Thus \' becomes ' and so on break; } // Go past the \ character i++; } else { // Characters that don't need \ in ANTLR 'strings' but do in Java if (is.charAt(i) == '"') { // We need to escape " in Java sb.append('\\'); } } // Add in the next character, which may have been escaped sb.append(is.charAt(i)); } if ( addQuotes ) sb.append('"'); return sb.toString(); } /** Assume 16-bit char */ public String encodeIntAsCharEscape(int v) { if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) { throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v)); } if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) { return targetCharValueEscape[v]; } if (v >= 0x20 && v < 127 && (!Character.isDigit(v) || v == '8' || v == '9')) { return String.valueOf((char)v); } if ( v>=0 && v<=127 ) { String oct = Integer.toOctalString(v); return "\\"+ oct; } String hex = Integer.toHexString(v|0x10000).substring(1,5); return "\\u"+hex; } public String getLoopLabel(GrammarAST ast) { return "loop"+ ast.token.getTokenIndex(); } public String getLoopCounter(GrammarAST ast) { return "cnt"+ ast.token.getTokenIndex(); } public String getListLabel(String label) { ST st = getTemplates().getInstanceOf("ListLabelName"); st.add("label", label); return st.render(); } public String getRuleFunctionContextStructName(Rule r) { if ( r.g.isLexer() ) { return getTemplates().getInstanceOf("LexerRuleContext").render(); } return Utils.capitalize(r.name)+getTemplates().getInstanceOf("RuleContextNameSuffix").render(); } public String getAltLabelContextStructName(String label) { return Utils.capitalize(label)+getTemplates().getInstanceOf("RuleContextNameSuffix").render(); } /** If we know which actual function, we can provide the actual ctx type. * This will contain implicit labels etc... From outside, though, we * see only ParserRuleContext unless there are externally visible stuff * like args, locals, explicit labels, etc... */ public String getRuleFunctionContextStructName(RuleFunction function) { Rule r = function.rule; if ( r.g.isLexer() ) { return getTemplates().getInstanceOf("LexerRuleContext").render(); } return Utils.capitalize(r.name)+getTemplates().getInstanceOf("RuleContextNameSuffix").render(); } // should be same for all refs to same token like ctx.ID within single rule function // for literals like 'while', we gen _s public String getImplicitTokenLabel(String tokenName) { ST st = getTemplates().getInstanceOf("ImplicitTokenLabel"); int ttype = getCodeGenerator().g.getTokenType(tokenName); if ( tokenName.startsWith("'") ) { return "s"+ttype; } String text = getTokenTypeAsTargetLabel(getCodeGenerator().g, ttype); st.add("tokenName", text); return st.render(); } // x=(A|B) public String getImplicitSetLabel(String id) { ST st = getTemplates().getInstanceOf("ImplicitSetLabel"); st.add("id", id); return st.render(); } public String getImplicitRuleLabel(String ruleName) { ST st = getTemplates().getInstanceOf("ImplicitRuleLabel"); st.add("ruleName", ruleName); return st.render(); } public String getElementListName(String name) { ST st = getTemplates().getInstanceOf("ElementListName"); st.add("elemName", getElementName(name)); return st.render(); } public String getElementName(String name) { if (".".equals(name)) { return "_wild"; } if ( getCodeGenerator().g.getRule(name)!=null ) return name; int ttype = getCodeGenerator().g.getTokenType(name); if ( ttype==Token.INVALID_TYPE ) return name; return getTokenTypeAsTargetLabel(getCodeGenerator().g, ttype); } /** Generate TParser.java and TLexer.java from T.g4 if combined, else * just use T.java as output regardless of type. */ public String getRecognizerFileName(boolean header) { ST extST = getTemplates().getInstanceOf("codeFileExtension"); String recognizerName = gen.g.getRecognizerName(); return recognizerName+extST.render(); } /** A given grammar T, return the listener name such as * TListener.java, if we're using the Java target. */ public String getListenerFileName(boolean header) { assert gen.g.name != null; ST extST = getTemplates().getInstanceOf("codeFileExtension"); String listenerName = gen.g.name + "Listener"; return listenerName+extST.render(); } /** A given grammar T, return the visitor name such as * TVisitor.java, if we're using the Java target. */ public String getVisitorFileName(boolean header) { assert gen.g.name != null; ST extST = getTemplates().getInstanceOf("codeFileExtension"); String listenerName = gen.g.name + "Visitor"; return listenerName+extST.render(); } /** A given grammar T, return a blank listener implementation * such as TBaseListener.java, if we're using the Java target. */ public String getBaseListenerFileName(boolean header) { assert gen.g.name != null; ST extST = getTemplates().getInstanceOf("codeFileExtension"); String listenerName = gen.g.name + "BaseListener"; return listenerName+extST.render(); } /** A given grammar T, return a blank listener implementation * such as TBaseListener.java, if we're using the Java target. */ public String getBaseVisitorFileName(boolean header) { assert gen.g.name != null; ST extST = getTemplates().getInstanceOf("codeFileExtension"); String listenerName = gen.g.name + "BaseVisitor"; return listenerName+extST.render(); } /** * Gets the maximum number of 16-bit unsigned integers that can be encoded * in a single segment of the serialized ATN. * * @see SerializedATN#getSegments * * @return the serialized ATN segment limit */ public int getSerializedATNSegmentLimit() { return Integer.MAX_VALUE; } /** How many bits should be used to do inline token type tests? Java assumes * a 64-bit word for bitsets. Must be a valid wordsize for your target like * 8, 16, 32, 64, etc... * * @since 4.5 */ public int getInlineTestSetWordSize() { return 64; } public boolean grammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) { switch (idNode.getParent().getType()) { case ANTLRParser.ASSIGN: switch (idNode.getParent().getParent().getType()) { case ANTLRParser.ELEMENT_OPTIONS: case ANTLRParser.OPTIONS: return false; default: break; } break; case ANTLRParser.AT: case ANTLRParser.ELEMENT_OPTIONS: return false; case ANTLRParser.LEXER_ACTION_CALL: if (idNode.getChildIndex() == 0) { // first child is the command name which is part of the ANTLR language return false; } // arguments to the command should be checked break; default: break; } return visibleGrammarSymbolCausesIssueInGeneratedCode(idNode); } protected abstract boolean visibleGrammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode); public boolean templatesExist() { String groupFileName = CodeGenerator.TEMPLATE_ROOT + "/" + getLanguage() + "/" + getLanguage() + STGroup.GROUP_FILE_EXTENSION; STGroup result = null; try { result = new STGroupFile(groupFileName); } catch (IllegalArgumentException iae) { result = null; } return result!=null; } protected STGroup loadTemplates() { String groupFileName = CodeGenerator.TEMPLATE_ROOT + "/" + getLanguage() + "/" + getLanguage() + STGroup.GROUP_FILE_EXTENSION; STGroup result = null; try { result = new STGroupFile(groupFileName); } catch (IllegalArgumentException iae) { gen.tool.errMgr.toolError(ErrorType.MISSING_CODE_GEN_TEMPLATES, iae, language); } if ( result==null ) return null; result.registerRenderer(Integer.class, new NumberRenderer()); result.registerRenderer(String.class, new StringRenderer()); result.setListener(new STErrorListener() { @Override public void compileTimeError(STMessage msg) { reportError(msg); } @Override public void runTimeError(STMessage msg) { reportError(msg); } @Override public void IOError(STMessage msg) { reportError(msg); } @Override public void internalError(STMessage msg) { reportError(msg); } private void reportError(STMessage msg) { getCodeGenerator().tool.errMgr.toolError(ErrorType.STRING_TEMPLATE_WARNING, msg.cause, msg.toString()); } }); return result; } /** * @since 4.3 */ public boolean wantsBaseListener() { return true; } /** * @since 4.3 */ public boolean wantsBaseVisitor() { return true; } /** * @since 4.3 */ public boolean supportsOverloadedMethods() { return true; } /** @since 4.6 */ public boolean needsHeader() { return false; }; // Override in targets that need header files. }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy