org.antlr.v4.codegen.Target Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of antlr4 Show documentation
The ANTLR 4 grammar compiler.
There is a newer version: 4.13.2
/*
 * [The "BSD license"]
 *  Copyright (c) 2012 Terence Parr
 *  Copyright (c) 2012 Sam Harwell
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.antlr.v4.codegen;

import org.antlr.v4.codegen.model.RuleFunction;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.Rule;
import org.antlr.v4.tool.ast.GrammarAST;
import org.stringtemplate.v4.ST;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

/** */
public class Target {
	/** For pure strings of Java 16-bit Unicode char, how can we display
	 *  it in the target language as a literal.  Useful for dumping
	 *  predicates and such that may refer to chars that need to be escaped
	 *  when represented as strings.  Also, templates need to be escaped so
	 *  that the target language can hold them as a string.
	 *  
	 *  I have defined (via the constructor) the set of typical escapes,
	 *  but your {@link Target} subclass is free to alter the translated chars
	 *  or add more definitions.  This is non-static so each target can have
	 *  a different set in memory at same time.
	 */
	protected String[] targetCharValueEscape = new String[255];

	public CodeGenerator gen;

	/** Avoid grammar symbols in this set to prevent conflicts in gen'd code. */
	public Set badWords = new HashSet();

	public static String[] javaKeywords = {
		"abstract", "assert", "boolean", "break", "byte", "case", "catch",
		"char", "class", "const", "continue", "default", "do", "double", "else",
		"enum", "extends", "false", "final", "finally", "float", "for",
		"if", "implements", "import", "instanceof", "int", "interface",
		"long", "native", "new", "null", "package", "private", "protected",
		"public", "return", "short", "static", "strictfp", "super", "switch",
		"synchronized", "this", "throw", "throws", "transient", "true", "try",
		"void", "volatile", "while"
	};

	public Target(CodeGenerator gen) {
		targetCharValueEscape['\n'] = "\\n";
		targetCharValueEscape['\r'] = "\\r";
		targetCharValueEscape['\t'] = "\\t";
		targetCharValueEscape['\b'] = "\\b";
		targetCharValueEscape['\f'] = "\\f";
		targetCharValueEscape['\\'] = "\\\\";
		targetCharValueEscape['\''] = "\\'";
		targetCharValueEscape['"'] = "\\\"";
		this.gen = gen;
		addBadWords();
	}

	public void addBadWords() {
		badWords.addAll(Arrays.asList(javaKeywords));
		badWords.add("rule");
		badWords.add("parserRule");
	}

	protected void genFile(Grammar g,
						   ST outputFileST,
						   String fileName)
	{
		gen.write(outputFileST, fileName);
	}

	protected void genListenerFile(Grammar g,
								   ST outputFileST)
	{
		String fileName = gen.getListenerFileName();
		gen.write(outputFileST, fileName);
	}

	protected void genRecognizerHeaderFile(Grammar g,
										   ST headerFileST,
										   String extName) // e.g., ".h"
	{
		// no header file by default
	}

	/** Get a meaningful name for a token type useful during code generation.
	 *  Literals without associated names are converted to the string equivalent
	 *  of their integer values. Used to generate x==ID and x==34 type comparisons
	 *  etc...  Essentially we are looking for the most obvious way to refer
	 *  to a token type in the generated code.
	 */
	public String getTokenTypeAsTargetLabel(Grammar g, int ttype) {
		String name = g.getTokenDisplayName(ttype);
		// If name is a literal, return the token type instead
		if ( name==null || name.charAt(0)=='\'' ) {
			return String.valueOf(ttype);
		}
		return name;
	}

	public String[] getTokenTypesAsTargetLabels(Grammar g, int[] ttypes) {
		String[] labels = new String[ttypes.length];
		for (int i=0; i
	 * Note that we have decided to allow people to use '\"' without penalty, so
	 * we must build the target string in a loop as {@link Utils#replace} cannot
	 * handle both {@code \"} and {@code "} without a lot of messing around.
	 */
	public String getTargetStringLiteralFromANTLRStringLiteral(
		CodeGenerator generator,
		String literal, boolean addQuotes)
	{
		StringBuilder sb = new StringBuilder();
		String is = literal;

        if ( addQuotes ) sb.append('"');

        for (int i = 1; i < is.length() -1; i++) {
            if  (is.charAt(i) == '\\') {
                // Anything escaped is what it is! We assume that
                // people know how to escape characters correctly. However
                // we catch anything that does not need an escape in Java (which
                // is what the default implementation is dealing with and remove
                // the escape. The C target does this for instance.
                //
                switch (is.charAt(i+1)) {
                    // Pass through any escapes that Java also needs
                    //
                    case    '"':
                    case    'n':
                    case    'r':
                    case    't':
                    case    'b':
                    case    'f':
                    case    '\\':
						// Pass the escape through
						sb.append('\\');
                        break;

					case    'u':    // Assume unnnn
						// Pass the escape through as double \\
						// so that Java leaves as \u0000 string not char
						sb.append('\\');
						sb.append('\\');
                        break;

                    default:
                        // Remove the escape by virtue of not adding it here
                        // Thus \' becomes ' and so on
                        break;
                }

                // Go past the \ character
                i++;
            } else {
                // Characters that don't need \ in ANTLR 'strings' but do in Java
                if (is.charAt(i) == '"') {
                    // We need to escape " in Java
                    sb.append('\\');
                }
            }
            // Add in the next character, which may have been escaped
            sb.append(is.charAt(i));
        }

		if ( addQuotes ) sb.append('"');

		return sb.toString();
	}

	/** Assume 16-bit char */
	public String encodeIntAsCharEscape(int v) {
		if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) {
			throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v));
		}

		if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) {
			return targetCharValueEscape[v];
		}

		if (v >= 0x20 && v < 127 && (!Character.isDigit(v) || v == '8' || v == '9')) {
			return String.valueOf((char)v);
		}

		if ( v>=0 && v<=127 ) {
			String oct = Integer.toOctalString(v);
			return "\\"+ oct;
		}

		String hex = Integer.toHexString(v|0x10000).substring(1,5);
		return "\\u"+hex;
	}

	public String getLoopLabel(GrammarAST ast) {
		return "loop"+ ast.token.getTokenIndex();
	}

	public String getLoopCounter(GrammarAST ast) {
		return "cnt"+ ast.token.getTokenIndex();
	}

	public String getListLabel(String label) {
		ST st = gen.templates.getInstanceOf("ListLabelName");
		st.add("label", label);
		return st.render();
	}

	public String getRuleFunctionContextStructName(Rule r) {
		if ( r.g.isLexer() ) {
			return gen.templates.getInstanceOf("LexerRuleContext").render();
		}
		return Utils.capitalize(r.name)+gen.templates.getInstanceOf("RuleContextNameSuffix").render();
	}

	public String getAltLabelContextStructName(String label) {
		return Utils.capitalize(label)+gen.templates.getInstanceOf("RuleContextNameSuffix").render();
	}

	/** If we know which actual function, we can provide the actual ctx type.
	 *  This will contain implicit labels etc...  From outside, though, we
	 *  see only ParserRuleContext unless there are externally visible stuff
	 *  like args, locals, explicit labels, etc...
	 */
	public String getRuleFunctionContextStructName(RuleFunction function) {
		Rule r = function.rule;
		if ( r.g.isLexer() ) {
			return gen.templates.getInstanceOf("LexerRuleContext").render();
		}
		return Utils.capitalize(r.name)+gen.templates.getInstanceOf("RuleContextNameSuffix").render();
	}

	// should be same for all refs to same token like ctx.ID within single rule function
	// for literals like 'while', we gen _s
	public String getImplicitTokenLabel(String tokenName) {
		ST st = gen.templates.getInstanceOf("ImplicitTokenLabel");
		int ttype = gen.g.getTokenType(tokenName);
		if ( tokenName.startsWith("'") ) {
			return "s"+ttype;
		}
		String text = getTokenTypeAsTargetLabel(gen.g, ttype);
		st.add("tokenName", text);
		return st.render();
	}

	// x=(A|B)
	public String getImplicitSetLabel(String id) {
		ST st = gen.templates.getInstanceOf("ImplicitSetLabel");
		st.add("id", id);
		return st.render();
	}

	public String getImplicitRuleLabel(String ruleName) {
		ST st = gen.templates.getInstanceOf("ImplicitRuleLabel");
		st.add("ruleName", ruleName);
		return st.render();
	}

	public String getElementListName(String name) {
		ST st = gen.templates.getInstanceOf("ElementListName");
		st.add("elemName", getElementName(name));
		return st.render();
	}

	public String getElementName(String name) {
		if (".".equals(name)) {
			return "_wild";
		}

		if ( gen.g.getRule(name)!=null ) return name;
		int ttype = gen.g.getTokenType(name);
		if ( ttype==Token.INVALID_TYPE ) return name;
		return getTokenTypeAsTargetLabel(gen.g, ttype);
	}

	public boolean grammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) {
		switch (idNode.getParent().getType()) {
		case ANTLRParser.ASSIGN:
			switch (idNode.getParent().getParent().getType()) {
			case ANTLRParser.ELEMENT_OPTIONS:
			case ANTLRParser.OPTIONS:
				return false;

			default:
				break;
			}

			break;

		case ANTLRParser.AT:
		case ANTLRParser.ELEMENT_OPTIONS:
			return false;

		case ANTLRParser.LEXER_ACTION_CALL:
			if (idNode.getChildIndex() == 0) {
				// first child is the command name which is part of the ANTLR language
				return false;
			}

			// arguments to the command should be checked
			break;

		default:
			break;
		}

		return badWords.contains(idNode.getText());
	}

}