// jflex.generator.Emitter Maven / Gradle / Ivy
// Show all versions of jflex Show documentation
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* JFlex 1.8.0 *
* Copyright (C) 1998-2018 Gerwin Klein *
* All rights reserved. *
* *
* License: BSD *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
package jflex.generator;
import java.io.File;
import java.io.PrintWriter;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jflex.base.Build;
import jflex.base.Pair;
import jflex.core.AbstractLexScan;
import jflex.core.Action;
import jflex.core.EOFActions;
import jflex.core.LexParse;
import jflex.core.LexScan;
import jflex.core.unicode.CMapBlock;
import jflex.core.unicode.CharClasses;
import jflex.dfa.DFA;
import jflex.exceptions.GeneratorException;
import jflex.io.FileUtils;
import jflex.l10n.ErrorMessages;
import jflex.logging.Out;
import jflex.option.Options;
import jflex.skeleton.Skeleton;
/**
* This class manages the actual code generation, putting the scanner together, filling in skeleton
* sections etc.
*
* Table compression, String packing etc. is also done here.
*
* @author Gerwin Klein
* @version JFlex 1.8.0
*/
public final class Emitter {
/**
 * Matches text that ends with a javadoc comment, optionally followed by annotations and
 * whitespace. Arbitrary text may precede the comment. Group 1 captures everything between the
 * comment opener and the final comment terminator.
 *
 * <p>Compiled with {@link Pattern#COMMENTS}, so the literal whitespace inside the pattern
 * strings is ignored, and with {@link Pattern#DOTALL} so that {@code .} also matches line
 * terminators.
 *
 * <p>NOTE(review): the escape branch of the double-quoted-string alternative looks like it
 * matches a bare double quote rather than a backslash followed by a quote; confirm intent.
 */
private static final Pattern JAVADOC_COMMENT_AND_MAYBE_ANNOTATIONS_PATTERN =
Pattern.compile(
".*/\\*\\*(.*)\\*/" // javadoc comment, embedded '*/' disallowed
+ "(?:\\s*@[a-z][a-z0-9_]*(?:\\.[a-z][a-z0-9_]*)*" // @[p.ack.age.]AnnotationClass
+ " (?:\\s*\\(\\s*(?:\"(?:\\\"|[^\"])*\"" // ignore close parens in double quotes
+ " |'(?:[^']|\\\\(?:'|u[0-9a-f]{4}))'" // ignore close parens in
// single quotes
+ " |[^)])+\\))?" // optional annotation params
+ ")*\\s*", // zero or more annotations, followed by optional whitespace
Pattern.DOTALL | Pattern.CASE_INSENSITIVE | Pattern.COMMENTS);
// bit masks for state attributes (combined per state in emitAttributes)
// FINAL: set when the DFA reports the state as final
private static final int FINAL = 1;
// NOLOOK: set when the state's isTransition entry is false
private static final int NOLOOK = 8;
// lexer specification file; its path is printed in the "// source:" header line
private final File inputFile;
// name of the output file, as passed to the constructor
final String outputFileName;
// destination of all generated code (see the print/println helpers)
private final PrintWriter out;
// skeleton writing to the same PrintWriter
private final Skeleton skel;
// the scanner of the parser; queried for the generation options
private final AbstractLexScan scanner;
private final LexParse parser;
// the automaton whose tables are emitted
private final DFA dfa;
// per DFA state; a false entry yields the NOLOOK attribute
private boolean[] isTransition;
// for row killing:
// rowMap[state] * numCols is emitted as the state's row index into the transition table
private int[] rowMap;
// rows flagged here are left out of the emitted transition table
private boolean[] rowKilled;
// for col killing:
private int numCols;
// translates a character class code to its column in the emitted table
private int[] colMap;
// columns flagged here are left out of the emitted transition table
private boolean[] colKilled;
/** maps actions to their switch label */
// NOTE(review): raw Map type; presumably Map<Action, Integer> - confirm against its usages
private final Map actionTable = new LinkedHashMap<>();
// value of scanner.visibility(); prefixed to emitted members such as yypushback
private final String visibility;
// NOTE(review): not used in the visible part of the class; presumably EOF-related - confirm
private String eofCode;
private String eofThrow;
/**
 * Creates an emitter that writes the generated code to {@code writer}.
 *
 * @param outputFileName name of the output file.
 * @param inputFile input grammar.
 * @param parser a {@link LexParse}; its scanner supplies the generation options.
 * @param dfa a {@link DFA}.
 * @param writer output file.
 */
Emitter(String outputFileName, File inputFile, LexParse parser, DFA dfa, PrintWriter writer) {
  this.inputFile = inputFile;
  this.outputFileName = outputFileName;
  this.dfa = dfa;

  this.parser = parser;
  this.scanner = parser.scanner;
  this.visibility = this.scanner.visibility();

  // the skeleton shares the writer with the emitter
  this.out = writer;
  this.skel = new Skeleton(writer);
}
/**
 * Strips the generic type parameter list, if any, from a class name.
 *
 * @param className class name, possibly followed by type parameters
 * @return the part of {@code className} before the first {@code '<'}, or {@code className}
 *     itself when it contains none
 */
static String getBaseName(String className) {
  final int typeParamsStart = className.indexOf('<');
  return typeParamsStart < 0 ? className : className.substring(0, typeParamsStart);
}
/**
 * Determines the file that {@code name} is written to and, unless backups are switched off,
 * renames an already existing file of that name to {@code name~} first.
 *
 * <p>The file is located in {@code Options.getDir()} when that is set, otherwise in the parent
 * directory of {@code input}, otherwise {@code name} is used as is.
 *
 * @param name the name (without path) of the file
 * @param input fall back location if {@code Options.getDir()} is {@code null} (expected to be a
 *     file in the directory to write to); may be {@code null}
 * @return The constructed File
 */
public static File normalize(String name, File input) {
  final File outputFile;
  if (Options.getDir() != null) {
    outputFile = new File(Options.getDir(), name);
  } else {
    String parentDir = (input == null) ? null : input.getParent();
    outputFile = (parentDir == null) ? new File(name) : new File(parentDir, name);
  }

  // nothing to back up
  if (!outputFile.exists() || Options.no_backup) {
    return outputFile;
  }

  File backup = new File(outputFile.toString() + "~");
  if (backup.exists()) {
    //noinspection ResultOfMethodCallIgnored
    backup.delete();
  }

  boolean saved = outputFile.renameTo(backup);
  if (saved) {
    Out.println("Old file \"" + outputFile + "\" saved as \"" + backup + "\"");
  } else {
    Out.println("Couldn't save old file \"" + outputFile + "\", overwriting!");
  }
  return outputFile;
}
/** Ends the current line of the generated output. */
private void println() {
  this.out.println();
}

/** Writes {@code line} and a line break to the generated output. */
private void println(final String line) {
  this.out.println(line);
}

/** Writes the number {@code i} and a line break to the generated output. */
private void println(final int i) {
  this.out.println(i);
}

/** Writes {@code line} to the generated output without a line break. */
private void print(final String line) {
  this.out.print(line);
}

/** Writes the number {@code i} to the generated output without a line break. */
private void print(final int i) {
  this.out.print(i);
}
/**
 * Prints {@code i} right-aligned in a field of {@code tab} characters by writing one leading
 * space for every position the number does not fill. Numbers wider than the field are printed
 * without padding.
 *
 * @param i the number to print
 * @param tab the width of the field
 */
private void print(int i, @SuppressWarnings("SameParameterValue") int tab) {
  final int magnitude = Math.abs(i);
  // a negative number needs one position more, for its sign
  int threshold = (i < 0) ? 1 : 10;
  for (int column = tab; column > 1; column--) {
    if (magnitude < threshold) {
      print(" ");
    }
    threshold *= 10;
  }
  print(i);
}
/** Returns whether the DFA reports that lookahead is used. */
private boolean hasGenLookAhead() {
  return this.dfa.lookaheadUsed();
}
/** Emits the declaration of the {@code zzFin} array; emits nothing without lookahead. */
private void emitLookBuffer() {
  if (hasGenLookAhead()) {
    println(" /** For the backwards DFA of general lookahead statements */");
    println(" private boolean [] zzFin = new boolean [ZZ_BUFFERSIZE+1];");
    println();
  }
}
/**
 * Emits the parts of the generated {@code zzScanError} and {@code yypushback} methods that depend
 * on the configured scan error exception: the {@code zzScanError} header, its throw statement,
 * and the {@code yypushback} header. The two {@code skel.emitNext()} calls in between
 * presumably emit the fixed skeleton text around these parts.
 *
 * <p>Without a configured exception the generated code throws {@code Error} and declares no
 * {@code throws} clause.
 */
private void emitScanError() {
  // always read through the accessor, like every other use in this class
  final String exceptionName = scanner.scanErrorException();
  final boolean customException = exceptionName != null;

  print(" private static void zzScanError(int errorCode)");
  if (customException) {
    print(" throws " + exceptionName);
  }
  println(" {");

  skel.emitNext();

  if (customException) {
    println(" throw new " + exceptionName + "(message);");
  } else {
    println(" throw new Error(message);");
  }

  skel.emitNext();

  print(" " + visibility + " void yypushback(int number) ");
  if (customException) {
    println(" throws " + exceptionName + " {");
  } else {
    println(" {");
  }
}
/**
 * Emits the {@code main} method of the generated scanner and, for cup debugging, the helper
 * methods {@code getTokenName} and {@code debug_<functionName>}.
 *
 * <p>Nothing is emitted unless the scanner is standalone, has the debug option set, or uses cup
 * debugging.
 *
 * @param functionName name of the generated scanning method that the emitted code calls
 */
private void emitMain(String functionName) {
  if (!(scanner.standalone() || scanner.debugOption() || scanner.cupDebug())) return;

  if (scanner.cupDebug()) {
    println(" /**");
    println(" * Converts an int token code into the name of the");
    println(" * token by reflection on the cup symbol class/interface " + scanner.cupSymbol());
    println(" */");
    println(" private static String getTokenName(int token) {");
    println(" try {");
    println(
        " java.lang.reflect.Field [] classFields = "
            + scanner.cupSymbol()
            + ".class.getFields();");
    println(" for (int i = 0; i < classFields.length; i++) {");
    println(" if (classFields[i].getInt(null) == token) {");
    println(" return classFields[i].getName();");
    println(" }");
    println(" }");
    println(" } catch (Exception e) {");
    println(" e.printStackTrace(System.err);");
    println(" }");
    println("");
    println(" return \"UNKNOWN TOKEN\";");
    println(" }");
    println("");
    println(" /**");
    println(" * Same as " + functionName + " but also prints the token to standard out");
    println(" * for debugging.");
    println(" */");

    if (scanner.cupCompatible() || scanner.cup2Compatible()) {
      // cup interface forces public method
      print(" public ");
    } else {
      print(" " + visibility + " ");
    }

    // Resolve the token type once, so that the return type of the debug method and the type of
    // its local variable always agree (a missing token type used to be printed as "null").
    String tokenType = scanner.tokenType();
    if (tokenType == null) {
      if (scanner.isInteger()) {
        tokenType = "int";
      } else if (scanner.isIntWrap()) {
        tokenType = "Integer";
      } else {
        tokenType = "Yytoken";
      }
    }
    print(tokenType);

    print(" debug_");
    print(functionName);
    print("() throws java.io.IOException");
    if (scanner.lexThrow() != null) {
      print(", ");
      print(scanner.lexThrow());
    }
    if (scanner.scanErrorException() != null) {
      print(", ");
      print(scanner.scanErrorException());
    }
    println(" {");

    println(" " + tokenType + " s = " + functionName + "();");
    print(" System.out.println( ");
    if (scanner.lineCount()) print("\"line:\" + (yyline+1) + ");
    if (scanner.columnCount()) print("\" col:\" + (yycolumn+1) + ");
    if (scanner.charCount()) print("\" char:\" + yychar + ");
    println("\" --\"+ yytext() + \"--\" + getTokenName(s.sym) + \"--\");");
    println(" return s;");
    println(" }");
    println("");
  }

  if (scanner.standalone()) {
    println(" /**");
    println(" * Runs the scanner on input files.");
    println(" *");
    println(" * This is a standalone scanner, it will print any unmatched");
    println(" * text to System.out unchanged.");
    println(" *");
    println(" * @param argv the command line, contains the filenames to run");
    println(" * the scanner on.");
    println(" */");
  } else {
    println(" /**");
    println(" * Runs the scanner on input files.");
    println(" *");
    println(" * This main method is the debugging routine for the scanner.");
    println(" * It prints debugging information about each returned token to");
    println(" * System.out until the end of file is reached, or an error occured.");
    println(" *");
    println(" * @param argv the command line, contains the filenames to run");
    println(" * the scanner on.");
    println(" */");
  }

  String className = getBaseName(scanner.className());

  println(" public static void main(String argv[]) {");
  println(" if (argv.length == 0) {");
  // The usage line names both arguments that the emitted main reads: the encoding name
  // (argv[1] after --encoding) and the input files.
  println(
      " System.out.println(\"Usage : java "
          + className
          + " [ --encoding <name> ] <inputfile(s)>\");");
  println(" }");
  println(" else {");
  println(" int firstFilePos = 0;");
  println(" String encodingName = \"UTF-8\";");
  println(" if (argv[0].equals(\"--encoding\")) {");
  println(" firstFilePos = 2;");
  println(" encodingName = argv[1];");
  println(" try {");
  println(" // Side-effect: is encodingName valid?");
  println(" java.nio.charset.Charset.forName(encodingName);");
  println(" } catch (Exception e) {");
  println(" System.out.println(\"Invalid encoding '\" + encodingName + \"'\");");
  println(" return;");
  println(" }");
  println(" }");
  println(" for (int i = firstFilePos; i < argv.length; i++) {");
  println(" " + className + " scanner = null;");
  println(" try {");
  println(" java.io.FileInputStream stream = new java.io.FileInputStream(argv[i]);");
  println(
      " java.io.Reader reader = new java.io.InputStreamReader(stream, encodingName);");
  println(" scanner = new " + className + "(reader);");

  if (scanner.standalone()) {
    println(" while ( !scanner.zzAtEOF ) scanner." + functionName + "();");
  } else if (scanner.cupDebug()) {
    println(" while ( !scanner.zzAtEOF ) scanner.debug_" + functionName + "();");
  } else {
    println(" do {");
    println(" System.out.println(scanner." + functionName + "());");
    println(" } while (!scanner.zzAtEOF);");
    println("");
  }

  println(" }");
  println(" catch (java.io.FileNotFoundException e) {");
  println(" System.out.println(\"File not found : \\\"\"+argv[i]+\"\\\"\");");
  println(" }");
  println(" catch (java.io.IOException e) {");
  println(" System.out.println(\"IO error scanning file \\\"\"+argv[i]+\"\\\"\");");
  println(" System.out.println(e);");
  println(" }");
  println(" catch (Exception e) {");
  println(" System.out.println(\"Unexpected exception:\");");
  println(" e.printStackTrace();");
  println(" }");
  println(" }");
  println(" }");
  println(" }");
  println("");
}
/** Emits the call that reports the {@code ZZ_NO_MATCH} error in the generated scanner. */
private void emitNoMatch() {
  final String scanErrorCall = " zzScanError(ZZ_NO_MATCH);";
  println(scanErrorCall);
}
/**
 * Emits the generated code that reads the next input code point into {@code zzInput}, calling
 * {@code zzRefill()} when the cached buffer is exhausted and leaving the {@code zzForAction}
 * block at end of input.
 */
private void emitNextInput() {
  final String[] generatedLines = {
    " if (zzCurrentPosL < zzEndReadL) {",
    " zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);",
    " zzCurrentPosL += Character.charCount(zzInput);",
    " }",
    " else if (zzAtEOF) {",
    " zzInput = YYEOF;",
    " break zzForAction;",
    " }",
    " else {",
    " // store back cached positions",
    " zzCurrentPos = zzCurrentPosL;",
    " zzMarkedPos = zzMarkedPosL;",
    " boolean eof = zzRefill();",
    " // get translated positions and possibly new buffer",
    " zzCurrentPosL = zzCurrentPos;",
    " zzMarkedPosL = zzMarkedPos;",
    " zzBufferL = zzBuffer;",
    " zzEndReadL = zzEndRead;",
    " if (eof) {",
    " zzInput = YYEOF;",
    " break zzForAction;",
    " }",
    " else {",
    " zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);",
    " zzCurrentPosL += Character.charCount(zzInput);",
    " }",
    " }",
  };
  for (String generatedLine : generatedLines) {
    println(generatedLine);
  }
}
/**
 * Returns the path of {@code file} relative to {@code Options.getRootDirectory()}, with every
 * backslash doubled. On platforms whose separator is a backslash the path is passed through
 * {@code FileUtils.slashify} first.
 *
 * @param file the file to describe
 * @return the escaped relative path
 */
public static String sourceFileString(File file) {
  final String relativePath = FileUtils.getRelativePath(Options.getRootDirectory(), file);
  final boolean backslashSeparator = File.separatorChar == '\\';
  final String normalized = backslashSeparator ? FileUtils.slashify(relativePath) : relativePath;
  // A backslash can start a Unicode escape in Java source, so every backslash is doubled
  return normalized.replace("\\", "\\\\");
}
/**
 * Emits the comment header of the generated file: a do-not-edit notice, the JFlex version, and
 * the path of the input file.
 */
private void emitHeader() {
  println("// DO NOT EDIT");
  final String generatorLine = "// Generated by JFlex " + Build.VERSION + " http://jflex.de/";
  println(generatorLine);
  final String sourceLine = "// source: " + sourceFileString(inputFile);
  println(sourceLine);
  println("");
}
/** Emits the user code of the specification, followed by the CUP2 imports when required. */
private void emitUserCode() {
  println(scanner.userCode());

  if (!scanner.cup2Compatible()) {
    return;
  }

  println();
  println("/* CUP2 imports */");
  println("import edu.tum.cup2.scanner.*;");
  println("import edu.tum.cup2.grammar.*;");
  println();
}
/**
 * Emits the class declaration line of the generated scanner: modifiers, class name, and the
 * optional {@code extends} and {@code implements} clauses, up to the opening brace.
 */
private void emitClassName() {
  // TODO(#222) Actually fix the fall-through violations
  println("// See https://github.com/jflex-de/jflex/issues/222");
  println("@SuppressWarnings(\"FallThrough\")");

  // modifiers, in the order they are written in the declaration
  if (scanner.isPublic()) {
    print("public ");
  }
  if (scanner.isAbstract()) {
    print("abstract ");
  }
  if (scanner.isFinal()) {
    print("final ");
  }

  print("class " + scanner.className());

  if (scanner.isExtending() != null) {
    print(" extends " + scanner.isExtending());
  }
  if (scanner.isImplementing() != null) {
    print(" implements " + scanner.isImplementing());
  }

  println(" {");
}
/**
 * Checks whether the user code ends with a single javadoc comment that is followed by nothing
 * but annotations and whitespace.
 *
 * @param usercode the user code
 * @return true if it ends with a javadoc comment and zero or more annotations
 */
static boolean endsWithJavadoc(CharSequence usercode) {
  final Matcher javadocMatcher = JAVADOC_COMMENT_AND_MAYBE_ANNOTATIONS_PATTERN.matcher(usercode);
  if (!javadocMatcher.matches()) {
    return false;
  }
  // a comment terminator inside the captured text means the javadoc comment ended earlier
  final String commentBody = javadocMatcher.group(1);
  return !commentBody.contains("*/");
}
/**
 * Emits one constant per lexical state name (twice the state number) and the
 * {@code ZZ_LEXSTATE} array holding the DFA entry states, 16 entries per line.
 */
private void emitLexicalStates() {
  for (String stateName : scanner.stateNames()) {
    int stateCode = 2 * scanner.getStateNumber(stateName);
    println(" " + visibility + " static final int " + stateName + " = " + stateCode + ";");
  }

  // can't quite get rid of the indirection, even for non-bol lex states:
  // their DFA states might be the same, but their EOF actions might be different
  // (see bug #1540228)
  println("");
  println(" /**");
  println(" * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l");
  println(" * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l");
  println(" * at the beginning of a line");
  println(" * l is of the form l = 2*k, k a non negative integer");
  println(" */");
  println(" private static final int ZZ_LEXSTATE[] = {");

  int entriesInLine = 0;
  int entry = 0;
  print(" ");
  while (entry < 2 * dfa.numLexStates() - 1) {
    print(dfa.entryState(entry), 2);
    print(", ");
    entriesInLine++;
    if (entriesInLine >= 16) {
      println();
      print(" ");
      entriesInLine = 0;
    }
    entry++;
  }

  // the last entry is written unpadded and without a trailing comma
  println(dfa.entryState(entry));
  println(" };");
}
/**
 * Emits the transition table of the DFA as runs of (count, value) pairs. Killed rows and killed
 * columns are left out.
 */
private void emitDynamicInit() {
  int runLength = 0;
  int runValue = dfa.table(0, 0);

  println(" /**");
  println(" * The transition table of the DFA");
  println(" */");

  CountEmitter transEmitter = new CountEmitter("Trans");
  transEmitter.setValTranslation(+1); // allow vals in [-1, 0xFFFE]
  transEmitter.emitInit();

  for (int state = 0; state < dfa.numStates(); state++) {
    if (rowKilled[state]) {
      continue;
    }
    for (int input = 0; input < dfa.numInput(); input++) {
      if (colKilled[input]) {
        continue;
      }
      int entry = dfa.table(state, input);
      if (entry != runValue) {
        // the current run ends here, start a new one with this entry
        transEmitter.emit(runLength, runValue);
        runLength = 1;
        runValue = entry;
      } else {
        runLength++;
      }
    }
  }

  // flush the last run
  transEmitter.emit(runLength, runValue);
  transEmitter.emitUnpack();
  println(transEmitter.toString());
}
/**
 * Emits the {@code ZZ_CMAP} array as a plain, unpacked array literal with 16 entries per line.
 * Each entry is the colMap translation of the character's class code.
 */
private void emitCharMapArrayUnPacked() {
  final CharClasses charClasses = parser.getCharClasses();

  println("");
  println(" /**");
  println(" * Translates characters to character classes");
  println(" */");
  println(" private static final char [] ZZ_CMAP = {");

  int entriesInLine = 0;
  print(" ");

  final int maxCode = charClasses.getMaxCharCode();
  // not very efficient, but good enough for <= 255 characters
  for (char c = 0; c <= maxCode; c++) {
    print(colMap[charClasses.getClassCode(c)], 2);
    if (c >= maxCode) {
      // no separator after the last entry
      continue;
    }
    print(", ");
    entriesInLine++;
    if (entriesInLine >= 16) {
      println();
      print(" ");
      entriesInLine = 0;
    }
  }

  println();
  println(" };");
  println();
}
/**
 * Replaces, in place, every entry of {@code blocks} by its colMap translation.
 *
 * @param blocks the char class codes of the second-level cmap table; modified by this call
 */
private void mapColMap(int[] blocks) {
  int index = 0;
  while (index < blocks.length) {
    final int charClass = blocks[index];
    blocks[index] = colMap[charClass];
    index++;
  }
}
/**
 * Emits two-level character translation tables. The translation is from raw input codepoint to
 * the column in the generated DFA table.
 *
 * <p>For a maximal char code below 256, a single-level unpacked array is used instead.
 */
private void emitCharMapTables() {
  CharClasses cl = parser.getCharClasses();

  if (cl.getMaxCharCode() < 256) {
    emitCharMapArrayUnPacked();
    return;
  }

  // Typed pair: with a raw Pair, tables.snd would be an Object and could not be passed to
  // mapColMap(int[]).
  Pair<int[], int[]> tables = cl.getTables();
  // the second-level table holds char class codes; translate them to table columns in place
  mapColMap(tables.snd);

  println("");
  println(" /**");
  println(" * Top-level table for translating characters to character classes");
  println(" */");
  CountEmitter e = new CountEmitter("cmap_top");
  e.emitInit();
  e.emitCountValueString(tables.fst);
  e.emitUnpack();
  println(e.toString());

  println("");
  println(" /**");
  println(" * Second-level tables for translating characters to character classes");
  println(" */");
  e = new CountEmitter("cmap_blocks");
  e.emitInit();
  e.emitCountValueString(tables.snd);
  e.emitUnpack();
  println(e.toString());
}
/**
 * Emits the row map: for every DFA state, its rowMap entry multiplied by the number of columns.
 */
private void emitRowMapArray() {
  println("");
  println(" /**");
  println(" * Translates a state to a row index in the transition table");
  println(" */");

  HiLowEmitter rowMapEmitter = new HiLowEmitter("RowMap");
  rowMapEmitter.emitInit();

  int state = 0;
  while (state < dfa.numStates()) {
    int rowOffset = rowMap[state] * numCols;
    rowMapEmitter.emit(rowOffset);
    state++;
  }

  rowMapEmitter.emitUnpack();
  println(rowMapEmitter.toString());
}
/**
 * Emits the attribute table as runs of (count, value) pairs. The attribute of a state is FINAL
 * when the DFA reports it as final, combined with NOLOOK when its isTransition entry is false.
 */
private void emitAttributes() {
  // TODO(lsf): refactor to use CountEmitter.emitCountValueString
  println(" /**");
  println(" * ZZ_ATTRIBUTE[aState] contains the attributes of state {@code aState}");
  println(" */");

  CountEmitter attributeEmitter = new CountEmitter("Attribute");
  attributeEmitter.emitInit();

  // the first run starts with state 0
  int runLength = 1;
  int runValue = dfa.isFinal(0) ? FINAL : 0;
  if (!isTransition[0]) {
    runValue |= NOLOOK;
  }

  for (int state = 1; state < dfa.numStates(); state++) {
    int attribute = dfa.isFinal(state) ? FINAL : 0;
    if (!isTransition[state]) {
      attribute |= NOLOOK;
    }

    if (attribute != runValue) {
      attributeEmitter.emit(runLength, runValue);
      runLength = 1;
      runValue = attribute;
    } else {
      runLength++;
    }
  }

  // flush the last run
  attributeEmitter.emit(runLength, runValue);
  attributeEmitter.emitUnpack();
  println(attributeEmitter.toString());
}
private void emitClassCode() {
if (scanner.classCode() != null) {
println(" /* user code: */");
println(scanner.classCode());
}
if (scanner.cup2Compatible()) {
// convenience methods for CUP2
println();
println(" /* CUP2 code: */");
println(" private ScannerToken token(Terminal terminal, T value) {");
println(" return new ScannerToken(terminal, value, yyline, yycolumn);");
println(" }");
println();
println(" private ScannerToken