jflex.generator.Emitter Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jflex Show documentation
JFlex is a lexical analyzer generator (also known as scanner generator) for Java™, written in Java.
There is a newer version: 1.8.2
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * JFlex 1.8.0                                                             *
 * Copyright (C) 1998-2018  Gerwin Klein                     *
 * All rights reserved.                                                    *
 *                                                                         *
 * License: BSD                                                            *
 *                                                                         *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

package jflex.generator;

import java.io.File;
import java.io.PrintWriter;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jflex.base.Build;
import jflex.base.Pair;
import jflex.core.AbstractLexScan;
import jflex.core.Action;
import jflex.core.EOFActions;
import jflex.core.LexParse;
import jflex.core.LexScan;
import jflex.core.unicode.CMapBlock;
import jflex.core.unicode.CharClasses;
import jflex.dfa.DFA;
import jflex.exceptions.GeneratorException;
import jflex.io.FileUtils;
import jflex.l10n.ErrorMessages;
import jflex.logging.Out;
import jflex.option.Options;
import jflex.skeleton.Skeleton;

/**
 * This class manages the actual code generation, putting the scanner together, filling in skeleton
 * sections etc.
 *
 * <p>Table compression, String packing etc. is also done here.
 *
 * @author Gerwin Klein
 * @version JFlex 1.8.0
 */
public final class Emitter {
  /**
   * Matches text that ends with a javadoc comment, optionally followed by any number of
   * annotations (with optional parenthesized parameters) and trailing whitespace. Group 1 captures
   * the text between the comment opener and the final comment terminator.
   *
   * <p>The pattern is compiled with {@link Pattern#COMMENTS}, so the spaces used to line up the
   * string fragments below are ignored by the regex engine.
   *
   * <p>NOTE(review): in the double-quoted-parameter alternative, the escaped-quote branch appears
   * to reach the regex engine as an escaped plain quote rather than as "backslash followed by
   * quote" - confirm that this is intended.
   */
  private static final Pattern JAVADOC_COMMENT_AND_MAYBE_ANNOTATIONS_PATTERN =
      Pattern.compile(
          ".*/\\*\\*(.*)\\*/" // javadoc comment, embedded '*/' disallowed
              + "(?:\\s*@[a-z][a-z0-9_]*(?:\\.[a-z][a-z0-9_]*)*" // @[p.ack.age.]AnnotationClass
              + "   (?:\\s*\\(\\s*(?:\"(?:\\\"|[^\"])*\"" // ignore close parens in double quotes
              + "                   |'(?:[^']|\\\\(?:'|u[0-9a-f]{4}))'" // ignore close parens in
              // single quotes
              + "                   |[^)])+\\))?" // optional annotation params
              + ")*\\s*", // zero or more annotations, followed by optional whitespace
          Pattern.DOTALL | Pattern.CASE_INSENSITIVE | Pattern.COMMENTS);

  // bit masks for state attributes
  /** Attribute bit recorded for states the DFA reports as final. */
  private static final int FINAL = 1;
  /** Attribute bit recorded for states whose {@code isTransition} entry is false. */
  private static final int NOLOOK = 8;

  /** The grammar file; its path is written into the generated file header. */
  private final File inputFile;
  /** Name of the file being generated. */
  final String outputFileName;

  /** Destination of all generated code. */
  private final PrintWriter out;
  /** Skeleton whose sections are interleaved with the generated code. */
  private final Skeleton skel;
  /** The scanner of the specification parser; supplies the user's options and code sections. */
  private final AbstractLexScan scanner;
  /** Parser of the specification; supplies the character classes. */
  private final LexParse parser;
  /** The automaton whose tables are emitted. */
  private final DFA dfa;

  /** Per-state flag; states with a false entry get the {@code NOLOOK} attribute. */
  private boolean[] isTransition;

  // for row killing:
  /** Maps a DFA state to its row in the emitted transition table. */
  private int[] rowMap;
  /** Rows flagged here are left out of the emitted transition table. */
  private boolean[] rowKilled;

  // for col killing:
  /** Number of columns per row of the emitted transition table. */
  private int numCols;
  /** Maps a character class to its column in the emitted transition table. */
  private int[] colMap;
  /** Columns flagged here are left out of the emitted transition table. */
  private boolean[] colKilled;

  /** maps actions to their switch label */
  private final Map<Action, Integer> actionTable = new LinkedHashMap<>();

  /** Visibility modifier used for generated members, as configured in the specification. */
  private final String visibility;
  private String eofCode;
  private String eofThrow;

  /**
   * Creates an emitter that writes the generated scanner to the given writer.
   *
   * @param outputFileName name of the file being generated.
   * @param inputFile the input grammar file.
   * @param parser the {@link LexParse} whose scanner supplies the specification settings.
   * @param dfa the {@link DFA} to emit.
   * @param writer destination of the generated code.
   */
  Emitter(String outputFileName, File inputFile, LexParse parser, DFA dfa, PrintWriter writer) {
    // plain references handed in by the caller
    this.inputFile = inputFile;
    this.outputFileName = outputFileName;
    this.dfa = dfa;
    this.out = writer;
    this.parser = parser;

    // values derived from the arguments
    this.scanner = parser.scanner;
    this.visibility = this.scanner.visibility();
    this.skel = new Skeleton(this.out);
  }

  /**
   * Strips a generic type parameter list from a class name.
   *
   * @param className class name, possibly followed by type parameters such as {@code Lexer<T>}
   * @return everything before the first {@code '<'}, or {@code className} itself if there is none
   */
  static String getBaseName(String className) {
    final int typeParamsStart = className.indexOf('<');
    return typeParamsStart >= 0 ? className.substring(0, typeParamsStart) : className;
  }

  /**
   * Determines the output file for the given name and, unless backups are disabled, moves an
   * already existing file of that name out of the way first.
   *
   * <p>The file is placed in {@code Options.getDir()} when that is set, otherwise next to {@code
   * input}, otherwise it is resolved from {@code name} alone.
   *
   * @param name the name (without path) of the file
   * @param input fall back location if no output directory is configured (expected to be a file
   *     in the directory to write to); may be {@code null}
   * @return The constructed File
   */
  public static File normalize(String name, File input) {
    final File outputFile;
    if (Options.getDir() != null) {
      outputFile = new File(Options.getDir(), name);
    } else if (input != null && input.getParent() != null) {
      outputFile = new File(input.getParent(), name);
    } else {
      outputFile = new File(name);
    }

    // nothing to preserve, or the user opted out of backups
    if (!outputFile.exists() || Options.no_backup) {
      return outputFile;
    }

    final File backup = new File(outputFile.toString() + "~");
    if (backup.exists()) {
      // best effort: a stale backup that cannot be removed only makes the rename below fail
      //noinspection ResultOfMethodCallIgnored
      backup.delete();
    }

    final boolean saved = outputFile.renameTo(backup);
    if (saved) {
      Out.println("Old file \"" + outputFile + "\" saved as \"" + backup + "\"");
    } else {
      Out.println("Couldn't save old file \"" + outputFile + "\", overwriting!");
    }

    return outputFile;
  }

  /** Ends the current line of the generated output. */
  private void println() {
    this.out.println();
  }

  /** Writes {@code line} to the generated output and ends the line. */
  private void println(String line) {
    this.out.println(line);
  }

  /** Writes the number {@code i} to the generated output and ends the line. */
  private void println(int i) {
    this.out.println(i);
  }

  /** Writes {@code line} to the generated output without ending the line. */
  private void print(String line) {
    this.out.print(line);
  }

  /** Writes the number {@code i} to the generated output without ending the line. */
  private void print(int i) {
    this.out.print(i);
  }

  /**
   * Writes {@code i} right-aligned, padded on the left with spaces.
   *
   * @param i the number to write
   * @param tab the field width to pad to; wider numbers are written unpadded
   */
  private void print(int i, @SuppressWarnings("SameParameterValue") int tab) {
    // a negative number starts one power of ten lower: its minus sign occupies a column
    int bound = (i < 0) ? 1 : 10;

    for (int column = tab; column > 1; column--) {
      if (Math.abs(i) < bound) {
        print(" ");
      }
      bound *= 10;
    }

    print(i);
  }

  /** Returns whether the DFA reports that lookahead is used. */
  private boolean hasGenLookAhead() {
    final boolean lookaheadUsed = dfa.lookaheadUsed();
    return lookaheadUsed;
  }

  /** Emits the declaration of the {@code zzFin} array, but only when lookahead is used. */
  private void emitLookBuffer() {
    if (hasGenLookAhead()) {
      println("  /** For the backwards DFA of general lookahead statements */");
      println("  private boolean [] zzFin = new boolean [ZZ_BUFFERSIZE+1];");
      println();
    }
  }

  /**
   * Emits the head and the throw statement of {@code zzScanError}, followed by the head of
   * {@code yypushback}, with skeleton sections interleaved.
   *
   * <p>When the specification names a scan error exception, both methods declare it in their
   * {@code throws} clause and {@code zzScanError} throws it; otherwise a plain {@code Error} is
   * thrown and no {@code throws} clause is emitted.
   */
  private void emitScanError() {
    print("  private static void zzScanError(int errorCode)");

    if (scanner.scanErrorException() != null) print(" throws " + scanner.scanErrorException());

    println(" {");

    skel.emitNext();

    if (scanner.scanErrorException() == null) println("    throw new Error(message);");
    else println("    throw new " + scanner.scanErrorException() + "(message);");

    skel.emitNext();

    print("  " + visibility + " void yypushback(int number) ");

    if (scanner.scanErrorException() == null) println(" {");
    // use the accessor here as well, like every other read of this setting in the method
    else println(" throws " + scanner.scanErrorException() + " {");
  }

  /**
   * Emits the optional debugging helpers and the {@code main} method of the generated scanner.
   *
   * <p>Nothing is emitted unless the specification asks for a standalone scanner, the debug
   * option, or CUP debugging. With CUP debugging, a {@code getTokenName} helper and a {@code
   * debug_} variant of the scanning method are emitted ahead of {@code main}.
   *
   * @param functionName name of the generated scanning method
   */
  private void emitMain(String functionName) {
    if (!(scanner.standalone() || scanner.debugOption() || scanner.cupDebug())) return;

    if (scanner.cupDebug()) {
      println("  /**");
      println("   * Converts an int token code into the name of the");
      println("   * token by reflection on the cup symbol class/interface " + scanner.cupSymbol());
      println("   */");
      println("  private static String getTokenName(int token) {");
      println("    try {");
      println(
          "      java.lang.reflect.Field [] classFields = "
              + scanner.cupSymbol()
              + ".class.getFields();");
      println("      for (int i = 0; i < classFields.length; i++) {");
      println("        if (classFields[i].getInt(null) == token) {");
      println("          return classFields[i].getName();");
      println("        }");
      println("      }");
      println("    } catch (Exception e) {");
      println("      e.printStackTrace(System.err);");
      println("    }");
      println("");
      println("    return \"UNKNOWN TOKEN\";");
      println("  }");
      println("");
      println("  /**");
      println("   * Same as " + functionName + " but also prints the token to standard out");
      println("   * for debugging.");
      println("   */");

      if (scanner.cupCompatible() || scanner.cup2Compatible()) {
        // cup interface forces public method
        print("  public ");
      } else {
        print("  " + visibility + " ");
      }
      // return type of the debug method: same selection as for the scanning method itself
      if (scanner.tokenType() == null) {
        if (scanner.isInteger()) print("int");
        else if (scanner.isIntWrap()) print("Integer");
        else print("Yytoken");
      } else print(scanner.tokenType());

      print(" debug_");

      print(functionName);

      print("() throws java.io.IOException");

      if (scanner.lexThrow() != null) {
        print(", ");
        print(scanner.lexThrow());
      }

      if (scanner.scanErrorException() != null) {
        print(", ");
        print(scanner.scanErrorException());
      }

      println(" {");

      println("    " + scanner.tokenType() + " s = " + functionName + "();");
      print("    System.out.println( ");
      if (scanner.lineCount()) print("\"line:\" + (yyline+1) + ");
      if (scanner.columnCount()) print("\" col:\" + (yycolumn+1) + ");
      if (scanner.charCount()) print("\" char:\" + yychar + ");
      println("\" --\"+ yytext() + \"--\" + getTokenName(s.sym) + \"--\");");
      println("    return s;");
      println("  }");
      println("");
    }

    // javadoc of the generated main method
    if (scanner.standalone()) {
      println("  /**");
      println("   * Runs the scanner on input files.");
      println("   *");
      println("   * This is a standalone scanner, it will print any unmatched");
      println("   * text to System.out unchanged.");
      println("   *");
      println("   * @param argv   the command line, contains the filenames to run");
      println("   *               the scanner on.");
      println("   */");
    } else {
      println("  /**");
      println("   * Runs the scanner on input files.");
      println("   *");
      println("   * This main method is the debugging routine for the scanner.");
      println("   * It prints debugging information about each returned token to");
      println("   * System.out until the end of file is reached, or an error occured.");
      println("   *");
      println("   * @param argv   the command line, contains the filenames to run");
      println("   *               the scanner on.");
      println("   */");
    }

    // constructor calls and declarations must not carry the type parameters
    String className = getBaseName(scanner.className());

    println("  public static void main(String argv[]) {");
    println("    if (argv.length == 0) {");
    // the usage line names both the encoding argument and the input files
    println(
        "      System.out.println(\"Usage : java "
            + className
            + " [ --encoding <name> ] <inputfile(s)>\");");
    println("    }");
    println("    else {");
    println("      int firstFilePos = 0;");
    println("      String encodingName = \"UTF-8\";");
    println("      if (argv[0].equals(\"--encoding\")) {");
    println("        firstFilePos = 2;");
    println("        encodingName = argv[1];");
    println("        try {");
    println("          // Side-effect: is encodingName valid?");
    println("          java.nio.charset.Charset.forName(encodingName);");
    println("        } catch (Exception e) {");
    println("          System.out.println(\"Invalid encoding '\" + encodingName + \"'\");");
    println("          return;");
    println("        }");
    println("      }");
    println("      for (int i = firstFilePos; i < argv.length; i++) {");
    println("        " + className + " scanner = null;");
    println("        try {");
    println("          java.io.FileInputStream stream = new java.io.FileInputStream(argv[i]);");
    println(
        "          java.io.Reader reader = new java.io.InputStreamReader(stream, encodingName);");
    println("          scanner = new " + className + "(reader);");
    // scanning loop: plain, CUP-debug, or printing every returned token
    if (scanner.standalone()) {
      println("          while ( !scanner.zzAtEOF ) scanner." + functionName + "();");
    } else if (scanner.cupDebug()) {
      println("          while ( !scanner.zzAtEOF ) scanner.debug_" + functionName + "();");
    } else {
      println("          do {");
      println("            System.out.println(scanner." + functionName + "());");
      println("          } while (!scanner.zzAtEOF);");
      println("");
    }

    println("        }");
    println("        catch (java.io.FileNotFoundException e) {");
    println("          System.out.println(\"File not found : \\\"\"+argv[i]+\"\\\"\");");
    println("        }");
    println("        catch (java.io.IOException e) {");
    println("          System.out.println(\"IO error scanning file \\\"\"+argv[i]+\"\\\"\");");
    println("          System.out.println(e);");
    println("        }");
    println("        catch (Exception e) {");
    println("          System.out.println(\"Unexpected exception:\");");
    println("          e.printStackTrace();");
    println("        }");
    println("      }");
    println("    }");
    println("  }");
    println("");
  }

  /** Emits the statement that reports a {@code ZZ_NO_MATCH} scan error. */
  private void emitNoMatch() {
    final String reportNoMatch = "            zzScanError(ZZ_NO_MATCH);";
    println(reportNoMatch);
  }

  /**
   * Emits the code fragment that fetches the next input code point into {@code zzInput},
   * refilling the buffer when it is exhausted and leaving {@code zzForAction} at end of input.
   */
  private void emitNextInput() {
    final String[] fragment = {
      "          if (zzCurrentPosL < zzEndReadL) {",
      "            zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);",
      "            zzCurrentPosL += Character.charCount(zzInput);",
      "          }",
      "          else if (zzAtEOF) {",
      "            zzInput = YYEOF;",
      "            break zzForAction;",
      "          }",
      "          else {",
      "            // store back cached positions",
      "            zzCurrentPos  = zzCurrentPosL;",
      "            zzMarkedPos   = zzMarkedPosL;",
      "            boolean eof = zzRefill();",
      "            // get translated positions and possibly new buffer",
      "            zzCurrentPosL  = zzCurrentPos;",
      "            zzMarkedPosL   = zzMarkedPos;",
      "            zzBufferL      = zzBuffer;",
      "            zzEndReadL     = zzEndRead;",
      "            if (eof) {",
      "              zzInput = YYEOF;",
      "              break zzForAction;",
      "            }",
      "            else {",
      "              zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);",
      "              zzCurrentPosL += Character.charCount(zzInput);",
      "            }",
      "          }",
    };

    for (String generatedLine : fragment) {
      println(generatedLine);
    }
  }

  /**
   * Renders the path of {@code file}, relative to the configured root directory, in a form that
   * is safe to embed in generated source.
   *
   * @param file the file whose path is rendered
   * @return the relative path, slashified on platforms whose separator is a backslash, with every
   *     remaining backslash doubled
   */
  public static String sourceFileString(File file) {
    final String relative = FileUtils.getRelativePath(Options.getRootDirectory(), file);
    final String portable =
        (File.separatorChar == '\\') ? FileUtils.slashify(relative) : relative;
    // Double each backslash so that none of them can introduce a Unicode escape.
    return portable.replace("\\", "\\\\");
  }

  /** Emits the comment header naming the generator version and the source grammar file. */
  private void emitHeader() {
    final String generatorLine = "// Generated by JFlex " + Build.VERSION + " http://jflex.de/";

    println("// DO NOT EDIT");
    println(generatorLine);
    println("// source: " + sourceFileString(inputFile));
    println();
  }

  /** Emits the user code section, followed by the CUP2 imports for CUP2-compatible scanners. */
  private void emitUserCode() {
    println(scanner.userCode());

    if (!scanner.cup2Compatible()) {
      return;
    }

    println();
    println("/* CUP2 imports */");
    println("import edu.tum.cup2.scanner.*;");
    println("import edu.tum.cup2.grammar.*;");
    println();
  }

  /**
   * Emits the class declaration line of the generated scanner: modifiers, class name, optional
   * {@code extends} and {@code implements} clauses, and the opening brace.
   */
  private void emitClassName() {
    // TODO(#222) Actually fix the fall-through violations
    println("// See https://github.com/jflex-de/jflex/issues/222");
    println("@SuppressWarnings(\"FallThrough\")");

    // assemble the declaration first, then write it out in one piece
    StringBuilder declaration = new StringBuilder();

    if (scanner.isPublic()) {
      declaration.append("public ");
    }
    if (scanner.isAbstract()) {
      declaration.append("abstract ");
    }
    if (scanner.isFinal()) {
      declaration.append("final ");
    }

    declaration.append("class ").append(scanner.className());

    if (scanner.isExtending() != null) {
      declaration.append(" extends ").append(scanner.isExtending());
    }
    if (scanner.isImplementing() != null) {
      declaration.append(" implements ").append(scanner.isImplementing());
    }

    print(declaration.toString());
    println(" {");
  }

  /**
   * Checks whether the user code ends with a javadoc comment that is followed by nothing but
   * annotations and whitespace.
   *
   * @param usercode the user code
   * @return true if it ends with a javadoc comment and zero or more annotations
   */
  static boolean endsWithJavadoc(CharSequence usercode) {
    final Matcher javadoc = JAVADOC_COMMENT_AND_MAYBE_ANNOTATIONS_PATTERN.matcher(usercode);
    if (!javadoc.matches()) {
      return false;
    }
    // a terminator inside the captured body means the match spans more than one comment
    final String commentBody = javadoc.group(1);
    return !commentBody.contains("*/");
  }

  /**
   * Emits one constant per lexical state (twice the state number) and the {@code ZZ_LEXSTATE}
   * array of DFA entry states, sixteen entries per line.
   */
  private void emitLexicalStates() {
    for (String stateName : scanner.stateNames()) {
      int constant = 2 * scanner.getStateNumber(stateName);

      println("  " + visibility + " static final int " + stateName + " = " + constant + ";");
    }

    // can't quite get rid of the indirection, even for non-bol lex states:
    // their DFA states might be the same, but their EOF actions might be different
    // (see bug #1540228)
    println("");
    println("  /**");
    println("   * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l");
    println("   * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l");
    println("   *                  at the beginning of a line");
    println("   * l is of the form l = 2*k, k a non negative integer");
    println("   */");
    println("  private static final int ZZ_LEXSTATE[] = {");

    print("    ");

    int entry = 0;
    int onThisLine = 0;

    // all entries but the last are followed by a separator
    while (entry < 2 * dfa.numLexStates() - 1) {
      print(dfa.entryState(entry), 2);
      print(", ");

      onThisLine++;
      if (onThisLine >= 16) {
        println();
        print("    ");
        onThisLine = 0;
      }

      entry++;
    }

    println(dfa.entryState(entry));
    println("  };");
  }

  /**
   * Emits the transition table of the DFA as runs of (count, value) pairs, leaving out every
   * killed row and column.
   */
  private void emitDynamicInit() {
    int runLength = 0;
    int runValue = dfa.table(0, 0);

    println("  /**");
    println("   * The transition table of the DFA");
    println("   */");

    CountEmitter packer = new CountEmitter("Trans");
    packer.setValTranslation(+1); // allow vals in [-1, 0xFFFE]
    packer.emitInit();

    for (int state = 0; state < dfa.numStates(); state++) {
      if (rowKilled[state]) {
        continue;
      }

      for (int column = 0; column < dfa.numInput(); column++) {
        if (colKilled[column]) {
          continue;
        }

        if (dfa.table(state, column) != runValue) {
          // the run ends here: flush it and start a new one with this entry
          packer.emit(runLength, runValue);

          runLength = 1;
          runValue = dfa.table(state, column);
        } else {
          runLength++;
        }
      }
    }

    // flush the final run
    packer.emit(runLength, runValue);
    packer.emitUnpack();

    println(packer.toString());
  }

  /**
   * Emits {@code ZZ_CMAP} as a plain array literal with one transition table column per
   * character code, sixteen entries per line.
   */
  private void emitCharMapArrayUnPacked() {

    CharClasses classes = parser.getCharClasses();

    println("");
    println("  /**");
    println("   * Translates characters to character classes");
    println("   */");
    println("  private static final char [] ZZ_CMAP = {");

    print("    ");

    int lastCode = classes.getMaxCharCode();

    // not very efficient, but good enough for <= 255 characters
    for (char code = 0; code <= lastCode; code++) {
      if (code > 0) {
        // separator after the previous entry; break the line after every sixteenth one
        print(", ");
        if (code % 16 == 0) {
          println();
          print("    ");
        }
      }

      print(colMap[classes.getClassCode(code)], 2);
    }

    println();
    println("  };");
    println();
  }

  /**
   * Replaces, in place, every character class in the given second-level cmap table by the column
   * that {@code colMap} assigns to it.
   *
   * @param blocks the table to translate; overwritten with the result
   */
  private void mapColMap(int[] blocks) {
    int position = 0;
    while (position < blocks.length) {
      final int charClass = blocks[position];
      blocks[position] = colMap[charClass];
      position++;
    }
  }

  /**
   * Emits two-level character translation tables. The translation is from raw input codepoint to
   * the column in the generated DFA table.
   *
   * <p>For maxCharCode &lt; 256, a single-level unpacked array is used instead.
   */
  private void emitCharMapTables() {
    CharClasses cl = parser.getCharClasses();

    if (cl.getMaxCharCode() < 256) {
      emitCharMapArrayUnPacked();
    } else {
      // fst is the top-level table, snd the second-level blocks
      Pair<int[], int[]> tables = cl.getTables();
      // translate the character classes in the blocks to DFA table columns before emitting
      mapColMap(tables.snd);

      println("");
      println("  /**");
      println("   * Top-level table for translating characters to character classes");
      println("   */");
      CountEmitter e = new CountEmitter("cmap_top");
      e.emitInit();
      e.emitCountValueString(tables.fst);
      e.emitUnpack();
      println(e.toString());

      println("");
      println("  /**");
      println("   * Second-level tables for translating characters to character classes");
      println("   */");
      e = new CountEmitter("cmap_blocks");
      e.emitInit();
      e.emitCountValueString(tables.snd);
      e.emitUnpack();
      println(e.toString());
    }
  }

  /**
   * Emits the row map: for every DFA state, its row number from {@code rowMap} multiplied by the
   * number of columns.
   */
  private void emitRowMapArray() {
    println("");
    println("  /**");
    println("   * Translates a state to a row index in the transition table");
    println("   */");

    HiLowEmitter rowOffsets = new HiLowEmitter("RowMap");
    rowOffsets.emitInit();

    int state = 0;
    while (state < dfa.numStates()) {
      final int offset = rowMap[state] * numCols;
      rowOffsets.emit(offset);
      state++;
    }

    rowOffsets.emitUnpack();
    println(rowOffsets.toString());
  }

  /**
   * Emits the attribute table as runs of (count, value) pairs. A state's attribute value combines
   * {@code FINAL}, when the DFA reports the state as final, with {@code NOLOOK}, when its
   * {@code isTransition} entry is false.
   */
  private void emitAttributes() {
    // TODO(lsf): refactor to use CountEmitter.emitCountValueString
    println("  /**");
    println("   * ZZ_ATTRIBUTE[aState] contains the attributes of state {@code aState}");
    println("   */");

    CountEmitter packer = new CountEmitter("Attribute");
    packer.emitInit();

    // the first run starts with state 0
    int runLength = 1;
    int runValue = dfa.isFinal(0) ? FINAL : 0;
    if (!isTransition[0]) {
      runValue |= NOLOOK;
    }

    for (int state = 1; state < dfa.numStates(); state++) {
      int current = dfa.isFinal(state) ? FINAL : 0;
      if (!isTransition[state]) {
        current |= NOLOOK;
      }

      if (current != runValue) {
        // attribute changed: flush the run and start the next one
        packer.emit(runLength, runValue);
        runLength = 1;
        runValue = current;
      } else {
        runLength++;
      }
    }

    packer.emit(runLength, runValue);
    packer.emitUnpack();

    println(packer.toString());
  }

  private void emitClassCode() {
    if (scanner.classCode() != null) {
      println("  /* user code: */");
      println(scanner.classCode());
    }

    if (scanner.cup2Compatible()) {
      // convenience methods for CUP2
      println();
      println("  /* CUP2 code: */");
      println("  private  ScannerToken token(Terminal terminal, T value) {");
      println("    return new ScannerToken(terminal, value, yyline, yycolumn);");
      println("  }");
      println();
      println("  private ScannerToken