// All downloads are free. Search and download functionality uses the official Maven repository.
//
// morfologik.fsa.builders.FSAUtils (Maven / Gradle / Ivy)

package morfologik.fsa.builders;

import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.BitSet;
import java.util.TreeMap;

import morfologik.fsa.FSA;
import morfologik.fsa.FSA5;
import morfologik.fsa.FSAFlags;
import morfologik.fsa.StateVisitor;

import com.carrotsearch.hppc.IntIntHashMap;

/**
 * Other FSA-related utilities not directly associated with the class hierarchy.
 */
public final class FSAUtils {
  public final static class IntIntHolder {
    public int a;
    public int b;

    public IntIntHolder(int a, int b) {
      this.a = a;
      this.b = b;
    }

    public IntIntHolder() {
    }
  }

  /**
   * Returns the right-language reachable from a given FSA node, formatted as an
   * input for the graphviz package (expressed in the dot
   * language).
   * 
   * @param fsa The automaton to visualize.
   * @param node Starting node (subgraph will be visualized unless it's the automaton's root node).
   * @return Returns the dot language description of the automaton.
   */
  public static String toDot(FSA fsa, int node) {
    try {
      StringWriter w = new StringWriter();
      toDot(w, fsa, node);
      return w.toString();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Saves the right-language reachable from a given FSA node, formatted as an
   * input for the graphviz package (expressed in the dot
   * language), to the given writer.
   * 
   * @param w The writer to write dot language description of the automaton. 
   * @param fsa The automaton to visualize.
   * @param node Starting node (subgraph will be visualized unless it's the automaton's root node).
   * @throws IOException Rethrown if an I/O exception occurs. 
   */
  public static void toDot(Writer w, FSA fsa, int node) throws IOException {
    w.write("digraph Automaton {\n");
    w.write("  rankdir = LR;\n");

    final BitSet visited = new BitSet();

    w.write("  stop [shape=doublecircle,label=\"\"];\n");
    w.write("  initial [shape=plaintext,label=\"\"];\n");
    w.write("  initial -> " + node + "\n\n");

    visitNode(w, 0, fsa, node, visited);
    w.write("}\n");
  }

  private static void visitNode(Writer w, int d, FSA fsa, int s, BitSet visited) throws IOException {
    visited.set(s);
    w.write("  ");
    w.write(Integer.toString(s));

    if (fsa.getFlags().contains(FSAFlags.NUMBERS)) {
      int nodeNumber = fsa.getRightLanguageCount(s);
      w.write(" [shape=circle,label=\"" + nodeNumber + "\"];\n");
    } else {
      w.write(" [shape=circle,label=\"\"];\n");
    }

    for (int arc = fsa.getFirstArc(s); arc != 0; arc = fsa.getNextArc(arc)) {
      w.write("  ");
      w.write(Integer.toString(s));
      w.write(" -> ");
      if (fsa.isArcTerminal(arc)) {
        w.write("stop");
      } else {
        w.write(Integer.toString(fsa.getEndNode(arc)));
      }

      final byte label = fsa.getArcLabel(arc);
      w.write(" [label=\"");
      if (Character.isLetterOrDigit(label))
        w.write((char) label);
      else {
        w.write("0x");
        w.write(Integer.toHexString(label & 0xFF));
      }
      w.write("\"");
      if (fsa.isArcFinal(arc))
        w.write(" arrowhead=\"tee\"");
      if (fsa instanceof FSA5) {
        if (((FSA5) fsa).isNextSet(arc)) {
          w.write(" color=\"blue\"");
        }
      }

      w.write("]\n");
    }

    for (int arc = fsa.getFirstArc(s); arc != 0; arc = fsa.getNextArc(arc)) {
      if (!fsa.isArcTerminal(arc)) {
        int endNode = fsa.getEndNode(arc);
        if (!visited.get(endNode)) {
          visitNode(w, d + 1, fsa, endNode, visited);
        }
      }
    }
  }

  /**
   * Calculate fan-out ratio (how many nodes have a given number of outgoing arcs).  
   * 
   * @param fsa The automaton to calculate fanout for. 
   * @param root The starting node for calculations.
   * 
   * @return The returned map contains keys for the number of outgoing arcs and
   * an associated value being the number of nodes with that arc number. 
   */
  public static TreeMap calculateFanOuts(final FSA fsa, int root) {
    final int[] result = new int[256];
    fsa.visitInPreOrder(new StateVisitor() {
      public boolean accept(int state) {
        int count = 0;
        for (int arc = fsa.getFirstArc(state); arc != 0; arc = fsa.getNextArc(arc)) {
          count++;
        }
        result[count]++;
        return true;
      }
    });

    TreeMap output = new TreeMap();

    int low = 1; // Omit #0, there is always a single node like that (dummy).
    while (low < result.length && result[low] == 0) {
      low++;
    }

    int high = result.length - 1;
    while (high >= 0 && result[high] == 0) {
      high--;
    }

    for (int i = low; i <= high; i++) {
      output.put(i, result[i]);
    }

    return output;
  }

  /**
   * Calculate the size of "right language" for each state in an FSA. The right
   * language is the number of sequences encoded from a given node in the automaton.
   * 
   * @param fsa The automaton to calculate right language for.
   * @return Returns a map with node identifiers as keys and their right language
   * counts as associated values. 
   */
  public static IntIntHashMap rightLanguageForAllStates(final FSA fsa) {
    final IntIntHashMap numbers = new IntIntHashMap();

    fsa.visitInPostOrder(new StateVisitor() {
      public boolean accept(int state) {
        int thisNodeNumber = 0;
        for (int arc = fsa.getFirstArc(state); arc != 0; arc = fsa.getNextArc(arc)) {
          thisNodeNumber += (fsa.isArcFinal(arc) ? 1 : 0)
              + (fsa.isArcTerminal(arc) ? 0 : numbers.get(fsa.getEndNode(arc)));
        }
        numbers.put(state, thisNodeNumber);

        return true;
      }
    });

    return numbers;
  }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy