All downloads are free. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.fsm.DFSA Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.fsm; 
import edu.stanford.nlp.util.logging.Redwood;

import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Scored;

import java.io.IOException;
import java.io.Writer;
import java.util.*;

/**
 * DFSA: A class for representing a deterministic finite state automaton
 * without epsilon transitions.
 *
 * @author Dan Klein
 * @author Michel Galley (AT&T FSM library format printing)
 * @author Sarah Spikes ([email protected]) - cleanup and filling in types
 */
public final class DFSA implements Scored  {

  /** A logger for this class */
  private static Redwood.RedwoodChannels log = Redwood.channels(DFSA.class);

  Object dfsaID;
  DFSAState initialState;

  public DFSA(DFSAState initialState, double score) {
    this.initialState = initialState;
    this.score = score;
  }

  public DFSA(DFSAState initialState) {
    this.initialState = initialState;
    this.score = Double.NaN;
  }

  private double score;

  @Override
  public double score() {
    return score;
  }

  public void setScore(double score) {
    this.score = score;
  }

  public DFSAState initialState() {
    return initialState;
  }

  public void setInitialState(DFSAState initialState) {
    this.initialState = initialState;
  }

  public Set> states() {
    Set> visited = Generics.newHashSet();
    List> toVisit = new ArrayList<>();
    toVisit.add(initialState());
    exploreStates(toVisit, visited);
    return visited;
  }

  private static  void exploreStates(List> toVisit, Set> visited) {
    while (!toVisit.isEmpty()) {
      DFSAState state = toVisit.get(toVisit.size() - 1);
      toVisit.remove(toVisit.size() - 1);
      if (!visited.contains(state)) {
        toVisit.addAll(state.successorStates());
        visited.add(state);
      }
    }
  }

  public DFSA(Object dfsaID) {
    this.dfsaID = dfsaID;
    this.score = 0;
  }


  private static  void printTrieDFSAHelper(DFSAState state, int level) {
    if (state.isAccepting()) {
      return;
    }
    Set inputs = state.continuingInputs();
    for (T input : inputs) {
      DFSATransition transition = state.transition(input);
      System.out.print(level);
      System.out.print(input);
      for (int i = 0; i < level; i++) {
        System.out.print("   ");
      }
      System.out.print(transition.score());
      System.out.print(" ");
      System.out.println(input);
      printTrieDFSAHelper(transition.target(), level + 1);
    }
  }

  public static  void printTrieDFSA(DFSA dfsa) {
    log.info("DFSA: " + dfsa.dfsaID);
    printTrieDFSAHelper(dfsa.initialState(), 2);
  }

  public void printAttFsmFormat(Writer w) throws IOException {
    Queue> q = new LinkedList<>();
    Set> visited = Generics.newHashSet();
    q.offer(initialState);
    while(q.peek() != null) {
      DFSAState state = q.poll();
      if(state == null || visited.contains(state))
        continue;
      visited.add(state);
      if (state.isAccepting()) {
        w.write(state.toString()+"\t"+state.score()+"\n");
        continue;
      }
      TreeSet inputs = new TreeSet<>(state.continuingInputs());
      for (T input : inputs) {
        DFSATransition transition = state.transition(input);
        DFSAState target = transition.target();
        if(!visited.contains(target))
          q.add(target);
        w.write(state.toString()+"\t"+target.toString()+"\t"+transition.getInput()+"\t"+transition.score()+"\n");
      }
    }
  }

  private static  void printTrieAsRulesHelper(DFSAState state, String prefix, Writer w) throws IOException {
    if (state.isAccepting()) {
      return;
    }
    Set inputs = state.continuingInputs();
    for (T input : inputs) {
      DFSATransition transition = state.transition(input);
      DFSAState target = transition.target();
      Set inputs2 = target.continuingInputs();
      boolean allTerminate = true;
      for (T input2 : inputs2) {
        DFSATransition transition2 = target.transition(input2);
        DFSAState target2 = transition2.target();
        if (target2.isAccepting()) {
          // it's a binary end rule.  Print it.
          w.write(prefix + " --> " + input + " " + input2 + "\n");
        } else {
          allTerminate = false;
        }
      }
      if (!allTerminate) {
        // there are some longer continuations.  Print continuation rule
        String newPrefix = prefix + "_" + input;
        w.write(prefix + " --> " + input + " " + newPrefix + "\n");
        printTrieAsRulesHelper(transition.target(), newPrefix, w);
      }
    }
  }

  public static  void printTrieAsRules(DFSA dfsa, Writer w) throws IOException {
    printTrieAsRulesHelper(dfsa.initialState(), dfsa.dfsaID.toString(), w);
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy