All downloads are free. The search and download features use the official Maven repository.

edu.stanford.nlp.parser.lexparser.UnaryGrammar Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;

import java.io.*;
import java.util.*;

/**
 * Maintains efficient indexing of unary grammar rules.
 * <p>
 * Two sets of rules are kept: the <i>core</i> rules that were added through
 * {@link #addRule(UnaryRule)}, and the <i>closed</i> rules, which hold the
 * best-scoring (maximum score) rule found for each parent/child pair when
 * core rules are chained together. Both sets are indexed by parent state and
 * by child state.
 * <p>
 * Only the state index and the core rules are serialized; every other
 * structure is transient and is rebuilt when the object is deserialized.
 *
 * @author Dan Klein
 * @author Christopher Manning
 */
public class UnaryGrammar implements Serializable, Iterable<UnaryRule> {

  /** Index of grammar states; its size fixes the length of every per-state array below. */
  private final Index<String> index;

  /** Core rules, indexed by parent state. */
  private transient List<UnaryRule>[] rulesWithParent; // = null;
  /** Core rules, indexed by child state. */
  private transient List<UnaryRule>[] rulesWithChild; // = null;

  /** Closed rules, indexed by parent state. */
  private transient List<UnaryRule>[] closedRulesWithParent; // = null;
  /** Closed rules, indexed by child state. */
  private transient List<UnaryRule>[] closedRulesWithChild; // = null;

  /** Array copy of {@link #closedRulesWithParent}, built by {@link #makeCRArrays()}. */
  private transient UnaryRule[][] closedRulesWithP; // = null;
  /** Array copy of {@link #closedRulesWithChild}, built by {@link #makeCRArrays()}. */
  private transient UnaryRule[][] closedRulesWithC; // = null;

  /** The basic list of UnaryRules.  Really this is treated as a set */
  private Map<UnaryRule,UnaryRule> coreRules; // = null;
  /** The closure of the basic list of UnaryRules.  Treated as a set */
  private transient Map<UnaryRule,UnaryRule> bestRulesUnderMax; // = null;

  /** @return the number of rules currently in the closure */
  public int numClosedRules() {
    return bestRulesUnderMax.size();
  }

  /**
   * Looks up the stored core rule that is equal to the given rule.
   *
   * @param ur The rule to look up
   * @return the stored core rule, or {@code null} if there is none
   */
  public UnaryRule getRule(UnaryRule ur) {
    return coreRules.get(ur);
  }

  /** @return an iterator over all rules in the closure */
  public Iterator<UnaryRule> closedRuleIterator() {
    return bestRulesUnderMax.keySet().iterator();
  }

  /** @return the number of core rules */
  public int numRules() {
    return coreRules.size();
  }

  /** Iterates over the core rules; same as {@link #ruleIterator()}. */
  @Override
  public Iterator<UnaryRule> iterator() {
    return ruleIterator();
  }

  /** @return an iterator over the core rules */
  public Iterator<UnaryRule> ruleIterator() {
    return coreRules.keySet().iterator();
  }

  /** @return a new list holding a copy of the core rules */
  public List<UnaryRule> rules() {
    return new ArrayList<>(coreRules.keySet());
  }

  /**
   * Remove A -&gt; A UnaryRules from bestRulesUnderMax, and from the closed
   * rule lists, then rebuild the closed rule arrays.
   */
  public final void purgeRules() {
    Map<UnaryRule,UnaryRule> keptRules = Generics.newHashMap();
    for (UnaryRule ur : bestRulesUnderMax.keySet()) {
      if (ur.parent != ur.child) {
        keptRules.put(ur, ur);
      } else {
        // self-loop: drop it from the per-state lists as well as from the map
        closedRulesWithParent[ur.parent].remove(ur);
        closedRulesWithChild[ur.child].remove(ur);
      }
    }
    bestRulesUnderMax = keptRules;
    makeCRArrays();
  }

  // Unary rule backpointers (a backTrace map and a getBestPath method) used
  // to be maintained here. They are not needed any more, as unaries are
  // reconstructed in extractBestParse.

  /**
   * Extends the closure with every rule that can be formed by chaining a
   * closed rule ending in {@code ur.parent}, then {@code ur}, then a closed
   * rule starting at {@code ur.child}. Each candidate is scored as the sum of
   * the three scores and offered to {@link #relaxRule(UnaryRule)}.
   *
   * @param ur The rule being added to the grammar
   */
  private void closeRulesUnderMax(UnaryRule ur) {
    // Index-based loops with the sizes captured up front are deliberate:
    // relaxRule() may append to these very lists while we are scanning them,
    // which would invalidate an Iterator.
    for (int i = 0, isz = closedRulesWithChild[ur.parent].size(); i < isz; i++) {
      UnaryRule pr = closedRulesWithChild[ur.parent].get(i);
      for (int j = 0, jsz = closedRulesWithParent[ur.child].size(); j < jsz; j++) {
        UnaryRule cr = closedRulesWithParent[ur.child].get(j);
        UnaryRule resultR = new UnaryRule(pr.parent, cr.child,
                                          pr.score + cr.score + ur.score);
        relaxRule(resultR);
      }
    }
  }

  /** Possibly update the best way to make this UnaryRule in the
   *  bestRulesUnderMax hash and closedRulesWithX lists.
   *
   *  @param ur A UnaryRule with a score
   *  @return true if ur is the new best scoring case of that unary rule.
   */
  private boolean relaxRule(UnaryRule ur) {
    UnaryRule bestR = bestRulesUnderMax.get(ur);
    if (bestR == null) {
      // first time this parent/child pair is seen: register it everywhere
      bestRulesUnderMax.put(ur, ur);
      closedRulesWithParent[ur.parent].add(ur);
      closedRulesWithChild[ur.child].add(ur);
      return true;
    }
    if (bestR.score < ur.score) {
      // The stored rule object is shared by the map and the lists, so
      // updating its score in place updates all of them.
      bestR.score = ur.score;
      return true;
    }
    return false;
  }

  /**
   * Returns the best score recorded in the closure for the given rule.
   *
   * @param ur The rule to score
   * @return the score of the matching closed rule, or
   *         {@code Double.NEGATIVE_INFINITY} if the closure has no such rule
   */
  public double scoreRule(UnaryRule ur) {
    UnaryRule bestR = bestRulesUnderMax.get(ur);
    return (bestR != null ? bestR.score : Double.NEGATIVE_INFINITY);
  }

  /**
   * Adds a rule to the core rules and extends the closure with it.
   * The closed rule arrays are not rebuilt here; that happens in
   * {@link #purgeRules()}.
   *
   * @param ur The rule to add
   */
  public final void addRule(UnaryRule ur) {
    // add rules' closure
    closeRulesUnderMax(ur);
    coreRules.put(ur, ur);
    rulesWithParent[ur.parent].add(ur);
    rulesWithChild[ur.child].add(ur);
  }

  private static final UnaryRule[] EMPTY_UNARY_RULE_ARRAY = new UnaryRule[0];

  /**
   * Rebuilds the array versions of the closed rule lists from the current
   * contents of the lists.
   */
  void makeCRArrays() {
    int numStates = index.size();
    closedRulesWithP = new UnaryRule[numStates][];
    closedRulesWithC = new UnaryRule[numStates][];
    for (int i = 0; i < numStates; i++) {
      // toArray hands back the array it was given when the list fits in it,
      // so every empty list shares EMPTY_UNARY_RULE_ARRAY; a non-empty list
      // gets a freshly allocated array of exactly its size.
      closedRulesWithP[i] = closedRulesWithParent[i].toArray(EMPTY_UNARY_RULE_ARRAY);
      closedRulesWithC[i] = closedRulesWithChild[i].toArray(EMPTY_UNARY_RULE_ARRAY);
    }
  }

  /**
   * @param state A parent state number
   * @return the closed rules with that parent; an empty array if the state
   *         number is beyond the known states
   */
  public UnaryRule[] closedRulesByParent(int state) {
    if (state >= closedRulesWithP.length) {  // cdm [2012]: believed unnecessary, kept as a safeguard
      return EMPTY_UNARY_RULE_ARRAY;
    }
    return closedRulesWithP[state];
  }

  /**
   * @param state A child state number
   * @return the closed rules with that child; an empty array if the state
   *         number is beyond the known states
   */
  public UnaryRule[] closedRulesByChild(int state) {
    if (state >= closedRulesWithC.length) {  // cdm [2012]: believed unnecessary, kept as a safeguard
      return EMPTY_UNARY_RULE_ARRAY;
    }
    return closedRulesWithC[state];
  }

  /**
   * @param state A parent state number
   * @return an iterator over the closed rules with that parent; an empty
   *         iterator if the state number is beyond the known states
   */
  public Iterator<UnaryRule> closedRuleIteratorByParent(int state) {
    if (state >= closedRulesWithParent.length) {
      return Collections.<UnaryRule>emptyList().iterator();
    }
    return closedRulesWithParent[state].iterator();
  }

  /**
   * @param state A child state number
   * @return an iterator over the closed rules with that child; an empty
   *         iterator if the state number is beyond the known states
   */
  public Iterator<UnaryRule> closedRuleIteratorByChild(int state) {
    if (state >= closedRulesWithChild.length) {
      return Collections.<UnaryRule>emptyList().iterator();
    }
    return closedRulesWithChild[state].iterator();
  }

  /**
   * @param state A parent state number
   * @return an iterator over the core rules with that parent; an empty
   *         iterator if the state number is beyond the known states
   */
  public Iterator<UnaryRule> ruleIteratorByParent(int state) {
    if (state >= rulesWithParent.length) {
      return Collections.<UnaryRule>emptyList().iterator();
    }
    return rulesWithParent[state].iterator();
  }

  /**
   * @param state A child state number
   * @return an iterator over the core rules with that child; an empty
   *         iterator if the state number is beyond the known states
   */
  public Iterator<UnaryRule> ruleIteratorByChild(int state) {
    if (state >= rulesWithChild.length) {
      return Collections.<UnaryRule>emptyList().iterator();
    }
    return rulesWithChild[state].iterator();
  }

  /**
   * @param state A parent state number
   * @return the internal (not copied) list of core rules with that parent,
   *         or an empty list if the state number is beyond the known states
   */
  public List<UnaryRule> rulesByParent(int state) {
    if (state >= rulesWithParent.length) {
      return Collections.emptyList();
    }
    return rulesWithParent[state];
  }

  /**
   * @param state A child state number
   * @return the internal (not copied) list of core rules with that child,
   *         or an empty list if the state number is beyond the known states
   */
  public List<UnaryRule> rulesByChild(int state) {
    if (state >= rulesWithChild.length) {
      return Collections.emptyList();
    }
    return rulesWithChild[state];
  }

  /** @return the internal array of core rule lists, indexed by parent state */
  public List<UnaryRule>[] rulesWithParent() {
    return rulesWithParent;
  }

  /**
   * Restores the serialized fields, then rebuilds all transient structures
   * by re-adding every core rule to a freshly initialized grammar.
   */
  private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException {
    stream.defaultReadObject();
    // copy the rules first, because init() replaces coreRules with an empty map
    Set<UnaryRule> allRules = Generics.newHashSet(coreRules.keySet());
    init();
    for (UnaryRule ur : allRules) {
      addRule(ur);
    }
    purgeRules();
  }

  /** Create all the array variables, and put in A -&gt; A UnaryRules to feed
   *  the closure algorithm. They then get deleted later.
   */
  @SuppressWarnings("unchecked")
  private void init() {
    int numStates = index.size();
    coreRules = Generics.newHashMap();
    rulesWithParent = new List[numStates];
    rulesWithChild = new List[numStates];
    closedRulesWithParent = new List[numStates];
    closedRulesWithChild = new List[numStates];
    bestRulesUnderMax = Generics.newHashMap();
    for (int s = 0; s < numStates; s++) {
      rulesWithParent[s] = new ArrayList<>();
      rulesWithChild[s] = new ArrayList<>();
      closedRulesWithParent[s] = new ArrayList<>();
      closedRulesWithChild[s] = new ArrayList<>();
      // seed the closure with a zero-score self-loop for this state
      UnaryRule selfR = new UnaryRule(s, s, 0.0);
      relaxRule(selfR);
    }
  }

  /**
   * Creates an empty grammar over the given states.
   *
   * @param stateIndex The index of grammar states
   */
  public UnaryGrammar(Index<String> stateIndex) {
    this.index = stateIndex;
    init();
  }

  /**
   * Populates data in this UnaryGrammar from a character stream.
   * Rules are read one per line, until the end of the stream or the first
   * empty line; the self-loop rules are then purged from the closure.
   *
   * @param in The Reader the grammar is read from.
   * @throws IOException If there is a reading problem, or if a line cannot
   *         be turned into a rule (the original exception is the cause)
   */
  public void readData(BufferedReader in) throws IOException {
    int lineNum = 1;
    // all lines have one rule per line
    String line = in.readLine();
    while (line != null && !line.isEmpty()) {
      try {
        addRule(new UnaryRule(line, index));
      } catch (Exception e) {
        // keep the underlying failure so that the real problem is diagnosable
        throw new IOException("Error on line " + lineNum, e);
      }
      lineNum++;
      line = in.readLine();
    }
    purgeRules();
  }

  /**
   * Writes out data from this Object: the core rules, one per line.
   * The Writer is flushed but not closed.
   *
   * @param w Data is written to this Writer
   */
  public void writeData(Writer w) {
    PrintWriter out = new PrintWriter(w);
    // all lines have one rule per line
    for (UnaryRule ur : this) {
      out.println(ur.toString(index));
    }
    out.flush();
  }

  /**
   * Writes out a lot of redundant data from this Object to the Writer w:
   * the core rules, the closed rules, and both again grouped by parent state.
   * The Writer is flushed but not closed.
   *
   * @param w Data is written to this Writer
   */
  public void writeAllData(Writer w) {
    int numStates = index.size();
    PrintWriter out = new PrintWriter(w);
    // all lines have one rule per line
    out.println("Unary ruleIterator");
    for (Iterator<UnaryRule> rI = ruleIterator(); rI.hasNext(); ) {
      out.println(rI.next().toString(index));
    }
    out.println("Unary closedRuleIterator");
    for (Iterator<UnaryRule> rI = closedRuleIterator(); rI.hasNext(); ) {
      out.println(rI.next().toString(index));
    }
    out.println("Unary rulesWithParentIterator");
    for (int i = 0; i < numStates; i++) {
      out.println(index.get(i));
      for (Iterator<UnaryRule> rI = ruleIteratorByParent(i); rI.hasNext(); ) {
        out.print("  ");
        out.println(rI.next().toString(index));
      }
    }
    out.println("Unary closedRulesWithParentIterator");
    for (int i = 0; i < numStates; i++) {
      out.println(index.get(i));
      for (Iterator<UnaryRule> rI = closedRuleIteratorByParent(i); rI.hasNext(); ) {
        out.print("  ");
        out.println(rI.next().toString(index));
      }
    }
    out.flush();
  }

  /** @return the core rules, one per line, as written by {@link #writeData(Writer)} */
  @Override
  public String toString() {
    Writer w = new StringWriter();
    writeData(w);
    return w.toString();
  }

  private static final long serialVersionUID = 1L;

} // end class UnaryGrammar




© 2015 - 2024 Weber Informatics LLC | Privacy Policy