All downloads are free. Search and download functionality uses the official Maven repository.

edu.stanford.nlp.parser.lexparser.BinaryGrammar Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;

import java.io.*;
import java.util.*;

/**
 * Maintains efficient indexing of binary grammar rules.
 *
 * @author Dan Klein
 * @author Christopher Manning (generified and optimized storage)
 */
public class BinaryGrammar implements Serializable, Iterable {

  // private static final BinaryRule[] EMPTY_BINARY_RULE_ARRAY = new BinaryRule[0];

  private final Index index;

  private final List allRules;

  private transient List[] rulesWithParent;
  private transient List[] rulesWithLC;
  private transient List[] rulesWithRC;
  private transient Set[] ruleSetWithLC;
  private transient Set[] ruleSetWithRC;
  private transient BinaryRule[][] splitRulesWithLC;
  private transient BinaryRule[][] splitRulesWithRC;
  //  private transient BinaryRule[][] splitRulesWithParent = null;
  private transient Map ruleMap;
  // for super speed! (maybe)
  private transient boolean[] synthetic;


  public int numRules() {
    return allRules.size();
  }

  public List rules() {
    return allRules;
  }

  public boolean isSynthetic(int state) {
    return synthetic[state];
  }

  /**
   * Populates the "splitRules" accessor lists using the existing rule lists.
   * If the state is synthetic, these lists contain all rules for the state.
   * If the state is NOT synthetic, these lists contain only the rules in
   * which both children are not synthetic.
   * 

* This method must be called before the grammar is * used, either after training or deserializing grammar. */ public void splitRules() { // first initialize the synthetic array int numStates = index.size(); synthetic = new boolean[numStates]; for (int s = 0; s < numStates; s++) { try { //System.out.println(((String)index.get(s))); // debugging synthetic[s] = (index.get(s).charAt(0) == '@'); } catch (NullPointerException e) { synthetic[s] = true; } } splitRulesWithLC = new BinaryRule[numStates][]; splitRulesWithRC = new BinaryRule[numStates][]; // splitRulesWithParent = new BinaryRule[numStates][]; // rules accessed by their "synthetic" child or left child if none for (int state = 0; state < numStates; state++) { // System.out.println("Splitting rules for state: " + index.get(state)); // check synthetic if (isSynthetic(state)) { splitRulesWithLC[state] = rulesWithLC[state].toArray(new BinaryRule[rulesWithLC[state].size()]); // cdm 2012: I thought sorting the rules might help with speed (memory locality) but didn't seem to // Arrays.sort(splitRulesWithLC[state]); splitRulesWithRC[state] = rulesWithRC[state].toArray(new BinaryRule[rulesWithRC[state].size()]); // Arrays.sort(splitRulesWithRC[state]); } else { // if state is not synthetic, we add rule to splitRules only if both children are not synthetic // do left List ruleList = new ArrayList(); for (BinaryRule br : rulesWithLC[state]) { if ( ! isSynthetic(br.rightChild)) { ruleList.add(br); } } splitRulesWithLC[state] = ruleList.toArray(new BinaryRule[ruleList.size()]); // Arrays.sort(splitRulesWithLC[state]); // do right ruleList.clear(); for (BinaryRule br : rulesWithRC[state]) { if ( ! 
isSynthetic(br.leftChild)) { ruleList.add(br); } } splitRulesWithRC[state] = ruleList.toArray(new BinaryRule[ruleList.size()]); // Arrays.sort(splitRulesWithRC[state]); } // parent accessor // splitRulesWithParent[state] = toBRArray(rulesWithParent[state]); } } public BinaryRule[] splitRulesWithLC(int state) { // if (state >= splitRulesWithLC.length) { // return EMPTY_BINARY_RULE_ARRAY; // } return splitRulesWithLC[state]; } public BinaryRule[] splitRulesWithRC(int state) { // if (state >= splitRulesWithRC.length) { // return EMPTY_BINARY_RULE_ARRAY; // } return splitRulesWithRC[state]; } // public BinaryRule[] splitRulesWithParent(int state) { // return splitRulesWithParent[state]; // } // the sensible version public double scoreRule(BinaryRule br) { BinaryRule rule = ruleMap.get(br); return (rule != null ? rule.score : Double.NEGATIVE_INFINITY); } public void addRule(BinaryRule br) { // System.out.println("BG adding rule " + br); rulesWithParent[br.parent].add(br); rulesWithLC[br.leftChild].add(br); rulesWithRC[br.rightChild].add(br); ruleSetWithLC[br.leftChild].add(br); ruleSetWithRC[br.rightChild].add(br); allRules.add(br); ruleMap.put(br, br); } public Iterator iterator() { return allRules.iterator(); } public Iterator ruleIteratorByParent(int state) { if (state >= rulesWithParent.length) { return Collections.emptyList().iterator(); } return rulesWithParent[state].iterator(); } public Iterator ruleIteratorByRightChild(int state) { if (state >= rulesWithRC.length) { return Collections.emptyList().iterator(); } return rulesWithRC[state].iterator(); } public Iterator ruleIteratorByLeftChild(int state) { if (state >= rulesWithLC.length) { return Collections.emptyList().iterator(); } return rulesWithLC[state].iterator(); } public List ruleListByParent(int state) { if (state >= rulesWithParent.length) { return Collections.emptyList(); } return rulesWithParent[state]; } public List ruleListByRightChild(int state) { if (state >= rulesWithRC.length) { return 
Collections.emptyList(); } return rulesWithRC[state]; } public List ruleListByLeftChild(int state) { if (state >= rulesWithRC.length) { return Collections.emptyList(); } return rulesWithLC[state]; } /* ---- public Set ruleSetByRightChild(int state) { if (state >= ruleSetWithRC.length) { return Collections.emptySet(); } return ruleSetWithRC[state]; } public Set ruleSetByLeftChild(int state) { if (state >= ruleSetWithRC.length) { return Collections.emptySet(); } return ruleSetWithLC[state]; } --- */ private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException { stream.defaultReadObject(); init(); for (BinaryRule br : allRules) { rulesWithParent[br.parent].add(br); rulesWithLC[br.leftChild].add(br); rulesWithRC[br.rightChild].add(br); ruleMap.put(br, br); } splitRules(); } @SuppressWarnings("unchecked") private void init() { ruleMap = Generics.newHashMap(); int numStates = index.size(); rulesWithParent = new List[numStates]; rulesWithLC = new List[numStates]; rulesWithRC = new List[numStates]; ruleSetWithLC = new Set[numStates]; ruleSetWithRC = new Set[numStates]; for (int s = 0; s < numStates; s++) { rulesWithParent[s] = new ArrayList(); rulesWithLC[s] = new ArrayList(); rulesWithRC[s] = new ArrayList(); ruleSetWithLC[s] = Generics.newHashSet(); ruleSetWithRC[s] = Generics.newHashSet(); } } public BinaryGrammar(Index stateIndex) { this.index = stateIndex; allRules = new ArrayList(); init(); } /** * Populates data in this BinaryGrammar from the character stream * given by the Reader r. 
* * @param in Where input is read from * @throws IOException If format is bung */ public void readData(BufferedReader in) throws IOException { //if (Test.verbose) System.err.println(">> readData"); String line; int lineNum = 1; line = in.readLine(); while (line != null && line.length() > 0) { try { addRule(new BinaryRule(line, index)); } catch (Exception e) { throw new IOException("Error on line " + lineNum); } lineNum++; line = in.readLine(); } splitRules(); } /** * Writes out data from this Object to the Writer w. * * @param w Where output is written * @throws IOException If data can't be written */ public void writeData(Writer w) throws IOException { PrintWriter out = new PrintWriter(w); for (BinaryRule br : this) { out.println(br.toString(index)); } out.flush(); } private static final long serialVersionUID = 1L; } // end class BinaryGrammar





© 2015 - 2024 Weber Informatics LLC | Privacy Policy