/*
 * edu.stanford.nlp.parser.lexparser.UnaryGrammar, from the stanford-parser artifact
 * (available for Maven / Gradle / Ivy).
 *
 * Stanford Parser processes raw text in English, Chinese, German, Arabic, and French,
 * and extracts constituency parse trees.
 */
package edu.stanford.nlp.parser.lexparser;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;
import java.io.*;
import java.util.*;
/**
* Maintains efficient indexing of unary grammar rules.
*
* @author Dan Klein
* @author Christopher Manning
*/
public class UnaryGrammar implements Serializable, Iterable {
private final Index index;
private transient List[] rulesWithParent; // = null;
private transient List[] rulesWithChild; // = null;
private transient List[] closedRulesWithParent; // = null;
private transient List[] closedRulesWithChild; // = null;
private transient UnaryRule[][] closedRulesWithP; // = null;
private transient UnaryRule[][] closedRulesWithC; // = null;
/** The basic list of UnaryRules. Really this is treated as a set */
private Map coreRules; // = null;
/** The closure of the basic list of UnaryRules. Treated as a set */
private transient Map bestRulesUnderMax; // = null;
// private transient Map backTrace = null;
public int numClosedRules() {
return bestRulesUnderMax.keySet().size();
}
public UnaryRule getRule(UnaryRule ur) {
return coreRules.get(ur);
}
public Iterator closedRuleIterator() {
return bestRulesUnderMax.keySet().iterator();
}
public int numRules() {
return coreRules.keySet().size();
}
public Iterator iterator() {
return ruleIterator();
}
public Iterator ruleIterator() {
return coreRules.keySet().iterator();
}
public List rules() {
return new ArrayList<>(coreRules.keySet());
}
/** Remove A -> A UnaryRules from bestRulesUnderMax. */
public final void purgeRules() {
Map bR = Generics.newHashMap();
for (UnaryRule ur : bestRulesUnderMax.keySet()) {
if (ur.parent != ur.child) {
bR.put(ur, ur);
} else {
closedRulesWithParent[ur.parent].remove(ur);
closedRulesWithChild[ur.child].remove(ur);
}
}
bestRulesUnderMax = bR;
makeCRArrays();
}
/* -----------------
// Not needed any more as we reconstruct unaries in extractBestParse
public List getBestPath(int parent, int child) {
List path = new ArrayList();
UnaryRule tempR = new UnaryRule();
tempR.parent = parent;
tempR.child = child;
//System.out.println("Building path...");
int loc = parent;
while (loc != child) {
path.add(new Integer(loc));
//System.out.println("Path is "+path);
tempR.parent = loc;
Integer nextInt = backTrace.get(tempR);
if (nextInt == null) {
loc = child;
} else {
loc = nextInt.intValue();
}
//System.out.println(Numberer.getGlobalNumberer(stateSpace).object(parent)+"->"+Numberer.getGlobalNumberer(stateSpace).object(child)+" went via "+Numberer.getGlobalNumberer(stateSpace).object(loc));
if (path.size() > 10) {
throw new RuntimeException("UnaryGrammar path > 10");
}
}
path.add(new Integer(child));
return path;
}
--------------------------- */
private void closeRulesUnderMax(UnaryRule ur) {
for (int i = 0, isz = closedRulesWithChild[ur.parent].size(); i < isz; i++) {
UnaryRule pr = closedRulesWithChild[ur.parent].get(i);
for (int j = 0, jsz = closedRulesWithParent[ur.child].size(); j < jsz; j++) {
UnaryRule cr = closedRulesWithParent[ur.child].get(j);
UnaryRule resultR = new UnaryRule(pr.parent, cr.child,
pr.score + cr.score + ur.score);
relaxRule(resultR);
/* ----- No longer need to maintain unary rule backpointers
if (relaxRule(resultR)) {
if (resultR.parent != ur.parent) {
backTrace.put(resultR, new Integer(ur.parent));
} else {
backTrace.put(resultR, new Integer(ur.child));
}
}
-------- */
}
}
}
/** Possibly update the best way to make this UnaryRule in the
* bestRulesUnderMax hash and closedRulesWithX lists.
*
* @param ur A UnaryRule with a score
* @return true if ur is the new best scoring case of that unary rule.
*/
private boolean relaxRule(UnaryRule ur) {
UnaryRule bestR = bestRulesUnderMax.get(ur);
if (bestR == null) {
bestRulesUnderMax.put(ur, ur);
closedRulesWithParent[ur.parent].add(ur);
closedRulesWithChild[ur.child].add(ur);
return true;
} else {
if (bestR.score < ur.score) {
bestR.score = ur.score;
return true;
}
return false;
}
}
public double scoreRule(UnaryRule ur) {
UnaryRule bestR = bestRulesUnderMax.get(ur);
return (bestR != null ? bestR.score : Double.NEGATIVE_INFINITY);
}
public final void addRule(UnaryRule ur) {
// add rules' closure
closeRulesUnderMax(ur);
coreRules.put(ur, ur);
rulesWithParent[ur.parent].add(ur);
rulesWithChild[ur.child].add(ur);
}
//public Iterator closedRuleIterator() {
// return bestRulesUnderMax.keySet().iterator();
//}
private static final UnaryRule[] EMPTY_UNARY_RULE_ARRAY = new UnaryRule[0];
void makeCRArrays() {
int numStates = index.size();
closedRulesWithP = new UnaryRule[numStates][];
closedRulesWithC = new UnaryRule[numStates][];
for (int i = 0; i < numStates; i++) {
// cdm [2012]: Would it be faster to use same EMPTY_UNARY_RULE_ARRAY when of size zero? It must be!
closedRulesWithP[i] = closedRulesWithParent[i].toArray(new UnaryRule[closedRulesWithParent[i].size()]);
closedRulesWithC[i] = closedRulesWithChild[i].toArray(new UnaryRule[closedRulesWithChild[i].size()]);
}
}
public UnaryRule[] closedRulesByParent(int state) {
if (state >= closedRulesWithP.length) { // cdm [2012]: This check shouldn't be needed; delete
return EMPTY_UNARY_RULE_ARRAY;
}
return closedRulesWithP[state];
}
public UnaryRule[] closedRulesByChild(int state) {
if (state >= closedRulesWithC.length) { // cdm [2012]: This check shouldn't be needed; delete
return EMPTY_UNARY_RULE_ARRAY;
}
return closedRulesWithC[state];
}
public Iterator closedRuleIteratorByParent(int state) {
if (state >= closedRulesWithParent.length) {
List lur = Collections.emptyList();
return lur.iterator();
}
return closedRulesWithParent[state].iterator();
}
public Iterator closedRuleIteratorByChild(int state) {
if (state >= closedRulesWithChild.length) {
List lur = Collections.emptyList();
return lur.iterator();
}
return closedRulesWithChild[state].iterator();
}
public Iterator ruleIteratorByParent(int state) {
if (state >= rulesWithParent.length) {
List lur = Collections.emptyList();
return lur.iterator();
}
return rulesWithParent[state].iterator();
}
public Iterator ruleIteratorByChild(int state) {
if (state >= rulesWithChild.length) {
List lur = Collections.emptyList();
return lur.iterator();
}
return rulesWithChild[state].iterator();
}
public List rulesByParent(int state) {
if (state >= rulesWithParent.length) {
return Collections.emptyList();
}
return rulesWithParent[state];
}
public List rulesByChild(int state) {
if (state >= rulesWithChild.length) {
return Collections.emptyList();
}
return rulesWithChild[state];
}
public List[] rulesWithParent() {
return rulesWithParent;
}
private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException {
stream.defaultReadObject();
Set allRules = Generics.newHashSet(coreRules.keySet());
init();
for (UnaryRule ur : allRules) {
addRule(ur);
}
purgeRules();
}
/** Create all the array variables, and put in A -> A UnaryRules to feed
* the closure algorithm. They then get deleted later.
*/
@SuppressWarnings("unchecked")
private void init() {
int numStates = index.size();
coreRules = Generics.newHashMap();
rulesWithParent = new List[numStates];
rulesWithChild = new List[numStates];
closedRulesWithParent = new List[numStates];
closedRulesWithChild = new List[numStates];
bestRulesUnderMax = Generics.newHashMap();
// backTrace = Generics.newHashMap();
for (int s = 0; s < numStates; s++) {
rulesWithParent[s] = new ArrayList<>();
rulesWithChild[s] = new ArrayList<>();
closedRulesWithParent[s] = new ArrayList<>();
closedRulesWithChild[s] = new ArrayList<>();
UnaryRule selfR = new UnaryRule(s, s, 0.0);
relaxRule(selfR);
}
}
public UnaryGrammar(Index stateIndex) {
this.index = stateIndex;
init();
}
/**
* Populates data in this UnaryGrammar from a character stream.
*
* @param in The Reader the grammar is read from.
* @throws IOException If there is a reading problem
*/
public void readData(BufferedReader in) throws IOException {
String line;
int lineNum = 1;
// all lines have one rule per line
line = in.readLine();
while (line != null && line.length() > 0) {
try {
addRule(new UnaryRule(line, index));
} catch (Exception e) {
throw new IOException("Error on line " + lineNum);
}
lineNum++;
line = in.readLine();
}
purgeRules();
}
/**
* Writes out data from this Object.
* @param w Data is written to this Writer
*/
public void writeData(Writer w) {
PrintWriter out = new PrintWriter(w);
// all lines have one rule per line
for (UnaryRule ur : this) {
out.println(ur.toString(index));
}
out.flush();
}
/**
* Writes out a lot of redundant data from this Object to the Writer w.
* @param w Data is written to this Writer
*/
public void writeAllData(Writer w) {
int numStates = index.size();
PrintWriter out = new PrintWriter(w);
// all lines have one rule per line
out.println("Unary ruleIterator");
for (Iterator rI = ruleIterator(); rI.hasNext(); ) {
out.println(rI.next().toString(index));
}
out.println("Unary closedRuleIterator");
for (Iterator rI = closedRuleIterator(); rI.hasNext(); ) {
out.println(rI.next().toString(index));
}
out.println("Unary rulesWithParentIterator");
for (int i = 0; i < numStates; i++) {
out.println(index.get(i));
for (Iterator rI = ruleIteratorByParent(i); rI.hasNext(); ) {
out.print(" ");
out.println(rI.next().toString(index));
}
}
out.println("Unary closedRulesWithParentIterator");
for (int i = 0; i < numStates; i++) {
out.println(index.get(i));
for (Iterator rI = closedRuleIteratorByParent(i); rI.hasNext(); ) {
out.print(" ");
out.println(rI.next().toString(index));
}
}
out.flush();
}
@Override
public String toString() {
Writer w = new StringWriter();
writeData(w);
return w.toString();
}
private static final long serialVersionUID = 1L;
} // end class UnaryGrammar