![JAR search and dependency download from the Maven repository](/logo.png)
edu.berkeley.nlp.ling.AbstractCollinsHeadFinder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
package edu.berkeley.nlp.ling;
import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.syntax.Trees;
import edu.berkeley.nlp.treebank.TreebankLanguagePack;
/**
* A base class for Head Finders similar to the one described in
* Michael Collins' 1999 thesis. For a given constituent we perform
*
* for categoryList in categoryLists
* for index = 1 to n [or n to 1 if R->L]
* for category in categoryList
* if category equals daughter[index] choose it.
*
* with a final default that goes with the direction (L->R or R->L)
* For most constituents, there will be only one category in the list,
* the exception being, in Collins' original version, NP.
*
* It is up to the overriding base class to initialize the map
* from constituent type to categoryLists, "nonTerminalInfo", in its constructor.
* Entries are presumed to be of type String[][]. Each String[] is a list of
* categories, except for the first entry, which specifies direction of
* traversal and must be one of "right", "left" or "rightdis" or "leftdis".
*
* "left" means search left-to-right by category and then by position
* "leftdis" means search left-to-right by position and then by category
* "right" means search right-to-left by category and then by position
* "rightdis" means search right-to-left by position and then by category
*
*
* 10/28/2002 -- Category label identity checking now uses the
* equals() method instead of ==, so not interning category labels
* shouldn't break things anymore. (Roger Levy)
* 2003/02/10 -- Changed to use TreebankLanguagePack and to cut on
* characters that set off annotations, so this should work even if
* functional tags are still on nodes.
* 2004/03/30 -- Made abstract base class and subclasses for CollinsHeadFinder,
* ModCollinsHeadFinder, SemanticHeadFinder, ChineseHeadFinder
* (and trees.icegb.ICEGBHeadFinder, trees.international.negra.NegraHeadFinder,
* and movetrees.EnglishPennMaxProjectionHeadFinder)
*
* @author Christopher Manning
* @author Galen Andrew
*/
public abstract class AbstractCollinsHeadFinder implements HeadFinder, Serializable {
private static final boolean DEBUG = false;
protected final TreebankLanguagePack tlp;
protected HashMap nonTerminalInfo;
// default direction if no rule is found for category
// subclasses can turn it on if they like
protected String[] defaultRule = null;
protected AbstractCollinsHeadFinder(TreebankLanguagePack tlp) {
this.tlp = tlp;
}
/**
* A way for subclasses for corpora with explicit head markings
* to return the explicitly marked head
*
* @param t a tree to find the head of
* @return the marked head-- null if no marked head
*/
// to be overridden in subclasses for corpora
//
protected Tree findMarkedHead(Tree t) {
return null;
}
/**
* Determine which daughter of the current parse tree is the head.
*
* @param t The parse tree to examine the daughters of.
* If this is a leaf, null
is returned
* @return The daughter parse tree that is the head of t
* @see Tree#percolateHeads(HeadFinder)
* for a routine to call this and spread heads throughout a tree
*/
public Tree determineHead(Tree t) {
if (nonTerminalInfo == null) {
throw new RuntimeException("Classes derived from AbstractCollinsHeadFinder must" + " create and fill HashMap nonTerminalInfo.");
}
if (t.isLeaf()) {
return null;
}
List> kids = t.getChildren();
Tree theHead;
// first check if subclass found explicitly marked head
if ((theHead = findMarkedHead(t)) != null) {
return theHead;
}
// if the node is a unary, then that kid must be the head
// it used to special case preterminal and ROOT/TOP case
// but that seemed bad (especially hardcoding string "ROOT")
if (kids.size() == 1) {
return kids.get(0);
}
return determineNonTrivialHead(t);
}
/* Called by determineHead and may be overridden in subclasses
* if special treatment is necessary for particular categories
*/
protected Tree determineNonTrivialHead(Tree t) {
Tree theHead = null;
String motherCat = Trees.FunctionNodeStripper.transformLabel(t);
if (DEBUG) {
System.err.println("Looking for head of " + t.getLabel());
}
// We know we have nonterminals underneath
// (a bit of a Penn Treebank assumption, but).
// Look at label.
// a total special case....
// first look for POS tag at end
// this appears to be redundant in the Collins case since the rule already would do that
// Tree lastDtr = t.lastChild();
// if (tlp.basicCategory(lastDtr.label().value()).equals("POS")) {
// theHead = lastDtr;
// } else {
String[][] how = (String[][]) nonTerminalInfo.get(motherCat);
if (how == null) {
if (DEBUG) {
System.err.println("Warning: No rule found for " + motherCat);
}
if (defaultRule != null) {
return traverseLocate(t.getChildren(), defaultRule, true);
} else {
return null;
}
}
for (int i = 0; i < how.length; i++) {
boolean deflt = (i == how.length - 1);
theHead = traverseLocate(t.getChildren(), how[i], deflt);
if (theHead != null) {
break;
}
}
return theHead;
}
/**
* Attempt to locate head daughter tree from among daughters.
* Go through daughterTrees looking for things from a set found by
* looking up the motherkey specifier in a hash map, and if
* you do not find one, take leftmost or rightmost thing iff
* deflt is true, otherwise return null
.
*/
protected Tree traverseLocate(List> daughterTrees, String[] how, boolean deflt) {
int headIdx = 0;
String childCat;
boolean found = false;
if (how[0].equals("left")) {
twoloop:
for (int i = 1; i < how.length; i++) {
for (headIdx = 0; headIdx < daughterTrees.size(); headIdx++) {
childCat = Trees.FunctionNodeStripper.transformLabel(daughterTrees.get(headIdx));
if (how[i].equals(childCat)) {
found = true;
break twoloop;
}
}
}
if (!found) {
// none found by tag, so return first or null
if (deflt) {
headIdx = 0;
} else {
return null;
}
}
} else if (how[0].equals("leftdis")) {
twoloop:
for (headIdx = 0; headIdx < daughterTrees.size(); headIdx++) {
childCat = Trees.FunctionNodeStripper.transformLabel(daughterTrees.get(headIdx));
for (int i = 1; i < how.length; i++) {
if (how[i].equals(childCat)) {
found = true;
break twoloop;
}
}
}
if (!found) {
// none found by tag, so return first or null
if (deflt) {
headIdx = 0;
} else {
return null;
}
}
} else if (how[0].equals("right")) {
// from right
twoloop:
for (int i = 1; i < how.length; i++) {
for (headIdx = daughterTrees.size() - 1; headIdx >= 0; headIdx--) {
childCat = Trees.FunctionNodeStripper.transformLabel(daughterTrees.get(headIdx));
if (how[i].equals(childCat)) {
found = true;
break twoloop;
}
}
}
if (!found) {
// none found by tag, so return last or null
if (deflt) {
headIdx = daughterTrees.size() - 1;
} else {
return null;
}
}
} else if (how[0].equals("rightdis")) {
// from right, but search for any, not in turn
twoloop:
for (headIdx = daughterTrees.size() - 1; headIdx >= 0; headIdx--) {
childCat = Trees.FunctionNodeStripper.transformLabel(daughterTrees.get(headIdx));
for (int i = 1; i < how.length; i++) {
if (DEBUG) {
System.err.println("Testing for whether " + how[i] + " == " + childCat + ": " + ((how[i].equals(childCat)) ? "true" : "false"));
}
if (how[i].equals(childCat)) {
found = true;
break twoloop;
}
}
}
if (!found) {
// none found by tag, so return last or null
if (deflt) {
headIdx = daughterTrees.size() - 1;
} else {
return null;
}
}
} else {
throw new RuntimeException("ERROR: invalid direction type to nonTerminalInfo map in AbstractCollinsHeadFinder.");
}
headIdx = postOperationFix(headIdx, daughterTrees);
return daughterTrees.get(headIdx);
}
/**
* A way for subclasses to fix any heads under special conditions
* The default does nothing.
*
* @param headIdx the index of the proposed head
* @param daughterTrees the array of daughter trees
* @return the new headIndex
*/
protected int postOperationFix(int headIdx, List> daughterTrees) {
return headIdx;
}
private static final long serialVersionUID = -6540278059442931087L;
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy