![JAR search and dependency download from the Maven repository](/logo.png)
edu.berkeley.nlp.syntax.Tree Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
package edu.berkeley.nlp.syntax;
import edu.berkeley.nlp.util.CollectionUtils;
import edu.berkeley.nlp.util.MapFactory;
import edu.berkeley.nlp.util.MyMethod;
import edu.berkeley.nlp.util.Pair;
import java.io.Serializable;
import java.util.*;
/**
* Represent linguistic trees, with each node consisting of a label and a list
* of children.
*
* @author Dan Klein
*
* Added function to get a map of subtrees to constituents.
*/
public class Tree implements Serializable, Comparable>, Iterable> {
private static final long serialVersionUID = 1L;
L label;
List> children;
public void setChild(int i, Tree child) {
children.set(i,child);
}
public void setChildren(List> c) {
this.children = c;
}
public List> getChildren() {
return children;
}
public Tree getChild(int i) {
return children.get(i);
}
public L getLabel() {
return label;
}
public boolean isLeaf() {
return getChildren().isEmpty();
}
public boolean isPreTerminal() {
return getChildren().size() == 1 && getChildren().get(0).isLeaf();
}
public List getYield() {
List yield = new ArrayList();
appendYield(this, yield);
return yield;
}
public Collection> getConstituentCollection() {
Collection> constituents = new ArrayList>();
appendConstituent(this, constituents, 0);
return constituents;
}
/**
* John: I changed this from a hash map because it was broken as a HashMap.
*/
public Map, Constituent> getConstituents() {
Map, Constituent> constituents = new IdentityHashMap, Constituent>();
appendConstituent(this, constituents, 0);
return constituents;
}
public Map, List>> getSpanMap() {
Map, Constituent> cMap = getConstituents();
Map, List>> spanMap = new HashMap();
for (Map.Entry, Constituent> entry : cMap.entrySet()) {
Tree t = entry.getKey();
Constituent c = entry.getValue();
Pair span = Pair.newPair(c.getStart(),c.getEnd()+1);
CollectionUtils.addToValueList(spanMap,span,t);
}
for (List> trees : spanMap.values()) {
Collections.sort(trees,new Comparator>() {
public int compare(Tree t1, Tree t2) {
return t2.getDepth()-t1.getDepth();
}});
}
return spanMap;
}
public Map, Constituent> getConstituents(MapFactory mf) {
Map, Constituent> constituents = mf.buildMap();
appendConstituent(this, constituents, 0);
return constituents;
}
private static int appendConstituent(Tree tree,
Map, Constituent> constituents, int index) {
if (tree.isLeaf()) {
Constituent c = new Constituent(tree.getLabel(), index, index);
constituents.put(tree, c);
return 1; // Length of a leaf constituent
} else {
int nextIndex = index;
for (Tree kid : tree.getChildren()) {
nextIndex += appendConstituent(kid, constituents, nextIndex);
}
Constituent c = new Constituent(tree.getLabel(), index, nextIndex - 1);
constituents.put(tree, c);
return nextIndex - index; // Length of a leaf constituent
}
}
private static int appendConstituent(Tree tree,
Collection> constituents, int index) {
if (tree.isLeaf() || tree.isPreTerminal()) {
Constituent c = new Constituent(tree.getLabel(), index, index);
constituents.add(c);
return 1; // Length of a leaf constituent
} else {
int nextIndex = index;
for (Tree kid : tree.getChildren()) {
nextIndex += appendConstituent(kid, constituents, nextIndex);
}
Constituent c = new Constituent(tree.getLabel(), index, nextIndex - 1);
constituents.add(c);
return nextIndex - index; // Length of a leaf constituent
}
}
private static void appendNonTerminals(Tree tree, List> yield) {
if (tree.isLeaf()) {
return;
}
yield.add(tree);
for (Tree child : tree.getChildren()) {
appendNonTerminals(child, yield);
}
}
public List> getTerminals() {
List> yield = new ArrayList>();
appendTerminals(this, yield);
return yield;
}
public List> getNonTerminals(){
List> yield = new ArrayList>();
appendNonTerminals(this, yield);
return yield;
}
private static void appendTerminals(Tree tree, List> yield) {
if (tree.isLeaf()) {
yield.add(tree);
return;
}
for (Tree child : tree.getChildren()) {
appendTerminals(child, yield);
}
}
/**
* Clone the structure of the tree. Unfortunately, the new labels are copied
* by reference from the current tree.
*
* @return
*/
public Tree shallowClone() {
ArrayList> newChildren = new ArrayList>(children.size());
for (Tree child : children) {
newChildren.add(child.shallowClone());
}
return new Tree(label, newChildren);
}
/**
* Return a clone of just the root node of this tree (with no children)
*
* @return
*/
public Tree shallowCloneJustRoot() {
return new Tree(label);
}
private static void appendYield(Tree tree, List yield) {
if (tree.isLeaf()) {
yield.add(tree.getLabel());
return;
}
for (Tree child : tree.getChildren()) {
appendYield(child, yield);
}
}
public List getPreTerminalYield() {
List yield = new ArrayList();
appendPreTerminalYield(this, yield);
return yield;
}
public List getTerminalYield() {
List> terms = getTerminals();
List yield = new ArrayList();
for (Tree term : terms) {
yield.add(term.getLabel());
}
return yield;
}
public List> getPreTerminals() {
List> preterms = new ArrayList>();
appendPreTerminals(this, preterms);
return preterms;
}
public List> getTreesOfDepth(int depth) {
List> trees = new ArrayList>();
appendTreesOfDepth(this, trees, depth);
return trees;
}
private static void appendPreTerminalYield(Tree tree, List yield) {
if (tree.isPreTerminal()) {
yield.add(tree.getLabel());
return;
}
for (Tree child : tree.getChildren()) {
appendPreTerminalYield(child, yield);
}
}
private static void appendPreTerminals(Tree tree, List> yield) {
if (tree.isPreTerminal()) {
yield.add(tree);
return;
}
for (Tree child : tree.getChildren()) {
appendPreTerminals(child, yield);
}
}
private static void appendTreesOfDepth(Tree tree, List> yield, int depth) {
if (tree.getDepth() == depth) {
yield.add(tree);
return;
}
for (Tree child : tree.getChildren()) {
appendTreesOfDepth(child, yield, depth);
}
}
public List> getPreOrderTraversal() {
ArrayList> traversal = new ArrayList>();
traversalHelper(this, traversal, true);
return traversal;
}
public List> getPostOrderTraversal() {
ArrayList> traversal = new ArrayList>();
traversalHelper(this, traversal, false);
return traversal;
}
private static void traversalHelper(Tree tree, List> traversal,
boolean preOrder) {
if (preOrder) traversal.add(tree);
for (Tree child : tree.getChildren()) {
traversalHelper(child, traversal, preOrder);
}
if (!preOrder) traversal.add(tree);
}
public int getDepth() {
int maxDepth = 0;
for (Tree child : children) {
int depth = child.getDepth();
if (depth > maxDepth) maxDepth = depth;
}
return maxDepth + 1;
}
public int size() {
int sum = 0;
for (Tree child : children) {
sum += child.size();
}
return sum + 1;
}
public List> getAtDepth(int depth) {
List> yield = new ArrayList>();
appendAtDepth(depth, this, yield);
return yield;
}
private static void appendAtDepth(int depth, Tree tree, List> yield) {
if (depth < 0) return;
if (depth == 0) {
yield.add(tree);
return;
}
for (Tree child : tree.getChildren()) {
appendAtDepth(depth - 1, child, yield);
}
}
public void setLabel(L label) {
this.label = label;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
toStringBuilder(sb);
return sb.toString();
}
public void toStringBuilder(StringBuilder sb) {
if (!isLeaf()) sb.append('(');
if (getLabel() != null) {
sb.append(getLabel());
}
if (!isLeaf()) {
for (Tree child : getChildren()) {
sb.append(' ');
child.toStringBuilder(sb);
}
sb.append(')');
}
}
/**
* Same as toString(), but escapes terminals like so:
* ( becomes -LRB-
* ) becomes -RRB-
* \ becomes -BACKSLASH- ("\" does not occur in PTB; this is our own convention)
* This is useful because otherwise it's hard to tell a "(" terminal from the tree's bracket
* structure, or tell an escaping \ from a literal.
*/
public String toEscapedString() {
StringBuilder sb = new StringBuilder();
toStringBuilderEscaped(sb);
return sb.toString();
}
public void toStringBuilderEscaped(StringBuilder sb) {
if (!isLeaf()) sb.append('(');
if (getLabel() != null) {
if (isLeaf()) {
String escapedLabel = getLabel().toString();
escapedLabel = escapedLabel.replaceAll("\\(", "-LRB-");
escapedLabel = escapedLabel.replaceAll("\\)", "-RRB-");
escapedLabel = escapedLabel.replaceAll("\\\\", "-BACKSLASH-");
sb.append(escapedLabel);
} else {
sb.append(getLabel());
}
}
if (!isLeaf()) {
for (Tree child : getChildren()) {
sb.append(' ');
child.toStringBuilderEscaped(sb);
}
sb.append(')');
}
}
public Tree(L label, List> children) {
this.label = label;
this.children = children;
}
public Tree(L label) {
this.label = label;
this.children = Collections.emptyList();
}
/**
* Get the set of all subtrees inside the tree by returning a tree rooted at
* each node. These are not copies, but all share structure. The
* tree is regarded as a subtree of itself.
*
* @return the Set
of all subtrees in the tree.
*/
public Set> subTrees() {
return (Set>) subTrees(new HashSet>());
}
/**
* Get the list of all subtrees inside the tree by returning a tree rooted
* at each node. These are not copies, but all share structure. The
* tree is regarded as a subtree of itself.
*
* @return the List
of all subtrees in the tree.
*/
public List> subTreeList() {
return (List>) subTrees(new ArrayList>());
}
/**
* Add the set of all subtrees inside a tree (including the tree itself) to
* the given Collection
.
*
* @param n
* A collection of nodes to which the subtrees will be added
* @return The collection parameter with the subtrees added
*/
public Collection> subTrees(Collection> n) {
n.add(this);
List> kids = getChildren();
for (Tree kid : kids) {
kid.subTrees(n);
}
return n;
}
/**
* Returns an iterator over the nodes of the tree. This method implements
* the iterator()
method required by the
* Collections
interface. It does a preorder (children after
* node) traversal of the tree. (A possible extension to the class at some
* point would be to allow different traversal orderings via variant
* iterators.)
*
* @return An iterator over the nodes of the tree
*/
public Iterator> iterator() {
return new TreeIterator();
}
private class TreeIterator implements Iterator> {
private List> treeStack;
private TreeIterator() {
treeStack = new ArrayList>();
treeStack.add(Tree.this);
}
public boolean hasNext() {
return (!treeStack.isEmpty());
}
public Tree next() {
int lastIndex = treeStack.size() - 1;
Tree tr = treeStack.remove(lastIndex);
List> kids = tr.getChildren();
// so that we can efficiently use one List, we reverse them
for (int i = kids.size() - 1; i >= 0; i--) {
treeStack.add(kids.get(i));
}
return tr;
}
/**
* Not supported
*/
public void remove() {
throw new UnsupportedOperationException();
}
}
/**
* Applies a transformation to all labels in the tree and returns the
* resulting tree.
*
* @param
* Output type of the transformation
* @param trans
* The transformation to apply
* @return Transformed tree
*/
public Tree transformNodes(MyMethod trans) {
ArrayList> newChildren = new ArrayList>(children.size());
for (Tree child : children) {
newChildren.add(child.transformNodes(trans));
}
return new Tree(trans.call(label), newChildren);
}
/**
* Applies a transformation to all nodes in the tree and returns the
* resulting tree. Different from transformNodes
in that you
* get the full node and not just the label
*
* @param
* @param trans
* @return
*/
public Tree transformNodesUsingNode(MyMethod, O> trans) {
ArrayList> newChildren = new ArrayList>(children.size());
O newLabel = trans.call(this);
for (Tree child : children) {
newChildren.add(child.transformNodesUsingNode(trans));
}
return new Tree(newLabel, newChildren);
}
public Tree transformNodesUsingNodePostOrder(MyMethod, O> trans) {
ArrayList> newChildren = new ArrayList>(children.size());
for (Tree child : children) {
newChildren.add(child.transformNodesUsingNode(trans));
}
O newLabel = trans.call(this);
return new Tree(newLabel, newChildren);
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((label == null) ? 0 : label.hashCode());
for (Tree child : children) {
result = prime * result + ((child == null) ? 0 : child.hashCode());
}
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
if (!(obj instanceof Tree)) return false;
final Tree other = (Tree) obj;
if (!this.label.equals(other.label)) return false;
if (this.getChildren().size() != other.getChildren().size()) return false;
for (int i = 0; i < getChildren().size(); ++i) {
if (!getChildren().get(i).equals(other.getChildren().get(i))) return false;
}
return true;
}
public int compareTo(Tree o) {
if (!(o.getLabel() instanceof Comparable && getLabel() instanceof Comparable))
throw new IllegalArgumentException("Tree labels are not comparable");
int cmp = ((Comparable) o.getLabel()).compareTo(getLabel());
if (cmp != 0) return cmp;
int cmp2 = Double.compare(this.getChildren().size(), o.getChildren().size());
if (cmp2 != 0) return cmp2;
for (int i = 0; i < getChildren().size(); ++i) {
int cmp3 = getChildren().get(i).compareTo(o.getChildren().get(i));
if (cmp3 != 0) return cmp3;
}
return 0;
}
public boolean isPhrasal() {
return getYield().size() > 1;
}
public Constituent getLeastCommonAncestorConstituent(int i, int j) {
final List yield = getYield();
final Constituent leastCommonAncestorConstituentHelper = getLeastCommonAncestorConstituentHelper(
this, 0, yield.size(), i, j);
return leastCommonAncestorConstituentHelper;
}
public Tree getTopTreeForSpan(int i, int j) {
final List yield = getYield();
return getTopTreeForSpanHelper(this, 0, yield.size(), i, j);
}
private static Tree getTopTreeForSpanHelper(Tree tree, int start, int end,
int i, int j) {
assert i <= j;
if (start == i && end == j) {
assert tree.getLabel().toString().matches("\\w+");
return tree;
}
Queue> queue = new LinkedList>();
queue.addAll(tree.getChildren());
int currStart = start;
while (!queue.isEmpty()) {
Tree remove = queue.remove();
List currYield = remove.getYield();
final int currEnd = currStart + currYield.size();
if (currStart <= i && currEnd >= j)
return getTopTreeForSpanHelper(remove, currStart, currEnd, i, j);
currStart += currYield.size();
}
return null;
}
private static Constituent getLeastCommonAncestorConstituentHelper(Tree tree,
int start, int end, int i, int j) {
if (start == i && end == j) return new Constituent(tree.getLabel(), start, end);
Queue> queue = new LinkedList>();
queue.addAll(tree.getChildren());
int currStart = start;
while (!queue.isEmpty()) {
Tree remove = queue.remove();
List currYield = remove.getYield();
final int currEnd = currStart + currYield.size();
if (currStart <= i && currEnd >= j) {
final Constituent leastCommonAncestorConstituentHelper = getLeastCommonAncestorConstituentHelper(
remove, currStart, currEnd, i, j);
if (leastCommonAncestorConstituentHelper != null) return leastCommonAncestorConstituentHelper;
else break;
}
currStart += currYield.size();
}
return new Constituent(tree.getLabel(), start, end);
}
public boolean hasUnariesOtherThanRoot()
{
assert children.size() == 1;
return hasUnariesHelper(children.get(0));
}
private boolean hasUnariesHelper(Tree tree)
{
if (tree.isPreTerminal())
return false;
if (tree.getChildren().size() == 1)
return true;
for (Tree child : tree.getChildren())
{
if (hasUnariesHelper(child))
return true;
}
return false;
}
public boolean hasUnaryChain(){
return hasUnaryChainHelper(this, false);
}
private boolean hasUnaryChainHelper(Tree tree, boolean unaryAbove){
boolean result = false;
if (tree.getChildren().size()==1){
if (unaryAbove) return true;
else if (tree.getChildren().get(0).isPreTerminal()) return false;
else return hasUnaryChainHelper(tree.getChildren().get(0), true);
}
else {
for (Tree child : tree.getChildren()){
if (!child.isPreTerminal())
result = result || hasUnaryChainHelper(child,false);
}
}
return result;
}
public void removeUnaryChains(){
removeUnaryChainHelper(this, null);
}
private void removeUnaryChainHelper(Tree tree, Tree parent){
if (tree.isLeaf()) return;
if (tree.getChildren().size()==1&&!tree.isPreTerminal()){
if (parent!=null) {
tree = tree.getChildren().get(0);
parent.getChildren().set(0, tree);
removeUnaryChainHelper(tree, parent);
}
else
removeUnaryChainHelper(tree.getChildren().get(0), tree);
}
else {
for (Tree child : tree.getChildren()){
if (!child.isPreTerminal())
removeUnaryChainHelper(child,null);
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy