All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.berkeley.nlp.syntax.Tree Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
package edu.berkeley.nlp.syntax;

import edu.berkeley.nlp.util.CollectionUtils;
import edu.berkeley.nlp.util.MapFactory;
import edu.berkeley.nlp.util.MyMethod;
import edu.berkeley.nlp.util.Pair;

import java.io.Serializable;
import java.util.*;

/**
 * Represent linguistic trees, with each node consisting of a label and a list
 * of children.
 * 
 * @author Dan Klein
 * 
 * Added function to get a map of subtrees to constituents.
 */
public class Tree implements Serializable, Comparable>, Iterable> {

	private static final long serialVersionUID = 1L;

	L label;

	List> children;

  public void setChild(int i, Tree child) {
    children.set(i,child);
  }

	public void setChildren(List> c) {
		this.children = c;
	}

	public List> getChildren() {
		return children;
	}

  public Tree getChild(int i) {
    return children.get(i);
  }

	public L getLabel() {
		return label;
	}

	public boolean isLeaf() {
		return getChildren().isEmpty();
	}

	public boolean isPreTerminal() {
		return getChildren().size() == 1 && getChildren().get(0).isLeaf();
	}

	public List getYield() {
		List yield = new ArrayList();
		appendYield(this, yield);
		return yield;
	}

	public Collection> getConstituentCollection() {
		Collection> constituents = new ArrayList>();
		appendConstituent(this, constituents, 0);
		return constituents;
	}

	/**
	 * John: I changed this from a hash map because it was broken as a HashMap.
	 */
	public Map, Constituent> getConstituents() {
		Map, Constituent> constituents = new IdentityHashMap, Constituent>();
		appendConstituent(this, constituents, 0);
		return constituents;
	}

  public Map, List>> getSpanMap() {
    Map, Constituent> cMap = getConstituents();
    Map, List>> spanMap = new HashMap();
    for (Map.Entry, Constituent> entry : cMap.entrySet()) {
      Tree t = entry.getKey();
      Constituent c = entry.getValue();
      Pair span = Pair.newPair(c.getStart(),c.getEnd()+1);
      CollectionUtils.addToValueList(spanMap,span,t);
    }
    for (List> trees : spanMap.values()) {
      Collections.sort(trees,new Comparator>() {
        public int compare(Tree t1, Tree t2) {
          return t2.getDepth()-t1.getDepth();
      }});          
    }
    return spanMap;
  }

	public Map, Constituent> getConstituents(MapFactory mf) {
		Map, Constituent> constituents = mf.buildMap();
		appendConstituent(this, constituents, 0);
		return constituents;
	}

	private static  int appendConstituent(Tree tree,
			Map, Constituent> constituents, int index) {
		if (tree.isLeaf()) {
			Constituent c = new Constituent(tree.getLabel(), index, index);
			constituents.put(tree, c);
			return 1; // Length of a leaf constituent
		} else {
			int nextIndex = index;
			for (Tree kid : tree.getChildren()) {
				nextIndex += appendConstituent(kid, constituents, nextIndex);
			}
			Constituent c = new Constituent(tree.getLabel(), index, nextIndex - 1);
			constituents.put(tree, c);
			return nextIndex - index; // Length of a leaf constituent
		}
	}

	private static  int appendConstituent(Tree tree,
			Collection> constituents, int index) {
		if (tree.isLeaf() || tree.isPreTerminal()) {
			Constituent c = new Constituent(tree.getLabel(), index, index);
			constituents.add(c);
			return 1; // Length of a leaf constituent
		} else {
			int nextIndex = index;
			for (Tree kid : tree.getChildren()) {
				nextIndex += appendConstituent(kid, constituents, nextIndex);
			}
			Constituent c = new Constituent(tree.getLabel(), index, nextIndex - 1);
			constituents.add(c);
			return nextIndex - index; // Length of a leaf constituent
		}
	}

	private static  void appendNonTerminals(Tree tree, List> yield) {
	  	if (tree.isLeaf()) {
	  	
	  		return;
	  	}
	  	yield.add(tree);
	    for (Tree child : tree.getChildren()) {
	      appendNonTerminals(child, yield);
	    }
	}
	  
	public List> getTerminals() {
		List> yield = new ArrayList>();
		appendTerminals(this, yield);
		return yield;
	}

    public List> getNonTerminals(){
	  	List> yield = new ArrayList>();
	  	appendNonTerminals(this, yield);
	  	return yield;
    }

	
	private static  void appendTerminals(Tree tree, List> yield) {
		if (tree.isLeaf()) {
			yield.add(tree);
			return;
		}
		for (Tree child : tree.getChildren()) {
			appendTerminals(child, yield);
		}
	}

	/**
	 * Clone the structure of the tree. Unfortunately, the new labels are copied
	 * by reference from the current tree.
	 * 
	 * @return
	 */
	public Tree shallowClone() {
		ArrayList> newChildren = new ArrayList>(children.size());
		for (Tree child : children) {
			newChildren.add(child.shallowClone());
		}
		return new Tree(label, newChildren);
	}

	/**
	 * Return a clone of just the root node of this tree (with no children)
	 * 
	 * @return
	 */
	public Tree shallowCloneJustRoot() {

		return new Tree(label);
	}

	private static  void appendYield(Tree tree, List yield) {
		if (tree.isLeaf()) {
			yield.add(tree.getLabel());
			return;
		}
		for (Tree child : tree.getChildren()) {
			appendYield(child, yield);
		}
	}

	public List getPreTerminalYield() {
		List yield = new ArrayList();
		appendPreTerminalYield(this, yield);
		return yield;
	}

	public List getTerminalYield() {
		List> terms = getTerminals();
		List yield = new ArrayList();
		for (Tree term : terms) {
			yield.add(term.getLabel());
		}
		return yield;
	}

	public List> getPreTerminals() {
		List> preterms = new ArrayList>();
		appendPreTerminals(this, preterms);
		return preterms;
	}

	public List> getTreesOfDepth(int depth) {
		List> trees = new ArrayList>();
		appendTreesOfDepth(this, trees, depth);
		return trees;
	}

	private static  void appendPreTerminalYield(Tree tree, List yield) {
		if (tree.isPreTerminal()) {
			yield.add(tree.getLabel());
			return;
		}
		for (Tree child : tree.getChildren()) {
			appendPreTerminalYield(child, yield);
		}
	}

	private static  void appendPreTerminals(Tree tree, List> yield) {
		if (tree.isPreTerminal()) {
			yield.add(tree);
			return;
		}
		for (Tree child : tree.getChildren()) {
			appendPreTerminals(child, yield);
		}
	}

	private static  void appendTreesOfDepth(Tree tree, List> yield, int depth) {
		if (tree.getDepth() == depth) {
			yield.add(tree);
			return;
		}
		for (Tree child : tree.getChildren()) {
			appendTreesOfDepth(child, yield, depth);
		}
	}

	public List> getPreOrderTraversal() {
		ArrayList> traversal = new ArrayList>();
		traversalHelper(this, traversal, true);
		return traversal;
	}

	public List> getPostOrderTraversal() {
		ArrayList> traversal = new ArrayList>();
		traversalHelper(this, traversal, false);
		return traversal;
	}

	private static  void traversalHelper(Tree tree, List> traversal,
			boolean preOrder) {
		if (preOrder) traversal.add(tree);
		for (Tree child : tree.getChildren()) {
			traversalHelper(child, traversal, preOrder);
		}
		if (!preOrder) traversal.add(tree);
	}

	public int getDepth() {
		int maxDepth = 0;
		for (Tree child : children) {
			int depth = child.getDepth();
			if (depth > maxDepth) maxDepth = depth;
		}
		return maxDepth + 1;
	}

  public int size() {
    int sum = 0;
    for (Tree child : children) {
      sum += child.size();
    }
    return sum + 1;
  }

	public List> getAtDepth(int depth) {
		List> yield = new ArrayList>();
		appendAtDepth(depth, this, yield);
		return yield;
	}

	private static  void appendAtDepth(int depth, Tree tree, List> yield) {
		if (depth < 0) return;
		if (depth == 0) {
			yield.add(tree);
			return;
		}
		for (Tree child : tree.getChildren()) {
			appendAtDepth(depth - 1, child, yield);
		}
	}

	public void setLabel(L label) {
		this.label = label;
	}

	@Override
	public String toString() {
		StringBuilder sb = new StringBuilder();
		toStringBuilder(sb);
		return sb.toString();
	}

	public void toStringBuilder(StringBuilder sb) {
		if (!isLeaf()) sb.append('(');
		if (getLabel() != null) {
			sb.append(getLabel());
		}
		if (!isLeaf()) {
			for (Tree child : getChildren()) {
				sb.append(' ');
				child.toStringBuilder(sb);
			}
			sb.append(')');
		}
	}

	/**
	 * Same as toString(), but escapes terminals like so:
	 * ( becomes -LRB-
	 * ) becomes -RRB-
	 * \ becomes -BACKSLASH- ("\" does not occur in PTB; this is our own convention)
	 * This is useful because otherwise it's hard to tell a "(" terminal from the tree's bracket
	 * structure, or tell an escaping \ from a literal.
	 */
	public String toEscapedString() {
		StringBuilder sb = new StringBuilder();
		toStringBuilderEscaped(sb);
		return sb.toString();
	}

	public void toStringBuilderEscaped(StringBuilder sb) {
		if (!isLeaf()) sb.append('(');
		if (getLabel() != null) {
			if (isLeaf()) {
				String escapedLabel = getLabel().toString();
				escapedLabel = escapedLabel.replaceAll("\\(", "-LRB-");
				escapedLabel = escapedLabel.replaceAll("\\)", "-RRB-");
				escapedLabel = escapedLabel.replaceAll("\\\\", "-BACKSLASH-");
				sb.append(escapedLabel);
			} else {
				sb.append(getLabel());
			}
		}
		if (!isLeaf()) {
			for (Tree child : getChildren()) {
				sb.append(' ');
				child.toStringBuilderEscaped(sb);
			}
			sb.append(')');
		}
	}

	public Tree(L label, List> children) {
		this.label = label;
		this.children = children;
	}

	public Tree(L label) {
		this.label = label;
		this.children = Collections.emptyList();
	}

	/**
	 * Get the set of all subtrees inside the tree by returning a tree rooted at
	 * each node. These are not copies, but all share structure. The
	 * tree is regarded as a subtree of itself.
	 * 
	 * @return the Set of all subtrees in the tree.
	 */
	public Set> subTrees() {
		return (Set>) subTrees(new HashSet>());
	}

	/**
	 * Get the list of all subtrees inside the tree by returning a tree rooted
	 * at each node. These are not copies, but all share structure. The
	 * tree is regarded as a subtree of itself.
	 * 
	 * @return the List of all subtrees in the tree.
	 */
	public List> subTreeList() {
		return (List>) subTrees(new ArrayList>());
	}

	/**
	 * Add the set of all subtrees inside a tree (including the tree itself) to
	 * the given Collection.
	 * 
	 * @param n
	 *            A collection of nodes to which the subtrees will be added
	 * @return The collection parameter with the subtrees added
	 */
	public Collection> subTrees(Collection> n) {
		n.add(this);
		List> kids = getChildren();
		for (Tree kid : kids) {
			kid.subTrees(n);
		}
		return n;
	}

	/**
	 * Returns an iterator over the nodes of the tree. This method implements
	 * the iterator() method required by the
	 * Collections interface. It does a preorder (children after
	 * node) traversal of the tree. (A possible extension to the class at some
	 * point would be to allow different traversal orderings via variant
	 * iterators.)
	 * 
	 * @return An iterator over the nodes of the tree
	 */
	public Iterator> iterator() {
		return new TreeIterator();
	}

	private class TreeIterator implements Iterator> {

		private List> treeStack;

		private TreeIterator() {
			treeStack = new ArrayList>();
			treeStack.add(Tree.this);
		}

		public boolean hasNext() {
			return (!treeStack.isEmpty());
		}

		public Tree next() {
			int lastIndex = treeStack.size() - 1;
			Tree tr = treeStack.remove(lastIndex);
			List> kids = tr.getChildren();
			// so that we can efficiently use one List, we reverse them
			for (int i = kids.size() - 1; i >= 0; i--) {
				treeStack.add(kids.get(i));
			}
			return tr;
		}

		/**
		 * Not supported
		 */
		public void remove() {
			throw new UnsupportedOperationException();
		}

	}

	/**
	 * Applies a transformation to all labels in the tree and returns the
	 * resulting tree.
	 * 
	 * @param 
	 *            Output type of the transformation
	 * @param trans
	 *            The transformation to apply
	 * @return Transformed tree
	 */
	public  Tree transformNodes(MyMethod trans) {
		ArrayList> newChildren = new ArrayList>(children.size());
		for (Tree child : children) {
			newChildren.add(child.transformNodes(trans));
		}
		return new Tree(trans.call(label), newChildren);
	}

	/**
	 * Applies a transformation to all nodes in the tree and returns the
	 * resulting tree. Different from transformNodes in that you
	 * get the full node and not just the label
	 * 
	 * @param 
	 * @param trans
	 * @return
	 */
	public  Tree transformNodesUsingNode(MyMethod, O> trans) {
		ArrayList> newChildren = new ArrayList>(children.size());
		O newLabel = trans.call(this);
		for (Tree child : children) {
			newChildren.add(child.transformNodesUsingNode(trans));
		}
		return new Tree(newLabel, newChildren);
	}

	public  Tree transformNodesUsingNodePostOrder(MyMethod, O> trans) {
		ArrayList> newChildren = new ArrayList>(children.size());
		for (Tree child : children) {
			newChildren.add(child.transformNodesUsingNode(trans));
		}
		O newLabel = trans.call(this);
		return new Tree(newLabel, newChildren);
	}

	@Override
	public int hashCode() {
		final int prime = 31;
		int result = 1;
		result = prime * result + ((label == null) ? 0 : label.hashCode());
		for (Tree child : children) {
			result = prime * result + ((child == null) ? 0 : child.hashCode());
		}
		return result;
	}

	@Override
	public boolean equals(Object obj) {
		if (this == obj) return true;
		if (obj == null) return false;
		if (getClass() != obj.getClass()) return false;
		if (!(obj instanceof Tree)) return false;
		final Tree other = (Tree) obj;
		if (!this.label.equals(other.label)) return false;
		if (this.getChildren().size() != other.getChildren().size()) return false;
		for (int i = 0; i < getChildren().size(); ++i) {

			if (!getChildren().get(i).equals(other.getChildren().get(i))) return false;
		}
		return true;

	}

	public int compareTo(Tree o) {
		if (!(o.getLabel() instanceof Comparable && getLabel() instanceof Comparable))
			throw new IllegalArgumentException("Tree labels are not comparable");
		int cmp = ((Comparable) o.getLabel()).compareTo(getLabel());
		if (cmp != 0) return cmp;
		int cmp2 = Double.compare(this.getChildren().size(), o.getChildren().size());
		if (cmp2 != 0) return cmp2;
		for (int i = 0; i < getChildren().size(); ++i) {

			int cmp3 = getChildren().get(i).compareTo(o.getChildren().get(i));
			if (cmp3 != 0) return cmp3;
		}
		return 0;

	}

	public boolean isPhrasal() {
		return getYield().size() > 1;
	}

	public Constituent getLeastCommonAncestorConstituent(int i, int j) {
		final List yield = getYield();
		final Constituent leastCommonAncestorConstituentHelper = getLeastCommonAncestorConstituentHelper(
				this, 0, yield.size(), i, j);

		return leastCommonAncestorConstituentHelper;
	}

	public Tree getTopTreeForSpan(int i, int j) {
		final List yield = getYield();
		return getTopTreeForSpanHelper(this, 0, yield.size(), i, j);
	}

	private static  Tree getTopTreeForSpanHelper(Tree tree, int start, int end,
			int i, int j) {

		assert i <= j;
		if (start == i && end == j) {
			assert tree.getLabel().toString().matches("\\w+");
			return tree;
		}

		Queue> queue = new LinkedList>();
		queue.addAll(tree.getChildren());
		int currStart = start;
		while (!queue.isEmpty()) {
			Tree remove = queue.remove();
			List currYield = remove.getYield();
			final int currEnd = currStart + currYield.size();
			if (currStart <= i && currEnd >= j)
				return getTopTreeForSpanHelper(remove, currStart, currEnd, i, j);
			currStart += currYield.size();
		}
		return null;
	}

	private static  Constituent getLeastCommonAncestorConstituentHelper(Tree tree,
			int start, int end, int i, int j) {

		if (start == i && end == j) return new Constituent(tree.getLabel(), start, end);

		Queue> queue = new LinkedList>();
		queue.addAll(tree.getChildren());
		int currStart = start;
		while (!queue.isEmpty()) {
			Tree remove = queue.remove();
			List currYield = remove.getYield();
			final int currEnd = currStart + currYield.size();
			if (currStart <= i && currEnd >= j) {
				final Constituent leastCommonAncestorConstituentHelper = getLeastCommonAncestorConstituentHelper(
						remove, currStart, currEnd, i, j);
				if (leastCommonAncestorConstituentHelper != null) return leastCommonAncestorConstituentHelper;
				else break;
			}
			currStart += currYield.size();
		}
		return new Constituent(tree.getLabel(), start, end);
	}

	  public boolean hasUnariesOtherThanRoot()
	  {
	  	assert children.size() == 1;
	  	return hasUnariesHelper(children.get(0));
	  	
	  }
	  
	  private boolean hasUnariesHelper(Tree tree)
	  {
	  	if (tree.isPreTerminal())
	  		return false;
	  	if (tree.getChildren().size() == 1)
	  		return true;
	  	for (Tree child : tree.getChildren())
	  	{
	  		if (hasUnariesHelper(child))
	  			return true;
	  	}
	  	return false;
	  }
	  
	  public boolean hasUnaryChain(){
	  	return hasUnaryChainHelper(this, false);
	  }
	  	
	  private boolean hasUnaryChainHelper(Tree tree, boolean unaryAbove){
	  	boolean result = false;
			if (tree.getChildren().size()==1){
				if (unaryAbove) return true;
				else if (tree.getChildren().get(0).isPreTerminal()) return false;
				else return hasUnaryChainHelper(tree.getChildren().get(0), true);
	  	}
	  	else {
	  		for (Tree child : tree.getChildren()){
	  			if (!child.isPreTerminal()) 
	  				result = result || hasUnaryChainHelper(child,false);
	  		}
	  	}
	  	return result;
	  }
	  
	  public void removeUnaryChains(){
	  	removeUnaryChainHelper(this, null);
	  }
	  	
	  private void removeUnaryChainHelper(Tree tree, Tree parent){
	  	if (tree.isLeaf()) return;
	  	if (tree.getChildren().size()==1&&!tree.isPreTerminal()){
				if (parent!=null) {
					tree = tree.getChildren().get(0);
					parent.getChildren().set(0, tree);
					removeUnaryChainHelper(tree, parent);
				}
				else 
					removeUnaryChainHelper(tree.getChildren().get(0), tree);
	  	}
	  	else {
	  		for (Tree child : tree.getChildren()){
	  			if (!child.isPreTerminal()) 
	  				removeUnaryChainHelper(child,null);
	  		}
	  	}
	  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy