All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.trees.Tree Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.trees;
import edu.stanford.nlp.util.logging.Redwood;

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.*;
import java.util.function.Predicate;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasIndex;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.LabelFactory;
import edu.stanford.nlp.ling.LabeledWord;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.util.*;

/**
 * The abstract class Tree is used to collect all of the
 * tree types, and acts as a generic extensible type.  This is the
 * standard implementation of inheritance-based polymorphism.
 * All Tree objects support accessors for their children (a
 * Tree[]), their label (a Label), and their
 * score (a double).  However, different concrete
 * implementations may or may not include the latter two, in which
 * case a default value is returned.  The class Tree defines no data
 * fields.  The two abstract methods that must be implemented are:
 * children(), and treeFactory().  Note
 * that setChildren(Tree[]) is now an optional
 * operation, whereas it was previously required to be
 * implemented. There is now support for finding the parent of a
 * tree.  This may be done by search from a tree root, or via a
 * directly stored parent.  The Tree class now
 * implements the Collection interface: in terms of
 * this, each node of the tree is an element of the
 * collection; hence one can explore the tree by using the methods of
 * this interface.  A Tree is regarded as a read-only
 * Collection (even though the Tree class
 * has various methods that modify trees).  Moreover, the
 * implementation is not thread-safe: no attempt is made to
 * detect and report concurrent modifications.
 *
 * @author Christopher Manning
 * @author Dan Klein
 * @author Sarah Spikes ([email protected]) - filled in types
 */
public abstract class Tree extends AbstractCollection implements Label, Labeled, Scored, Serializable  {

  /** Redwood logging channel used for the diagnostic messages emitted by this class. */
  private static Redwood.RedwoodChannels log = Redwood.channels(Tree.class);

  /** Fixed serialization version identifier (the class implements Serializable). */
  private static final long serialVersionUID = 5441849457648722744L;

  /**
   * A canonical zero-length {@code Tree[]}.  A leaf node must report a
   * zero-length array from children(); implementations may return this
   * shared instance for that purpose instead of allocating a new array.
   * It can also be used anywhere else an empty Tree array is needed.
   */
  public static final Tree[] EMPTY_TREE_ARRAY = new Tree[0];

  /** No-argument constructor; it performs no initialization. */
  public Tree() {
  }

  /**
   * Tests whether this node is a leaf, that is, a node without any
   * children.  Works on any Tree; implementations signal "no children"
   * by returning a zero-length Tree[] array from children().
   *
   * @return true if this node has no children
   */
  public boolean isLeaf() {
    final int childCount = numChildren();
    return childCount == 0;
  }


  /**
   * Counts the direct children of this node (its local tree only, not
   * deeper descendants).  Works on any Tree; a leaf reports zero.
   *
   * @return The number of direct children of the tree node
   */
  public int numChildren() {
    final Tree[] kids = children();
    return kids.length;
  }


  /**
   * Tests whether this node has exactly one child.
   * Works on any Tree.
   *
   * @return Whether the node heads a unary rewrite
   */
  public boolean isUnaryRewrite() {
    final int childCount = numChildren();
    return childCount == 1;
  }


  /**
   * Tests whether this node is a preterminal: a node whose single child
   * is itself a leaf.
   *
   * @return true if the node is a preterminal; false otherwise
   */
  public boolean isPreTerminal() {
    final Tree[] kids = children();
    if (kids.length != 1) {
      return false;
    }
    return kids[0].isLeaf();
  }


  /**
   * Tests whether every child of this node is a preterminal (a node with
   * one child which is itself a leaf).  A node without children is never
   * a prepreterminal.
   *
   * @return true if the node is a prepreterminal; false otherwise
   */
  public boolean isPrePreTerminal() {
    final Tree[] kids = children();
    return kids.length > 0 && Arrays.stream(kids).allMatch(Tree::isPreTerminal);
  }


  /**
   * Tests whether this node is phrasal, meaning it is neither a leaf nor
   * a preterminal.  Put positively: it has at least two children, or it
   * has a single child that is not a leaf.
   *
   * @return true if the node is phrasal;
   *         false otherwise
   */
  public boolean isPhrasal() {
    final Tree[] kids = children();
    if (kids == null || kids.length == 0) {
      // no children: this is a leaf
      return false;
    }
    // a lone leaf child makes this a preterminal, anything else is phrasal
    return kids.length != 1 || !kids[0].isLeaf();
  }


  /**
   * Structural equality for Trees.  Two Tree objects are equal when their
   * {@link #value()}s are equal (or both null), they have the same number
   * of children, and the children are pairwise equal in order.
   *
   * @param o The object to compare with
   * @return Whether two things are equal
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof Tree)) {
      return false;
    }
    final Tree other = (Tree) o;
    if (!Objects.equals(this.value(), other.value())) {
      return false;
    }
    final Tree[] mine = children();
    final Tree[] theirs = other.children();
    if (mine.length != theirs.length) {
      return false;
    }
    int i = 0;
    for (Tree kid : mine) {
      if (!kid.equals(theirs[i])) {
        return false;
      }
      i++;
    }
    return true;
  }


  /**
   * Hash code consistent with {@link #equals(Object)}: it combines the
   * value of this node with the values of its direct children, each
   * child's contribution shifted left by its position.
   *
   * @return The hash code
   */
  @Override
  public int hashCode() {
    final String ownValue = this.value();
    int hash = (ownValue != null) ? ownValue.hashCode() : 1;
    int pos = 0;
    for (Tree kid : children()) {
      final String kidValue = kid.value();
      // a child without a value contributes its position instead
      final int kidHash = (kidValue != null) ? kidValue.hashCode() : pos;
      hash ^= (kidHash << pos);
      pos++;
    }
    return hash;
  }


  /**
   * Finds the position of a Tree among the direct children of this node.
   * Children are compared by identity (==), not by equals(): the usual
   * question is "which child of this parent is this particular node",
   * and identity is both faster and immune to two subtrees that merely
   * look the same.
   *
   * @param tree The tree to look for in children list
   * @return Its index in the list or -1
   */
  public int objectIndexOf(Tree tree) {
    int idx = 0;
    for (Tree kid : children()) {
      if (kid == tree) {
        return idx;
      }
      idx++;
    }
    return -1;
  }


  /**
   * Returns the direct children of this node as an array.  A leaf must
   * return a zero-length Tree[] array; returning null for leaves was
   * supported in the past but no longer is.  Callers may therefore rely
   * on exactly one of these holding: isLeaf() is true, or the returned
   * array is non-empty.
   *
   * @return The children of the node
   * @see #getChildrenAsList()
   */
  public abstract Tree[] children();


  /**
   * Returns the children of this node as a List.  If there are no
   * children, a (non-null) {@code List<Tree>} of size 0 is returned.
   * The list itself is freshly allocated, so adding to or removing from
   * it does not affect this node; its elements, however, are the actual
   * child nodes (not copies), so changes made to a child are visible in
   * the tree.
   *
   * @return A new, mutable list holding the children of the node
   */
  public List<Tree> getChildrenAsList() {
    return new ArrayList<>(Arrays.asList(children()));
  }


  /**
   * Replaces the children of this node with the trees in the given
   * array.  This is an optional operation: the implementation in this
   * class always throws, and subclasses that support it must override.
   * Note for subclasses that a node without children must still return
   * a zero-length Tree[] array from children().  This class provides
   * {@code EMPTY_TREE_ARRAY} as a canonical zero-length array for that
   * purpose, but leaf nodes are not required to use that particular
   * instance.
   *
   * @param children The array of children, each a Tree
   * @throws UnsupportedOperationException always, in this default implementation
   * @see #setChildren(List)
   */
  public void setChildren(Tree[] children) {
    throw new UnsupportedOperationException();
  }


  /**
   * Replaces the children of this node with the trees in the given list.
   * The list is converted to an array and handed to
   * {@link #setChildren(Tree[])}; subclasses that store their children
   * in a List should override this method.  A null or empty list yields
   * a node with no children, represented by the canonical
   * {@code EMPTY_TREE_ARRAY}.
   *
   * @param childTreesList A list of trees to become children of the node.
   *          The list itself is not retained (its contents are copied
   *          into a new array), but the individual children are retained
   *          and are not copied.
   * @see #setChildren(Tree[])
   */
  public void setChildren(List<? extends Tree> childTreesList) {
    if (childTreesList == null || childTreesList.isEmpty()) {
      setChildren(EMPTY_TREE_ARRAY);
    } else {
      setChildren(childTreesList.toArray(new Tree[childTreesList.size()]));
    }
  }


  /**
   * Returns the label attached to this node, or null if it has none.
   * This default implementation stores no label and therefore always
   * returns {@code null}.
   *
   * @return The label of the node (always null here)
   */
  @Override
  public Label label() {
    return null;
  }


  /**
   * Sets the label attached to this node, for tree types that have one.
   * This default implementation does nothing: the argument is ignored.
   *
   * @param label The label
   */
  @Override
  public void setLabel(Label label) {
    // deliberately empty: this default implementation stores no label
  }


  /**
   * Returns the score attached to this node, or NaN if it has none.
   * This default implementation stores no score and always returns
   * {@code Double.NaN}.
   *
   * @return The score (always NaN here)
   */
  @Override
  public double score() {
    return Double.NaN;
  }


  /**
   * Sets the score attached to this node, for tree types that have one.
   * This default implementation does not support scores and always throws.
   *
   * @param score The score
   * @throws UnsupportedOperationException always, in this default implementation
   */
  public void setScore(double score) {
    throw new UnsupportedOperationException("You must use a tree type that implements scoring in order call setScore()");
  }


  /**
   * Returns the leftmost child of this node.
   *
   * @return The first child, or null if this node has no children
   */
  public Tree firstChild() {
    final Tree[] kids = children();
    return (kids.length > 0) ? kids[0] : null;
  }


  /**
   * Returns the rightmost child of this node.
   *
   * @return The last child, or null if this node has no children
   */
  public Tree lastChild() {
    final Tree[] kids = children();
    final int count = kids.length;
    return (count > 0) ? kids[count - 1] : null;
  }

  /** Finds the top of the (possibly trivial) unary chain containing this node.
   *  While the current node is the only child of its parent, the search
   *  moves up to the parent; it stops at the node whose parent has several
   *  children, or at the node that has no parent under {@code root}.
   *
   *  @param root The root of the tree that contains this subtree
   *  @return The uppermost node of the unary chain, if this node is in a unary
   *         chain, or else the current node
   */
  public Tree upperMostUnary(Tree root) {
    final Tree mother = parent(root);
    final boolean topOfChain = (mother == null) || (mother.numChildren() > 1);
    return topOfChain ? this : mother.upperMostUnary(root);
  }

  /**
   * Assigns a SpanAnnotation to every node of this tree, numbering the
   * leaves from left to right starting at zero.  The work is done by
   * constituentsNodes, which throws an UnsupportedOperationException
   * when it meets a node whose label is not a CoreLabel.
   */
  public void setSpans() {
    constituentsNodes(0);
  }

  /**
   * Reads the SpanAnnotation stored on this node's label.  The annotation
   * is only present after it has been assigned, e.g. by setSpans().
   *
   * @return an IntPair: the SpanAnnotation of this node, or null if the
   *         label is not a CoreMap or carries no SpanAnnotation
   */
  public IntPair getSpan() {
    if (!(label() instanceof CoreMap)) {
      return null;
    }
    final CoreMap annotations = (CoreMap) label();
    if (!annotations.containsKey(CoreAnnotations.SpanAnnotation.class)) {
      return null;
    }
    return annotations.get(CoreAnnotations.SpanAnnotation.class);
  }

  /**
   * Collects the Constituents of this parse tree, built with a
   * SimpleConstituentFactory.  Spans are counted at the word level
   * (i.e. with respect to whitespace).
   *
   * @return a Set of the constituents as constituents of
   *         type Constituent
   */
  public Set constituents() {
    final SimpleConstituentFactory factory = new SimpleConstituentFactory();
    return constituents(factory);
  }


  /**
   * Collects the Constituents of this parse tree at the word level,
   * using the supplied factory.  Preterminal categories are included,
   * leaves are not.
   *
   * @param cf ConstituentFactory used to build the Constituent objects
   * @return a Set of the constituents as SimpleConstituent type
   *         (in the current implementation, a HashSet)
   */
  public Set constituents(ConstituentFactory cf) {
    final boolean charLevel = false;
    return constituents(cf, charLevel);
  }

  /**
   * Collects the Constituents of this parse tree at the word level,
   * restricted to nodes no deeper than {@code maxDepth}.  No filter is
   * applied.
   *
   * @param cf ConstituentFactory used to build the Constituent objects
   * @param maxDepth The maximum depth at which to add constituents,
   *                 where 0 is the root level.  Negative maxDepth
   *                 indicates no maximum.
   * @return a Set of the constituents as SimpleConstituent type
   *         (in the current implementation, a HashSet)
   */
  public Set constituents(ConstituentFactory cf, int maxDepth) {
    final Set collected = Generics.newHashSet();
    // word-level spans, no filter, numbering and depth both start at 0
    constituents(collected, 0, cf, false, null, maxDepth, 0);
    return collected;
  }

  /**
   * Collects the Constituents of this parse tree with no depth limit and
   * no filter, at either word or character level.
   *
   * @param cf ConstituentFactory used to build the Constituent objects
   * @param charLevel If true, compute bracketings irrespective of whitespace boundaries.
   * @return a Set of the constituents as SimpleConstituent type
   *         (in the current implementation, a HashSet)
   */
  public Set constituents(ConstituentFactory cf, boolean charLevel) {
    final Set collected = Generics.newHashSet();
    // no filter, unlimited depth, numbering and depth both start at 0
    constituents(collected, 0, cf, charLevel, null, -1, 0);
    return collected;
  }

  /**
   * Collects the Constituents of this parse tree with no depth limit,
   * keeping only the nodes accepted by {@code filter}.
   *
   * @param cf ConstituentFactory used to build the Constituent objects
   * @param charLevel If true, compute bracketings irrespective of whitespace boundaries.
   * @param filter Decides, per tree node, whether a constituent is added;
   *               null accepts every node
   * @return a Set of the constituents that passed the filter
   */
  public Set constituents(ConstituentFactory cf, boolean charLevel, Predicate filter) {
    final Set collected = Generics.newHashSet();
    // unlimited depth, numbering and depth both start at 0
    constituents(collected, 0, cf, charLevel, filter, -1, 0);
    return collected;
  }

  /**
   * Stores the span of every node as an IntPair SpanAnnotation on the
   * node's CoreLabel.  A leaf at position {@code left} gets the span
   * (left, left); an internal node gets the span from {@code left} to
   * the position of its last leaf.  Children are processed before the
   * node itself.
   *
   * @param left The left position to begin labeling from
   * @return The index of the right frontier of the constituent, i.e. the
   *         position just after the last leaf below this node
   * @throws UnsupportedOperationException if a node's label is not a CoreLabel
   */
  private int constituentsNodes(int left) {
    int next = left;
    if (isLeaf()) {
      // a leaf occupies exactly one position
      next = left + 1;
    } else {
      // thread the running position through the daughters, left to right
      for (Tree kid : children()) {
        next = kid.constituentsNodes(next);
      }
    }

    if (!(label() instanceof CoreLabel)) {
      throw new UnsupportedOperationException("Can only set spans on trees which use CoreLabel");
    }
    // next - 1 is the last position covered (equal to left for a leaf)
    ((CoreLabel) label()).set(CoreAnnotations.SpanAnnotation.class, new IntPair(left, next - 1));
    return next;
  }

  /**
   * Recursive worker behind the public constituents methods.  It adds the
   * constituents found in this subtree to {@code constituentsSet},
   * numbering positions from {@code left}, and returns the position of
   * the right edge so that the caller can thread the numbering through
   * consecutive daughters.  A preterminal adds nothing itself; it only
   * advances the position.
   *
   * @param constituentsSet set of constituents to add results of bracketing
   *                        this tree to
   * @param left            left position to begin labeling the bracketings with
   * @param cf              ConstituentFactory used to build the Constituent objects
   * @param charLevel       If true, a preterminal advances the position by the
   *                        length of its word; otherwise by 1.
   * @param filter          A filter to use to decide whether or not to add a tree
   *                        as a constituent; null accepts every tree.
   * @param maxDepth        The maximum depth at which to allow constituents.
   *                        Set to negative to indicate all depths allowed.
   * @param depth           The current depth
   * @return Index of right frontier of Constituent
   */
  private int constituents(Set constituentsSet, int left, ConstituentFactory cf, boolean charLevel, Predicate filter, int maxDepth, int depth) {
    if (isPreTerminal()) {
      final int width = charLevel ? firstChild().value().length() : 1;
      return left + width;
    }

    int right = left;
    for (Tree kid : children()) {
      right = kid.constituents(constituentsSet, right, cf, charLevel, filter, maxDepth, depth + 1);
    }

    final boolean accepted = (filter == null) || filter.test(this);
    final boolean shallowEnough = (maxDepth < 0) || (depth <= maxDepth);
    if (accepted && shallowEnough) {
      // the span of this node is only known once all daughters are done
      constituentsSet.add(cf.newConstituent(left, right - 1, label(), score()));
    }
    return right;
  }


  /**
   * Builds a new Tree for the local tree rooted at this node: a fresh
   * mother node with one fresh daughter per child.  The new nodes are
   * created through treeFactory() and reuse the original Labels (labels
   * are not copied); every new daughter is given an empty child list.
   *
   * @return A local tree
   */
  public Tree localTree() {
    final Tree[] kids = children();
    final Tree[] bareKids = new Tree[kids.length];
    final TreeFactory factory = treeFactory();
    int slot = 0;
    for (Tree kid : kids) {
      bareKids[slot] = factory.newTreeNode(kid.label(), Arrays.asList(EMPTY_TREE_ARRAY));
      slot++;
    }
    return factory.newTreeNode(label(), Arrays.asList(bareKids));
  }


  /**
   * Collects the local trees of all phrasal nodes of this tree.  For
   * every phrasal node reached by iterating over this tree, the result
   * of its localTree() call is added to the set.
   *
   * @return A set of local trees
   */
  public Set localTrees() {
    final Set locals = Generics.newHashSet();
    for (Tree node : this) {
      if (!node.isPhrasal()) {
        continue;
      }
      locals.add(node.localTree());
    }
    return locals;
  }


  /**
   * Initial capacity used for the StringBuilder that prints a tree.
   * Most instances of Tree need far more than the default StringBuilder
   * capacity of 16 characters to print the whole tree, so a larger
   * starting size is used.
   */
  private static final int initialPrintStringBuilderSize = 500;

  /**
   * Appends the bracketed String form of this parse tree to a
   * {@code StringBuilder}, printing only the value of each node's label.
   * On complex trees this can be more efficient than {@code toString()}.
   *
   * @param sb The {@code StringBuilder} to which the tree will be appended
   * @return Returns the {@code StringBuilder} passed in with extra stuff in it
   */
  public StringBuilder toStringBuilder(StringBuilder sb) {
    final boolean valueOnly = true;
    return toStringBuilder(sb, valueOnly);
  }

  /**
   * Appends the bracketed String form of this parse tree to a
   * {@code StringBuilder}.  A leaf is printed as its label alone; any
   * other node is printed as an opening bracket, its label, each child
   * preceded by a space, and a closing bracket.  A missing label prints
   * nothing.
   *
   * @param sb The {@code StringBuilder} to which the tree will be appended
   * @param printOnlyLabelValue If true, print only the value() of each node's label
   * @return Returns the {@code StringBuilder} passed in with extra stuff in it
   */
  public StringBuilder toStringBuilder(StringBuilder sb, boolean printOnlyLabelValue) {
    if (isLeaf()) {
      if (label() == null) {
        return sb;
      }
      if (printOnlyLabelValue) {
        sb.append(label().value());
      } else {
        sb.append(label());
      }
      return sb;
    }

    sb.append('(');
    if (label() != null) {
      if (!printOnlyLabelValue) {
        sb.append(label());
      } else if (value() != null) {
        // a null value prints as nothing rather than as "null"
        sb.append(label().value());
      }
    }
    final Tree[] kids = children();
    if (kids != null) {
      for (int i = 0; i < kids.length; i++) {
        sb.append(' ');
        kids[i].toStringBuilder(sb, printOnlyLabelValue);
      }
    }
    sb.append(')');
    return sb;
  }


  /**
   * Converts parse tree to string in Penn Treebank format.
   * 

* Implementation note: Internally, the method gains * efficiency by chaining use of a single StringBuilder * through all the printing. * * @return the tree as a bracketed list on one line */ @Override public String toString() { return toStringBuilder(new StringBuilder(Tree.initialPrintStringBuilderSize)).toString(); } private static final int indentIncr = 2; private static String makeIndentString(int indent) { StringBuilder sb = new StringBuilder(indent); for (int i = 0; i < indentIncr; i++) { sb.append(' '); } return sb.toString(); } public void printLocalTree() { printLocalTree(new PrintWriter(System.out, true)); } /** * Only prints the local tree structure, does not recurse */ public void printLocalTree(PrintWriter pw) { pw.print("(" + label() + ' '); for (Tree kid : children()) { pw.print("("); pw.print(kid.label()); pw.print(") "); } pw.println(")"); } /** * Indented list printing of a tree. The tree is printed in an * indented list notation, with node labels followed by node scores. */ public void indentedListPrint() { indentedListPrint(new PrintWriter(System.out, true), false); } /** * Indented list printing of a tree. The tree is printed in an * indented list notation, with node labels followed by node scores. * * @param pw The PrintWriter to print the tree to * @param printScores Whether to print the scores (log probs) of tree nodes */ public void indentedListPrint(PrintWriter pw, boolean printScores) { indentedListPrint("", makeIndentString(indentIncr), pw, printScores); } /** * Indented list printing of a tree. The tree is printed in an * indented list notation, with node labels followed by node scores. * String parameters are used rather than integer levels for efficiency. * * @param indent The base String (normally just spaces) * to print before each line of tree * @param pad The additional String (normally just more * spaces) to add when going to a deeper level of Tree. 
* @param pw The PrintWriter to print the tree to * @param printScores Whether to print the scores (log probs) of tree nodes */ private void indentedListPrint(String indent, String pad, PrintWriter pw, boolean printScores) { StringBuilder sb = new StringBuilder(indent); Label label = label(); if (label != null) { sb.append(label.toString()); } if (printScores) { sb.append(" "); sb.append(score()); } pw.println(sb.toString()); Tree[] children = children(); String newIndent = indent + pad; for (Tree child : children) { child.indentedListPrint(newIndent, pad, pw, printScores); } } /** * Indented xml printing of a tree. The tree is printed in an * indented xml notation. */ public void indentedXMLPrint() { indentedXMLPrint(new PrintWriter(System.out, true), false); } /** * Indented xml printing of a tree. The tree is printed in an * indented xml notation, with node labels followed by node scores. * * @param pw The PrintWriter to print the tree to * @param printScores Whether to print the scores (log probs) of tree nodes */ public void indentedXMLPrint(PrintWriter pw, boolean printScores) { indentedXMLPrint("", makeIndentString(indentIncr), pw, printScores); } /** * Indented xml printing of a tree. The tree is printed in an * indented xml notation, with node labels followed by node scores. * String parameters are used rather than integer levels for efficiency. * * @param indent The base String (normally just spaces) * to print before each line of tree * @param pad The additional String (normally just more * spaces) to add when going to a deeper level of * Tree. 
* @param pw The PrintWriter to print the tree to * @param printScores Whether to print the scores (log probs) of tree nodes */ private void indentedXMLPrint(String indent, String pad, PrintWriter pw, boolean printScores) { StringBuilder sb = new StringBuilder(indent); Tree[] children = children(); Label label = label(); if (label != null) { sb.append("<"); if (children.length > 0) { sb.append("node value=\""); } else { sb.append("leaf value=\""); } sb.append(XMLUtils.escapeXML(SentenceUtils.wordToString(label, true))); sb.append("\""); if (printScores) { sb.append(" score="); sb.append(score()); } if (children.length > 0) { sb.append(">"); } else { sb.append("/>"); } } else { if (children.length > 0) { sb.append(""); } else { sb.append(""); } } pw.println(sb.toString()); if (children.length > 0) { String newIndent = indent + pad; for (Tree child : children) { child.indentedXMLPrint(newIndent, pad, pw, printScores); } pw.println(indent + ""); } } private static void displayChildren(Tree[] trChildren, int indent, boolean parentLabelNull, boolean onlyLabelValue, PrintWriter pw) { boolean firstSibling = true; boolean leftSibIsPreTerm = true; // counts as true at beginning for (Tree currentTree : trChildren) { currentTree.display(indent, parentLabelNull, firstSibling, leftSibIsPreTerm, false, onlyLabelValue, pw); leftSibIsPreTerm = currentTree.isPreTerminal(); // CC is a special case for English, but leave it in so we can exactly match PTB3 tree formatting if (currentTree.value() != null && currentTree.value().startsWith("CC")) { leftSibIsPreTerm = false; } firstSibling = false; } } /** * Returns the value of the nodes label as a String. This is done by * calling toString() on the value, if it exists. Otherwise, * an empty string is returned. * * @return The label of a tree node as a String */ public String nodeString() { return (value() == null) ? 
"" : value(); } /** * Display a node, implementing Penn Treebank style layout */ private void display(int indent, boolean parentLabelNull, boolean firstSibling, boolean leftSiblingPreTerminal, boolean topLevel, boolean onlyLabelValue, PrintWriter pw) { // the condition for staying on the same line in Penn Treebank boolean suppressIndent = (parentLabelNull || (firstSibling && isPreTerminal()) || (leftSiblingPreTerminal && isPreTerminal() && (label() == null || !label().value().startsWith("CC")))); if (suppressIndent) { pw.print(" "); // pw.flush(); } else { if (!topLevel) { pw.println(); } for (int i = 0; i < indent; i++) { pw.print(" "); // pw.flush(); } } if (isLeaf() || isPreTerminal()) { String terminalString = toStringBuilder(new StringBuilder(), onlyLabelValue).toString(); pw.print(terminalString); pw.flush(); return; } pw.print("("); String nodeString; if (onlyLabelValue) { String value = value(); nodeString = (value == null) ? "" : value; } else { nodeString = nodeString(); } pw.print(nodeString); // pw.flush(); boolean parentIsNull = label() == null || label().value() == null; displayChildren(children(), indent + 1, parentIsNull, true, pw); pw.print(")"); pw.flush(); } /** * Print the tree as done in Penn Treebank merged files. * The formatting should be exactly the same, but we don't print the * trailing whitespace found in Penn Treebank trees. * The basic deviation from a bracketed indented tree is to in general * collapse the printing of adjacent preterminals onto one line of * tags and words. Additional complexities are that conjunctions * (tag CC) are not collapsed in this way, and that the unlabeled * outer brackets are collapsed onto the same line as the next * bracket down. 
* * @param pw The tree is printed to this PrintWriter */ public void pennPrint(PrintWriter pw) { pennPrint(pw, true); } public void pennPrint(PrintWriter pw, boolean printOnlyLabelValue) { display(0, false, false, false, true, printOnlyLabelValue, pw); pw.println(); pw.flush(); } /** * Print the tree as done in Penn Treebank merged files. * The formatting should be exactly the same, but we don't print the * trailing whitespace found in Penn Treebank trees. * The basic deviation from a bracketed indented tree is to in general * collapse the printing of adjacent preterminals onto one line of * tags and words. Additional complexities are that conjunctions * (tag CC) are not collapsed in this way, and that the unlabeled * outer brackets are collapsed onto the same line as the next * bracket down. * * @param ps The tree is printed to this PrintStream */ public void pennPrint(PrintStream ps) { pennPrint(new PrintWriter(new OutputStreamWriter(ps), true)); } public void pennPrint(PrintStream ps, boolean printOnlyLabelValue) { pennPrint(new PrintWriter(new OutputStreamWriter(ps), true), printOnlyLabelValue); } /** * Calls pennPrint() and saves output to a String * * @return The indent S-expression representation of a Tree */ public String pennString() { StringWriter sw = new StringWriter(); pennPrint(new PrintWriter(sw)); return sw.toString(); } /** * Print the tree as done in Penn Treebank merged files. * The formatting should be exactly the same, but we don't print the * trailing whitespace found in Penn Treebank trees. * The tree is printed to System.out. The basic deviation * from a bracketed indented tree is to in general * collapse the printing of adjacent preterminals onto one line of * tags and words. Additional complexities are that conjunctions * (tag CC) are not collapsed in this way, and that the unlabeled * outer brackets are collapsed onto the same line as the next * bracket down. 
*/ public void pennPrint() { pennPrint(System.out); } /** * Finds the depth of the tree. The depth is defined as the length * of the longest path from this node to a leaf node. Leaf nodes * have depth zero. POS tags have depth 1. Phrasal nodes have * depth >= 2. * * @return the depth */ public int depth() { if (isLeaf()) { return 0; } int maxDepth = 0; Tree[] kids = children(); for (Tree kid : kids) { int curDepth = kid.depth(); if (curDepth > maxDepth) { maxDepth = curDepth; } } return maxDepth + 1; } /** * Finds the distance from this node to the specified node. * return -1 if this is not an ancestor of node. * * @param node A subtree contained in this tree * @return the depth */ public int depth(Tree node) { Tree p = node.parent(this); if (this == node) { return 0; } if (p == null) { return -1; } int depth = 1; while (this != p) { p = p.parent(this); depth++; } return depth; } /** * Returns the tree leaf that is the head of the tree. * * @param hf The head-finding algorithm to use * @param parent The parent of this tree * @return The head tree leaf if any, else null */ public Tree headTerminal(HeadFinder hf, Tree parent) { if (isLeaf()) { return this; } Tree head = hf.determineHead(this, parent); if (head != null) { return head.headTerminal(hf, parent); } log.info("Head is null: " + this); return null; } /** * Returns the tree leaf that is the head of the tree. * * @param hf The headfinding algorithm to use * @return The head tree leaf if any, else null */ public Tree headTerminal(HeadFinder hf) { return headTerminal(hf, null); } /** * Returns the preterminal tree that is the head of the tree. * See {@link #isPreTerminal()} for * the definition of a preterminal node. Beware that some tree nodes may * have no preterminal head. 
* * @param hf The headfinding algorithm to use * @return The head preterminal tree, if any, else null * @throws IllegalArgumentException if called on a leaf node */ public Tree headPreTerminal(HeadFinder hf) { if (isPreTerminal()) { return this; } else if (isLeaf()) { throw new IllegalArgumentException("Called headPreTerminal on a leaf: " + this); } else { Tree head = hf.determineHead(this); if (head != null) { return head.headPreTerminal(hf); } log.info("Head preterminal is null: " + this); return null; } } /** * Finds the head words of each tree and assigns * HeadWordLabelAnnotation on each node pointing to the correct * CoreLabel. This relies on the nodes being CoreLabels, so it * throws an IllegalArgumentException if this is ever not true. */ public void percolateHeadAnnotations(HeadFinder hf) { if (!(label() instanceof CoreLabel)) { throw new IllegalArgumentException("Expected CoreLabels in the trees"); } CoreLabel nodeLabel = (CoreLabel) label(); if (isLeaf()) { return; } if (isPreTerminal()) { nodeLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, (CoreLabel) children()[0].label()); nodeLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, nodeLabel); return; } for (Tree kid : children()) { kid.percolateHeadAnnotations(hf); } final Tree head = hf.determineHead(this); if (head == null) { throw new NullPointerException("HeadFinder " + hf + " returned null for " + this); } else if (head.isLeaf()) { nodeLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, (CoreLabel) head.label()); nodeLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, (CoreLabel) head.parent(this).label()); } else if (head.isPreTerminal()) { nodeLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, (CoreLabel) head.children()[0].label()); nodeLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, (CoreLabel) head.label()); } else { if (!(head.label() instanceof CoreLabel)) { throw new AssertionError("Horrible bug"); } CoreLabel headLabel = 
(CoreLabel) head.label(); nodeLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class)); nodeLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class)); } } /** * Finds the heads of the tree. This code assumes that the label * does store and return sensible values for the category, word, and tag. * It will be a no-op otherwise. The tree is modified. The routine * assumes the Tree has word leaves and tag preterminals, and copies * their category to word and tag respectively, if they have a null * value. * * @param hf The headfinding algorithm to use */ public void percolateHeads(HeadFinder hf) { Label nodeLabel = label(); if (isLeaf()) { // Sanity check: word() is usually set by the TreeReader. if (nodeLabel instanceof HasWord) { HasWord w = (HasWord) nodeLabel; if (w.word() == null) { w.setWord(nodeLabel.value()); } } } else { for (Tree kid : children()) { kid.percolateHeads(hf); } final Tree head = hf.determineHead(this); if (head != null) { final Label headLabel = head.label(); // Set the head tag. String headTag = (headLabel instanceof HasTag) ? ((HasTag) headLabel).tag() : null; if (headTag == null && head.isLeaf()) { // below us is a leaf headTag = nodeLabel.value(); } // Set the head word String headWord = (headLabel instanceof HasWord) ? ((HasWord) headLabel).word() : null; if (headWord == null && head.isLeaf()) { // below us is a leaf // this might be useful despite case for leaf above in // case the leaf label type doesn't support word() headWord = headLabel.value(); } // Set the head index int headIndex = (headLabel instanceof HasIndex) ? 
((HasIndex) headLabel).index() : -1; if (nodeLabel instanceof HasWord) { ((HasWord) nodeLabel).setWord(headWord); } if (nodeLabel instanceof HasTag) { ((HasTag) nodeLabel).setTag(headTag); } if (nodeLabel instanceof HasIndex && headIndex >= 0) { ((HasIndex) nodeLabel).setIndex(headIndex); } } else { log.info("Head is null: " + this); } } } /** * Return a Set of TaggedWord-TaggedWord dependencies, represented as * Dependency objects, for the Tree. This will only give * useful results if the internal tree node labels support HasWord and * HasTag, and head percolation has already been done (see * percolateHeads()). * * @return Set of dependencies (each a Dependency) */ public Set> dependencies() { return dependencies(Filters.>acceptFilter()); } public Set> dependencies(Predicate> f) { return dependencies(f, true, true, false); } /** * Convert a constituency label to a dependency label. Options are provided for selecting annotations * to copy. * * @param oldLabel * @param copyLabel * @param copyIndex * @param copyPosTag */ private static Label makeDependencyLabel(Label oldLabel, boolean copyLabel, boolean copyIndex, boolean copyPosTag) { if ( ! copyLabel) return oldLabel; String wordForm = (oldLabel instanceof HasWord) ? ((HasWord) oldLabel).word() : oldLabel.value(); Label newLabel = oldLabel.labelFactory().newLabel(wordForm); if (newLabel instanceof HasWord) ((HasWord) newLabel).setWord(wordForm); if (copyPosTag && newLabel instanceof HasTag && oldLabel instanceof HasTag) { String tag = ((HasTag) oldLabel).tag(); ((HasTag) newLabel).setTag(tag); } if (copyIndex && newLabel instanceof HasIndex && oldLabel instanceof HasIndex) { int index = ((HasIndex) oldLabel).index(); ((HasIndex) newLabel).setIndex(index); } return newLabel; } /** * Return a set of TaggedWord-TaggedWord dependencies, represented as * Dependency objects, for the Tree. 
This will only give * useful results if the internal tree node labels support HasWord and * head percolation has already been done (see percolateHeads()). * * @param f Dependencies are excluded for which the Dependency is not * accepted by the Filter * @return Set of dependencies (each a Dependency) */ public Set> dependencies(Predicate> f, boolean isConcrete, boolean copyLabel, boolean copyPosTag) { Set> deps = Generics.newHashSet(); for (Tree node : this) { // Skip leaves and unary re-writes if (node.isLeaf() || node.children().length < 2) { continue; } // Create the head label (percolateHeads has already been executed) Label headLabel = makeDependencyLabel(node.label(), copyLabel, isConcrete, copyPosTag); String headWord = ((HasWord) headLabel).word(); if (headWord == null) { headWord = headLabel.value(); } int headIndex = (isConcrete && (headLabel instanceof HasIndex)) ? ((HasIndex) headLabel).index() : -1; // every child with a different (or repeated) head is an argument boolean seenHead = false; for (Tree child : node.children()) { Label depLabel = makeDependencyLabel(child.label(), copyLabel, isConcrete, copyPosTag); String depWord = ((HasWord) depLabel).word(); if (depWord == null) { depWord = depLabel.value(); } int depIndex = (isConcrete && (depLabel instanceof HasIndex)) ? ((HasIndex) depLabel).index() : -1; if (!seenHead && headIndex == depIndex && headWord.equals(depWord)) { seenHead = true; } else { Dependency dependency = (isConcrete && depIndex != headIndex) ? new UnnamedConcreteDependency(headLabel, depLabel) : new UnnamedDependency(headLabel, depLabel); if (f.test(dependency)) { deps.add(dependency); } } } } return deps; } /** * Return a set of Label-Label dependencies, represented as * Dependency objects, for the Tree. The Labels are the ones of the leaf * nodes of the tree, without mucking with them. 
* * @param f Dependencies are excluded for which the Dependency is not * accepted by the Filter * @param hf The HeadFinder to use to identify the head of constituents. * The code assumes * that it can use headPreTerminal(hf) to find a * tag and word to make a CoreLabel. * @return Set of dependencies (each a Dependency between two * CoreLabels, which each contain a tag(), word(), * and value(), the last two of which are identical). */ public Set> mapDependencies(Predicate> f, HeadFinder hf) { if (hf == null) { throw new IllegalArgumentException("mapDependencies: need HeadFinder"); } Set> deps = Generics.newHashSet(); for (Tree node : this) { if (node.isLeaf() || node.children().length < 2) { continue; } // Label l = node.label(); // log.info("doing kids of label: " + l); //Tree hwt = node.headPreTerminal(hf); Tree hwt = node.headTerminal(hf); // log.info("have hf, found head preterm: " + hwt); if (hwt == null) { throw new IllegalStateException("mapDependencies: HeadFinder failed!"); } for (Tree child : node.children()) { // Label dl = child.label(); // Tree dwt = child.headPreTerminal(hf); Tree dwt = child.headTerminal(hf); if (dwt == null) { throw new IllegalStateException("mapDependencies: HeadFinder failed!"); } //log.info("kid is " + dl); //log.info("transformed to " + dml.toString("value{map}")); if (dwt != hwt) { Dependency p = new UnnamedDependency(hwt.label(), dwt.label()); if (f.test(p)) { deps.add(p); } } } } return deps; } /** * Return a set of Label-Label dependencies, represented as * Dependency objects, for the Tree. The Labels are the ones of the leaf * nodes of the tree, without mucking with them. The head of the sentence is a * dependent of a synthetic "root" label. * * @param f Dependencies are excluded for which the Dependency is not * accepted by the Filter * @param hf The HeadFinder to use to identify the head of constituents. * The code assumes * that it can use headPreTerminal(hf) to find a * tag and word to make a CoreLabel. 
* @param rootName Name of the root node. * @return Set of dependencies (each a Dependency between two * CoreLabels, which each contain a tag(), word(), * and value(), the last two of which are identical). */ public Set> mapDependencies(Predicate> f, HeadFinder hf, String rootName) { Set> deps = mapDependencies(f, hf); if(rootName != null) { Label hl = headTerminal(hf).label(); CoreLabel rl = new CoreLabel(); rl.set(CoreAnnotations.TextAnnotation.class, rootName); rl.set(CoreAnnotations.IndexAnnotation.class, 0); deps.add(new NamedDependency(rl, hl, rootName)); } return deps; } /** * Gets the yield of the tree. The Label of all leaf nodes * is returned * as a list ordered by the natural left to right order of the * leaves. Null values, if any, are inserted into the list like any * other value. * * @return a List of the data in the tree's leaves. */ public ArrayList

Implementation notes: c. 2003: This has been rewritten to thread, so only one List * is used. 2007: This method was duplicated to start to give type safety to Sentence. * This method will now make a Word for any Leaf which does not itself implement HasWord, and * put the Word into the Sentence, so the Sentence elements MUST implement HasWord. * * @param y The list in which the yield of the tree will be placed. * Normally, this will be empty when the routine is called, but * if not, the new yield is added to the end of the list. * @return a List of the data in the tree's leaves. */ public ArrayList

* Implementation note: when we summon up enough courage, this * method will be changed to take and return a {@code List}. * * @param ty The list in which the tagged yield of the tree will be * placed. Normally, this will be empty when the routine is called, * but if not, the new yield is added to the end of the list. * @return a List of the data in the tree's leaves. */ public > X taggedYield(X ty) { if (isPreTerminal()) { ty.add(new TaggedWord(firstChild().label(), label())); } else { for (Tree kid : children()) { kid.taggedYield(ty); } } return ty; } public List labeledYield(List ty) { if (isPreTerminal()) { ty.add(new LabeledWord(firstChild().label(), label())); } else { for (Tree kid : children()) { kid.labeledYield(ty); } } return ty; } /** Returns a {@code List} from the tree. * These are a copy of the complete token representation * that adds the tag as the tag and value. * * @return A tagged, labeled yield. */ public List taggedLabeledYield() { List ty = new ArrayList<>(); taggedLabeledYield(ty, 0); return ty; } private int taggedLabeledYield(List ty, int termIdx) { if (isPreTerminal()) { // usually this will fill in all the usual keys for a token CoreLabel taggedWord = new CoreLabel(firstChild().label()); // but in case this just came from reading a tree that just has a value for words if (taggedWord.word() == null) { taggedWord.setWord(firstChild().value()); } final String tag = (value() == null) ? "" : value(); // set value and tag to the tag taggedWord.setValue(tag); taggedWord.setTag(tag); taggedWord.setIndex(termIdx); ty.add(taggedWord); return termIdx + 1; } else { for (Tree kid : getChildrenAsList()) termIdx = kid.taggedLabeledYield(ty, termIdx); } return termIdx; } /** * Gets the preterminal yield (i.e., tags) of the tree. All data in * preterminal nodes is returned as a list ordered by the natural left to * right order of the tree. Null values, if any, are inserted into the * list like any other value. Pre-leaves are nodes of height 1. 
* * @return a {@code List} of the data in the tree's pre-leaves. */ public List to * the label() of its parent, and all its children will * then be promoted to become children of the parent (in the same * position in the sequence of daughters. * * @return A flattened version of this tree. */ public Tree flatten() { return flatten(treeFactory()); } /** * Return a flattened version of a tree. In many circumstances, this * will just return the tree, but if the tree is something like a * binarized version of a dependency grammar tree, then it will be * flattened back to a dependency grammar tree representation. Formally, * a node will be removed from the tree when: it is not a terminal or * preterminal, and its label()equal() to * the label() of its parent, and all its children will * then be promoted to become children of the parent (in the same * position in the sequence of daughters.

* Note: In the current implementation, the tree structure is mainly
   * duplicated, but the links between preterminals and terminals aren't.
   *
   * @param tf TreeFactory used to create tree structure for flattened tree
   * @return A flattened version of this tree.
   */
  public Tree flatten(TreeFactory tf) {
    if (isLeaf() || isPreTerminal()) {
      return this;
    }
    Tree[] kids = children();
    List<Tree> newChildren = new ArrayList<>(kids.length);
    for (Tree child : kids) {
      if (child.isLeaf() || child.isPreTerminal()) {
        // Leaves and preterminals are reused as-is, never merged upward.
        newChildren.add(child);
        continue;
      }
      Tree newChild = child.flatten(tf);
      if (label().equals(newChild.label())) {
        // Same label as this node: splice the grandchildren in its place.
        newChildren.addAll(newChild.getChildrenAsList());
      } else {
        newChildren.add(newChild);
      }
    }
    return tf.newTreeNode(label(), newChildren);
  }


  /**
   * Get the set of all subtrees inside the tree by returning a tree
   * rooted at each node.  These are not copies, but all share
   * structure.  The tree is regarded as a subtree of itself.
   *

* Note: If you only want to form this Set so that you can * iterate over it, it is more efficient to simply use the Tree class's * own iterator() method. This will iterate over the exact same * elements (but perhaps/probably in a different order). * * @return the Set of all subtrees in the tree. */ public Set subTrees() { return subTrees(Generics.newHashSet()); } /** * Get the list of all subtrees inside the tree by returning a tree * rooted at each node. These are not copies, but all share * structure. The tree is regarded as a subtree of itself. *

* Note: If you only want to form this Collection so that you can * iterate over it, it is more efficient to simply use the Tree class's * own iterator() method. This will iterate over the exact same * elements (but perhaps/probably in a different order). * * @return the List of all subtrees in the tree. */ public List subTreeList() { return subTrees(new ArrayList<>()); } /** * Add the set of all subtrees inside a tree (including the tree itself) * to the given Collection. *

* Note: If you only want to form this Collection so that you can * iterate over it, it is more efficient to simply use the Tree class's * own iterator() method. This will iterate over the exact same * elements (but perhaps/probably in a different order). * * @param n A collection of nodes to which the subtrees will be added. * @return The collection parameter with the subtrees added. */ public > T subTrees(T n) { n.add(this); Tree[] kids = children(); for (Tree kid : kids) { kid.subTrees(n); } return n; } /** * Makes a deep copy of not only the Tree structure but of the labels as well. * Uses the TreeFactory of the root node given by treeFactory(). * Assumes that your labels give a non-null labelFactory(). * (Added by Aria Haghighi.) * * @return A deep copy of the tree structure and its labels */ public Tree deepCopy() { return deepCopy(treeFactory()); } /** * Makes a deep copy of not only the Tree structure but of the labels as well. * The new tree will have nodes made by the given TreeFactory. * Each Label is copied using the labelFactory() returned * by the corresponding node's label. * It assumes that your labels give non-null labelFactory. * (Added by Aria Haghighi.) * * @param tf The TreeFactory used to make all nodes in the copied * tree structure * @return A Tree that is a deep copy of the tree structure and * Labels of the original tree. */ public Tree deepCopy(TreeFactory tf) { return deepCopy(tf, label().labelFactory()); } /** * Makes a deep copy of not only the Tree structure but of the labels as well. * Each tree is copied with the given TreeFactory. * Each Label is copied using the given LabelFactory. * That is, the tree and label factories can transform the nature of the * data representation. * * @param tf The TreeFactory used to make all nodes in the copied * tree structure * @param lf The LabelFactory used to make all nodes in the copied * tree structure * @return A Tree that is a deep copy of the tree structure and * Labels of the original tree. 
*/ @SuppressWarnings({"unchecked"}) public Tree deepCopy(TreeFactory tf, LabelFactory lf) { Label label = lf.newLabel(label()); if (isLeaf()) { return tf.newLeaf(label); } Tree[] kids = children(); // NB: The below list may not be of type Tree but TreeGraphNode, so we leave it untyped List newKids = new ArrayList(kids.length); for (Tree kid : kids) { newKids.add(kid.deepCopy(tf, lf)); } return tf.newTreeNode(label, newKids); } /** * Create a deep copy of the tree structure. The entire structure is * recursively copied, but label data themselves are not cloned. * The copy is built using a TreeFactory that will * produce a Tree like the input one. * * @return A deep copy of the tree structure (but not its labels). */ public Tree treeSkeletonCopy() { return treeSkeletonCopy(treeFactory()); } /** * Create a deep copy of the tree structure. The entire structure is * recursively copied, but label data themselves are not cloned. * By specifying an appropriate TreeFactory, this * method can be used to change the type of a Tree. * * @param tf The TreeFactory to be used for creating * the returned Tree * @return A deep copy of the tree structure (but not its labels). */ public Tree treeSkeletonCopy(TreeFactory tf) { Tree t; if (isLeaf()) { t = tf.newLeaf(label()); } else { Tree[] kids = children(); List newKids = new ArrayList<>(kids.length); for (Tree kid : kids) { newKids.add(kid.treeSkeletonCopy(tf)); } t = tf.newTreeNode(label(), newKids); } return t; } /** * Returns a deep copy of everything but the leaf labels. The leaf * labels are reused from the original tree. This is useful for * cases such as the dependency converter, which wants to finish * with the same labels in the dependencies as the parse tree. */ public Tree treeSkeletonConstituentCopy() { return treeSkeletonConstituentCopy(treeFactory(), label().labelFactory()); } public Tree treeSkeletonConstituentCopy(TreeFactory tf, LabelFactory lf) { if (isLeaf()) { // Reuse the current label for a leaf. 
This way, trees which // are based on tokens in a sentence can have the same tokens // even after a "deep copy". // TODO: the LabeledScoredTreeFactory copies the label for a new // leaf. Perhaps we could add a newLeafNoCopy or something like // that for efficiency. Tree newLeaf = tf.newLeaf(label()); newLeaf.setLabel(label()); return newLeaf; } Label label = lf.newLabel(label()); Tree[] kids = children(); List newKids = new ArrayList<>(kids.length); for (Tree kid : kids) { newKids.add(kid.treeSkeletonConstituentCopy(tf, lf)); } return tf.newTreeNode(label, newKids); } /** * Create a transformed Tree. The tree is traversed in a depth-first, * left-to-right order, and the TreeTransformer is called * on each node. It returns some Tree. The transformed * tree has a new tree structure (i.e., a "deep copy" is done), but it * will usually share its labels with the original tree. * * @param transformer The function that transforms tree nodes or subtrees * @return a transformation of this Tree */ public Tree transform(final TreeTransformer transformer) { return transform(transformer, treeFactory()); } /** * Create a transformed Tree. The tree is traversed in a depth-first, * left-to-right order, and the TreeTransformer is called * on each node. It returns some Tree. The transformed * tree has a new tree structure (i.e., a deep copy of the structure of the tree is done), but it * will usually share its labels with the original tree. 
* * @param transformer The function that transforms tree nodes or subtrees * @param tf The TreeFactory which will be used for creating * new nodes for the returned Tree * @return a transformation of this Tree */ public Tree transform(final TreeTransformer transformer, final TreeFactory tf) { Tree t; if (isLeaf()) { t = tf.newLeaf(label()); } else { Tree[] kids = children(); List newKids = new ArrayList<>(kids.length); for (Tree kid : kids) { newKids.add(kid.transform(transformer, tf)); } t = tf.newTreeNode(label(), newKids); } return transformer.transformTree(t); } /** * Creates a (partial) deep copy of the tree, where all nodes that the * filter does not accept are spliced out. If the result is not a tree * (that is, it's a forest), an empty root node is generated. * * @param nodeFilter a Filter method which returns true to mean * keep this node, false to mean delete it * @return a filtered copy of the tree */ public Tree spliceOut(final Predicate nodeFilter) { return spliceOut(nodeFilter, treeFactory()); } /** * Creates a (partial) deep copy of the tree, where all nodes that the * filter does not accept are spliced out. That is, the particular * modes for which the Filter returns false * are removed from the Tree, but those nodes' children * are kept (assuming they pass the Filter, and they are * added in the appropriate left-to-right ordering as new children of * the parent node. If the root node is deleted, so that the result * would not be a tree (that is, it's a forest), an empty root node is * generated. If nothing is accepted, null is returned. * * @param nodeFilter a Filter method which returns true to mean * keep this node, false to mean delete it * @param tf A TreeFactory for making new trees. Used if * the root node is deleted. * @return a filtered copy of the tree. 
*/ public Tree spliceOut(final Predicate nodeFilter, final TreeFactory tf) { List l = spliceOutHelper(nodeFilter, tf); if (l.isEmpty()) { return null; } else if (l.size() == 1) { return l.get(0); } // for a forest, make a new root return tf.newTreeNode((Label) null, l); } private List spliceOutHelper(Predicate nodeFilter, TreeFactory tf) { // recurse over all children first Tree[] kids = children(); List l = new ArrayList<>(); for (Tree kid : kids) { l.addAll(kid.spliceOutHelper(nodeFilter, tf)); } // check if this node is being spliced out if (nodeFilter.test(this)) { // no, so add our children and return Tree t; if ( ! l.isEmpty()) { t = tf.newTreeNode(label(), l); } else { t = tf.newLeaf(label()); } l = new ArrayList<>(1); l.add(t); return l; } // we're out, so return our children return l; } /** * Creates a deep copy of the tree, where all nodes that the filter * does not accept and all children of such nodes are pruned. If all * of a node's children are pruned, that node is cut as well. * A Filter can assume * that it will not be called with a null argument. *

* For example, the following code excises all PP nodes from a Tree:
* * Predicate&lt;Tree&gt; f = new Predicate&lt;Tree&gt;() {
* public boolean test(Tree t) {
* return ! t.label().value().equals("PP");
* }
* };
* tree.prune(f); *

* * If the root of the tree is pruned, null will be returned. * * @param filter the filter to be applied * @return a filtered copy of the tree, including the possibility of * null if the root node of the tree is filtered */ public Tree prune(final Predicate filter) { return prune(filter, treeFactory()); } /** * Creates a deep copy of the tree, where all nodes that the filter * does not accept and all children of such nodes are pruned. If all * of a node's children are pruned, that node is cut as well. * A Filter can assume * that it will not be called with a null argument. * * @param filter the filter to be applied * @param tf the TreeFactory to be used to make new Tree nodes if needed * @return a filtered copy of the tree, including the possibility of * null if the root node of the tree is filtered */ public Tree prune(Predicate filter, TreeFactory tf) { // is the current node to be pruned? if ( ! filter.test(this)) { return null; } // if not, recurse over all children List l = new ArrayList<>(); Tree[] kids = children(); for (Tree kid : kids) { Tree prunedChild = kid.prune(filter, tf); if (prunedChild != null) { l.add(prunedChild); } } // and check if this node has lost all its children if (l.isEmpty() && !(kids.length == 0)) { return null; } // if we're still ok, copy the node if (isLeaf()) { return tf.newLeaf(label()); } return tf.newTreeNode(label(), l); } /** * Returns first child if this is unary and if the label at the current * node is either "ROOT" or empty. * * @return The first child if this is unary and if the label at the current * node is either "ROOT" or empty, else this */ public Tree skipRoot() { if(!isUnaryRewrite()) return this; String lab = label().value(); return (lab == null || lab.isEmpty() || "ROOT".equals(lab)) ? firstChild() : this; } /** * Return a TreeFactory that produces trees of the * appropriate type. * * @return A factory to produce Trees */ public abstract TreeFactory treeFactory(); /** * Return the parent of the tree node. 
This routine may return * null meaning simply that the implementation doesn't * know how to determine the parent node, rather than there is no * such node. * * @return The parent Tree node or null * @see Tree#parent(Tree) */ public Tree parent() { throw new UnsupportedOperationException(); } /** * Return the parent of the tree node. This routine will traverse * a tree (depth first) from the given root, and will * correctly find the parent, regardless of whether the concrete * class stores parents. It will only return null if this * node is the root node, or if this node is not * contained within the tree rooted at root. * * @param root The root node of the whole Tree * @return the parent Tree node if any; * else null */ public Tree parent(Tree root) { Tree[] kids = root.children(); return parentHelper(root, kids, this); } private static Tree parentHelper(Tree parent, Tree[] kids, Tree node) { for (Tree kid : kids) { if (kid == node) { return parent; } Tree ret = node.parent(kid); if (ret != null) { return ret; } } return null; } /** * Returns the number of nodes the tree contains. This method * implements the size() function required by the * Collections interface. The size of the tree is the * number of nodes it contains (of all types, including the leaf nodes * and the root). * * @return The size of the tree * @see #depth() */ @Override public int size() { int size = 1; Tree[] kids = children(); for (Tree kid : kids) { size += kid.size(); } return size; } /** * Return the ancestor tree node height nodes up from the current node. * * @param height How many nodes up to go. A parameter of 0 means return * this node, 1 means to return the parent node and so on. * @param root The root node that this Tree is embedded under * @return The ancestor at height height. 
It returns null * if it does not exist or the tree implementation does not keep track * of parents */ public Tree ancestor(int height, Tree root) { if (height < 0) { throw new IllegalArgumentException("ancestor: height cannot be negative"); } if (height == 0) { return this; } Tree par = parent(root); if (par == null) { return null; } return par.ancestor(height - 1, root); } private static class TreeIterator implements Iterator { private final List treeStack; protected TreeIterator(Tree t) { treeStack = new ArrayList<>(); treeStack.add(t); } @Override public boolean hasNext() { return (!treeStack.isEmpty()); } @Override public Tree next() { int lastIndex = treeStack.size() - 1; if (lastIndex < 0) { throw new NoSuchElementException("TreeIterator exhausted"); } Tree tr = treeStack.remove(lastIndex); Tree[] kids = tr.children(); // so that we can efficiently use one List, we reverse them for (int i = kids.length - 1; i >= 0; i--) { treeStack.add(kids[i]); } return tr; } /** * Not supported */ @Override public void remove() { throw new UnsupportedOperationException(); } @Override public String toString() { return "TreeIterator"; } } /** * Returns an iterator over all the nodes of the tree. This method * implements the iterator() method required by the * Collections interface. It does a preorder * (children after node) traversal of the tree. (A possible * extension to the class at some point would be to allow different * traversal orderings via variant iterators.) 
* * @return An iterator over the nodes of the tree */ @Override public Iterator iterator() { return new TreeIterator(this); } public List postOrderNodeList() { List nodes = new ArrayList<>(); postOrderRecurse(this, nodes); return nodes; } private static void postOrderRecurse(Tree t, List nodes) { for (Tree c : t.children()) { postOrderRecurse(c, nodes); } nodes.add(t); } public List preOrderNodeList() { List nodes = new ArrayList<>(); preOrderRecurse(this, nodes); return nodes; } private static void preOrderRecurse(Tree t, List nodes) { nodes.add(t); for (Tree c : t.children()) { preOrderRecurse(c, nodes); } } /** * This gives you a tree from a String representation (as a * bracketed Tree, of the kind produced by toString(), * pennPrint(), or as in the Penn Treebank). * It's not the most efficient thing to do for heavy duty usage. * The Tree returned is created by a * LabeledScoredTreeReaderFactory. This means that "standard" * normalizations (stripping functional categories, indices, * empty nodes, and A-over-A nodes) will be done on it. * * @param str The tree as a bracketed list in a String. * @return The Tree * @throws RuntimeException If Tree format is not valid */ public static Tree valueOf(String str) { return valueOf(str, new LabeledScoredTreeReaderFactory()); } /** * This gives you a tree from a String representation (as a * bracketed Tree, of the kind produced by toString(), * pennPrint(), or as in the Penn Treebank. * It's not the most efficient thing to do for heavy duty usage. * * @param str The tree as a bracketed list in a String. * @param trf The TreeFactory used to make the new Tree * @return The Tree * @throws RuntimeException If the Tree format is not valid */ public static Tree valueOf(String str, TreeReaderFactory trf) { try { return trf.newTreeReader(new StringReader(str)).readTree(); } catch (IOException ioe) { throw new RuntimeException("Tree.valueOf() tree construction failed", ioe); } } /** * Return the child at some daughter index. 
The children are numbered * starting with an index of 0. * * @param i The daughter index * @return The tree at that daughter index */ public Tree getChild(int i) { Tree[] kids = children(); return kids[i]; } /** * Destructively removes the child at some daughter index and returns it. * Note * that this method will throw an {@link ArrayIndexOutOfBoundsException} if * the daughter index is too big for the list of daughters. * * @param i The daughter index * @return The tree at that daughter index */ public Tree removeChild(int i) { Tree[] kids = children(); Tree kid = kids[i]; Tree[] newKids = new Tree[kids.length - 1]; for (int j = 0; j < newKids.length; j++) { if (j < i) { newKids[j] = kids[j]; } else { newKids[j] = kids[j + 1]; } } setChildren(newKids); return kid; } /** * Adds the tree t at the index position among the daughters. Note * that this method will throw an {@link ArrayIndexOutOfBoundsException} if * the daughter index is too big for the list of daughters. * * @param i the index position at which to add the new daughter * @param t the new daughter */ public void addChild(int i, Tree t) { Tree[] kids = children(); Tree[] newKids = new Tree[kids.length + 1]; if (i != 0) { System.arraycopy(kids, 0, newKids, 0, i); } newKids[i] = t; if (i != kids.length) { System.arraycopy(kids, i, newKids, i + 1, kids.length - i); } setChildren(newKids); } /** * Adds the tree t at the last index position among the daughters. * * @param t the new daughter */ public void addChild(Tree t) { addChild(children().length, t); } /** * Replaces the ith child of this with the tree t. * Note * that this method will throw an {@link ArrayIndexOutOfBoundsException} if * the child index is too big for the list of children. 
* * @param i The index position at which to replace the child * @param t The new child * @return The tree that was previously the ith d */ public Tree setChild(int i, Tree t) { Tree[] kids = children(); Tree old = kids[i]; kids[i] = t; return old; } /** * Returns true if this dominates the Tree passed in * as an argument. Object equality (==) rather than .equals() is used * to determine domination. * t.dominates(t) returns false. */ public boolean dominates(Tree t) { List dominationPath = dominationPath(t); return dominationPath != null && dominationPath.size() > 1; } /** * Returns the path of nodes leading down to a dominated node, * including this and the dominated node itself. * Returns null if t is not dominated by this. Object * equality (==) is the relevant criterion. * t.dominationPath(t) returns null. */ public List dominationPath(Tree t) { //Tree[] result = dominationPathHelper(t, 0); Tree[] result = dominationPath(t, 0); if (result == null) { return null; } return Arrays.asList(result); } private Tree[] dominationPathHelper(Tree t, int depth) { Tree[] kids = children(); for (int i = kids.length - 1; i >= 0; i--) { Tree t1 = kids[i]; if (t1 == null) { return null; } Tree[] result; if ((result = t1.dominationPath(t, depth + 1)) != null) { result[depth] = this; return result; } } return null; } private Tree[] dominationPath(Tree t, int depth) { if (this == t) { Tree[] result = new Tree[depth + 1]; result[depth] = this; return result; } return dominationPathHelper(t, depth); } /** * Given nodes t1 and t2 which are * dominated by this node, returns a list of all the nodes on the * path from t1 to t2, inclusive, or null if none found. 
*/
  public List<Tree> pathNodeToNode(Tree t1, Tree t2) {
    if (!contains(t1) || !contains(t2)) {
      return null;
    }
    if (t1 == t2) {
      return Collections.singletonList(t1);
    }
    if (t1.dominates(t2)) {
      return t1.dominationPath(t2);
    }
    if (t2.dominates(t1)) {
      // The path is computed top-down from t2, so flip it to run t1 -> t2.
      List<Tree> path = t2.dominationPath(t1);
      Collections.reverse(path);
      return path;
    }
    Tree joinNode = joinNode(t1, t2);
    if (joinNode == null) {
      return null;
    }
    List<Tree> t1DomPath = joinNode.dominationPath(t1);
    List<Tree> t2DomPath = joinNode.dominationPath(t2);
    if (t1DomPath == null || t2DomPath == null) {
      return null;
    }
    // Walk up from t1 to the join node, then down from the join node to t2.
    List<Tree> path = new ArrayList<>(t1DomPath);
    Collections.reverse(path);
    // The join node is now the last element; drop it by position (not by
    // equals()) because t2DomPath contributes it again as its first element.
    path.remove(path.size() - 1);
    path.addAll(t2DomPath);
    return path;
  }

  /**
   * Given nodes t1 and t2 which are
   * dominated by this node, returns their "join node": the node
   * j such that j dominates both
   * t1 and t2, and every other node which
   * dominates both t1 and t2
   * dominates j.
   * In the special case that t1 dominates t2, return t1, and vice versa.
   * Return null if no such node can be found.
   *
   * @param t1 The first node
   * @param t2 The second node
   * @return The deepest node shared by the paths from this node to t1 and
   *     to t2, or null if either node is not found
   */
  public Tree joinNode(Tree t1, Tree t2) {
    if (!contains(t1) || !contains(t2)) {
      return null;
    }
    if (this == t1 || this == t2) {
      return this;
    }
    Tree joinNode = null;
    List<Tree> t1DomPath = dominationPath(t1);
    List<Tree> t2DomPath = dominationPath(t2);
    if (t1DomPath == null || t2DomPath == null) {
      return null;
    }
    // Follow both root-to-node paths in lockstep; the last node they share
    // (by identity) is the join node.
    Iterator<Tree> it1 = t1DomPath.iterator();
    Iterator<Tree> it2 = t2DomPath.iterator();
    while (it1.hasNext() && it2.hasNext()) {
      Tree n1 = it1.next();
      Tree n2 = it2.next();
      if (n1 != n2) {
        break;
      }
      joinNode = n1;
    }
    return joinNode;
  }

  /**
   * Given nodes {@code t1} and {@code t2} which are
   * dominated by this node, returns {@code true} iff
   * {@code t1} c-commands {@code t2}. (A node c-commands
   * its sister(s) and any nodes below its sister(s).)
*/ public boolean cCommands(Tree t1, Tree t2) { List sibs = t1.siblings(this); if (sibs == null) { return false; } for (Tree sib : sibs) { if (sib == t2 || sib.contains(t2)) { return true; } } return false; } /** * Returns the siblings of this Tree node. The siblings are all * children of the parent of this node except this node. * * @param root The root within which this tree node is contained * @return The siblings as a list, an empty list if there are no siblings. * The returned list is a modifiable new list structure, but contains * the actual children. */ public List siblings(Tree root) { Tree parent = parent(root); if (parent == null) { return null; } List siblings = parent.getChildrenAsList(); siblings.remove(this); return siblings; } /** * insert dtr after position existing * daughters in this. */ public void insertDtr(Tree dtr, int position) { Tree[] kids = children(); if (position > kids.length) { throw new IllegalArgumentException("Can't insert tree after the " + position + "th daughter in " + this + "; only " + kids.length + " daughters exist!"); } Tree[] newKids = new Tree[kids.length + 1]; int i = 0; for (; i < position; i++) { newKids[i] = kids[i]; } newKids[i] = dtr; for (; i < kids.length; i++) { newKids[i + 1] = kids[i]; } setChildren(newKids); } // --- composition methods to implement Label interface @Override public String value() { Label lab = label(); if (lab == null) { return null; } return lab.value(); } @Override public void setValue(String value) { Label lab = label(); if (lab != null) { lab.setValue(value); } } @Override public void setFromString(String labelStr) { Label lab = label(); if (lab != null) { lab.setFromString(labelStr); } } /** * Returns a factory that makes labels of the same type as this one. * May return null if no appropriate factory is known. 
*
   * @return the LabelFactory for this kind of label
   */
  @Override
  public LabelFactory labelFactory() {
    Label nodeLabel = label();
    return (nodeLabel == null) ? null : nodeLabel.labelFactory();
  }

  /**
   * Computes the character position of the left edge of {@code node} inside
   * this tree, counting the lengths of the leaf values that come before it.
   * Returns -1 if node is not found.
   * Note: These methods were written for internal evaluation routines. They are
   * not the right methods to relate tree nodes to textual offsets. For these,
   * look at the appropriate annotations on a CoreLabel (CharacterOffsetBeginAnnotation, etc.).
   *
   * @param node The subtree to look for in this Tree
   * @return The positional index of the left edge of node, or -1
   */
  public int leftCharEdge(Tree node) {
    MutableInteger offset = new MutableInteger(0);
    return leftCharEdge(node, offset) ? offset.intValue() : -1;
  }

  /**
   * Left-to-right search for {@code node}; every leaf passed on the way
   * adds the length of its value to the running offset {@code i}.
   */
  private boolean leftCharEdge(Tree node, MutableInteger i) {
    if (this == node) {
      return true;
    }
    if (isLeaf()) {
      int passed = i.intValue() + value().length();
      i.set(passed);
      return false;
    }
    for (Tree kid : children()) {
      boolean found = kid.leftCharEdge(node, i);
      if (found) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns the positional index of the right edge of node within the tree,
   * as measured by characters. Returns -1 if node is not found.
   *
   * rightCharEdge returns the index of the rightmost character + 1, so that
   * rightCharEdge(getLeaves().get(i)) == leftCharEdge(getLeaves().get(i+1))
   *
   * Note: These methods were written for internal evaluation routines. They are
   * not the right methods to relate tree nodes to textual offsets. For these,
   * look at the appropriate annotations on a CoreLabel (CharacterOffsetBeginAnnotation, etc.).
* * @param node The subtree to look for in this Tree * @return The positional index of the right edge of node */ public int rightCharEdge(Tree node) { List s = getLeaves(); int length = 0; for (Tree leaf : s) { length += leaf.label().value().length(); } MutableInteger i = new MutableInteger(length); if (rightCharEdge(node, i)) { return i.intValue(); } return -1; } private boolean rightCharEdge(Tree node, MutableInteger i) { if (this == node) { return true; } else if (isLeaf()) { i.set(i.intValue() - label().value().length()); return false; } else { for (int j = children().length - 1; j >= 0; j--) { if (children()[j].rightCharEdge(node, i)) { return true; } } return false; } } /** * Calculates the node's number, defined as the number of nodes traversed in a left-to-right, depth-first search of the * tree starting at root and ending at this. Returns -1 if root does not contain this. * @param root the root node of the relevant tree * @return the number of the current node, or -1 if root does not contain this. */ public int nodeNumber(Tree root) { MutableInteger i = new MutableInteger(1); if(nodeNumberHelper(root,i)) return i.intValue(); return -1; } private boolean nodeNumberHelper(Tree t, MutableInteger i) { if(this==t) return true; i.incValue(1); for (int j = 0; j < t.children().length; j++) { if (nodeNumberHelper(t.children()[j],i)) return true; } return false; } /** * Fetches the ith node in the tree, with node numbers defined * as in {@link #nodeNumber(Tree)}. * * @param i the node number to fetch * @return the ith node in the tree * @throws IndexOutOfBoundsException if i is not between 1 and * the number of nodes (inclusive) contained in this. 
*/
  public Tree getNodeNumber(int i) {
    // Node numbers start at 1. The traversal counter visits 1, 2, 3, ... and is
    // compared at every node, so a request below 1 is the only one that can
    // never be reached; reject it up front with a message naming the request.
    if (i < 1) {
      throw new IndexOutOfBoundsException("Error -- node numbers start at 1, but node " + i + " was requested.");
    }
    return getNodeNumberHelper(new MutableInteger(1), i);
  }

  /**
   * Depth-first, left-to-right search for the node whose number equals
   * {@code target}; {@code i} holds the number of the node being visited.
   * Returns null if this subtree is exhausted before the target is reached.
   */
  private Tree getNodeNumberHelper(MutableInteger i, int target) {
    if (i.intValue() == target) {
      return this;
    }
    i.incValue(1);
    for (Tree kid : children()) {
      Tree found = kid.getNodeNumberHelper(i, target);
      if (found != null) {
        return found;
      }
    }
    return null;
  }

  /**
   * Assign sequential integer indices to the leaves of the tree
   * rooted at this Tree, starting with 1.
   * The leaves are traversed from left
   * to right. If a leaf is already indexed, then it uses the existing index.
   * Only leaves whose labels implement HasIndex are indexed.
   */
  public void indexLeaves() {
    indexLeaves(1, false);
  }

  /**
   * Index the leaves starting with 1, and optionally overwrite existing
   * indices if they exist.
   *
   * @param overWrite Whether to replace an existing index for a leaf.
   */
  public void indexLeaves(boolean overWrite) {
    indexLeaves(1, overWrite);
  }

  /**
   * Assign sequential integer indices to the leaves of the subtree
   * rooted at this Tree, beginning with
   * startIndex, and traversing the leaves from left
   * to right. If a leaf already has a non-negative index and overWrite is
   * false, that index is kept and numbering continues after it.
   * Only leaves whose labels implement HasIndex are indexed; other leaves
   * are passed over without consuming an index.
   *
   * @param startIndex index for this node
   * @param overWrite Whether to replace an existing index for a leaf.
* @return the next index still unassigned
   */
  public int indexLeaves(int startIndex, boolean overWrite) {
    if (!isLeaf()) {
      // Thread the running index through the children from left to right.
      int next = startIndex;
      for (Tree kid : children()) {
        next = kid.indexLeaves(next, overWrite);
      }
      return next;
    }

    Label leafLabel = label();
    if (!(leafLabel instanceof HasIndex)) {
      // Nothing to index here, so no index is consumed.
      return startIndex;
    }

    HasIndex indexed = (HasIndex) leafLabel;
    int existing = indexed.index();
    if (!overWrite && existing >= 0) {
      // Keep the index already present and continue counting after it.
      return existing + 1;
    }
    indexed.setIndex(startIndex);
    return startIndex + 1;
  }

  /**
   * Percolates terminal indices through a dependency tree. The terminals should be indexed, e.g.,
   * by calling indexLeaves() on the tree.
   *

* This method assumes CoreLabels! */ public void percolateHeadIndices() { if (isPreTerminal()) { int nodeIndex = ((HasIndex) firstChild().label()).index(); ((HasIndex) label()).setIndex(nodeIndex); return; } // Assign the head index to the first child that we encounter with a matching // surface form. Obviously a head can have the same surface form as its dependent, // and in this case the head index is ambiguous. String wordAnnotation = ((HasWord) label()).word(); if (wordAnnotation == null) { wordAnnotation = value(); } boolean seenHead = false; for (Tree child : children()) { child.percolateHeadIndices(); String childWordAnnotation = ((HasWord) child.label()).word(); if (childWordAnnotation == null) { childWordAnnotation = child.value(); } if ( !seenHead && wordAnnotation.equals(childWordAnnotation)) { seenHead = true; int nodeIndex = ((HasIndex) child.label()).index(); ((HasIndex) label()).setIndex(nodeIndex); } } } /** Index all spans (constituents) in the tree. * For this, spans uses 0-based indexing and the span records the fencepost * to the left of the first word and after the last word of the span. * The spans are only recorded if the Tree has labels of a class which * extends CoreMap. */ public void indexSpans() { indexSpans(0); } public void indexSpans(int startIndex) { indexSpans(new MutableInteger(startIndex)); } /** * Assigns span indices (BeginIndexAnnotation and EndIndexAnnotation) to all nodes in a tree. * The beginning index is equivalent to the IndexAnnotation of the first leaf in the constituent. * The end index is equivalent to the first integer after the IndexAnnotation of the last leaf in the constituent. 
* * @param startIndex Begin indexing at this value */ public Pair indexSpans(MutableInteger startIndex) { int start = Integer.MAX_VALUE; int end = Integer.MIN_VALUE; if(isLeaf()){ start = startIndex.intValue(); end = startIndex.intValue() + 1; startIndex.incValue(1); } else { for (Tree kid : children()) { Pair span = kid.indexSpans(startIndex); if(span.first < start) start = span.first; if(span.second > end) end = span.second; } } Label label = label(); if (label instanceof CoreMap) { CoreMap afl = (CoreMap) label(); afl.set(CoreAnnotations.BeginIndexAnnotation.class, start); afl.set(CoreAnnotations.EndIndexAnnotation.class, end); } return new Pair<>(start, end); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy