Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download or modify it as often as you want.
edu.stanford.nlp.trees.Trees Maven / Gradle / Ivy
Go to download
Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.
package edu.stanford.nlp.trees;
import edu.stanford.nlp.io.IOUtils;
import java.util.function.Function;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.MutableInteger;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.ling.*;
import java.util.*;
import java.io.*;
/**
* Various static utilities for the {@code Tree} class.
*
* @author Roger Levy
* @author Dan Klein
* @author Aria Haghighi (tree path methods)
*/
public class Trees {
private static final LabeledScoredTreeFactory defaultTreeFactory = new LabeledScoredTreeFactory();
private Trees() {}
/**
 * Computes the positional index of the left edge of {@code t} within
 * {@code root}, i.e. the combined yield size of every leaf visited
 * before {@code t} is reached.
 *
 * @param t the node to locate (matched by identity)
 * @param root the tree that is searched
 * @return the left edge index of {@code t}
 * @throws RuntimeException if {@code t} is not encountered while traversing {@code root}
 * @see Trees#leftEdgeUnsafe(Tree, Tree)
 */
public static int leftEdge(Tree t, Tree root) {
  final MutableInteger offset = new MutableInteger(0);
  final boolean found = leftEdge(t, root, offset);
  if (!found) {
    throw new RuntimeException("Tree is not a descendant of root.");
  }
  return offset.intValue();
}
/**
 * Non-throwing variant of {@link Trees#leftEdge(Tree, Tree)}: computes the
 * positional index of the left edge of {@code t} within {@code root}, as
 * defined by the size of the yield of all material preceding {@code t}.
 *
 * @param t the node to locate (matched by identity)
 * @param root the tree that is searched
 * @return the left edge index, or -1 if no path to {@code t} is found
 * @see Trees#leftEdge(Tree, Tree)
 */
public static int leftEdgeUnsafe(Tree t, Tree root) {
  final MutableInteger offset = new MutableInteger(0);
  return leftEdge(t, root, offset) ? offset.intValue() : -1;
}
/**
 * Recursive worker for the left-edge computation. Walks {@code t1} in
 * child order and adds the yield size of every leaf passed to {@code i}
 * until {@code t} is met.
 *
 * @param t the node being searched for (matched by identity)
 * @param t1 the subtree currently being examined
 * @param i running count of yield items seen so far; updated in place
 * @return true as soon as {@code t} is found, false if this subtree does not contain it
 */
static boolean leftEdge(Tree t, Tree t1, MutableInteger i) {
  if (t1 == t) {
    return true;
  }
  if (!t1.isLeaf()) {
    for (Tree child : t1.children()) {
      if (leftEdge(t, child, i)) {
        return true;
      }
    }
    return false;
  }
  // Count the leaf by its yield size rather than a fixed 1, so that empties don't add size.
  final int leafWidth = t1.yield().size();
  i.set(i.intValue() + leafWidth);
  return false;
}
/**
 * Computes the positional index of the right edge of {@code t} within
 * {@code root}: the size of the yield of all material preceding
 * {@code t} plus all the material contained in {@code t}. The value is
 * obtained by starting from the full yield size of {@code root} and
 * subtracting the leaves that lie to the right of {@code t}.
 *
 * @param t the node to locate (matched by identity)
 * @param root the tree that is searched
 * @return the right edge index of {@code t}
 * @throws RuntimeException if {@code t} is not encountered while traversing {@code root}
 * @see Trees#rightEdgeUnsafe(Tree, Tree)
 */
public static int rightEdge(Tree t, Tree root) {
  final MutableInteger offset = new MutableInteger(root.yield().size());
  final boolean found = rightEdge(t, root, offset);
  if (!found) {
    throw new RuntimeException("Tree is not a descendant of root.");
  }
  return offset.intValue();
}
/**
 * Non-throwing variant of {@link Trees#rightEdge(Tree, Tree)}: computes the
 * positional index of the right edge of {@code t} within {@code root}, as
 * defined by the size of the yield of all material preceding {@code t}
 * plus all the material contained in {@code t}.
 *
 * @param t the node to locate (matched by identity)
 * @param root the tree that is searched
 * @return the right edge index, or {@code root.yield().size() + 1} if no path is found
 * @see Trees#rightEdge(Tree, Tree)
 */
public static int rightEdgeUnsafe(Tree t, Tree root) {
  final int totalYield = root.yield().size();
  final MutableInteger offset = new MutableInteger(totalYield);
  return rightEdge(t, root, offset) ? offset.intValue() : totalYield + 1;
}
/**
 * Recursive worker for the right-edge computation. Walks {@code t1} from
 * its last child to its first and subtracts the yield size of every leaf
 * passed from {@code i} until {@code t} is met.
 *
 * @param t the node being searched for (matched by identity)
 * @param t1 the subtree currently being examined
 * @param i running right-edge value; decremented in place
 * @return true as soon as {@code t} is found, false if this subtree does not contain it
 */
static boolean rightEdge(Tree t, Tree t1, MutableInteger i) {
  if (t1 == t) {
    return true;
  }
  if (t1.isLeaf()) {
    // Subtract the leaf's yield size rather than a fixed 1, so that empties don't add size.
    final int leafWidth = t1.yield().size();
    i.set(i.intValue() - leafWidth);
    return false;
  }
  final Tree[] kids = t1.children();
  int idx = kids.length;
  while (--idx >= 0) {
    if (rightEdge(t, kids[idx], i)) {
      return true;
    }
  }
  return false;
}
/**
 * Returns a lexicalized Tree whose Labels are CategoryWordTag
 * instances, all corresponding to the input tree.
 * The input is first passed through the function supplied by
 * {@code TreeFunctions.getLabeledTreeToCategoryWordTagTreeFunction()},
 * then heads are percolated through the result with {@code hf}.
 *
 * @param t the tree to lexicalize
 * @param hf the HeadFinder handed to {@code percolateHeads}
 * @return the tree produced by the conversion function, after head percolation
 */
public static Tree lexicalize(Tree t, HeadFinder hf) {
  // Parameterized so that apply() yields a Tree; with a raw Function it yields Object.
  Function<Tree, Tree> toCategoryWordTag =
      TreeFunctions.getLabeledTreeToCategoryWordTagTreeFunction();
  Tree lexicalized = toCategoryWordTag.apply(t);
  lexicalized.percolateHeads(hf);
  return lexicalized;
}
/**
 * Returns the leaves in a Tree in the order that they're found
 * (depth-first, following each node's child order).
 *
 * @param t the tree to collect leaves from
 * @return a new mutable list holding the leaf nodes themselves
 */
public static List<Tree> leaves(Tree t) {
  List<Tree> collected = new ArrayList<>();
  leaves(t, collected);
  return collected;
}

/**
 * Appends every leaf under {@code t} (or {@code t} itself if it is a leaf) to {@code l}.
 */
private static void leaves(Tree t, List<Tree> l) {
  if (t.isLeaf()) {
    l.add(t);
    return;
  }
  for (Tree kid : t.children()) {
    leaves(kid, l);
  }
}
/**
 * Returns the preterminal nodes of a Tree in the order that they're found
 * (depth-first, following each node's child order).
 *
 * @param t the tree to collect preterminals from
 * @return a new mutable list holding the preterminal nodes themselves
 */
public static List<Tree> preTerminals(Tree t) {
  List<Tree> collected = new ArrayList<>();
  preTerminals(t, collected);
  return collected;
}

/**
 * Appends every preterminal under {@code t} (or {@code t} itself) to {@code l}.
 * The search does not descend below a preterminal.
 */
private static void preTerminals(Tree t, List<Tree> l) {
  if (t.isPreTerminal()) {
    l.add(t);
    return;
  }
  for (Tree kid : t.children()) {
    preTerminals(kid, l);
  }
}
/**
 * Returns the labels of the leaves in a Tree in the order that they're found.
 *
 * @param t the tree whose leaf labels are wanted
 * @return a new mutable list of the leaves' label objects (not copies)
 */
public static List<Label> leafLabels(Tree t) {
  List<Label> collected = new ArrayList<>();
  leafLabels(t, collected);
  return collected;
}

/**
 * Appends the label of every leaf under {@code t} (or of {@code t} itself if it is a leaf) to {@code l}.
 */
private static void leafLabels(Tree t, List<Label> l) {
  if (t.isLeaf()) {
    l.add(t.label());
    return;
  }
  for (Tree kid : t.children()) {
    leafLabels(kid, l);
  }
}
/**
 * Returns the labels of the leaves in a Tree, augmented with POS tags.
 * Assumes that the leaf labels are CoreLabels; a leaf label of any other
 * type makes the cast fail with a {@link ClassCastException}.
 * <p>
 * Note that the leaf labels are modified in place: each one has its
 * {@code CoreAnnotations.TagLabelAnnotation} set to the label of the
 * preterminal directly above it.
 *
 * @param t the tree whose leaf labels are wanted
 * @return a new mutable list of the (now annotated) leaf labels, in traversal order
 */
public static List<CoreLabel> taggedLeafLabels(Tree t) {
  List<CoreLabel> collected = new ArrayList<>();
  taggedLeafLabels(t, collected);
  return collected;
}

/**
 * Annotates and collects the first child's label of every preterminal at or under {@code t}.
 */
private static void taggedLeafLabels(Tree t, List<CoreLabel> l) {
  if (t.isPreTerminal()) {
    CoreLabel leafLabel = (CoreLabel) t.getChild(0).label();
    leafLabel.set(CoreAnnotations.TagLabelAnnotation.class, t.label());
    l.add(leafLabel);
    return;
  }
  for (Tree kid : t.children()) {
    taggedLeafLabels(kid, l);
  }
}
/**
 * Fills in missing tags on the leaves of a tree, using the value of the
 * preterminal above each leaf as the tag. Leaves whose label does not
 * implement {@code HasTag}, or whose tag is already non-null, are left
 * untouched.
 *
 * @param tree the tree whose leaf labels are updated in place
 */
public static void setLeafTagsIfUnset(Tree tree) {
  if (!tree.isPreTerminal()) {
    for (Tree subtree : tree.children()) {
      setLeafTagsIfUnset(subtree);
    }
    return;
  }
  final Label leafLabel = tree.children()[0].label();
  if (leafLabel instanceof HasTag) {
    final HasTag tagged = (HasTag) leafLabel;
    if (tagged.tag() == null) {
      tagged.setTag(tree.value());
    }
  }
}
/**
 * Replaces the labels of the leaves with the given labels, pairing the
 * leaves returned by {@code tree.getLeaves()} with {@code labels} in order.
 * <p>
 * The size check happens after the pairwise assignment, so when an
 * exception is thrown the leaves that did have a matching label have
 * already been relabelled.
 *
 * @param tree the tree whose leaves are relabelled in place
 * @param labels the new labels, one per leaf
 * @throws IllegalArgumentException if the number of leaves and labels differ
 */
public static void setLeafLabels(Tree tree, List<Label> labels) {
  // Typed iterators: with raw Iterator, next() would return Object and not compile here.
  Iterator<Tree> leafIterator = tree.getLeaves().iterator();
  Iterator<Label> labelIterator = labels.iterator();
  while (leafIterator.hasNext() && labelIterator.hasNext()) {
    Tree leaf = leafIterator.next();
    Label label = labelIterator.next();
    leaf.setLabel(label);
  }
  if (leafIterator.hasNext()) {
    throw new IllegalArgumentException("Tree had more leaves than the labels provided");
  }
  if (labelIterator.hasNext()) {
    throw new IllegalArgumentException("More labels provided than tree had leaves");
  }
}
/**
 * Returns the maximal projection of {@code head} in {@code root} given a
 * {@link HeadFinder}: starting at {@code head}, climbs to the parent for
 * as long as the head finder picks the current node as that parent's
 * head, stopping at {@code root} at the latest.
 *
 * @param head the node to start from
 * @param root the tree in which parents are looked up
 * @param hf the HeadFinder consulted at each parent
 * @return the highest node reached by the climb (possibly {@code head} or {@code root})
 */
public static Tree maximalProjection(Tree head, Tree root, HeadFinder hf) {
  Tree projection = head;
  while (projection != root) {
    final Tree parent = projection.parent(root);
    if (hf.determineHead(parent) != projection) {
      break;
    }
    projection = parent;
  }
  return projection;
}
/**
 * Applies a TreeVisitor to all projections (including the node itself) of
 * a node in a Tree. Does nothing if {@code head} is not in {@code root},
 * which is detected by {@code head} having no parent in {@code root}
 * while not being {@code root} itself.
 *
 * @param v the visitor invoked on {@code head} and on each projection above it
 * @param head the node to start from
 * @param root the tree in which parents are looked up
 * @param hf the HeadFinder consulted at each parent
 * @return the maximal projection of {@code head} in {@code root}, or null if
 *     {@code head} is not in {@code root}
 */
public static Tree applyToProjections(TreeVisitor v, Tree head, Tree root, HeadFinder hf) {
  Tree parent = head.parent(root);
  if (parent == null && head != root) {
    return null;
  }
  Tree projection = head;
  v.visitTree(projection);
  while (projection != root && hf.determineHead(parent) == projection) {
    projection = parent;
    v.visitTree(projection);
    if (projection != root) {
      parent = projection.parent(root);
    }
  }
  return projection;
}
/**
 * Gets the {@code n}th terminal in {@code tree}. The first terminal is number zero.
 *
 * @param tree the tree to search
 * @param n zero-based index of the wanted terminal
 * @return the matching leaf, or null if the search finds none
 */
public static Tree getTerminal(Tree tree, int n) {
  final MutableInteger seen = new MutableInteger(0);
  return getTerminal(tree, seen, n);
}

/**
 * Recursive worker for {@link #getTerminal(Tree, int)}.
 *
 * @param tree the subtree currently being examined
 * @param i running count of yield items already passed; advanced by the
 *     yield size of every leaf that is skipped
 * @param n zero-based index of the wanted terminal
 * @return the matching leaf, or null if it is not in this subtree
 */
static Tree getTerminal(Tree tree, MutableInteger i, int n) {
  final boolean atLeaf = tree.isLeaf();
  if (i.intValue() == n) {
    // The counter already equals n, so the target is the first leaf below this node.
    return atLeaf ? tree : getTerminal(tree.children()[0], i, n);
  }
  if (atLeaf) {
    i.set(i.intValue() + tree.yield().size());
    return null;
  }
  for (Tree child : tree.children()) {
    final Tree found = getTerminal(child, i, n);
    if (found != null) {
      return found;
    }
  }
  return null;
}
/**
 * Gets the {@code n}th preterminal in {@code tree}. The first preterminal is number zero.
 *
 * @param tree the tree to search
 * @param n zero-based index of the wanted preterminal
 * @return the matching preterminal node, or null if the search finds none
 */
public static Tree getPreTerminal(Tree tree, int n) {
  final MutableInteger seen = new MutableInteger(0);
  return getPreTerminal(tree, seen, n);
}

/**
 * Recursive worker for {@link #getPreTerminal(Tree, int)}.
 *
 * @param tree the subtree currently being examined
 * @param i running count of yield items already passed; advanced by the
 *     yield size of every preterminal that is skipped
 * @param n zero-based index of the wanted preterminal
 * @return the matching preterminal, or null if it is not in this subtree
 */
static Tree getPreTerminal(Tree tree, MutableInteger i, int n) {
  final boolean atPreTerminal = tree.isPreTerminal();
  if (i.intValue() == n) {
    // The counter already equals n, so the target is the first preterminal below this node.
    return atPreTerminal ? tree : getPreTerminal(tree.children()[0], i, n);
  }
  if (atPreTerminal) {
    i.set(i.intValue() + tree.yield().size());
    return null;
  }
  for (Tree child : tree.children()) {
    final Tree found = getPreTerminal(child, i, n);
    if (found != null) {
      return found;
    }
  }
  return null;
}
/**
 * Returns the syntactic category of the tree as a list of the syntactic
 * categories of the mother and the daughters: the label value of
 * {@code t} first, followed by the label value of each child in order.
 *
 * @param t the local tree to describe
 * @return a new mutable list of label values, mother first
 */
public static List<String> localTreeAsCatList(Tree t) {
  // Fetch the child array once instead of on every loop test and access.
  Tree[] kids = t.children();
  List<String> categories = new ArrayList<>(kids.length + 1);
  categories.add(t.label().value());
  for (Tree kid : kids) {
    categories.add(kid.label().value());
  }
  return categories;
}
/**
 * Returns the index of {@code daughter} in {@code parent} by {@code ==}.
 * Returns -1 if {@code daughter} is not found.
 *
 * @param parent the node whose immediate children are scanned
 * @param daughter the child to look for (identity comparison, not equals)
 * @return the zero-based position of the first identical child, or -1
 */
public static int objectEqualityIndexOf(Tree parent, Tree daughter) {
  int position = 0;
  while (position < parent.children().length) {
    if (parent.children()[position] == daughter) {
      return position;
    }
    position++;
  }
  return -1;
}
/** Returns a String reporting what kinds of Tree and Label nodes this
 *  Tree contains.
 *  <p>
 *  The report always starts with the classes of the root node, its tree
 *  factory, its label and its label factory. If any node in the tree uses
 *  a different class for one of these, those classes are listed, followed
 *  by a breakdown of the classes used for interior nodes, leaves, phrase
 *  labels, tag labels and leaf labels (each either a class name,
 *  "mixed", or null if no such node was seen). Otherwise the report
 *  states that the classes are uniform.
 *
 *  @param t The tree to examine.
 *  @return A human-readable String reporting what kinds of Tree and Label nodes this
 *      Tree contains.
 *  @throws IllegalStateException if a node is neither phrasal, preterminal nor leaf
 */
public static String toStructureDebugString(Tree t) {
  String tCl = StringUtils.getShortClassName(t);
  String tfCl = StringUtils.getShortClassName(t.treeFactory());
  String lCl = StringUtils.getShortClassName(t.label());
  String lfCl = StringUtils.getShortClassName(t.label().labelFactory());
  Set<String> otherClasses = Generics.newHashSet();
  String leafLabels = null;
  String tagLabels = null;
  String phraseLabels = null;
  String leaves = null;
  String nodes = null;
  for (Tree st : t) {
    String stCl = StringUtils.getShortClassName(st);
    String stfCl = StringUtils.getShortClassName(st.treeFactory());
    String slCl = StringUtils.getShortClassName(st.label());
    String slfCl = StringUtils.getShortClassName(st.label().labelFactory());
    if ( ! tCl.equals(stCl)) {
      otherClasses.add(stCl);
    }
    if ( ! tfCl.equals(stfCl)) {
      otherClasses.add(stfCl);
    }
    if ( ! lCl.equals(slCl)) {
      otherClasses.add(slCl);
    }
    if ( ! lfCl.equals(slfCl)) {
      otherClasses.add(slfCl);
    }
    if (st.isPhrasal()) {
      nodes = mergeClassName(nodes, stCl);
      phraseLabels = mergeClassName(phraseLabels, slCl);
    } else if (st.isPreTerminal()) {
      nodes = mergeClassName(nodes, stCl);
      // Use slCl directly: it already is the label's short class name. Feeding that
      // String back through getShortClassName would describe the String object rather
      // than the label, and every later comparison against slCl would then differ.
      tagLabels = mergeClassName(tagLabels, slCl);
    } else if (st.isLeaf()) {
      leaves = mergeClassName(leaves, stCl);
      leafLabels = mergeClassName(leafLabels, slCl);
    } else {
      throw new IllegalStateException("Bad tree state: " + t);
    }
  } // end for Tree st : this
  StringBuilder sb = new StringBuilder();
  sb.append("Tree with root of class ").append(tCl).append(" and factory ").append(tfCl);
  sb.append(" and root label class ").append(lCl).append(" and factory ").append(lfCl);
  if ( ! otherClasses.isEmpty()) {
    sb.append(" and the following classes also found within the tree: ").append(otherClasses);
    // Append the breakdown to the report instead of returning it on its own, which
    // used to throw away everything accumulated in sb above.
    sb.append(" with ").append(nodes).append(" interior nodes and ").append(leaves)
        .append(" leaves, and ").append(phraseLabels).append(" phrase labels, ")
        .append(tagLabels).append(" tag labels, and ").append(leafLabels).append(" leaf labels.");
  } else {
    sb.append(" (and uniform use of these Tree and Label classes throughout the tree).");
  }
  return sb.toString();
}

/**
 * Folds one more observed class name into a running summary: the first
 * name seen is kept, a repeat of the same name leaves the summary
 * unchanged, and any differing name turns the summary into "mixed".
 */
private static String mergeClassName(String seen, String current) {
  if (seen == null) {
    return current;
  }
  return seen.equals(current) ? seen : "mixed";
}
/** Turns a sentence into a flat phrasal tree.
 *  The structure is S -> tag*. And then each tag goes to a word.
 *  The tag is either found from the label or made "WD".
 *  This overload delegates to {@link #toFlatTree(List, LabelFactory)}
 *  with a new {@code StringLabelFactory}.
 *
 *  @param s The Sentence to make the Tree from
 *  @return The one phrasal level Tree
 */
public static Tree toFlatTree(List<? extends HasWord> s) {
  return toFlatTree(s, new StringLabelFactory());
}
/** Turns a sentence into a flat phrasal tree.
 *  The structure is S -> tag*. And then each tag goes to a word.
 *  The tag is either found from the word (when it is a {@code TaggedWord})
 *  or made "WD".
 *  <p>
 *  Word labels and the fallback "WD" tag label are created with
 *  {@code lf}; the tag label of a {@code TaggedWord} and the root "S"
 *  label are always {@code StringLabel}s.
 *
 *  @param s The Sentence to make the Tree from
 *  @param lf The LabelFactory with which to create the word and "WD" labels
 *  @return The one phrasal level Tree
 */
public static Tree toFlatTree(List<? extends HasWord> s, LabelFactory lf) {
  List<Tree> daughters = new ArrayList<>(s.size());
  for (HasWord word : s) {
    Tree leaf = new LabeledScoredTreeNode(lf.newLabel(word.word()));
    Label tagLabel;
    if (word instanceof TaggedWord) {
      tagLabel = new StringLabel(((TaggedWord) word).tag());
    } else {
      tagLabel = lf.newLabel("WD");
    }
    // Wrap the word leaf in its preterminal and hang that under the root.
    daughters.add(new LabeledScoredTreeNode(tagLabel, Collections.singletonList(leaf)));
  }
  return new LabeledScoredTreeNode(new StringLabel("S"), daughters);
}
/**
 * Renders a tree as LaTeX markup. The helper fills two buffers, one for
 * the node hierarchy and one for the node connections; the result is
 * {@code \tree} followed by the hierarchy, a newline, the connections and
 * a final newline.
 *
 * @param t the tree to render
 * @return the LaTeX source for the tree
 */
public static String treeToLatex(Tree t) {
  final StringBuilder connections = new StringBuilder();
  final StringBuilder hierarchy = new StringBuilder();
  treeToLatexHelper(t, connections, hierarchy, 0, 1, 0);
  final StringBuilder latex = new StringBuilder("\\tree");
  latex.append(hierarchy).append('\n');
  latex.append(connections).append('\n');
  return latex.toString();
}
private static int treeToLatexHelper(Tree t, StringBuilder c, StringBuilder h,
int n, int nextN, int indent) {
StringBuilder sb = new StringBuilder();
for (int i=0; iith leaf of a tree from the left.
* The leftmost leaf is numbered 0.
*
* @return The i th leaf as a Tree, or null
* if there is no such leaf.
*/
/**
 * Gets the {@code i}th leaf of a tree from the left.
 * The leftmost leaf is numbered 0.
 *
 * @param tree the tree to search, iterated in its own iteration order
 * @param i zero-based index of the wanted leaf
 * @return The {@code i}th leaf as a Tree, or null if there is no such
 *     leaf (including when {@code i} is negative).
 */
public static Tree getLeaf(Tree tree, int i) {
  if (i < 0) {
    return null;
  }
  int leavesSeen = 0;
  for (Tree node : tree) {
    // Only leaves may match. Comparing on every node let a request for index -1
    // return the first non-leaf node before any leaf had been counted.
    if (node.isLeaf()) {
      if (leavesSeen == i) {
        return node;
      }
      leavesSeen++;
    }
  }
  return null;
}
/**
 * Gets the lowest common ancestor of all the nodes in the list within the
 * tree rooted at {@code root}. The root-to-node path of every node is
 * computed, and the deepest position at which all paths still agree
 * (by {@code equals}) is returned.
 *
 * @param nodes the nodes whose common ancestor is wanted
 * @param root the tree containing the nodes
 * @return the lowest common ancestor, or null if {@code nodes} is empty,
 *     if any node has no path from {@code root}, or if the paths share no prefix
 */
public static Tree getLowestCommonAncestor(List<Tree> nodes, Tree root) {
  List<List<Tree>> paths = new ArrayList<>();
  int min = Integer.MAX_VALUE;
  for (Tree t : nodes) {
    List<Tree> path = pathFromRoot(t, root);
    if (path == null) {
      return null;
    }
    min = Math.min(min, path.size());
    paths.add(path);
  }
  if (paths.isEmpty()) {
    // No nodes given: without this guard paths.get(0) below would throw.
    return null;
  }
  List<Tree> reference = paths.get(0);
  Tree commonAncestor = null;
  for (int depth = 0; depth < min; ++depth) {
    Tree candidate = reference.get(depth);
    for (List<Tree> path : paths) {
      if (!path.get(depth).equals(candidate)) {
        // First disagreement: the previous depth held the lowest common ancestor.
        return commonAncestor;
      }
    }
    commonAncestor = candidate;
  }
  return commonAncestor;
}
/**
 * Returns a list of categories that is the path from Tree {@code from} to
 * Tree {@code to} within Tree {@code root}. If either {@code from} or
 * {@code to} is not in {@code root}, returns null. Otherwise includes
 * both {@code from} and {@code to} in the list.
 * <p>
 * The path climbs from {@code from} to the deepest node shared by both
 * root paths, emitting {@code "up-" + category} for each node including
 * that shared node, and then descends to {@code to}, emitting
 * {@code "down-" + category} for each node below the shared node.
 *
 * @param from the start node
 * @param to the end node
 * @param root the tree containing both nodes
 * @return the list of "up-"/"down-" prefixed label values, or null
 */
public static List<String> pathNodeToNode(Tree from, Tree to, Tree root) {
  List<Tree> fromPath = pathFromRoot(from, root);
  if (fromPath == null) {
    return null;
  }
  List<Tree> toPath = pathFromRoot(to, root);
  if (toPath == null) {
    return null;
  }

  // Length of the prefix the two root paths share; lastNode is its deepest node.
  int last = 0;
  int min = Math.min(fromPath.size(), toPath.size());
  Tree lastNode = null;
  while (last < min && fromPath.get(last).equals(toPath.get(last))) {
    lastNode = fromPath.get(last);
    last++;
  }

  List<String> totalPath = new ArrayList<>();
  for (int i = fromPath.size() - 1; i >= last; i--) {
    totalPath.add("up-" + fromPath.get(i).label().value());
  }
  if (lastNode != null) {
    totalPath.add("up-" + lastNode.label().value());
  }
  // Descend only through the part of toPath below the shared prefix. Walking all of
  // toPath would emit the root-to-ancestor nodes again as "down-" steps.
  for (int i = last; i < toPath.size(); i++) {
    totalPath.add("down-" + toPath.get(i).label().value());
  }
  return totalPath;
}
/**
 * Returns the list of tree nodes from {@code root} to {@code t}. Includes
 * {@code root} and {@code t}. Returns null if {@code root} is null (and
 * {@code t} is not); otherwise the result is whatever
 * {@code root.dominationPath(t)} returns, which per the original note is
 * null when {@code t} is not dominated by {@code root}.
 *
 * @param t the node to reach (compared with {@code root} by identity)
 * @param root the tree to start from
 * @return the path as a list, a single-element list when {@code t == root}, or null
 */
public static List<Tree> pathFromRoot(Tree t, Tree root) {
  if (t == root) {
    // Identity check, not equals.
    List<Tree> single = new ArrayList<>(1);
    single.add(t);
    return single;
  }
  if (root == null) {
    return null;
  }
  return root.dominationPath(t);
}
/**
 * Replaces all instances (by ==) of {@code node} with {@code node1}
 * among the descendants of {@code t}. Doesn't affect the node
 * {@code t} itself. Every non-leaf node visited gets a freshly built
 * child list via {@code setChildren}; the search does not descend into
 * {@code node} or into the inserted {@code node1}.
 *
 * @param node the node to replace
 * @param node1 the replacement
 * @param t the tree whose descendants are rewritten in place
 */
public static void replaceNode(Tree node, Tree node1, Tree t) {
  if (t.isLeaf()) {
    return;
  }
  Tree[] kids = t.children();
  List<Tree> newKids = new ArrayList<>(kids.length);
  for (Tree kid : kids) {
    if (kid == node) {
      newKids.add(node1);
    } else {
      newKids.add(kid);
      replaceNode(node, node1, kid);
    }
  }
  t.setChildren(newKids);
}
/**
 * Returns the node of a tree which represents the lowest common
 * ancestor of nodes {@code t1} and {@code t2} dominated by {@code root}.
 * If either {@code t1} or {@code t2} has no path from {@code root},
 * returns null.
 *
 * @param t1 the first node
 * @param t2 the second node
 * @param root the tree containing both nodes
 * @return the deepest node at which the two root paths still agree (by
 *     {@code equals}), or null
 */
public static Tree getLowestCommonAncestor(Tree t1, Tree t2, Tree root) {
  List<Tree> t1Path = pathFromRoot(t1, root);
  List<Tree> t2Path = pathFromRoot(t2, root);
  if (t1Path == null || t2Path == null) {
    return null;
  }
  int min = Math.min(t1Path.size(), t2Path.size());
  Tree commonAncestor = null;
  for (int i = 0; i < min && t1Path.get(i).equals(t2Path.get(i)); ++i) {
    commonAncestor = t1Path.get(i);
  }
  return commonAncestor;
}
// todo [cdm 2015]: These next two methods duplicate the Tree.valueOf methods!
/**
 * Simple tree reading utility method. Parses a tree formatted as a PTB
 * string with a {@code PennTreeReader} and builds it with the given
 * TreeFactory.
 *
 * @param ptbTreeString the tree in PTB bracketed notation
 * @param treeFactory the factory used to create the tree nodes
 * @return the tree returned by the reader
 * @throws RuntimeException wrapping any IOException raised while reading
 */
public static Tree readTree(String ptbTreeString, TreeFactory treeFactory) {
  try {
    final StringReader input = new StringReader(ptbTreeString);
    final PennTreeReader reader = new PennTreeReader(input, treeFactory);
    return reader.readTree();
  } catch (IOException ioFailure) {
    throw new RuntimeException(ioFailure);
  }
}
/**
 * Simple tree reading utility method. Parses a tree formatted as a PTB
 * string using the class-wide default TreeFactory (a LabeledScoredTreeFactory).
 *
 * @param str the tree in PTB bracketed notation
 * @return the tree returned by {@link #readTree(String, TreeFactory)}
 */
public static Tree readTree(String str) {
  final TreeFactory factory = defaultTreeFactory;
  return readTree(str, factory);
}
/**
 * Outputs the labels on the trees, not just the words. Printing goes to
 * {@code System.out}, starting at depth 0.
 *
 * @param tree the tree to print
 */
public static void outputTreeLabels(Tree tree) {
  final int rootDepth = 0;
  outputTreeLabels(tree, rootDepth);
}

/**
 * Prints the label of {@code tree} on its own line on {@code System.out},
 * preceded by one indent unit per level of {@code depth}, then does the
 * same for each child at {@code depth + 1}.
 *
 * @param tree the subtree to print
 * @param depth the nesting level of {@code tree}, used for indentation
 */
public static void outputTreeLabels(Tree tree, int depth) {
  int remaining = depth;
  while (remaining-- > 0) {
    System.out.print(" ");
  }
  System.out.println(tree.label());
  final int childDepth = depth + 1;
  for (Tree child : tree.children()) {
    outputTreeLabels(child, childDepth);
  }
}
/**
 * Converts the tree labels to CoreLabels.
 * We need this because we store additional info in the CoreLabel, like token span.
 * Every node whose label is not already a CoreLabel receives a new
 * CoreLabel carrying only the old label's value; existing CoreLabels are
 * kept as they are.
 *
 * @param tree the tree to convert in place, including all descendants
 */
public static void convertToCoreLabels(Tree tree) {
  final Label current = tree.label();
  final boolean alreadyCore = current instanceof CoreLabel;
  if (!alreadyCore) {
    final CoreLabel replacement = new CoreLabel();
    replacement.setValue(current.value());
    tree.setLabel(replacement);
  }
  for (Tree child : tree.children()) {
    convertToCoreLabels(child);
  }
}
/**
 * Sets the sentence index of all the leaves in the tree
 * (only works on CoreLabel). The labels are taken from
 * {@code tree.yield()} and updated in order, so labels that precede an
 * offending non-CoreLabel have already been modified when the exception
 * is thrown.
 *
 * @param tree the tree whose leaf labels are updated in place
 * @param sentIndex the sentence index to store on each leaf label
 * @throws IllegalArgumentException if a yielded label is not a CoreLabel
 */
public static void setSentIndex(Tree tree, int sentIndex) {
  // Typed list: iterating a raw List as Label does not compile.
  List<Label> leaves = tree.yield();
  for (Label leaf : leaves) {
    if (!(leaf instanceof CoreLabel)) {
      throw new IllegalArgumentException("Only works on CoreLabel");
    }
    ((CoreLabel) leaf).setSentIndex(sentIndex);
  }
}
}