// Downloading a project consumes significant server resources, and we have to cover our server costs. Thank you for your understanding. The price of this project is only $1.
// Once you have bought this project, you can download and modify it as often as you like.
package edu.stanford.nlp.trees;
import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.*;
import java.util.concurrent.locks.Lock;
import java.util.function.Predicate;
import java.util.function.Function;
import edu.stanford.nlp.graph.DirectedMultiGraph;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.AbstractCoreLabel;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.WhitespaceTokenizer;
import edu.stanford.nlp.util.*;
import static edu.stanford.nlp.trees.GrammaticalRelation.DEPENDENT;
import static edu.stanford.nlp.trees.GrammaticalRelation.ROOT;
/**
* A {@code GrammaticalStructure} stores dependency relations between
* nodes in a tree. A new GrammaticalStructure is constructed
* from an existing parse tree with the help of {@link
* GrammaticalRelation GrammaticalRelation}, which
* defines a hierarchy of grammatical relations, along with
* patterns for identifying them in parse trees. The constructor for
* GrammaticalStructure uses these definitions to
* populate the new GrammaticalStructure with as many
* labeled grammatical relations as it can. Once constructed, the new
* GrammaticalStructure can be printed in various
* formats, or interrogated using the interface methods in this
* class. Internally, this uses a representation via a {@code TreeGraphNode},
* that is, a tree with additional labeled
* arcs between nodes, for representing the grammatical relations in a
* parse tree.
*
* Caveat emptor! This is a work in progress.
* Nothing in here should be relied upon to function perfectly.
* Feedback welcome.
*
* @author Bill MacCartney
* @author Galen Andrew (refactoring English-specific stuff)
* @author Ilya Sherman (dependencies)
* @author Daniel Cer
* @see EnglishGrammaticalRelations
* @see GrammaticalRelation
* @see EnglishGrammaticalStructure
*/
public abstract class GrammaticalStructure implements Serializable {
/** True when the JVM system property {@code GrammaticalStructure} is defined (with any value). */
private static final boolean PRINT_DEBUGGING = System.getProperty("GrammaticalStructure") != null;
/**
 * A specification for the types of extra edges to add to the dependency tree.
 * If you're in doubt, use {@link edu.stanford.nlp.trees.GrammaticalStructure.Extras#NONE}.
 *
 * <p>Each constant is a fixed combination of the three public flags
 * {@link #doRef}, {@link #doSubj} and {@link #collapseRef}.
 */
public enum Extras {
  /**
   * <p>Don't include any additional edges.</p>
   *
   * <p>Note: In older code (2014 and before) including extras was a boolean flag.
   * This option is the equivalent of the {@code false} flag.</p>
   */
  NONE(false, false, false),

  /**
   * Include only the extra reference edges, and save them as reference edges without collapsing.
   */
  REF_ONLY_UNCOLLAPSED(true, false, false),

  /**
   * Include only the extra reference edges, but collapsing these edges to clone the edge type of the referent.
   * So, for example, <i>My dog who eats sausage</i> may have a "ref" edge from <i>who</i> to <i>dog</i>
   * that would be deleted and replaced with an "nsubj" edge from <i>eats</i> to <i>dog</i>.
   */
  REF_ONLY_COLLAPSED(true, false, true),

  /**
   * Add extra subjects only, not adding any of the other extra edge types.
   */
  SUBJ_ONLY(false, true, false),

  /**
   * Add both uncollapsed reference edges and extra subject edges.
   *
   * @see edu.stanford.nlp.trees.GrammaticalStructure.Extras#SUBJ_ONLY
   * @see edu.stanford.nlp.trees.GrammaticalStructure.Extras#REF_ONLY_UNCOLLAPSED
   */
  REF_UNCOLLAPSED_AND_SUBJ(true, true, false),

  /**
   * Add both collapsed reference edges and extra subject edges.
   *
   * @see edu.stanford.nlp.trees.GrammaticalStructure.Extras#SUBJ_ONLY
   * @see edu.stanford.nlp.trees.GrammaticalStructure.Extras#REF_ONLY_COLLAPSED
   */
  REF_COLLAPSED_AND_SUBJ(true, true, true),

  /**
   * <p>Do the maximal amount of extra processing.
   * Currently, this is equivalent to
   * {@link edu.stanford.nlp.trees.GrammaticalStructure.Extras#REF_COLLAPSED_AND_SUBJ}.</p>
   *
   * <p>Note: In older code (2014 and before) including extras was a boolean flag.
   * This option is the equivalent of the {@code true} flag.</p>
   */
  MAXIMAL(true, true, true);

  /** Add "ref" edges */
  public final boolean doRef;
  /** Add extra subject edges */
  public final boolean doSubj;
  /** collapse the "ref" edges */
  public final boolean collapseRef;

  /**
   * Stores the three flags verbatim.
   *
   * @param doRef       whether "ref" edges are added
   * @param doSubj      whether extra subject edges are added
   * @param collapseRef whether the "ref" edges are collapsed
   */
  Extras(boolean doRef, boolean doSubj, boolean collapseRef) {
    this.doRef = doRef;
    this.doSubj = doSubj;
    this.collapseRef = collapseRef;
  }
}
/**
 * The basic typed dependencies. The tree-based constructor fills this from
 * {@code getDeps}; the list-based constructor stores a copy of the list it is given.
 */
protected final List<TypedDependency> typedDependencies;
/**
 * The basic dependencies plus any extra ones. The tree-based constructor starts
 * from a copy of {@link #typedDependencies} and lets {@code getExtraDeps} add to it;
 * the list-based constructor makes this the very same list object as
 * {@link #typedDependencies}.
 */
protected final List<TypedDependency> allTypedDependencies;
/**
 * Filter applied to word strings: a dependency on a word is only kept when the
 * filter accepts that word (i.e. it should return false for punctuation that is
 * to be dropped).
 */
protected final Predicate<String> puncFilter;
/**
 * The root Tree node for this GrammaticalStructure.
 */
protected final TreeGraphNode root;
/**
 * A map from arbitrary integer indices to nodes. Entries are added by
 * {@code addNodeToIndexMap} while the tree is being indexed and are looked up
 * by {@code getNodeByIndex}.
 */
private final Map<Integer, TreeGraphNode> indexMap = Generics.newHashMap();
/**
 * Builds a GrammaticalStructure from a parse tree: the tree is copied into a
 * {@code TreeGraphNode}, optionally transformed, indexed and head-percolated,
 * and then every applicable grammatical relation is recorded as a dependency.
 *
 * @param t A Tree to analyze
 * @param relations A set of GrammaticalRelations to consider
 * @param relationsLock If non-null, held while the relations are being matched
 *                      against the tree (needed to make this thread-safe)
 * @param transformer A transformer to apply to the tree before converting;
 *                    may be null, in which case the copied tree is used as is
 * @param hf A HeadFinder for analysis; must not be null
 * @param puncFilter A Filter to reject punctuation. To delete punctuation
 *                   dependencies, this filter should return false on
 *                   punctuation word strings, and true otherwise.
 *                   If punctuation dependencies should be kept, you
 *                   should pass in a {@code Filters.<String>acceptFilter()}.
 * @throws RuntimeException if the transformer returns something other than a TreeGraphNode
 * @throws AssertionError if {@code hf} is null
 */
public GrammaticalStructure(Tree t, Collection relations,
                            Lock relationsLock, TreeTransformer transformer,
                            HeadFinder hf, Predicate puncFilter) {
  final TreeGraphNode copiedTree = new TreeGraphNode(t, (TreeGraphNode) null);
  // TODO: create the tree and reuse the leaf labels in one pass,
  // avoiding a wasteful copy of the labels.
  Trees.setLeafLabels(copiedTree, t.yield());
  Trees.setLeafTagsIfUnset(copiedTree);

  // Without a transformer the copied tree itself becomes the root.
  TreeGraphNode analysisRoot = copiedTree;
  if (transformer != null) {
    final Tree transformed = transformer.transformTree(copiedTree);
    if (!(transformed instanceof TreeGraphNode)) {
      throw new RuntimeException("Transformer did not change TreeGraphNode into another TreeGraphNode: " + transformer);
    }
    analysisRoot = (TreeGraphNode) transformed;
  }
  this.root = analysisRoot;

  indexNodes(analysisRoot);

  // Head words and tags are percolated up to the phrase nodes.
  if (hf == null) {
    throw new AssertionError("Cannot use null HeadFinder");
  }
  analysisRoot.percolateHeads(hf);
  if (analysisRoot.value() == null) {
    analysisRoot.setValue("ROOT"); // todo: cdm: it doesn't seem like this line should be here
  }

  // Dependencies are derived from the heads.
  this.puncFilter = puncFilter;
  final NoPunctTypedDependencyFilter puncTypedDepFilter = new NoPunctTypedDependencyFilter(puncFilter);
  final DirectedMultiGraph basicGraph = new DirectedMultiGraph();
  final DirectedMultiGraph completeGraph = new DirectedMultiGraph();

  // Walk the root and, recursively, its descendants; the optional lock is
  // held only for the duration of that walk.
  final boolean mustLock = relationsLock != null;
  if (mustLock) {
    relationsLock.lock();
  }
  try {
    analyzeNode(analysisRoot, analysisRoot, relations, hf, puncFilter, basicGraph, completeGraph);
  } finally {
    if (mustLock) {
      relationsLock.unlock();
    }
  }

  attachStrandedNodes(analysisRoot, analysisRoot, false, puncFilter, basicGraph);

  // Basic dependencies first; the "all" list is a copy that then receives the extras.
  typedDependencies = getDeps(puncTypedDepFilter, basicGraph);
  allTypedDependencies = Generics.newArrayList(typedDependencies);
  getExtraDeps(allTypedDependencies, puncTypedDepFilter, completeGraph);
}
/**
 * Gives every node of the subtree rooted at {@code tree} an integer index.
 * Leaves are numbered first, left to right, starting at 1; the remaining
 * nodes are then numbered in pre-order, continuing from the first index the
 * leaf pass left unassigned.
 *
 * @param tree root of the subtree to index
 */
private void indexNodes(TreeGraphNode tree) {
  final int firstNonLeafIndex = indexLeaves(tree, 1);
  indexNodes(tree, firstNonLeafIndex);
}
/**
 * Numbers the leaves below {@code tree} from left to right and registers each
 * of them in the index map. A leaf whose index is already non-negative keeps
 * that index, and numbering continues from the value after it.
 *
 * @param tree root of the subtree whose leaves are indexed
 * @param startIndex index to give to the first leaf that has none yet
 * @return the next index still unassigned
 */
private int indexLeaves(TreeGraphNode tree, int startIndex) {
  if (!tree.isLeaf()) {
    int next = startIndex;
    for (TreeGraphNode child : tree.children) {
      next = indexLeaves(child, next);
    }
    return next;
  }

  final int existing = tree.index();
  final int leafIndex;
  if (existing >= 0) {
    // already indexed: keep the existing index
    leafIndex = existing;
  } else {
    leafIndex = startIndex;
    tree.setIndex(leafIndex);
  }
  addNodeToIndexMap(leafIndex, tree);
  return leafIndex + 1;
}
/**
 * Pre-order pass that numbers every node below (and including) {@code tree}
 * that has no index yet, registering each newly numbered node in the index
 * map. Nodes that already carry a non-negative index are left untouched, which
 * is what allows the leaves to be indexed in a separate, earlier pass.
 *
 * @param tree root of the subtree to index
 * @param startIndex first index to hand out
 * @return the next index still unassigned
 */
private int indexNodes(TreeGraphNode tree, int startIndex) {
  int next = startIndex;
  if (tree.index() < 0) { // node has no index yet
    addNodeToIndexMap(next, tree);
    tree.setIndex(next);
    next++;
  }
  if (tree.isLeaf()) {
    return next;
  }
  for (TreeGraphNode child : tree.children) {
    next = indexNodes(child, next);
  }
  return next;
}
/**
 * Records {@code node} in the index map under {@code index}, replacing any
 * node previously stored under the same index. Called by the indexing passes
 * ({@code indexLeaves} and {@code indexNodes}).
 *
 * @param index the arbitrary integer index
 * @param node the TreeGraphNode to be indexed
 */
private void addNodeToIndexMap(int index, TreeGraphNode node) {
  indexMap.put(index, node);
}
/**
 * Looks up the node registered in the index map under the given index.
 *
 * @param index the integer index of the node you want
 * @return the TreeGraphNode having the specified
 *         index (or null if such does not exist)
 */
private TreeGraphNode getNodeByIndex(int index) {
  return indexMap.get(index);
}
/**
 * Accessor for the root node of the tree underlying this GrammaticalStructure.
 *
 * @return the root Tree of this GrammaticalStructure
 */
public TreeGraphNode root() {
  return this.root;
}
/**
 * Always throws: reports that a dependency string handed to
 * {@code fromStringReps} is not in the expected {@code type(arg-idx, arg-idx)} shape.
 *
 * @param dep the offending dependency string, quoted in the message
 * @throws RuntimeException unconditionally
 */
private static void throwDepFormatException(String dep) {
  final String message = String.format("Dependencies should be for the format 'type(arg-idx, arg-idx)'. Could not parse '%s'", dep);
  throw new RuntimeException(message);
}
/**
 * Create a grammatical structure from its string representation.
 *
 * Like buildCoNLLXGrammaticalStructure,
 * this method fakes up the parts of the tree structure that are not
 * used by the grammatical relation transformation operations: a flat tree
 * with one ROOT node, one POS node per token below it, and one word leaf
 * below each POS node.
 *
 * Note: Added by daniel cer
 *
 * @param tokens the words of the sentence, in order
 * @param posTags the part-of-speech tag of each token; must have the same size as {@code tokens}
 * @param deps dependency strings of the form {@code type(word-idx, word-idx)}, governor first.
 *             The number after the last '-' of each argument selects the word: 0 is the
 *             artificial ROOT and {@code i} is the i-th token (1-based). Apostrophes in the
 *             number are ignored.
 * @return an anonymous GrammaticalStructure holding the parsed dependencies over the faked tree
 * @throws RuntimeException if the sizes of {@code tokens} and {@code posTags} differ, or a
 *         dependency string lacks the brackets, the ", " separator or a '-' in either argument
 *         (a non-numeric index surfaces as NumberFormatException, an index beyond the
 *         sentence as IndexOutOfBoundsException)
 */
public static GrammaticalStructure fromStringReps(List<String> tokens, List<String> posTags, List<String> deps) {
  if (tokens.size() != posTags.size()) {
    throw new RuntimeException(String.format(
        "tokens.size(): %d != pos.size(): %d%n", tokens.size(), posTags
        .size()));
  }

  List<TreeGraphNode> tgPOSNodes = new ArrayList<>(tokens.size());

  CoreLabel rootLabel = new CoreLabel();
  rootLabel.setValue("ROOT");

  // Slot 0 holds the artificial ROOT word, so token i (1-based) ends up at position i.
  List<IndexedWord> nodeWords = new ArrayList<>(tokens.size() + 1);
  nodeWords.add(new IndexedWord(rootLabel));

  SemanticHeadFinder headFinder = new SemanticHeadFinder();

  Iterator<String> posIter = posTags.iterator();
  for (String wordString : tokens) {
    String posString = posIter.next();

    CoreLabel wordLabel = new CoreLabel();
    wordLabel.setWord(wordString);
    wordLabel.setValue(wordString);
    wordLabel.setTag(posString);
    TreeGraphNode word = new TreeGraphNode(wordLabel);

    CoreLabel tagLabel = new CoreLabel();
    tagLabel.setValue(posString);
    tagLabel.setWord(posString);
    TreeGraphNode pos = new TreeGraphNode(tagLabel);

    tgPOSNodes.add(pos);

    // POS node dominates exactly its word leaf
    TreeGraphNode[] childArr = {word};
    pos.setChildren(childArr);
    word.setParent(pos);
    pos.percolateHeads(headFinder);

    nodeWords.add(new IndexedWord(wordLabel));
  }

  TreeGraphNode root = new TreeGraphNode(rootLabel);
  root.setChildren(tgPOSNodes.toArray(new TreeGraphNode[tgPOSNodes.size()]));
  root.setIndex(0);

  // Build list of TypedDependencies
  List<TypedDependency> tdeps = new ArrayList<>(deps.size());
  for (String depString : deps) {
    // "type(" prefix
    int firstBracket = depString.indexOf('(');
    if (firstBracket == -1) {
      throwDepFormatException(depString);
    }
    String type = depString.substring(0, firstBracket);

    // ")" suffix
    if (depString.charAt(depString.length() - 1) != ')') {
      throwDepFormatException(depString);
    }
    String args = depString.substring(firstBracket + 1, depString.length() - 1);

    // "governor, dependent"
    int argSep = args.indexOf(", ");
    if (argSep == -1) {
      throwDepFormatException(depString);
    }
    String parentArg = args.substring(0, argSep);
    String childArg = args.substring(argSep + 2);

    // The index follows the LAST dash, so words containing dashes are fine.
    int parentDash = parentArg.lastIndexOf('-');
    if (parentDash == -1) {
      throwDepFormatException(depString);
    }
    int childDash = childArg.lastIndexOf('-');
    if (childDash == -1) {
      throwDepFormatException(depString);
    }

    int parentIdx = Integer.parseInt(parentArg.substring(parentDash + 1).replace("'", ""));
    int childIdx = Integer.parseInt(childArg.substring(childDash + 1).replace("'", ""));

    GrammaticalRelation grel = new GrammaticalRelation(GrammaticalRelation.Language.Any, type, null, DEPENDENT);
    TypedDependency tdep = new TypedDependency(grel, nodeWords.get(parentIdx), nodeWords.get(childIdx));
    tdeps.add(tdep);
  }

  // TODO add some elegant way to construct language
  // appropriate GrammaticalStructures (e.g., English, Chinese, etc.)
  return new GrammaticalStructure(tdeps, root) {
    private static final long serialVersionUID = 1L;
  };
}
public GrammaticalStructure(List projectiveDependencies, TreeGraphNode root) {
this.root = root;
indexNodes(this.root);
this.puncFilter = Filters.acceptFilter();
allTypedDependencies = typedDependencies = new ArrayList(projectiveDependencies);
}
/**
 * Convenience constructor: same as the six-argument constructor with neither
 * a relations lock nor a tree transformer.
 *
 * @param t A Tree to analyze
 * @param relations A set of GrammaticalRelations to consider
 * @param hf A HeadFinder for analysis
 * @param puncFilter A Filter to reject punctuation
 */
public GrammaticalStructure(Tree t, Collection relations, HeadFinder hf, Predicate puncFilter) {
  this(t,
       relations,
       null,  // relationsLock: none
       null,  // transformer: none
       hf,
       puncFilter);
}
/**
 * Renders the tree (pretty-printed, minus its first character) followed by
 * the heading "Typed Dependencies:" and the basic typed dependency list.
 */
@Override
public String toString() {
  final String prettyTree = root.toPrettyString(0).substring(1);
  return prettyTree + "Typed Dependencies:\n" + typedDependencies;
}
/**
 * Walks the tree below {@code t} and gives a generic DEPENDENT edge to nodes
 * that ended up unconnected in the basic graph.
 *
 * @param t the node currently visited; leaves are ignored
 * @param root the root of the whole tree, used as the start of the reachability check
 * @param attach whether {@code t} itself is a candidate for attachment; the
 *               recursion sets this for a child exactly when its head word
 *               node differs from its parent's
 * @param puncFilter only nodes whose head word passes this filter are attached
 * @param basicGraph the basic dependency graph, modified in place
 */
private static void attachStrandedNodes(TreeGraphNode t, TreeGraphNode root, boolean attach, Predicate puncFilter, DirectedMultiGraph basicGraph) {
  if (t.isLeaf()) {
    return;
  }

  if (attach) {
    final String headWord = t.headWordNode().label().value();
    if (puncFilter.test(headWord)) {
      // make faster by first looking for links from parent
      // it is necessary to look for paths using all directions
      // because sometimes there are edges created from lower nodes to
      // nodes higher up
      final TreeGraphNode governor = t.parent().highestNodeWithSameHead();
      final boolean linkedToGovernor = basicGraph.isEdge(governor, t);
      if (!linkedToGovernor && basicGraph.getShortestPath(root, t, false) == null) {
        basicGraph.add(governor, t, GrammaticalRelation.DEPENDENT);
      }
    }
  }

  for (TreeGraphNode kid : t.children()) {
    final boolean headDiffers = kid.headWordNode() != t.headWordNode();
    attachStrandedNodes(kid, root, headDiffers, puncFilter, basicGraph);
  }
}
/**
 * Matches every grammatical relation against {@code t} and, recursively, its
 * descendants, recording the governor/dependent pairs found.
 *
 * <p>Every match is added to {@code completeGraph}. A match is additionally
 * added to {@code basicGraph} only if the dependent has no parent there yet
 * (or its parent is this same governor) and there is no directed path from
 * the dependent back to the governor.
 *
 * <p>cdm dec 2009: I changed this to automatically fail on preterminal nodes,
 * since they shouldn't match for GR parent patterns. Should speed it up.
 *
 * @param t the node currently analyzed; anything that is not phrasal is skipped
 * @param root the root of the whole tree, passed on to the relation patterns
 * @param relations the grammatical relations to try
 * @param hf head finder passed on to the relation patterns
 * @param puncFilter dependents whose head word is rejected by this filter are dropped
 * @param basicGraph receives the basic dependencies
 * @param completeGraph receives all dependencies found
 */
private static void analyzeNode(TreeGraphNode t, TreeGraphNode root, Collection<GrammaticalRelation> relations, HeadFinder hf, Predicate<String> puncFilter, DirectedMultiGraph<TreeGraphNode, GrammaticalRelation> basicGraph, DirectedMultiGraph<TreeGraphNode, GrammaticalRelation> completeGraph) {
  if (!t.isPhrasal()) { // don't do leaves or preterminals!
    return;
  }

  TreeGraphNode tHigh = t.highestNodeWithSameHead();
  for (GrammaticalRelation egr : relations) {
    if (!egr.isApplicable(t)) {
      continue;
    }
    for (TreeGraphNode u : egr.getRelatedNodes(t, root, hf)) {
      TreeGraphNode uHigh = u.highestNodeWithSameHead();
      if (uHigh == tHigh) {
        // a node cannot depend on itself
        continue;
      }
      if (!puncFilter.test(uHigh.headWordNode().label().value())) {
        continue;
      }
      completeGraph.add(tHigh, uHigh, egr);

      // If there are two patterns that add dependencies, X --> Z and Y --> Z, and X dominates Y,
      // then the dependency Y --> Z is not added to the basic graph to prevent unwanted duplication.
      // Similarly, if there is already a path from X --> Y, and an expression would trigger
      // Y --> X somehow, we ignore that.
      Set<TreeGraphNode> parents = basicGraph.getParents(uHigh);
      boolean noOtherParent = parents == null || parents.isEmpty() || parents.contains(tHigh);
      if (noOtherParent && basicGraph.getShortestPath(uHigh, tHigh, true) == null) {
        basicGraph.add(tHigh, uHigh, egr);
      }
    }
  }

  // now recurse into children
  for (TreeGraphNode kid : t.children()) {
    analyzeNode(kid, root, relations, hf, puncFilter, basicGraph, completeGraph);
  }
}
/**
 * Extends {@code deps} in place with the extra dependencies and sorts the result.
 * First the structure-based extras are added via {@code getExtras}, then the
 * tree-based ones found in {@code completeGraph}.
 *
 * @param deps the dependency list to augment and sort
 * @param puncTypedDepFilter filter that may reject punctuation dependencies
 * @param completeGraph graph of all tree dependencies found during analysis
 */
private void getExtraDeps(List deps, Predicate puncTypedDepFilter, DirectedMultiGraph completeGraph) {
  getExtras(deps);

  // tree-based additions: everything in the complete graph that passes both filters
  final Predicate treeDepFilter = extraTreeDepFilter();
  getTreeDeps(deps, completeGraph, puncTypedDepFilter, treeDepFilter);

  Collections.sort(deps);
}
/**
 * Helps the constructor build a list of typed dependencies using
 * information from a {@code GrammaticalStructure}.
 *
 * <p>One dependency is created per governor/dependent pair of {@code basicGraph},
 * labeled with the common ancestor of the relations on the edges between them.
 * A ROOT dependency from an artificial "ROOT" word (index 0) to the head word
 * of the tree is then added if the filter accepts it. Finally the list is
 * post-processed and sorted.
 *
 * @param puncTypedDepFilter decides whether the ROOT dependency is kept
 * @param basicGraph the basic dependency graph built during analysis
 * @return the sorted list of basic typed dependencies
 */
private List<TypedDependency> getDeps(Predicate<TypedDependency> puncTypedDepFilter, DirectedMultiGraph<TreeGraphNode, GrammaticalRelation> basicGraph) {
  List<TypedDependency> basicDep = Generics.newArrayList();

  for (TreeGraphNode gov : basicGraph.getAllVertices()) {
    for (TreeGraphNode dep : basicGraph.getChildren(gov)) {
      GrammaticalRelation reln = getGrammaticalRelationCommonAncestor(gov.headWordNode().label(), gov.label(), dep.headWordNode().label(), dep.label(), basicGraph.getEdges(gov, dep));
      basicDep.add(new TypedDependency(reln, new IndexedWord(gov.headWordNode().label()), new IndexedWord(dep.headWordNode().label())));
    }
  }

  // add the root
  TreeGraphNode dependencyRoot = new TreeGraphNode(new Word("ROOT"));
  dependencyRoot.setIndex(0);
  TreeGraphNode rootDep = root().headWordNode();
  if (rootDep == null) {
    // No head word on the root: fall back to the first leaf (or its head word node, if it has one).
    List<Tree> leaves = Trees.leaves(root());
    if (!leaves.isEmpty()) {
      Tree leaf = leaves.get(0);
      if (!(leaf instanceof TreeGraphNode)) {
        throw new AssertionError("Leaves should be TreeGraphNodes");
      }
      rootDep = (TreeGraphNode) leaf;
      if (rootDep.headWordNode() != null) {
        rootDep = rootDep.headWordNode();
      }
    }
  }
  if (rootDep != null) {
    TypedDependency rootTypedDep = new TypedDependency(ROOT, new IndexedWord(dependencyRoot.label()), new IndexedWord(rootDep.label()));
    if (puncTypedDepFilter.test(rootTypedDep)) {
      basicDep.add(rootTypedDep);
    }
  }

  postProcessDependencies(basicDep);

  Collections.sort(basicDep);

  return basicDep;
}
/**
 * Supplies the filter that decides which tree-based dependencies may be added
 * as extras. The default accepts everything. Subclasses can narrow this: in
 * the English dependencies, for example, the REL dependency is only an
 * intermediate and should not be added again when the trees are scanned a
 * second time for missing dependencies.
 *
 * @return the filter applied to candidate extra tree dependencies
 */
protected Predicate extraTreeDepFilter() {
  final Predicate acceptEverything = Filters.acceptFilter();
  return acceptEverything;
}
/**
 * Hook for language-specific clean-up of the basic dependencies; it is invoked
 * by {@code getDeps} just before the list is sorted. For example, English
 * might replace "rel" dependencies with either dobj or pobj depending on the
 * surrounding dependencies. This base implementation does nothing.
 *
 * @param basicDep the basic dependencies, to be modified in place
 */
protected void postProcessDependencies(List basicDep) {
  // intentionally empty: no post processing by default
}
/**
 * Hook for adding extra dependencies that can be derived from the existing
 * dependency structure alone, without looking at the tree; it is invoked by
 * {@code getExtraDeps} before the tree-based extras are collected.
 * For example, the English xsubj dependency can be extracted that way.
 * This base implementation adds nothing.
 *
 * @param basicDep the dependency list to which extras may be appended
 */
protected void getExtras(List basicDep) {
  // intentionally empty: no extra dependencies by default
}
/**
 * Looks through the dependencies recorded in {@code completeGraph} and appends
 * to {@code deps} those which aren't in the list yet and which satisfy both
 * filters. Every dependency appended here is marked as extra.
 *
 * @param deps The list of dependencies which may be augmented
 * @param completeGraph a graph of all the tree dependencies found earlier
 * @param puncTypedDepFilter The filter that may skip punctuation dependencies
 * @param extraTreeDepFilter Additional dependencies are added only if they pass this filter
 */
private static void getTreeDeps(List<TypedDependency> deps,
                                DirectedMultiGraph<TreeGraphNode, GrammaticalRelation> completeGraph,
                                Predicate<TypedDependency> puncTypedDepFilter,
                                Predicate<TypedDependency> extraTreeDepFilter) {
  for (TreeGraphNode gov : completeGraph.getAllVertices()) {
    for (TreeGraphNode dep : completeGraph.getChildren(gov)) {
      // Only the relations left after removeGrammaticalRelationAncestors are considered.
      for (GrammaticalRelation rel : removeGrammaticalRelationAncestors(completeGraph.getEdges(gov, dep))) {
        TypedDependency newDep = new TypedDependency(rel, new IndexedWord(gov.headWordNode().label()), new IndexedWord(dep.headWordNode().label()));
        if (!deps.contains(newDep) && puncTypedDepFilter.test(newDep) && extraTreeDepFilter.test(newDep)) {
          newDep.setExtra();
          deps.add(newDep);
        }
      }
    }
  }
}
private static class NoPunctFilter implements Predicate>, Serializable {
private Predicate npf;
NoPunctFilter(Predicate f) {
this.npf = f;
}
@Override
public boolean test(Dependency