All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.semgraph.SemanticGraph Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.semgraph;
import edu.stanford.nlp.util.logging.Redwood;

import edu.stanford.nlp.graph.DirectedMultiGraph;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.MapFactory;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringParsingTask;
import edu.stanford.nlp.util.StringUtils;

import java.io.Serializable;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static edu.stanford.nlp.trees.GrammaticalRelation.ROOT;

// todo [cdm 2013]: The treatment of roots in this class should probably be redone.
// todo [cdm 2013]: Probably we should put fake root node in graph and arc(s) from it.
// todo [cdm 2013]: At any rate, printing methods should print the root

/**
 * Represents a semantic graph of a sentence or document, with IndexedWord
 * objects for nodes.
 * 

* Notes: *
* The root is not at present represented as a vertex in the graph. * At present you need to get a root/roots * from the separate roots variable and to know about it. * This should maybe be changed, because otherwise, doing things like * simply getting the set of nodes or edges from the graph doesn't give * you root nodes or edges. *
* Given the kinds of representations that we normally use with * typedDependenciesCollapsed, there can be (small) cycles in a * SemanticGraph, and these cycles may involve the node that is conceptually the * root of the graph, so there may be no node without a parent node. You can * better get at the root(s) via the variable and methods provided. *
* There is no mechanism for returning all edges at once (eg edgeSet()). * This is intentional. Use edgeIterable() to iterate over the edges if necessary. * * @author Christopher Cox * @author Teg Grenager * @see SemanticGraphEdge * @see IndexedWord */ public class SemanticGraph implements Serializable { /** A logger for this class */ private static Redwood.RedwoodChannels log = Redwood.channels(SemanticGraph.class); public static final boolean addSRLArcs = false; private static final SemanticGraphFormatter formatter = new SemanticGraphFormatter(); /** * The distinguished root vertices, if known. */ private final Collection roots; private final DirectedMultiGraph graph; private static final MapFactory>> outerMapFactory = MapFactory.hashMapFactory(); private static final MapFactory> innerMapFactory = MapFactory.hashMapFactory(); private static final MapFactory wordMapFactory = MapFactory.hashMapFactory(); private LinkedList comments = new LinkedList<>(); public int edgeCount() { return graph.getNumEdges(); } public int outDegree(IndexedWord vertex) { return graph.getOutDegree(vertex); } public int inDegree(IndexedWord vertex) { return graph.getInDegree(vertex); } public List getAllEdges(IndexedWord gov, IndexedWord dep) { return graph.getEdges(gov, dep); } // TODO: this is a bad method to use because there can be multiple // edges. All users of this method should be switched to iterating // over getAllEdges. This has already been done for all uses // outside RTE. public SemanticGraphEdge getEdge(IndexedWord gov, IndexedWord dep) { List edges = graph.getEdges(gov, dep); if (edges == null || edges.isEmpty()) return null; return edges.get(0); } public void addVertex(IndexedWord vertex) { graph.addVertex(vertex); } public boolean containsVertex(IndexedWord vertex) { return graph.containsVertex(vertex); } public boolean containsEdge(IndexedWord source, IndexedWord target) { return graph.isEdge(source, target); } public boolean containsEdge(SemanticGraphEdge edge) { return containsEdge(edge.getSource(), edge.getTarget()); } public Set vertexSet() { return graph.getAllVertices(); } public boolean removeEdge(SemanticGraphEdge e) { return graph.removeEdge(e.getSource(), e.getTarget(), e); } public boolean removeVertex(IndexedWord vertex) { return graph.removeVertex(vertex); } /** * This returns an ordered list of vertices (based upon their * indices in the sentence). This creates and sorts a list, so * prefer vertexSet unless you have a good reason to want nodes in * index order. * * @return Ordered list of vertices */ public List vertexListSorted() { ArrayList vlist = new ArrayList<>(vertexSet()); Collections.sort(vlist); return vlist; } /** * Returns an ordered list of edges in the graph. * This creates and sorts a list, so prefer edgeIterable(). * * @return A ordered list of edges in the graph. */ public List edgeListSorted() { ArrayList edgeList = new ArrayList<>(); for (SemanticGraphEdge edge : edgeIterable()) { edgeList.add(edge); } Collections.sort(edgeList, SemanticGraphEdge.orderByTargetComparator()); return edgeList; } public Iterable edgeIterable() { return graph.edgeIterable(); } public Iterator outgoingEdgeIterator(IndexedWord v) { return graph.outgoingEdgeIterator(v); } public Iterable outgoingEdgeIterable(IndexedWord v) { return graph.outgoingEdgeIterable(v); } public Iterator incomingEdgeIterator(IndexedWord v) { return graph.incomingEdgeIterator(v); } public Iterable incomingEdgeIterable(IndexedWord v) { return graph.incomingEdgeIterable(v); } public List outgoingEdgeList(IndexedWord v) { return CollectionUtils.toList(outgoingEdgeIterable(v)); } public List incomingEdgeList(IndexedWord v) { return CollectionUtils.toList(incomingEdgeIterable(v)); } public boolean isEmpty() { return graph.isEmpty(); } /** * Searches up to 2 levels to determine how far ancestor is from child (i.e., * returns 1 if "ancestor" is a parent, or 2 if ancestor is a grandparent. * * @param child * candidate child * @param ancestor * candidate ancestor * @return the number of generations between "child" and "ancestor" (1 is an * immediate parent), or -1 if there is no relationship found. */ public int isAncestor(IndexedWord child, IndexedWord ancestor) { Set parents = this.getParents(child); if (parents.contains(ancestor)) { return 1; } for (IndexedWord parent : parents) { Set grandparents = this.getParents(parent); if (grandparents.contains(ancestor)) { return 2; } } return -1; } /** * Return the maximum distance to a least common ancestor. We only search as * high as grandparents. We return -1 if no common parent or grandparent is * found. * * @return The maximum distance to a least common ancestor. */ public int commonAncestor(IndexedWord v1, IndexedWord v2) { if (v1.equals(v2)) { return 0; } Set v1Parents = this.getParents(v1); Set v2Parents = this.getParents(v2); Set v1GrandParents = wordMapFactory.newSet(); Set v2GrandParents = wordMapFactory.newSet(); if (v1Parents.contains(v2) || v2Parents.contains(v1)) { return 1; } // does v1 have any parents that are v2's parents? for (IndexedWord v1Parent : v1Parents) { if (v2Parents.contains(v1Parent)) { return 1; } v1GrandParents.addAll(this.getParents(v1Parent)); } // build v2 grandparents for (IndexedWord v2Parent : v2Parents) { v2GrandParents.addAll(this.getParentList(v2Parent)); } if (v1GrandParents.contains(v2) || v2GrandParents.contains(v1)) { return 2; } // Are any of v1's parents a grandparent of v2? for (IndexedWord v2GrandParent : v2GrandParents) { if (v1Parents.contains(v2GrandParent)) { return 2; } } // Are any of v2's parents a grandparent of v1? for (IndexedWord v1GrandParent : v1GrandParents) { if (v2Parents.contains(v1GrandParent)) { return 2; } } for (IndexedWord v2GrandParent : v2GrandParents) { if (v1GrandParents.contains(v2GrandParent)) { return 2; } } return -1; } /** * Returns the least common ancestor. We only search as high as grandparents. * We return null if no common parent or grandparent is found. Any of the * input words can also be the answer if one is the parent or grandparent of * other, or if the input words are the same. * * @return The least common ancestor. */ public IndexedWord getCommonAncestor(IndexedWord v1, IndexedWord v2) { if (v1.equals(v2)) { return v1; } if (this.isAncestor(v1, v2) >= 1) { return v2; } if (this.isAncestor(v2, v1) >= 1) { return v1; } Set v1Parents = this.getParents(v1); Set v2Parents = this.getParents(v2); Set v1GrandParents = wordMapFactory.newSet(); Set v2GrandParents = wordMapFactory.newSet(); // does v1 have any parents that are v2's parents? for (IndexedWord v1Parent : v1Parents) { if (v2Parents.contains(v1Parent)) { return v1Parent; } v1GrandParents.addAll(this.getParents(v1Parent)); } // does v1 have any grandparents that are v2's parents? for (IndexedWord v1GrandParent : v1GrandParents) { if (v2Parents.contains(v1GrandParent)) { return v1GrandParent; } } // build v2 grandparents for (IndexedWord v2Parent : v2Parents) { v2GrandParents.addAll(this.getParents(v2Parent)); } // does v1 have any parents or grandparents that are v2's grandparents? for (IndexedWord v2GrandParent : v2GrandParents) { if (v1Parents.contains(v2GrandParent)) { return v2GrandParent; } if (v1GrandParents.contains(v2GrandParent)) { return v2GrandParent; } } return null; } // todo [cdm 2013]: Completely RTE-specific methods like this one should be used to a static class of helper methods under RTE // If "det" is true, the search for a child is restricted to the "determiner" // grammatical relation. public boolean matchPatternToVertex(String pattern, IndexedWord vertex, boolean det) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } String pat = pattern.replaceAll("<", ",<"); pat = pat.replaceAll(">", ",>"); String[] nodePath = pat.split(","); for (String s : nodePath) { if (s.equals("")) { continue; } String word = s.substring(1); char dir = s.charAt(0); if (dir == '<') { // look for a matching parent boolean match = false; for (IndexedWord parent : getParents(vertex)) { String lemma = parent.get(CoreAnnotations.LemmaAnnotation.class); if (lemma.equals(word)) { match = true; break; } } if (!match) { return false; } } else if (dir == '>') { if (det) { // look for a matching child with "det" relation Set children = wordMapFactory.newSet(); children.addAll(getChildrenWithReln(vertex, EnglishGrammaticalRelations.DETERMINER)); children.addAll(getChildrenWithReln(vertex, EnglishGrammaticalRelations.PREDETERMINER)); boolean match = false; for (IndexedWord child : children) { String lemma = child.get(CoreAnnotations.LemmaAnnotation.class); if (lemma.equals("")) { lemma = child.word().toLowerCase(); } if (lemma.equals(word)) { match = true; break; } } if (!match) { return false; } } else {// take any relation, except "det" List> children = childPairs(vertex); boolean match = false; for (Pair pair : children) { if (pair.first().toString().equals("det")) continue; IndexedWord child = pair.second(); String lemma = child.get(CoreAnnotations.LemmaAnnotation.class); if (lemma.equals("")) { lemma = child.word().toLowerCase(); } if (lemma.equals(word)) { match = true; break; } } if (!match) { return false; } } } else { throw new RuntimeException("Warning: bad pattern \"%s\"\n" + pattern); } } return true; } // todo [cdm 2013]: Completely RTE-specific methods like this one should be used to a static class of helper methods under RTE public boolean matchPatternToVertex(String pattern, IndexedWord vertex) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } String pat = pattern.replaceAll("<", ",<"); pat = pat.replaceAll(">", ",>"); String[] nodePath = pat.split(","); for (String s : nodePath) { if (s.equals("")) { continue; } String word = s.substring(1); char dir = s.charAt(0); if (dir == '<') { // look for a matching parent boolean match = false; for (IndexedWord parent : getParents(vertex)) { String lemma = parent.get(CoreAnnotations.LemmaAnnotation.class); if (lemma.equals(word)) { match = true; break; } } if (!match) { return false; } } else if (dir == '>') { // look for a matching child boolean match = false; for (IndexedWord child : getChildren(vertex)) { String lemma = child.get(CoreAnnotations.LemmaAnnotation.class); if (lemma == null || lemma.equals("")) { lemma = child.word().toLowerCase(); } if (lemma.equals(word)) { match = true; break; } } if (!match) { return false; } } else { throw new RuntimeException("Warning: bad pattern \"%s\"\n" + pattern); } } return true; } public List getChildList(IndexedWord vertex) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } List result = new ArrayList<>(getChildren(vertex)); Collections.sort(result); return result; } public Set getChildren(IndexedWord vertex) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } return graph.getChildren(vertex); } public boolean hasChildren(IndexedWord vertex) { return outgoingEdgeIterator(vertex).hasNext(); } public List getIncomingEdgesSorted(IndexedWord vertex) { List edges = incomingEdgeList(vertex); Collections.sort(edges); return edges; } public List getOutEdgesSorted(IndexedWord vertex) { List edges = outgoingEdgeList(vertex); Collections.sort(edges); return edges; } public List getParentList(IndexedWord vertex) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } List result = new ArrayList<>(getParents(vertex)); Collections.sort(result); return result; } public Set getParents(IndexedWord vertex) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } return graph.getParents(vertex); } /** * Method for getting the siblings of a particular node. Siblings are the * other children of your parent, where parent is determined as the parent * returned by getParent * * @return collection of sibling nodes (does not include vertex) * the collection is empty if your parent is null */ public Collection getSiblings(IndexedWord vertex) { IndexedWord parent = this.getParent(vertex); if (parent != null) { Set result = wordMapFactory.newSet(); result.addAll(this.getChildren(parent)); result.remove(vertex);//remove this vertex - you're not your own sibling return result; } else { return Collections.emptySet(); } } /** * Helper function for the public function with the same name. *
* Builds up the list backwards. */ private List getPathToRoot(IndexedWord vertex, List used) { used.add(vertex); // TODO: Apparently the order of the nodes in the path to the root // makes a difference for the RTE system. Look into this some more List parents = getParentList(vertex); // Set parents = wordMapFactory.newSet(); // parents.addAll(getParents(vertex)); parents.removeAll(used); if (roots.contains(vertex) || (parents.isEmpty())) { used.remove(used.size() - 1); if (roots.contains(vertex)) return Generics.newArrayList(); else return null; // no path found } for (IndexedWord parent : parents) { List path = getPathToRoot(parent, used); if (path != null) { path.add(parent); used.remove(used.size() - 1); return path; } } used.remove(used.size() - 1); return null; } /** * Find the path from the given node to a root. The path does not include the * given node. Returns an empty list if vertex is a root. Returns null if a * root is inaccessible (should never happen). */ public List getPathToRoot(IndexedWord vertex) { List path = getPathToRoot(vertex, Generics.newArrayList()); if (path != null) Collections.reverse(path); return path; } /** * Return the real syntactic parent of vertex. */ public IndexedWord getParent(IndexedWord vertex) { List path = getPathToRoot(vertex); if (path != null && path.size() > 0) return path.get(0); else return null; } /** * Returns the first {@link edu.stanford.nlp.ling.IndexedWord * IndexedWord} in this {@code SemanticGraph} having the given integer index, * or throws {@code IllegalArgumentException} if no such node is found. */ public IndexedWord getNodeByIndex(int index) throws IllegalArgumentException { IndexedWord node = getNodeByIndexSafe(index); if (node == null) throw new IllegalArgumentException("No SemanticGraph vertex with index " + index); else return node; } /** * Same as above, but returns {@code null} if the index does not exist * (instead of throwing an exception). */ public IndexedWord getNodeByIndexSafe(int index) { for (IndexedWord vertex : vertexSet()) { if (vertex.index() == index) { return vertex; } } return null; } /** * Returns the first {@link edu.stanford.nlp.ling.IndexedWord * IndexedWord} in this SemanticGraph having the given word or * regex, or return null if no such found. */ public IndexedWord getNodeByWordPattern(String pattern) { Pattern p = Pattern.compile(pattern); for (IndexedWord vertex : vertexSet()) { String w = vertex.word(); if ((w == null && pattern == null) || w != null && p.matcher(w).matches()) { return vertex; } } return null; } /** * Returns all nodes of type {@link edu.stanford.nlp.ling.IndexedWord * IndexedWord} in this SemanticGraph having the given word or * regex, or returns empty list if no such found. */ public List getAllNodesByWordPattern(String pattern) { Pattern p = Pattern.compile(pattern); List nodes = new ArrayList<>(); for (IndexedWord vertex : vertexSet()) { String w = vertex.word(); if ((w == null && pattern == null) || w != null && p.matcher(w).matches()) { nodes.add(vertex); } } return nodes; } public List getAllNodesByPartOfSpeechPattern(String pattern) { Pattern p = Pattern.compile(pattern); List nodes = new ArrayList<>(); for (IndexedWord vertex : vertexSet()) { String pos = vertex.tag(); if ((pos == null && pattern == null) || pos != null && p.matcher(pos).matches()) { nodes.add(vertex); } } return nodes; } /** * Returns the set of descendants governed by this node in the graph. * */ public Set descendants(IndexedWord vertex) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } // Do a depth first search Set descendantSet = wordMapFactory.newSet(); descendantsHelper(vertex, descendantSet); return descendantSet; } private void descendantsHelper(IndexedWord curr, Set descendantSet) { if (descendantSet.contains(curr)) { return; } descendantSet.add(curr); for (IndexedWord child : getChildren(curr)) { descendantsHelper(child, descendantSet); } } /** * Returns a list of pairs of a relation name and the child * IndexedFeatureLabel that bears that relation. */ public List> childPairs(IndexedWord vertex) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } List> childPairs = Generics.newArrayList(); for (SemanticGraphEdge e : outgoingEdgeIterable(vertex)) { childPairs.add(new Pair<>(e.getRelation(), e.getTarget())); } return childPairs; } /** * Returns a list of pairs of a relation name and the parent * IndexedFeatureLabel to which we bear that relation. */ public List> parentPairs(IndexedWord vertex) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } List> parentPairs = Generics.newArrayList(); for (SemanticGraphEdge e : incomingEdgeIterable(vertex)) { parentPairs.add(new Pair<>(e.getRelation(), e.getSource())); } return parentPairs; } /** * Returns a set of relations which this node has with its parents. * * @return The set of relations which this node has with its parents. */ public Set relns(IndexedWord vertex) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } Set relns = Generics.newHashSet(); List> pairs = parentPairs(vertex); for (Pair p : pairs) { relns.add(p.first()); } return relns; } /** * Returns the relation that node a has with node b. *
* Note: there may be multiple arcs between a and * b, and this method only returns one relation. */ public GrammaticalRelation reln(IndexedWord a, IndexedWord b) { if (!containsVertex(a)) { throw new IllegalArgumentException(); } List> pairs = childPairs(a); for (Pair p : pairs) if (p.second().equals(b)) return p.first(); return null; } /** * Returns a list of relations which this node has with its children. */ public Set childRelns(IndexedWord vertex) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } Set relns = Generics.newHashSet(); List> pairs = childPairs(vertex); for (Pair p : pairs) { relns.add(p.first()); } return relns; } public Collection getRoots() { return roots; } /** * Initially looks for nodes which have no incoming arcs. If there are any, it * returns a list of them. If not, it looks for nodes from which every other * node is reachable. If there are any, it returns a list of them. Otherwise, * it returns an empty list. * * @return A list of root nodes or an empty list. */ private List getVerticesWithoutParents() { List result = new ArrayList<>(); for (IndexedWord v : vertexSet()) { int inDegree = inDegree(v); if (inDegree == 0) { result.add(v); } } Collections.sort(result); return result; } /** Returns the (first) root of this SemanticGraph. */ public IndexedWord getFirstRoot() { if (roots.isEmpty()) throw new RuntimeException("No roots in graph:\n" + this + "\nFind where this graph was created and make sure you're adding roots."); return roots.iterator().next(); } public void addRoot(IndexedWord root) { addVertex(root); roots.add(root); } /** * This method should not be used if possible. TODO: delete it * * Recomputes the roots, based of actual candidates. This is done to * ensure a rooted tree after a sequence of edits. If the none of the vertices * can act as a root (due to a cycle), keep old rootset, retaining only the * existing vertices on that list. * * TODO: this cannot deal with "Hamburg is a city which everyone likes", as * the intended root node,'Hamburg, is also the dobj of the relative clause. A * possible solution would be to create edgeset routines that allow filtering * over a predicate, and specifically filter out dobj relations for choosing * next best candidate. This could also be useful for dealing with * non-syntactic arcs in the future. TODO: There is also the possibility the * roots could be empty at the end, and will need to be resolved. TODO: * determine if this is a reasonably correct solution. */ public void resetRoots() { Collection newRoots = getVerticesWithoutParents(); if (newRoots.size() > 0) { roots.clear(); roots.addAll(newRoots); return; } /* * else { Collection oldRoots = new * ArrayList(roots); for (IndexedWord oldRoot : oldRoots) { if * (!containsVertex(oldRoot)) removeVertex(oldRoot); } } */ // If no apparent root candidates are available, likely due to loop back // edges (rcmod), find the node that dominates the most nodes, and let // that be the new root. Note this implementation epitomizes K.I.S.S., and // is brain dead and non-optimal, and will require further work. TwoDimensionalCounter nodeDists = TwoDimensionalCounter.identityHashMapCounter(); for (IndexedWord node1 : vertexSet()) { for (IndexedWord node2 : vertexSet()) { // want directed paths only List path = getShortestDirectedPathEdges(node1, node2); if (path != null) { int dist = path.size(); nodeDists.setCount(node1, node2, dist); } } } // K.I.S.S. alg: just sum up and see who's on top, values don't have much // meaning outside of determining dominance. ClassicCounter dominatedEdgeCount = ClassicCounter.identityHashMapCounter(); for (IndexedWord outer : vertexSet()) { for (IndexedWord inner : vertexSet()) { dominatedEdgeCount.incrementCount(outer, nodeDists.getCount(outer, inner)); } } IndexedWord winner = Counters.argmax(dominatedEdgeCount); // TODO: account for multiply rooted graphs later setRoot(winner); } public void setRoot(IndexedWord word) { roots.clear(); roots.add(word); } public void setRoots(Collection words) { roots.clear(); roots.addAll(words); } /** * * @return A sorted list of the vertices * @throws IllegalStateException if this graph is not a DAG */ public List topologicalSort() { return graph.topologicalSort(); } /** * Does the given vertex have at least one child with the given {@code reln} and the lemma childLemma? */ public boolean hasChild(IndexedWord vertex, GrammaticalRelation reln, String childLemma) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } for (SemanticGraphEdge edge : outgoingEdgeIterable(vertex)) { if (edge.getRelation().equals(reln)) { if (edge.getTarget().get(CoreAnnotations.LemmaAnnotation.class).equals(childLemma)) { return true; } } } return false; } /** * Does the given vertex have at least one child with the given {@code reln}? */ public boolean hasChildWithReln(IndexedWord vertex, GrammaticalRelation reln) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } for (SemanticGraphEdge edge : outgoingEdgeIterable(vertex)) { if (edge.getRelation().equals(reln)) { return true; } } return false; } /** * Returns true if vertex has an incoming relation reln * * @param vertex A node in this graph * @param reln The relation we want to check * @return true if vertex has an incoming relation reln */ public boolean hasParentWithReln(IndexedWord vertex, GrammaticalRelation reln) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } for (SemanticGraphEdge edge : incomingEdgeIterable(vertex)) { if (edge.getRelation().equals(reln)) { return true; } } return false; } /** * Returns the first IndexedFeatureLabel bearing a certain grammatical * relation, or null if none. */ public IndexedWord getChildWithReln(IndexedWord vertex, GrammaticalRelation reln) { if (vertex.equals(IndexedWord.NO_WORD)) return null; if (!containsVertex(vertex)) throw new IllegalArgumentException(); for (SemanticGraphEdge edge : outgoingEdgeIterable(vertex)) { if (edge.getRelation().equals(reln)) { return edge.getTarget(); } } return null; } /** * Returns a set of all parents bearing a certain grammatical relation, or an * empty set if none. */ public Set getParentsWithReln(IndexedWord vertex, GrammaticalRelation reln) { if (vertex.equals(IndexedWord.NO_WORD)) return Collections.emptySet(); if (!containsVertex(vertex)) throw new IllegalArgumentException(); Set parentList = wordMapFactory.newSet(); for (SemanticGraphEdge edge : incomingEdgeIterable(vertex)) { if (edge.getRelation().equals(reln)) { parentList.add(edge.getSource()); } } return parentList; } /** * Returns a set of all children bearing a certain grammatical relation, or * an empty set if none. */ public Set getChildrenWithReln(IndexedWord vertex, GrammaticalRelation reln) { if (vertex.equals(IndexedWord.NO_WORD)) return Collections.emptySet(); if (!containsVertex(vertex)) throw new IllegalArgumentException(); Set childList = wordMapFactory.newSet(); for (SemanticGraphEdge edge : outgoingEdgeIterable(vertex)) { if (edge.getRelation().equals(reln)) { childList.add(edge.getTarget()); } } return childList; } /** * Returns a set of all children bearing one of a set of grammatical * relations, or an empty set if none. * * NOTE: this will only work for relation types that are classes. Those that * are collapsed are currently not handled correctly since they are identified * by strings. */ public Set getChildrenWithRelns(IndexedWord vertex, Collection relns) { if (vertex.equals(IndexedWord.NO_WORD)) return Collections.emptySet(); if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } Set childList = wordMapFactory.newSet(); for (SemanticGraphEdge edge : outgoingEdgeIterable(vertex)) { if (relns.contains(edge.getRelation())) { childList.add(edge.getTarget()); } } return childList; } /** * Given a governor, dependent, and the relation between them, returns the * SemanticGraphEdge object of that arc if it exists, otherwise returns null. */ public SemanticGraphEdge getEdge(IndexedWord gov, IndexedWord dep, GrammaticalRelation reln) { Collection edges = getAllEdges(gov, dep); if (edges != null) { for (SemanticGraphEdge edge : edges) { if (!edge.getSource().equals(gov)) continue; if ((edge.getRelation().equals(reln))) { return edge; } } } return null; } public boolean isNegatedVertex(IndexedWord vertex) { if (vertex == IndexedWord.NO_WORD) { return false; } if (!containsVertex(vertex)) { throw new IllegalArgumentException("Vertex " + vertex + " not in graph " + this); } return (hasChildWithReln(vertex, EnglishGrammaticalRelations.NEGATION_MODIFIER) || hasChild(vertex, GrammaticalRelation.DEPENDENT, "nor")); } private boolean isNegatedVerb(IndexedWord vertex) { if (!containsVertex(vertex)) { throw new IllegalArgumentException(); } return (vertex.tag().startsWith("VB") && isNegatedVertex(vertex)); } /** * Check if the vertex is in a "conditional" context. Right now it's only * returning true if vertex has an "if" marker attached to it, i.e. the vertex * is in a clause headed by "if". */ public boolean isInConditionalContext(IndexedWord vertex) { for (IndexedWord child : getChildrenWithReln(vertex, EnglishGrammaticalRelations.MARKER)) { if (child.word().equalsIgnoreCase("if")) { return true; } } return false; } // Obsolete; use functions in rte.feat.NegPolarityFeaturizers instead public boolean attachedNegatedVerb(IndexedWord vertex) { for (IndexedWord parent : getParents(vertex)) { if (isNegatedVerb(parent)) { return true; } } return false; } /** Returns true iff this vertex stands in the "aux" relation to (any of) * its parent(s). */ public boolean isAuxiliaryVerb(IndexedWord vertex) { Set relns = relns(vertex); if (relns.isEmpty()) return false; boolean result = relns.contains(EnglishGrammaticalRelations.AUX_MODIFIER) || relns.contains(EnglishGrammaticalRelations.AUX_PASSIVE_MODIFIER); // log.info("I say " + vertex + (result ? " is" : " is not") + // " an aux"); return result; } public Set getLeafVertices() { Set result = wordMapFactory.newSet(); for (IndexedWord v : vertexSet()) { if (outDegree(v) == 0) { result.add(v); } } return result; } /** * Returns the number of nodes in the graph */ public int size() { return this.vertexSet().size(); } /** * Returns all nodes reachable from root. * * @param root the root node of the subgraph * @return all nodes in subgraph */ public Set getSubgraphVertices(IndexedWord root) { Set result = wordMapFactory.newSet(); result.add(root); List queue = Generics.newLinkedList(); queue.add(root); while (! queue.isEmpty()) { IndexedWord current = queue.remove(0); for (IndexedWord child : this.getChildren(current)) { if ( ! result.contains(child)) { result.add(child); queue.add(child); } } } return result; } /** * @return true if the graph contains no cycles. */ public boolean isDag() { Set unused = wordMapFactory.newSet(); unused.addAll(vertexSet()); while (!unused.isEmpty()) { IndexedWord arbitrary = unused.iterator().next(); boolean result = isDagHelper(arbitrary, unused, wordMapFactory.newSet()); if (result) { return false; } } return true; } /** * * @param root root node of the subgraph. * @return true if the subgraph rooted at root contains no cycles. */ public boolean isDag(IndexedWord root) { Set unused = wordMapFactory.newSet(); unused.addAll(this.getSubgraphVertices(root)); while (!unused.isEmpty()) { IndexedWord arbitrary = unused.iterator().next(); boolean result = isDagHelper(arbitrary, unused, wordMapFactory.newSet()); if (result) { return false; } } return true; } private boolean isDagHelper(IndexedWord current, Set unused, Set trail) { if (trail.contains(current)) { return true; } else if (!unused.contains(current)) { return false; } unused.remove(current); trail.add(current); for (IndexedWord child : getChildren(current)) { boolean result = isDagHelper(child, unused, trail); if (result) { return true; } } trail.remove(current); return false; } // ============================================================================ // String display // ============================================================================ /** * Recursive depth first traversal. Returns a structured representation of the * dependency graph. * * Example: * *

   *  -> need-3 (root)
   *    -> We-0 (nsubj)
   *    -> do-1 (aux)
   *    -> n't-2 (neg)
   *    -> badges-6 (dobj)
   *      -> no-4 (det)
   *      -> stinking-5 (amod)
   * 
* * This is a quite ugly way to print a SemanticGraph. * You might instead want to try {@link #toString(OutputFormat)}. */ @Override public String toString() { return toString(CoreLabel.OutputFormat.VALUE_TAG); } public String toString(CoreLabel.OutputFormat wordFormat) { Collection rootNodes = getRoots(); if (rootNodes.isEmpty()) { // Shouldn't happen, but return something! return toString(OutputFormat.READABLE); } StringBuilder sb = new StringBuilder(); Set used = wordMapFactory.newSet(); for (IndexedWord root : rootNodes) { sb.append("-> ").append(root.toString(wordFormat)).append(" (root)\n"); recToString(root, wordFormat, sb, 1, used); } Set nodes = wordMapFactory.newSet(); nodes.addAll(vertexSet()); nodes.removeAll(used); while (!nodes.isEmpty()) { IndexedWord node = nodes.iterator().next(); sb.append(node.toString(wordFormat)).append("\n"); recToString(node, wordFormat, sb, 1, used); nodes.removeAll(used); } return sb.toString(); } // helper for toString() private void recToString(IndexedWord curr, CoreLabel.OutputFormat wordFormat, StringBuilder sb, int offset, Set used) { used.add(curr); List edges = outgoingEdgeList(curr); Collections.sort(edges); for (SemanticGraphEdge edge : edges) { IndexedWord target = edge.getTarget(); sb.append(space(2 * offset)).append("-> ").append(target.toString(wordFormat)).append(" (").append(edge.getRelation()).append(")\n"); if (!used.contains(target)) { // recurse recToString(target, wordFormat, sb, offset + 1, used); } } } private static String space(int width) { StringBuilder b = new StringBuilder(); for (int i = 0; i < width; i++) { b.append(' '); } return b.toString(); } public String toRecoveredSentenceString() { StringBuilder sb = new StringBuilder(); boolean pastFirst = false; for (IndexedWord word : vertexListSorted()) { if (pastFirst) { sb.append(' '); } pastFirst = true; sb.append(word.word()); } return sb.toString(); } public String toRecoveredSentenceStringWithIndexMarking() { StringBuilder sb = new StringBuilder(); boolean pastFirst = false; int index = 0; for (IndexedWord word : vertexListSorted()) { if (pastFirst) { sb.append(' '); } pastFirst = true; sb.append(word.word()); sb.append("("); sb.append(index++); sb.append(")"); } return sb.toString(); } /** * Similar to toRecoveredString, but will fill in words that were * collapsed into relations (i.e. prep_for --> 'for'). Mostly to deal with * collapsed dependency trees. * * TODO: consider merging with toRecoveredString() NOTE: assumptions currently * are for English. NOTE: currently takes immediate successors to current word * and expands them. This assumption may not be valid for other conditions or * languages? */ public String toEnUncollapsedSentenceString() { List uncompressedList = Generics.newLinkedList(vertexSet()); List> specifics = Generics.newArrayList(); // Collect the specific relations and the governed nodes, and then process // them one by one, // to avoid concurrent modification exceptions. for (IndexedWord word : vertexSet()) { for (SemanticGraphEdge edge : getIncomingEdgesSorted(word)) { GrammaticalRelation relation = edge.getRelation(); // Extract the specific: need to account for possibility that relation // can // be a String or GrammaticalRelation (how did it happen this way?) String specific = relation.getSpecific(); if (specific == null) { if (edge.getRelation().equals(EnglishGrammaticalRelations.AGENT)) { specific = "by"; } } // Insert the specific at the leftmost token that is not governed by // this node. if (specific != null) { Pair specPair = new Pair<>(specific, word); specifics.add(specPair); } } } for (Pair tuple : specifics) { insertSpecificIntoList(tuple.first(), tuple.second(), uncompressedList); } return StringUtils.join(uncompressedList, " "); } /** * Inserts the given specific portion of an uncollapsed relation back into the * targetList * * @param specific Specific relation to put in. * @param relnTgtNode Node governed by the uncollapsed relation * @param tgtList Target List of words */ private void insertSpecificIntoList(String specific, IndexedWord relnTgtNode, List tgtList) { int currIndex = tgtList.indexOf(relnTgtNode); Set descendants = descendants(relnTgtNode); IndexedWord specificNode = new IndexedWord(); specificNode.set(CoreAnnotations.LemmaAnnotation.class, specific); specificNode.set(CoreAnnotations.TextAnnotation.class, specific); specificNode.set(CoreAnnotations.OriginalTextAnnotation.class, specific); while ((currIndex >= 1) && descendants.contains(tgtList.get(currIndex - 1))) { currIndex--; } tgtList.add(currIndex, specificNode); } public enum OutputFormat { LIST, XML, READABLE, RECURSIVE } /** * Returns a String representation of the result of this set of typed * dependencies in a user-specified format. Currently, four formats are * supported ({@link OutputFormat}): *
*
list
*
(Default.) Formats the dependencies as logical relations, as * exemplified by the following: * *
   *  nsubj(died-1, Sam-0)
   *  tmod(died-1, today-2)
   * 
* *
*
readable
*
Formats the dependencies as a table with columns dependent, relation, and governor, as exemplified by the * following: * *
   *  Sam-0               nsubj               died-1
   *  today-2             tmod                died-1
   * 
* *
*
xml
*
Formats the dependencies as XML, as exemplified by the following: * *
   *  <dependencies>
   *    <dep type="nsubj">
   *      <governor idx="1">died</governor>
   *      <dependent idx="0">Sam</dependent>
   *    </dep>
   *    <dep type="tmod">
   *      <governor idx="1">died</governor>
   *      <dependent idx="2">today</dependent>
   *    </dep>
   *  </dependencies>
   * 
*
* *
recursive
*
* The default output for {@link #toString()} *
* *
* * @param format A {@code String} specifying the desired format * @return A {@code String} representation of the typed dependencies in * this {@code GrammaticalStructure} */ public String toString(OutputFormat format) { switch(format) { case XML: return toXMLString(); case READABLE: return toReadableString(); case LIST: return toList(); case RECURSIVE: return toString(); default: throw new IllegalArgumentException("Unsupported format " + format); } } /** * Returns a String representation of this graph as a list of typed * dependencies, as exemplified by the following: * *
   *  nsubj(died-6, Sam-3)
   *  tmod(died-6, today-9)
   * 
* * @return a String representation of this set of typed * dependencies */ public String toList() { StringBuilder buf = new StringBuilder(); for (IndexedWord root : getRoots()) { buf.append("root(ROOT-0, "); buf.append(root.toString(CoreLabel.OutputFormat.VALUE_INDEX)).append(")\n"); } for (SemanticGraphEdge edge : this.edgeListSorted()) { buf.append(edge.getRelation().toString()).append("("); buf.append(edge.getSource().toString(CoreLabel.OutputFormat.VALUE_INDEX)).append(", "); buf.append(edge.getTarget().toString(CoreLabel.OutputFormat.VALUE_INDEX)).append(")\n"); } return buf.toString(); } /** * Similar to toList(), but uses POS tags instead of word and index. */ public String toPOSList() { StringBuilder buf = new StringBuilder(); for (SemanticGraphEdge edge : this.edgeListSorted()) { buf.append(edge.getRelation().toString()).append("("); buf.append(edge.getSource().toString()).append(","); buf.append(edge.getTarget()).append(")\n"); } return buf.toString(); } private String toReadableString() { StringBuilder buf = new StringBuilder(); buf.append(String.format("%-20s%-20s%-20s%n", "dep", "reln", "gov")); buf.append(String.format("%-20s%-20s%-20s%n", "---", "----", "---")); for (IndexedWord root : getRoots()) { buf.append(String.format("%-20s%-20s%-20s%n", root.toString(CoreLabel.OutputFormat.VALUE_TAG_INDEX), "root", "root")); } for (SemanticGraphEdge edge : this.edgeListSorted()) { buf.append(String.format("%-20s%-20s%-20s%n", edge.getTarget().toString(CoreLabel.OutputFormat.VALUE_TAG_INDEX), edge.getRelation().toString(), edge.getSource().toString(CoreLabel.OutputFormat.VALUE_TAG_INDEX))); } return buf.toString(); } private String toXMLString() { StringBuilder buf = new StringBuilder("\n"); for (SemanticGraphEdge edge : this.edgeListSorted()) { String reln = edge.getRelation().toString(); String gov = (edge.getSource()).word(); int govIdx = (edge.getSource()).index(); String dep = (edge.getTarget()).word(); int depIdx = (edge.getTarget()).index(); buf.append(" \n"); buf.append(" ").append(gov).append("\n"); buf.append(" ").append(dep).append("\n"); buf.append(" \n"); } buf.append("\n"); return buf.toString(); } public String toCompactString() { return toCompactString(false); } public String toCompactString(boolean showTags) { StringBuilder sb = new StringBuilder(); Set used = wordMapFactory.newSet(); Collection roots = getRoots(); if (roots.isEmpty()) { if (size() == 0) { return "[EMPTY_SEMANTIC_GRAPH]"; } else { return "[UNROOTED_SEMANTIC_GRAPH]"; } // return toString("readable"); } for (IndexedWord root : roots) { toCompactStringHelper(root, sb, used, showTags); } return sb.toString(); } private void toCompactStringHelper(IndexedWord node, StringBuilder sb, Set used, boolean showTags) { used.add(node); try { boolean isntLeaf = (outDegree(node) > 0); if (isntLeaf) { sb.append("["); } sb.append(node.word()); if (showTags) { sb.append("/"); sb.append(node.tag()); } for (SemanticGraphEdge edge : getOutEdgesSorted(node)) { IndexedWord target = edge.getTarget(); sb.append(" ").append(edge.getRelation()).append(">"); if (!used.contains(target)) { // avoid infinite loop toCompactStringHelper(target, sb, used, showTags); } else { sb.append(target.word()); if (showTags) { sb.append("/"); sb.append(target.tag()); } } } if (isntLeaf) { sb.append("]"); } } catch (IllegalArgumentException e) { log.info("WHOA! SemanticGraph.toCompactStringHelper() ran into problems at node " + node); throw new IllegalArgumentException(e); } } /** * Returns a String representation of this semantic graph, * formatted by the default semantic graph formatter. */ public String toFormattedString() { return formatter.formatSemanticGraph(this); } /** * Returns a String representation of this semantic graph, * formatted by the supplied semantic graph formatter. */ public String toFormattedString(SemanticGraphFormatter formatter) { return formatter.formatSemanticGraph(this); } /** * Pretty-prints this semantic graph to System.out, formatted by * the supplied semantic graph formatter. */ public void prettyPrint(SemanticGraphFormatter formatter) { System.out.println(formatter.formatSemanticGraph(this)); } /** * Pretty-prints this semantic graph to System.out, formatted by * the default semantic graph formatter. */ public void prettyPrint() { System.out.println(formatter.formatSemanticGraph(this)); } /** * Returns an unnamed dot format digraph. * Nodes will be labeled with the word and edges will be labeled * with the dependency. */ public String toDotFormat() { return toDotFormat(""); } /** * Returns a dot format digraph with the given name. * Nodes will be labeled with the word and edges will be labeled * with the dependency. */ public String toDotFormat(String graphname) { return toDotFormat(graphname, CoreLabel.OutputFormat.VALUE_TAG_INDEX); } public String toDotFormat(String graphname, CoreLabel.OutputFormat indexedWordFormat) { StringBuilder output = new StringBuilder(); output.append("digraph " + graphname + " {\n"); for (IndexedWord word : graph.getAllVertices()) { output.append(" N_" + word.index() + " [label=\"" + word.toString(indexedWordFormat) + "\"];\n"); } for (SemanticGraphEdge edge : graph.edgeIterable()) { output.append(" N_" + edge.getSource().index() + " -> N_" + edge.getTarget().index() + " [label=\"" + edge.getRelation() + "\"];\n"); } output.append("}\n"); return output.toString(); } public SemanticGraphEdge addEdge(IndexedWord s, IndexedWord d, GrammaticalRelation reln, double weight, boolean isExtra) { SemanticGraphEdge newEdge = new SemanticGraphEdge(s, d, reln, weight, isExtra); graph.add(s, d, newEdge); return newEdge; } public SemanticGraphEdge addEdge(SemanticGraphEdge edge) { SemanticGraphEdge newEdge = new SemanticGraphEdge(edge.getGovernor(), edge.getDependent(), edge.getRelation(), edge.getWeight(), edge.isExtra()); graph.add(edge.getGovernor(), edge.getDependent(), newEdge); return newEdge; } // ======================================================================= /** * Tries to parse a String representing a SemanticGraph. Right now it's fairly * dumb, could be made more sophisticated. *

* * Example: "[ate subj>Bill dobj>[muffins compound>blueberry]]" *

* * This is the same format generated by toCompactString(). */ public static SemanticGraph valueOf(String s) { return (new SemanticGraphParsingTask(s)).parse(); } public SemanticGraph() { graph = new DirectedMultiGraph<>(outerMapFactory, innerMapFactory); roots = wordMapFactory.newSet(); } /** * Returns a new SemanticGraph which is a copy of the supplied SemanticGraph. * Both the nodes ({@link IndexedWord}s) and the edges (SemanticGraphEdges) * are copied. */ public SemanticGraph(SemanticGraph g) { graph = new DirectedMultiGraph<>(g.graph); roots = wordMapFactory.newSet(g.roots); } /** * Copies a the current graph, but also sets the mapping from the old to new * graph. */ public SemanticGraph(SemanticGraph g, Map prevToNewMap) { graph = new DirectedMultiGraph<>(outerMapFactory, innerMapFactory); if (prevToNewMap == null) { prevToNewMap = wordMapFactory.newMap(); } Set vertexes = g.vertexSet(); for (IndexedWord vertex : vertexes) { IndexedWord newVertex = new IndexedWord(vertex); newVertex.setCopyCount(vertex.copyCount()); addVertex(newVertex); prevToNewMap.put(vertex, newVertex); } roots = wordMapFactory.newSet(); for (IndexedWord oldRoot : g.getRoots()) { roots.add(prevToNewMap.get(oldRoot)); } for (SemanticGraphEdge edge : g.edgeIterable()) { IndexedWord newGov = prevToNewMap.get(edge.getGovernor()); IndexedWord newDep = prevToNewMap.get(edge.getDependent()); addEdge(newGov, newDep, edge.getRelation(), edge.getWeight(), edge.isExtra()); } } /** * This is the constructor used by the parser. */ public SemanticGraph(Collection dependencies) { graph = new DirectedMultiGraph<>(outerMapFactory, innerMapFactory); roots = wordMapFactory.newSet(); for (TypedDependency d : dependencies) { IndexedWord gov = d.gov(); IndexedWord dep = d.dep(); GrammaticalRelation reln = d.reln(); if (reln != ROOT) { // the root relation only points to the root: the governor is a fake node that we don't want to add in the graph // It is unnecessary to call addVertex, since addEdge will // implicitly add vertices if needed //addVertex(gov); //addVertex(dep); addEdge(gov, dep, reln, Double.NEGATIVE_INFINITY, d.extra()); } else { //it's the root and we add it addVertex(dep); roots.add(dep); } } // there used to be an if clause that filtered out the case of empty // dependencies. However, I could not understand (or replicate) the error // it alluded to, and it led to empty dependency graphs for very short // fragments, // which meant they were ignored by the RTE system. Changed. (pado) // See also SemanticGraphFactory.makeGraphFromTree(). } /** * Returns the nodes in the shortest undirected path between two edges in the * graph. if source == target, returns a singleton list * * @param source * node * @param target * node * @return nodes along shortest undirected path from source to target, in * order */ public List getShortestUndirectedPathNodes(IndexedWord source, IndexedWord target) { return graph.getShortestPath(source, target, false); } public List getShortestUndirectedPathEdges(IndexedWord source, IndexedWord target) { return graph.getShortestPathEdges(source, target, false); } /** * Returns the shortest directed path between two edges in the graph. * * @param source * node * @param target * node * @return shortest directed path from source to target */ public List getShortestDirectedPathNodes(IndexedWord source, IndexedWord target) { return graph.getShortestPath(source, target, true); } public List getShortestDirectedPathEdges(IndexedWord source, IndexedWord target) { return graph.getShortestPathEdges(source, target, true); } public SemanticGraph makeSoftCopy() { SemanticGraph newSg = new SemanticGraph(); if ( ! this.roots.isEmpty()) newSg.setRoot(this.getFirstRoot()); for (SemanticGraphEdge edge : this.edgeIterable()) { newSg.addEdge(edge.getSource(), edge.getTarget(), edge.getRelation(), edge.getWeight(), edge.isExtra()); } return newSg; } // ============================================================================ private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("([^-]+)-([0-9]+)"); /** * This nested class is a helper for valueOf(). It represents the task of * parsing a specific String representing a SemanticGraph. */ private static class SemanticGraphParsingTask extends StringParsingTask { private SemanticGraph sg; private Set indexesUsed = Generics.newHashSet(); public SemanticGraphParsingTask(String s) { super(s); } @Override public SemanticGraph parse() { sg = new SemanticGraph(); try { readWhiteSpace(); if (!isLeftBracket(peek())) return null; readDep(null, null); return sg; } catch (ParserException e) { log.info("SemanticGraphParser warning: " + e.getMessage()); return null; } } private void readDep(IndexedWord gov, String reln) { readWhiteSpace(); if (!isLeftBracket(peek())) { // it's a leaf String label = readName(); IndexedWord dep = makeVertex(label); sg.addVertex(dep); if (gov == null) sg.roots.add(dep); sg.addEdge(gov, dep, GrammaticalRelation.valueOf(reln), Double.NEGATIVE_INFINITY, false); } else { readLeftBracket(); String label = readName(); IndexedWord dep = makeVertex(label); sg.addVertex(dep); if (gov == null) sg.roots.add(dep); if (gov != null && reln != null) { sg.addEdge(gov, dep, GrammaticalRelation.valueOf(reln), Double.NEGATIVE_INFINITY, false); } readWhiteSpace(); while (!isRightBracket(peek()) && !isEOF) { reln = readName(); readRelnSeparator(); readDep(dep, reln); readWhiteSpace(); } readRightBracket(); } } private IndexedWord makeVertex(String word) { Integer index; // initialized below Pair wordAndIndex = readWordAndIndex(word); if (wordAndIndex != null) { word = wordAndIndex.first(); index = wordAndIndex.second(); } else { index = getNextFreeIndex(); } indexesUsed.add(index); // Note that, despite the use of indexesUsed and getNextFreeIndex(), // nothing is actually enforcing that no indexes are used twice. This // could occur if some words in the string representation being parsed // come with index markers and some do not. IndexedWord ifl = new IndexedWord(null, 0, index); // log.info("SemanticGraphParsingTask>>> word = " + word); // log.info("SemanticGraphParsingTask>>> index = " + index); // log.info("SemanticGraphParsingTask>>> indexesUsed = " + // indexesUsed); String[] wordAndTag = word.split("/"); ifl.set(CoreAnnotations.TextAnnotation.class, wordAndTag[0]); ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]); if (wordAndTag.length > 1) ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]); return ifl; } private static Pair readWordAndIndex(String word) { Matcher matcher = WORD_AND_INDEX_PATTERN.matcher(word); if (!matcher.matches()) { return null; } else { word = matcher.group(1); Integer index = Integer.valueOf(matcher.group(2)); return new Pair<>(word, index); } } private Integer getNextFreeIndex() { int i = 0; while (indexesUsed.contains(i)) i++; return i; } private void readLeftBracket() { // System.out.println("Read left."); readWhiteSpace(); char ch = read(); if (!isLeftBracket(ch)) throw new ParserException("Expected left paren!"); } private void readRightBracket() { // System.out.println("Read right."); readWhiteSpace(); char ch = read(); if (!isRightBracket(ch)) throw new ParserException("Expected right paren!"); } private void readRelnSeparator() { readWhiteSpace(); if (isRelnSeparator(peek())) read(); } private static boolean isLeftBracket(char ch) { return ch == '['; } private static boolean isRightBracket(char ch) { return ch == ']'; } private static boolean isRelnSeparator(char ch) { return ch == '>'; } @Override protected boolean isPunct(char ch) { return isLeftBracket(ch) || isRightBracket(ch) || isRelnSeparator(ch); } } // end SemanticGraphParsingTask // ======================================================================= @Override public boolean equals(Object o) { if (o == this) { return true; } if (!(o instanceof SemanticGraph)) { return false; } SemanticGraph g = (SemanticGraph) o; return graph.equals(g.graph) && roots.equals(g.roots); } @Override public int hashCode() { return graph.hashCode(); } /** * Given a semantic graph, and a target relation, returns a list of all * relations (edges) matching. * */ public List findAllRelns(GrammaticalRelation tgtRelation) { ArrayList relns = new ArrayList<>(); for (SemanticGraphEdge edge : edgeIterable()) { GrammaticalRelation edgeRelation = edge.getRelation(); if ((edgeRelation != null) && (edgeRelation.equals(tgtRelation))) { relns.add(edge); } } return relns; } /** * Delete all duplicate edges. * */ public void deleteDuplicateEdges() { graph.deleteDuplicateEdges(); } /** Returns a list of TypedDependency in the graph. * This method goes through all SemanticGraphEdge and converts them * to TypedDependency. * * @return A List of TypedDependency in the graph */ public Collection typedDependencies() { Collection dependencies = new ArrayList<>(); IndexedWord root = null; for (IndexedWord node : roots) { if (root == null) { root = new IndexedWord(node.docID(), node.sentIndex(), 0); root.setValue("ROOT"); } TypedDependency dependency = new TypedDependency(ROOT, root, node); dependencies.add(dependency); } for (SemanticGraphEdge e : this.edgeIterable()){ TypedDependency dependency = new TypedDependency(e.getRelation(), e.getGovernor(), e.getDependent()); if (e.isExtra()) { dependency.setExtra(); } dependencies.add(dependency); } return dependencies; } /** * Returns the span of the subtree yield of this node. That is, the span of all the nodes under it. * In the case of projective graphs, the words in this span are also the yield of the constituent rooted * at this node. * * @param word The word acting as the root of the constituent we are finding. * @return A span, represented as a pair of integers. The span is zero indexed. The begin is inclusive and the end is exclusive. */ public Pair yieldSpan(IndexedWord word) { int min = Integer.MAX_VALUE; int max = Integer.MIN_VALUE; Stack fringe = new Stack<>(); fringe.push(word); while (!fringe.isEmpty()) { IndexedWord parent = fringe.pop(); min = Math.min(min, parent.index() - 1); max = Math.max(max, parent.index()); for (SemanticGraphEdge edge : outgoingEdgeIterable(parent)) { if (!edge.isExtra()) { fringe.push(edge.getDependent()); } } } return Pair.makePair(min, max); } /** * Store a comment line with this semantic graph. * * @param comment */ public void addComment(String comment) { this.comments.add(comment); } /** * Return the list of comments stored with this graph. * * @return A list of comments. */ public List getComments() { return this.comments; } private static final long serialVersionUID = 1L; }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy