Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
edu.stanford.nlp.semgraph.semgrex.NodePattern Maven / Gradle / Ivy
package edu.stanford.nlp.semgraph.semgrex;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;
public class NodePattern extends SemgrexPattern {
private static final long serialVersionUID = -5981133879119233896L;
private GraphRelation reln;
private boolean negDesc;
/**
* A hash map from a key to a pair (case_sensitive_pattern, case_insensitive_pattern)
* If the type of the entry is a String, then string comparison is safe.
* If the type is a Boolean, it will always either match or not match corresponding to the Boolean
* value.
* Otherwise, the type will be a Pattern, and you must use Pattern.matches().
*/
private Map> attributes;
private boolean isRoot;
private boolean isLink;
private boolean isEmpty;
private String name;
private String descString;
SemgrexPattern child;
// specifies the groups in a regex that are captured as
// matcher-global string variables
private List> variableGroups;
public NodePattern(GraphRelation r, boolean negDesc,
Map attrs,
boolean root, boolean empty, String name) {
this(r, negDesc, attrs, root, empty, name,
new ArrayList<>(0));
}
// TODO: there is no capacity for named variable groups in the parser right now
public NodePattern(GraphRelation r, boolean negDesc,
Map attrs,
boolean root, boolean empty, String name,
List> variableGroups) {
this.reln = r;
this.negDesc = negDesc;
attributes = Generics.newHashMap();
descString = "{";
for (Map.Entry entry : attrs.entrySet()) {
if (!descString.equals("{"))
descString += ";";
String key = entry.getKey();
String value = entry.getValue();
// Add the attributes for this key
if (value.equals("__")) {
attributes.put(key, Pair.makePair(true, true));
} else if (value.matches("/.*/")) {
boolean isRegexp = false;
for (int i = 1; i < value.length() - 1; ++i) {
char chr = value.charAt(i);
if ( !( (chr >= 'A' && chr <= 'Z') || (chr >= 'a' && chr <= 'z') || (chr >= '0' && chr <= '9') ) ) {
isRegexp = true;
break;
}
}
String patternContent = value.substring(1, value.length() - 1);
if (isRegexp) {
attributes.put(key, Pair.makePair(
Pattern.compile(patternContent),
Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE))
);
} else {
attributes.put(key, Pair.makePair(patternContent, patternContent));
}
} else { // raw description
attributes.put(key, Pair.makePair(value, value));
}
// if (value.equals("__")) {
// attributes.put(key, Pair.makePair(Pattern.compile(".*"), Pattern.compile(".*", Pattern.CASE_INSENSITIVE)));
// } else if (value.matches("/.*/")) {
// attributes.put(key, Pair.makePair(
// Pattern.compile(value.substring(1, value.length() - 1)),
// Pattern.compile(value.substring(1, value.length() - 1), Pattern.CASE_INSENSITIVE))
// );
// } else { // raw description
// attributes.put(key, Pair.makePair(
// Pattern.compile("^(" + value + ")$"),
// Pattern.compile("^(" + value + ")$", Pattern.CASE_INSENSITIVE))
// );
// }
descString += (key + ':' + value);
}
if (root)
descString += "$";
else if (empty)
descString += "#";
descString += '}';
this.name = name;
this.child = null;
this.isRoot = root;
this.isEmpty = empty;
this.variableGroups = variableGroups;
}
@SuppressWarnings("unchecked")
public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean ignoreCase) {
// System.out.println(node.word());
if (isRoot)
return (negDesc ? !sg.getRoots().contains(node) : sg.getRoots().contains(node));
// System.out.println("not root");
if (isEmpty)
return (negDesc ? !node.equals(IndexedWord.NO_WORD) : node.equals(IndexedWord.NO_WORD));
// System.err.println("Attributes are: " + attributes);
for (Map.Entry> attr : attributes.entrySet()) {
String key = attr.getKey();
// System.out.println(key);
String nodeValue;
// if (key.equals("idx"))
// nodeValue = Integer.toString(node.index());
// else {
Class c = Env.lookupAnnotationKey(env, key);
//find class for the key
Object value = node.get(c);
if (value == null)
nodeValue = null;
else
nodeValue = value.toString();
// }
// System.out.println(nodeValue);
if (nodeValue == null)
return negDesc;
// Get the node pattern
Object toMatch = ignoreCase ? attr.getValue().second : attr.getValue().first;
boolean matches;
if (toMatch instanceof Boolean) {
matches = ((Boolean) toMatch);
} else if (toMatch instanceof String) {
if (ignoreCase) {
matches = nodeValue.equalsIgnoreCase(toMatch.toString());
} else {
matches = nodeValue.equals(toMatch.toString());
}
} else if (toMatch instanceof Pattern) {
matches = ((Pattern) toMatch).matcher(nodeValue).matches();
} else {
throw new IllegalStateException("Unknown matcher type: " + toMatch + " (of class + " + toMatch.getClass() + ")");
}
if (!matches) {
// System.out.println("doesn't match");
// System.out.println("");
return negDesc;
}
}
// System.out.println("matches");
// System.out.println("");
return !negDesc;
}
public void makeLink() {
isLink = true;
}
public boolean isRoot() {
return isRoot;
}
public boolean isNull() {
return isEmpty;
}
@Override
public String localString() {
return toString(true, false);
}
@Override
public String toString() {
return toString(true, true);
}
@Override
public String toString(boolean hasPrecedence) {
return toString(hasPrecedence, true);
}
public String toString(boolean hasPrecedence, boolean addChild) {
StringBuilder sb = new StringBuilder();
if (isNegated()) {
sb.append('!');
}
if (isOptional()) {
sb.append('?');
}
sb.append(' ');
if (reln != null) {
sb.append(reln.toString());
sb.append(' ');
}
if (!hasPrecedence && addChild && child != null) {
sb.append('(');
}
if (negDesc) {
sb.append('!');
}
sb.append(descString);
if (name != null) {
sb.append('=').append(name);
}
if (addChild && child != null) {
sb.append(' ');
sb.append(child.toString(false));
if (!hasPrecedence) {
sb.append(')');
}
}
return sb.toString();
}
@Override
public void setChild(SemgrexPattern n) {
child = n;
}
@Override
public List getChildren() {
if (child == null) {
return Collections.emptyList();
} else {
return Collections.singletonList(child);
}
}
public String getName() {
return name;
}
@Override
public SemgrexMatcher matcher(SemanticGraph sg, IndexedWord node,
Map namesToNodes,
Map namesToRelations,
VariableStrings variableStrings,
boolean ignoreCase) {
return new NodeMatcher(this, sg, null, null, true, node, namesToNodes, namesToRelations, variableStrings,
ignoreCase);
}
@Override
public SemgrexMatcher matcher(SemanticGraph sg,
Alignment alignment, SemanticGraph sg_align,
boolean hyp, IndexedWord node,
Map namesToNodes,
Map namesToRelations,
VariableStrings variableStrings,
boolean ignoreCase) {
// System.err.println("making matcher: " +
// ((reln.equals(GraphRelation.ALIGNED_ROOT)) ? false : hyp));
return new NodeMatcher(this, sg, alignment, sg_align,
(reln.equals(GraphRelation.ALIGNED_ROOT)) ? false : hyp,
(reln.equals(GraphRelation.ALIGNED_ROOT)) ? sg_align.getFirstRoot() : node,
namesToNodes, namesToRelations,
variableStrings, ignoreCase);
}
private static class NodeMatcher extends SemgrexMatcher {
/**
* when finished = true, it means I have exhausted my potential
* node match candidates.
*/
private boolean finished = false;
private Iterator nodeMatchCandidateIterator = null;
private final NodePattern myNode;
/**
* a NodeMatcher only has a single child; if it is the left side
* of multiple relations, a CoordinationMatcher is used.
*/
private SemgrexMatcher childMatcher;
private boolean matchedOnce = false;
private boolean committedVariables = false;
private String nextMatchReln = null;
private IndexedWord nextMatch = null;
private boolean namedFirst = false;
private boolean relnNamedFirst = false;
private boolean ignoreCase = false;
// universal: childMatcher is null if and only if
// myNode.child == null OR resetChild has never been called
public NodeMatcher(NodePattern n, SemanticGraph sg, Alignment alignment, SemanticGraph sg_align, boolean hyp,
IndexedWord node, Map namesToNodes, Map namesToRelations,
VariableStrings variableStrings, boolean ignoreCase) {
super(sg, alignment, sg_align, hyp, node, namesToNodes, namesToRelations, variableStrings);
myNode = n;
this.ignoreCase = ignoreCase;
resetChildIter();
}
@Override
void resetChildIter() {
nodeMatchCandidateIterator = myNode.reln.searchNodeIterator(node, hyp ? sg : sg_aligned);
if (myNode.reln instanceof GraphRelation.ALIGNMENT)
((GraphRelation.ALIGNMENT) myNode.reln).setAlignment(alignment, hyp,
(GraphRelation.SearchNodeIterator) nodeMatchCandidateIterator);
finished = false;
if (nextMatch != null) {
decommitVariableGroups();
decommitNamedNodes();
decommitNamedRelations();
}
nextMatch = null;
}
private void resetChild() {
if (childMatcher == null) {
if (myNode.child == null) {
matchedOnce = false;
} else {
childMatcher = myNode.child.matcher(sg, alignment, sg_aligned,
(myNode.reln instanceof GraphRelation.ALIGNMENT) ? !hyp : hyp, nextMatch, namesToNodes, namesToRelations,
variableStrings, ignoreCase);
}
} else {
childMatcher.resetChildIter(nextMatch);
}
}
/*
* goes to the next node in the tree that is a successful match to my
* description pattern
*/
// when finished = false; break; is called, it means I successfully matched.
@SuppressWarnings("null")
private void goToNextNodeMatch() {
decommitVariableGroups(); // make sure variable groups are free.
decommitNamedNodes();
decommitNamedRelations();
finished = true;
Matcher m = null;
while (nodeMatchCandidateIterator.hasNext()) {
if (myNode.reln.getName() != null) {
String foundReln = namesToRelations.get(myNode.reln.getName());
nextMatchReln = ((GraphRelation.SearchNodeIterator) nodeMatchCandidateIterator).getReln();
if ((foundReln != null) && (!nextMatchReln.equals(foundReln))) {
nextMatch = nodeMatchCandidateIterator.next();
continue;
}
}
nextMatch = nodeMatchCandidateIterator.next();
// System.err.println("going to next match: " + nextMatch.word() + " " +
// myNode.descString + " " + myNode.isLink);
if (myNode.descString.equals("{}") && myNode.isLink) {
IndexedWord otherNode = namesToNodes.get(myNode.name);
if (otherNode != null) {
if (otherNode.equals(nextMatch)) {
if ( ! myNode.negDesc) {
finished = false;
break;
}
} else {
if (myNode.negDesc) {
finished = false;
break;
}
}
} else {
boolean found = myNode.nodeAttrMatch(nextMatch,
hyp ? sg : sg_aligned,
ignoreCase);
if (found) {
for (Pair varGroup : myNode.variableGroups) {
// if variables have been captured from a regex, they
// must match any previous matchings
String thisVariable = varGroup.second();
String thisVarString = variableStrings.getString(thisVariable);
if (thisVarString != null &&
!thisVarString.equals(m.group(varGroup.first()))) {
// failed to match a variable
found = false;
break;
}
}
// nodeAttrMatch already checks negDesc, so no need to
// check for that here
finished = false;
break;
}
}
} else { // try to match the description pattern.
boolean found = myNode.nodeAttrMatch(nextMatch,
hyp ? sg : sg_aligned,
ignoreCase);
if (found) {
for (Pair varGroup : myNode.variableGroups) {
// if variables have been captured from a regex, they
// must match any previous matchings
String thisVariable = varGroup.second();
String thisVarString = variableStrings.getString(thisVariable);
if (thisVarString != null &&
!thisVarString.equals(m.group(varGroup.first()))) {
// failed to match a variable
found = false;
break;
}
}
// nodeAttrMatch already checks negDesc, so no need to
// check for that here
finished = false;
break;
}
}
} // end while
if ( ! finished) { // I successfully matched.
resetChild();
if (myNode.name != null) {
// note: have to fill in the map as we go for backreferencing
if (!namesToNodes.containsKey(myNode.name)) {
// System.err.println("making namedFirst");
namedFirst = true;
}
// System.err.println("adding named node: " + myNode.name + "=" +
// nextMatch.word());
namesToNodes.put(myNode.name, nextMatch);
}
if (myNode.reln.getName() != null) {
if (!namesToRelations.containsKey(myNode.reln.getName()))
relnNamedFirst = true;
namesToRelations.put(myNode.reln.getName(), nextMatchReln);
}
commitVariableGroups(m); // commit my variable groups.
}
// finished is false exiting this if and only if nextChild exists
// and has a label or backreference that matches
// (also it will just have been reset)
}
private void commitVariableGroups(Matcher m) {
committedVariables = true; // commit all my variable groups.
for (Pair varGroup : myNode.variableGroups) {
String thisVarString = m.group(varGroup.first());
variableStrings.setVar(varGroup.second(), thisVarString);
}
}
private void decommitVariableGroups() {
if (committedVariables) {
for (Pair varGroup : myNode.variableGroups) {
variableStrings.unsetVar(varGroup.second());
}
}
committedVariables = false;
}
private void decommitNamedNodes() {
if (namesToNodes.containsKey(myNode.name) && namedFirst) {
namedFirst = false;
namesToNodes.remove(myNode.name);
}
}
private void decommitNamedRelations() {
if (namesToRelations.containsKey(myNode.reln.name) && relnNamedFirst) {
relnNamedFirst = false;
namesToRelations.remove(myNode.reln.name);
}
}
/*
* tries to match the unique child of the NodePattern node to a node.
* Returns "true" if succeeds.
*/
private boolean matchChild() {
// entering here (given that it's called only once in matches())
// we know finished is false, and either nextChild == null
// (meaning goToNextChild has not been called) or nextChild exists
// and has a label or backreference that matches
if (nextMatch == null) { // I haven't been initialized yet, so my child
// certainly can't be matched yet.
return false;
}
if (childMatcher == null) {
if (!matchedOnce) {
matchedOnce = true;
return true;
}
return false;
}
// childMatcher.namesToNodes.putAll(this.namesToNodes);
// childMatcher.namesToRelations.putAll(this.namesToRelations);
boolean match = childMatcher.matches();
if (match) {
// namesToNodes.putAll(childMatcher.namesToNodes);
// namesToRelations.putAll(childMatcher.namesToRelations);
// System.out.println(node.word() + " " +
// namesToNodes.get("partnerTwo"));
} else {
if (nextMatch != null) {
decommitVariableGroups();
decommitNamedNodes();
decommitNamedRelations();
}
}
return match;
}
// find the next local match
@Override
public boolean matches() {
// System.out.println(toString());
// System.out.println(namesToNodes);
// System.err.println("matches: " + myNode.reln);
// this is necessary so that a negated/optional node matches only once
if (finished) {
// System.out.println(false);
return false;
}
while (!finished) {
if (matchChild()) {
if (myNode.isNegated()) {
// negated node only has to fail once
finished = true;
return false; // cannot be optional and negated
} else {
if (myNode.isOptional()) {
finished = true;
}
// System.out.println(true);
return true;
}
} else {
goToNextNodeMatch();
}
}
if (myNode.isNegated()) { // couldn't match my relation/pattern, so
// succeeded!
return true;
} else { // couldn't match my relation/pattern, so failed!
nextMatch = null;
decommitVariableGroups();
decommitNamedNodes();
decommitNamedRelations();
// didn't match, but return true anyway if optional
return myNode.isOptional();
}
}
@Override
public IndexedWord getMatch() {
return nextMatch;
}
@Override
public String toString() {
return "node matcher for: " + myNode.localString();
}
} // end static class NodeMatcher
}