All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.semgraph.semgrex.NodePattern Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.semgraph.semgrex; 
import edu.stanford.nlp.util.logging.Redwood;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;

public class NodePattern extends SemgrexPattern  {

  /** A logger for this class */
  private static Redwood.RedwoodChannels log = Redwood.channels(NodePattern.class);

  private static final long serialVersionUID = -5981133879119233896L;
  private GraphRelation reln;
  private boolean negDesc;
  /**
   *  A hash map from a key to a pair (case_sensitive_pattern, case_insensitive_pattern)
   *  If the type of the entry is a String, then string comparison is safe.
   *  If the type is a Boolean, it will always either match or not match corresponding to the Boolean
   *  value.
   *  Otherwise, the type will be a Pattern, and you must use Pattern.matches().
   */
  private Map> attributes;
  private boolean isRoot;
  private boolean isLink;
  private boolean isEmpty;
  private String name;
  private String descString;
  SemgrexPattern child;
  // specifies the groups in a regex that are captured as
  // matcher-global string variables
  private List> variableGroups;

  /**
   * Creates a node pattern without any regex variable groups.
   *
   * @param r relation used to enumerate candidate nodes
   * @param negDesc whether the description test is negated
   * @param attrs attribute key to raw description value ("__", "/regex/" or a literal)
   * @param root whether the pattern only accepts root nodes
   * @param empty whether the pattern only accepts the empty node
   * @param name optional name for the matched node, may be null
   */
  public NodePattern(GraphRelation r, boolean negDesc,
                     Map<String, String> attrs,
                     boolean root, boolean empty, String name) {
    this(r, negDesc, attrs, root, empty, name,
            new ArrayList<>(0));
  }

  // TODO: there is no capacity for named variable groups in the parser right now
  /**
   * Creates a node pattern from a set of attribute descriptions.
   *
   * <p>Each attribute value is interpreted as follows:
   * <ul>
   *   <li>{@code __} always matches (stored as {@code Boolean.TRUE});</li>
   *   <li>{@code /.../} whose content contains any character other than an ASCII
   *       letter or digit is compiled as a regular expression, once case-sensitive
   *       and once case-insensitive;</li>
   *   <li>{@code /.../} whose content is purely ASCII alphanumeric is stored as a
   *       plain string, so it can be compared without the regex engine;</li>
   *   <li>anything else is stored verbatim as a plain string.</li>
   * </ul>
   *
   * @param r relation used to enumerate candidate nodes
   * @param negDesc whether the description test is negated
   * @param attrs attribute key to raw description value
   * @param root whether the pattern only accepts root nodes
   * @param empty whether the pattern only accepts the empty node
   * @param name optional name for the matched node, may be null
   * @param variableGroups (regex group, variable name) pairs; stored as given, not copied
   */
  public NodePattern(GraphRelation r, boolean negDesc,
                     Map<String, String> attrs,
                     boolean root, boolean empty, String name,
                     List<Pair<Integer, String>> variableGroups) {
    this.reln = r;
    this.negDesc = negDesc;
    attributes = Generics.newHashMap();

    StringBuilder desc = new StringBuilder("{");
    for (Map.Entry<String, String> entry : attrs.entrySet()) {
      if (desc.length() > 1) {
        // not the first attribute: separate from the previous one
        desc.append(';');
      }
      String key = entry.getKey();
      String value = entry.getValue();

      // Add the attributes for this key
      if (value.equals("__")) {
        attributes.put(key, Pair.makePair(true, true));
      } else if (value.matches("/.*/")) {
        String patternContent = value.substring(1, value.length() - 1);
        if (isAsciiAlphanumeric(patternContent)) {
          // no regex metacharacters possible: plain string comparison is enough
          attributes.put(key, Pair.makePair(patternContent, patternContent));
        } else {
          attributes.put(key, Pair.makePair(
              Pattern.compile(patternContent),
              Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE))
          );
        }
      } else { // raw description
        attributes.put(key, Pair.makePair(value, value));
      }

      desc.append(key).append(':').append(value);
    }
    if (root) {
      desc.append('$');
    } else if (empty) {
      desc.append('#');
    }
    desc.append('}');
    descString = desc.toString();

    this.name = name;
    this.child = null;
    this.isRoot = root;
    this.isEmpty = empty;

    this.variableGroups = variableGroups;
  }

  /** Returns true when every character of {@code s} is an ASCII letter or digit (vacuously true for ""). */
  private static boolean isAsciiAlphanumeric(String s) {
    for (int i = 0; i < s.length(); ++i) {
      char chr = s.charAt(i);
      boolean alphanumeric = (chr >= 'A' && chr <= 'Z')
          || (chr >= 'a' && chr <= 'z')
          || (chr >= '0' && chr <= '9');
      if (!alphanumeric) {
        return false;
      }
    }
    return true;
  }

  @SuppressWarnings("unchecked")
  public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean ignoreCase) {
    // System.out.println(node.word());
    if (isRoot)
      return (negDesc ? !sg.getRoots().contains(node) : sg.getRoots().contains(node));
    // System.out.println("not root");
    if (isEmpty)
      return (negDesc ? !node.equals(IndexedWord.NO_WORD) : node.equals(IndexedWord.NO_WORD));

    // log.info("Attributes are: " + attributes);
    for (Map.Entry> attr : attributes.entrySet()) {
      String key = attr.getKey();
      // System.out.println(key);
      String nodeValue;
      // if (key.equals("idx"))
      // nodeValue = Integer.toString(node.index());
      // else {

      Class c = Env.lookupAnnotationKey(env, key);
      //find class for the key

      Object value = node.get(c);
      if (value == null)
        nodeValue = null;
      else
        nodeValue = value.toString();
      // }
      // System.out.println(nodeValue);
      if (nodeValue == null)
        return negDesc;

      // Get the node pattern
      Object toMatch = ignoreCase ? attr.getValue().second : attr.getValue().first;
      boolean matches;
      if (toMatch instanceof Boolean) {
        matches = ((Boolean) toMatch);
      } else if (toMatch instanceof String) {
        if (ignoreCase) {
          matches = nodeValue.equalsIgnoreCase(toMatch.toString());
        } else {
          matches = nodeValue.equals(toMatch.toString());
        }
      } else if (toMatch instanceof Pattern) {
        matches = ((Pattern) toMatch).matcher(nodeValue).matches();
      } else {
        throw new IllegalStateException("Unknown matcher type: " + toMatch + " (of class + " + toMatch.getClass() + ")");
      }

      if (!matches) {
        // System.out.println("doesn't match");
        // System.out.println("");
        return negDesc;
      }
    }
    // System.out.println("matches");
    // System.out.println("");
    return !negDesc;
  }

  /** Flags this pattern as a link. */
  public void makeLink() {
    this.isLink = true;
  }

  /** @return the root flag this pattern was constructed with */
  public boolean isRoot() {
    return this.isRoot;
  }

  /** @return the empty flag this pattern was constructed with */
  public boolean isNull() {
    return this.isEmpty;
  }

  /** Renders this node alone, without its child pattern. */
  @Override
  public String localString() {
    final boolean withChild = false;
    return toString(true, withChild);
  }

  /** Renders this node and its child, treating the node as having precedence (no parentheses). */
  @Override
  public String toString() {
    final boolean withChild = true;
    return toString(true, withChild);
  }

  /**
   * Renders this node together with its child.
   *
   * @param hasPrecedence when false and a child exists, the description and child are parenthesized
   */
  @Override
  public String toString(boolean hasPrecedence) {
    final boolean withChild = true;
    return toString(hasPrecedence, withChild);
  }

  /**
   * Builds the textual form of this pattern.
   *
   * <p>Layout: optional {@code !} (negated) and {@code ?} (optional) markers, a space,
   * the relation followed by a space (if any), the possibly negated description,
   * {@code =name} (if named) and finally the child pattern.
   *
   * @param hasPrecedence when false and the child is printed, description and child are wrapped in parentheses
   * @param addChild whether the child pattern (if any) is appended
   * @return the rendered pattern
   */
  public String toString(boolean hasPrecedence, boolean addChild) {
    final boolean showChild = addChild && child != null;
    final boolean parenthesize = showChild && !hasPrecedence;

    StringBuilder out = new StringBuilder();
    if (isNegated()) {
      out.append('!');
    }
    if (isOptional()) {
      out.append('?');
    }
    out.append(' ');
    if (reln != null) {
      out.append(reln.toString()).append(' ');
    }
    if (parenthesize) {
      out.append('(');
    }
    if (negDesc) {
      out.append('!');
    }
    out.append(descString);
    if (name != null) {
      out.append('=').append(name);
    }
    if (showChild) {
      out.append(' ').append(child.toString(false));
      if (parenthesize) {
        out.append(')');
      }
    }
    return out.toString();
  }

  /** Replaces the single child pattern of this node. */
  @Override
  public void setChild(SemgrexPattern n) {
    this.child = n;
  }

  /**
   * Returns the child pattern wrapped in an immutable list.
   *
   * @return an empty list when there is no child, otherwise a singleton list holding it
   */
  @Override
  public List<SemgrexPattern> getChildren() {
    if (child == null) {
      return Collections.emptyList();
    } else {
      return Collections.singletonList(child);
    }
  }

  /** @return the name given to this node at construction, possibly null */
  public String getName() {
    return this.name;
  }

  /**
   * Creates a matcher for this pattern without alignment information.
   *
   * <p>The alignment and aligned graph are passed as null and the hypothesis
   * flag as true, so the search always runs over {@code sg}.
   *
   * @param sg the graph to search
   * @param node the node the search starts from
   * @param namesToNodes shared map of node names to the nodes bound so far
   * @param namesToRelations shared map of relation names to the relations bound so far
   * @param variableStrings shared store for regex-captured variables
   * @param ignoreCase whether attribute comparison ignores case
   */
  @Override
  public SemgrexMatcher matcher(SemanticGraph sg, IndexedWord node,
                                Map<String, IndexedWord> namesToNodes,
                                Map<String, String> namesToRelations,
                                VariableStrings variableStrings,
                                boolean ignoreCase) {
    return new NodeMatcher(this, sg, null, null, true, node, namesToNodes, namesToRelations, variableStrings,
        ignoreCase);
  }

  /**
   * Creates a matcher for this pattern with alignment information.
   *
   * <p>When the relation equals {@code GraphRelation.ALIGNED_ROOT} the matcher
   * starts from the first root of {@code sg_align} with the hypothesis flag
   * forced to false; otherwise it starts from {@code node} with {@code hyp}.
   *
   * @param sg the primary graph
   * @param alignment alignment between the two graphs
   * @param sg_align the aligned graph
   * @param hyp hypothesis flag handed to the matcher; it selects which graph is searched
   * @param node the node the search starts from
   * @param namesToNodes shared map of node names to the nodes bound so far
   * @param namesToRelations shared map of relation names to the relations bound so far
   * @param variableStrings shared store for regex-captured variables
   * @param ignoreCase whether attribute comparison ignores case
   */
  @Override
  public SemgrexMatcher matcher(SemanticGraph sg,
                                Alignment alignment, SemanticGraph sg_align,
                                boolean hyp, IndexedWord node,
                                Map<String, IndexedWord> namesToNodes,
                                Map<String, String> namesToRelations,
                                VariableStrings variableStrings,
                                boolean ignoreCase) {
    // evaluate the relation test once and derive both dependent arguments from it
    final boolean alignedRoot = reln.equals(GraphRelation.ALIGNED_ROOT);
    return new NodeMatcher(this, sg, alignment, sg_align,
                           alignedRoot ? false : hyp,
                           alignedRoot ? sg_align.getFirstRoot() : node,
                           namesToNodes, namesToRelations,
                           variableStrings, ignoreCase);
  }

  private static class NodeMatcher extends SemgrexMatcher {

    /**
     * when finished = true, it means I have exhausted my potential
     * node match candidates.
     */
    private boolean finished = false;

    /** Supplies the candidate nodes; recreated by resetChildIter(). */
    private Iterator<IndexedWord> nodeMatchCandidateIterator = null;

    /** The pattern this matcher evaluates. */
    private final NodePattern myNode;

    /**
     * a NodeMatcher only has a single child; if it is the left side
     * of multiple relations, a CoordinationMatcher is used.
     */
    private SemgrexMatcher childMatcher;

    /** For a childless pattern: whether the current candidate has already been reported. */
    private boolean matchedOnce = false;

    /** Whether commitVariableGroups() has run since the last decommit. */
    private boolean committedVariables = false;

    /** Relation reported by the candidate iterator for the current candidate (named relations only). */
    private String nextMatchReln = null;

    /** The current candidate node, or null before the first candidate / after exhaustion. */
    private IndexedWord nextMatch = null;

    /** True when this matcher was the first to bind its node name. */
    private boolean namedFirst = false;

    /** True when this matcher was the first to bind its relation name. */
    private boolean relnNamedFirst = false;

    private boolean ignoreCase = false;

    // universal: childMatcher is null if and only if
    // myNode.child == null OR resetChild has never been called

    /**
     * Creates a matcher for {@code n} and positions it before the first candidate.
     *
     * @param n the pattern to evaluate
     * @param sg the primary graph
     * @param alignment alignment between the graphs, may be null
     * @param sg_align the aligned graph, may be null
     * @param hyp selects which graph is searched (see resetChildIter)
     * @param node the node the search starts from
     * @param namesToNodes shared map of node names to bound nodes
     * @param namesToRelations shared map of relation names to bound relations
     * @param variableStrings shared store for regex-captured variables
     * @param ignoreCase whether attribute comparison ignores case
     */
    public NodeMatcher(NodePattern n, SemanticGraph sg, Alignment alignment, SemanticGraph sg_align, boolean hyp,
        IndexedWord node, Map<String, IndexedWord> namesToNodes, Map<String, String> namesToRelations,
        VariableStrings variableStrings, boolean ignoreCase) {
      super(sg, alignment, sg_align, hyp, node, namesToNodes, namesToRelations, variableStrings);
      myNode = n;
      this.ignoreCase = ignoreCase;
      resetChildIter();
    }

    /**
     * Restarts the candidate search from the current start node.
     *
     * <p>A fresh candidate iterator is requested from the relation (and given the
     * alignment when the relation is an ALIGNMENT). Anything committed for the
     * previous candidate is rolled back and the current candidate is cleared.
     */
    @Override
    void resetChildIter() {
      final SemanticGraph searchGraph = hyp ? sg : sg_aligned;
      nodeMatchCandidateIterator = myNode.reln.searchNodeIterator(node, searchGraph);
      if (myNode.reln instanceof GraphRelation.ALIGNMENT) {
        GraphRelation.ALIGNMENT alignmentReln = (GraphRelation.ALIGNMENT) myNode.reln;
        alignmentReln.setAlignment(alignment, hyp,
            (GraphRelation.SearchNodeIterator) nodeMatchCandidateIterator);
      }
      finished = false;

      final boolean hadCandidate = (nextMatch != null);
      if (hadCandidate) {
        decommitVariableGroups();
        decommitNamedNodes();
        decommitNamedRelations();
      }
      nextMatch = null;
    }

    /**
     * Points the child matcher at the current candidate.
     *
     * <p>An existing child matcher is re-targeted. Otherwise one is created from
     * the child pattern (with the hypothesis flag flipped for ALIGNMENT relations),
     * or, when there is no child pattern, the matched-once marker is cleared.
     */
    private void resetChild() {
      if (childMatcher != null) {
        childMatcher.resetChildIter(nextMatch);
        return;
      }
      if (myNode.child == null) {
        matchedOnce = false;
        return;
      }
      final boolean childHyp = (myNode.reln instanceof GraphRelation.ALIGNMENT) ? !hyp : hyp;
      childMatcher = myNode.child.matcher(sg, alignment, sg_aligned,
          childHyp, nextMatch, namesToNodes, namesToRelations,
          variableStrings, ignoreCase);
    }

    /*
     * goes to the next node in the tree that is a successful match to my
     * description pattern
     *
     * On entry everything committed for the previous candidate is rolled back.
     * On exit, finished == false means nextMatch is an accepted candidate whose
     * child matcher has been reset and whose name / relation name / variable
     * groups have been committed; finished == true means the candidates ran out.
     *
     * NOTE(review): no Matcher is ever produced here, so 'm' stays null. With an
     * empty variableGroups list (the only case the parser creates, per the TODO
     * on the NodePattern constructor) it is never dereferenced; with a non-empty
     * list m.group(...) throws NullPointerException. Supporting variable groups
     * needs nodeAttrMatch to expose the regex Matcher.
     */
    @SuppressWarnings("null")
    private void goToNextNodeMatch() {
      decommitVariableGroups(); // make sure variable groups are free.
      decommitNamedNodes();
      decommitNamedRelations();
      finished = true;
      Matcher m = null;
      while (nodeMatchCandidateIterator.hasNext()) {
        if (myNode.reln.getName() != null) {
          // a named relation already bound elsewhere must agree with this candidate's relation
          String foundReln = namesToRelations.get(myNode.reln.getName());
          nextMatchReln = ((GraphRelation.SearchNodeIterator) nodeMatchCandidateIterator).getReln();
          if ((foundReln != null) && (!nextMatchReln.equals(foundReln))) {
            nextMatch = nodeMatchCandidateIterator.next();
            continue;
          }
        }

        nextMatch = nodeMatchCandidateIterator.next();

        // an empty description on a link is a backreference: resolve it by name if already bound
        IndexedWord otherNode = null;
        if (myNode.descString.equals("{}") && myNode.isLink) {
          otherNode = namesToNodes.get(myNode.name);
        }

        boolean accepted;
        if (otherNode != null) {
          // accept the same node, or any other node when the description is negated
          accepted = (otherNode.equals(nextMatch) != myNode.negDesc);
        } else {
          // try to match the description pattern.
          accepted = candidateMatchesDescription(m);
        }

        if (accepted) {
          finished = false; // I successfully matched.
          break;
        }
      } // end while

      if ( ! finished) { // I successfully matched.
        resetChild();
        if (myNode.name != null) {
          // note: have to fill in the map as we go for backreferencing
          if (!namesToNodes.containsKey(myNode.name)) {
            namedFirst = true;
          }
          namesToNodes.put(myNode.name, nextMatch);
        }
        if (myNode.reln.getName() != null) {
          if (!namesToRelations.containsKey(myNode.reln.getName())) {
            relnNamedFirst = true;
          }
          namesToRelations.put(myNode.reln.getName(), nextMatchReln);
        }
        commitVariableGroups(m); // commit my variable groups.
      }
      // finished is false exiting this if and only if nextChild exists
      // and has a label or backreference that matches
      // (also it will just have been reset)
    }

    /*
     * Tests the current candidate (nextMatch) against the node description and,
     * if that succeeds, against previously captured variable strings.
     * nodeAttrMatch already checks negDesc, so it is not checked again here.
     * A variable whose earlier capture differs from this candidate's group now
     * rejects the candidate; previously the mismatch was detected but ignored.
     */
    private boolean candidateMatchesDescription(Matcher m) {
      boolean found = myNode.nodeAttrMatch(nextMatch,
                                           hyp ? sg : sg_aligned,
                                           ignoreCase);
      if (!found) {
        return false;
      }
      for (Pair<Integer, String> varGroup : myNode.variableGroups) {
        // if variables have been captured from a regex, they
        // must match any previous matchings
        String thisVariable = varGroup.second();
        String thisVarString = variableStrings.getString(thisVariable);
        if (thisVarString != null &&
            !thisVarString.equals(m.group(varGroup.first()))) {
          // failed to match a variable
          return false;
        }
      }
      return true;
    }

    /**
     * Publishes every variable group of this node into the shared variable store.
     *
     * @param m matcher whose groups hold the captured strings; it is dereferenced
     *          only when the pattern has variable groups, and a null value then
     *          causes a NullPointerException
     */
    private void commitVariableGroups(Matcher m) {
      committedVariables = true; // commit all my variable groups.
      for (Pair<Integer, String> varGroup : myNode.variableGroups) {
        String thisVarString = m.group(varGroup.first());
        variableStrings.setVar(varGroup.second(), thisVarString);
      }
    }

    /**
     * Withdraws this node's variables from the shared store if they are currently
     * committed, and clears the committed marker either way.
     */
    private void decommitVariableGroups() {
      if (committedVariables) {
        for (Pair<Integer, String> varGroup : myNode.variableGroups) {
          variableStrings.unsetVar(varGroup.second());
        }
      }
      committedVariables = false;
    }

    /**
     * Removes this node's name binding, but only when the name is present and
     * this matcher was the first to bind it.
     */
    private void decommitNamedNodes() {
      final boolean ownsBinding = namesToNodes.containsKey(myNode.name) && namedFirst;
      if (!ownsBinding) {
        return;
      }
      namedFirst = false;
      namesToNodes.remove(myNode.name);
    }

    /**
     * Removes this node's relation-name binding, but only when the name is
     * present and this matcher was the first to bind it.
     */
    // NOTE(review): reads reln.name directly while the binding is stored under
    // reln.getName(); confirm the two always agree.
    private void decommitNamedRelations() {
      final boolean ownsBinding = namesToRelations.containsKey(myNode.reln.name) && relnNamedFirst;
      if (!ownsBinding) {
        return;
      }
      relnNamedFirst = false;
      namesToRelations.remove(myNode.reln.name);
    }

    /*
     * tries to match the unique child of the NodePattern node to a node.
     * Returns "true" if succeeds.
     *
     * Without a current candidate nothing can match. Without a child matcher the
     * current candidate counts as a match exactly once. Otherwise the child
     * matcher decides, and a failure rolls back what this node committed.
     */
    private boolean matchChild() {
      // entering here (given that it's called only once in matches())
      // we know finished is false, and either nextChild == null
      // (meaning goToNextChild has not been called) or nextChild exists
      // and has a label or backreference that matches
      if (nextMatch == null) {
        // I haven't been initialized yet, so my child certainly can't be matched yet.
        return false;
      }

      if (childMatcher == null) {
        final boolean firstReport = !matchedOnce;
        if (firstReport) {
          matchedOnce = true;
        }
        return firstReport;
      }

      final boolean childMatched = childMatcher.matches();
      if (!childMatched && nextMatch != null) {
        decommitVariableGroups();
        decommitNamedNodes();
        decommitNamedRelations();
      }
      return childMatched;
    }

    // find the next local match
    /**
     * Advances to the next match of this node together with its child.
     *
     * <p>A negated node returns false as soon as one match is found and true when
     * the candidates run out without one. An optional node returns true at most
     * once, whether or not a match was found. Once finished, every further call
     * returns false.
     */
    @Override
    public boolean matches() {
      // this is necessary so that a negated/optional node matches only once
      if (finished) {
        return false;
      }

      // finished is known to be false here, so the body runs at least once
      do {
        if (!matchChild()) {
          goToNextNodeMatch();
          continue;
        }
        if (myNode.isNegated()) {
          // negated node only has to fail once
          finished = true;
          return false; // cannot be optional and negated
        }
        if (myNode.isOptional()) {
          finished = true;
        }
        return true;
      } while (!finished);

      if (myNode.isNegated()) {
        // couldn't match my relation/pattern, so succeeded!
        return true;
      }

      // couldn't match my relation/pattern, so failed!
      nextMatch = null;
      decommitVariableGroups();
      decommitNamedNodes();
      decommitNamedRelations();
      // didn't match, but return true anyway if optional
      return myNode.isOptional();
    }

    /** @return the current candidate node, or null when there is none */
    @Override
    public IndexedWord getMatch() {
      return this.nextMatch;
    }

    /** Describes this matcher by the child-less rendering of its pattern. */
    @Override
    public String toString() {
      final String patternText = myNode.localString();
      return "node matcher for: " + patternText;
    }

  } // end static class NodeMatcher

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy