All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.trees.tregex.TregexMatcher Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
// TregexMatcher
// Copyright (c) 2004-2007 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//
//
// For more information, bug reports, fixes, contact:
//    Christopher Manning
//    Dept of Computer Science, Gates 1A
//    Stanford CA 94305-9010
//    USA
//    Support/Questions: [email protected]
//    Licensing: [email protected]
//    http://www-nlp.stanford.edu/software/tregex.shtml

package edu.stanford.nlp.trees.tregex;

import java.util.*;

import edu.stanford.nlp.trees.HasParent;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.Tree;

/**
 * A TregexMatcher can be used to match a {@link TregexPattern} against a {@link edu.stanford.nlp.trees.Tree}.
 * Usage should be similar to a {@link java.util.regex.Matcher}.
 *
 * @author Galen Andrew
 */
public abstract class TregexMatcher {

  final Tree root;
  Tree tree;
  IdentityHashMap nodesToParents;
  final Map namesToNodes;
  final VariableStrings variableStrings;

  // these things are used by "find"
  private Iterator findIterator;
  private Tree findCurrent;

  final HeadFinder headFinder;

  TregexMatcher(Tree root, Tree tree, IdentityHashMap nodesToParents, Map namesToNodes, VariableStrings variableStrings, HeadFinder headFinder) {
    this.root = root;
    this.tree = tree;
    this.nodesToParents = nodesToParents;
    this.namesToNodes = namesToNodes;
    this.variableStrings = variableStrings;
    this.headFinder = headFinder;
  }

  public HeadFinder getHeadFinder() { return this.headFinder; }

  /**
   * Resets the matcher so that its search starts over.
   */
  public void reset() {
    findIterator = null;
    findCurrent = null;
    namesToNodes.clear();
    variableStrings.reset();
  }

  /**
   * Resets the matcher to start searching on the given tree for matching subexpressions.
   *
   * @param tree The tree to start searching on
   */
  void resetChildIter(Tree tree) {
    this.tree = tree;
    resetChildIter();
  }

  /**
   * Resets the matcher to restart search for matching subexpressions
   */
  void resetChildIter() {
  }

  /**
   * Does the pattern match the tree?  It's actually closer to java.util.regex's
   * "lookingAt" in that the root of the tree has to match the root of the pattern
   * but the whole tree does not have to be "accounted for".  Like with lookingAt
   * the beginning of the string has to match the pattern, but the whole string
   * doesn't have to be "accounted for".
   *
   * @return whether the tree matches the pattern
   */
  public abstract boolean matches();

  /** Rests the matcher and tests if it matches on the tree when rooted at {@code node}.
   *
   *  @param node The node where the match is checked
   *  @return whether the matcher matches at node
   */
  public boolean matchesAt(Tree node) {
    resetChildIter(node);
    return matches();
  }

  /**
   * Get the last matching tree -- that is, the tree node that matches the root node of the pattern.
   * Returns null if there has not been a match.
   *
   * @return last match
   */
  public abstract Tree getMatch();


  /**
   * Find the next match of the pattern on the tree.
   *
   * @return whether there is a match somewhere in the tree
   */
  public boolean find() {
    if (findIterator == null) {
      findIterator = root.iterator();
    }
    if (findCurrent != null && matches()) {
      return true;
    }
    while (findIterator.hasNext()) {
      findCurrent = findIterator.next();
      resetChildIter(findCurrent);
      if (matches()) {
        return true;
      }
    }
    return false;
  }

  /**
   * Similar to {@code find()}, but matches only if {@code node} is
   * the root of the match.  All other matches are ignored.  If you
   * know you are looking for matches with a particular root, this is
   * much faster than iterating over all matches and taking only the
   * ones that work and faster than altering the tregex to match only
   * the correct node.
   * 
* If called multiple times with the same node, this will return * subsequent matches in the same manner as find() returns * subsequent matches in the same tree. If you want to call this using * the same TregexMatcher on more than one node, call reset() first; * otherwise, an AssertionError will be thrown. */ public boolean findAt(Tree node) { if (findCurrent != null && findCurrent != node) { throw new AssertionError("Error: must call reset() before changing nodes for a call to findAt"); } if (findCurrent != null) { return matches(); } findCurrent = node; resetChildIter(findCurrent); return matches(); } /** * Find the next match of the pattern on the tree such that the * matching node (that is, the tree node matching the root node of * the pattern) differs from the previous matching node. * @return true iff another matching node is found. */ public boolean findNextMatchingNode() { Tree lastMatchingNode = getMatch(); while(find()) { if(getMatch() != lastMatchingNode) return true; } return false; } /** * Returns the node labeled with {@code name} in the pattern. * * @param name the name of the node, specified in the pattern. * @return node labeled by the name */ public Tree getNode(String name) { return namesToNodes.get(name); } public Set getNodeNames() { return namesToNodes.keySet(); } Tree getParent(Tree node) { if (node instanceof HasParent) { return node.parent(); } if (nodesToParents == null) { nodesToParents = new IdentityHashMap<>(); } if (nodesToParents.isEmpty()) { fillNodesToParents(root, null); } return nodesToParents.get(node); } private void fillNodesToParents(Tree node, Tree parent) { nodesToParents.put(node, parent); for (Tree child : node.children()) { fillNodesToParents(child, node); } } Tree getRoot() { return root; } /** * If there is a current match, and that match involves setting this * particular variable string, this returns that string. Otherwise, * it returns null. */ public String getVariableString(String var) { return variableStrings.getString(var); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy