All downloads are free. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.semgraph.semgrex; 
import edu.stanford.nlp.util.logging.Redwood;

import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.ling.*;


import java.util.*;

/**
 * A SemgrexMatcher can be used to match a {@link SemgrexPattern}
 * against a {@link edu.stanford.nlp.semgraph.SemanticGraph}. 

* * Usage should be the same as {@link java.util.regex.Matcher}.

* * @author Chloe Kiddon */ public abstract class SemgrexMatcher { /** A logger for this class */ private static Redwood.RedwoodChannels log = Redwood.channels(SemgrexMatcher.class); SemanticGraph sg; Map namesToNodes; Map namesToRelations; VariableStrings variableStrings; IndexedWord node; // to be used for patterns involving "@" Alignment alignment; SemanticGraph sg_aligned; boolean hyp; // these things are used by "find" Iterator findIterator; IndexedWord findCurrent; SemgrexMatcher(SemanticGraph sg, Alignment alignment, SemanticGraph sg_aligned, boolean hyp, IndexedWord node, Map namesToNodes, Map namesToRelations, VariableStrings variableStrings) { this.sg = sg; this.alignment = (alignment == null) ? null : alignment; this.sg_aligned = (sg_aligned == null) ? null : sg_aligned; this.hyp = hyp; this.node = node; this.namesToNodes = namesToNodes; this.namesToRelations = namesToRelations; this.variableStrings = variableStrings; } SemgrexMatcher(SemanticGraph sg, IndexedWord node, Map namesToNodes, Map namesToRelations, VariableStrings variableStrings) { this(sg, null, null, true, node, namesToNodes, namesToRelations, variableStrings); } /** * Resets the matcher so that its search starts over. */ public void reset() { findIterator = null; namesToNodes.clear(); namesToRelations.clear(); } /** * Resets the matcher to start searching on the given node for matching * subexpressions */ void resetChildIter(IndexedWord node) { this.node = node; resetChildIter(); } /** * Resets the matcher to restart search for matching subexpressions */ void resetChildIter() { } /** * Does the pattern match the graph? It's actually closer to * java.util.regex's "lookingAt" in that the root of the graph has to match * the root of the pattern but the whole tree does not have to be "accounted * for". Like with lookingAt the beginning of the string has to match the * pattern, but the whole string doesn't have to be "accounted for". 
* * @return whether the node matches the pattern */ public abstract boolean matches(); /** Rests the matcher and tests if it matches in the graph when rooted at * node. * * @return whether the matcher matches at node */ public boolean matchesAt(IndexedWord node) { resetChildIter(node); return matches(); } /** * Get the last matching node -- that is, the node that matches the root node * of the pattern. Returns null if there has not been a match. * * @return last match */ public abstract IndexedWord getMatch(); /** * Topological sorting actually takes a rather large amount of time, if you call multiple * patterns on the same tree. * This is a weak cache that stores all the trees sorted since the garbage collector last kicked in. * The key on this map is the identity hash code (i.e., memory address) of the semantic graph; the * value is the sorted list of vertices. * * Note that this optimization will cause strange things to happen if you mutate a semantic graph between * calls to Semgrex. 
*/ private static final WeakHashMap> topologicalSortCache = new WeakHashMap<>(); /** * Find the next match of the pattern in the graph * * @return whether there is a match somewhere in the graph */ public boolean find() { // log.info("hyp: " + hyp); if (findIterator == null) { try { if (hyp) { synchronized (topologicalSortCache) { List topoSort = topologicalSortCache.get(System.identityHashCode(sg)); if (topoSort == null || topoSort.size() != sg.size()) { // size check to mitigate a stale cache topoSort = sg.topologicalSort(); topologicalSortCache.put(System.identityHashCode(sg), topoSort); } findIterator = topoSort.iterator(); } } else if (sg_aligned == null) { return false; } else { synchronized (topologicalSortCache) { List topoSort = topologicalSortCache.get(System.identityHashCode(sg_aligned)); if (topoSort == null || topoSort.size() != sg_aligned.size()) { // size check to mitigate a stale cache topoSort = sg_aligned.topologicalSort(); topologicalSortCache.put(System.identityHashCode(sg_aligned), topoSort); } findIterator = topoSort.iterator(); } } } catch (Exception ex) { if (hyp) { findIterator = sg.vertexSet().iterator(); } else if (sg_aligned == null) { return false; } else { findIterator = sg_aligned.vertexSet().iterator(); } } } // System.out.println("first"); if (findCurrent != null && matches()) { // log.info("find first: " + findCurrent.word()); return true; } //log.info("here"); while (findIterator.hasNext()) { findCurrent = findIterator.next(); // System.out.println("final: " + namesToNodes); resetChildIter(findCurrent); // System.out.println("after reset: " + namesToNodes); // Should not be necessary to reset namesToNodes here, since it // gets cleaned up by resetChildIter //namesToNodes.clear(); //namesToRelations.clear(); if (matches()) { // log.info("find second: " + findCurrent.word()); return true; } } return false; } /** * Find the next match of the pattern in the graph such that the matching node * (that is, the node matching the root node 
of the pattern) differs from the * previous matching node. * * @return true iff another matching node is found. */ public boolean findNextMatchingNode() { IndexedWord lastMatchingNode = getMatch(); while(find()) { if(getMatch() != lastMatchingNode) return true; } return false; } /** * Returns the node labeled with name in the pattern. * * @param name the name of the node, specified in the pattern. * @return node labeled by the name */ public IndexedWord getNode(String name) { return namesToNodes.get(name); } public String getRelnString(String name) { return namesToRelations.get(name); } /** * Returns the set of names for named nodes in this pattern. * This is used as a convenience routine, when there are numerous patterns * with named nodes to track. */ public Set getNodeNames() { return namesToNodes.keySet(); } /** * Returns the set of names for named relations in this pattern. */ public Set getRelationNames() { return namesToRelations.keySet(); } @Override abstract public String toString(); /** * Returns the graph associated with this match. */ public SemanticGraph getGraph() { return sg; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy