All downloads are free. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

The newest version!
package edu.stanford.nlp.semgraph.semgrex; 

import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.ling.*;
import edu.stanford.nlp.util.logging.Redwood;

import java.util.*;

/**
 * A {@code SemgrexMatcher} can be used to match a {@link SemgrexPattern}
 * against a {@link edu.stanford.nlp.semgraph.SemanticGraph}.
 * 

* Usage should be the same as {@link java.util.regex.Matcher}. * * @author Chloe Kiddon */ public abstract class SemgrexMatcher { /** A logger for this class */ private static final Redwood.RedwoodChannels log = Redwood.channels(SemgrexMatcher.class); final SemanticGraph sg; final Map namesToNodes; final Map namesToRelations; final VariableStrings variableStrings; IndexedWord node; // to be used for patterns involving "@" final Alignment alignment; final SemanticGraph sg_aligned; final boolean hyp; // these things are used by "find" private Iterator findIterator; private IndexedWord findCurrent; SemgrexMatcher(SemanticGraph sg, Alignment alignment, SemanticGraph sg_aligned, boolean hyp, IndexedWord node, Map namesToNodes, Map namesToRelations, VariableStrings variableStrings) { this.sg = sg; this.alignment = alignment; this.sg_aligned = sg_aligned; this.hyp = hyp; this.node = node; this.namesToNodes = namesToNodes; this.namesToRelations = namesToRelations; this.variableStrings = variableStrings; } SemgrexMatcher(SemanticGraph sg, IndexedWord node, Map namesToNodes, Map namesToRelations, VariableStrings variableStrings) { this(sg, null, null, true, node, namesToNodes, namesToRelations, variableStrings); } /** * Resets the matcher so that its search starts over. */ public void reset() { findIterator = null; namesToNodes.clear(); namesToRelations.clear(); } /** * Resets the matcher to start searching on the given node for matching * subexpressions. */ void resetChildIter(IndexedWord node) { this.node = node; resetChildIter(); } /** * Resets the matcher to restart search for matching subexpressions */ void resetChildIter() { } /** * Does the pattern match the graph? It's actually closer to * java.util.regex's "lookingAt" in that the root of the graph has to match * the root of the pattern but the whole tree does not have to be "accounted * for". 
Like with lookingAt the beginning of the string has to match the * pattern, but the whole string doesn't have to be "accounted for". * * @return whether the node matches the pattern */ public abstract boolean matches(); /** Rests the matcher and tests if it matches in the graph when rooted at * {@code node}. * * @return whether the matcher matches at node */ public boolean matchesAt(IndexedWord node) { resetChildIter(node); return matches(); } /** * Get the last matching node -- that is, the node that matches the root node * of the pattern. Returns null if there has not been a match. * * @return last match */ public abstract IndexedWord getMatch(); /** * Topological sorting actually takes a rather large amount of time, if you call multiple * patterns on the same tree. * This is a weak cache that stores all the trees sorted since the garbage collector last kicked in. * The key on this map is the identity hash code (i.e., memory address) of the semantic graph; the * value is the sorted list of vertices. *

* Note that this optimization will cause strange things to happen if you mutate a semantic graph between * calls to Semgrex. */ private static final WeakHashMap> topologicalSortCache = new WeakHashMap<>(); private void setupFindIterator() { try { if (hyp) { synchronized (topologicalSortCache) { List topoSort = topologicalSortCache.get(System.identityHashCode(sg)); if (topoSort == null || topoSort.size() != sg.size()) { // size check to mitigate a stale cache topoSort = sg.topologicalSort(); topologicalSortCache.put(System.identityHashCode(sg), topoSort); } findIterator = topoSort.iterator(); } } else if (sg_aligned == null) { return; } else { synchronized (topologicalSortCache) { List topoSort = topologicalSortCache.get(System.identityHashCode(sg_aligned)); if (topoSort == null || topoSort.size() != sg_aligned.size()) { // size check to mitigate a stale cache topoSort = sg_aligned.topologicalSort(); topologicalSortCache.put(System.identityHashCode(sg_aligned), topoSort); } findIterator = topoSort.iterator(); } } } catch (Exception ex) { if (hyp) { findIterator = sg.vertexSet().iterator(); } else if (sg_aligned == null) { return; } else { findIterator = sg_aligned.vertexSet().iterator(); } } } /** * Find the next match of the pattern in the graph. 
* * @return whether there is a match somewhere in the graph */ public boolean find() { // log.info("hyp: " + hyp); if (findIterator == null) { setupFindIterator(); } if (findIterator == null) { return false; } // System.out.println("first"); if (findCurrent != null && matches()) { // log.info("find first: " + findCurrent.word()); return true; } //log.info("here"); while (findIterator.hasNext()) { findCurrent = findIterator.next(); // System.out.println("final: " + namesToNodes); resetChildIter(findCurrent); // System.out.println("after reset: " + namesToNodes); // Should not be necessary to reset namesToNodes here, since it // gets cleaned up by resetChildIter //namesToNodes.clear(); //namesToRelations.clear(); if (matches()) { // log.info("find second: " + findCurrent.word()); return true; } } return false; } /** * Find the next match of the pattern in the graph such that the matching node * (that is, the node matching the root node of the pattern) differs from the * previous matching node. * * @return true iff another matching node is found. */ public boolean findNextMatchingNode() { IndexedWord lastMatchingNode = getMatch(); while(find()) { if(getMatch() != lastMatchingNode) return true; } return false; } /** * Returns the node labeled with {@code name} in the pattern. * * @param name the name of the node, specified in the pattern. * @return node labeled by the name */ public IndexedWord getNode(String name) { return namesToNodes.get(name); } public String getRelnString(String name) { return namesToRelations.get(name); } /** * Returns the set of names for named nodes in this pattern. * This is used as a convenience routine, when there are numerous patterns * with named nodes to track. */ public Set getNodeNames() { return namesToNodes.keySet(); } /** * Returns the set of names for named relations in this pattern. 
*/ public Set getRelationNames() { return namesToRelations.keySet(); } @Override public abstract String toString(); /** * Returns the graph associated with this match. */ public SemanticGraph getGraph() { return sg; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy