/*
 * edu.stanford.nlp.ling.tokensregex.MultiPatternMatcher
 * Artifact: stanford-parser (available via Maven / Gradle / Ivy).
 * Stanford Parser processes raw text in English, Chinese, German, Arabic, and French,
 * and extracts constituency parse trees.
 */
package edu.stanford.nlp.ling.tokensregex;
import edu.stanford.nlp.util.*;
import java.util.*;
import java.util.function.Function;
/**
* Matcher that takes in multiple patterns
*
* @author Angel Chang
*/
public class MultiPatternMatcher {
Collection> patterns;
SequencePatternTrigger patternTrigger;
public MultiPatternMatcher(SequencePatternTrigger patternTrigger,
Collection extends SequencePattern> patterns)
{
this.patterns = new ArrayList>();
this.patterns.addAll(patterns);
this.patternTrigger = patternTrigger;
}
public MultiPatternMatcher(SequencePatternTrigger patternTrigger,
SequencePattern... patterns)
{
this(patterns);
this.patternTrigger = patternTrigger;
}
public MultiPatternMatcher(Collection> patterns)
{
this.patterns = patterns;
}
public MultiPatternMatcher(SequencePattern... patterns)
{
this.patterns = new ArrayList>(patterns.length);
for (SequencePattern p:patterns) {
this.patterns.add(p);
}
}
/**
* Given a sequence, applies our patterns over the sequence and returns
* all non overlapping matches. When multiple patterns overlaps,
* matched patterns are selected by
* the highest priority/score is selected,
* then the longest pattern,
* then the starting offset,
* then the original order
* @param elements input sequence to match against
* @return list of match results that are non-overlapping
*/
public List> findNonOverlapping(List extends T> elements)
{
return findNonOverlapping(elements, SequenceMatchResult.DEFAULT_COMPARATOR);
}
/**
* Given a sequence, applies our patterns over the sequence and returns
* all non overlapping matches. When multiple patterns overlaps,
* matched patterns are selected by order specified by the comparator
* @param elements input sequence to match against
* @param cmp comparator indicating order that overlapped sequences should be selected.
* @return list of match results that are non-overlapping
*/
public List> findNonOverlapping(List extends T> elements,
Comparator super SequenceMatchResult> cmp)
{
Collection> triggered = getTriggeredPatterns(elements);
List> all = new ArrayList>();
int i = 0;
for (SequencePattern p:triggered) {
SequenceMatcher m = p.getMatcher(elements);
m.setOrder(i);
while (m.find()) {
all.add(m.toBasicSequenceMatchResult());
}
i++;
}
List> res = IntervalTree.getNonOverlapping( all, SequenceMatchResult.TO_INTERVAL, cmp);
Collections.sort(res, SequenceMatchResult.OFFSET_COMPARATOR);
return res;
}
/**
* Given a sequence, applies our patterns over the sequence and returns
* all matches, depending on the findType. When multiple patterns overlaps,
* matched patterns are selected by order specified by the comparator
* @param elements input sequence to match against
* @param findType whether FindType.FIND_ALL or FindType.FIND_NONOVERLAPPING
* @return list of match results
*/
public List> find(List extends T> elements, SequenceMatcher.FindType findType)
{
Collection> triggered = getTriggeredPatterns(elements);
List> all = new ArrayList>();
int i = 0;
for (SequencePattern p:triggered) {
SequenceMatcher m = p.getMatcher(elements);
m.setFindType(findType);
m.setOrder(i);
while (m.find()) {
all.add(m.toBasicSequenceMatchResult());
}
i++;
}
List> res = IntervalTree.getNonOverlapping( all, SequenceMatchResult.TO_INTERVAL, SequenceMatchResult.DEFAULT_COMPARATOR);
Collections.sort(res, SequenceMatchResult.OFFSET_COMPARATOR);
return res;
}
/**
* Given a sequence, applies our patterns over the sequence and returns
* all non overlapping matches. When multiple patterns overlaps,
* matched patterns are selected to give the overall maximum score
* @param elements input sequence to match against
* @return list of match results that are non-overlapping
*/
public List> findNonOverlappingMaxScore(List extends T> elements)
{
return findNonOverlappingMaxScore(elements, SequenceMatchResult.SCORER);
}
/**
* Given a sequence, applies our patterns over the sequence and returns
* all non overlapping matches. When multiple patterns overlaps,
* matched patterns are selected to give the overall maximum score
* @param elements input sequence to match against
* @param scorer scorer for scoring each match
* @return list of match results that are non-overlapping
*/
public List> findNonOverlappingMaxScore(List extends T> elements,
Function super SequenceMatchResult, Double> scorer)
{
Collection> triggered = getTriggeredPatterns(elements);
List> all = new ArrayList>();
int i = 0;
for (SequencePattern p:triggered) {
SequenceMatcher m = p.getMatcher(elements);
m.setOrder(i);
while (m.find()) {
all.add(m.toBasicSequenceMatchResult());
}
i++;
}
List> res = IntervalTree.getNonOverlappingMaxScore( all, SequenceMatchResult.TO_INTERVAL, scorer);
Collections.sort(res, SequenceMatchResult.OFFSET_COMPARATOR);
return res;
}
/**
* Given a sequence, applies each of our patterns over the sequence and returns
* all non overlapping matches for each of the patterns.
* Unlike #findAllNonOverlapping, overlapping matches from different patterns are kept
* @param elements input sequence to match against
* @return iterable of match results that are non-overlapping
*/
public Iterable> findAllNonOverlappingMatchesPerPattern(List extends T> elements)
{
Collection> triggered = getTriggeredPatterns(elements);
List>> allMatches = new ArrayList>>(elements.size());
for (SequencePattern p:triggered) {
Iterable> matches = p.getMatcher(elements).findAllNonOverlapping();
allMatches.add(matches);
}
return Iterables.chain(allMatches);
}
/**
* Given a sequence, return the collection of patterns that are triggered by the sequence
* (these patterns are the ones that may potentially match a subsequence in the sequence)
* @param elements Input sequence
* @return Collection of triggered patterns
*/
public Collection> getTriggeredPatterns(List extends T> elements) {
if (patternTrigger != null) {
return patternTrigger.apply(elements);
} else {
return patterns;
}
}
/** Interfaces for optimizing application of many SequencePatterns over a particular sequence */
/**
* A function which returns a collections of patterns that may match when
* given a single node from a larger sequence.
* @param
*/
public static interface NodePatternTrigger extends Function>> {}
/**
* A function which returns a collections of patterns that may match when
* a sequence of nodes. Note that this function needs to be conservative
* and should return ALL patterns that may match.
* @param
*/
public static interface SequencePatternTrigger extends Function, Collection>> {}
/**
* Simple SequencePatternTrigger that looks at each node, and identifies which
* patterns may potentially match each node, and then aggregates (union)
* all these patterns together. Original ordering of patterns is preserved.
* @param
*/
public static class BasicSequencePatternTrigger implements SequencePatternTrigger {
NodePatternTrigger trigger;
public BasicSequencePatternTrigger(NodePatternTrigger trigger) {
this.trigger = trigger;
}
@Override
public Collection> apply(List extends T> elements) {
// Use LinkedHashSet to preserve original ordering of patterns.
Set> triggeredPatterns = new LinkedHashSet>();
for (T node:elements) {
Collection> triggered = trigger.apply(node);
triggeredPatterns.addAll(triggered);
}
return triggeredPatterns;
}
}
}