edu.stanford.nlp.ling.tokensregex.MultiNodePattern Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
package edu.stanford.nlp.ling.tokensregex;
import edu.stanford.nlp.util.Interval;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
* Matches potentially multiple node (i.e does match across multiple tokens)
*
* @author Angel Chang
*/
public abstract class MultiNodePattern {
int minNodes = 1;
int maxNodes = -1; // Set the max number of nodes this pattern can match
/**
* Tries to match sequence of nodes starting of start
* Returns intervals (token offsets) of when the nodes matches
* @param nodes
* @param start
*/
protected abstract Collection> match(List extends T> nodes, int start);
public int getMinNodes() {
return minNodes;
}
public void setMinNodes(int minNodes) {
this.minNodes = minNodes;
}
public int getMaxNodes() {
return maxNodes;
}
public void setMaxNodes(int maxNodes) {
this.maxNodes = maxNodes;
}
protected static class IntersectMultiNodePattern extends MultiNodePattern {
List> nodePatterns;
protected IntersectMultiNodePattern(List> nodePatterns) {
this.nodePatterns = nodePatterns;
}
protected Collection> match(List extends T> nodes, int start)
{
Collection> matched = null;
for (MultiNodePattern p:nodePatterns) {
Collection> m = p.match(nodes, start);
if (m == null || m.size() == 0) {
return null;
}
if (matched == null) {
matched = m;
} else {
matched.retainAll(m);
if (m.size() == 0) {
return null;
}
}
}
return matched;
}
}
protected static class UnionMultiNodePattern extends MultiNodePattern {
List> nodePatterns;
protected UnionMultiNodePattern(List> nodePatterns) {
this.nodePatterns = nodePatterns;
}
protected Collection> match(List extends T> nodes, int start)
{
Collection> matched = null;
for (MultiNodePattern p:nodePatterns) {
Collection> m = p.match(nodes, start);
if (m != null && m.size() > 0) {
if (matched == null) {
matched = m;
} else {
for (Interval i:m) {
if (!matched.contains(i)) {
matched.add(i);
}
}
}
}
}
return matched;
}
}
}