edu.stanford.nlp.ling.tokensregex.ComplexNodePattern Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
The newest version!
package edu.stanford.nlp.ling.tokensregex;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import java.util.*;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Pattern for matching a complex data structure
*
* @author Angel Chang
*/
public class ComplexNodePattern extends NodePattern {
// TODO: Change/Augment from list of class to pattern to list of conditions for matching
// (so we can have more flexible matches)
// NOTE(review): the generic type arguments on the two fields below appear to have been
// stripped during extraction ("List>" is not valid Java) - restore them from upstream.
/** Ordered (key, pattern) pairs; a token matches only when every pair's pattern matches. */
private final List> annotationPatterns;
/** Applied to (token, key) to fetch the annotation that the paired pattern is tested against. */
private final BiFunction getter;
/**
 * Creates a pattern backed by the given list of (key, pattern) pairs.
 * The list is stored as passed in (no copy is made), so {@link #add} appends to that same list.
 *
 * @param getter function applied to (token, key) to obtain the annotation to test
 * @param annotationPatterns (key, pattern) pairs that must all match
 */
// NOTE(review): type arguments in this signature appear stripped by extraction; confirm upstream.
public ComplexNodePattern(BiFunction getter, List> annotationPatterns) {
  this.getter = getter;
  this.annotationPatterns = annotationPatterns;
}
/**
 * Creates a pattern from an explicit sequence of (key, pattern) pairs.
 *
 * <p>The pairs are copied into a growable list. Wrapping the array with
 * {@code Arrays.asList} alone produces a fixed-size view, on which a later
 * {@link #add} would fail with {@link UnsupportedOperationException}.
 *
 * @param getter function applied to (token, key) to obtain the annotation to test
 * @param annotationPatterns (key, pattern) pairs that must all match
 */
public ComplexNodePattern(BiFunction getter, Pair... annotationPatterns) {
  this.annotationPatterns = new ArrayList<>(Arrays.asList(annotationPatterns));
  this.getter = getter;
}
/**
 * Convenience constructor for a pattern that tests a single annotation.
 *
 * @param getter function applied to (token, key) to obtain the annotation to test
 * @param key key of the annotation to test
 * @param pattern pattern the annotation found under {@code key} must match
 */
public ComplexNodePattern(BiFunction getter, K key, NodePattern pattern) {
// Delegates to the varargs constructor with one (key, pattern) pair.
this(getter, Pair.makePair(key,pattern));
}
/**
 * Returns a read-only view of the (key, pattern) pairs held by this pattern.
 * The view wraps the internal list rather than copying it.
 */
// NOTE(review): the return type's type arguments appear stripped by extraction; confirm upstream.
public List> getAnnotationPatterns() {
return Collections.unmodifiableList(annotationPatterns);
}
// Matches strings that contain none of the regex metacharacters
//   [ ] ? . \ ^ $ ( ) * + { } |
// (the empty string matches too). Such strings can be compared as plain literals.
// TODO: are there other special characters that should be excluded?
private static final Pattern LITERAL_PATTERN = Pattern.compile("[^\\[\\]?.\\\\^$()*+{}|]*");
// Earlier, stricter whitelist variant kept for reference:
//private static final Pattern LITERAL_PATTERN = Pattern.compile("[A-Za-z0-9_\\-']*");
/**
 * Builds a string matcher for {@code regex}, using a plain string comparison
 * when a real regular expression is not needed.
 *
 * <p>The expression is treated as a literal when {@link Pattern#LITERAL} is set in
 * {@code flags} or when it is matched in full by {@code LITERAL_PATTERN}. For a literal,
 * case-insensitive matching is enabled if either {@link Pattern#CASE_INSENSITIVE} or
 * {@link Pattern#UNICODE_CASE} is set.
 *
 * @param regex expression to match
 * @param flags {@link Pattern} flag bits
 * @return a {@code StringAnnotationPattern} for literals, otherwise a
 *         {@code StringAnnotationRegexPattern}
 */
public static NodePattern newStringRegexPattern(String regex, int flags) {
  boolean literalRequested = (flags & Pattern.LITERAL) != 0;
  if (!literalRequested && !LITERAL_PATTERN.matcher(regex).matches()) {
    // Genuine regular expression: hand it over with the flags untouched.
    return new StringAnnotationRegexPattern(regex, flags);
  }
  int caseBits = flags & (Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
  int stringMatchFlags = 0;
  if (caseBits != 0) {
    stringMatchFlags = CASE_INSENSITIVE | UNICODE_CASE;
  }
  return new StringAnnotationPattern(regex, stringMatchFlags);
}
/**
 * Creates a pattern from a map of attribute names to match expressions.
 * An empty pattern is created first and then filled in by
 * {@code populate(env, attributes, getKey)}.
 *
 * @param env environment handed to {@code populate}
 * @param attributes attribute name to match expression
 * @param getter function applied to (token, key) to obtain the annotation to test
 * @param getKey resolves an (env, attribute name) pair to an annotation key
 * @return the populated pattern
 */
// NOTE(review): type arguments in this signature appear stripped by extraction; confirm upstream.
public static ComplexNodePattern valueOf(
Env env, Map attributes, BiFunction getter, Function,K> getKey)
{
  ComplexNodePattern pattern = new ComplexNodePattern<>(getter, new ArrayList<>(attributes.size()));
  pattern.populate(env, attributes, getKey);
  return pattern;
}
/**
 * Parses attribute match expressions and appends the resulting patterns to this pattern.
 *
 * <p>Each attribute value must have one of these forms:
 * <ul>
 *   <li>{@code "text"} - exact string match; an escaped quote inside is unescaped</li>
 *   <li>{@code /regex/} - regular expression; an escaped forward slash inside is unescaped</li>
 *   <li>{@code ::IS_NIL}, {@code ::NOT_EXISTS}, {@code ::EXISTS}, {@code ::NOT_NIL},
 *       {@code ::IS_NUM}, or another {@code ::NAME} bound to a {@code NodePattern} in
 *       {@code env}</li>
 *   <li>{@code <=n}, {@code >=n}, {@code ==n}, {@code !=n}, {@code >n}, {@code <n} -
 *       numeric comparison against {@code n}</li>
 *   <li>a bare token accepted by {@code [A-Za-z0-9_+-.]+} - exact string match</li>
 * </ul>
 *
 * @param env environment supplying default flags, regex expansion and custom
 *            {@code ::NAME} patterns; may be {@code null}, in which case flags are 0,
 *            regexes are used unexpanded and custom names are rejected
 * @param attributes attribute name to match expression
 * @param getKey resolves an (env, attribute name) pair to an annotation key;
 *               a {@code null} result marks the attribute as unknown
 * @throws IllegalArgumentException if an attribute name is unknown or its value is missing
 *         or not in a recognised form (this includes {@link NumberFormatException} for a
 *         malformed comparison operand)
 */
// NOTE(review): type arguments in this signature appear stripped by extraction; confirm upstream.
protected void populate(Env env, Map attributes, Function,K> getKey) {
  for (String attr:attributes.keySet()) {
    String value = attributes.get(attr);
    K c = getKey.apply(Pair.makePair(env, attr));
    if (c == null) {
      throw new IllegalArgumentException("Unknown annotation key: " + attr);
    }
    if (value == null) {
      throw new IllegalArgumentException("Invalid value " + value + " for key: " + attr);
    }
    // A delimited value needs an opening AND a closing delimiter; a lone " or / would
    // otherwise reach substring(1, 0) and fail with an index error.
    boolean delimited = value.length() >= 2;
    if (delimited && value.startsWith("\"") && value.endsWith("\"")) {
      String text = value.substring(1, value.length() - 1).replace("\\\"", "\""); // Unescape quotes
      // env is optional in the other branches, so it must not be dereferenced blindly here.
      int matchFlags = (env != null) ? env.defaultStringMatchFlags : 0;
      add(c, new StringAnnotationPattern(text, matchFlags));
    } else if (delimited && value.startsWith("/") && value.endsWith("/")) {
      String body = value.substring(1, value.length() - 1).replace("\\/", "/"); // Unescape forward slash
      String regex = (env != null) ? env.expandStringRegex(body) : body;
      int flags = (env != null) ? env.defaultStringPatternFlags : 0;
      add(c, newStringRegexPattern(regex, flags));
    } else if (value.startsWith("::")) {
      switch (value) {
        case "::IS_NIL":
        case "::NOT_EXISTS":
          add(c, new NilAnnotationPattern());
          break;
        case "::EXISTS":
        case "::NOT_NIL":
          add(c, new NotNilAnnotationPattern());
          break;
        case "::IS_NUM":
          add(c, new NumericAnnotationPattern(0, NumericAnnotationPattern.CmpType.IS_NUM));
          break;
        default:
          // Anything else must be a name bound to a NodePattern in the environment.
          Object custom = (env != null) ? env.get(value) : null;
          if (!(custom instanceof NodePattern)) {
            throw new IllegalArgumentException("Invalid value " + value + " for key: " + attr);
          }
          add(c, (NodePattern) custom);
          break;
      }
    } else if (value.startsWith("<=")) {
      // Two-character operators are tested before ">" and "<" so they are not misread.
      double v = Double.parseDouble(value.substring(2));
      add(c, new NumericAnnotationPattern(v, NumericAnnotationPattern.CmpType.LE));
    } else if (value.startsWith(">=")) {
      double v = Double.parseDouble(value.substring(2));
      add(c, new NumericAnnotationPattern(v, NumericAnnotationPattern.CmpType.GE));
    } else if (value.startsWith("==")) {
      double v = Double.parseDouble(value.substring(2));
      add(c, new NumericAnnotationPattern(v, NumericAnnotationPattern.CmpType.EQ));
    } else if (value.startsWith("!=")) {
      double v = Double.parseDouble(value.substring(2));
      add(c, new NumericAnnotationPattern(v, NumericAnnotationPattern.CmpType.NE));
    } else if (value.startsWith(">")) {
      double v = Double.parseDouble(value.substring(1));
      add(c, new NumericAnnotationPattern(v, NumericAnnotationPattern.CmpType.GT));
    } else if (value.startsWith("<")) {
      double v = Double.parseDouble(value.substring(1));
      add(c, new NumericAnnotationPattern(v, NumericAnnotationPattern.CmpType.LT));
    } else if (value.matches("[A-Za-z0-9_+-.]+")) {
      // Inside the character class "+-." is a range (U+002B..U+002E), so ',' is accepted too.
      int matchFlags = (env != null) ? env.defaultStringMatchFlags : 0;
      add(c, new StringAnnotationPattern(value, matchFlags));
    } else {
      throw new IllegalArgumentException("Invalid value " + value + " for key: " + attr);
    }
  }
}
/**
 * Appends a (key, pattern) pair to the list of patterns a token must satisfy.
 *
 * @param c key of the annotation to test
 * @param pattern pattern the annotation found under {@code c} must match
 */
public void add(K c, NodePattern pattern) {
// Relies on the backing list being modifiable.
annotationPatterns.add(Pair.makePair(c, pattern));
}
/**
 * Tests whether the token satisfies every (key, pattern) pair.
 * Pairs are checked in list order and checking stops at the first failure;
 * a pattern with no pairs matches any token.
 *
 * @param token token to test
 * @return {@code true} if every pattern matches the annotation obtained from
 *         {@code getter} for its key
 */
@Override
public boolean match(M token)
{
  return annotationPatterns.stream()
      .allMatch(entry -> entry.second.match(getter.apply(token, entry.first)));
}
/**
 * Matches the token and reports the per-key match results.
 *
 * @param token token to test
 * @return a map from key to that key's match result if every pattern matched,
 *         otherwise {@code null}
 */
@Override
public Object matchWithResult(M token) {
  Map collected = new HashMap<>();
  return match(token, collected) ? collected : null;
}
/**
 * Matches the token against every (key, pattern) pair, recording each pattern's result.
 * Pairs are processed in list order. A {@code null} result from a pattern counts as a
 * failed match and stops processing; results recorded before the failure remain in
 * {@code matchResults}.
 *
 * @param token token to test
 * @param matchResults receives key to match result for each pattern that matched
 * @return {@code true} if every pattern produced a non-null result
 */
protected boolean match(M token, Map matchResults)
{
  // allMatch on this sequential stream short-circuits exactly where the loop would break.
  return annotationPatterns.stream().allMatch(entry -> {
    Object outcome = entry.second.matchWithResult(getter.apply(token, entry.first));
    if (outcome == null) {
      return false;
    }
    matchResults.put(entry.first, outcome);
    return true;
  });
}
/**
 * Renders each pair as its key immediately followed by its pattern.
 * A {@code ", "} separator is written before a pair whenever earlier output is non-empty.
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder();
  annotationPatterns.forEach(entry -> {
    if (text.length() != 0) {
      text.append(", ");
    }
    text.append(entry.first);
    text.append(entry.second);
  });
  return text.toString();
}
public static class NilAnnotationPattern extends NodePattern