edu.stanford.nlp.pipeline.CoreMapAttributeAggregator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
package edu.stanford.nlp.pipeline;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.stats.IntCounter;
import edu.stanford.nlp.util.ArrayMap;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import java.util.*;
/**
* Functions for aggregating token attributes.
*
* @author Angel Chang
*/
public abstract class CoreMapAttributeAggregator
{
/**
 * Returns the table of default aggregators.
 *
 * @return the {@code DEFAULT_AGGREGATORS} map itself (the same instance, not a copy)
 */
public static Map getDefaultAggregators() {
  return DEFAULT_AGGREGATORS;
}
/**
 * Looks up the aggregator registered under the given string in {@code AGGREGATOR_LOOKUP}.
 *
 * @param str key to look up
 * @return the result of {@code AGGREGATOR_LOOKUP.get(str)}
 */
public static CoreMapAttributeAggregator getAggregator(String str) {
  CoreMapAttributeAggregator found = AGGREGATOR_LOOKUP.get(str);
  return found;
}
/**
 * Combines the values stored under {@code key} in the elements of {@code in} into a single result.
 * The implementations visible in this file read each element through {@code CoreMap.get(key)},
 * and most of them return {@code null} when {@code in} is {@code null}.
 *
 * @param key annotation key whose values are aggregated
 * @param in elements to aggregate over (treated as {@code CoreMap}s by the implementations in this file)
 * @return the aggregated value; its type depends on the implementation
 */
public abstract Object aggregate(Class key, List in);
/** Returns the first non-null value found under the key, scanning from the start of the list. */
public static final CoreMapAttributeAggregator FIRST_NON_NIL = new CoreMapAttributeAggregator() {
  @Override
  public Object aggregate(Class key, List in) {
    if (in == null) {
      return null;
    }
    for (CoreMap element : in) {
      Object value = element.get(key);
      if (value == null) {
        continue;
      }
      return value;
    }
    // Every element had a null value (or the list was empty).
    return null;
  }
};
/** Returns the value under the key for the first element, even when that value is null. */
public static final CoreMapAttributeAggregator FIRST = new CoreMapAttributeAggregator() {
  @Override
  public Object aggregate(Class key, List in) {
    if (in == null || in.isEmpty()) {
      return null;
    }
    CoreMap head = in.get(0);
    return head.get(key);
  }
};
/** Returns the last non-null value found under the key, scanning backwards from the end of the list. */
public static final CoreMapAttributeAggregator LAST_NON_NIL = new CoreMapAttributeAggregator() {
  @Override
  public Object aggregate(Class key, List in) {
    if (in == null) {
      return null;
    }
    int idx = in.size();
    while (--idx >= 0) {
      CoreMap element = in.get(idx);
      Object value = element.get(key);
      if (value != null) {
        return value;
      }
    }
    // Every element had a null value (or the list was empty).
    return null;
  }
};
/** Returns the value under the key for the last element, even when that value is null. */
public static final CoreMapAttributeAggregator LAST = new CoreMapAttributeAggregator() {
  @Override
  public Object aggregate(Class key, List in) {
    if (in == null) {
      return null;
    }
    int lastIndex = in.size() - 1;
    if (lastIndex < 0) {
      // Empty list: nothing to read.
      return null;
    }
    CoreMap tail = in.get(lastIndex);
    return tail.get(key);
  }
};
/**
 * Concatenates list-valued attributes: for every element whose value under the key is a
 * {@code List}, all items of that list are appended to the result in encounter order.
 * Values that are null or not a {@code List} are skipped.
 */
public static final class ConcatListAggregator extends CoreMapAttributeAggregator {
  public ConcatListAggregator() {
  }

  /**
   * @param key annotation key to read from each element
   * @param in elements to aggregate; may be null
   * @return a new list with the concatenated items, or null when {@code in} is null
   */
  @Override
  public Object aggregate(Class key, List in) {
    if (in == null) {
      return null;
    }
    List merged = new ArrayList<>();
    for (CoreMap element : in) {
      Object value = element.get(key);
      // instanceof is false for null, so no separate null check is needed.
      if (value instanceof List) {
        merged.addAll((List) value);
      }
    }
    return merged;
  }
}
public static final class ConcatCoreMapListAggregator extends CoreMapAttributeAggregator {
boolean concatSelf = false;
public ConcatCoreMapListAggregator()
{
}
public ConcatCoreMapListAggregator(boolean concatSelf)
{
this.concatSelf = concatSelf;
}
public Object aggregate(Class key, List in) {
if (in == null) return null;
List res = new ArrayList<>();
for (CoreMap cm:in) {
Object obj = cm.get(key);
boolean added = false;
if (obj != null) {
if (obj instanceof List) {
res.addAll( (List) obj);
added = true;
}
}
if (!added && concatSelf) {
res.add((T) cm);
}
}
return res;
}
}
/** List-concatenating aggregator created with concatSelf = true: elements without a list value under the key are added themselves. */
public static final ConcatCoreMapListAggregator CONCAT_TOKENS = new ConcatCoreMapListAggregator<>(true);
/** Constructed exactly like CONCAT_TOKENS (concatSelf = true). */
public static final ConcatCoreMapListAggregator CONCAT_COREMAP = new ConcatCoreMapListAggregator<>(true);
public static final class ConcatAggregator extends CoreMapAttributeAggregator {
String delimiter;
public ConcatAggregator(String delimiter)
{
this.delimiter = delimiter;
}
public Object aggregate(Class key, List in) {
if (in == null) return null;
StringBuilder sb = new StringBuilder();
for (CoreMap cm:in) {
Object obj = cm.get(key);
if (obj != null) {
if (sb.length() > 0) {
sb.append(delimiter);
}
sb.append(obj);
}
}
return sb.toString();
}
}
public static final class ConcatTextAggregator extends CoreMapAttributeAggregator {
String delimiter;
public ConcatTextAggregator(String delimiter)
{
this.delimiter = delimiter;
}
public Object aggregate(Class key, List in) {
if (in == null) return null;
String text = ChunkAnnotationUtils.getTokenText(in, key);
return text;
}
}
/** Joins the string form of each non-null attribute value, separated by a single space. */
public static final CoreMapAttributeAggregator CONCAT = new ConcatAggregator(" ");
/** Text aggregator backed by ChunkAnnotationUtils.getTokenText; constructed with a single-space delimiter. */
public static final CoreMapAttributeAggregator CONCAT_TEXT = new ConcatTextAggregator(" ");
/**
 * Returns the number of elements in the input list; the key is ignored.
 * A null list yields null, matching the other aggregators in this class, instead of
 * failing with a NullPointerException.
 */
public static final CoreMapAttributeAggregator COUNT = new CoreMapAttributeAggregator() {
  @Override
  public Object aggregate(Class key, List in) {
    if (in == null) {
      return null;
    }
    return in.size();
  }
};
/**
 * Sums the attribute values as doubles. {@code Number} values contribute their
 * {@code doubleValue()}, {@code String} values are parsed with {@code Double.parseDouble},
 * null values are skipped, and any other type raises a {@code RuntimeException}.
 */
public static final CoreMapAttributeAggregator SUM = new CoreMapAttributeAggregator() {
  @Override
  public Object aggregate(Class key, List in) {
    if (in == null) {
      return null;
    }
    double total = 0;
    for (CoreMap element : in) {
      Object value = element.get(key);
      if (value == null) {
        continue;
      }
      if (value instanceof Number) {
        total += ((Number) value).doubleValue();
      } else if (value instanceof String) {
        total += Double.parseDouble((String) value);
      } else {
        throw new RuntimeException("Cannot sum attribute " + key + ", object of type: " + value.getClass());
      }
    }
    return total;
  }
};
/**
 * Returns the smallest non-null attribute value according to {@code compareTo}.
 * Null values are skipped; a non-null value that is not {@code Comparable} raises a
 * {@code RuntimeException}. The result is null when no value was found.
 */
public static final CoreMapAttributeAggregator MIN = new CoreMapAttributeAggregator() {
  @Override
  public Object aggregate(Class key, List in) {
    if (in == null) {
      return null;
    }
    Comparable smallest = null;
    for (CoreMap element : in) {
      Object value = element.get(key);
      if (value == null) {
        continue;
      }
      if (!(value instanceof Comparable)) {
        throw new RuntimeException("Cannot get min of attribute " + key + ", object of type: " + value.getClass());
      }
      Comparable candidate = (Comparable) value;
      // The first value seen becomes the initial minimum; later ones must compare strictly lower.
      if (smallest == null || candidate.compareTo(smallest) < 0) {
        smallest = candidate;
      }
    }
    return smallest;
  }
};
/**
 * Returns the largest non-null attribute value according to {@code compareTo}.
 * Null values are skipped; a non-null value that is not {@code Comparable} raises a
 * {@code RuntimeException}. The result is null when no value was found.
 */
public static final CoreMapAttributeAggregator MAX = new CoreMapAttributeAggregator() {
  @Override
  public Object aggregate(Class key, List in) {
    if (in == null) {
      return null;
    }
    Comparable largest = null;
    for (CoreMap element : in) {
      Object value = element.get(key);
      if (value == null) {
        continue;
      }
      if (!(value instanceof Comparable)) {
        throw new RuntimeException("Cannot get max of attribute " + key + ", object of type: " + value.getClass());
      }
      Comparable candidate = (Comparable) value;
      // The first value seen becomes the initial maximum; later ones must compare strictly higher.
      if (largest == null || candidate.compareTo(largest) > 0) {
        largest = candidate;
      }
    }
    return largest;
  }
};
public static final class MostFreqAggregator extends CoreMapAttributeAggregator {
Set