stream.data.NumericalBinning Maven / Gradle / Ivy
/*
* streams library
*
* Copyright (C) 2011-2012 by Christian Bockermann, Hendrik Blom
*
* streams is a library, API and runtime environment for processing high
* volume data streams. It is composed of three submodules "stream-api",
* "stream-core" and "stream-runtime".
*
* The streams library (and its submodules) is free software: you can
* redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any
* later version.
*
* The stream.ai library (and its submodules) is distributed in the hope
* that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package stream.data;
import java.io.Serializable;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Locale;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import stream.Data;
import stream.Processor;
import stream.annotations.Description;
/**
* @author chris
*
*/
@Description(group = "Data Stream.Processing.Transformations.Data")
public class NumericalBinning implements Processor {
static Logger log = LoggerFactory.getLogger(NumericalBinning.class);
Double minimum = 0.0d;
Double maximum = 10.0d;
Integer bins = 10;
String[] keys = null;
Bucket[] buckets = null;
/**
* @return the minimum
*/
public Double getMinimum() {
return minimum;
}
/**
* @param minimum
* the minimum to set
*/
public void setMinimum(Double minimum) {
this.minimum = minimum;
}
/**
* @return the maximum
*/
public Double getMaximum() {
return maximum;
}
/**
* @param maximum
* the maximum to set
*/
public void setMaximum(Double maximum) {
this.maximum = maximum;
}
/**
* @return the bins
*/
public Integer getBins() {
return bins;
}
/**
* @param bins
* the bins to set
*/
public void setBins(Integer bins) {
this.bins = bins;
buckets = null;
}
/**
* @return the keys
*/
public String[] getKeys() {
return keys;
}
/**
* @param keys
* the keys to set
*/
public void setKeys(String[] keys) {
this.keys = keys;
}
public void setKey(String key) {
if (key != null) {
keys = new String[] { key };
}
}
public String getKey() {
return keys[0];
}
/**
* @see stream.AbstractProcessor#init()
*/
public void init() throws Exception {
buckets = new Bucket[Math.max(1, bins)];
double step = (maximum - minimum) / bins.doubleValue();
double last = minimum;
for (int i = 0; i < buckets.length - 1; i++) {
buckets[i] = new Bucket(last, last + step);
last += step;
}
buckets[buckets.length - 1] = new Bucket(last, maximum);
}
/**
* @see stream.DataProcessor#process(stream.Data)
*/
@Override
public Data process(Data data) {
if (buckets == null) {
try {
init();
} catch (Exception e) {
throw new RuntimeException("Initialization failed: "
+ e.getMessage());
}
}
if (keys == null || keys.length < 1)
return data;
for (String key : keys) {
Serializable value = data.get(key);
if (value instanceof Number) {
Number num = (Number) value;
data.put(key, map(num.doubleValue()));
} else {
try {
Double val = new Double(value.toString());
data.put(key, map(val));
} catch (Exception e) {
log.debug(
"Failed to parse double value from '{}' for attribute '{}'!",
value, key);
}
}
}
return data;
}
protected Bucket map(Double d) {
if (d < buckets[0].upper)
return buckets[0];
for (int i = 0; i < buckets.length; i++)
if (i + 1 < buckets.length && buckets[i + 1].lower > d)
return buckets[i];
return buckets[buckets.length - 1];
}
public class Bucket implements Serializable, Comparable {
/** The unique class ID */
private static final long serialVersionUID = 1874196246174345683L;
final Double lower;
final Double upper;
final String asString;
public Bucket(double low, double high) {
lower = low;
upper = high;
DecimalFormatSymbols otherSymbols = new DecimalFormatSymbols(
Locale.getDefault());
otherSymbols.setDecimalSeparator('.');
DecimalFormat fmt = new DecimalFormat("0.0#####", otherSymbols);
asString = "Range[" + fmt.format(lower) + ";" + fmt.format(upper)
+ ")";
}
public String toString() {
return asString;
}
/**
* @see java.lang.Comparable#compareTo(java.lang.Object)
*/
@Override
public int compareTo(Bucket arg0) {
if (arg0 == null)
return 1;
if (arg0 == this)
return 0;
int r = this.lower.compareTo(arg0.lower);
if (r == 0) {
r = this.upper.compareTo(arg0.upper);
}
return r;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy