com.datastax.data.prepare.spark.dataset.params.DataBinning Maven / Gradle / Ivy
The newest version!
package com.datastax.data.prepare.spark.dataset.params;
import com.datastax.data.prepare.util.Consts;
import com.datastax.data.prepare.util.CustomException;
import com.google.common.base.Strings;
public class DataBinning extends Attributes {
private String binningType; // Binning type; one of: size-based, level-based (binning), frequency-based, user-defined, or entropy-based discretization
private int binSize; // The meaning of this value differs for each binningType
private boolean defineBoundaries; // Applies when binningType is level-based discretization; true means the upper and lower bounds of the levels are specified by the user
private double minValue; // Lower bound for level-based discretization
private double maxValue; // Upper bound for level-based discretization
// NOTE(review): presumably the class labels for user-defined discretization — confirm against Builder.params(...)
private String[] classNames;
// NOTE(review): presumably the upper limit of each user-defined class, parallel to classNames — confirm against Builder.params(...)
private double[] upperLimits;
public static class Builder extends Attributes {
// Builder-side state; each field is assigned by the Builder method of the same name
// (classNames and upperLimits are handled by params(...)).
private String binningType;
private int binSize;
// Stays false unless defineBoundaries(boolean) is called.
private boolean defineBoundaries = false;
// Stays 0 unless minValue(double) is called.
private double minValue = 0;
// Stays 0 unless maxValue(double) is called.
private double maxValue = 0;
private String[] classNames;
private double[] upperLimits;
/**
 * Creates an empty builder.
 *
 * <p>No state is assigned here; the builder's own fields keep the values
 * given by their declarations until a setter-style method is called.
 */
public Builder() {
    // Intentionally empty: configuration happens through the chained methods.
}
/**
 * Stores the attribute selector on the inherited {@code Attributes} state.
 *
 * @param attributeSelector value forwarded to {@code Attributes.setAttributeSelector}
 * @return this builder, to allow call chaining
 */
public Builder attributeSelector(String attributeSelector) {
    // Call the superclass setter explicitly so the inherited implementation is used.
    super.setAttributeSelector(attributeSelector);

    return this;
}
/**
 * Stores the invert-selection flag on the inherited {@code Attributes} state.
 *
 * @param invertSelection value forwarded to {@code Attributes.setInvertSelection}
 * @return this builder, to allow call chaining
 */
public Builder invertSelection(boolean invertSelection) {
    // Call the superclass setter explicitly so the inherited implementation is used.
    super.setInvertSelection(invertSelection);

    return this;
}
/**
 * Stores the attribute on the inherited {@code Attributes} state.
 *
 * @param attribute value forwarded to {@code Attributes.setAttribute}
 * @return this builder, to allow call chaining
 */
public Builder attribute(String attribute) {
    // Call the superclass setter explicitly so the inherited implementation is used.
    super.setAttribute(attribute);

    return this;
}
/**
 * Stores the regular expression on the inherited {@code Attributes} state.
 *
 * @param regularExpression value forwarded to {@code Attributes.setRegularExpression}
 * @return this builder, to allow call chaining
 */
public Builder regularExpression(String regularExpression) {
    // Call the superclass setter explicitly so the inherited implementation is used.
    super.setRegularExpression(regularExpression);

    return this;
}
/**
 * Stores the value type on the inherited {@code Attributes} state.
 *
 * @param valueType value forwarded to {@code Attributes.setValueType}
 * @return this builder, to allow call chaining
 */
public Builder valueType(String valueType) {
    // Call the superclass setter explicitly so the inherited implementation is used.
    super.setValueType(valueType);

    return this;
}
/**
 * Records the binning type on this builder.
 *
 * @param binningType the binning type to use; stored as given, with no validation here
 * @return this builder, to allow call chaining
 */
public Builder binningType(String binningType) {
    this.binningType = binningType;

    return this;
}
/**
 * Records the bin size on this builder.
 *
 * @param binSize the bin size; stored as given, with no range check here
 * @return this builder, to allow call chaining
 */
public Builder binSize(int binSize) {
    this.binSize = binSize;

    return this;
}
/**
 * Records the define-boundaries flag on this builder.
 *
 * @param defineBoundaries the flag value to store, replacing the field's initial {@code false}
 * @return this builder, to allow call chaining
 */
public Builder defineBoundaries(boolean defineBoundaries) {
    this.defineBoundaries = defineBoundaries;

    return this;
}
/**
 * Records the minimum value on this builder.
 *
 * @param minValue the value to store, replacing the field's initial {@code 0}
 * @return this builder, to allow call chaining
 */
public Builder minValue(double minValue) {
    this.minValue = minValue;

    return this;
}
/**
 * Records the maximum value on this builder.
 *
 * @param maxValue the value to store, replacing the field's initial {@code 0}
 * @return this builder, to allow call chaining
 */
public Builder maxValue(double maxValue) {
    this.maxValue = maxValue;

    return this;
}
public Builder params(String[] classNames, String[] upperLimits) {
if(upperLimits == null || classNames == null) {
throw new CustomException("用户自定离散化参数为空");
}
if(upperLimits.length != classNames.length) {
throw new CustomException("用户自定离散化的参数长度不等");
}
int count = 0;
boolean negativeInfinity = false;
boolean positiveInfinity = false;
boolean repeat;
int position = 0;
int position1 = 0;
int position2 = 0;
double[] temp1 = new double[upperLimits.length];
int[] signs = new int[upperLimits.length];
double temp3 ;
for(int i=0; i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy