All downloads are free. Search and download functionality uses the official Maven repository.

com.datastax.data.prepare.spark.dataset.params.DataBinning Maven / Gradle / Ivy

The newest version!
package com.datastax.data.prepare.spark.dataset.params;

import com.datastax.data.prepare.util.Consts;
import com.datastax.data.prepare.util.CustomException;
import com.google.common.base.Strings;

public class DataBinning extends Attributes {
    private String binningType;  // binning type: discretization by size, by binning, by frequency, by user specification, or by entropy
    private int binSize;  // its meaning differs depending on binningType
    private boolean defineBoundaries;  // applies when binningType is discretization by binning; true means the lower and upper bounds of the bins are specified by the user
    private double minValue;  // lower bound for discretization by binning
    private double maxValue;  // upper bound for discretization by binning
    private String[] classNames;  // NOTE(review): presumably the class labels for user-defined discretization (see Builder.params) - confirm
    private double[] upperLimits;  // NOTE(review): presumably the upper limit of each user-defined class (see Builder.params) - confirm

    public static class Builder extends Attributes {
        private String binningType;  // no initializer: stays null until binningType(...) is called
        private int binSize;  // no initializer: stays 0 until binSize(...) is called
        private boolean defineBoundaries = false;  // defaults to false
        private double minValue = 0;  // defaults to 0
        private double maxValue = 0;  // defaults to 0
        private String[] classNames;  // NOTE(review): presumably filled in by params(...) - confirm against the full method
        private double[] upperLimits;  // NOTE(review): presumably filled in by params(...), which receives the limits as strings - confirm
        /**
         * Creates a builder whose state comes only from the field initializers:
         * {@code defineBoundaries = false}, {@code minValue = 0} and
         * {@code maxValue = 0}; every other field keeps its Java default.
         */
        public Builder() {
        }

        /**
         * Stores the attribute selector by delegating to the inherited
         * {@code setAttributeSelector} of {@code Attributes}.
         *
         * @param attributeSelector value passed unchanged to the superclass setter
         * @return this builder, so calls can be chained
         */
        public Builder attributeSelector(String attributeSelector) {
            // Explicit super call: always uses the Attributes implementation.
            super.setAttributeSelector(attributeSelector);
            return this;
        }

        /**
         * Stores the invert-selection flag by delegating to the inherited
         * {@code setInvertSelection} of {@code Attributes}.
         *
         * @param invertSelection flag passed unchanged to the superclass setter
         * @return this builder, so calls can be chained
         */
        public Builder invertSelection(boolean invertSelection) {
            // Explicit super call: always uses the Attributes implementation.
            super.setInvertSelection(invertSelection);
            return this;
        }

        /**
         * Stores the attribute by delegating to the inherited
         * {@code setAttribute} of {@code Attributes}.
         *
         * @param attribute value passed unchanged to the superclass setter
         * @return this builder, so calls can be chained
         */
        public Builder attribute(String attribute) {
            // Explicit super call: always uses the Attributes implementation.
            super.setAttribute(attribute);
            return this;
        }

        /**
         * Stores the regular expression by delegating to the inherited
         * {@code setRegularExpression} of {@code Attributes}.
         *
         * @param regularExpression value passed unchanged to the superclass setter;
         *                          it is not compiled or validated here
         * @return this builder, so calls can be chained
         */
        public Builder regularExpression(String regularExpression) {
            // Explicit super call: always uses the Attributes implementation.
            super.setRegularExpression(regularExpression);
            return this;
        }

        /**
         * Stores the value type by delegating to the inherited
         * {@code setValueType} of {@code Attributes}.
         *
         * @param valueType value passed unchanged to the superclass setter
         * @return this builder, so calls can be chained
         */
        public Builder valueType(String valueType) {
            // Explicit super call: always uses the Attributes implementation.
            super.setValueType(valueType);
            return this;
        }

        /**
         * Records the binning type on this builder.
         *
         * @param binningType binning type to store; it is not validated here
         * @return this builder, so calls can be chained
         */
        public Builder binningType(String binningType) {
            this.binningType = binningType;
            return this;
        }

        /**
         * Records the bin size on this builder.
         *
         * @param binSize bin size to store; no range check is performed here
         * @return this builder, so calls can be chained
         */
        public Builder binSize(int binSize) {
            this.binSize = binSize;
            return this;
        }

        /**
         * Records the define-boundaries flag on this builder, replacing the
         * default of {@code false}.
         *
         * @param defineBoundaries flag to store
         * @return this builder, so calls can be chained
         */
        public Builder defineBoundaries(boolean defineBoundaries) {
            this.defineBoundaries = defineBoundaries;
            return this;
        }

        /**
         * Records the minimum value on this builder, replacing the default of
         * {@code 0}.
         *
         * @param minValue value to store; it is not compared against
         *                 {@code maxValue} here
         * @return this builder, so calls can be chained
         */
        public Builder minValue(double minValue) {
            this.minValue = minValue;
            return this;
        }

        /**
         * Records the maximum value on this builder, replacing the default of
         * {@code 0}.
         *
         * @param maxValue value to store; it is not compared against
         *                 {@code minValue} here
         * @return this builder, so calls can be chained
         */
        public Builder maxValue(double maxValue) {
            this.maxValue = maxValue;
            return this;
        }

        public Builder params(String[] classNames, String[] upperLimits) {
            if(upperLimits == null || classNames == null) {
                throw new CustomException("用户自定离散化参数为空");
            }
            if(upperLimits.length != classNames.length) {
                throw new CustomException("用户自定离散化的参数长度不等");
            }
            int count = 0;
            boolean negativeInfinity = false;
            boolean positiveInfinity = false;
            boolean repeat;
            int position = 0;
            int position1 = 0;
            int position2 = 0;
            double[] temp1 = new double[upperLimits.length];
            int[] signs = new int[upperLimits.length];
            double temp3 ;
            for(int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy