All downloads are free. The search and download functionality uses the official Maven repository.

com.datastax.insight.ml.spark.mllib.statistics.MLStatistics Maven / Gradle / Ivy

package com.datastax.insight.ml.spark.mllib.statistics;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.linalg.Matrix;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.stat.KernelDensity;
import org.apache.spark.mllib.stat.Statistics;

/**
 * Static convenience wrappers around Spark MLlib's {@link Statistics} and
 * {@link KernelDensity} APIs.
 */
public class MLStatistics {
    /**
     * Computes the correlation matrix of the given vectors by delegating to
     * {@link Statistics#corr}.
     *
     * @param vectors the input vectors
     * @param method  name of the correlation method, passed through unchanged to
     *                Spark (MLlib documents "pearson" and "spearman" -- confirm
     *                against the Spark version in use)
     * @return the matrix returned by {@code Statistics.corr}
     */
    public static Matrix correlation(JavaRDD<Vector> vectors, String method) {
        return Statistics.corr(vectors.rdd(), method);
    }

    /**
     * Estimates the density at each of {@code values} from a sample of numbers
     * supplied as strings.
     *
     * <p>Each element of {@code data} is parsed with {@link Double#parseDouble},
     * so an element that is not a valid number raises
     * {@link NumberFormatException} when it is parsed (presumably surfaced
     * through the Spark job that evaluates the RDD).
     *
     * @param data      the sample, one numeric string per element
     * @param values    the points at which to evaluate the density estimate
     * @param bandwidth the standard deviation for the Gaussian kernels
     * @return the density estimates produced by {@code KernelDensity.estimate}
     *         for {@code values}
     */
    public static double[] kde(JavaRDD<String> data, double[] values, double bandwidth) {
        // The type arguments are required: with a raw Function the abstract method is
        // call(Object), so a call(String) marked @Override would not compile.
        JavaRDD<Double> pData = data.map(new Function<String, Double>() {
            @Override
            public Double call(String value) throws Exception {
                return Double.parseDouble(value);
            }
        });

        // Construct the density estimator with the sample data
        // and a standard deviation for the Gaussian kernels
        KernelDensity kd = new KernelDensity().setSample(pData).setBandwidth(bandwidth);

        // Find density estimates for the given values
        return kd.estimate(values);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy