// com.datastax.insight.ml.spark.mllib.statistics.MLStatistics Maven / Gradle / Ivy
package com.datastax.insight.ml.spark.mllib.statistics;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.linalg.Matrix;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.stat.KernelDensity;
import org.apache.spark.mllib.stat.Statistics;
/**
 * Static helpers that wrap Spark MLlib statistics routines
 * ({@link Statistics} and {@link KernelDensity}) behind plain Java signatures.
 */
public class MLStatistics {

    /**
     * Computes the correlation matrix of the given vectors by delegating to
     * {@link Statistics#corr}.
     *
     * @param vectors RDD of feature vectors; its underlying Scala RDD is handed to Spark
     * @param method  correlation method name, passed through unchanged to
     *                {@code Statistics.corr} (Spark documents {@code "pearson"} and
     *                {@code "spearman"})
     * @return the correlation matrix produced by Spark
     */
    public static Matrix correlation(JavaRDD<Vector> vectors, String method) {
        return Statistics.corr(vectors.rdd(), method);
    }

    /**
     * Estimates kernel densities at the requested points from textual sample data.
     *
     * <p>Each element of {@code data} is parsed with {@link Double#parseDouble(String)};
     * an element that is not a valid floating-point literal makes the parse function
     * throw {@link NumberFormatException} when the Spark job evaluates it.
     *
     * @param data      RDD of sample values encoded as strings
     * @param values    points at which the density is evaluated
     * @param bandwidth standard deviation used for the Gaussian kernels
     * @return the density estimates returned by {@code KernelDensity.estimate(values)}
     */
    public static double[] kde(JavaRDD<String> data, double[] values, double bandwidth) {
        // Explicit type arguments are required here: with a raw Function the
        // abstract method is call(Object), so call(String) would not override it.
        JavaRDD<Double> pData = data.map(new Function<String, Double>() {
            @Override
            public Double call(String value) throws Exception {
                return Double.parseDouble(value);
            }
        });

        // Construct the density estimator with the sample data
        // and a standard deviation for the Gaussian kernels
        KernelDensity kd = new KernelDensity().setSample(pData).setBandwidth(bandwidth);

        // Find density estimates for the given values
        return kd.estimate(values);
    }
}