// com.datastax.insight.ml.spark.mllib.feature.SVD Maven / Gradle / Ivy
package com.datastax.insight.ml.spark.mllib.feature;
import com.datastax.insight.core.driver.SparkContextBuilder;
import com.datastax.insight.spec.RDDOperator;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.linalg.Matrix;
import org.apache.spark.mllib.linalg.SingularValueDecomposition;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.distributed.RowMatrix;
import java.util.Arrays;
/**
 * Operator that runs a truncated singular value decomposition over an RDD of
 * MLlib vectors and exposes the rows of the left singular matrix U.
 */
public class SVD implements RDDOperator {

    /**
     * Computes the SVD of the row matrix formed by {@code data} and returns the
     * rows of the resulting U factor.
     *
     * <p>The returned RDD is the distributed row RDD of U itself; nothing is
     * collected to the driver. Callers that traverse the result more than once
     * may want to cache it.
     *
     * @param data     rows of the input matrix; elements are expected to be
     *                 {@code org.apache.spark.mllib.linalg.Vector} instances
     *                 (the parameter is declared raw, so this is not enforced
     *                 at compile time)
     * @param topN     number of leading singular values requested, passed as
     *                 the first argument of {@code RowMatrix.computeSVD}
     * @param computeU must be {@code true}; without U there is nothing to return
     * @param rCond    reciprocal condition number, passed through to
     *                 {@code RowMatrix.computeSVD}
     * @return an RDD holding the rows of U
     * @throws IllegalArgumentException if {@code computeU} is {@code false}
     */
    public static JavaRDD compute(JavaRDD data, int topN, boolean computeU, double rCond) {
        // Fail before the expensive decomposition: when U is not requested,
        // svd.U() is null and dereferencing it would only yield an opaque NPE.
        if (!computeU) {
            throw new IllegalArgumentException(
                    "computeU must be true: SVD.compute returns the rows of U");
        }

        // The public signature stays raw for existing callers; narrow the
        // element type locally so the rest of the method is type-checked.
        @SuppressWarnings("unchecked")
        JavaRDD<Vector> rows = (JavaRDD<Vector>) data;
        RowMatrix mat = new RowMatrix(rows.rdd());

        // Compute the top N singular values and corresponding singular vectors.
        SingularValueDecomposition<RowMatrix, Matrix> svd = mat.computeSVD(topN, computeU, rCond);
        RowMatrix u = svd.U();

        // Hand back U's rows as a distributed RDD instead of collecting them to
        // the driver and re-parallelizing through a separate context.
        return u.rows().toJavaRDD();
    }
}