All downloads are free. Search and download functionality uses the official Maven repository.

com.datastax.insight.ml.spark.mllib.feature.PCAReducer Maven / Gradle / Ivy

package com.datastax.insight.ml.spark.mllib.feature;

import com.datastax.insight.core.driver.SparkContextBuilder;
import com.datastax.insight.spec.RDDOperator;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.linalg.Matrix;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.distributed.RowMatrix;

import java.util.Arrays;

/**
 * Dimensionality reduction by principal component analysis (PCA), built on Spark MLlib's
 * {@link RowMatrix}.
 */
public class PCAReducer implements RDDOperator {
    /**
     * Projects every row of {@code data} onto its top {@code tpc} principal components.
     *
     * <p>The projection stays distributed: the returned RDD is the row RDD of the projected
     * matrix. Nothing is collected onto the driver, so the size of the input is not limited by
     * driver memory. The result is evaluated lazily; callers that run several actions on it
     * should persist it to avoid recomputing the projection.
     *
     * @param data RDD whose elements are MLlib {@link Vector}s, one per observation; the raw
     *             type is kept so existing callers continue to compile unchanged
     * @param tpc  number of principal components to keep; must be positive (Spark is expected
     *             to additionally reject values larger than the number of input columns)
     * @return an RDD of {@link Vector}, one projected vector per input row
     * @throws IllegalArgumentException if {@code tpc} is not positive
     */
    public static JavaRDD reduce(JavaRDD data,int tpc){
        // Fail fast with a clear message before any Spark job is launched.
        if (tpc <= 0) {
            throw new IllegalArgumentException(
                    "Number of principal components must be positive, got " + tpc);
        }

        // The signature is raw for backward compatibility; RowMatrix requires rows of Vector,
        // so the element type is asserted here, in the narrowest possible scope.
        @SuppressWarnings("unchecked")
        JavaRDD<Vector> rows = (JavaRDD<Vector>) data;
        RowMatrix mat = new RowMatrix(rows.rdd());

        // Compute the top principal components and project the rows onto them.
        Matrix pc = mat.computePrincipalComponents(tpc);
        RowMatrix projected = mat.multiply(pc);

        // Hand back the distributed rows directly instead of collecting them to the driver
        // and re-parallelizing, which would not scale and would drop the partitioning.
        return projected.rows().toJavaRDD();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy