
// com.datastax.insight.ml.spark.mllib.cluster.PowerIterationCluster Maven / Gradle / Ivy
package com.datastax.insight.ml.spark.mllib.cluster;
import com.datastax.insight.spec.RDDOperator;
import com.datastax.insight.core.Consts;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.clustering.PowerIterationClustering;
import org.apache.spark.mllib.clustering.PowerIterationClusteringModel;
import scala.Tuple3;
public class PowerIterationCluster implements RDDOperator {
public static PowerIterationClusteringModel train(JavaRDD rdd, String delimiter,int numClasses,int maxIterations){
JavaRDD> similarities=rdd.map(new Function>() {
@Override
public Tuple3 call(String line) throws Exception {
String delim=delimiter;
if(delim==null || delim.length()==0) {
delim= Consts.DELIMITER;
}
String[] items= line.split(delim);
return new Tuple3<>(Long.parseLong(items[0]), Long.parseLong(items[1]), Double.parseDouble(items[2]));
}
});
return train(similarities,numClasses,maxIterations);
}
public static PowerIterationClusteringModel train(JavaRDD> similarities,int numClasses,int maxIterations){
PowerIterationClustering pic = new PowerIterationClustering()
.setK(numClasses)
.setMaxIterations(maxIterations);
PowerIterationClusteringModel model = pic.run(similarities);
return model;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy