// Source listing: com.datastax.insight.ml.spark.data.rdd.RDDLoader (Maven / Gradle / Ivy)
package com.datastax.insight.ml.spark.data.rdd;
import com.datastax.insight.core.driver.SparkContextBuilder;
import com.datastax.insight.spec.RDDOperator;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.mllib.util.MLUtils;
/**
 * Static helpers that load files into Spark RDDs, using the Spark contexts
 * obtained from {@link SparkContextBuilder}.
 */
public class RDDLoader implements RDDOperator {

    /**
     * Reads the text file at {@code path} via {@link JavaSparkContext#textFile(String)}.
     *
     * @param path location of the file; passed to Spark unchanged
     * @return an RDD with one {@code String} element per line of the file
     */
    public static JavaRDD<String> load(String path) {
        JavaSparkContext sc = SparkContextBuilder.getJContext();
        return sc.textFile(path);
    }

    /**
     * Reads the text file at {@code path} and maps every line through
     * {@code converter.convert(line)}.
     *
     * <p>NOTE(review): {@code RDDConverter} is declared outside this file; this
     * signature assumes it is generic in its result type ({@code RDDConverter<T>})
     * -- confirm against its declaration. The converter is captured by the
     * function handed to Spark, so it presumably has to be serializable as well.
     *
     * @param path      location of the file; passed to Spark unchanged
     * @param converter turns one line of text into one element of the result
     * @param <T>       element type produced by {@code converter}
     * @return an RDD holding the converted value of each line
     * @throws IllegalArgumentException if {@code converter} is {@code null}
     */
    public static <T> JavaRDD<T> load(String path, final RDDConverter<T> converter) {
        // Fail at the call site; a null converter would otherwise only surface
        // as a NullPointerException inside call() when the mapping is executed.
        if (converter == null) {
            throw new IllegalArgumentException("converter must not be null");
        }
        JavaRDD<String> lines = load(path);
        return lines.map(new Function<String, T>() {
            @Override
            public T call(String line) throws Exception {
                return converter.convert(line);
            }
        });
    }

    /**
     * Loads a file with {@link MLUtils#loadLibSVMFile} using the context from
     * {@code SparkContextBuilder.getContext()} and exposes the result as a Java RDD.
     *
     * @param path location of the file; passed to Spark unchanged
     * @return the loaded labeled points
     */
    public static JavaRDD<LabeledPoint> loadLibSVMFile(String path) {
        return MLUtils.loadLibSVMFile(SparkContextBuilder.getContext(), path).toJavaRDD();
    }
}