All downloads are free. Search and download functionality uses the official Maven repository.

com.datastax.data.prepare.spark.rdd.RDDTransformation Maven / Gradle / Ivy

The newest version!
package com.datastax.data.prepare.spark.rdd;

import com.datastax.insight.spec.RDDOperator;
import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import com.datastax.data.prepare.util.Consts;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.mllib.regression.LabeledPoint;

import java.util.ArrayList;
import java.util.List;

public class RDDTransformation implements RDDOperator {
    /**
     * Converts every line of a text RDD into a dense MLlib vector.
     *
     * <p>Each line is split with {@link String#split(String)} and every resulting
     * token is parsed with {@link Double#parseDouble(String)}. Because
     * {@code String.split} is used, the delimiter is interpreted as a regular
     * expression. A token that is not a valid double makes the record fail with
     * {@link NumberFormatException}.
     *
     * @param rdd       lines to convert, one vector per line
     * @param delimiter token separator; when {@code null} or empty,
     *                  {@code Consts.DELIMITER} is used instead
     * @return an RDD holding one dense vector per input line
     */
    @InsightComponent(name = "RDD 向量化", description = "向量化", icon = "arrows", order = 500206)
    public static JavaRDD<Vector> denseVector(
            @InsightComponentArg(externalInput = true,name = "rdd", description = "操作数据") JavaRDD<String> rdd,
            @InsightComponentArg(name = "delimiter", description = "分隔符",defaultValue = ";") String delimiter){
        // The effective delimiter does not depend on the record, so resolve it once
        // here instead of on every invocation of the mapping function.
        final String delim = (delimiter == null || delimiter.isEmpty()) ? Consts.DELIMITER : delimiter;

        // The explicit type arguments give the lambda parameter the type String;
        // with a raw Function it would be Object and split() would not resolve.
        Function<String, Vector> toVector = line -> {
            String[] tokens = line.split(delim);
            double[] values = new double[tokens.length];
            for (int i = 0; i < tokens.length; i++) {
                values[i] = Double.parseDouble(tokens[i]);
            }
            return Vectors.dense(values);
        };
        return rdd.map(toVector);
    }

/**
 * Converts every line of a text RDD into an MLlib {@link LabeledPoint}.
 *
 * <p>Each line is split with {@link String#split(String)} (so the delimiter is
 * interpreted as a regular expression). The first token is the label and the
 * remaining tokens are the features, in order.
 *
 * <p>The feature vector contains exactly {@code tokens.length - 1} entries.
 * Earlier code allocated {@code tokens.length} entries and filled them starting
 * at index 1, which left a spurious {@code 0.0} in front of the real features.
 *
 * @param rdd       lines to convert, one labeled point per line
 * @param delimiter token separator; when {@code null} or empty,
 *                  {@code Consts.DELIMITER} is used instead
 * @return an RDD holding one labeled point per input line
 */
@InsightComponent(name = "RDD LabeledPoint化", description = "LabeledPoint化", icon = "arrows", order = 500207)
public static JavaRDD<LabeledPoint> lpRDD(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作数据") JavaRDD<String> rdd,
        @InsightComponentArg(name = "delimiter", description = "分隔符",defaultValue = ";") String delimiter){
    // The effective delimiter does not depend on the record, so resolve it once.
    final String delim = (delimiter == null || delimiter.isEmpty()) ? Consts.DELIMITER : delimiter;

    Function<String, LabeledPoint> toLabeledPoint = line -> {
        String[] tokens = line.split(delim);
        // Read the label first: a line that yields no tokens fails here, before
        // the feature array (length tokens.length - 1) is allocated.
        double label = Double.parseDouble(tokens[0]);
        double[] features = new double[tokens.length - 1];
        for (int i = 1; i < tokens.length; i++) {
            // Token i is feature i - 1; token 0 is the label and is not a feature.
            features[i - 1] = Double.parseDouble(tokens[i]);
        }
        return new LabeledPoint(label, Vectors.dense(features));
    };
    return rdd.map(toLabeledPoint);
}

// "RDD Split" component. The visible part splits `weights` on Consts.DELIMITER
// and allocates a double[] of the same length as the resulting token array.
// NOTE(review): the rest of this method is missing from this copy of the file
// (see the note at the truncated for-loop below), so what it does with `data`
// and `ws` cannot be confirmed from here.
@InsightComponent(name = "RDD Split", description = "Split", icon = "square-o", order = 500502)
public static JavaRDD[] split(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "需要处理的数据RDD",defaultValue = "${output}._1") JavaRDD data,
        @InsightComponentArg(name = "weights", description = "风格比例,用分号隔开") String weights){
    String[] texts=weights.split(Consts.DELIMITER);
    double[] ws=new double[texts.length];
    // NOTE(review): the next line is damaged. The for-loop header stops after "i"
    // and runs straight into the signature of distinct(), so the loop body, the
    // end of split(), and the annotation/modifiers of distinct() are lost.
    // Presumably everything between a '<' and the following '>' was stripped
    // when the source was extracted - TODO restore from the original artifact.
    for(int i=0;i  JavaRDD distinct(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD rdd,
        @InsightComponentArg(name = "numPartitions", description = "numPartitions",defaultValue = "0") int numPartitions){
    // A positive numPartitions selects the distinct(int) overload; any other
    // value falls back to the no-argument distinct().
    if(numPartitions>0){
        return rdd.distinct(numPartitions);
    }else {
        return rdd.distinct();
    }
}

/**
 * Coalesces an RDD by forwarding to {@code JavaRDD#coalesce(int, boolean)}.
 *
 * @param rdd           the RDD to coalesce
 * @param numPartitions partition count, passed through unchanged
 * @param shuffle       shuffle flag, passed through unchanged
 * @param <T>           element type of the RDD
 * @return the RDD produced by {@code coalesce}
 */
@InsightComponent(name = "RDD Coalesce", description = "Coalesce", icon = "arrows", order = 50010507)
public static <T> JavaRDD<T> coalesce(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD<T> rdd,
        @InsightComponentArg(name = "numPartitions", description = "numPartitions") int numPartitions,
        @InsightComponentArg(name = "shuffle", description = "shuffle") boolean shuffle){
    return rdd.coalesce(numPartitions, shuffle);
}

/**
 * Repartitions an RDD by forwarding to {@code JavaRDD#repartition(int)}.
 *
 * @param rdd           the RDD to repartition
 * @param numPartitions partition count, passed through unchanged
 * @param <T>           element type of the RDD
 * @return the RDD produced by {@code repartition}
 */
@InsightComponent(name = "RDD Repartition", description = "Repartition", icon = "arrows", order = 50010508)
public static <T> JavaRDD<T> repartition(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD<T> rdd,
        @InsightComponentArg(name = "numPartitions", description = "numPartitions") int numPartitions){
    return rdd.repartition(numPartitions);
}

/**
 * Samples an RDD by forwarding to {@code JavaRDD#sample(boolean, double, long)}.
 *
 * @param rdd             the RDD to sample from
 * @param withReplacement replacement flag, passed through unchanged
 * @param fraction        sampling fraction, passed through unchanged
 * @param seed            random seed, passed through unchanged
 * @param <T>             element type of the RDD
 * @return the RDD produced by {@code sample}
 */
@InsightComponent(name = "RDD Sample", description = "Sample", icon = "arrows", order = 500101)
public static <T> JavaRDD<T> sample(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD<T> rdd,
        @InsightComponentArg(name = "withReplacement", description = "withReplacement") boolean withReplacement,
        @InsightComponentArg(name = "fraction", description = "fraction") double fraction,
        @InsightComponentArg(name = "seed", description = "seed") long seed){
    return rdd.sample(withReplacement, fraction, seed);
}

/**
 * Combines two RDDs by forwarding to {@code JavaRDD#union(JavaRDD)}.
 *
 * @param rdd   the RDD on which {@code union} is invoked
 * @param other the RDD passed as the argument to {@code union}
 * @param <T>   element type shared by both RDDs
 * @return the RDD produced by {@code union}
 */
@InsightComponent(name = "RDD Union", description = "Union", icon = "arrows", order = 50010506)
public static <T> JavaRDD<T> union(
        @InsightComponentArg(externalInput = true,name = "left", description = "操作RDD") JavaRDD<T> rdd,
        @InsightComponentArg(name = "right", description = "操作RDD") JavaRDD<T> other){
    return rdd.union(other);
}

/**
 * Intersects two RDDs by forwarding to {@code JavaRDD#intersection(JavaRDD)}.
 *
 * @param rdd   the RDD on which {@code intersection} is invoked
 * @param other the RDD passed as the argument to {@code intersection}
 * @param <T>   element type shared by both RDDs
 * @return the RDD produced by {@code intersection}
 */
@InsightComponent(name = "RDD Intersection", description = "Intersection", icon = "arrows", order = 50010509)
public static <T> JavaRDD<T> intersection(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD<T> rdd,
        @InsightComponentArg(name = "other", description = "操作RDD") JavaRDD<T> other){
    return rdd.intersection(other);
}

/**
 * Subtracts one RDD from another by forwarding to
 * {@code JavaRDD#subtract(JavaRDD, int)}.
 *
 * @param rdd           the RDD on which {@code subtract} is invoked
 * @param other         the RDD passed as the argument to {@code subtract}
 * @param numPartitions partition count, passed through unchanged
 * @param <T>           element type shared by both RDDs
 * @return the RDD produced by {@code subtract}
 */
@InsightComponent(name = "RDD Subtract", description = "Subtract", icon = "arrows", order = 50010510)
public static <T> JavaRDD<T> subtract(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD<T> rdd,
        @InsightComponentArg(name = "other", description = "操作RDD") JavaRDD<T> other,
        @InsightComponentArg(name = "numPartitions", description = "numPartitions") int numPartitions){
    return rdd.subtract(other, numPartitions);
}

/**
 * Assigns a name to an RDD by forwarding to {@code JavaRDD#setName(String)}.
 *
 * @param rdd  the RDD to name
 * @param name the name, passed through unchanged
 * @param <T>  element type of the RDD
 * @return the value returned by {@code setName}
 */
@InsightComponent(name = "RDD 设置名称", description = "设置名称", icon = "arrows", order = 50010805)
public static <T> JavaRDD<T> setName(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD<T> rdd,
        @InsightComponentArg(name = "name", description = "名称") String name){
    return rdd.setName(name);
}

@InsightComponent(name = "RDD Glom", description = "Glom", icon = "arrows", order = 500208)
public static  JavaRDD> glom(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD rdd){
    return rdd.glom();
}

/**
 * Pipes an RDD through an external command by forwarding to
 * {@code JavaRDD#pipe(List)}.
 *
 * <p>The varargs are copied, in order, into a {@code List<String>} because
 * that is the argument type the {@code pipe} overload used here accepts.
 *
 * @param rdd     the RDD to pipe
 * @param command the command tokens, forwarded in the given order
 * @return the RDD produced by {@code pipe}
 */
@InsightComponent(name = "RDD Pipe", description = "Pipe", icon = "arrows", order = 500210)
public static JavaRDD<String> pipe(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD<?> rdd,
        @InsightComponentArg(name = "commands", description = "commands") String... command){
    // Presized: the final element count is known up front.
    List<String> tokens = new ArrayList<>(command.length);
    for (String token : command) {
        tokens.add(token);
    }
    return rdd.pipe(tokens);
}

/**
 * Pairs every element of an RDD with a {@code Long}, using the strategy named
 * by {@code type}.
 *
 * <ul>
 *   <li>{@code "uniqueid"} forwards to {@code JavaRDD#zipWithUniqueId()}</li>
 *   <li>{@code "index"} forwards to {@code JavaRDD#zipWithIndex()}</li>
 * </ul>
 *
 * <p>The comparisons are written constant-first so that a {@code null}
 * {@code type} is treated like any other unrecognised value instead of
 * raising a {@link NullPointerException}.
 *
 * @param rdd  the RDD to zip
 * @param type {@code "uniqueid"} or {@code "index"}
 * @param <T>  element type of the RDD
 * @return the paired RDD, or {@code null} when {@code type} is neither of the
 *         two supported values
 */
@InsightComponent(name = "RDD Zip", description = "Zip", icon = "arrows", order = 500209)
public static <T> JavaPairRDD<T, Long> zip(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD<T> rdd,
        @InsightComponentArg(name = "type", description = "类型",items = "uniqueid;index") String type){
    if ("uniqueid".equals(type)) {
        return rdd.zipWithUniqueId();
    }
    if ("index".equals(type)) {
        return rdd.zipWithIndex();
    }
    // Unrecognised (or null) type: keep the established contract of returning null.
    return null;
}

/**
 * Collects an RDD into a list by forwarding to {@code JavaRDD#collect()}.
 *
 * @param rdd the RDD to collect
 * @param <T> element type of the RDD
 * @return the list returned by {@code collect}
 */
@InsightComponent(name = "RDD Collect", description = "Collect", icon = "arrows", order = 500211)
public static <T> List<T> collect(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作数据集") JavaRDD<T> rdd){
    return rdd.collect();
}

// "RDD CollectPartitions" component. The visible part splits `pid` on
// Consts.DELIMITER and allocates an int[] of the same length as the token array.
// NOTE(review): the rest of this method is missing from this copy of the file
// (see the note at the truncated for-loop below), so how `partitionIds` is
// filled and used cannot be confirmed from here.
@InsightComponent(name = "RDD CollectPartitions", description = "CollectPartitions", icon = "arrows", order = 500212)
public static  List[] collectPartitions(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD rdd,
        @InsightComponentArg(name = "partitionIds", description = "partitionIds")  String pid){
    String[] pids=pid.split(Consts.DELIMITER);
    int[] partitionIds=new int[pids.length];
    // NOTE(review): the next line is damaged. The for-loop header stops after "i"
    // and runs straight into the signature of count(), so the loop body, the end
    // of collectPartitions(), and the annotation/modifiers of count() are lost.
    // Presumably everything between a '<' and the following '>' was stripped
    // when the source was extracted - TODO restore from the original artifact.
    for(int i=0;i Object count(
        @InsightComponentArg(externalInput = true, name = "rdd", description = "操作RDD") JavaRDD rdd,
        @InsightComponentArg(externalInput = true, name = "byValue", description = "byValue") boolean byValue,
        @InsightComponentArg(name = "timeout", description = "近似计算的timeout",defaultValue = "0") long timeout,
        @InsightComponentArg(name = "confidence", description = "confidence") double confidence){
    // Four-way dispatch: byValue chooses between the countByValue* and count*
    // families; a positive timeout chooses the *Approx variant, which is the only
    // one that receives timeout and confidence. The return type is Object because
    // the four calls return different types.
    if(byValue){
        if(timeout>0){
            return rdd.countByValueApprox(timeout,confidence);
        }else {
            return rdd.countByValue();
        }
    }else {
        if(timeout>0){
            return rdd.countApprox(timeout,confidence);
        }else {
            return rdd.count();
        }
    }
}

/**
 * Takes elements from an RDD by forwarding to {@code JavaRDD#take(int)}.
 *
 * @param rdd the RDD to read from
 * @param num element count, passed through unchanged
 * @param <T> element type of the RDD
 * @return the list returned by {@code take}
 */
@InsightComponent(name = "RDD Take", description = "Take", icon = "arrows", order = 50010105)
public static <T> List<T> take(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD<T> rdd,
        @InsightComponentArg(name = "num", description = "num") int num){
    return rdd.take(num);
}

/**
 * Forwards to the single-argument {@code JavaRDD#takeOrdered(int)}; no
 * comparator is supplied, so ordering is whatever that overload applies
 * (the natural ordering of the elements, per the Spark Java API).
 *
 * @param rdd the RDD to read from
 * @param num element count, passed through unchanged
 * @param <T> element type of the RDD
 * @return the list returned by {@code takeOrdered}
 */
@InsightComponent(name = "RDD TakeOrdered", description = "TakeOrdered", icon = "arrows", order = 50010106)
public static <T> List<T> takeOrdered(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD<T> rdd,
        @InsightComponentArg(name = "num", description = "num") int num){
    return rdd.takeOrdered(num);
}

/**
 * Forwards to the single-argument {@code JavaRDD#top(int)}; no comparator is
 * supplied, so ordering is whatever that overload applies (the natural
 * ordering of the elements, per the Spark Java API).
 *
 * @param rdd the RDD to read from
 * @param num element count, passed through unchanged
 * @param <T> element type of the RDD
 * @return the list returned by {@code top}
 */
@InsightComponent(name = "RDD Top", description = "Top", icon = "arrows", order = 50010107)
public static <T> List<T> top(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD<T> rdd,
        @InsightComponentArg(name = "num", description = "num") int num){
    return rdd.top(num);
}

/**
 * Draws a sample from an RDD by forwarding to
 * {@code JavaRDD#takeSample(boolean, int, long)}.
 *
 * @param rdd             the RDD to sample from
 * @param withReplacement replacement flag, passed through unchanged
 * @param num             sample size, passed through unchanged
 * @param seed            random seed, passed through unchanged
 * @param <T>             element type of the RDD
 * @return the list returned by {@code takeSample}
 */
@InsightComponent(name = "RDD TakeSample", description = "TakeSample", icon = "arrows", order = 500102)
public static <T> List<T> takeSample(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD<T> rdd,
        @InsightComponentArg(name = "withReplacement", description = "withReplacement") boolean withReplacement,
        @InsightComponentArg(name = "num", description = "num") int num,
        @InsightComponentArg(name = "seed", description = "seed") long seed){
    return rdd.takeSample(withReplacement, num, seed);
}

/**
 * Returns the first element of an RDD by forwarding to {@code JavaRDD#first()}.
 *
 * <p>The type parameter {@code T} is declared on the method; without that
 * declaration the return type {@code T} does not resolve.
 *
 * @param rdd the RDD to read from
 * @param <T> element type of the RDD
 * @return the value returned by {@code first}
 */
@InsightComponent(name = "RDD First", description = "First", icon = "arrows", order = 50010108)
public static <T> T first(
        @InsightComponentArg(externalInput = true,name = "rdd", description = "操作RDD") JavaRDD<T> rdd){
    return rdd.first();
}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy