com.datastax.data.prepare.spark.dataset.StockBasicOperator Maven / Gradle / Ivy
The newest version!
package com.datastax.data.prepare.spark.dataset;
import com.datastax.insight.core.driver.SparkContextBuilder;
import com.datastax.insight.spec.Operator;
import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
/**
 * Operator that exposes a hierarchical-clustering step as an Insight component.
 *
 * <p>The class only validates the caller-supplied arguments and then delegates the
 * actual work to {@code StockBasicCompute.hcCompute}.
 *
 * @author keqc
 */
public class StockBasicOperator implements Operator {

    /**
     * Validates the arguments and runs hierarchical clustering on {@code data}.
     *
     * <p>A {@code null} dataset is returned as-is before any other argument is
     * inspected, so no validation error is raised in that case.
     *
     * @param data          input dataset; may be {@code null}
     * @param xColumn       name of the x column; must not be {@code null} or blank
     * @param yColumn       name of the y column; must not be {@code null} or blank
     * @param simColumn     name of the similarity column; must not be {@code null} or blank
     * @param threshold     threshold used to stop the clustering; must be a number {@code >= 0}
     * @param minSimilarity minimum similarity; must be a number {@code >= 0}
     * @param tempPath      location for temporary files (the component metadata restricts it
     *                      to an HDFS path); must not be {@code null} or blank
     * @return the value produced by {@code StockBasicCompute.hcCompute}, or {@code null}
     *         when {@code data} is {@code null}
     * @throws NullPointerException     if a column name or {@code tempPath} is {@code null} or blank
     * @throws IllegalArgumentException if {@code threshold} or {@code minSimilarity} is negative
     *                                  or {@code NaN}
     */
    @InsightComponent(name = "层次聚类", description = "层次聚类")
    public static Dataset hierClusteringAlgo(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "数据集") Dataset data,
            @InsightComponentArg(name = "x列列名", description = "x列") String xColumn,
            @InsightComponentArg(name = "y列列名", description = "y列") String yColumn,
            @InsightComponentArg(name = "相似度列列名", description = "相似度列") String simColumn,
            @InsightComponentArg(name = "阀值", description = "用于停止层次聚类的阀值") double threshold,
            @InsightComponentArg(name = "最小相似度", description = "最小相似度") double minSimilarity,
            @InsightComponentArg(name = "临时文件路径", description = "存放临时文件,路径限定为hdfs路径", defaultValue = "${MISC_FOLDER}") String tempPath) {
        // Nothing to cluster: hand the null back untouched, before any validation.
        if (data == null) {
            return data;
        }

        chkNull(xColumn, "x列名为空");
        chkNull(yColumn, "y列名为空");
        chkNull(simColumn, "similarity列名为空");
        chkNull(tempPath, "临时文件路径为空");

        // NaN compares false against every value, so "x < 0.0" alone would let it through.
        if (Double.isNaN(threshold)) {
            throw new IllegalArgumentException("threshold不能为NaN");
        }
        if (threshold < 0.0) {
            throw new IllegalArgumentException("threshold不能小于0");
        }
        if (Double.isNaN(minSimilarity)) {
            throw new IllegalArgumentException("min Similarity不能为NaN");
        }
        if (minSimilarity < 0.0) {
            throw new IllegalArgumentException("min Similarity不能小于0");
        }

        SparkSession spark = SparkContextBuilder.getSession();
        return (Dataset) StockBasicCompute.hcCompute(
                spark, data.toDF(), xColumn, yColumn, simColumn, threshold, minSimilarity, tempPath);
    }

    /**
     * Rejects a {@code null} or whitespace-only string.
     *
     * <p>The exception type is kept as {@link NullPointerException} for both cases so
     * that existing callers catching it keep working.
     *
     * @param s   value to check
     * @param msg message carried by the exception
     * @throws NullPointerException if {@code s} is {@code null} or contains only whitespace
     */
    private static void chkNull(String s, String msg) {
        if (s == null || s.trim().isEmpty()) {
            throw new NullPointerException(msg);
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy