// com.datastax.data.prepare.spark.dataset.FPGrowthOperator (Maven / Gradle / Ivy)
// The newest version!
package com.datastax.data.prepare.spark.dataset;
import com.datastax.insight.core.driver.SparkContextBuilder;
import com.datastax.insight.spec.Operator;
import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Insight operator that exposes an FP-Growth computation as the {@code fpgrowth} component.
 *
 * <p>The operator itself performs no mining: it normalizes its arguments, obtains the
 * Spark session from {@link SparkContextBuilder} and delegates to
 * {@code StockBasicCompute.fpgCompute}.
 */
public class FPGrowthOperator implements Operator {

    private static final Logger logger = LoggerFactory.getLogger(FPGrowthOperator.class);

    /** Value forwarded as {@code numPartitions} when the caller supplies no partition count. */
    private static final int NUM_PARTITIONS_UNSET = -1;

    /**
     * Runs the FP-Growth computation on {@code data} by delegating to
     * {@code StockBasicCompute.fpgCompute}.
     *
     * <p>All arguments except {@code numPartitionsString} are forwarded unchanged; their exact
     * semantics are defined by {@code StockBasicCompute.fpgCompute}.
     *
     * @param data                input dataset; converted with {@code toDF()} before delegation
     * @param groupCol            forwarded as-is (presumably the transaction/grouping column — confirm
     *                            against {@code fpgCompute})
     * @param targetCol           forwarded as-is (presumably the item column — confirm against
     *                            {@code fpgCompute})
     * @param minSupport          forwarded as-is
     * @param numPartitionsString partition count as text; {@code null} or blank is forwarded as
     *                            {@code -1}, surrounding whitespace is ignored
     * @param minFreq             forwarded as-is
     * @param p                   forwarded as-is
     * @param minItems            forwarded as-is
     * @param path                forwarded as-is (presumably an output location — confirm against
     *                            {@code fpgCompute})
     * @throws NumberFormatException if {@code numPartitionsString} is non-blank but is not a valid
     *                               integer
     */
    @InsightComponent(name = "fpgrowth", description = "fpgrowth")
    public static void fpgrowth(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "数据集") Dataset data,
            @InsightComponentArg(name = "groupCol", description = "groupCol") String groupCol,
            @InsightComponentArg(name = "targetCol", description = "targetCol") String targetCol,
            @InsightComponentArg(name = "minSupport", description = "minSupport") double minSupport,
            @InsightComponentArg(name = "numPartitions", description = "numPartitions") String numPartitionsString,
            @InsightComponentArg(name = "minFreq", description = "minFreq") long minFreq,
            @InsightComponentArg(name = "p", description = "p") double p,
            @InsightComponentArg(name = "minItems", description = "minItems") int minItems,
            // @InsightComponentArg(name = "uri", description = "uri") String uri,
            @InsightComponentArg(name = "path", description = "path") String path) {
        int numPartitions = parseNumPartitions(numPartitionsString);
        SparkSession spark = SparkContextBuilder.getSession();
        // Earlier implementation, kept for reference:
        // FPGrowthUtil.fpgrowth(spark, data.toDF(), groupCol, targetCol, minSupport, numPartitions, minFreq,
        //         p, minItems, uri, path);
        StockBasicCompute.fpgCompute(
                spark, data.toDF(), groupCol, targetCol, minSupport, numPartitions, minFreq, p, minItems, path);
    }

    /**
     * Converts the textual partition count into an {@code int}.
     *
     * <p>The value is trimmed before parsing so that input which passes the blank check (for
     * example {@code " 8 "}) does not fail in {@link Integer#parseInt(String)}, which rejects
     * surrounding whitespace.
     *
     * @param raw partition count as text, may be {@code null}
     * @return the parsed count, or {@code -1} when {@code raw} is {@code null} or blank
     * @throws NumberFormatException if the trimmed text is not a valid integer
     */
    private static int parseNumPartitions(String raw) {
        if (raw == null) {
            return NUM_PARTITIONS_UNSET;
        }
        String trimmed = raw.trim();
        if (trimmed.isEmpty()) {
            return NUM_PARTITIONS_UNSET;
        }
        return Integer.parseInt(trimmed);
    }

    // Disabled component, kept for reference. The annotation texts were originally written in
    // Chinese and are shown here in English translation.
    // @InsightComponent(name = "merge", description = "merges several days of stock data")
    // public static Dataset mergeRelated(
    //         @InsightComponentArg(externalInput = true, name = "dataset", description = "dataset") Dataset data,
    //         @InsightComponentArg(name = "date column", description = "name of the date column") String dateCol,
    //         @InsightComponentArg(name = "id column", description = "name of the column holding the id array") String idsCol,
    //         @InsightComponentArg(name = "new column name", description = "name of the new result column") String resultCol,
    //         @InsightComponentArg(name = "days", description = "number of days over which the id column is merged") int days) {
    //     return (Dataset) StockOperation.mergeDays(data, dateCol, idsCol, resultCol, days);
    // }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy