All downloads are free. Search and download functionality uses the official Maven repository.

com.datastax.insight.ml.spark.mllib.association.FPGrowthEvaluator Maven / Gradle / Ivy

package com.datastax.insight.ml.spark.mllib.association;

import com.datastax.insight.spec.RDDOperator;
import com.datastax.insight.core.Consts;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.fpgrowth.FPGrowthUtil;
import org.apache.spark.mllib.fpm.AssociationRules;
import org.apache.spark.mllib.fpm.FPGrowth;
import org.apache.spark.mllib.fpm.FPGrowthModel;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class FPGrowthEvaluator implements RDDOperator {
    public static FPGrowthModel evaluate(JavaRDD rdd, String delimiter,double minSupport,int numPartitions,double confidence){
        JavaRDD> data=rdd.map((Function>) line -> {
            String delim=delimiter;
            if(delim==null || delim.length()==0) {
                delim= Consts.DELIMITER;
            }
            List list=new ArrayList<>();
            String[] items= line.split(delim);
            Collections.addAll(list, items);
            return list;
        });
        FPGrowthModel model=new FPGrowth().setMinSupport(minSupport).setNumPartitions(numPartitions).run(data);

        if(confidence>0){
            List> rules=model.generateAssociationRules(confidence).toJavaRDD().collect();
            for(AssociationRules.Rule rule : rules){
                System.out.println(rule.javaAntecedent()+"===>"+rule.javaConsequent()+","+rule.confidence());
            }
        }
        //printItemset(model.freqItemsets());

        return model;
    }

    private static void printItemset(RDD> rdd){
        List> list=rdd.toJavaRDD().collect();
        for(FPGrowth.FreqItemset itemset : list){
            System.out.println(itemset.javaItems()+"===>"+itemset.freq());
        }
    }

    public static  void fpgrowth(Dataset data, String groupCol, String targetCol, double minSupport, int numPartitions, long minFreq,
                                    double p, int minItems, String uri, String path) {
        FPGrowthUtil.fpgrowth(data.toDF(), groupCol, targetCol, minSupport, numPartitions, minFreq, p, minItems, uri, path);
    }

    private static RDD mergeRelated(RDD rdd, Double p, Integer minItems) {
        return FPGrowthUtil.mergeRelated(rdd, p, minItems);
    }

    private static void freq2csv(RDD rdd, String uri, String path) {
        FPGrowthUtil.freq2csv(rdd, uri, path);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy