All downloads are FREE. Search and download functionality uses the official Maven repository.

com.datastax.insight.ml.spark.mllib.association.PrefixSpanEvaluator Maven / Gradle / Ivy

package com.datastax.insight.ml.spark.mllib.association;

import com.datastax.insight.spec.RDDOperator;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.fpm.PrefixSpan;
import org.apache.spark.mllib.fpm.PrefixSpanModel;

import java.util.ArrayList;
import java.util.List;

public class PrefixSpanEvaluator implements RDDOperator {
    public static PrefixSpanModel evaluate(JavaRDD rdd,String deliOne,String deliTwo,double minSupport,int maxPatternLength){
        JavaRDD>> sequences=rdd.map(new Function>>() {
            @Override
            public List> call(String line) throws Exception {
                List> dataList=new ArrayList<>();
                String[] texts=line.split(deliOne);
                for(String text: texts){
                    List list=new ArrayList<>();
                    String[] items=text.split(deliTwo);
                    for(String item : items){
                        list.add(Integer.parseInt(item));
                    }
                    dataList.add(list);
                }
                return dataList;
            }
        });
        return evaluate(sequences,minSupport,maxPatternLength);
    }

    public static PrefixSpanModel evaluate(JavaRDD>> sequences,double minSupport,int maxPatternLength){
        PrefixSpan prefixSpan = new PrefixSpan()
                .setMinSupport(minSupport)
                .setMaxPatternLength(maxPatternLength);
        PrefixSpanModel model = prefixSpan.run(sequences);
        return model;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy