// com.datastax.insight.ml.spark.mllib.association.PrefixSpanEvaluator (Maven / Gradle / Ivy)
package com.datastax.insight.ml.spark.mllib.association;
import com.datastax.insight.spec.RDDOperator;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.fpm.PrefixSpan;
import org.apache.spark.mllib.fpm.PrefixSpanModel;
import java.util.ArrayList;
import java.util.List;
public class PrefixSpanEvaluator implements RDDOperator {
public static PrefixSpanModel evaluate(JavaRDD rdd,String deliOne,String deliTwo,double minSupport,int maxPatternLength){
JavaRDD>> sequences=rdd.map(new Function>>() {
@Override
public List> call(String line) throws Exception {
List> dataList=new ArrayList<>();
String[] texts=line.split(deliOne);
for(String text: texts){
List list=new ArrayList<>();
String[] items=text.split(deliTwo);
for(String item : items){
list.add(Integer.parseInt(item));
}
dataList.add(list);
}
return dataList;
}
});
return evaluate(sequences,minSupport,maxPatternLength);
}
public static PrefixSpanModel evaluate(JavaRDD>> sequences,double minSupport,int maxPatternLength){
PrefixSpan prefixSpan = new PrefixSpan()
.setMinSupport(minSupport)
.setMaxPatternLength(maxPatternLength);
PrefixSpanModel model = prefixSpan.run(sequences);
return model;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy