
// com.github.lwhite1.tablesaw.api.ml.association.FrequentItemset Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of tablesaw Show documentation
// Show all versions of tablesaw Show documentation
// High-performance Java Dataframe with integrated columnar storage
package com.github.lwhite1.tablesaw.api.ml.association;
import com.github.lwhite1.tablesaw.api.IntColumn;
import com.github.lwhite1.tablesaw.api.ShortColumn;
import com.github.lwhite1.tablesaw.api.Table;
import com.github.lwhite1.tablesaw.table.TemporaryView;
import com.github.lwhite1.tablesaw.table.ViewGroup;
import it.unimi.dsi.fastutil.ints.IntRBTreeSet;
import it.unimi.dsi.fastutil.objects.Object2DoubleOpenHashMap;
import it.unimi.dsi.fastutil.shorts.ShortRBTreeSet;
import smile.association.FPGrowth;
import smile.association.ItemSet;
import java.util.List;
/**
 * Mines frequent itemsets from basket-style data with Smile's {@link FPGrowth}.
 *
 * <p>The input is two parallel columns: one holds the identifier of the set (basket) a row
 * belongs to, the other holds the item on that row. The constructor groups the rows by set
 * identifier, reduces every group to its distinct items in ascending order and hands the
 * resulting {@code int[][]} to {@code FPGrowth}.
 *
 * <p>Every call to {@link #learn()} - and therefore every call to one of the map methods -
 * invokes {@code FPGrowth.learn()} again; nothing is cached by this class.
 */
public class FrequentItemset {

    private final FPGrowth model;

    /** Number of baskets the input rows were grouped into. */
    private final int setCount;

    /**
     * Builds the model from int-valued columns.
     *
     * @param sets    column holding, per row, the identifier of the basket the row belongs to
     * @param items   column holding, per row, the item contained in that basket
     * @param support minimum-support setting, passed unchanged to
     *                {@code FPGrowth(int[][], double)}
     */
    public FrequentItemset(IntColumn sets, IntColumn items, double support) {
        // Work on copies so the caller's columns are not attached to the scratch table.
        Table temp = Table.create("temp");
        temp.addColumn(sets.copy());
        temp.addColumn(items.copy());
        // NOTE(review): the return value is ignored. If sortAscendingOn returns a sorted copy
        // instead of sorting in place, this call has no effect - confirm against the Table API.
        temp.sortAscendingOn(sets.name(), items.name());

        ViewGroup baskets = temp.splitOn(temp.column(0));
        this.setCount = baskets.size();

        int[][] itemsets = new int[setCount][];
        int basketIndex = 0;
        for (TemporaryView basket : baskets) {
            // The tree set removes duplicate items and iterates in ascending order.
            IntRBTreeSet set = new IntRBTreeSet(basket.intColumn(1).data());
            int itemIndex = 0;
            itemsets[basketIndex] = new int[set.size()];
            for (int item : set) {
                itemsets[basketIndex][itemIndex] = item;
                itemIndex++;
            }
            basketIndex++;
        }
        this.model = new FPGrowth(itemsets, support);
    }

    /**
     * Builds the model from short-valued columns. Items are widened to {@code int} because
     * {@code FPGrowth} is given an {@code int[][]}.
     *
     * @param sets    column holding, per row, the identifier of the basket the row belongs to
     * @param items   column holding, per row, the item contained in that basket
     * @param support minimum-support setting, passed unchanged to
     *                {@code FPGrowth(int[][], double)}
     */
    public FrequentItemset(ShortColumn sets, ShortColumn items, double support) {
        // Work on copies so the caller's columns are not attached to the scratch table.
        Table temp = Table.create("temp");
        temp.addColumn(sets.copy());
        temp.addColumn(items.copy());
        // NOTE(review): the return value is ignored. If sortAscendingOn returns a sorted copy
        // instead of sorting in place, this call has no effect - confirm against the Table API.
        temp.sortAscendingOn(sets.name(), items.name());

        ViewGroup baskets = temp.splitOn(temp.column(0));
        this.setCount = baskets.size();

        int[][] itemsets = new int[setCount][];
        int basketIndex = 0;
        for (TemporaryView basket : baskets) {
            // The tree set removes duplicate items and iterates in ascending order.
            ShortRBTreeSet set = new ShortRBTreeSet(basket.shortColumn(1).data());
            int itemIndex = 0;
            itemsets[basketIndex] = new int[set.size()];
            for (short item : set) {
                itemsets[basketIndex][itemIndex] = item; // implicit short -> int widening
                itemIndex++;
            }
            basketIndex++;
        }
        this.model = new FPGrowth(itemsets, support);
    }

    /**
     * Runs the underlying FP-growth model.
     *
     * @return the item sets returned by {@code FPGrowth.learn()}
     */
    public List<ItemSet> learn() {
        return model.learn();
    }

    /**
     * Maps every learned item set to its {@code support} value as reported by Smile.
     *
     * <p>The keys are the {@code int[]} arrays taken directly from {@code ItemSet.items}.
     * Java arrays hash and compare by identity, so looking an entry up with a different array
     * that holds the same values will not find it; iterate over the entries instead.
     *
     * @return a new map from item array to support
     */
    public Object2DoubleOpenHashMap<int[]> supportMap() {
        List<ItemSet> itemSets = learn();
        Object2DoubleOpenHashMap<int[]> supportByItems = new Object2DoubleOpenHashMap<>(itemSets.size());
        for (ItemSet itemSet : itemSets) {
            supportByItems.put(itemSet.items, itemSet.support);
        }
        return supportByItems;
    }

    /**
     * Same as {@link #supportMap()}, restricted to item sets whose {@code support} is at
     * least {@code supportThreshold}.
     *
     * <p>The keys are {@code int[]} arrays and are therefore compared by identity.
     *
     * @param supportThreshold inclusive lower bound, compared against {@code ItemSet.support}
     * @return a new map from item array to support
     */
    public Object2DoubleOpenHashMap<int[]> supportMap(double supportThreshold) {
        List<ItemSet> itemSets = learn();
        Object2DoubleOpenHashMap<int[]> supportByItems = new Object2DoubleOpenHashMap<>(itemSets.size());
        for (ItemSet itemSet : itemSets) {
            if (itemSet.support >= supportThreshold) {
                supportByItems.put(itemSet.items, itemSet.support);
            }
        }
        return supportByItems;
    }

    /**
     * Maps every learned item set to its {@code support} divided by the number of baskets
     * this instance was built from.
     *
     * <p>Unlike the other map methods, the keys here are {@link IntRBTreeSet} copies of the
     * item arrays rather than the arrays themselves.
     *
     * @return a new map from item set to {@code support / basket count}
     */
    public Object2DoubleOpenHashMap<IntRBTreeSet> confidenceMap() {
        List<ItemSet> itemSets = learn();
        Object2DoubleOpenHashMap<IntRBTreeSet> confidenceMap = new Object2DoubleOpenHashMap<>(itemSets.size());
        for (ItemSet itemSet : itemSets) {
            IntRBTreeSet itemSetCopy = new IntRBTreeSet(itemSet.items);
            confidenceMap.put(itemSetCopy, itemSet.support / (double) setCount);
        }
        return confidenceMap;
    }

    /**
     * Maps every learned item set whose {@code support} is at least {@code supportThreshold}
     * to its {@code support} divided by the number of baskets this instance was built from.
     *
     * <p>The threshold is compared against the undivided {@code ItemSet.support}, not against
     * the ratio stored in the map. The keys are the {@code int[]} arrays from
     * {@code ItemSet.items} and are therefore compared by identity.
     * NOTE(review): {@link #confidenceMap()} uses {@code IntRBTreeSet} keys instead; the key
     * type is kept here so existing callers are unaffected - consider aligning the two.
     *
     * @param supportThreshold inclusive lower bound, compared against {@code ItemSet.support}
     * @return a new map from item array to {@code support / basket count}
     */
    public Object2DoubleOpenHashMap<int[]> confidenceMap(double supportThreshold) {
        List<ItemSet> itemSets = learn();
        Object2DoubleOpenHashMap<int[]> confidenceMap = new Object2DoubleOpenHashMap<>(itemSets.size());
        for (ItemSet itemSet : itemSets) {
            if (itemSet.support >= supportThreshold) {
                confidenceMap.put(itemSet.items, itemSet.support / (double) setCount);
            }
        }
        return confidenceMap;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy