org.conqat.lib.commons.datamining.AssociationRuleMiner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of teamscale-lib-commons Show documentation
Show all versions of teamscale-lib-commons Show documentation
Provides common utility functions
/*
* Copyright (c) CQSE GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.conqat.lib.commons.datamining;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.conqat.lib.commons.collections.IdentityHashSet;
/**
* Mines association rules from a set of shopping baskets. Uses Apriori algorithm. See
* http://en.wikipedia.org/wiki/Apriori_algorithm.
*
* @param
* the item type; must support hashing.
*/
public class AssociationRuleMiner {
/** Threshold for confidence */
private final float confidenceThreshold;
/** Miner for frequent item sets */
private final FrequentItemSetMiner itemSetMiner;
/**
* Constructor.
*
* @param supportThreshold
* the support threshold [0..1], i.e. the fraction of the baskets in which a frequent
* item set must be present in order to be considered.
* @param confidenceThreshold
* the minimal confidence of the mined rules [0..1].
*/
public AssociationRuleMiner(float supportThreshold, float confidenceThreshold) {
this.confidenceThreshold = confidenceThreshold;
itemSetMiner = new FrequentItemSetMiner<>(supportThreshold);
}
/** Mines frequent item sets from the given shopping baskets. */
public Set> mineAssociationRules(Set> baskets) {
Set> result = new IdentityHashSet<>();
Set> frequentItemSets = itemSetMiner.mineFrequentItemSets(baskets);
Map, Double> supportMap = new HashMap<>();
for (FrequentItemSet frequentItemset : frequentItemSets) {
supportMap.put(frequentItemset.getItems(), frequentItemset.getSupport());
}
for (FrequentItemSet frequentItemSet : frequentItemSets) {
Set items = frequentItemSet.getItems();
if (items.size() > 1) {
for (T item : items) {
Set reducedItemSet = new HashSet<>(items);
reducedItemSet.remove(item);
double confidence = frequentItemSet.getSupport() / supportMap.get(reducedItemSet);
if (confidence >= confidenceThreshold) {
result.add(new AssociationRule<>(reducedItemSet, item, confidence));
}
}
}
}
return result;
}
}