Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
weka.associations.LabeledItemSet Maven / Gradle / Ivy
Go to download
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This version represents the developer version, the
"bleeding edge" of development, you could say. New functionality gets added
to this version.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
/*
* LabeledItemSet.java
* Copyright (C) 2004-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.associations;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.WekaEnumeration;
/**
* Class for storing a set of items together with a class label. Item sets are
* stored in a lexicographic order, which is determined by the header
* information of the set of instances used for generating the set of items. All
* methods in this class assume that item sets are stored in lexicographic
* order. The class provides the methods used for item sets in class association
* rule mining. Because every item set knows its class label the training set
* can be splitted up virtually.
*
* @author Stefan Mutter ([email protected] )
* @version $Revision: 10203 $
*/
public class LabeledItemSet extends ItemSet implements Serializable,
RevisionHandler {
/** for serialization */
private static final long serialVersionUID = 4158771925518299903L;
/** The class label. */
protected int m_classLabel;
/** The support of the rule. */
protected int m_ruleSupCounter;
/**
* Constructor
*
* @param totalTrans the total number of transactions
* @param classLabel the class lebel
*/
public LabeledItemSet(int totalTrans, int classLabel) {
super(totalTrans);
m_classLabel = classLabel;
}
/**
* Deletes all item sets that don't have minimum support and have more than
* maximum support
*
* @return the reduced set of item sets
* @param maxSupport the maximum support
* @param itemSets the set of item sets to be pruned
* @param minSupport the minimum number of transactions to be covered
*/
public static ArrayList deleteItemSets(ArrayList itemSets,
int minSupport, int maxSupport) {
ArrayList newVector = new ArrayList(itemSets.size());
for (int i = 0; i < itemSets.size(); i++) {
LabeledItemSet current = (LabeledItemSet) itemSets.get(i);
if ((current.m_ruleSupCounter >= minSupport)
&& (current.m_ruleSupCounter <= maxSupport)) {
newVector.add(current);
}
}
return newVector;
}
/**
* Tests if two item sets are equal.
*
* @param itemSet another item set
* @return true if this item set contains the same items as the given one
*/
@Override
public final boolean equals(Object itemSet) {
if (!(this.equalCondset(itemSet))) {
return false;
}
if (m_classLabel != ((LabeledItemSet) itemSet).m_classLabel) {
return false;
}
return true;
}
/**
* Compares two item sets
*
* @param itemSet an item set
* @return true if the the item sets are equal, false otherwise
*/
public final boolean equalCondset(Object itemSet) {
if ((itemSet == null) || !(itemSet.getClass().equals(this.getClass()))) {
return false;
}
if (m_items.length != ((ItemSet) itemSet).items().length) {
return false;
}
for (int i = 0; i < m_items.length; i++) {
if (m_items[i] != ((ItemSet) itemSet).itemAt(i)) {
return false;
}
}
return true;
}
/**
* Return a hashtable filled with the given item sets.
*
* @param itemSets the set of item sets to be used for filling the hash table
* @param initialSize the initial size of the hashtable
* @return the generated hashtable
*/
public static Hashtable getHashtable(
ArrayList itemSets, int initialSize) {
Hashtable hashtable = new Hashtable(
initialSize);
for (int i = 0; i < itemSets.size(); i++) {
LabeledItemSet current = (LabeledItemSet) itemSets.get(i);
hashtable.put(current, new Integer(current.m_classLabel));
}
return hashtable;
}
/**
* Merges all item sets in the set of (k-1)-item sets to create the (k)-item
* sets and updates the counters.
*
* @return the generated (k)-item sets
* @param totalTrans the total number of transactions
* @param itemSets the set of (k-1)-item sets
* @param size the value of (k-1)
*/
public static ArrayList mergeAllItemSets(ArrayList itemSets,
int size, int totalTrans) {
ArrayList newVector = new ArrayList();
LabeledItemSet result;
int numFound, k;
for (int i = 0; i < itemSets.size(); i++) {
LabeledItemSet first = (LabeledItemSet) itemSets.get(i);
out: for (int j = i + 1; j < itemSets.size(); j++) {
LabeledItemSet second = (LabeledItemSet) itemSets.get(j);
while (first.m_classLabel != second.m_classLabel) {
j++;
if (j == itemSets.size()) {
break out;
}
second = (LabeledItemSet) itemSets.get(j);
}
result = new LabeledItemSet(totalTrans, first.m_classLabel);
result.m_items = new int[first.m_items.length];
// Find and copy common prefix of size 'size'
numFound = 0;
k = 0;
while (numFound < size) {
if (first.m_items[k] == second.m_items[k]) {
if (first.m_items[k] != -1) {
numFound++;
}
result.m_items[k] = first.m_items[k];
} else {
break out;
}
k++;
}
// Check difference
while (k < first.m_items.length) {
if ((first.m_items[k] != -1) && (second.m_items[k] != -1)) {
break;
} else {
if (first.m_items[k] != -1) {
result.m_items[k] = first.m_items[k];
} else {
result.m_items[k] = second.m_items[k];
}
}
k++;
}
if (k == first.m_items.length) {
result.m_ruleSupCounter = 0;
result.m_counter = 0;
newVector.add(result);
}
}
}
return newVector;
}
/**
* Splits the class attribute away. Depending on the invert flag, the
* instances without class attribute or only the class attribute of all
* instances is returned
*
* @param instances the instances
* @param invert flag; if true only the class attribute remains, otherweise
* the class attribute is the only attribute that is deleted.
* @throws Exception exception if instances cannot be splitted
* @return Instances without the class attribute or instances with only the
* class attribute
*/
public static Instances divide(Instances instances, boolean invert)
throws Exception {
Instances newInstances = new Instances(instances);
if (instances.classIndex() < 0) {
throw new Exception(
"For class association rule mining a class attribute has to be specified.");
}
if (invert) {
for (int i = 0; i < newInstances.numAttributes(); i++) {
if (i != newInstances.classIndex()) {
newInstances.deleteAttributeAt(i);
i--;
}
}
return newInstances;
} else {
newInstances.setClassIndex(-1);
newInstances.deleteAttributeAt(instances.classIndex());
return newInstances;
}
}
/**
* Converts the header info of the given set of instances into a set of item
* sets (singletons). The ordering of values in the header file determines the
* lexicographic order. Each item set knows its class label.
*
* @return a set of item sets, each containing a single item
* @param instancesNoClass instances without the class attribute
* @param classes the values of the class attribute sorted according to
* instances
* @exception Exception if singletons can't be generated successfully
*/
public static ArrayList singletons(Instances instancesNoClass,
Instances classes) throws Exception {
ArrayList setOfItemSets = new ArrayList();
LabeledItemSet current;
// make singletons
for (int i = 0; i < instancesNoClass.numAttributes(); i++) {
if (instancesNoClass.attribute(i).isNumeric()) {
throw new Exception("Can't handle numeric attributes!");
}
for (int j = 0; j < instancesNoClass.attribute(i).numValues(); j++) {
for (int k = 0; k < (classes.attribute(0)).numValues(); k++) {
current = new LabeledItemSet(instancesNoClass.numInstances(), k);
current.m_items = new int[instancesNoClass.numAttributes()];
for (int l = 0; l < instancesNoClass.numAttributes(); l++) {
current.m_items[l] = -1;
}
current.m_items[i] = j;
setOfItemSets.add(current);
}
}
}
return setOfItemSets;
}
/**
* Prunes a set of (k)-item sets using the given (k-1)-item sets.
*
* @param toPrune the set of (k)-item sets to be pruned
* @param kMinusOne the (k-1)-item sets to be used for pruning
* @return the pruned set of item sets
*/
public static ArrayList pruneItemSets(ArrayList toPrune,
Hashtable kMinusOne) {
ArrayList newVector = new ArrayList(toPrune.size());
int help, j;
for (int i = 0; i < toPrune.size(); i++) {
LabeledItemSet current = (LabeledItemSet) toPrune.get(i);
for (j = 0; j < current.m_items.length; j++) {
if (current.m_items[j] != -1) {
help = current.m_items[j];
current.m_items[j] = -1;
if (kMinusOne.get(current) != null
&& (current.m_classLabel == (kMinusOne.get(current).intValue()))) {
current.m_items[j] = help;
} else {
current.m_items[j] = help;
break;
}
}
}
if (j == current.m_items.length) {
newVector.add(current);
}
}
return newVector;
}
/**
* Outputs the support for an item set.
*
* @return the support
*/
@Override
public final int support() {
return m_ruleSupCounter;
}
/**
* Updates counter of item set with respect to given transaction.
*
* @param instanceNoClass instances without the class attribute
* @param instanceClass the values of the class attribute sorted according to
* instances
*/
public final void upDateCounter(Instance instanceNoClass,
Instance instanceClass) {
if (containedBy(instanceNoClass)) {
m_counter++;
if (this.m_classLabel == instanceClass.value(0)) {
m_ruleSupCounter++;
}
}
}
/**
* Updates counter of item set with respect to given transaction.
*
* @param instanceNoClass instances without the class attribute
* @param instanceClass the values of the class attribute sorted according to
* instances
*/
public final void upDateCounterTreatZeroAsMissing(Instance instanceNoClass,
Instance instanceClass) {
if (containedByTreatZeroAsMissing(instanceNoClass)) {
m_counter++;
if (this.m_classLabel == instanceClass.value(0)) {
m_ruleSupCounter++;
}
}
}
/**
* Updates counter of a specific item set
*
* @param itemSets an item sets
* @param instancesNoClass instances without the class attribute
* @param instancesClass the values of the class attribute sorted according to
* instances
*/
public static void upDateCounters(ArrayList itemSets,
Instances instancesNoClass, Instances instancesClass) {
for (int i = 0; i < instancesNoClass.numInstances(); i++) {
Enumeration enu = new WekaEnumeration(itemSets);
while (enu.hasMoreElements()) {
((LabeledItemSet) enu.nextElement()).upDateCounter(
instancesNoClass.instance(i), instancesClass.instance(i));
}
}
}
/**
* Updates counter of a specific item set
*
* @param itemSets an item sets
* @param instancesNoClass instances without the class attribute
* @param instancesClass the values of the class attribute sorted according to
* instances
*/
public static void upDateCountersTreatZeroAsMissing(
ArrayList itemSets, Instances instancesNoClass,
Instances instancesClass) {
for (int i = 0; i < instancesNoClass.numInstances(); i++) {
Enumeration enu = new WekaEnumeration(
itemSets);
while (enu.hasMoreElements()) {
enu.nextElement().upDateCounterTreatZeroAsMissing(
instancesNoClass.instance(i), instancesClass.instance(i));
}
}
}
/**
* Generates rules out of item sets
*
* @param minConfidence the minimum confidence
* @param noPrune flag indicating whether the rules are pruned accoridng to
* the minimum confidence value
* @return a set of rules
*/
public final ArrayList[] generateRules(double minConfidence,
boolean noPrune) {
ArrayList premises = new ArrayList();
ArrayList consequences = new ArrayList();
ArrayList conf = new ArrayList();
@SuppressWarnings("unchecked")
ArrayList[] rules = new ArrayList[3];
ItemSet premise, consequence;
// Generate all rules with class in the consequence.
premise = new ItemSet(m_totalTransactions);
consequence = new ItemSet(m_totalTransactions);
int[] premiseItems = new int[m_items.length];
int[] consequenceItems = new int[1];
System.arraycopy(m_items, 0, premiseItems, 0, m_items.length);
consequence.setItem(consequenceItems);
premise.setItem(premiseItems);
consequence.setItemAt(m_classLabel, 0);
consequence.setCounter(this.m_ruleSupCounter);
premise.setCounter(this.m_counter);
premises.add(premise);
consequences.add(consequence);
conf.add(new Double((double) this.m_ruleSupCounter
/ (double) this.m_counter));
rules[0] = premises;
rules[1] = consequences;
rules[2] = conf;
if (!noPrune) {
pruneRules(rules, minConfidence);
}
return rules;
}
/**
* Returns the revision string.
*
* @return the revision
*/
@Override
public String getRevision() {
return RevisionUtils.extract("$Revision: 10203 $");
}
}