weka.associations.LabeledItemSet Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-stable Show documentation
Show all versions of weka-stable Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This is the stable version. Apart from bugfixes, this version
does not receive any other updates.
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* LabeledItemSet.java
* Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
*
*/
package weka.associations;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.Hashtable;
/**
* Class for storing a set of items together with a class label. Item sets are stored in a lexicographic
* order, which is determined by the header information of the set of instances
* used for generating the set of items. All methods in this class assume that
* item sets are stored in lexicographic order.
* The class provides the methods used for item sets in class association rule mining.
* Because every item set knows its class label the training set can be splitted up virtually.
*
* @author Stefan Mutter ([email protected])
* @version $Revision: 1.5 $
*/
public class LabeledItemSet
extends ItemSet
implements Serializable, RevisionHandler {
/** for serialization */
private static final long serialVersionUID = 4158771925518299903L;
/** The class label. */
protected int m_classLabel;
/** The support of the rule. */
protected int m_ruleSupCounter;
/**
* Constructor
*
* @param totalTrans the total number of transactions
* @param classLabel the class lebel
*/
public LabeledItemSet(int totalTrans, int classLabel){
super(totalTrans);
m_classLabel = classLabel;
}
/**
* Deletes all item sets that don't have minimum support and have more than maximum support
* @return the reduced set of item sets
* @param maxSupport the maximum support
* @param itemSets the set of item sets to be pruned
* @param minSupport the minimum number of transactions to be covered
*/
public static FastVector deleteItemSets(FastVector itemSets,
int minSupport,
int maxSupport) {
FastVector newVector = new FastVector(itemSets.size());
for (int i = 0; i < itemSets.size(); i++) {
LabeledItemSet current = (LabeledItemSet)itemSets.elementAt(i);
if ((current.m_ruleSupCounter >= minSupport)
&& (current.m_ruleSupCounter <= maxSupport))
newVector.addElement(current);
}
return newVector;
}
/**
* Tests if two item sets are equal.
*
* @param itemSet another item set
* @return true if this item set contains the same items as the given one
*/
public final boolean equals(Object itemSet) {
if (!(this.equalCondset(itemSet)))
return false;
if(m_classLabel != ((LabeledItemSet)itemSet).m_classLabel)
return false;
return true;
}
/**
* Compares two item sets
* @param itemSet an item set
* @return true if the the item sets are equal, false otherwise
*/
public final boolean equalCondset(Object itemSet) {
if ((itemSet == null) || !(itemSet.getClass().equals(this.getClass()))) {
return false;
}
if (m_items.length != ((ItemSet)itemSet).items().length)
return false;
for (int i = 0; i < m_items.length; i++)
if (m_items[i] != ((ItemSet)itemSet).itemAt(i))
return false;
return true;
}
/**
* Return a hashtable filled with the given item sets.
*
* @param itemSets the set of item sets to be used for filling the hash table
* @param initialSize the initial size of the hashtable
* @return the generated hashtable
*/
public static Hashtable getHashtable(FastVector itemSets, int initialSize) {
Hashtable hashtable = new Hashtable(initialSize);
for (int i = 0; i < itemSets.size(); i++) {
LabeledItemSet current = (LabeledItemSet)itemSets.elementAt(i);
hashtable.put(current, new Integer(current.m_classLabel));
}
return hashtable;
}
/**
* Merges all item sets in the set of (k-1)-item sets
* to create the (k)-item sets and updates the counters.
* @return the generated (k)-item sets
* @param totalTrans the total number of transactions
* @param itemSets the set of (k-1)-item sets
* @param size the value of (k-1)
*/
public static FastVector mergeAllItemSets(FastVector itemSets, int size,
int totalTrans) {
FastVector newVector = new FastVector();
LabeledItemSet result;
int numFound, k;
for (int i = 0; i < itemSets.size(); i++) {
LabeledItemSet first = (LabeledItemSet)itemSets.elementAt(i);
out:
for (int j = i+1; j < itemSets.size(); j++) {
LabeledItemSet second = (LabeledItemSet)itemSets.elementAt(j);
while(first.m_classLabel != second.m_classLabel){
j++;
if(j == itemSets.size())
break out;
second = (LabeledItemSet)itemSets.elementAt(j);
}
result = new LabeledItemSet(totalTrans,first.m_classLabel);
result.m_items = new int[first.m_items.length];
// Find and copy common prefix of size 'size'
numFound = 0;
k = 0;
while (numFound < size) {
if (first.m_items[k] == second.m_items[k]) {
if (first.m_items[k] != -1)
numFound++;
result.m_items[k] = first.m_items[k];
} else
break out;
k++;
}
// Check difference
while (k < first.m_items.length) {
if ((first.m_items[k] != -1) && (second.m_items[k] != -1))
break;
else {
if (first.m_items[k] != -1)
result.m_items[k] = first.m_items[k];
else
result.m_items[k] = second.m_items[k];
}
k++;
}
if (k == first.m_items.length) {
result.m_ruleSupCounter = 0;
result.m_counter = 0;
newVector.addElement(result);
}
}
}
return newVector;
}
/**
* Splits the class attribute away. Depending on the invert flag, the instances without class attribute or only the class attribute of all instances is returned
* @param instances the instances
* @param invert flag; if true only the class attribute remains, otherweise the class attribute is the only attribute that is deleted.
* @throws Exception exception if instances cannot be splitted
* @return Instances without the class attribute or instances with only the class attribute
*/
public static Instances divide(Instances instances, boolean invert) throws Exception{
Instances newInstances = new Instances(instances);
if(instances.classIndex() < 0)
throw new Exception("For class association rule mining a class attribute has to be specified.");
if(invert){
for(int i=0;i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy