// All downloads are free. Search and download functionality uses the official Maven repository.
//
// weka.associations.LabeledItemSet Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * LabeledItemSet.java
 * Copyright (C) 2004-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.associations;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.WekaEnumeration;

/**
 * Class for storing a set of items together with a class label. Item sets are
 * stored in a lexicographic order, which is determined by the header
 * information of the set of instances used for generating the set of items. All
 * methods in this class assume that item sets are stored in lexicographic
 * order. The class provides the methods used for item sets in class association
 * rule mining. Because every item set knows its class label, the training set
 * can be split up virtually.
 * 
 * @author Stefan Mutter ([email protected])
 * @version $Revision: 10203 $
 */

public class LabeledItemSet extends ItemSet implements Serializable,
  RevisionHandler {

  /** Version identifier used by Java serialization. */
  private static final long serialVersionUID = 4158771925518299903L;

  /**
   * The class label attached to this item set. It is compared against the
   * class value of an instance when the counters are updated.
   */
  protected int m_classLabel;

  /**
   * The support of the rule: incremented by the counter-update methods only
   * for instances that contain the item set and whose class value equals
   * {@link #m_classLabel}.
   */
  protected int m_ruleSupCounter;

  /**
   * Creates an item set that is tied to a class label.
   * 
   * @param totalTrans the total number of transactions, handed on to the
   *          superclass constructor
   * @param classLabel the class label of this item set
   */
  public LabeledItemSet(int totalTrans, int classLabel) {
    super(totalTrans);
    this.m_classLabel = classLabel;
  }

  /**
   * Filters a list of labeled item sets by rule support. An item set survives
   * only if its rule support lies within {@code [minSupport, maxSupport]}
   * (both bounds inclusive); item sets below the minimum or above the maximum
   * are dropped. The given list itself is not modified.
   * 
   * @param itemSets the item sets to be pruned; every element must be a
   *          {@code LabeledItemSet}
   * @param minSupport the minimum number of transactions to be covered
   * @param maxSupport the maximum support
   * @return a new list with the surviving item sets in their original order
   */
  public static ArrayList deleteItemSets(ArrayList itemSets,
    int minSupport, int maxSupport) {

    ArrayList survivors = new ArrayList(itemSets.size());

    for (Object element : itemSets) {
      LabeledItemSet candidate = (LabeledItemSet) element;
      int ruleSupport = candidate.m_ruleSupCounter;
      boolean tooRare = ruleSupport < minSupport;
      boolean tooFrequent = ruleSupport > maxSupport;
      if (tooRare || tooFrequent) {
        continue;
      }
      survivors.add(candidate);
    }
    return survivors;
  }

  /**
   * Tests if two labeled item sets are equal, i.e. they hold the same items
   * (see {@link #equalCondset(Object)}) and carry the same class label.
   * 
   * @param itemSet another item set
   * @return true if the given object is of the same class, contains the same
   *         items and has the same class label as this item set
   */
  @Override
  public final boolean equals(Object itemSet) {

    // equalCondset rejects null and objects of a different class, so the
    // cast on the right-hand side is only reached for labeled item sets.
    return equalCondset(itemSet)
      && ((LabeledItemSet) itemSet).m_classLabel == m_classLabel;
  }

  /**
   * Compares the items of this item set with those of another object,
   * ignoring the class label.
   * 
   * @param itemSet an item set
   * @return true if the given object is non-null, has exactly the same
   *         runtime class as this object, and holds the same item at every
   *         position; false otherwise
   */
  public final boolean equalCondset(Object itemSet) {

    if (itemSet == null) {
      return false;
    }
    if (!this.getClass().equals(itemSet.getClass())) {
      return false;
    }

    ItemSet other = (ItemSet) itemSet;
    if (other.items().length != m_items.length) {
      return false;
    }

    int position = 0;
    while (position < m_items.length) {
      if (other.itemAt(position) != m_items[position]) {
        return false;
      }
      position++;
    }
    return true;
  }

  /**
   * Returns a hashtable filled with the given item sets. Each item set is
   * used as a key and is mapped to its class label (boxed as an
   * {@code Integer}).
   * 
   * @param itemSets the set of item sets to be used for filling the hash
   *          table; every element must be a {@code LabeledItemSet}
   * @param initialSize the initial size of the hashtable
   * @return the generated hashtable
   */
  public static Hashtable getHashtable(
    ArrayList itemSets, int initialSize) {

    Hashtable hashtable = new Hashtable(initialSize);
    for (int i = 0; i < itemSets.size(); i++) {
      LabeledItemSet current = (LabeledItemSet) itemSets.get(i);
      // Integer.valueOf replaces the deprecated Integer(int) constructor and
      // may reuse cached instances for small values.
      hashtable.put(current, Integer.valueOf(current.m_classLabel));
    }

    return hashtable;
  }

  /**
   * Merges all item sets in the set of (k-1)-item sets to create the (k)-item
   * sets and updates the counters. Only pairs of item sets that carry the same
   * class label are merged; the merged item set inherits that label and starts
   * with both of its counters set to zero.
   * 
   * @return the generated (k)-item sets
   * @param totalTrans the total number of transactions
   * @param itemSets the set of (k-1)-item sets; every element must be a
   *          {@code LabeledItemSet}
   * @param size the number of leading (non-missing) items two item sets must
   *          share in order to be merged (described by the original author as
   *          the value of (k-1))
   */
  public static ArrayList mergeAllItemSets(ArrayList itemSets,
    int size, int totalTrans) {

    ArrayList newVector = new ArrayList();
    LabeledItemSet result;
    int numFound, k;

    for (int i = 0; i < itemSets.size(); i++) {
      LabeledItemSet first = (LabeledItemSet) itemSets.get(i);
      // 'out' labels the inner loop: 'break out' stops pairing 'first' with
      // any further partner and moves on to the next 'first'.
      out: for (int j = i + 1; j < itemSets.size(); j++) {
        LabeledItemSet second = (LabeledItemSet) itemSets.get(j);
        // Skip ahead to the next candidate with the same class label as
        // 'first'; give up on 'first' if the list runs out.
        while (first.m_classLabel != second.m_classLabel) {
          j++;
          if (j == itemSets.size()) {
            break out;
          }
          second = (LabeledItemSet) itemSets.get(j);
        }
        result = new LabeledItemSet(totalTrans, first.m_classLabel);
        result.m_items = new int[first.m_items.length];

        // Find and copy common prefix of size 'size'
        // (positions where both sets hold -1 are copied but not counted).
        // NOTE(review): k is not bounds-checked here; this presumably relies
        // on both item sets holding at least 'size' items - confirm.
        numFound = 0;
        k = 0;
        while (numFound < size) {
          if (first.m_items[k] == second.m_items[k]) {
            if (first.m_items[k] != -1) {
              numFound++;
            }
            result.m_items[k] = first.m_items[k];
          } else {
            // Prefix differs: abandon all remaining partners for 'first'.
            // Presumably safe because the item sets are assumed to be in
            // lexicographic order (see class description).
            break out;
          }
          k++;
        }

        // Check difference
        // Past the prefix, a position may be set in at most one of the two
        // item sets; the result takes whichever value is not -1.
        while (k < first.m_items.length) {
          if ((first.m_items[k] != -1) && (second.m_items[k] != -1)) {
            // Both sets hold an item at this position: the pair cannot be merged.
            break;
          } else {
            if (first.m_items[k] != -1) {
              result.m_items[k] = first.m_items[k];
            } else {
              result.m_items[k] = second.m_items[k];
            }
          }
          k++;
        }
        // Only a scan that reached the end without a conflict yields a new item set.
        if (k == first.m_items.length) {
          result.m_ruleSupCounter = 0;
          result.m_counter = 0;
          newVector.add(result);
        }
      }
    }

    return newVector;
  }

  /**
   * Splits the class attribute away. Depending on the invert flag, either the
   * instances without the class attribute or the instances with only the class
   * attribute are returned. The given instances are not modified; the result
   * is built from a copy.
   * 
   * @param instances the instances
   * @param invert flag; if true only the class attribute remains, otherwise
   *          the class attribute is the only attribute that is deleted.
   * @throws Exception if no class attribute is set on the given instances
   * @return Instances without the class attribute or instances with only the
   *         class attribute
   */
  public static Instances divide(Instances instances, boolean invert)
    throws Exception {

    // Validate before copying so that the failure path does not pay for a
    // full copy of the dataset.
    if (instances.classIndex() < 0) {
      throw new Exception(
        "For class association rule mining a class attribute has to be specified.");
    }

    Instances newInstances = new Instances(instances);
    if (invert) {
      // Repeatedly delete at the current position until it holds the class
      // attribute; the index only advances past the class attribute itself.
      int i = 0;
      while (i < newInstances.numAttributes()) {
        if (i == newInstances.classIndex()) {
          i++;
        } else {
          newInstances.deleteAttributeAt(i);
        }
      }
      return newInstances;
    }

    // Unset the class index on the copy first, then remove the attribute that
    // is the class attribute in the original instances.
    newInstances.setClassIndex(-1);
    newInstances.deleteAttributeAt(instances.classIndex());
    return newInstances;
  }

  /**
   * Builds the single-item item sets from the header information of the given
   * instances. For every attribute, every value of that attribute and every
   * value of the class attribute, one labeled item set is created whose item
   * array holds the value index at the attribute's position and -1 everywhere
   * else. The ordering of values in the header determines the lexicographic
   * order. Each item set knows its class label.
   * 
   * @return a set of item sets, each containing a single item
   * @param instancesNoClass instances without the class attribute
   * @param classes instances whose attribute at index 0 is the class attribute
   * @exception Exception if a numeric attribute is encountered
   */
  public static ArrayList singletons(Instances instancesNoClass,
    Instances classes) throws Exception {

    ArrayList singletonSets = new ArrayList();

    for (int att = 0; att < instancesNoClass.numAttributes(); att++) {
      if (instancesNoClass.attribute(att).isNumeric()) {
        throw new Exception("Can't handle numeric attributes!");
      }
      for (int value = 0; value < instancesNoClass.attribute(att).numValues(); value++) {
        for (int label = 0; label < (classes.attribute(0)).numValues(); label++) {
          LabeledItemSet singleton = new LabeledItemSet(
            instancesNoClass.numInstances(), label);
          int[] items = new int[instancesNoClass.numAttributes()];
          // -1 marks "no item" at a position; only the current attribute is set
          for (int pos = 0; pos < items.length; pos++) {
            items[pos] = (pos == att) ? value : -1;
          }
          singleton.m_items = items;
          singletonSets.add(singleton);
        }
      }
    }
    return singletonSets;
  }

  /**
   * Prunes a set of (k)-item sets using the given (k-1)-item sets. A (k)-item
   * set is kept only if every item set obtained by blanking out exactly one of
   * its items is present in {@code kMinusOne} with the same class label.
   * 
   * @param toPrune the set of (k)-item sets to be pruned; every element must
   *          be a {@code LabeledItemSet}
   * @param kMinusOne the (k-1)-item sets to be used for pruning, mapping each
   *          item set to its class label as an {@code Integer}
   * @return the pruned set of item sets
   */
  public static ArrayList pruneItemSets(ArrayList toPrune,
    Hashtable kMinusOne) {

    ArrayList newVector = new ArrayList(toPrune.size());

    for (int i = 0; i < toPrune.size(); i++) {
      LabeledItemSet current = (LabeledItemSet) toPrune.get(i);
      boolean allSubsetsFound = true;

      for (int j = 0; allSubsetsFound && j < current.m_items.length; j++) {
        if (current.m_items[j] == -1) {
          continue;
        }
        // Temporarily blank out item j so that 'current' itself acts as the
        // (k-1)-subset lookup key, then restore it right after the lookup.
        int help = current.m_items[j];
        current.m_items[j] = -1;
        Object storedLabel = kMinusOne.get(current);
        current.m_items[j] = help;

        // The explicit Integer cast is needed because the table is used as a
        // raw Hashtable here.
        allSubsetsFound = storedLabel != null
          && current.m_classLabel == ((Integer) storedLabel).intValue();
      }
      if (allSubsetsFound) {
        newVector.add(current);
      }
    }
    return newVector;
  }

  /**
   * Returns the support of this labeled item set, which is the value of the
   * rule support counter.
   * 
   * @return the rule support
   */
  @Override
  public final int support() {
    return this.m_ruleSupCounter;
  }

  /**
   * Updates the counters of this item set with respect to one transaction. If
   * the item set is contained by the given instance the general counter is
   * incremented; the rule support counter is incremented as well when the
   * class value of the transaction equals this item set's class label.
   * 
   * @param instanceNoClass the instance without the class attribute
   * @param instanceClass the matching instance whose value at index 0 is the
   *          class value
   */
  public final void upDateCounter(Instance instanceNoClass,
    Instance instanceClass) {

    if (!containedBy(instanceNoClass)) {
      return;
    }
    m_counter++;
    boolean labelMatches = instanceClass.value(0) == m_classLabel;
    if (labelMatches) {
      m_ruleSupCounter++;
    }
  }

  /**
   * Updates the counters of this item set with respect to one transaction,
   * using {@code containedByTreatZeroAsMissing} as the containment test. If
   * that test succeeds the general counter is incremented; the rule support
   * counter is incremented as well when the class value of the transaction
   * equals this item set's class label.
   * 
   * @param instanceNoClass the instance without the class attribute
   * @param instanceClass the matching instance whose value at index 0 is the
   *          class value
   */
  public final void upDateCounterTreatZeroAsMissing(Instance instanceNoClass,
    Instance instanceClass) {

    if (!containedByTreatZeroAsMissing(instanceNoClass)) {
      return;
    }
    m_counter++;
    boolean labelMatches = instanceClass.value(0) == m_classLabel;
    if (labelMatches) {
      m_ruleSupCounter++;
    }
  }

  /**
   * Updates the counters of all given item sets with respect to all given
   * transactions. The two sets of instances are walked in parallel: the
   * instance at position i of {@code instancesClass} supplies the class value
   * for the instance at position i of {@code instancesNoClass}.
   * 
   * @param itemSets the item sets whose counters are updated; every element
   *          must be a {@code LabeledItemSet}
   * @param instancesNoClass instances without the class attribute
   * @param instancesClass the values of the class attribute sorted according to
   *          instances
   */
  public static void upDateCounters(ArrayList itemSets,
    Instances instancesNoClass, Instances instancesClass) {

    int row = 0;
    while (row < instancesNoClass.numInstances()) {
      for (Enumeration sets = new WekaEnumeration(itemSets); sets
        .hasMoreElements();) {
        LabeledItemSet itemSet = (LabeledItemSet) sets.nextElement();
        itemSet.upDateCounter(instancesNoClass.instance(row),
          instancesClass.instance(row));
      }
      row++;
    }
  }

  /**
   * Updates the counters of all given item sets with respect to all given
   * transactions, calling
   * {@link #upDateCounterTreatZeroAsMissing(Instance, Instance)} for every
   * pair of item set and transaction. The two sets of instances are walked in
   * parallel: the instance at position i of {@code instancesClass} supplies
   * the class value for the instance at position i of
   * {@code instancesNoClass}.
   * 
   * @param itemSets the item sets whose counters are updated; every element
   *          must be a {@code LabeledItemSet}
   * @param instancesNoClass instances without the class attribute
   * @param instancesClass the values of the class attribute sorted according to
   *          instances
   */
  public static void upDateCountersTreatZeroAsMissing(
    ArrayList itemSets, Instances instancesNoClass,
    Instances instancesClass) {
    for (int i = 0; i < instancesNoClass.numInstances(); i++) {
      Enumeration enu = new WekaEnumeration(
        itemSets);
      while (enu.hasMoreElements()) {
        // The enumeration is untyped here, so the element has to be cast
        // before the item-set method can be called.
        ((LabeledItemSet) enu.nextElement()).upDateCounterTreatZeroAsMissing(
          instancesNoClass.instance(i), instancesClass.instance(i));
      }
    }
  }

  /**
   * Generates the rule represented by this labeled item set: the items form
   * the premise and the class label forms the consequence. The confidence is
   * the rule support counter divided by the general counter (a floating-point
   * division, so a zero general counter yields NaN or Infinity rather than an
   * exception).
   * 
   * @param minConfidence the minimum confidence
   * @param noPrune flag indicating whether pruning according to the minimum
   *          confidence value is skipped; if false, {@code pruneRules} is
   *          applied to the generated rules
   * @return an array of three lists: premises at index 0, consequences at
   *         index 1 and confidences (as {@code Double}) at index 2
   */
  public final ArrayList[] generateRules(double minConfidence,
    boolean noPrune) {

    ArrayList premises = new ArrayList();
    ArrayList consequences = new ArrayList();
    ArrayList conf = new ArrayList();
    @SuppressWarnings("unchecked")
    ArrayList[] rules = new ArrayList[3];
    ItemSet premise, consequence;

    // Generate all rules with class in the consequence.
    premise = new ItemSet(m_totalTransactions);
    consequence = new ItemSet(m_totalTransactions);
    // The premise works on a copy so that this item set's array is not shared.
    int[] premiseItems = new int[m_items.length];
    int[] consequenceItems = new int[1];
    System.arraycopy(m_items, 0, premiseItems, 0, m_items.length);
    consequence.setItem(consequenceItems);
    premise.setItem(premiseItems);
    consequence.setItemAt(m_classLabel, 0);
    consequence.setCounter(this.m_ruleSupCounter);
    premise.setCounter(this.m_counter);
    premises.add(premise);
    consequences.add(consequence);
    // Double.valueOf replaces the deprecated Double(double) constructor.
    conf.add(Double.valueOf((double) this.m_ruleSupCounter
      / (double) this.m_counter));

    rules[0] = premises;
    rules[1] = consequences;
    rules[2] = conf;
    if (!noPrune) {
      pruneRules(rules, minConfidence);
    }

    return rules;
  }

  /**
   * Returns the revision string, obtained by passing this class's revision
   * tag to {@code RevisionUtils.extract}.
   * 
   * @return the revision
   */
  @Override
  public String getRevision() {
    final String revisionTag = "$Revision: 10203 $";
    return RevisionUtils.extract(revisionTag);
  }
}