weka.associations.AprioriItemSet Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-stable Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.
There is a newer version: 3.8.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    AprioriItemSet.java
 *    Copyright (C) 2004-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.associations;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;

import weka.core.ContingencyTables;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.WekaEnumeration;

/**
 * Class for storing a set of items. Item sets are stored in a lexicographic
 * order, which is determined by the header information of the set of instances
 * used for generating the set of items. All methods in this class assume that
 * item sets are stored in lexicographic order. The class provides methods that
 * are used in the Apriori algorithm to construct association rules.
 * 
 * @author Eibe Frank ([email protected])
 * @author Stefan Mutter ([email protected])
 * @version $Revision: 12014 $
 */
public class AprioriItemSet extends ItemSet implements Serializable,
  RevisionHandler {

  /** for serialization */
  static final long serialVersionUID = 7684467755712672058L;

  /**
   * Constructor
   * 
   * @param totalTrans the total number of transactions in the data
   */
  public AprioriItemSet(int totalTrans) {
    // No state of its own to set up here: the transaction total is simply
    // forwarded to the ItemSet superclass constructor.
    super(totalTrans);
  }

  /**
   * Outputs the confidence for a rule.
   * 
   * @param premise the premise of the rule
   * @param consequence the consequence of the rule
   * @return the confidence on the training data
   */
  public static double confidenceForRule(AprioriItemSet premise,
    AprioriItemSet consequence) {
    // Widen both counters to double before dividing so the ratio is not
    // truncated by integer division.
    final double numerator = (double) consequence.m_counter;
    final double denominator = (double) premise.m_counter;
    return numerator / denominator;
  }

  /**
   * Outputs the lift for a rule. Lift is defined as:<br>
   * confidence / prob(consequence)
   * 
   * @param premise the premise of the rule
   * @param consequence the consequence of the rule
   * @param consequenceCount how many times the consequence occurs independent
   *          of the premise
   * @return the lift on the training data
   */
  public double liftForRule(AprioriItemSet premise, AprioriItemSet consequence,
    int consequenceCount) {
    // Confidence of the rule, taken from the two item sets' counters.
    final double ruleConfidence = confidenceForRule(premise, consequence);

    // Relative frequency of the consequence over all transactions.
    final double consequenceProbability = (double) consequenceCount
      / (double) m_totalTransactions;

    return ruleConfidence / consequenceProbability;
  }

  /**
   * Outputs the leverage for a rule. Leverage is defined as:<br>
   * prob(premise &amp; consequence) - (prob(premise) * prob(consequence))
   * 
   * @param premise the premise of the rule
   * @param consequence the consequence of the rule
   * @param premiseCount how many times the premise occurs independent of the
   *          consequent
   * @param consequenceCount how many times the consequence occurs independent
   *          of the premise
   * @return the leverage on the training data
   */
  public double leverageForRule(AprioriItemSet premise,
    AprioriItemSet consequence, int premiseCount, int consequenceCount) {
    // Observed coverage: the counter stored on the consequence item set,
    // relative to the total number of transactions.
    final double observedCoverage = (double) consequence.m_counter
      / (double) m_totalTransactions;

    // Coverage expected if premise and consequence were independent: the
    // product of their individual relative frequencies.
    final double premiseProbability = (double) premiseCount
      / (double) m_totalTransactions;
    final double consequenceProbability = (double) consequenceCount
      / (double) m_totalTransactions;

    return observedCoverage - (premiseProbability * consequenceProbability);
  }

  /**
   * Outputs the conviction for a rule. Conviction is defined as:<br>
   * prob(premise) * prob(!consequence) / prob(premise &amp; !consequence)
   * 
   * @param premise the premise of the rule
   * @param consequence the consequence of the rule
   * @param premiseCount how many times the premise occurs independent of the
   *          consequent
   * @param consequenceCount how many times the consequence occurs independent
   *          of the premise
   * @return the conviction on the training data
   */
  public double convictionForRule(AprioriItemSet premise,
    AprioriItemSet consequence, int premiseCount, int consequenceCount) {
    // Numerator: premise count scaled by the fraction of transactions that do
    // not contain the consequence. Evaluated left to right (multiply, then
    // divide) in double precision.
    final int withoutConsequence = m_totalTransactions - consequenceCount;
    final double num = (double) premiseCount * (double) withoutConsequence
      / m_totalTransactions;

    // Denominator: premise count minus the consequence item set's counter,
    // plus one; the sum is formed in int arithmetic and only then widened.
    final int premiseMinusCounter = premiseCount - consequence.m_counter;
    final double denom = premiseMinusCounter + 1;

    // Report (but do not reject) a negative term on standard error.
    final boolean negativeTerm = num < 0 || denom < 0;
    if (negativeTerm) {
      final String terms = "*** " + num + " " + denom;
      final String counts = "premis count: " + premiseCount
        + " consequence count " + consequenceCount + " total trans "
        + m_totalTransactions;
      System.err.println(terms);
      System.err.println(counts);
    }

    return num / denom;
  }

  /**
   * Generates all rules for an item set.
   * 
   * @param minConfidence the minimum confidence the rules have to have
   * @param hashtables containing all(!) previously generated item sets
   * @param numItemsInSet the size of the item set for which the rules are to be
   *          generated
   * @return all the rules with minimum confidence for the given item set
   */
  public ArrayList