// weka.associations.PredictiveApriori Maven / Gradle / Ivy
//
// Go to download
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    PredictiveApriori.java
 *    Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.associations;

import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.TreeSet;
import java.util.Vector;

import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WekaEnumeration;

/**
 * Class implementing the predictive apriori algorithm to mine association
 * rules.<br/>
 * It searches with an increasing support threshold for the best 'n' rules
 * concerning a support-based corrected confidence value.<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * Tobias Scheffer: Finding Association Rules That Trade Support Optimally
 * against Confidence. In: 5th European Conference on Principles of Data Mining
 * and Knowledge Discovery, 424-435, 2001.<br/>
 * <br/>
 * The implementation follows the paper except for adding a rule to the output
 * of the 'n' best rules. A rule is added if:<br/>
 * the expected predictive accuracy of this rule is among the 'n' best and it is
 * not subsumed by a rule with at least the same expected predictive accuracy
 * (out of an unpublished manuscript from T. Scheffer).
 * 
 * 
 * 
 * BibTeX:
 *
 * <pre>
 * &#64;inproceedings{Scheffer2001,
 *    author = {Tobias Scheffer},
 *    booktitle = {5th European Conference on Principles of Data Mining and Knowledge Discovery},
 *    pages = {424-435},
 *    publisher = {Springer},
 *    title = {Finding Association Rules That Trade Support Optimally against Confidence},
 *    year = {2001}
 * }
 * </pre>
 * 
 * 
 * 
 * 
 * Valid options are:
 *
 * <pre>
 * -N &lt;required number of rules output&gt;
 *  The required number of rules. (default = 100)
 * </pre>
 *
 * <pre>
 * -A
 *  If set class association rules are mined. (default = no)
 * </pre>
 *
 * <pre>
 * -c &lt;the class index&gt;
 *  The class index. (default = last)
 * </pre>
 * 
 * 
 * 
 * 
 * @author Stefan Mutter ([email protected])
 * @version $Revision: 11047 $
 */

public class PredictiveApriori extends AbstractAssociator implements
  OptionHandler, CARuleMiner, TechnicalInformationHandler {

  /** Fixed version identifier for serialization of this class. */
  static final long serialVersionUID = 8109088846865075341L;

  /** The minimum support. */
  // NOTE(review): the field name suggests a premise count rather than a
  // support value; confirm the intended meaning against the code that uses it.
  protected int m_premiseCount;

  /** The maximum number of rules that are output. */
  protected int m_numRules;

  /** The number of rules created for the prior estimation (constant: 1000). */
  protected static final int m_numRandRules = 1000;

  /** The number of intervals used for the prior estimation (constant: 100). */
  protected static final int m_numIntervals = 100;

  /** The set of all sets of itemsets. */
  // NOTE(review): the generic type arguments of this declaration appear to
  // have been stripped (a stray '>' follows ArrayList), so the line does not
  // compile as written; restore the type parameters from the upstream source.
  protected ArrayList> m_Ls;

  /** The same information stored in hash tables. */
  // NOTE(review): same damage as the declaration above; the type arguments
  // are missing here as well (presumably a list of Hashtable instances, given
  // the comment and the java.util.Hashtable import; confirm upstream).
  protected ArrayList> m_hashtables;
  /** The list of all generated rules. */
  protected ArrayList