All downloads are free. The search and download features use the official Maven repository.

weka.classifiers.rules.part.MakeDecList Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    MakeDecList.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.rules.part;

import java.io.Serializable;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.classifiers.trees.j48.ModelSelection;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.CapabilitiesHandler;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;

/**
 * Class for handling a decision list.
 * <p>
 * A decision list is built by repeatedly constructing one rule on the
 * remaining growing data, down-weighting (or dropping) the instances that rule
 * covers, and appending the rule to the list until no growing data is left.
 * Depending on the constructor used, each rule is unpruned, pruned with the
 * C4.5 confidence factor, or pruned against a hold-out set.
 * <p>
 * Field names and the {@code Vector} field type are kept as they are because
 * the class is {@link Serializable} with a fixed {@code serialVersionUID}.
 * 
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision: 14511 $
 */
public class MakeDecList implements Serializable, RevisionHandler {

  /** for serialization */
  private static final long serialVersionUID = -1427481323245079123L;

  /**
   * Vector storing the rules, in the order they were built. Only assigned by
   * {@link #buildClassifier(Instances)}.
   */
  private Vector<ClassifierDecList> theRules;

  /** The confidence for C45-type pruning. */
  private double CF = 0.25;

  /** Minimum number of objects */
  private final int minNumObj;

  /** The model selection method. */
  private final ModelSelection toSelectModeL;

  /**
   * How many subsets of equal size? One used for pruning, the rest for
   * training.
   */
  private int numSetS = 3;

  /** Use reduced error pruning? */
  private boolean reducedErrorPruning = false;

  /** Generated unpruned list? */
  private boolean unpruned = false;

  /** The seed for random number generation. */
  private int m_seed = 1;

  /**
   * Constructor for unpruned dec list.
   * 
   * @param toSelectLocModel the model selection method handed to every rule
   * @param minNum the minimum number of objects handed to every rule
   */
  public MakeDecList(ModelSelection toSelectLocModel, int minNum) {

    toSelectModeL = toSelectLocModel;
    reducedErrorPruning = false;
    unpruned = true;
    minNumObj = minNum;
  }

  /**
   * Constructor for dec list pruned using C4.5 pruning.
   * 
   * @param toSelectLocModel the model selection method handed to every rule
   * @param cf the confidence factor used for pruning
   * @param minNum the minimum number of objects handed to every rule
   */
  public MakeDecList(ModelSelection toSelectLocModel, double cf, int minNum) {

    toSelectModeL = toSelectLocModel;
    CF = cf;
    reducedErrorPruning = false;
    unpruned = false;
    minNumObj = minNum;
  }

  /**
   * Constructor for dec list pruned using hold-out pruning.
   * 
   * @param toSelectLocModel the model selection method handed to every rule
   * @param num the number of equally sized subsets; the last one is used for
   *          pruning, the others for growing
   * @param minNum the minimum number of objects handed to every rule
   * @param seed the seed for the random number generator used to shuffle the
   *          data
   */
  public MakeDecList(ModelSelection toSelectLocModel, int num, int minNum,
    int seed) {

    toSelectModeL = toSelectLocModel;
    numSetS = num;
    reducedErrorPruning = true;
    unpruned = false;
    minNumObj = minNum;
    m_seed = seed;
  }

  /**
   * Builds dec list.
   * <p>
   * With hold-out pruning the supplied dataset is randomized and stratified in
   * place before it is split into growing and pruning data; otherwise the
   * supplied dataset itself is used as the growing data.
   * 
   * @param data the training data
   * @exception Exception if dec list can't be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {

    final boolean holdOutPruning = reducedErrorPruning && !unpruned;
    Instances growData;
    Instances pruneData;

    theRules = new Vector<ClassifierDecList>();
    if (holdOutPruning) {
      Random random = new Random(m_seed);
      data.randomize(random);
      data.stratify(numSetS);
      // The last fold is held out for pruning, the remaining folds grow rules.
      growData = data.trainCV(numSetS, numSetS - 1, random);
      pruneData = data.testCV(numSetS, numSetS - 1);
    } else {
      growData = data;
      pruneData = null;
    }

    while (Utils.gr(growData.numInstances(), 0)) {

      // Create rule
      ClassifierDecList currentRule;
      if (unpruned) {
        currentRule = new ClassifierDecList(toSelectModeL, minNumObj);
        currentRule.buildRule(growData);
      } else if (reducedErrorPruning) {
        PruneableDecList holdOutRule = new PruneableDecList(toSelectModeL,
          minNumObj);
        holdOutRule.buildRule(growData, pruneData);
        currentRule = holdOutRule;
      } else {
        C45PruneableDecList c45Rule = new C45PruneableDecList(toSelectModeL,
          CF, minNumObj);
        c45Rule.buildRule(growData);
        currentRule = c45Rule;
      }

      // Remove instances from growing data
      growData = removeCoveredInstances(currentRule, growData);

      // Remove instances from pruning data
      if (holdOutPruning) {
        pruneData = removeCoveredInstances(currentRule, pruneData);
      }
      theRules.addElement(currentRule);
    }
  }

  /**
   * Returns a new dataset holding the instances of {@code data} that the given
   * rule does not cover completely (rule weight smaller than 1), each with its
   * weight multiplied by {@code 1 - ruleWeight}.
   * <p>
   * NOTE(review): the weight is set on the enumerated instance before it is
   * added to the result; whether this also alters {@code data} depends on the
   * semantics of {@code Instances} - confirm before relying on it.
   * 
   * @param rule the rule that was just built
   * @param data the data to filter
   * @return the remaining, re-weighted instances
   * @exception Exception if the rule cannot compute a weight for an instance
   */
  private static Instances removeCoveredInstances(ClassifierDecList rule,
    Instances data) throws Exception {

    Instances remaining = new Instances(data, data.numInstances());
    Enumeration<Instance> enu = data.enumerateInstances();
    while (enu.hasMoreElements()) {
      Instance instance = enu.nextElement();
      double ruleWeight = rule.weight(instance);
      if (Utils.sm(ruleWeight, 1)) {
        instance.setWeight(instance.weight() * (1 - ruleWeight));
        remaining.add(instance);
      }
    }
    remaining.compactify();
    return remaining;
  }

  /**
   * Outputs the classifier into a string: one rule per line, followed by a
   * line giving the number of rules.
   * 
   * @return the textual description of the decision list
   */
  @Override
  public String toString() {

    StringBuilder text = new StringBuilder();

    for (ClassifierDecList rule : theRules) {
      text.append(rule).append("\n");
    }
    text.append("Number of Rules  : \t").append(theRules.size()).append("\n");

    return text.toString();
  }

  /**
   * Classifies an instance.
   * 
   * @param instance the instance to classify
   * @return the index of the class with the largest value in the distribution
   *         returned by {@link #distributionForInstance(Instance)}; the first
   *         such index wins when later values are not greater according to
   *         {@code Utils.gr}
   * @exception Exception if instance can't be classified
   */
  public double classifyInstance(Instance instance) throws Exception {

    double maxProb = -1;
    int maxIndex = 0;

    double[] sumProbs = distributionForInstance(instance);
    for (int j = 0; j < sumProbs.length; j++) {
      if (Utils.gr(sumProbs[j], maxProb)) {
        maxIndex = j;
        maxProb = sumProbs[j];
      }
    }

    return maxIndex;
  }

  /**
   * Returns the class distribution for an instance.
   * <p>
   * The rules are visited in list order. Each rule that assigns the instance a
   * weight greater than zero contributes its distribution, scaled by the
   * weight still unaccounted for; that remaining weight is then reduced by the
   * rule's weight. The walk stops once no weight remains or the rules run out.
   * 
   * @param instance the instance to compute the distribution for
   * @return one accumulated value per class
   * @exception Exception if distribution can't be computed
   */
  public double[] distributionForInstance(Instance instance) throws Exception {

    double[] sumProbs = new double[instance.numClasses()];
    double weight = 1;

    // Get probabilities.
    for (int i = 0; Utils.gr(weight, 0) && (i < theRules.size()); i++) {
      ClassifierDecList rule = theRules.elementAt(i);
      double currentWeight = rule.weight(instance);
      if (Utils.gr(currentWeight, 0)) {
        double[] currentProbs = rule.distributionForInstance(instance);
        for (int j = 0; j < sumProbs.length; j++) {
          sumProbs[j] += weight * currentProbs[j];
        }
        weight = weight * (1 - currentWeight);
      }
    }

    return sumProbs;
  }

  /**
   * Outputs the number of rules in the classifier.
   * 
   * @return the number of rules currently stored
   */
  public int numRules() {

    return theRules.size();
  }

  /**
   * Returns the revision string.
   * 
   * @return the revision
   */
  @Override
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 14511 $");
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy