weka.classifiers.mi.QuickDDIterative Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of multiInstanceLearning Show documentation
A collection of multi-instance learning classifiers. Includes the Citation KNN method, several variants of the diverse density method, support vector machines for multi-instance learning, simple wrappers for applying standard propositional learners to multi-instance data, decision tree and rule learners, and some other methods.
The newest version!
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * QuickDDIterative.java
 * Copyright (C) 2008-10 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.mi;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Optimization;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Normalize;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.unsupervised.attribute.Standardize;

/**
 * Modified, faster, iterative version of the basic
 * diverse density algorithm. Uses only instances from positive bags as
 * candidate diverse density maxima. Picks best instance based on current
 * scaling vector, then optimizes scaling vector. Once vector has been found,
 * picks new best point based on new scaling vector (if the number of desired
 * iterations is greater than one). Performs one iteration by default (Scaling
 * Once). For good results, try boosting it with RealAdaBoost, setting the
 * maximum probability of the negative class to 0.5 and enabling consideration
 * of both classes as the positive class. Note that standardization of
 * attributes is default, but normalization can work better.
 * <p>
 * James R. Foulds, Eibe Frank: Speeding up and boosting diverse density
 * learning. In: Proc 13th International Conference on Discovery Science,
 * 102-116, 2010.
 * 
 * 
 * 
 * BibTeX:
 *
 * <pre>
 * &#64;inproceedings{Foulds2010,
 *    author = {James R. Foulds and Eibe Frank},
 *    booktitle = {Proc 13th International Conference on Discovery Science},
 *    pages = {102-116},
 *    publisher = {Springer},
 *    title = {Speeding up and boosting diverse density learning},
 *    year = {2010}
 * }
 * </pre>
 * 
 * 
 * 
 * 
 * Valid options are:
 * <p>
 *
 * <pre>
 * -D
 *  Turn on debugging output.
 * </pre>
 *
 * <pre>
 * -N &lt;num&gt;
 *  Whether to 0=normalize/1=standardize/2=neither.
 *  (default 1=standardize)
 * </pre>
 *
 * <pre>
 * -S &lt;num&gt;
 *  The initial scaling factor (constant for all attributes).
 * </pre>
 *
 * <pre>
 * -M &lt;num&gt;
 *  Maximum probability of negative class (default 1).
 * </pre>
 *
 * <pre>
 * -I &lt;num&gt;
 *  The maximum number of iterations to perform (default 1).
 * </pre>
 *
 * <pre>
 * -C
 *  Consider both classes as positive classes. (default: only last class).
 * </pre>
 * 
 * 
 * 
 * 
 * @author James Foulds
 * @author Xin Xu
 * @author Eibe Frank
 * @version $Revision: 10369 $
 */
public class QuickDDIterative extends AbstractClassifier implements
  OptionHandler, MultiInstanceCapabilitiesHandler, TechnicalInformationHandler,
  WeightedInstancesHandler {

  /** Version identifier used for serialization. */
  static final long serialVersionUID = 4263507733600536170L;

  /** The index of the class attribute. */
  protected int m_ClassIndex;

  /**
   * The target point and scaling vector learned by the algorithm (comment
   * attributed to Jimmy in the original source).
   */
  protected double[] m_Par;

  /**
   * The current guess at the target point, without scaling information
   * (comment attributed to Jimmy in the original source).
   */
  protected double[] m_CurrentCandidate;

  /** The number of class labels. */
  protected int m_NumClasses;

  /** The weight of each bag. */
  protected double[] m_BagWeights;

  /** The class label of each bag. */
  protected int[] m_Classes;

  /**
   * The multi-instance data as a three-dimensional array.
   * NOTE(review): the meaning of each dimension is not visible from the
   * declaration; presumably [bag][attribute][instance] - confirm against the
   * code that fills it.
   */
  protected double[][][] m_Data;

  /**
   * All attribute names.
   * NOTE(review): presumably a dataset header carrying the attribute
   * definitions rather than actual instances - confirm against the code that
   * assigns it.
   */
  protected Instances m_Attributes;

  /** The filter used to standardize/normalize all values; initially unset. */
  protected Filter m_Filter = null;

  /**
   * Whether to normalize, standardize or do neither; one of the FILTER_*
   * constants declared further down in this class. Default: standardize.
   */
  protected int m_filterType = FILTER_STANDARDIZE;

  /**
   * Initial scaling factor for the Gaussian-like function at the target
   * point. Default: 1.0.
   */
  protected double m_scaleFactor = 1.0;

  /** The maximum number of iterations to perform. Default: 1. */
  protected int m_maxIterations = 1;

  /** The maximum probability for the negative class. Default: 1.0. */
  protected double m_maxProbNegativeClass = 1.0;

  /**
   * Whether to consider both classes as the "positive" class in turn.
   * Default: false.
   */
  protected boolean m_considerBothClasses = false;

  /** The index of the positive class. Default: 1. */
  protected byte m_posClass = 1;

  /** Filter type: normalize the training data. */
  public static final int FILTER_NORMALIZE = 0;
  /** Filter type: standardize the training data. */
  public static final int FILTER_STANDARDIZE = 1;
  /** Filter type: neither normalize nor standardize. */
  public static final int FILTER_NONE = 2;
  /** Tags pairing each FILTER_* constant with a readable description. */
  public static final Tag[] TAGS_FILTER = {
    new Tag(FILTER_NORMALIZE, "Normalize training data"),
    new Tag(FILTER_STANDARDIZE, "Standardize training data"),
    new Tag(FILTER_NONE, "No normalization/standardization"), };

  /**
   * Machine precision constants, computed once when the class is loaded.
   * m_Epsilon is the smallest value in the sequence 1, 1/2, 1/4, ... that
   * still makes 1.0 + value compare greater than 1.0; m_Zero is its square
   * root.
   */
  protected static double m_Epsilon, m_Zero;
  static {
    // Keep halving until adding the candidate to 1.0 no longer changes the sum.
    double candidate = 1.0;
    do {
      candidate /= 2.0;
    } while (1.0 + candidate > 1.0);

    // The loop stops one halving too late, so step back up by one doubling.
    m_Epsilon = candidate * 2.0;
    m_Zero = Math.sqrt(m_Epsilon);
  }

  /** The filter used to get rid of missing values. */
  protected ReplaceMissingValues m_Missing = new ReplaceMissingValues();

  /**
   * Provides a human-readable description of this classifier, followed by a
   * blank line and the textual form of its technical reference.
   * 
   * @return a description of the classifier suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String globalInfo() {
    StringBuilder description = new StringBuilder();

    // One append per sentence of the description.
    description
      .append("Modified, faster, iterative version of the basic diverse density algorithm. ");
    description
      .append("Uses only instances from positive bags as candidate diverse density maxima. ");
    description
      .append("Picks best instance based on current scaling vector, then optimizes scaling vector. ");
    description
      .append("Once vector has been found, picks new best point based on new scaling vector ")
      .append("(if the number of desired iterations is greater than one). ");
    description.append("Performs one iteration by default (Scaling Once). ");
    description
      .append("For good results, try boosting it with RealAdaBoost, setting the maximum ")
      .append("probability of the negative class to 0.5 and enabling consideration of both ")
      .append("classes as the positive class. ");
    description
      .append("Note that standardization of attributes is default, but normalization can work better.\n\n");

    // The publication reference is appended after the blank line.
    description.append(getTechnicalInformation().toString());

    return description.toString();
  }

  /**
   * Builds the bibliographic record (an INPROCEEDINGS entry) for the paper
   * this classifier is based on: author, title, book title, year, pages and
   * publisher.
   * 
   * @return the technical information about this class
   */
  @Override
  public TechnicalInformation getTechnicalInformation() {
    final TechnicalInformation paper =
      new TechnicalInformation(Type.INPROCEEDINGS);

    paper.setValue(Field.AUTHOR, "James R. Foulds and Eibe Frank");
    paper.setValue(Field.TITLE,
      "Speeding up and boosting diverse density learning");
    paper.setValue(Field.BOOKTITLE,
      "Proc 13th International Conference on Discovery Science");
    paper.setValue(Field.YEAR, "2010");
    paper.setValue(Field.PAGES, "102-116");
    paper.setValue(Field.PUBLISHER, "Springer");

    return paper;
  }

  /**
   * Returns an enumeration describing the available options
   * 
   * @return an enumeration of all the available options
   */
  @Override
  public Enumeration