weka.classifiers.mi.MINND Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-stable Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.
There is a newer version: 3.8.6
Show newest version
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * MINND.java
 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.mi;

import weka.classifiers.Classifier;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

import java.util.Enumeration;
import java.util.Vector;

/** 
 
 * Multiple-Instance Nearest Neighbour with Distribution learner.

 * 

 * It uses gradient descent to find the weight for each dimension of each exeamplar from the starting point of 1.0. In order to avoid overfitting, it uses mean-square function (i.e. the Euclidean distance) to search for the weights.

 *  It then uses the weights to cleanse the training data. After that it searches for the weights again from the starting points of the weights searched before.

 *  Finally it uses the most updated weights to cleanse the test exemplar and then finds the nearest neighbour of the test exemplar using partly-weighted Kullback distance. But the variances in the Kullback distance are the ones before cleansing.

 * 

 * For more information see:

 * 

 * Xin Xu (2001). A nearest distribution approach to multiple-instance learning. Hamilton, NZ.
 * 
 
 *
 
 * BibTeX:
 * 
 * @misc{Xu2001,
 *    address = {Hamilton, NZ},
 *    author = {Xin Xu},
 *    note = {0657.591B},
 *    school = {University of Waikato},
 *    title = {A nearest distribution approach to multiple-instance learning},
 *    year = {2001}
 * }
 * 
 * 
 
 *
 
 * Valid options are: 

 * 
 * 
 -K <number of neighbours>
 *  Set number of nearest neighbour for prediction
 *  (default 1)
 * 
 *  -S <number of neighbours>
 *  Set number of nearest neighbour for cleansing the training data
 *  (default 1)
 * 
 *  -E <number of neighbours>
 *  Set number of nearest neighbour for cleansing the testing data
 *  (default 1)
 * 
 
 *
 * @author Xin Xu ([email protected])
 * @version $Revision: 9144 $ 
 */
public class MINND 
  extends Classifier 
  implements OptionHandler, MultiInstanceCapabilitiesHandler,
             TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = -4512599203273864994L;
  
  /** The number of nearest neighbour for prediction */
  protected int m_Neighbour = 1;

  /** The mean for each attribute of each exemplar */
  protected double[][] m_Mean = null;

  /** The variance for each attribute of each exemplar */
  protected double[][] m_Variance = null;

  /** The dimension of each exemplar, i.e. (numAttributes-2) */
  protected int m_Dimension = 0;

  /** header info of the data */
  protected Instances m_Attributes;;

  /** The class label of each exemplar */
  protected double[] m_Class = null;

  /** The number of class labels in the data */
  protected int m_NumClasses = 0;

  /** The weight of each exemplar */
  protected double[] m_Weights = null;

  /** The very small number representing zero */
  static private double m_ZERO = 1.0e-45;

  /** The learning rate in the gradient descent */
  protected double m_Rate = -1;

  /** The minimum values for numeric attributes. */
  private double [] m_MinArray=null;

  /** The maximum values for numeric attributes. */
  private double [] m_MaxArray=null;

  /** The stopping criteria of gradient descent*/
  private double m_STOP = 1.0e-45;

  /** The weights that alter the dimnesion of each exemplar */
  private double[][] m_Change=null;

  /** The noise data of each exemplar */
  private double[][] m_NoiseM = null, m_NoiseV = null, m_ValidM = null, 
          m_ValidV = null;

  /** The number of nearest neighbour instances in the selection of noises 
    in the training data*/
  private int m_Select = 1;

  /** The number of nearest neighbour exemplars in the selection of noises 
    in the test data */
  private int m_Choose = 1;

  /** The decay rate of learning rate */
  private double m_Decay = 0.5;

  /**
   * Returns a string describing this filter
   *
   * @return a description of the filter suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return 
        "Multiple-Instance Nearest Neighbour with Distribution learner.\n\n"
      + "It uses gradient descent to find the weight for each dimension of "
      + "each exeamplar from the starting point of 1.0. In order to avoid "
      + "overfitting, it uses mean-square function (i.e. the Euclidean "
      + "distance) to search for the weights.\n "
      + "It then uses the weights to cleanse the training data. After that "
      + "it searches for the weights again from the starting points of the "
      + "weights searched before.\n "
      + "Finally it uses the most updated weights to cleanse the test exemplar "
      + "and then finds the nearest neighbour of the test exemplar using "
      + "partly-weighted Kullback distance. But the variances in the Kullback "
      + "distance are the ones before cleansing.\n\n"
      + "For more information see:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing 
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   * 
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation 	result;
    
    result = new TechnicalInformation(Type.MISC);
    result.setValue(Field.AUTHOR, "Xin Xu");
    result.setValue(Field.YEAR, "2001");
    result.setValue(Field.TITLE, "A nearest distribution approach to multiple-instance learning");
    result.setValue(Field.SCHOOL, "University of Waikato");
    result.setValue(Field.ADDRESS, "Hamilton, NZ");
    result.setValue(Field.NOTE, "0657.591B");
    
    return result;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return      the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.RELATIONAL_ATTRIBUTES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);
    
    // other
    result.enable(Capability.ONLY_MULTIINSTANCE);
    
    return result;
  }

  /**
   * Returns the capabilities of this multi-instance classifier for the
   * relational data.
   *
   * @return            the capabilities of this object
   * @see               Capabilities
   */
  public Capabilities getMultiInstanceCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();
    
    // attributes
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.disableAllClasses();
    result.enable(Capability.NO_CLASS);
    
    return result;
  }

  /**
   * As normal Nearest Neighbour algorithm does, it's lazy and simply
   * records the exemplar information (i.e. mean and variance for each
   * dimension of each exemplar and their classes) when building the model.
   * There is actually no need to store the exemplars themselves.
   *
   * @param exs the training exemplars
   * @throws Exception if the model cannot be built properly
   */    
  public void buildClassifier(Instances exs)throws Exception{
    // can classifier handle the data?
    getCapabilities().testWithFail(exs);

    // remove instances with missing class
    Instances newData = new Instances(exs);
    newData.deleteWithMissingClass();
    
    int numegs = newData.numInstances();
    m_Dimension = newData.attribute(1).relation().numAttributes();
    m_Attributes = newData.stringFreeStructure(); 
    m_Change = new double[numegs][m_Dimension];
    m_NumClasses = exs.numClasses();
    m_Mean = new double[numegs][m_Dimension];
    m_Variance = new double[numegs][m_Dimension];
    m_Class = new double[numegs];
    m_Weights = new double[numegs];
    m_NoiseM = new double[numegs][m_Dimension];
    m_NoiseV = new double[numegs][m_Dimension];
    m_ValidM = new double[numegs][m_Dimension];
    m_ValidV = new double[numegs][m_Dimension];
    m_MinArray = new double[m_Dimension];
    m_MaxArray = new double[m_Dimension];
    for(int v=0; v < m_Dimension; v++)
      m_MinArray[v] = m_MaxArray[v] = Double.NaN;

    for(int w=0; w < numegs; w++){
      updateMinMax(newData.instance(w));
    }

    // Scale exemplars
    Instances data = m_Attributes;

    for(int x=0; x < numegs; x++){
      Instance example = newData.instance(x);
      example = scale(example);
      for (int i=0; i m_MaxArray[j])
                m_MaxArray[j] = ins.value(j);
            }
          }
        }
      }
    }
  }

  /**
   * Scale the given exemplar so that the returned exemplar
   * has the value of 0 to 1 for each dimension
   * 
   * @param before the given exemplar
   * @return the resultant exemplar after scaling
   * @throws Exception if given exampler cannot be scaled properly
   */
  private Instance scale(Instance before) throws Exception{

    Instances afterInsts = before.relationalValue(1).stringFreeStructure();
    Instance after = new Instance(before.numAttributes());
    after.setDataset(m_Attributes);

    for(int i=0; i < before.relationalValue(1).numInstances(); i++){
      Instance datum = before.relationalValue(1).instance(i);
      Instance inst = (Instance)datum.copy();

      for(int j=0; j < m_Dimension; j++){
        if(before.relationalValue(1).attribute(j).isNumeric())
          inst.setValue(j, (datum.value(j) - m_MinArray[j])/(m_MaxArray[j] - m_MinArray[j]));	
      }
      afterInsts.add(inst);
    }

    int attValue = after.attribute(1).addRelation(afterInsts);
    after.setValue(0, before.value( 0));
    after.setValue(1, attValue);	
    after.setValue(2, before.value( 2));

    return after;
  }

  /**
   * Use gradient descent to distort the MU parameter for
   * the exemplar.  The exemplar can be in the specified row in the 
   * given matrix, which has numExemplar rows and numDimension columns;
   * or not in the matrix.
   * 
   * @param row the given row index
   * @param mean
   */
  public void findWeights(int row, double[][] mean){

    double[] neww = new double[m_Dimension];
    double[] oldw = new double[m_Dimension];
    System.arraycopy(m_Change[row], 0, neww, 0, m_Dimension);
    //for(int z=0; z 0.0)
              delta[k] += (var/distance - 1.0) * 0.5 *
                (X[rowpos][k]-X[i][k]) *
                (X[rowpos][k]-X[i][k]);
      }
    }
    //System.out.println("???delta: "+delta);
    return delta;
  }

  /**
   * Compute the target function to minimize in gradient descent
   * The formula is:

   * 1/2*sum[i=1..p](f(X, Xi)-var(Y, Yi))^2 
   * where p is the number of exemplars and Y is the class label.
   * In the case of X=MU, f() is the Euclidean distance between two
   * exemplars together with the related weights and var() is 
   * sqrt(numDimension)*(Y-Yi) where Y-Yi is either 0 (when Y==Yi)
   * or 1 (Y!=Yi) 
   *
   * @param x the weights of the exemplar in question
   * @param rowpos row index of x in X
   * @param Y the observed class label
   * @return the result of the target function
   */
  public double target(double[] x, double[][] X, int rowpos, double[] Y){
    double y = Y[rowpos], result=0;

    for(int i=0; i < X.length; i++){
      if((i != rowpos) && (X[i] != null)){
        double var = (y==Y[i]) ? 0.0 : Math.sqrt((double)m_Dimension - 1);
        double f=0;
        for(int j=0; j < m_Dimension; j++)
          if(Utils.gr(m_Variance[rowpos][j], 0.0)){
            f += x[j]*(X[rowpos][j]-X[i][j]) * (X[rowpos][j]-X[i][j]);     
            //System.out.println("i:"+i+" j: "+j+" row: "+rowpos);
          }
        f = Math.sqrt(f);
        //System.out.println("???distance between "+rowpos+" and "+i+": "+f+"|y:"+y+" vs "+Y[i]);
        if(Double.isInfinite(f))
          System.exit(1);
        result += 0.5 * (f - var) * (f - var);
      }
    }
    //System.out.println("???target: "+result);
    return result;
  }    

  /**
   * Use Kullback Leibler distance to find the nearest neighbours of
   * the given exemplar.
   * It also uses K-Nearest Neighbour algorithm to classify the 
   * test exemplar
   *
   * @param ex the given test exemplar
   * @return the classification 
   * @throws Exception if the exemplar could not be classified
   * successfully
   */
  public double classifyInstance(Instance ex)throws Exception{

    ex = scale(ex);

    double[] var = new double [m_Dimension];
    for (int i=0; i= y)
        insts.add (datum);

    }

    after.setValue(0, before.value( 0));
    after.setValue(1, after.attribute(1).addRelation(insts));
    after.setValue(2, before.value( 2));

    return after;
  }    

  /**
   * This function calculates the Kullback Leibler distance between
   * two normal distributions.  This distance is always positive. 
   * Kullback Leibler distance = integral{f(X)ln(f(X)/g(X))}
   * Note that X is a vector.  Since we assume dimensions are independent
   * f(X)(g(X) the same) is actually the product of normal density
   * functions of each dimensions.  Also note that it should be log2
   * instead of (ln) in the formula, but we use (ln) simply for computational
   * convenience.
   *
   * The result is as follows, suppose there are P dimensions, and f(X)
   * is the first distribution and g(X) is the second:
   * Kullback = sum[1..P](ln(SIGMA2/SIGMA1)) +
   *            sum[1..P](SIGMA1^2 / (2*(SIGMA2^2))) +
   *            sum[1..P]((MU1-MU2)^2 / (2*(SIGMA2^2))) -
   *            P/2
   *
   * @param mu1 mu of the first normal distribution
   * @param mu2 mu of the second normal distribution 
   * @param var1 variance(SIGMA^2) of the first normal distribution
   * @param var2 variance(SIGMA^2) of the second normal distribution
   * @return the Kullback distance of two distributions
   */
  public double kullback(double[] mu1, double[] mu2,
      double[] var1, double[] var2, int pos){
    int p = mu1.length;
    double result = 0;

    for(int y=0; y < p; y++){
      if((Utils.gr(var1[y], 0)) && (Utils.gr(var2[y], 0))){
        result +=  
          ((Math.log(Math.sqrt(var2[y]/var1[y]))) +
           (var1[y] / (2.0*var2[y])) + 
           (m_Change[pos][y] * (mu1[y]-mu2[y])*(mu1[y]-mu2[y]) / (2.0*var2[y])) -
           0.5);
      }
    }

    return result;
  }

  /**
   * Returns an enumeration describing the available options
   *
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {
    Vector result = new Vector();

    result.addElement(new Option(
          "\tSet number of nearest neighbour for prediction\n"
          + "\t(default 1)",
          "K", 1, "-K "));
    
    result.addElement(new Option(
          "\tSet number of nearest neighbour for cleansing the training data\n"
          + "\t(default 1)",
          "S", 1, "-S "));
    
    result.addElement(new Option(
          "\tSet number of nearest neighbour for cleansing the testing data\n"
          + "\t(default 1)",
          "E", 1, "-E "));

    return result.elements();
  }

  /**
   * Parses a given list of options. 

   * 
   
   * Valid options are: 

   * 
   * 
 -K <number of neighbours>
   *  Set number of nearest neighbour for prediction
   *  (default 1)
   * 
   *  -S <number of neighbours>
   *  Set number of nearest neighbour for cleansing the training data
   *  (default 1)
   * 
   *  -E <number of neighbours>
   *  Set number of nearest neighbour for cleansing the testing data
   *  (default 1)
   * 
   
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception{

    setDebug(Utils.getFlag('D', options));

    String numNeighbourString = Utils.getOption('K', options);
    if (numNeighbourString.length() != 0) 
      setNumNeighbours(Integer.parseInt(numNeighbourString));
    else 
      setNumNeighbours(1);

    numNeighbourString = Utils.getOption('S', options);
    if (numNeighbourString.length() != 0) 
      setNumTrainingNoises(Integer.parseInt(numNeighbourString));
    else 
      setNumTrainingNoises(1);

    numNeighbourString = Utils.getOption('E', options);
    if (numNeighbourString.length() != 0) 
      setNumTestingNoises(Integer.parseInt(numNeighbourString));
    else 
      setNumTestingNoises(1);
  }

  /**
   * Gets the current settings of the Classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    Vector        result;
    
    result = new Vector();

    if (getDebug())
      result.add("-D");
    
    result.add("-K");
    result.add("" + getNumNeighbours());
    
    result.add("-S");
    result.add("" + getNumTrainingNoises());
    
    result.add("-E");
    result.add("" + getNumTestingNoises());

    return (String[]) result.toArray(new String[result.size()]);
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numNeighboursTipText() {
    return "The number of nearest neighbours to the estimate the class prediction of test bags.";
  }

  /**
   * Sets the number of nearest neighbours to estimate
   * the class prediction of tests bags
   * @param numNeighbour the number of citers
   */
  public void setNumNeighbours(int numNeighbour){
    m_Neighbour = numNeighbour;
  }

  /**
   * Returns the number of nearest neighbours to estimate
   * the class prediction of tests bags
   * @return the number of neighbours
   */
  public int getNumNeighbours(){
    return m_Neighbour;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numTrainingNoisesTipText() {
    return "The number of nearest neighbour instances in the selection of noises in the training data.";
  }

  /**
   * Sets the number of nearest neighbour instances in the 
   * selection of noises in the training data
   * 
   * @param numTraining the number of noises in training data 
   */
  public void setNumTrainingNoises (int numTraining){
    m_Select = numTraining;
  }

  /**
   * Returns the number of nearest neighbour instances in the 
   * selection of noises in the training data
   * 
   * @return the number of noises in training data
   */
  public int getNumTrainingNoises(){
    return m_Select;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numTestingNoisesTipText() {
    return "The number of nearest neighbour instances in the selection of noises in the test data.";
  }

  /**
   * Returns The number of nearest neighbour instances in the 
   * selection of noises in the test data 
   * @return the number of noises in test data
   */
  public int getNumTestingNoises(){
    return m_Choose;
  }

  /**
   * Sets The number of nearest neighbour exemplars in the 
   * selection of noises in the test data 
   * @param numTesting the number of noises in test data
   */
  public void setNumTestingNoises (int numTesting){
    m_Choose = numTesting;
  }
  
  /**
   * Returns the revision string.
   * 
   * @return		the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 9144 $");
  }

  /**
   * Main method for testing.
   *
   * @param args the options for the classifier
   */
  public static void main(String[] args) {	
    runClassifier(new MINND(), args);
  }
}