/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * MISMO.java
 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.mi;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.functions.Logistic;
import weka.classifiers.functions.supportVector.Kernel;
import weka.classifiers.functions.supportVector.SMOset;
import weka.classifiers.mi.supportVector.MIPolyKernel;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.SerializedObject;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.MultiInstanceToPropositional;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.Normalize;
import weka.filters.unsupervised.attribute.PropositionalToMultiInstance;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.unsupervised.attribute.Standardize;

/**
 * Implements John Platt's sequential minimal optimization algorithm for
 * training a support vector classifier.
 * 
 * This implementation globally replaces all missing values and transforms
 * nominal attributes into binary ones. It also normalizes all attributes by
 * default. (In that case the coefficients in the output are based on the
 * normalized data, not the original data --- this is important for
 * interpreting the classifier.)
 * 
 * Multi-class problems are solved using pairwise classification.
 * 
 * To obtain proper probability estimates, use the option that fits logistic
 * regression models to the outputs of the support vector machine. In the
 * multi-class case the predicted probabilities are coupled using Hastie and
 * Tibshirani's pairwise coupling method.
 * 
 * Note: for improved speed, normalization should be turned off when operating
 * on SparseInstances.
 * 
 * For more information on the SMO algorithm, see
 * 
 * J. Platt: Fast Training of Support Vector Machines using Sequential Minimal
 * Optimization. In B. Schoelkopf and C. Burges and A. Smola, editors, Advances
 * in Kernel Methods - Support Vector Learning, 1998.
 * 
 * S.S. Keerthi, S.K. Shevade, C. Bhattacharyya, K.R.K. Murthy (2001).
 * Improvements to Platt's SMO Algorithm for SVM Classifier Design. Neural
 * Computation. 13(3):637-649.
 * 
 * BibTeX:
 * 
 * @incollection{Platt1998,
 *    author = {J. Platt},
 *    booktitle = {Advances in Kernel Methods - Support Vector Learning},
 *    editor = {B. Schoelkopf and C. Burges and A. Smola},
 *    publisher = {MIT Press},
 *    title = {Fast Training of Support Vector Machines using Sequential Minimal Optimization},
 *    year = {1998}
 * }
 * 
 * @article{Keerthi2001,
 *    author = {S.S. Keerthi and S.K. Shevade and C. Bhattacharyya and K.R.K. Murthy},
 *    journal = {Neural Computation},
 *    number = {3},
 *    pages = {637-649},
 *    title = {Improvements to Platt's SMO Algorithm for SVM Classifier Design},
 *    volume = {13},
 *    year = {2001}
 * }
 * 
 * Valid options are:
 * 
 * -no-checks
 *  Turns off all checks - use with caution!
 *  Turning them off assumes that data is purely numeric, doesn't
 *  contain any missing values, and has a nominal class. Turning them
 *  off also means that no header information will be stored if the
 *  machine is linear. Finally, it also assumes that no instance has
 *  a weight equal to 0.
 *  (default: checks on)
 * 
 * -C <double>
 *  The complexity constant C. (default 1)
 * 
 * -N
 *  Whether to 0=normalize/1=standardize/2=neither.
 *  (default 0=normalize)
 * 
 * -I
 *  Use MIminimax feature space.
 * 
 * -L <double>
 *  The tolerance parameter. (default 1.0e-3)
 * 
 * -P <double>
 *  The epsilon for round-off error. (default 1.0e-12)
 * 
 * -M
 *  Fit logistic models to SVM outputs.
 * 
 * -V <double>
 *  The number of folds for the internal cross-validation.
 *  (default -1, use training data)
 * 
 * -W <double>
 *  The random number seed. (default 1)
 * 
 * -K <classname and parameters>
 *  The Kernel to use.
 *  (default: weka.classifiers.functions.supportVector.PolyKernel)
 * 
 * Options specific to kernel weka.classifiers.mi.supportVector.MIPolyKernel:
 * 
 * -no-checks
 *  Turns off all checks - use with caution!
 *  (default: checks on)
 * 
 * -C <num>
 *  The size of the cache (a prime number), 0 for full cache and
 *  -1 to turn it off.
 *  (default: 250007)
 * 
 * -E <num>
 *  The Exponent to use.
 *  (default: 1.0)
 * 
 * -L
 *  Use lower-order terms.
 *  (default: no)
 * 
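 * As an illustrative example (the dataset file name below is hypothetical and
 * not shipped with this package), MISMO could be trained and evaluated from
 * the command line on a multi-instance ARFF file like so:
 * 
 * java weka.classifiers.mi.MISMO -t musk1.arff -C 1.0 -N 0 \
 *    -K "weka.classifiers.mi.supportVector.MIPolyKernel -E 1.0"
 * 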
 *
 * @author Eibe Frank ([email protected])
 * @author Shane Legg ([email protected]) (sparse vector code)
 * @author Stuart Inglis ([email protected]) (sparse vector code)
 * @author Lin Dong ([email protected]) (code for adapting to MI data)
 * @version $Revision: 12560 $
 */
public class MISMO extends AbstractClassifier implements
  WeightedInstancesHandler, MultiInstanceCapabilitiesHandler,
  TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = -5834036950143719712L;

  /**
   * Returns a string describing the classifier.
   * 
   * @return a description suitable for displaying in the explorer/experimenter
   *         gui
   */
  public String globalInfo() {
    return "Implements John Platt's sequential minimal optimization "
      + "algorithm for training a support vector classifier.\n\n"
      + "This implementation globally replaces all missing values and "
      + "transforms nominal attributes into binary ones. It also "
      + "normalizes all attributes by default. (In that case the coefficients "
      + "in the output are based on the normalized data, not the "
      + "original data --- this is important for interpreting the classifier.)\n\n"
      + "Multi-class problems are solved using pairwise classification.\n\n"
      + "To obtain proper probability estimates, use the option that fits "
      + "logistic regression models to the outputs of the support vector "
      + "machine. In the multi-class case the predicted probabilities "
      + "are coupled using Hastie and Tibshirani's pairwise coupling "
      + "method.\n\n"
      + "Note: for improved speed normalization should be turned off when "
      + "operating on SparseInstances.\n\n"
      + "For more information on the SMO algorithm, see\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing detailed
   * information about the technical background of this class, e.g., paper
   * reference or book this class is based on.
   * 
   * @return the technical information about this class
   */
  @Override
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;
    TechnicalInformation additional;

    result = new TechnicalInformation(Type.INCOLLECTION);
    result.setValue(Field.AUTHOR, "J. Platt");
    result.setValue(Field.YEAR, "1998");
    result.setValue(Field.TITLE,
      "Fast Training of Support Vector Machines using Sequential Minimal Optimization");
    result.setValue(Field.BOOKTITLE,
      "Advances in Kernel Methods - Support Vector Learning");
    result.setValue(Field.EDITOR, "B. Schoelkopf and C. Burges and A. Smola");
    result.setValue(Field.PUBLISHER, "MIT Press");

    additional = result.add(Type.ARTICLE);
    additional.setValue(Field.AUTHOR,
      "S.S. Keerthi and S.K. Shevade and C. Bhattacharyya and K.R.K. Murthy");
    additional.setValue(Field.YEAR, "2001");
    additional.setValue(Field.TITLE,
      "Improvements to Platt's SMO Algorithm for SVM Classifier Design");
    additional.setValue(Field.JOURNAL, "Neural Computation");
    additional.setValue(Field.VOLUME, "13");
    additional.setValue(Field.NUMBER, "3");
    additional.setValue(Field.PAGES, "637-649");

    return result;
  }

  /**
   * Class for building a binary support vector machine.
   */
  protected class BinaryMISMO implements Serializable, RevisionHandler {

    /** for serialization */
    static final long serialVersionUID = -7107082483475433531L;

    /** The Lagrange multipliers. */
    protected double[] m_alpha;

    /** The thresholds. */
    protected double m_b, m_bLow, m_bUp;

    /** The indices for m_bLow and m_bUp */
    protected int m_iLow, m_iUp;

    /** The training data. */
    protected Instances m_data;

    /** Weight vector for linear machine. */
    protected double[] m_weights;

    /**
     * Variables to hold weight vector in sparse form. (To reduce storage
     * requirements.)
     */
    protected double[] m_sparseWeights;
    protected int[] m_sparseIndices;

    /** Kernel to use **/
    protected Kernel m_kernel;

    /** The transformed class values. */
    protected double[] m_class;

    /** The current set of errors for all non-bound examples. */
    protected double[] m_errors;

    /* The five different sets used by the algorithm. */
    /** {i: 0 < m_alpha[i] < C} */
    protected SMOset m_I0;
    /** {i: m_class[i] = 1, m_alpha[i] = 0} */
    protected SMOset m_I1;
    /** {i: m_class[i] = -1, m_alpha[i] = C} */
    protected SMOset m_I2;
    /** {i: m_class[i] = 1, m_alpha[i] = C} */
    protected SMOset m_I3;
    /** {i: m_class[i] = -1, m_alpha[i] = 0} */
    protected SMOset m_I4;

    /** The set of support vectors {i: 0 < m_alpha[i]} */
    protected SMOset m_supportVectors;

    /** Stores logistic regression model for probability estimate */
    protected Logistic m_logistic = null;

    /** Stores the weight of the training instances */
    protected double m_sumOfWeights = 0;

    /**
     * Fits logistic regression model to SVM outputs, analogous to John
     * Platt's method.
     * 
     * @param insts the set of training instances
     * @param cl1 the first class' index
     * @param cl2 the second class' index
     * @param numFolds the number of folds for cross-validation
     * @param random the random number generator for cross-validation
     * @throws Exception if the sigmoid can't be fit successfully
     */
    protected void fitLogistic(Instances insts, int cl1, int cl2,
      int numFolds, Random random) throws Exception {

      // Create header of instances object
      ArrayList<Attribute> atts = new ArrayList<Attribute>(2);
      atts.add(new Attribute("pred"));
      ArrayList<String> attVals = new ArrayList<String>(2);
      attVals.add(insts.classAttribute().value(cl1));
      attVals.add(insts.classAttribute().value(cl2));
      atts.add(new Attribute("class", attVals));
      Instances data = new Instances("data", atts, insts.numInstances());
      data.setClassIndex(1);

      // Collect data for fitting the logistic model
      if (numFolds <= 0) {

        // Use training data
        for (int j = 0; j < insts.numInstances(); j++) {
          Instance inst = insts.instance(j);
          double[] vals = new double[2];
          vals[0] = SVMOutput(-1, inst);
          if (inst.classValue() == cl2) {
            vals[1] = 1;
          }
          data.add(new DenseInstance(inst.weight(), vals));
        }
      } else {

        // Check whether number of folds too large
        if (numFolds > insts.numInstances()) {
          numFolds = insts.numInstances();
        }

        // Make copy of instances because we will shuffle them around
        insts = new Instances(insts);

        // Perform stratified cross-validation to collect
        // unbiased predictions
        insts.randomize(random);
        insts.stratify(numFolds);
        for (int i = 0; i < numFolds; i++) {
          Instances train = insts.trainCV(numFolds, i, random);
          SerializedObject so = new SerializedObject(this);
          BinaryMISMO smo = (BinaryMISMO) so.getObject();
          smo.buildClassifier(train, cl1, cl2, false, -1, -1);
          Instances test = insts.testCV(numFolds, i);
          for (int j = 0; j < test.numInstances(); j++) {
            double[] vals = new double[2];
            vals[0] = smo.SVMOutput(-1, test.instance(j));
            if (test.instance(j).classValue() == cl2) {
              vals[1] = 1;
            }
            data.add(new DenseInstance(test.instance(j).weight(), vals));
          }
        }
      }

      // Build logistic regression model
      m_logistic = new Logistic();
      m_logistic.buildClassifier(data);
    }

    /**
     * sets the kernel to use
     * 
     * @param value the kernel to use
     */
    public void setKernel(Kernel value) {
      m_kernel = value;
    }

    /**
     * Returns the kernel to use
     * 
     * @return the current kernel
     */
    public Kernel getKernel() {
      return m_kernel;
    }

    /**
     * Method for building the binary classifier.
     * 
     * @param insts the set of training instances
     * @param cl1 the first class' index
     * @param cl2 the second class' index
     * @param fitLogistic true if logistic model is to be fit
     * @param numFolds number of folds for internal cross-validation
     * @param randomSeed seed value for random number generator for
     *          cross-validation
     * @throws Exception if the classifier can't be built successfully
     */
    protected void buildClassifier(Instances insts, int cl1, int cl2,
      boolean fitLogistic, int numFolds, int randomSeed) throws Exception {

      // Initialize some variables
      m_bUp = -1;
      m_bLow = 1;
      m_b = 0;
      m_alpha = null;
      m_data = null;
      m_weights = null;
      m_errors = null;
      m_logistic = null;
      m_I0 = null;
      m_I1 = null;
      m_I2 = null;
      m_I3 = null;
      m_I4 = null;
      m_sparseWeights = null;
      m_sparseIndices = null;

      // Store the sum of weights
      m_sumOfWeights = insts.sumOfWeights();

      // Set class values
      m_class = new double[insts.numInstances()];
      m_iUp = -1;
      m_iLow = -1;
      for (int i = 0; i < m_class.length; i++) {
        if ((int) insts.instance(i).classValue() == cl1) {
          m_class[i] = -1;
          m_iLow = i;
        } else if ((int) insts.instance(i).classValue() == cl2) {
          m_class[i] = 1;
          m_iUp = i;
        } else {
          throw new Exception("This should never happen!");
        }
      }

      // Check whether one or both classes are missing
      if ((m_iUp == -1) || (m_iLow == -1)) {
        if (m_iUp != -1) {
          m_b = -1;
        } else if (m_iLow != -1) {
          m_b = 1;
        } else {
          m_class = null;
          return;
        }
        m_supportVectors = new SMOset(0);
        m_alpha = new double[0];
        m_class = new double[0];

        // Fit sigmoid if requested
        if (fitLogistic) {
          fitLogistic(insts, cl1, cl2, numFolds, new Random(randomSeed));
        }
        return;
      }

      // Set the reference to the data
      m_data = insts;
      m_weights = null;

      // Initialize alpha array to zero
      m_alpha = new double[m_data.numInstances()];

      // Initialize sets
      m_supportVectors = new SMOset(m_data.numInstances());
      m_I0 = new SMOset(m_data.numInstances());
      m_I1 = new SMOset(m_data.numInstances());
      m_I2 = new SMOset(m_data.numInstances());
      m_I3 = new SMOset(m_data.numInstances());
      m_I4 = new SMOset(m_data.numInstances());

      // Clean out some instance variables
      m_sparseWeights = null;
      m_sparseIndices = null;

      // Initialize error cache
      m_errors = new double[m_data.numInstances()];
      m_errors[m_iLow] = 1;
      m_errors[m_iUp] = -1;

      // Initialize kernel
      m_kernel.buildKernel(m_data);

      // Build up I1 and I4
      for (int i = 0; i < m_class.length; i++) {
        if (m_class[i] == 1) {
          m_I1.insert(i);
        } else {
          m_I4.insert(i);
        }
      }

      // Loop to find all the support vectors
      int numChanged = 0;
      boolean examineAll = true;
      while ((numChanged > 0) || examineAll) {
        numChanged = 0;
        if (examineAll) {
          for (int i = 0; i < m_alpha.length; i++) {
            if (examineExample(i)) {
              numChanged++;
            }
          }
        } else {

          // This code implements Modification 1 from Keerthi et al.'s paper
          for (int i = 0; i < m_alpha.length; i++) {
            if ((m_alpha[i] > 0)
              && (m_alpha[i] < m_C * m_data.instance(i).weight())) {
              if (examineExample(i)) {
                numChanged++;
              }

              // Is optimality on unbound vectors obtained?
              if (m_bUp > m_bLow - 2 * m_tol) {
                numChanged = 0;
                break;
              }
            }
          }

          // This is the code for Modification 2 from Keerthi et al.'s paper
          /*
           * boolean innerLoopSuccess = true; numChanged = 0; while ((m_bUp <
           * m_bLow - 2 * m_tol) && (innerLoopSuccess == true)) {
           * innerLoopSuccess = takeStep(m_iUp, m_iLow, m_errors[m_iLow]); }
           */
        }

        if (examineAll) {
          examineAll = false;
        } else if (numChanged == 0) {
          examineAll = true;
        }
      }

      // Set threshold
      m_b = (m_bLow + m_bUp) / 2.0;

      // Save memory
      m_kernel.clean();

      m_errors = null;
      m_I0 = m_I1 = m_I2 = m_I3 = m_I4 = null;

      // Fit sigmoid if requested
      if (fitLogistic) {
        fitLogistic(insts, cl1, cl2, numFolds, new Random(randomSeed));
      }
    }

    /**
     * Computes SVM output for given instance.
     * 
     * @param index the instance for which output is to be computed
     * @param inst the instance
     * @return the output of the SVM for the given instance
     * @throws Exception if something goes wrong
     */
    protected double SVMOutput(int index, Instance inst) throws Exception {

      double result = 0;

      for (int i = m_supportVectors.getNext(-1); i != -1; i = m_supportVectors
        .getNext(i)) {
        result += m_class[i] * m_alpha[i] * m_kernel.eval(index, i, inst);
      }
      result -= m_b;

      return result;
    }

    /**
     * Prints out the classifier.
     * 
     * @return a description of the classifier as a string
     */
    @Override
    public String toString() {

      StringBuffer text = new StringBuffer();
      int printed = 0;

      if ((m_alpha == null) && (m_sparseWeights == null)) {
        return "BinaryMISMO: No model built yet.\n";
      }
      try {
        text.append("BinaryMISMO\n\n");

        for (int i = 0; i < m_alpha.length; i++) {
          if (m_supportVectors.contains(i)) {
            double val = m_alpha[i];
            if (m_class[i] == 1) {
              if (printed > 0) {
                text.append(" + ");
              }
            } else {
              text.append(" - ");
            }
            text.append(Utils.doubleToString(val, 12, 4) + " * <");
            for (int j = 0; j < m_data.numAttributes(); j++) {
              if (j != m_data.classIndex()) {
                text.append(m_data.instance(i).toString(j));
              }
              if (j != m_data.numAttributes() - 1) {
                text.append(" ");
              }
            }
            text.append("> * X]\n");
            printed++;
          }
        }

        if (m_b > 0) {
          text.append(" - " + Utils.doubleToString(m_b, 12, 4));
        } else {
          text.append(" + " + Utils.doubleToString(-m_b, 12, 4));
        }

        text.append("\n\nNumber of support vectors: "
          + m_supportVectors.numElements());
        int numEval = 0;
        int numCacheHits = -1;
        if (m_kernel != null) {
          numEval = m_kernel.numEvals();
          numCacheHits = m_kernel.numCacheHits();
        }
        text.append("\n\nNumber of kernel evaluations: " + numEval);
        if (numCacheHits >= 0 && numEval > 0) {
          double hitRatio = 1 - numEval * 1.0 / (numCacheHits + numEval);
          text.append(" ("
            + Utils.doubleToString(hitRatio * 100, 7, 3).trim()
            + "% cached)");
        }
      } catch (Exception e) {
        e.printStackTrace();
        return "Can't print BinaryMISMO classifier.";
      }

      return text.toString();
    }

    /**
     * Examines instance.
     * 
     * @param i2 index of instance to examine
     * @return true if examination was successful
     * @throws Exception if something goes wrong
     */
    protected boolean examineExample(int i2) throws Exception {

      double y2, F2;
      int i1 = -1;

      y2 = m_class[i2];
      if (m_I0.contains(i2)) {
        F2 = m_errors[i2];
      } else {
        F2 = SVMOutput(i2, m_data.instance(i2)) + m_b - y2;
        m_errors[i2] = F2;

        // Update thresholds
        if ((m_I1.contains(i2) || m_I2.contains(i2)) && (F2 < m_bUp)) {
          m_bUp = F2;
          m_iUp = i2;
        } else if ((m_I3.contains(i2) || m_I4.contains(i2)) && (F2 > m_bLow)) {
          m_bLow = F2;
          m_iLow = i2;
        }
      }

      // Check optimality using current bLow and bUp and, if
      // violated, find an index i1 to do joint optimization
      // with i2...
      boolean optimal = true;
      if (m_I0.contains(i2) || m_I1.contains(i2) || m_I2.contains(i2)) {
        if (m_bLow - F2 > 2 * m_tol) {
          optimal = false;
          i1 = m_iLow;
        }
      }
      if (m_I0.contains(i2) || m_I3.contains(i2) || m_I4.contains(i2)) {
        if (F2 - m_bUp > 2 * m_tol) {
          optimal = false;
          i1 = m_iUp;
        }
      }
      if (optimal) {
        return false;
      }

      // For i2 unbound choose the better i1...
      if (m_I0.contains(i2)) {
        if (m_bLow - F2 > F2 - m_bUp) {
          i1 = m_iLow;
        } else {
          i1 = m_iUp;
        }
      }
      if (i1 == -1) {
        throw new Exception("This should never happen!");
      }
      return takeStep(i1, i2, F2);
    }

    /**
     * Method solving for the Lagrange multipliers for two instances.
     * 
     * @param i1 index of the first instance
     * @param i2 index of the second instance
     * @param F2 the error for the second instance
     * @return true if multipliers could be found
     * @throws Exception if something goes wrong
     */
    protected boolean takeStep(int i1, int i2, double F2) throws Exception {

      double alph1, alph2, y1, y2, F1, s, L, H, k11, k12, k22, eta, a1, a2,
        f1, f2, v1, v2, Lobj, Hobj;
      double C1 = m_C * m_data.instance(i1).weight();
      double C2 = m_C * m_data.instance(i2).weight();

      // Don't do anything if the two instances are the same
      if (i1 == i2) {
        return false;
      }

      // Initialize variables
      alph1 = m_alpha[i1];
      alph2 = m_alpha[i2];
      y1 = m_class[i1];
      y2 = m_class[i2];
      F1 = m_errors[i1];
      s = y1 * y2;

      // Find the constraints on a2
      if (y1 != y2) {
        L = Math.max(0, alph2 - alph1);
        H = Math.min(C2, C1 + alph2 - alph1);
      } else {
        L = Math.max(0, alph1 + alph2 - C1);
        H = Math.min(C2, alph1 + alph2);
      }
      if (L >= H) {
        return false;
      }

      // Compute second derivative of objective function
      k11 = m_kernel.eval(i1, i1, m_data.instance(i1));
      k12 = m_kernel.eval(i1, i2, m_data.instance(i1));
      k22 = m_kernel.eval(i2, i2, m_data.instance(i2));
      eta = 2 * k12 - k11 - k22;

      // Check if second derivative is negative
      if (eta < 0) {

        // Compute unconstrained maximum
        a2 = alph2 - y2 * (F1 - F2) / eta;

        // Compute constrained maximum
        if (a2 < L) {
          a2 = L;
        } else if (a2 > H) {
          a2 = H;
        }
      } else {

        // Look at endpoints of diagonal
        f1 = SVMOutput(i1, m_data.instance(i1));
        f2 = SVMOutput(i2, m_data.instance(i2));
        v1 = f1 + m_b - y1 * alph1 * k11 - y2 * alph2 * k12;
        v2 = f2 + m_b - y1 * alph1 * k12 - y2 * alph2 * k22;
        double gamma = alph1 + s * alph2;
        Lobj = (gamma - s * L) + L - 0.5 * k11 * (gamma - s * L)
          * (gamma - s * L) - 0.5 * k22 * L * L - s * k12 * (gamma - s * L)
          * L - y1 * (gamma - s * L) * v1 - y2 * L * v2;
        Hobj = (gamma - s * H) + H - 0.5 * k11 * (gamma - s * H)
          * (gamma - s * H) - 0.5 * k22 * H * H - s * k12 * (gamma - s * H)
          * H - y1 * (gamma - s * H) * v1 - y2 * H * v2;
        if (Lobj > Hobj + m_eps) {
          a2 = L;
        } else if (Lobj < Hobj - m_eps) {
          a2 = H;
        } else {
          a2 = alph2;
        }
      }
      if (Math.abs(a2 - alph2) < m_eps * (a2 + alph2 + m_eps)) {
        return false;
      }

      // To prevent precision problems
      if (a2 > C2 - m_Del * C2) {
        a2 = C2;
      } else if (a2 <= m_Del * C2) {
        a2 = 0;
      }

      // Recompute a1
      a1 = alph1 + s * (alph2 - a2);

      // To prevent precision problems
      if (a1 > C1 - m_Del * C1) {
        a1 = C1;
      } else if (a1 <= m_Del * C1) {
        a1 = 0;
      }

      // Update sets
      if (a1 > 0) {
        m_supportVectors.insert(i1);
      } else {
        m_supportVectors.delete(i1);
      }
      if ((a1 > 0) && (a1 < C1)) {
        m_I0.insert(i1);
      } else {
        m_I0.delete(i1);
      }
      if ((y1 == 1) && (a1 == 0)) {
        m_I1.insert(i1);
      } else {
        m_I1.delete(i1);
      }
      if ((y1 == -1) && (a1 == C1)) {
        m_I2.insert(i1);
      } else {
        m_I2.delete(i1);
      }
      if ((y1 == 1) && (a1 == C1)) {
        m_I3.insert(i1);
      } else {
        m_I3.delete(i1);
      }
      if ((y1 == -1) && (a1 == 0)) {
        m_I4.insert(i1);
      } else {
        m_I4.delete(i1);
      }
      if (a2 > 0) {
        m_supportVectors.insert(i2);
      } else {
        m_supportVectors.delete(i2);
      }
      if ((a2 > 0) && (a2 < C2)) {
        m_I0.insert(i2);
      } else {
        m_I0.delete(i2);
      }
      if ((y2 == 1) && (a2 == 0)) {
        m_I1.insert(i2);
      } else {
        m_I1.delete(i2);
      }
      if ((y2 == -1) && (a2 == C2)) {
        m_I2.insert(i2);
      } else {
        m_I2.delete(i2);
      }
      if ((y2 == 1) && (a2 == C2)) {
        m_I3.insert(i2);
      } else {
        m_I3.delete(i2);
      }
      if ((y2 == -1) && (a2 == 0)) {
        m_I4.insert(i2);
      } else {
        m_I4.delete(i2);
      }

      // Update error cache using new Lagrange multipliers
      for (int j = m_I0.getNext(-1); j != -1; j = m_I0.getNext(j)) {
        if ((j != i1) && (j != i2)) {
          m_errors[j] += y1 * (a1 - alph1)
            * m_kernel.eval(i1, j, m_data.instance(i1)) + y2 * (a2 - alph2)
            * m_kernel.eval(i2, j, m_data.instance(i2));
        }
      }

      // Update error cache for i1 and i2
      m_errors[i1] += y1 * (a1 - alph1) * k11 + y2 * (a2 - alph2) * k12;
      m_errors[i2] += y1 * (a1 - alph1) * k12 + y2 * (a2 - alph2) * k22;

      // Update array with Lagrange multipliers
      m_alpha[i1] = a1;
      m_alpha[i2] = a2;

      // Update thresholds
      m_bLow = -Double.MAX_VALUE;
      m_bUp = Double.MAX_VALUE;
      m_iLow = -1;
      m_iUp = -1;
      for (int j = m_I0.getNext(-1); j != -1; j = m_I0.getNext(j)) {
        if (m_errors[j] < m_bUp) {
          m_bUp = m_errors[j];
          m_iUp = j;
        }
        if (m_errors[j] > m_bLow) {
          m_bLow = m_errors[j];
          m_iLow = j;
        }
      }
      if (!m_I0.contains(i1)) {
        if (m_I3.contains(i1) || m_I4.contains(i1)) {
          if (m_errors[i1] > m_bLow) {
            m_bLow = m_errors[i1];
            m_iLow = i1;
          }
        } else {
          if (m_errors[i1] < m_bUp) {
            m_bUp = m_errors[i1];
            m_iUp = i1;
          }
        }
      }
      if (!m_I0.contains(i2)) {
        if (m_I3.contains(i2) || m_I4.contains(i2)) {
          if (m_errors[i2] > m_bLow) {
            m_bLow = m_errors[i2];
            m_iLow = i2;
          }
        } else {
          if (m_errors[i2] < m_bUp) {
            m_bUp = m_errors[i2];
            m_iUp = i2;
          }
        }
      }
      if ((m_iLow == -1) || (m_iUp == -1)) {
        throw new Exception("This should never happen!");
      }

      // Made some progress.
      return true;
    }

    /**
     * Quick and dirty check whether the quadratic programming problem is
     * solved.
     * 
     * @throws Exception if something goes wrong
     */
    protected void checkClassifier() throws Exception {

      double sum = 0;
      for (int i = 0; i < m_alpha.length; i++) {
        if (m_alpha[i] > 0) {
          sum += m_class[i] * m_alpha[i];
        }
      }
      System.err.println("Sum of y(i) * alpha(i): " + sum);

      for (int i = 0; i < m_alpha.length; i++) {
        double output = SVMOutput(i, m_data.instance(i));
        if (Utils.eq(m_alpha[i], 0)) {
          if (Utils.sm(m_class[i] * output, 1)) {
            System.err.println("KKT condition 1 violated: " + m_class[i]
              * output);
          }
        }
        if (Utils.gr(m_alpha[i], 0)
          && Utils.sm(m_alpha[i], m_C * m_data.instance(i).weight())) {
          if (!Utils.eq(m_class[i] * output, 1)) {
            System.err.println("KKT condition 2 violated: " + m_class[i]
              * output);
          }
        }
        if (Utils.eq(m_alpha[i], m_C * m_data.instance(i).weight())) {
          if (Utils.gr(m_class[i] * output, 1)) {
            System.err.println("KKT condition 3 violated: " + m_class[i]
              * output);
          }
        }
      }
    }

    /**
     * Returns the revision string.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 12560 $");
    }
  }

  /** Normalize training data */
  public static final int FILTER_NORMALIZE = 0;

  /** Standardize training data */
  public static final int FILTER_STANDARDIZE = 1;

  /** No normalization/standardization */
  public static final int FILTER_NONE = 2;

  /** The filter to apply to the training data */
  public static final Tag[] TAGS_FILTER = {
    new Tag(FILTER_NORMALIZE, "Normalize training data"),
    new Tag(FILTER_STANDARDIZE, "Standardize training data"),
    new Tag(FILTER_NONE, "No normalization/standardization"), };

  /** The binary classifier(s) */
  protected BinaryMISMO[][] m_classifiers = null;

  /** The complexity parameter. */
  protected double m_C = 1.0;

  /** Epsilon for rounding. */
  protected double m_eps = 1.0e-12;

  /** Tolerance for accuracy of result. */
  protected double m_tol = 1.0e-3;

  /** Whether to normalize/standardize/neither */
  protected int m_filterType = FILTER_NORMALIZE;

  /** Use MIMinimax feature space? */
  protected boolean m_minimax = false;

  /** The filter used to make attributes numeric. */
  protected NominalToBinary m_NominalToBinary;

  /** The filter used to standardize/normalize all values. */
  protected Filter m_Filter = null;

  /** The filter used to get rid of missing values. */
  protected ReplaceMissingValues m_Missing;

  /** The class index from the training data */
  protected int m_classIndex = -1;

  /** The class attribute */
  protected Attribute m_classAttribute;

  /** Kernel to use **/
  protected Kernel m_kernel = new MIPolyKernel();

  /**
   * Turn off all checks and conversions? Turning them off assumes that data is
   * purely numeric, doesn't contain any missing values, and has a nominal
   * class. Turning them off also means that no header information will be
   * stored if the machine is linear. Finally, it also assumes that no instance
   * has a weight equal to 0.
   */
  protected boolean m_checksTurnedOff;

  /** Precision constant for updating sets */
  protected static double m_Del = 1000 * Double.MIN_VALUE;

  /** Whether logistic models are to be fit */
  protected boolean m_fitLogisticModels = false;

  /** The number of folds for the internal cross-validation */
  protected int m_numFolds = -1;

  /** The random number seed */
  protected int m_randomSeed = 1;

  /**
   * Turns off checks for missing values, etc. Use with caution.
   */
  public void turnChecksOff() {
    m_checksTurnedOff = true;
  }

  /**
   * Turns on checks for missing values, etc.
   */
  public void turnChecksOn() {
    m_checksTurnedOff = false;
  }

  /**
   * Returns default capabilities of the classifier.
   * 
   * @return the capabilities of this classifier
   */
  @Override
  public Capabilities getCapabilities() {
    Capabilities result = getKernel().getCapabilities();
    result.setOwner(this);

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.RELATIONAL_ATTRIBUTES);

    // class
    result.disableAllClasses();
    result.disableAllClassDependencies();
    result.disable(Capability.NO_CLASS);
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    // other
    result.enable(Capability.ONLY_MULTIINSTANCE);

    return result;
  }

  /**
   * Returns the capabilities of this multi-instance classifier for the
   * relational data.
   * 
   * @return the capabilities of this object
   * @see Capabilities
   */
  @Override
  public Capabilities getMultiInstanceCapabilities() {
    Capabilities result = ((MultiInstanceCapabilitiesHandler) getKernel())
      .getMultiInstanceCapabilities();
    result.setOwner(this);

    // attribute
    result.enableAllAttributeDependencies();
    // with NominalToBinary we can also handle nominal attributes, but only
    // if the kernel can handle numeric attributes
    if (result.handles(Capability.NUMERIC_ATTRIBUTES)) {
      result.enable(Capability.NOMINAL_ATTRIBUTES);
    }
    result.enable(Capability.MISSING_VALUES);

    return result;
  }

  /**
   * Method for building the classifier. Implements a one-against-one wrapper
   * for multi-class problems.
   * 
   * @param insts the set of training instances
   * @throws Exception if the classifier can't be built successfully
   */
  @Override
  public void buildClassifier(Instances insts) throws Exception {
    if (!m_checksTurnedOff) {
      // can classifier handle the data?
      getCapabilities().testWithFail(insts);

      // remove instances with missing class
      insts = new Instances(insts);
      insts.deleteWithMissingClass();

      /*
       * Removes all the instances with weight equal to 0. MUST be done since
       * condition (8) of Keerthi's paper is made with the assertion Ci > 0
       * (See equation (3a)).
       */
      Instances data = new Instances(insts, insts.numInstances());
      for (int i = 0; i < insts.numInstances(); i++) {
        if (insts.instance(i).weight() > 0) {
          data.add(insts.instance(i));
        }
      }
      if (data.numInstances() == 0) {
        throw new Exception("No training instances left after removing "
          + "instances with either a zero weight or a missing class!");
      }
      insts = data;
    }

    // filter data
    if (!m_checksTurnedOff) {
      m_Missing = new ReplaceMissingValues();
    } else {
      m_Missing = null;
    }

    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
      boolean onlyNumeric = true;
      if (!m_checksTurnedOff) {
        for (int i = 0; i < insts.numAttributes(); i++) {
          if (i != insts.classIndex()) {
            if (!insts.attribute(i).isNumeric()) {
              onlyNumeric = false;
              break;
            }
          }
        }
      }

      if (!onlyNumeric) {
        m_NominalToBinary = new NominalToBinary();
        // exclude the bag attribute
        m_NominalToBinary.setAttributeIndices("2-last");
      } else {
        m_NominalToBinary = null;
      }
    } else {
      m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
      m_Filter = new Standardize();
    } else if (m_filterType == FILTER_NORMALIZE) {
      m_Filter = new Normalize();
    } else {
      m_Filter = null;
    }

    Instances transformedInsts;
    Filter convertToProp = new MultiInstanceToPropositional();
    Filter convertToMI = new PropositionalToMultiInstance();

    // transform the data into single-instance format
    if (m_minimax) {
      /*
       * using SimpleMI class minimax transform method. this method transforms
       * the multi-instance dataset into minimax feature space
       * (single-instance)
       */
      SimpleMI transMinimax = new SimpleMI();
      transMinimax.setTransformMethod(new SelectedTag(
        SimpleMI.TRANSFORMMETHOD_MINIMAX, SimpleMI.TAGS_TRANSFORMMETHOD));
      transformedInsts = transMinimax.transform(insts);
    } else {
      convertToProp.setInputFormat(insts);
      transformedInsts = Filter.useFilter(insts, convertToProp);
    }

    if (m_Missing != null) {
      m_Missing.setInputFormat(transformedInsts);
      transformedInsts = Filter.useFilter(transformedInsts, m_Missing);
    }

    if (m_NominalToBinary != null) {
      m_NominalToBinary.setInputFormat(transformedInsts);
      transformedInsts = Filter.useFilter(transformedInsts, m_NominalToBinary);
    }

    if (m_Filter != null) {
      m_Filter.setInputFormat(transformedInsts);
      transformedInsts = Filter.useFilter(transformedInsts, m_Filter);
    }

    // convert the single-instance format to multi-instance format
    convertToMI.setInputFormat(transformedInsts);
    insts = Filter.useFilter(transformedInsts, convertToMI);

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
      subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
      Instance inst = insts.instance(j);
      subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
      subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinaryMISMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
      for (int j = i + 1; j < insts.numClasses(); j++) {
        m_classifiers[i][j] = new BinaryMISMO();
        m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
        Instances data = new Instances(insts, insts.numInstances());
        for (int k = 0; k < subsets[i].numInstances(); k++) {
          data.add(subsets[i].instance(k));
        }
        for (int k = 0; k < subsets[j].numInstances(); k++) {
          data.add(subsets[j].instance(k));
        }
        data.compactify();
        data.randomize(rand);
        m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels,
          m_numFolds, m_randomSeed);
      }
    }
  }

  /**
   * Estimates class probabilities for given instance.
   * 
   * @param inst the instance to compute the distribution for
   * @return the class probabilities
   * @throws Exception if computation fails
   */
  @Override
  public double[] distributionForInstance(Instance inst) throws Exception {

    // convert instance into instances
    Instances insts = new Instances(inst.dataset(), 0);
    insts.add(inst);

    // transform the data into single-instance format
    Filter convertToProp = new MultiInstanceToPropositional();
    Filter convertToMI = new PropositionalToMultiInstance();

    if (m_minimax) { // using minimax feature space
      SimpleMI transMinimax = new SimpleMI();
      transMinimax.setTransformMethod(new SelectedTag(
        SimpleMI.TRANSFORMMETHOD_MINIMAX, SimpleMI.TAGS_TRANSFORMMETHOD));
      insts = transMinimax.transform(insts);
    } else {
      convertToProp.setInputFormat(insts);
      insts = Filter.useFilter(insts, convertToProp);
    }

    // Filter instances
    if (m_Missing != null) {
      insts = Filter.useFilter(insts, m_Missing);
    }

    if (m_NominalToBinary != null) {
      insts = Filter.useFilter(insts, m_NominalToBinary);
    }

    if (m_Filter != null) {
      insts = Filter.useFilter(insts, m_Filter);
    }

    // convert the single-instance format to multi-instance format
    convertToMI.setInputFormat(insts);
    insts = Filter.useFilter(insts, convertToMI);

    inst = insts.instance(0);

    if (!m_fitLogisticModels) {
      double[] result = new double[inst.numClasses()];
      for (int i = 0; i < inst.numClasses(); i++) {
        for (int j = i + 1; j < inst.numClasses(); j++) {
          if ((m_classifiers[i][j].m_alpha != null)
            || (m_classifiers[i][j].m_sparseWeights != null)) {
            double output = m_classifiers[i][j].SVMOutput(-1, inst);
            if (output > 0) {
              result[j] += 1;
            } else {
              result[i] += 1;
            }
          }
        }
      }
      Utils.normalize(result);
      return result;
    } else {

      // We only need to do pairwise coupling if there are more
      // than two classes.
      if (inst.numClasses() == 2) {
        double[] newInst = new double[2];
        newInst[0] = m_classifiers[0][1].SVMOutput(-1, inst);
        newInst[1] = Utils.missingValue();
        return m_classifiers[0][1].m_logistic
          .distributionForInstance(new DenseInstance(1, newInst));
      }
      double[][] r = new double[inst.numClasses()][inst.numClasses()];
      double[][] n = new double[inst.numClasses()][inst.numClasses()];
      for (int i = 0; i < inst.numClasses(); i++) {
        for (int j = i + 1; j < inst.numClasses(); j++) {
          if ((m_classifiers[i][j].m_alpha != null)
            || (m_classifiers[i][j].m_sparseWeights != null)) {
            double[] newInst = new double[2];
            newInst[0] = m_classifiers[i][j].SVMOutput(-1, inst);
            newInst[1] = Utils.missingValue();
            r[i][j] = m_classifiers[i][j].m_logistic
              .distributionForInstance(new DenseInstance(1, newInst))[0];
            n[i][j] = m_classifiers[i][j].m_sumOfWeights;
          }
        }
      }

      return pairwiseCoupling(n, r);
    }
  }

  /**
   * Implements pairwise coupling.
   * 
   * @param n the sum of weights used to train each model
   * @param r the probability estimate from each model
   * @return the coupled estimates
   */
  public double[] pairwiseCoupling(double[][] n, double[][] r) {

    // Initialize p and u array
    double[] p = new double[r.length];
    for (int i = 0; i < p.length; i++) {
      p[i] = 1.0 / p.length;
    }
    double[][] u = new double[r.length][r.length];
    for (int i = 0; i < r.length; i++) {
      for (int j = i + 1; j < r.length; j++) {
        u[i][j] = 0.5;
      }
    }

    // firstSum doesn't change
    double[] firstSum = new double[p.length];
    for (int i = 0; i < p.length; i++) {
      for (int j = i + 1; j < p.length; j++) {
        firstSum[i] += n[i][j] * r[i][j];
        firstSum[j] += n[i][j] * (1 - r[i][j]);
      }
    }

    // Iterate until convergence
    boolean changed;
    do {
      changed = false;
      double[] secondSum = new double[p.length];
      for (int i = 0; i < p.length; i++) {
        for (int j = i + 1; j < p.length; j++) {
          secondSum[i] += n[i][j] * u[i][j];
          secondSum[j] += n[i][j] * (1 - u[i][j]);
        }
      }
      for (int i = 0; i < p.length; i++) {
        if ((firstSum[i] == 0) || (secondSum[i] == 0)) {
          if (p[i] > 0) {
            changed = true;
          }
          p[i] = 0;
        } else {
          double factor = firstSum[i] / secondSum[i];
          double pOld = p[i];
          p[i] *= factor;
          if (Math.abs(pOld - p[i]) > 1.0e-3) {
            changed = true;
          }
        }
      }
      Utils.normalize(p);
      for (int i = 0; i < r.length; i++) {
        for (int j = i + 1; j < r.length; j++) {
          u[i][j] = p[i] / (p[i] + p[j]);
        }
      }
    } while (changed);

    return p;
  }

  /**
   * Returns the weights in sparse format.
   * 
   * @return the weights in sparse format
   */
  public double[][][] sparseWeights() {

    int numValues = m_classAttribute.numValues();
    double[][][] sparseWeights = new double[numValues][numValues][];

    for (int i = 0; i < numValues; i++) {
      for (int j = i + 1; j < numValues; j++) {
        sparseWeights[i][j] = m_classifiers[i][j].m_sparseWeights;
      }
    }

    return sparseWeights;
  }

  /**
   * Returns the indices in sparse format.
   * 
   * @return the indices in sparse format
   */
  public int[][][] sparseIndices() {

    int numValues = m_classAttribute.numValues();
    int[][][] sparseIndices = new int[numValues][numValues][];

    for (int i = 0; i < numValues; i++) {
      for (int j = i + 1; j < numValues; j++) {
        sparseIndices[i][j] = m_classifiers[i][j].m_sparseIndices;
      }
    }

    return sparseIndices;
  }

  /**
   * Returns the bias of each binary SMO.
   * 
   * @return the bias of each binary SMO
   */
  public double[][] bias() {

    int numValues = m_classAttribute.numValues();
    double[][] bias = new double[numValues][numValues];

    for (int i = 0; i < numValues; i++) {
      for (int j = i + 1; j < numValues; j++) {
        bias[i][j] = m_classifiers[i][j].m_b;
      }
    }

    return bias;
  }

  /**
   * Returns the number of values of the class attribute.
   * 
   * @return the number of values of the class attribute
   */
  public int numClassAttributeValues() {
    return m_classAttribute.numValues();
  }

  /**
   * Returns the names of the class attributes.
   * 
   * @return the names of the class attributes
   */
  public String[] classAttributeNames() {
    int numValues = m_classAttribute.numValues();

    String[] classAttributeNames = new String[numValues];

    for (int i = 0; i < numValues; i++) {
      classAttributeNames[i] = m_classAttribute.value(i);
    }

    return classAttributeNames;
  }

  /**
   * Returns the attribute names.
   * 
   * @return the attribute names
   */
  public String[][][] attributeNames() {

    int numValues = m_classAttribute.numValues();
    String[][][] attributeNames = new String[numValues][numValues][];

    for (int i = 0; i < numValues; i++) {
      for (int j = i + 1; j < numValues; j++) {
        int numAttributes = m_classifiers[i][j].m_data.numAttributes();
        String[] attrNames = new String[numAttributes];
        for (int k = 0; k < numAttributes; k++) {
          attrNames[k] = m_classifiers[i][j].m_data.attribute(k).name();
        }
        attributeNames[i][j] = attrNames;
      }
    }

    return attributeNames;
  }

  /**
   * Returns an enumeration describing the available options.
   * 
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration
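
A minimal sketch of using this classifier from Java code (the ARFF path and the example class name are hypothetical; MISMO expects multi-instance data, i.e. a bag-id attribute, a relational attribute holding each bag's instances, and a nominal class):

import weka.classifiers.mi.MISMO;
import weka.core.Instances;
import weka.core.Utils;
import weka.core.converters.ConverterUtils.DataSource;

public class MISMOExample {
  public static void main(String[] args) throws Exception {
    // Load a multi-instance dataset (hypothetical path).
    Instances data = DataSource.read("/path/to/multi-instance.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Configure the classifier: complexity constant 1.0, and -M fits
    // logistic models to the SVM outputs for proper probability estimates.
    MISMO classifier = new MISMO();
    classifier.setOptions(Utils.splitOptions("-C 1.0 -M"));

    // Train on all bags and print the per-pair binary machines.
    classifier.buildClassifier(data);
    System.out.println(classifier);

    // Class distribution for the first bag.
    double[] dist = classifier.distributionForInstance(data.instance(0));
    for (int i = 0; i < dist.length; i++) {
      System.out.println(data.classAttribute().value(i) + ": " + dist[i]);
    }
  }
}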