/*
* Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
*
* Smile is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Smile is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Smile. If not, see <https://www.gnu.org/licenses/>.
*/
package smile.classification;

import java.util.Arrays;
import java.util.Properties;
import smile.base.svm.KernelMachine;
import smile.base.svm.LASVM;
import smile.base.svm.LinearKernelMachine;
import smile.math.MathEx;
import smile.math.kernel.*;
import smile.util.IntSet;
import smile.util.SparseArray;
/**
* Support vector machines for classification. The basic support vector machine
* is a binary linear classifier which chooses the hyperplane that represents
* the largest separation, or margin, between the two classes. If such a
* hyperplane exists, it is known as the maximum-margin hyperplane and the
* linear classifier it defines is known as a maximum margin classifier.
*
* If there exists no hyperplane that can perfectly split the positive and
* negative instances, the soft margin method will choose a hyperplane
* that splits the instances as cleanly as possible, while still maximizing
* the distance to the nearest cleanly split instances.
*
* The soft margin parameter {@code C} trades off correct classification
* of training examples against maximization of the decision function's
* margin. For larger values of {@code C}, a smaller margin will be
* accepted if the decision function is better at classifying all training
* points correctly. A lower {@code C} will encourage a larger margin, and
* therefore a simpler decision function, at the cost of training accuracy.
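* For example, a hedged sketch of the trade-off using the linear
* {@code fit} method below (the toy data are placeholders):
* <pre>{@code
* double[][] x = {{0, 0}, {1, 1}, {0.2, 0.1}, {0.9, 1.2}};
* int[] y = {-1, +1, -1, +1};
* // Large C: prioritize classifying every training point correctly.
* Classifier<double[]> hard = SVM.fit(x, y, 100.0, 1E-3);
* // Small C: prefer a wider margin, tolerating some training errors.
* Classifier<double[]> soft = SVM.fit(x, y, 0.1, 1E-3);
* int label = soft.predict(new double[]{0.5, 0.4});
* }</pre>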
*
* The nonlinear SVMs are created by applying the kernel trick to
* maximum-margin hyperplanes. The resulting algorithm is formally similar,
* except that every dot product is replaced by a nonlinear kernel function.
* This allows the algorithm to fit the maximum-margin hyperplane in a
* transformed feature space. The transformation may be nonlinear and the
* transformed space high-dimensional. For example, the feature space
* corresponding to the Gaussian kernel is a Hilbert space of infinite
* dimension. Thus, although the classifier is a hyperplane in the
* high-dimensional feature space, it may be nonlinear in the original
* input space. Maximum margin classifiers are well regularized, so the
* infinite dimension does not spoil the results.
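* As a hedged sketch (toy XOR-like data; the Gaussian bandwidth 1.0 is an
* arbitrary choice), a nonlinear SVM is trained by passing a kernel to
* {@code fit}:
* <pre>{@code
* // No linear hyperplane separates this data in the input space.
* double[][] x = {{0, 0}, {1, 1}, {0, 1}, {1, 0}};
* int[] y = {-1, -1, +1, +1};
* SVM<double[]> model = SVM.fit(x, y, new GaussianKernel(1.0), 1.0, 1E-3);
* int label = model.predict(new double[]{0.9, 0.1});
* }</pre>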
*
* The effectiveness of SVM depends on the selection of the kernel, the
* kernel's parameters, and the soft margin parameter {@code C}. Given a
* kernel, the best combination of {@code C} and the kernel's parameters
* is often selected by a grid search with cross-validation.
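* A minimal grid-search sketch, assuming a simple hold-out split rather
* than full cross-validation ({@code trainX}, {@code trainY},
* {@code testX}, {@code testY}, and the candidate grids are placeholders):
* <pre>{@code
* double bestAccuracy = 0.0;
* double bestC = 1.0, bestSigma = 1.0;
* for (double c : new double[]{0.1, 1.0, 10.0, 100.0}) {
*     for (double sigma : new double[]{0.5, 1.0, 2.0}) {
*         SVM<double[]> model = SVM.fit(trainX, trainY, new GaussianKernel(sigma), c, 1E-3);
*         int correct = 0;
*         for (int i = 0; i < testX.length; i++) {
*             if (model.predict(testX[i]) == testY[i]) correct++;
*         }
*         double accuracy = (double) correct / testX.length;
*         if (accuracy > bestAccuracy) {
*             bestAccuracy = accuracy;
*             bestC = c;
*             bestSigma = sigma;
*         }
*     }
* }
* }</pre>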
*
* The dominant approach for creating multi-class SVMs is to reduce the
* single multi-class problem to multiple binary classification problems.
* Common methods for such reduction build binary classifiers that
* distinguish between (i) one of the labels and the rest (one-versus-all)
* or (ii) every pair of classes (one-versus-one). Classification of new
* instances in the one-versus-all case is done by a winner-takes-all
* strategy, in which the classifier with the highest output function
* assigns the class. In the one-versus-one approach, classification is
* done by a max-wins voting strategy: every classifier assigns the
* instance to one of its two classes, the vote for the assigned class is
* increased by one, and finally the class with the most votes determines
* the instance's classification.
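* A hedged sketch of both reductions, mirroring the trainer lambdas used
* by {@code fit(double[][], int[], Properties)} below ({@code x} and
* {@code y} are placeholders with more than two classes):
* <pre>{@code
* // One-versus-rest with linear binary SVMs.
* Classifier<double[]> ovr = OneVersusRest.fit(x, y,
*     (xi, yi) -> SVM.fit(xi, yi, 1.0, 1E-3));
* // One-versus-one with Gaussian-kernel binary SVMs.
* Classifier<double[]> ovo = OneVersusOne.fit(x, y,
*     (xi, yi) -> SVM.fit(xi, yi, new GaussianKernel(1.0), 1.0, 1E-3));
* }</pre>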
*
*
* <h2>References</h2>
* <ol>
* <li>Christopher J. C. Burges. A Tutorial on Support Vector Machines for Pattern Recognition. Data Mining and Knowledge Discovery 2:121-167, 1998.</li>
* <li>John Platt. Sequential Minimal Optimization: A Fast Algorithm for Training Support Vector Machines.</li>
* <li>Rong-En Fan, Pai-Hsuen Chen, and Chih-Jen Lin. Working Set Selection Using Second Order Information for Training Support Vector Machines. JMLR, 6:1889-1918, 2005.</li>
* <li>Antoine Bordes, Seyda Ertekin, Jason Weston and Leon Bottou. Fast Kernel Classifiers with Online and Active Learning. Journal of Machine Learning Research, 6:1579-1619, 2005.</li>
* <li>Tobias Glasmachers and Christian Igel. Second Order SMO Improves SVM Online and Active Learning.</li>
* <li>Chih-Chung Chang and Chih-Jen Lin. LIBSVM: a Library for Support Vector Machines.</li>
* </ol>
*
* @see OneVersusOne
* @see OneVersusRest
*
* @author Haifeng Li
*/
public class SVM<T> extends KernelMachine<T> implements Classifier<T> {
/**
* Constructor.
* @param kernel Kernel function.
* @param vectors The support vectors.
* @param weight The weights of instances.
* @param b The intercept.
*/
public SVM(MercerKernel<T> kernel, T[] vectors, double[] weight, double b) {
super(kernel, vectors, weight, b);
}
@Override
public int numClasses() {
return 2;
}
@Override
public int[] classes() {
return new int[]{-1, +1};
}
@Override
public int predict(T x) {
return score(x) > 0 ? +1 : -1;
}
/**
* Fits a binary linear SVM.
* @param x training samples.
* @param y training labels of {-1, +1}.
* @param C the soft margin penalty parameter.
* @param tol the tolerance of convergence test.
* @return the model.
*/
public static Classifier<double[]> fit(double[][] x, int[] y, double C, double tol) {
return fit(x, y, C, tol, 1);
}
/**
* Fits a binary linear SVM.
* @param x training samples.
* @param y training labels of {-1, +1}.
* @param C the soft margin penalty parameter.
* @param tol the tolerance of convergence test.
* @param epochs the number of epochs, usually 1 or 2 is sufficient.
* @return the model.
*/
public static Classifier<double[]> fit(double[][] x, int[] y, double C, double tol, int epochs) {
LASVM<double[]> lasvm = new LASVM<>(new LinearKernel(), C, tol);
KernelMachine<double[]> svm = lasvm.fit(x, y, epochs);
IntSet labels = new IntSet(new int[]{-1, +1});
return new AbstractClassifier<double[]>(labels) {
final LinearKernelMachine model = LinearKernelMachine.of(svm);
@Override
public int predict(double[] x) {
return model.f(x) > 0 ? +1 : -1;
}
};
}
/**
* Fits a binary linear SVM of binary sparse data.
* @param x training samples.
* @param y training labels of {-1, +1}.
* @param p the dimension of input vector.
* @param C the soft margin penalty parameter.
* @param tol the tolerance of convergence test.
* @return the model.
*/
public static Classifier<int[]> fit(int[][] x, int[] y, int p, double C, double tol) {
return fit(x, y, p, C, tol, 1);
}
/**
* Fits a binary linear SVM of binary sparse data.
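* Each sample lists the indices of its one-valued features. A hedged
* sketch with toy placeholder data ({@code p = 4} feature dimensions):
* <pre>{@code
* int[][] x = {{0, 2}, {1, 3}, {0, 3}, {1, 2}};
* int[] y = {-1, +1, -1, +1};
* Classifier<int[]> model = SVM.fit(x, y, 4, 1.0, 1E-3);
* }</pre>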
* @param x training samples.
* @param y training labels of {-1, +1}.
* @param p the dimension of input vector.
* @param C the soft margin penalty parameter.
* @param tol the tolerance of convergence test.
* @param epochs the number of epochs, usually 1 or 2 is sufficient.
* @return the model.
*/
public static Classifier<int[]> fit(int[][] x, int[] y, int p, double C, double tol, int epochs) {
LASVM<int[]> lasvm = new LASVM<>(new BinarySparseLinearKernel(), C, tol);
KernelMachine<int[]> svm = lasvm.fit(x, y, epochs);
IntSet labels = new IntSet(new int[]{-1, +1});
return new AbstractClassifier<int[]>(labels) {
final LinearKernelMachine model = LinearKernelMachine.binary(p, svm);
@Override
public int predict(int[] x) {
return model.f(x) > 0 ? +1 : -1;
}
};
}
/**
* Fits a binary linear SVM of sparse data.
* @param x training samples.
* @param y training labels of {-1, +1}.
* @param p the dimension of input vector.
* @param C the soft margin penalty parameter.
* @param tol the tolerance of convergence test.
* @return the model.
*/
public static Classifier<SparseArray> fit(SparseArray[] x, int[] y, int p, double C, double tol) {
return fit(x, y, p, C, tol, 1);
}
/**
* Fits a binary linear SVM of sparse data.
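* A hedged sketch with toy placeholder data, assuming
* {@code SparseArray#append(int, double)} for building the inputs:
* <pre>{@code
* SparseArray a = new SparseArray();
* a.append(0, 1.5);   // feature 0 = 1.5
* a.append(3, -0.5);  // feature 3 = -0.5
* SparseArray b = new SparseArray();
* b.append(1, 2.0);   // feature 1 = 2.0
* SparseArray[] x = {a, b};
* int[] y = {+1, -1};
* Classifier<SparseArray> model = SVM.fit(x, y, 4, 1.0, 1E-3);
* }</pre>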
* @param x training samples.
* @param y training labels of {-1, +1}.
* @param p the dimension of input vector.
* @param C the soft margin penalty parameter.
* @param tol the tolerance of convergence test.
* @param epochs the number of epochs, usually 1 or 2 is sufficient.
* @return the model.
*/
public static Classifier<SparseArray> fit(SparseArray[] x, int[] y, int p, double C, double tol, int epochs) {
LASVM<SparseArray> lasvm = new LASVM<>(new SparseLinearKernel(), C, tol);
KernelMachine<SparseArray> svm = lasvm.fit(x, y, epochs);
IntSet labels = new IntSet(new int[]{-1, +1});
return new AbstractClassifier<SparseArray>(labels) {
final LinearKernelMachine model = LinearKernelMachine.sparse(p, svm);
@Override
public int predict(SparseArray x) {
return model.f(x) > 0 ? +1 : -1;
}
};
}
/**
* Fits a binary SVM.
* @param x training samples.
* @param y training labels of {-1, +1}.
* @param kernel the kernel function.
* @param C the soft margin penalty parameter.
* @param tol the tolerance of convergence test.
* @param <T> the data type.
* @return the model.
*/
public static <T> SVM<T> fit(T[] x, int[] y, MercerKernel<T> kernel, double C, double tol) {
return fit(x, y, kernel, C, tol, 1);
}
/**
* Fits a binary SVM.
* @param x training samples.
* @param y training labels of {-1, +1}.
* @param kernel the kernel function.
* @param C the soft margin penalty parameter.
* @param tol the tolerance of convergence test.
* @param epochs the number of epochs, usually 1 or 2 is sufficient.
* @param <T> the data type.
* @return the model.
*/
public static <T> SVM<T> fit(T[] x, int[] y, MercerKernel<T> kernel, double C, double tol, int epochs) {
LASVM<T> lasvm = new LASVM<>(kernel, C, tol);
KernelMachine<T> model = lasvm.fit(x, y, epochs);
return new SVM<>(model.kernel(), model.vectors(), model.weights(), model.intercept());
}
/**
* Fits a binary or multiclass SVM.
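* For example, a hedged sketch using the recognized property keys (the
* arrays {@code x} and {@code y} are placeholders):
* <pre>{@code
* Properties params = new Properties();
* params.setProperty("smile.svm.C", "10.0");
* params.setProperty("smile.svm.tolerance", "1E-3");
* params.setProperty("smile.svm.epochs", "2");
* params.setProperty("smile.svm.type", "ovo"); // "binary", "ovr", or "ovo"
* Classifier<double[]> model = SVM.fit(x, y, params);
* }</pre>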
* @param x training samples.
* @param y training labels.
* @param params the hyper-parameters.
* @return the model.
*/
public static Classifier<double[]> fit(double[][] x, int[] y, Properties params) {
MercerKernel<double[]> kernel = MercerKernel.of(params.getProperty("smile.svm.kernel", "linear"));
double C = Double.parseDouble(params.getProperty("smile.svm.C", "1.0"));
double tol = Double.parseDouble(params.getProperty("smile.svm.tolerance", "1E-3"));
int epochs = Integer.parseInt(params.getProperty("smile.svm.epochs", "1"));
int[] classes = MathEx.unique(y);
String trainer = params.getProperty("smile.svm.type", classes.length == 2 ? "binary" : "ovr").toLowerCase();
switch (trainer) {
case "ovr":
if (kernel instanceof LinearKernel) {
return OneVersusRest.fit(x, y, (xi, yi) -> SVM.fit(xi, yi, C, tol, epochs));
} else {
return OneVersusRest.fit(x, y, (xi, yi) -> SVM.fit(xi, yi, kernel, C, tol, epochs));
}
case "ovo":
if (kernel instanceof LinearKernel) {
return OneVersusOne.fit(x, y, (xi, yi) -> SVM.fit(xi, yi, C, tol, epochs));
} else {
return OneVersusOne.fit(x, y, (xi, yi) -> SVM.fit(xi, yi, kernel, C, tol, epochs));
}
case "binary":
Arrays.sort(classes);
if (classes[0] != -1 || classes[1] != +1) {
y = y.clone();
for (int i = 0; i < y.length; i++) {
y[i] = y[i] == classes[0] ? -1 : +1;
}
}
if (kernel instanceof LinearKernel) {
return SVM.fit(x, y, C, tol, epochs);
} else {
return SVM.fit(x, y, kernel, C, tol, epochs);
}
default:
throw new IllegalArgumentException("Unknown SVM type: " + trainer);
}
}
}