smile.util.SmileUtils Maven / Gradle / Ivy
The newest version!
/*******************************************************************************
* Copyright (c) 2010 Haifeng Li
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package smile.util;
import java.util.Arrays;
import smile.clustering.CLARANS;
import smile.clustering.KMeans;
import smile.data.Attribute;
import smile.math.Math;
import smile.math.distance.Metric;
import smile.math.rbf.GaussianRadialBasis;
import smile.sort.QuickSort;
/**
* Some useful functions.
*
* @author Haifeng Li
*/
public class SmileUtils {
/**
* Sorts each variable and returns the index of values in ascending order.
* Only numeric attributes will be sorted. Note that the order of original
* array is NOT altered.
*
* @param x a set of variables to be sorted. Each row is an instance. Each
* column is a variable.
* @return the index of values in ascending order
*/
public static int[][] sort(Attribute[] attributes, double[][] x) {
int n = x.length;
int p = x[0].length;
double[] a = new double[n];
int[][] index = new int[p][];
for (int j = 0; j < p; j++) {
if (attributes[j].type == Attribute.Type.NUMERIC) {
for (int i = 0; i < n; i++) {
a[i] = x[i][j];
}
index[j] = QuickSort.sort(a);
}
}
return index;
}
/**
* Learns Gaussian RBF function and centers from data. The centers are
* chosen as the centroids of K-Means. Let dmax be the maximum
* distance between the chosen centers, the standard deviation (i.e. width)
* of Gaussian radial basis function is dmax / sqrt(2*k), where
* k is number of centers. This choice would be close to the optimal
* solution if the data were uniformly distributed in the input space,
* leading to a uniform distribution of centroids.
* @param x the training dataset.
* @param centers an array to store centers on output. Its length is used as k of k-means.
* @return a Gaussian RBF function with parameter learned from data.
*/
public static GaussianRadialBasis learnGaussianRadialBasis(double[][] x, double[][] centers) {
int k = centers.length;
KMeans kmeans = new KMeans(x, k, 10);
System.arraycopy(kmeans.centroids(), 0, centers, 0, k);
double r0 = 0.0;
for (int i = 0; i < k; i++) {
for (int j = 0; j < i; j++) {
double d = Math.distance(centers[i], centers[j]);
if (r0 < d) {
r0 = d;
}
}
}
r0 /= Math.sqrt(2*k);
return new GaussianRadialBasis(r0);
}
/**
* Learns Gaussian RBF function and centers from data. The centers are
* chosen as the centroids of K-Means. The standard deviation (i.e. width)
* of Gaussian radial basis function is estimated by the p-nearest neighbors
* (among centers, not all samples) heuristic. A suggested value for
* p is 2.
* @param x the training dataset.
* @param centers an array to store centers on output. Its length is used as k of k-means.
* @param p the number of nearest neighbors of centers to estimate the width
* of Gaussian RBF functions.
* @return Gaussian RBF functions with parameter learned from data.
*/
public static GaussianRadialBasis[] learnGaussianRadialBasis(double[][] x, double[][] centers, int p) {
if (p < 1) {
throw new IllegalArgumentException("Invlaid number of nearest neighbors: " + p);
}
int k = centers.length;
KMeans kmeans = new KMeans(x, k, 10);
System.arraycopy(kmeans.centroids(), 0, centers, 0, k);
p = Math.min(p, k-1);
double[] r = new double[k];
GaussianRadialBasis[] rbf = new GaussianRadialBasis[k];
for (int i = 0; i < k; i++) {
for (int j = 0; j < k; j++) {
r[j] = Math.distance(centers[i], centers[j]);
}
Arrays.sort(r);
double r0 = 0.0;
for (int j = 1; j <= p; j++) {
r0 += r[j];
}
r0 /= p;
rbf[i] = new GaussianRadialBasis(r0);
}
return rbf;
}
/**
* Learns Gaussian RBF function and centers from data. The centers are
* chosen as the centroids of K-Means. The standard deviation (i.e. width)
* of Gaussian radial basis function is estimated as the width of each
* cluster multiplied with a given scaling parameter r.
* @param x the training dataset.
* @param centers an array to store centers on output. Its length is used as k of k-means.
* @param r the scaling parameter.
* @return Gaussian RBF functions with parameter learned from data.
*/
public static GaussianRadialBasis[] learnGaussianRadialBasis(double[][] x, double[][] centers, double r) {
if (r <= 0.0) {
throw new IllegalArgumentException("Invlaid scaling parameter: " + r);
}
int k = centers.length;
KMeans kmeans = new KMeans(x, k, 10);
System.arraycopy(kmeans.centroids(), 0, centers, 0, k);
int n = x.length;
int[] y = kmeans.getClusterLabel();
double[] sigma = new double[k];
for (int i = 0; i < n; i++) {
sigma[y[i]] += Math.squaredDistance(x[i], centers[y[i]]);
}
int[] ni = kmeans.getClusterSize();
GaussianRadialBasis[] rbf = new GaussianRadialBasis[k];
for (int i = 0; i < k; i++) {
if (ni[i] >= 5 || sigma[i] != 0.0) {
sigma[i] = Math.sqrt(sigma[i] / ni[i]);
} else {
sigma[i] = Double.POSITIVE_INFINITY;
for (int j = 0; j < k; j++) {
if (i != j) {
double d = Math.distance(centers[i], centers[j]);
if (d < sigma[i]) {
sigma[i] = d;
}
}
}
sigma[i] /= 2.0;
}
rbf[i] = new GaussianRadialBasis(r * sigma[i]);
}
return rbf;
}
/**
* Learns Gaussian RBF function and centers from data. The centers are
* chosen as the medoids of CLARANS. Let dmax be the maximum
* distance between the chosen centers, the standard deviation (i.e. width)
* of Gaussian radial basis function is dmax / sqrt(2*k), where
* k is number of centers. In this way, the radial basis functions are not
* too peaked or too flat. This choice would be close to the optimal
* solution if the data were uniformly distributed in the input space,
* leading to a uniform distribution of medoids.
* @param x the training dataset.
* @param centers an array to store centers on output. Its length is used as k of CLARANS.
* @param distance the distance functor.
* @return a Gaussian RBF function with parameter learned from data.
*/
public static GaussianRadialBasis learnGaussianRadialBasis(T[] x, T[] centers, Metric distance) {
int k = centers.length;
CLARANS clarans = new CLARANS(x, distance, k, Math.min(100, (int) Math.round(0.01 * k * (x.length-k))));
System.arraycopy(clarans.medoids(), 0, centers, 0, k);
double r0 = 0.0;
for (int i = 0; i < k; i++) {
for (int j = 0; j < i; j++) {
double d = distance.d(centers[i], centers[j]);
if (r0 < d) {
r0 = d;
}
}
}
r0 /= Math.sqrt(2*k);
return new GaussianRadialBasis(r0);
}
/**
* Learns Gaussian RBF function and centers from data. The centers are
* chosen as the medoids of CLARANS. The standard deviation (i.e. width)
* of Gaussian radial basis function is estimated by the p-nearest neighbors
* (among centers, not all samples) heuristic. A suggested value for
* p is 2.
* @param x the training dataset.
* @param centers an array to store centers on output. Its length is used as k of CLARANS.
* @param distance the distance functor.
* @param p the number of nearest neighbors of centers to estimate the width
* of Gaussian RBF functions.
* @return Gaussian RBF functions with parameter learned from data.
*/
public static GaussianRadialBasis[] learnGaussianRadialBasis(T[] x, T[] centers, Metric distance, int p) {
if (p < 1) {
throw new IllegalArgumentException("Invlaid number of nearest neighbors: " + p);
}
int k = centers.length;
CLARANS clarans = new CLARANS(x, distance, k, Math.min(100, (int) Math.round(0.01 * k * (x.length-k))));
System.arraycopy(clarans.medoids(), 0, centers, 0, k);
p = Math.min(p, k-1);
double[] r = new double[k];
GaussianRadialBasis[] rbf = new GaussianRadialBasis[k];
for (int i = 0; i < k; i++) {
for (int j = 0; j < k; j++) {
r[j] = distance.d(centers[i], centers[j]);
}
Arrays.sort(r);
double r0 = 0.0;
for (int j = 1; j <= p; j++) {
r0 += r[j];
}
r0 /= p;
rbf[i] = new GaussianRadialBasis(r0);
}
return rbf;
}
/**
* Learns Gaussian RBF function and centers from data. The centers are
* chosen as the medoids of CLARANS. The standard deviation (i.e. width)
* of Gaussian radial basis function is estimated as the width of each
* cluster multiplied with a given scaling parameter r.
* @param x the training dataset.
* @param centers an array to store centers on output. Its length is used as k of CLARANS.
* @param distance the distance functor.
* @param r the scaling parameter.
* @return Gaussian RBF functions with parameter learned from data.
*/
public static GaussianRadialBasis[] learnGaussianRadialBasis(T[] x, T[] centers, Metric distance, double r) {
if (r <= 0.0) {
throw new IllegalArgumentException("Invlaid scaling parameter: " + r);
}
int k = centers.length;
CLARANS clarans = new CLARANS(x, distance, k, Math.min(100, (int) Math.round(0.01 * k * (x.length-k))));
System.arraycopy(clarans.medoids(), 0, centers, 0, k);
int n = x.length;
int[] y = clarans.getClusterLabel();
double[] sigma = new double[k];
for (int i = 0; i < n; i++) {
sigma[y[i]] += Math.sqr(distance.d(x[i], centers[y[i]]));
}
int[] ni = clarans.getClusterSize();
GaussianRadialBasis[] rbf = new GaussianRadialBasis[k];
for (int i = 0; i < k; i++) {
if (ni[i] >= 5 || sigma[i] == 0.0) {
sigma[i] = Math.sqrt(sigma[i] / ni[i]);
} else {
sigma[i] = Double.POSITIVE_INFINITY;
for (int j = 0; j < k; j++) {
if (i != j) {
double d = distance.d(centers[i], centers[j]);
if (d < sigma[i]) {
sigma[i] = d;
}
}
}
sigma[i] /= 2.0;
}
rbf[i] = new GaussianRadialBasis(r * sigma[i]);
}
return rbf;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy