![JAR search and dependency download from the Maven repository](/logo.png)
smile.math.MathEx Maven / Gradle / Ivy
/*******************************************************************************
* Copyright (c) 2010-2020 Haifeng Li. All rights reserved.
*
* Smile is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* Smile is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Smile. If not, see .
******************************************************************************/
package smile.math;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.stream.IntStream;
import java.util.stream.LongStream;
import smile.math.blas.UPLO;
import smile.math.distance.Distance;
import smile.math.matrix.Matrix;
import smile.sort.QuickSelect;
import smile.sort.QuickSort;
import smile.sort.Sort;
import smile.util.IntPair;
import smile.util.SparseArray;
import static java.lang.Math.abs;
import static java.lang.Math.exp;
import static java.lang.Math.floor;
import static java.lang.Math.sqrt;
/**
* Extra basic numeric functions. The following functions are
* included:
*
* - scalar functions: sqr, factorial, lfactorial, choose, lchoose, log2 are
* provided.
*
- vector functions: min, max, mean, sum, var, sd, cov, L1 norm,
* L2 norm, L∞ norm, normalize, unitize, cor, Spearman
* correlation, Kendall correlation, distance, dot product, histogram, vector
* (element-wise) copy, equal, plus, minus, times, and divide.
*
- matrix functions: min, max, rowSums, colSums, rowMeans, colMeans, transpose,
* cov, cor, matrix copy, equals.
*
- random functions: random, randomInt, and permutate.
*
- Find the root of a univariate function with or without derivative.
*
*
* @author Haifeng Li
*/
public class MathEx {
private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MathEx.class);
/**
* Dynamically determines the machine parameters of the floating-point arithmetic.
*/
private static final FPU fpu = new FPU();
/**
* The machine precision for the double type, which is the difference between 1
* and the smallest value greater than 1 that is representable for the double type.
*/
public static final double EPSILON = fpu.EPSILON;
/**
* The machine precision for the float type, which is the difference between 1
* and the smallest value greater than 1 that is representable for the float type.
*/
public static final float FLOAT_EPSILON = fpu.FLOAT_EPSILON;
/**
* The base of the exponent of the double type.
*/
public static final int RADIX = fpu.RADIX;
/**
* The number of digits (in radix base) in the mantissa.
*/
public static final int DIGITS = fpu.DIGITS;
/**
* The number of digits (in radix base) in the mantissa.
*/
public static final int FLOAT_DIGITS = fpu.FLOAT_DIGITS;
/**
* Rounding style.
*
* - 0 if floating-point addition chops
*
- 1 if floating-point addition rounds, but not in the ieee style
*
- 2 if floating-point addition rounds in the ieee style
*
- 3 if floating-point addition chops, and there is partial underflow
*
- 4 if floating-point addition rounds, but not in the ieee style, and there is partial underflow
*
- 5 if floating-point addition rounds in the IEEEE style, and there is partial underflow
*
*/
public static final int ROUND_STYLE = fpu.ROUND_STYLE;
/**
* The largest negative integer such that 1.0 + RADIXMACHEP ≠ 1.0,
* except that machep is bounded below by -(DIGITS+3)
*/
public static final int MACHEP = fpu.MACHEP;
/**
* The largest negative integer such that 1.0 + RADIXMACHEP ≠ 1.0,
* except that machep is bounded below by -(DIGITS+3)
*/
public static final int FLOAT_MACHEP = fpu.FLOAT_MACHEP;
/**
* The largest negative integer such that 1.0 - RADIXNEGEP ≠ 1.0,
* except that negeps is bounded below by -(DIGITS+3)
*/
public static final int NEGEP = fpu.NEGEP;
/**
* The largest negative integer such that 1.0 - RADIXNEGEP ≠ 1.0,
* except that negeps is bounded below by -(DIGITS+3)
*/
public static final int FLOAT_NEGEP = fpu.FLOAT_NEGEP;
/**
* This RNG is to generate the seeds for multi-threads.
* Each thread will use different seed and unlikely generates
* the correlated sequences with other threads.
*/
private static Random seedRNG = new Random();
/**
* Used seeds.
*/
private static HashSet seeds = new HashSet<>();
/**
* High quality random number generator.
*/
private static ThreadLocal random = new ThreadLocal() {
protected synchronized Random initialValue() {
// For the first RNG, we use the default seed so that we can
// get repeatable results for random algorithms.
// Note that this may or may not be the main thread.
long seed = 19650218L;
// Make sure other threads not to use the same seed.
// This is very important for some algorithms such as random forest.
// Otherwise, all trees of random forest are same except the main thread one.
if (!seeds.isEmpty()) {
do {
seed = probablePrime(19650218L, 256, seedRNG);
} while (seeds.contains(seed));
}
logger.info(String.format("Set RNG seed %d for thread %s", seed, Thread.currentThread().getName()));
seeds.add(seed);
return new Random(seed);
}
};
/**
* Dynamically determines the machine parameters of the floating-point arithmetic.
*/
private static class FPU {
int RADIX = 2;
int DIGITS = 53;
int FLOAT_DIGITS = 24;
int ROUND_STYLE = 2;
int MACHEP = -52;
int FLOAT_MACHEP = -23;
int NEGEP = -53;
int FLOAT_NEGEP = -24;
float FLOAT_EPSILON = (float) Math.pow(2.0, FLOAT_MACHEP);
double EPSILON = Math.pow(2.0, MACHEP);
FPU() {
double beta, betain, betah, a, b, ZERO, ONE, TWO, temp, tempa, temp1;
int i, itemp;
ONE = (double) 1;
TWO = ONE + ONE;
ZERO = ONE - ONE;
a = ONE;
temp1 = ONE;
while (temp1 - ONE == ZERO) {
a = a + a;
temp = a + ONE;
temp1 = temp - a;
}
b = ONE;
itemp = 0;
while (itemp == 0) {
b = b + b;
temp = a + b;
itemp = (int) (temp - a);
}
RADIX = itemp;
beta = (double) RADIX;
DIGITS = 0;
b = ONE;
temp1 = ONE;
while (temp1 - ONE == ZERO) {
DIGITS = DIGITS + 1;
b = b * beta;
temp = b + ONE;
temp1 = temp - b;
}
ROUND_STYLE = 0;
betah = beta / TWO;
temp = a + betah;
if (temp - a != ZERO) {
ROUND_STYLE = 1;
}
tempa = a + beta;
temp = tempa + betah;
if ((ROUND_STYLE == 0) && (temp - tempa != ZERO)) {
ROUND_STYLE = 2;
}
NEGEP = DIGITS + 3;
betain = ONE / beta;
a = ONE;
for (i = 0; i < NEGEP; i++) {
a = a * betain;
}
b = a;
temp = ONE - a;
while (temp - ONE == ZERO) {
a = a * beta;
NEGEP = NEGEP - 1;
temp = ONE - a;
}
NEGEP = -(NEGEP);
MACHEP = -(DIGITS) - 3;
a = b;
temp = ONE + a;
while (temp - ONE == ZERO) {
a = a * beta;
MACHEP = MACHEP + 1;
temp = ONE + a;
}
EPSILON = a;
}
}
/**
* Private constructor.
*/
private MathEx() {
}
/**
* log(2), used in log2().
*/
private static final double LOG2 = Math.log(2);
/**
* Log of base 2.
*/
public static double log2(double x) {
return Math.log(x) / LOG2;
}
/**
* Returns natural log without underflow.
*/
public static double log(double x) {
double y = -690.7755;
if (x > 1E-300) {
y = Math.log(x);
}
return y;
}
/**
* Returns natural log(1+exp(x)) without overflow.
*/
public static double log1pe(double x) {
double y = x;
if (x <= 15) {
y = Math.log1p(Math.exp(x));
}
return y;
}
/** Returns true if x is an integer. */
public static boolean isInt(float x) {
return (x == (float) Math.floor(x)) && !Float.isInfinite(x);
}
/** Returns true if x is an integer. */
public static boolean isInt(double x) {
return (x == Math.floor(x)) && !Double.isInfinite(x);
}
/**
* Returns true if two double values equals to each other in the system precision.
* @param a a double value.
* @param b a double value.
* @return true if two double values equals to each other in the system precision
*/
public static boolean equals(double a, double b) {
if (a == b) {
return true;
}
double absa = abs(a);
double absb = abs(b);
return abs(a - b) <= Math.min(absa, absb) * 2.2204460492503131e-16;
}
/**
* Logistic sigmoid function.
*/
public static double logistic(double x) {
double y;
if (x < -40) {
y = 2.353853e+17;
} else if (x > 40) {
y = 1.0 + 4.248354e-18;
} else {
y = 1.0 + exp(-x);
}
return 1.0 / y;
}
/**
* Hyperbolic tangent function. The tanh function is a rescaling of the
* logistic sigmoid, such that its outputs range from -1 to 1.
*/
public static double tanh(double x) {
return 2.0 * logistic(2.0 * x) - 1.0;
}
/**
* Returns x * x.
*/
public static double sqr(double x) {
return x * x;
}
/**
* Returns true if x is a power of 2.
*/
public static boolean isPower2(int x) {
return x > 0 && (x & (x - 1)) == 0;
}
/**
* Returns true if n is probably prime, false if it's definitely composite.
* This implements Miller-Rabin primality test.
* @param n an odd integer to be tested for primality
* @param k a parameter that determines the accuracy of the test
*/
public static boolean isProbablePrime(long n, int k) {
return isProbablePrime(n, k, random.get());
}
/**
* Returns true if n is probably prime, false if it's definitely composite.
* This implements Miller-Rabin primality test.
* @param n an odd integer to be tested for primality
* @param k a parameter that determines the accuracy of the test
* @param rng random number generator
*/
private static boolean isProbablePrime(long n, int k, Random rng) {
if (n <= 1 || n == 4)
return false;
if (n <= 3)
return true;
// Find r such that n = 2^d * r + 1 for some r >= 1
int s = 0;
long d = n - 1;
while (d % 2 == 0) {
s++;
d = d / 2;
}
for (int i = 0; i < k; i++) {
long a = 2 + rng.nextLong() % (n-4);
long x = power(a, d, n);
if (x == 1 || x == n -1)
continue;
int r = 0;
for (; r < s; r++) {
x = (x * x) % n;
if (x == 1) return false;
if (x == n - 1) break;
}
// None of the steps made x equal n-1.
if (r == s) return false;
}
return true;
}
/**
* Modular exponentiation (x^y) % p
*/
private static long power(long x, long y, long p)
{
long res = 1; // Initialize result
x = x % p; // Update x if it is more than or
// equal to p
while (y > 0) {
// If y is odd, multiply x with result
if ((y & 1) == 1) res = (res*x) % p;
// y must be even now
y = y>>1; // y = y/2
x = (x*x) % p;
}
return res;
}
/**
* Round a double vale to given digits such as 10^n, where n is a positive
* or negative integer.
*/
public static double round(double x, int decimal) {
if (decimal < 0) {
return Math.round(x / Math.pow(10, -decimal)) * Math.pow(10, -decimal);
} else {
return Math.round(x * Math.pow(10, decimal)) / Math.pow(10, decimal);
}
}
/**
* The factorial of n.
*
* @return factorial returned as double but is, numerically, an integer.
* Numerical rounding may make this an approximation after n = 21.
*/
public static double factorial(int n) {
if (n < 0) {
throw new IllegalArgumentException("n has to be non-negative.");
}
double f = 1.0;
for (int i = 2; i <= n; i++) {
f *= i;
}
return f;
}
/**
* The log of factorial of n.
*/
public static double lfactorial(int n) {
if (n < 0) {
throw new IllegalArgumentException(String.format("n has to be non-negative: %d", n));
}
double f = 0.0;
for (int i = 2; i <= n; i++) {
f += Math.log(i);
}
return f;
}
/**
* The n choose k. Returns 0 if n is less than k.
*/
public static double choose(int n, int k) {
if (n < 0 || k < 0) {
throw new IllegalArgumentException(String.format("Invalid n = %d, k = %d", n, k));
}
if (n < k) {
return 0.0;
}
return floor(0.5 + exp(lchoose(n, k)));
}
/**
* The log of n choose k.
*/
public static double lchoose(int n, int k) {
if (n < 0 || k < 0 || k > n) {
throw new IllegalArgumentException(String.format("Invalid n = %d, k = %d", n, k));
}
return lfactorial(n) - lfactorial(k) - lfactorial(n - k);
}
/**
* Initialize the random generator with a seed.
*/
public static void setSeed(long seed) {
if (seeds.isEmpty()) {
seedRNG.setSeed(seed);
random.get().setSeed(seed);
seeds.clear();
seeds.add(seed);
} else {
random.get().setSeed(seed);
seeds.add(seed);
}
}
/**
* Initialize the random generator with a random seed from a
* cryptographically strong random number generator.
*/
public static void setSeed() {
java.security.SecureRandom sr = new java.security.SecureRandom();
byte[] bytes = sr.generateSeed(Long.BYTES);
long seed = 0;
for (int i = 0; i < Long.BYTES; i++) {
seed <<= 8;
seed |= (bytes[i] & 0xFF);
}
setSeed(seed);
}
/**
* Returns a probably prime number greater than n.
* @param n the returned value should be greater than n.
* @param k a parameter that determines the accuracy of the primality test
*/
public static long probablePrime(long n, int k) {
return probablePrime(n, k, random.get());
}
/** Returns a probably prime number. */
private static long probablePrime(long n, int k, Random rng) {
long seed = n + rng.nextInt(899999963); // The largest prime less than 9*10^8
for (int i = 0; i < 4096; i++) {
if (isProbablePrime(seed, k, rng)) break;
seed = n + rng.nextInt(899999963);
}
return seed;
}
/**
* Returns a stream of prime numbers to be used as RNG seeds.
* @param n the returned value should be greater than n.
* @param k a parameter that determines the accuracy of the primality test
*/
public static LongStream seeds(long n, int k) {
return LongStream.generate(() -> probablePrime(n, k, seedRNG));
}
/**
* Given a set of n probabilities, generate a random number in [0, n).
* @param prob probabilities of size n. The prob argument can be used to
* give a vector of weights for obtaining the elements of the vector being
* sampled. They need not sum to one, but they should be non-negative and
* not all zero.
* @return a random integer in [0, n).
*/
public static int random(double[] prob) {
int[] ans = random(prob, 1);
return ans[0];
}
/**
* Given a set of m probabilities, draw with replacement a set of n random
* number in [0, m).
* @param prob probabilities of size n. The prob argument can be used to
* give a vector of weights for obtaining the elements of the vector being
* sampled. They need not sum to one, but they should be non-negative and
* not all zero.
* @return an random array of length n in range of [0, m).
*/
public static int[] random(double[] prob, int n) {
// set up alias table
double[] q = new double[prob.length];
for (int i = 0; i < prob.length; i++) {
q[i] = prob[i] * prob.length;
}
// initialize a with indices
int[] a = new int[prob.length];
for (int i = 0; i < prob.length; i++) {
a[i] = i;
}
// set up H and L
int[] HL = new int[prob.length];
int head = 0;
int tail = prob.length - 1;
for (int i = 0; i < prob.length; i++) {
if (q[i] >= 1.0) {
HL[head++] = i;
} else {
HL[tail--] = i;
}
}
while (head != 0 && tail != prob.length - 1) {
int j = HL[tail + 1];
int k = HL[head - 1];
a[j] = k;
q[k] += q[j] - 1;
tail++; // remove j from L
if (q[k] < 1.0) {
HL[tail--] = k; // add k to L
head--; // remove k
}
}
// generate sample
int[] ans = new int[n];
for (int i = 0; i < n; i++) {
double rU = random() * prob.length;
int k = (int) (rU);
rU -= k; /* rU becomes rU-[rU] */
if (rU < q[k]) {
ans[i] = k;
} else {
ans[i] = a[k];
}
}
return ans;
}
/**
* Generate a random number in [0, 1).
*/
public static double random() {
return random.get().nextDouble();
}
/**
* Generate n random numbers in [0, 1).
*/
public static double[] random(int n) {
double[] x = new double[n];
random.get().nextDoubles(x);
return x;
}
/**
* Generate a uniform random number in the range [lo, hi).
* @param lo lower limit of range
* @param hi upper limit of range
* @return a uniform random real in the range [lo, hi)
*/
public static double random(double lo, double hi) {
return random.get().nextDouble(lo, hi);
}
/**
* Generate n uniform random numbers in the range [lo, hi).
* @param n size of the array
* @param lo lower limit of range
* @param hi upper limit of range
* @return a uniform random real in the range [lo, hi)
*/
public static double[] random(double lo, double hi, int n) {
double[] x = new double[n];
random.get().nextDoubles(x, lo, hi);
return x;
}
/**
* Returns a random long integer.
*/
public static long randomLong() {
return random.get().nextLong();
}
/**
* Returns a random integer in [0, n).
*/
public static int randomInt(int n) {
return random.get().nextInt(n);
}
/**
* Returns a random integer in [lo, hi).
*/
public static int randomInt(int lo, int hi) {
int w = hi - lo;
return lo + random.get().nextInt(w);
}
/**
* Generates a permutation of 0, 1, 2, ..., n-1, which is useful for
* sampling without replacement.
*/
public static int[] permutate(int n) {
return random.get().permutate(n);
}
/**
* Generates a permutation of given array.
*/
public static void permutate(int[] x) {
random.get().permutate(x);
}
/**
* Generates a permutation of given array.
*/
public static void permutate(float[] x) {
random.get().permutate(x);
}
/**
* Generates a permutation of given array.
*/
public static void permutate(double[] x) {
random.get().permutate(x);
}
/**
* Generates a permutation of given array.
*/
public static void permutate(Object[] x) {
random.get().permutate(x);
}
/**
* The softmax function without overflow. The function takes as
* an input vector of K real numbers, and normalizes it into a
* probability distribution consisting of K probabilities
* proportional to the exponentials of the input numbers.
* That is, prior to applying softmax, some vector components
* could be negative, or greater than one; and might not sum
* to 1; but after applying softmax, each component will be
* in the interval (0,1), and the components will add up to 1,
* so that they can be interpreted as probabilities. Furthermore,
* the larger input components will correspond to larger probabilities.
*
* @param posteriori the input/output vector.
* @return the index of largest posteriori probability.
*/
public static int softmax(double[] posteriori) {
return softmax(posteriori, posteriori.length);
}
/**
* The softmax function without overflow. The function takes as
* an input vector of K real numbers, and normalizes it into a
* probability distribution consisting of K probabilities
* proportional to the exponentials of the input numbers.
* That is, prior to applying softmax, some vector components
* could be negative, or greater than one; and might not sum
* to 1; but after applying softmax, each component will be
* in the interval (0,1), and the components will add up to 1,
* so that they can be interpreted as probabilities. Furthermore,
* the larger input components will correspond to larger probabilities.
*
* @param x the input/output vector.
* @param k uses only first k components of input vector.
* @return the index of largest posteriori probability.
*/
public static int softmax(double[] x, int k) {
int y = -1;
double max = Double.NEGATIVE_INFINITY;
for (int i = 0; i < k; i++) {
if (x[i] > max) {
max = x[i];
y = i;
}
}
double Z = 0.0;
for (int i = 0; i < k; i++) {
double out = exp(x[i] - max);
x[i] = out;
Z += out;
}
for (int i = 0; i < k; i++) {
x[i] /= Z;
}
return y;
}
/** Combines the arguments to form a vector. */
public static int[] c(int... x) {
return x;
}
/** Combines the arguments to form a vector. */
public static float[] c(float... x) {
return x;
}
/** Combines the arguments to form a vector. */
public static double[] c(double... x) {
return x;
}
/** Combines the arguments to form a vector. */
public static String[] c(String... x) {
return x;
}
/** Merges multiple vectors into one. */
public static int[] c(int[]... list) {
int n = 0;
for (int[] x : list) n += x.length;
int[] y = new int[n];
int pos = 0;
for (int[] x: list) {
System.arraycopy(x, 0, y, pos, x.length);
pos += x.length;
}
return y;
}
/** Merges multiple vectors into one. */
public static float[] c(float[]... list) {
int n = 0;
for (float[] x : list) n += x.length;
float[] y = new float[n];
int pos = 0;
for (float[] x: list) {
System.arraycopy(x, 0, y, pos, x.length);
pos += x.length;
}
return y;
}
/** Merges multiple vectors into one. */
public static double[] c(double[]... list) {
int n = 0;
for (double[] x : list) n += x.length;
double[] y = new double[n];
int pos = 0;
for (double[] x: list) {
System.arraycopy(x, 0, y, pos, x.length);
pos += x.length;
}
return y;
}
/** Concatenates multiple vectors into one array of strings. */
public static String[] c(String[]... list) {
int n = 0;
for (String[] x : list) n += x.length;
String[] y = new String[n];
int pos = 0;
for (String[] x: list) {
System.arraycopy(x, 0, y, pos, x.length);
pos += x.length;
}
return y;
}
/** Take a sequence of vector arguments and combine by columns. */
public static int[] cbind(int[]... x) {
return c(x);
}
/** Take a sequence of vector arguments and combine by columns. */
public static float[] cbind(float[]... x) {
return c(x);
}
/** Take a sequence of vector arguments and combine by columns. */
public static double[] cbind(double[]... x) {
return c(x);
}
/** Take a sequence of vector arguments and combine by columns. */
public static String[] cbind(String[]... x) {
return c(x);
}
/** Take a sequence of vector arguments and combine by rows. */
public static int[][] rbind(int[]... x) {
return x;
}
/** Take a sequence of vector arguments and combine by rows. */
public static float[][] rbind(float[]... x) {
return x;
}
/** Take a sequence of vector arguments and combine by rows. */
public static double[][] rbind(double[]... x) {
return x;
}
/** Take a sequence of vector arguments and combine by rows. */
public static String[][] rbind(String[]... x) {
return x;
}
/**
* Returns a slice of data for given indices.
*/
public static E[] slice(E[] data, int[] index) {
int n = index.length;
@SuppressWarnings("unchecked")
E[] x = (E[]) java.lang.reflect.Array.newInstance(data.getClass().getComponentType(), n);
for (int i = 0; i < n; i++) {
x[i] = data[index[i]];
}
return x;
}
/**
* Returns a slice of data for given indices.
*/
public static int[] slice(int[] data, int[] index) {
int n = index.length;
int[] x = new int[n];
for (int i = 0; i < n; i++) {
x[i] = data[index[i]];
}
return x;
}
/**
* Returns a slice of data for given indices.
*/
public static float[] slice(float[] data, int[] index) {
int n = index.length;
float[] x = new float[n];
for (int i = 0; i < n; i++) {
x[i] = data[index[i]];
}
return x;
}
/**
* Returns a slice of data for given indices.
*/
public static double[] slice(double[] data, int[] index) {
int n = index.length;
double[] x = new double[n];
for (int i = 0; i < n; i++) {
x[i] = data[index[i]];
}
return x;
}
/**
* Determines if the polygon contains the specified coordinates.
*
* @param point the coordinates of specified point to be tested.
* @return true if the Polygon contains the specified coordinates; false otherwise.
*/
public static boolean contains(double[][] polygon, double[] point) {
return contains(polygon, point[0], point[1]);
}
/**
* Determines if the polygon contains the specified coordinates.
*
* @param x the specified x coordinate.
* @param y the specified y coordinate.
* @return true if the Polygon contains the specified coordinates; false otherwise.
*/
public static boolean contains(double[][] polygon, double x, double y) {
if (polygon.length <= 2) {
return false;
}
int hits = 0;
int n = polygon.length;
double lastx = polygon[n - 1][0];
double lasty = polygon[n - 1][1];
double curx, cury;
// Walk the edges of the polygon
for (int i = 0; i < n; lastx = curx, lasty = cury, i++) {
curx = polygon[i][0];
cury = polygon[i][1];
if (cury == lasty) {
continue;
}
double leftx;
if (curx < lastx) {
if (x >= lastx) {
continue;
}
leftx = curx;
} else {
if (x >= curx) {
continue;
}
leftx = lastx;
}
double test1, test2;
if (cury < lasty) {
if (y < cury || y >= lasty) {
continue;
}
if (x < leftx) {
hits++;
continue;
}
test1 = x - curx;
test2 = y - cury;
} else {
if (y < lasty || y >= cury) {
continue;
}
if (x < leftx) {
hits++;
continue;
}
test1 = x - lastx;
test2 = y - lasty;
}
if (test1 < (test2 / (lasty - cury) * (lastx - curx))) {
hits++;
}
}
return ((hits & 1) != 0);
}
/**
* Reverses the order of the elements in the specified array.
* @param a an array to reverse.
*/
public static void reverse(int[] a) {
int i = 0, j = a.length - 1;
while (i < j) {
Sort.swap(a, i++, j--); // code for swap not shown, but easy enough
}
}
/**
* Reverses the order of the elements in the specified array.
* @param a an array to reverse.
*/
public static void reverse(float[] a) {
int i = 0, j = a.length - 1;
while (i < j) {
Sort.swap(a, i++, j--); // code for swap not shown, but easy enough
}
}
/**
* Reverses the order of the elements in the specified array.
* @param a an array to reverse.
*/
public static void reverse(double[] a) {
int i = 0, j = a.length - 1;
while (i < j) {
Sort.swap(a, i++, j--); // code for swap not shown, but easy enough
}
}
/**
* Reverses the order of the elements in the specified array.
* @param a an array to reverse.
*/
public static void reverse(T[] a) {
int i = 0, j = a.length - 1;
while (i < j) {
Sort.swap(a, i++, j--);
}
}
/**
* minimum of 3 integers
*/
public static int min(int a, int b, int c) {
return Math.min(Math.min(a, b), c);
}
/**
* minimum of 3 floats
*/
public static double min(float a, float b, float c) {
return Math.min(Math.min(a, b), c);
}
/**
* minimum of 3 doubles
*/
public static double min(double a, double b, double c) {
return Math.min(Math.min(a, b), c);
}
/**
* minimum of 4 integers
*/
public static int min(int a, int b, int c, int d) {
return Math.min(Math.min(Math.min(a, b), c), d);
}
/**
* minimum of 4 floats
*/
public static double min(float a, float b, float c, float d) {
return Math.min(Math.min(Math.min(a, b), c), d);
}
/**
* minimum of 4 doubles
*/
public static double min(double a, double b, double c, double d) {
return Math.min(Math.min(Math.min(a, b), c), d);
}
/**
* maximum of 3 integers
*/
public static int max(int a, int b, int c) {
return Math.max(Math.max(a, b), c);
}
/**
* maximum of 3 floats
*/
public static float max(float a, float b, float c) {
return Math.max(Math.max(a, b), c);
}
/**
* maximum of 3 doubles
*/
public static double max(double a, double b, double c) {
return Math.max(Math.max(a, b), c);
}
/**
* maximum of 4 integers
*/
public static int max(int a, int b, int c, int d) {
return Math.max(Math.max(Math.max(a, b), c), d);
}
/**
* maximum of 4 floats
*/
public static float max(float a, float b, float c, float d) {
return Math.max(Math.max(Math.max(a, b), c), d);
}
/**
* maximum of 4 doubles
*/
public static double max(double a, double b, double c, double d) {
return Math.max(Math.max(Math.max(a, b), c), d);
}
/**
* Returns the minimum value of an array.
*/
public static int min(int[] x) {
int min = x[0];
for (int n : x) {
if (n < min) {
min = n;
}
}
return min;
}
/**
* Returns the minimum value of an array.
*/
public static float min(float[] x) {
float min = Float.POSITIVE_INFINITY;
for (float n : x) {
if (n < min) {
min = n;
}
}
return min;
}
/**
* Returns the minimum value of an array.
*/
public static double min(double[] x) {
double min = Double.POSITIVE_INFINITY;
for (double n : x) {
if (n < min) {
min = n;
}
}
return min;
}
/**
* Returns the index of minimum value of an array.
*/
public static int whichMin(int[] x) {
int min = x[0];
int which = 0;
for (int i = 1; i < x.length; i++) {
if (x[i] < min) {
min = x[i];
which = i;
}
}
return which;
}
/**
* Returns the index of minimum value of an array.
*/
public static int whichMin(float[] x) {
float min = Float.POSITIVE_INFINITY;
int which = 0;
for (int i = 0; i < x.length; i++) {
if (x[i] < min) {
min = x[i];
which = i;
}
}
return which;
}
/**
* Returns the index of minimum value of an array.
*/
public static int whichMin(double[] x) {
double min = Double.POSITIVE_INFINITY;
int which = 0;
for (int i = 0; i < x.length; i++) {
if (x[i] < min) {
min = x[i];
which = i;
}
}
return which;
}
/**
* Returns the index of minimum value of an array.
*/
public static IntPair whichMin(double[][] x) {
double min = Double.POSITIVE_INFINITY;
int whichRow = 0;
int whichCol = 0;
for (int i = 0; i < x.length; i++) {
for (int j = 0; j < x[i].length; j++) {
if (x[i][j] < min) {
min = x[i][j];
whichRow = i;
whichCol = j;
}
}
}
return new IntPair(whichRow, whichCol);
}
/**
* Returns the maximum value of an array.
*/
public static int max(int[] x) {
int max = x[0];
for (int n : x) {
if (n > max) {
max = n;
}
}
return max;
}
/**
* Returns the maximum value of an array.
*/
public static float max(float[] x) {
float max = Float.NEGATIVE_INFINITY;
for (float n : x) {
if (n > max) {
max = n;
}
}
return max;
}
/**
* Returns the maximum value of an array.
*/
public static double max(double[] x) {
double max = Double.NEGATIVE_INFINITY;
for (double n : x) {
if (n > max) {
max = n;
}
}
return max;
}
/**
* Returns the index of maximum value of an array.
*/
public static int whichMax(int[] x) {
int max = x[0];
int which = 0;
for (int i = 1; i < x.length; i++) {
if (x[i] > max) {
max = x[i];
which = i;
}
}
return which;
}
/**
* Returns the index of maximum value of an array.
*/
public static int whichMax(float[] x) {
float max = Float.NEGATIVE_INFINITY;
int which = 0;
for (int i = 0; i < x.length; i++) {
if (x[i] > max) {
max = x[i];
which = i;
}
}
return which;
}
/**
* Returns the index of maximum value of an array.
*/
public static int whichMax(double[] x) {
double max = Double.NEGATIVE_INFINITY;
int which = 0;
for (int i = 0; i < x.length; i++) {
if (x[i] > max) {
max = x[i];
which = i;
}
}
return which;
}
/**
* Returns the index of maximum value of an array.
*/
public static IntPair whichMax(double[][] x) {
double max = Double.NEGATIVE_INFINITY;
int whichRow = 0;
int whichCol = 0;
for (int i = 0; i < x.length; i++) {
for (int j = 0; j < x[i].length; j++) {
if (x[i][j] > max) {
max = x[i][j];
whichRow = i;
whichCol = j;
}
}
}
return new IntPair(whichRow, whichCol);
}
/**
* Returns the minimum of a matrix.
*/
public static int min(int[][] matrix) {
int min = matrix[0][0];
for (int[] x : matrix) {
for (int y : x) {
if (min > y) {
min = y;
}
}
}
return min;
}
/**
* Returns the minimum of a matrix.
*/
public static double min(double[][] matrix) {
double min = Double.POSITIVE_INFINITY;
for (double[] x : matrix) {
for (double y : x) {
if (min > y) {
min = y;
}
}
}
return min;
}
/**
* Returns the maximum of a matrix.
*/
public static int max(int[][] matrix) {
int max = matrix[0][0];
for (int[] x : matrix) {
for (int y : x) {
if (max < y) {
max = y;
}
}
}
return max;
}
/**
* Returns the maximum of a matrix.
*/
public static double max(double[][] matrix) {
double max = Double.NEGATIVE_INFINITY;
for (double[] x : matrix) {
for (double y : x) {
if (max < y) {
max = y;
}
}
}
return max;
}
/**
* Returns the matrix transpose.
*/
public static double[][] transpose(double[][] A) {
int m = A.length;
int n = A[0].length;
double[][] matrix = new double[n][m];
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
matrix[j][i] = A[i][j];
}
}
return matrix;
}
/**
* Returns the row minimum for a matrix.
*/
public static int[] rowMin(int[][] data) {
int[] x = new int[data.length];
for (int i = 0; i < x.length; i++) {
x[i] = min(data[i]);
}
return x;
}
/**
* Returns the row maximum for a matrix.
*/
public static int[] rowMax(int[][] data) {
int[] x = new int[data.length];
for (int i = 0; i < x.length; i++) {
x[i] = max(data[i]);
}
return x;
}
/**
* Returns the row sums for a matrix.
*/
public static long[] rowSums(int[][] data) {
long[] x = new long[data.length];
for (int i = 0; i < x.length; i++) {
x[i] = sum(data[i]);
}
return x;
}
/**
* Returns the row minimum for a matrix.
*/
public static double[] rowMin(double[][] data) {
double[] x = new double[data.length];
for (int i = 0; i < x.length; i++) {
x[i] = min(data[i]);
}
return x;
}
/**
* Returns the row maximum for a matrix.
*/
public static double[] rowMax(double[][] data) {
double[] x = new double[data.length];
for (int i = 0; i < x.length; i++) {
x[i] = max(data[i]);
}
return x;
}
/**
* Returns the row sums for a matrix.
*/
public static double[] rowSums(double[][] data) {
double[] x = new double[data.length];
for (int i = 0; i < x.length; i++) {
x[i] = sum(data[i]);
}
return x;
}
/**
* Returns the row means for a matrix.
*/
public static double[] rowMeans(double[][] data) {
double[] x = new double[data.length];
for (int i = 0; i < x.length; i++) {
x[i] = mean(data[i]);
}
return x;
}
/**
* Returns the row standard deviations for a matrix.
*/
public static double[] rowSds(double[][] data) {
double[] x = new double[data.length];
for (int i = 0; i < x.length; i++) {
x[i] = sd(data[i]);
}
return x;
}
/**
* Returns the column minimum for a matrix.
*/
public static int[] colMin(int[][] data) {
int[] x = new int[data[0].length];
Arrays.fill(x, Integer.MAX_VALUE);
for (int i = 0; i < data.length; i++) {
for (int j = 0; j < x.length; j++) {
if (x[j] > data[i][j]) {
x[j] = data[i][j];
}
}
}
return x;
}
/**
* Returns the column maximum for a matrix.
*/
public static int[] colMax(int[][] data) {
int[] x = new int[data[0].length];
Arrays.fill(x, Integer.MIN_VALUE);
for (int i = 0; i < data.length; i++) {
for (int j = 0; j < x.length; j++) {
if (x[j] < data[i][j]) {
x[j] = data[i][j];
}
}
}
return x;
}
/**
* Returns the column sums for a matrix.
*/
public static long[] colSums(int[][] data) {
long[] x = new long[data[0].length];
for (int i = 0; i < data.length; i++) {
for (int j = 0; j < x.length; j++) {
x[j] += data[i][j];
}
}
return x;
}
/**
* Returns the column minimum for a matrix.
*/
public static double[] colMin(double[][] data) {
double[] x = new double[data[0].length];
Arrays.fill(x, Double.POSITIVE_INFINITY);
for (int i = 0; i < data.length; i++) {
for (int j = 0; j < x.length; j++) {
if (x[j] > data[i][j]) {
x[j] = data[i][j];
}
}
}
return x;
}
/**
* Returns the column maximum for a matrix.
*/
public static double[] colMax(double[][] data) {
double[] x = new double[data[0].length];
Arrays.fill(x, Double.NEGATIVE_INFINITY);
for (int i = 0; i < data.length; i++) {
for (int j = 0; j < x.length; j++) {
if (x[j] < data[i][j]) {
x[j] = data[i][j];
}
}
}
return x;
}
/**
* Returns the column sums for a matrix.
*/
public static double[] colSums(double[][] data) {
double[] x = data[0].clone();
for (int i = 1; i < data.length; i++) {
for (int j = 0; j < x.length; j++) {
x[j] += data[i][j];
}
}
return x;
}
/**
* Returns the column means for a matrix.
*/
public static double[] colMeans(double[][] data) {
double[] x = data[0].clone();
for (int i = 1; i < data.length; i++) {
for (int j = 0; j < x.length; j++) {
x[j] += data[i][j];
}
}
scale(1.0 / data.length, x);
return x;
}
/**
* Returns the column deviations for a matrix.
*/
public static double[] colSds(double[][] data) {
if (data.length < 2) {
throw new IllegalArgumentException("Array length is less than 2.");
}
int p = data[0].length;
double[] sum = new double[p];
double[] sumsq = new double[p];
for (double[] x : data) {
for (int i = 0; i < p; i++) {
sum[i] += x[i];
sumsq[i] += x[i] * x[i];
}
}
int n = data.length - 1;
for (int i = 0; i < p; i++) {
sumsq[i] = sqrt(sumsq[i] / n - (sum[i] / data.length) * (sum[i] / n));
}
return sumsq;
}
/**
* Returns the sum of an array.
*/
public static int sum(byte[] x) {
int sum = 0;
for (int n : x) {
sum += n;
}
return sum;
}
/**
* Returns the sum of an array.
*/
public static long sum(int[] x) {
long sum = 0;
for (int n : x) {
sum += n;
}
return (int) sum;
}
/**
* Returns the sum of an array.
*/
public static double sum(float[] x) {
double sum = 0.0;
for (float n : x) {
sum += n;
}
return sum;
}
/**
* Returns the sum of an array.
*/
public static double sum(double[] x) {
double sum = 0.0;
for (double n : x) {
sum += n;
}
return sum;
}
/**
* Find the median of an array of type int. The input array will
* be rearranged.
*/
public static int median(int[] a) {
return QuickSelect.median(a);
}
/**
* Find the median of an array of type float. The input array will
* be rearranged.
*/
public static float median(float[] a) {
return QuickSelect.median(a);
}
/**
* Find the median of an array of type double. The input array will
* be rearranged.
*/
public static double median(double[] a) {
return QuickSelect.median(a);
}
/**
* Find the median of an array of type double. The input array will
* be rearranged.
*/
public static > T median(T[] a) {
return QuickSelect.median(a);
}
/**
* Find the first quantile (p = 1/4) of an array of type int. The input array will
* be rearranged.
*/
public static int q1(int[] a) {
return QuickSelect.q1(a);
}
/**
* Find the first quantile (p = 1/4) of an array of type float. The input array will
* be rearranged.
*/
public static float q1(float[] a) {
return QuickSelect.q1(a);
}
/**
* Find the first quantile (p = 1/4) of an array of type double. The input array will
* be rearranged.
*/
public static double q1(double[] a) {
return QuickSelect.q1(a);
}
/**
* Find the first quantile (p = 1/4) of an array of type double. The input array will
* be rearranged.
*/
public static > T q1(T[] a) {
return QuickSelect.q1(a);
}
/**
* Find the third quantile (p = 3/4) of an array of type int. The input array will
* be rearranged.
*/
public static int q3(int[] a) {
return QuickSelect.q3(a);
}
/**
* Find the third quantile (p = 3/4) of an array of type float. The input array will
* be rearranged.
*/
public static float q3(float[] a) {
return QuickSelect.q3(a);
}
/**
* Find the third quantile (p = 3/4) of an array of type double. The input array will
* be rearranged.
*/
public static double q3(double[] a) {
return QuickSelect.q3(a);
}
/**
* Find the third quantile (p = 3/4) of an array of type double. The input array will
* be rearranged.
*/
public static > T q3(T[] a) {
return QuickSelect.q3(a);
}
/**
* Returns the mean of an array.
*/
public static double mean(int[] x) {
return (double) sum(x) / x.length;
}
/**
* Returns the mean of an array.
*/
public static double mean(float[] x) {
return sum(x) / x.length;
}
/**
* Returns the mean of an array.
*/
public static double mean(double[] x) {
return sum(x) / x.length;
}
/**
* Returns the variance of an array.
*/
public static double var(int[] x) {
if (x.length < 2) {
throw new IllegalArgumentException("Array length is less than 2.");
}
double sum = 0.0;
double sumsq = 0.0;
for (int xi : x) {
sum += xi;
sumsq += xi * xi;
}
int n = x.length - 1;
return sumsq / n - (sum / x.length) * (sum / n);
}
/**
* Returns the variance of an array.
*/
public static double var(float[] x) {
if (x.length < 2) {
throw new IllegalArgumentException("Array length is less than 2.");
}
double sum = 0.0;
double sumsq = 0.0;
for (float xi : x) {
sum += xi;
sumsq += xi * xi;
}
int n = x.length - 1;
return sumsq / n - (sum / x.length) * (sum / n);
}
/**
* Returns the variance of an array.
*/
public static double var(double[] x) {
if (x.length < 2) {
throw new IllegalArgumentException("Array length is less than 2.");
}
double sum = 0.0;
double sumsq = 0.0;
for (double xi : x) {
sum += xi;
sumsq += xi * xi;
}
int n = x.length - 1;
return sumsq / n - (sum / x.length) * (sum / n);
}
/**
* Returns the standard deviation of an array.
*/
public static double sd(int[] x) {
return sqrt(var(x));
}
/**
* Returns the standard deviation of an array.
*/
public static double sd(float[] x) {
return sqrt(var(x));
}
/**
* Returns the standard deviation of an array.
*/
public static double sd(double[] x) {
return sqrt(var(x));
}
/**
* Returns the median absolute deviation (MAD). Note that input array will
* be altered after the computation. MAD is a robust measure of
* the variability of a univariate sample of quantitative data. For a
* univariate data set X1, X2, ..., Xn,
* the MAD is defined as the median of the absolute deviations from the data's median:
*
* MAD(X) = median(|Xi - median(Xi)|)
*
* that is, starting with the residuals (deviations) from the data's median,
* the MAD is the median of their absolute values.
*
* MAD is a more robust estimator of scale than the sample variance or
* standard deviation. For instance, MAD is more resilient to outliers in
* a data set than the standard deviation. It thus behaves better with
* distributions without a mean or variance, such as the Cauchy distribution.
*
* In order to use the MAD as a consistent estimator for the estimation of
* the standard deviation σ, one takes σ = K * MAD, where K is
* a constant scale factor, which depends on the distribution. For normally
* distributed data K is taken to be 1.4826. Other distributions behave
* differently: for example for large samples from a uniform continuous
* distribution, this factor is about 1.1547.
*/
public static double mad(int[] x) {
int m = median(x);
for (int i = 0; i < x.length; i++) {
x[i] = abs(x[i] - m);
}
return median(x);
}
/**
* Returns the median absolute deviation (MAD). Note that input array will
* be altered after the computation. MAD is a robust measure of
* the variability of a univariate sample of quantitative data. For a
* univariate data set X1, X2, ..., Xn,
* the MAD is defined as the median of the absolute deviations from the data's median:
*
* MAD(X) = median(|Xi - median(Xi)|)
*
* that is, starting with the residuals (deviations) from the data's median,
* the MAD is the median of their absolute values.
*
* MAD is a more robust estimator of scale than the sample variance or
* standard deviation. For instance, MAD is more resilient to outliers in
* a data set than the standard deviation. It thus behaves better with
* distributions without a mean or variance, such as the Cauchy distribution.
*
* In order to use the MAD as a consistent estimator for the estimation of
* the standard deviation σ, one takes σ = K * MAD, where K is
* a constant scale factor, which depends on the distribution. For normally
* distributed data K is taken to be 1.4826. Other distributions behave
* differently: for example for large samples from a uniform continuous
* distribution, this factor is about 1.1547.
*/
public static double mad(float[] x) {
float m = median(x);
for (int i = 0; i < x.length; i++) {
x[i] = abs(x[i] - m);
}
return median(x);
}
/**
* Returns the median absolute deviation (MAD). Note that input array will
* be altered after the computation. MAD is a robust measure of
* the variability of a univariate sample of quantitative data. For a
* univariate data set X1, X2, ..., Xn,
* the MAD is defined as the median of the absolute deviations from the data's median:
*
* MAD(X) = median(|Xi - median(Xi)|)
*
* that is, starting with the residuals (deviations) from the data's median,
* the MAD is the median of their absolute values.
*
* MAD is a more robust estimator of scale than the sample variance or
* standard deviation. For instance, MAD is more resilient to outliers in
* a data set than the standard deviation. It thus behaves better with
* distributions without a mean or variance, such as the Cauchy distribution.
*
* In order to use the MAD as a consistent estimator for the estimation of
* the standard deviation σ, one takes σ = K * MAD, where K is
* a constant scale factor, which depends on the distribution. For normally
* distributed data K is taken to be 1.4826. Other distributions behave
* differently: for example for large samples from a uniform continuous
* distribution, this factor is about 1.1547.
*/
public static double mad(double[] x) {
double m = median(x);
for (int i = 0; i < x.length; i++) {
x[i] = abs(x[i] - m);
}
return median(x);
}
/**
* Given a set of boolean values, are all of the values true?
*/
public static boolean all(boolean[] x) {
for (boolean b : x) {
if (!b) {
return false;
}
}
return true;
}
/**
* Given a set of boolean values, is at least one of the values true?
*/
public static boolean any(boolean[] x) {
for (boolean b : x) {
if (b) {
return true;
}
}
return false;
}
/**
* The Euclidean distance on binary sparse arrays,
* which are the indices of nonzero elements in ascending order.
*/
public static double distance(int[] x, int[] y) {
return sqrt(squaredDistance(x, y));
}
/**
* The Euclidean distance.
*/
public static double distance(float[] x, float[] y) {
return sqrt(squaredDistance(x, y));
}
/**
* The Euclidean distance.
*/
public static double distance(double[] x, double[] y) {
return sqrt(squaredDistance(x, y));
}
/**
* The Euclidean distance.
*/
public static double distance(SparseArray x, SparseArray y) {
return sqrt(squaredDistance(x, y));
}
/**
* The squared Euclidean distance on binary sparse arrays,
* which are the indices of nonzero elements in ascending order.
*/
public static double squaredDistance(int[] x, int[] y) {
double d = 0.0;
int p1 = 0, p2 = 0;
while (p1 < x.length && p2 < y.length) {
int i1 = x[p1];
int i2 = y[p2];
if (i1 == i2) {
p1++;
p2++;
} else if (i1 > i2) {
d++;
p2++;
} else {
d++;
p1++;
}
}
d += x.length - p1;
d += y.length - p2;
return d;
}
/**
* The squared Euclidean distance.
*/
public static double squaredDistance(float[] x, float[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Input vector sizes are different.");
}
switch (x.length) {
case 2: {
double d0 = (double) x[0] - (double) y[0];
double d1 = (double) x[1] - (double) y[1];
return d0 * d0 + d1 * d1;
}
case 3: {
double d0 = (double) x[0] - (double) y[0];
double d1 = (double) x[1] - (double) y[1];
double d2 = (double) x[2] - (double) y[2];
return d0 * d0 + d1 * d1 + d2 * d2;
}
case 4: {
double d0 = (double) x[0] - (double) y[0];
double d1 = (double) x[1] - (double) y[1];
double d2 = (double) x[2] - (double) y[2];
double d3 = (double) x[3] - (double) y[3];
return d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3;
}
}
double sum = 0.0;
for (int i = 0; i < x.length; i++) {
// covert x and y for better precision
double d = (double) x[i] - (double) y[i];
sum += d * d;
}
return sum;
}
/**
* The squared Euclidean distance.
*/
public static double squaredDistance(double[] x, double[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Input vector sizes are different.");
}
switch (x.length) {
case 2: {
double d0 = x[0] - y[0];
double d1 = x[1] - y[1];
return d0 * d0 + d1 * d1;
}
case 3: {
double d0 = x[0] - y[0];
double d1 = x[1] - y[1];
double d2 = x[2] - y[2];
return d0 * d0 + d1 * d1 + d2 * d2;
}
case 4: {
double d0 = x[0] - y[0];
double d1 = x[1] - y[1];
double d2 = x[2] - y[2];
double d3 = x[3] - y[3];
return d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3;
}
}
double sum = 0.0;
for (int i = 0; i < x.length; i++) {
double d = x[i] - y[i];
sum += d * d;
}
return sum;
}
/**
* The Euclidean distance on sparse arrays.
*/
public static double squaredDistance(SparseArray x, SparseArray y) {
Iterator it1 = x.iterator();
Iterator it2 = y.iterator();
SparseArray.Entry e1 = it1.hasNext() ? it1.next() : null;
SparseArray.Entry e2 = it2.hasNext() ? it2.next() : null;
double sum = 0.0;
while (e1 != null && e2 != null) {
if (e1.i == e2.i) {
sum += sqr(e1.x - e2.x);
e1 = it1.hasNext() ? it1.next() : null;
e2 = it2.hasNext() ? it2.next() : null;
} else if (e1.i > e2.i) {
sum += sqr(e2.x);
e2 = it2.hasNext() ? it2.next() : null;
} else {
sum += sqr(e1.x);
e1 = it1.hasNext() ? it1.next() : null;
}
}
while (it1.hasNext()) {
double d = it1.next().x;
sum += d * d;
}
while (it2.hasNext()) {
double d = it2.next().x;
sum += d * d;
}
return sum;
}
/**
* The squared Euclidean distance with handling missing values (represented as NaN).
*/
public static double squaredDistanceWithMissingValues(double[] x, double[] y) {
int n = x.length;
int m = 0;
double dist = 0.0;
for (int i = 0; i < n; i++) {
if (!Double.isNaN(x[i]) && !Double.isNaN(y[i])) {
m++;
double d = x[i] - y[i];
dist += d * d;
}
}
if (m == 0) {
dist = Double.MAX_VALUE;
} else {
dist = n * dist / m;
}
return dist;
}
/**
* Returns the pairwise distance matrix of multiple binary sparse vectors.
* @param x Each row is a binary sparse array, which are the indices of
* nonzero elements in ascending order.
* @return a full pairwise distance matrix.
*/
public static Matrix pdist(int[][] x) {
return pdist(x, false);
}
/**
* Returns the pairwise distance matrix of multiple binary sparse vectors.
* @param x Each row is a binary sparse array, which are the indices of
* nonzero elements in ascending order.
* @param squared If true, compute the squared Euclidean distance.
* @return the pairwise distance matrix.
*/
public static Matrix pdist(int[][] x, boolean squared) {
int n = x.length;
double[][] dist = new double[n][n];
pdist(x, dist, squared ? MathEx::squaredDistance : MathEx::distance);
return new Matrix(dist);
}
/**
* Returns the pairwise distance matrix of multiple vectors.
* @param x Rows of x correspond to observations, and columns correspond to variables.
* @return a full pairwise distance matrix.
*/
public static Matrix pdist(float[][] x) {
return pdist(x, false);
}
/**
* Returns the pairwise distance matrix of multiple vectors.
* @param x Rows of x correspond to observations, and columns correspond to variables.
* @param squared If true, compute the squared Euclidean distance.
* @return the pairwise distance matrix.
*/
public static Matrix pdist(float[][] x, boolean squared) {
int n = x.length;
double[][] dist = new double[n][n];
pdist(x, dist, squared ? MathEx::squaredDistance : MathEx::distance);
return new Matrix(dist);
}
/**
* Returns the pairwise distance matrix of multiple vectors.
* @param x Rows of x correspond to observations, and columns correspond to variables.
* @return a full pairwise distance matrix.
*/
public static Matrix pdist(double[][] x) {
return pdist(x, false);
}
/**
* Returns the pairwise distance matrix of multiple vectors.
* @param x Rows of x correspond to observations, and columns correspond to variables.
* @param squared If true, compute the squared Euclidean distance.
* @return the pairwise distance matrix.
*/
public static Matrix pdist(double[][] x, boolean squared) {
int n = x.length;
double[][] dist = new double[n][n];
pdist(x, dist, squared ? MathEx::squaredDistance : MathEx::distance);
return new Matrix(dist);
}
/**
* Returns the pairwise distance matrix of multiple vectors.
* @param x Each row is the sparse array of observations
* @return a full pairwise distance matrix.
*/
public static Matrix pdist(SparseArray[] x) {
return pdist(x, false);
}
/**
* Returns the pairwise distance matrix of multiple vectors.
* @param x Each row is the sparse array of observations
* @param squared If true, compute the squared Euclidean distance.
* @return the pairwise distance matrix.
*/
public static Matrix pdist(SparseArray[] x, boolean squared) {
int n = x.length;
double[][] dist = new double[n][n];
pdist(x, dist, squared ? MathEx::squaredDistance : MathEx::distance);
return new Matrix(dist);
}
/**
* Computes the pairwise distance matrix of multiple vectors.
* @param x Rows of x correspond to observations, and columns correspond to variables.
* @param d The output distance matrix. It may be only the lower half.
* @param distance The distance lambda.
*/
public static void pdist(T[] x, double[][] d, Distance distance) {
int n = x.length;
if (d[0].length < n) {
IntStream.range(0, n).parallel().forEach(i -> {
T xi = x[i];
double[] di = d[i];
for (int j = 0; j < i; j++) {
di[j] = distance.d(xi, x[j]);
}
});
} else {
IntStream.range(0, n).parallel().forEach(i -> {
T xi = x[i];
double[] di = d[i];
for (int j = 0; j < n; j++) {
di[j] = distance.d(xi, x[j]);
}
});
}
}
/**
* Shannon's entropy.
* @param p the probabilities.
*/
public static double entropy(double[] p) {
double h = 0.0;
for (double pi : p) {
if (pi > 0) {
h -= pi * Math.log(pi);
}
}
return h;
}
/**
* Kullback-Leibler divergence. The Kullback-Leibler divergence (also
* information divergence, information gain, relative entropy, or KLIC)
* is a non-symmetric measure of the difference between two probability
* distributions P and Q. KL measures the expected number of extra bits
* required to code samples from P when using a code based on Q, rather
* than using a code based on P. Typically P represents the "true"
* distribution of data, observations, or a precise calculated theoretical
* distribution. The measure Q typically represents a theory, model,
* description, or approximation of P.
*
* Although it is often intuited as a distance metric, the KL divergence is
* not a true metric - for example, the KL from P to Q is not necessarily
* the same as the KL from Q to P.
*/
public static double KullbackLeiblerDivergence(double[] x, double[] y) {
boolean intersection = false;
double kl = 0.0;
for (int i = 0; i < x.length; i++) {
if (x[i] != 0.0 && y[i] != 0.0) {
intersection = true;
kl += x[i] * Math.log(x[i] / y[i]);
}
}
if (intersection) {
return kl;
} else {
return Double.POSITIVE_INFINITY;
}
}
/**
* Kullback-Leibler divergence. The Kullback-Leibler divergence (also
* information divergence, information gain, relative entropy, or KLIC)
* is a non-symmetric measure of the difference between two probability
* distributions P and Q. KL measures the expected number of extra bits
* required to code samples from P when using a code based on Q, rather
* than using a code based on P. Typically P represents the "true"
* distribution of data, observations, or a precise calculated theoretical
* distribution. The measure Q typically represents a theory, model,
* description, or approximation of P.
*
* Although it is often intuited as a distance metric, the KL divergence is
* not a true metric - for example, the KL from P to Q is not necessarily
* the same as the KL from Q to P.
*/
public static double KullbackLeiblerDivergence(SparseArray x, SparseArray y) {
if (x.isEmpty()) {
throw new IllegalArgumentException("List x is empty.");
}
if (y.isEmpty()) {
throw new IllegalArgumentException("List y is empty.");
}
Iterator iterX = x.iterator();
Iterator iterY = y.iterator();
SparseArray.Entry a = iterX.hasNext() ? iterX.next() : null;
SparseArray.Entry b = iterY.hasNext() ? iterY.next() : null;
boolean intersection = false;
double kl = 0.0;
while (a != null && b != null) {
if (a.i < b.i) {
a = iterX.hasNext() ? iterX.next() : null;
} else if (a.i > b.i) {
b = iterY.hasNext() ? iterY.next() : null;
} else {
intersection = true;
kl += a.x * Math.log(a.x / b.x);
a = iterX.hasNext() ? iterX.next() : null;
b = iterY.hasNext() ? iterY.next() : null;
}
}
if (intersection) {
return kl;
} else {
return Double.POSITIVE_INFINITY;
}
}
/**
* Kullback-Leibler divergence. The Kullback-Leibler divergence (also
* information divergence, information gain, relative entropy, or KLIC)
* is a non-symmetric measure of the difference between two probability
* distributions P and Q. KL measures the expected number of extra bits
* required to code samples from P when using a code based on Q, rather
* than using a code based on P. Typically P represents the "true"
* distribution of data, observations, or a precise calculated theoretical
* distribution. The measure Q typically represents a theory, model,
* description, or approximation of P.
*
* Although it is often intuited as a distance metric, the KL divergence is
* not a true metric - for example, the KL from P to Q is not necessarily
* the same as the KL from Q to P.
*/
public static double KullbackLeiblerDivergence(double[] x, SparseArray y) {
return KullbackLeiblerDivergence(y, x);
}
/**
* Kullback-Leibler divergence. The Kullback-Leibler divergence (also
* information divergence, information gain, relative entropy, or KLIC)
* is a non-symmetric measure of the difference between two probability
* distributions P and Q. KL measures the expected number of extra bits
* required to code samples from P when using a code based on Q, rather
* than using a code based on P. Typically P represents the "true"
* distribution of data, observations, or a precise calculated theoretical
* distribution. The measure Q typically represents a theory, model,
* description, or approximation of P.
*
* Although it is often intuited as a distance metric, the KL divergence is
* not a true metric - for example, the KL from P to Q is not necessarily
* the same as the KL from Q to P.
*/
public static double KullbackLeiblerDivergence(SparseArray x, double[] y) {
if (x.isEmpty()) {
throw new IllegalArgumentException("List x is empty.");
}
Iterator iter = x.iterator();
boolean intersection = false;
double kl = 0.0;
while (iter.hasNext()) {
SparseArray.Entry b = iter.next();
int i = b.i;
if (y[i] > 0) {
intersection = true;
kl += b.x * Math.log(b.x / y[i]);
}
}
if (intersection) {
return kl;
} else {
return Double.POSITIVE_INFINITY;
}
}
/**
* Jensen-Shannon divergence JS(P||Q) = (KL(P||M) + KL(Q||M)) / 2, where
* M = (P+Q)/2. The Jensen-Shannon divergence is a popular
* method of measuring the similarity between two probability distributions.
* It is also known as information radius or total divergence to the average.
* It is based on the Kullback-Leibler divergence, with the difference that
* it is always a finite value. The square root of the Jensen-Shannon divergence
* is a metric.
*/
public static double JensenShannonDivergence(double[] x, double[] y) {
double[] m = new double[x.length];
for (int i = 0; i < m.length; i++) {
m[i] = (x[i] + y[i]) / 2;
}
return (KullbackLeiblerDivergence(x, m) + KullbackLeiblerDivergence(y, m)) / 2;
}
/**
* Jensen-Shannon divergence JS(P||Q) = (KL(P||M) + KL(Q||M)) / 2, where
* M = (P+Q)/2. The Jensen-Shannon divergence is a popular
* method of measuring the similarity between two probability distributions.
* It is also known as information radius or total divergence to the average.
* It is based on the Kullback-Leibler divergence, with the difference that
* it is always a finite value. The square root of the Jensen-Shannon divergence
* is a metric.
*/
public static double JensenShannonDivergence(SparseArray x, SparseArray y) {
if (x.isEmpty()) {
throw new IllegalArgumentException("List x is empty.");
}
if (y.isEmpty()) {
throw new IllegalArgumentException("List y is empty.");
}
Iterator iterX = x.iterator();
Iterator iterY = y.iterator();
SparseArray.Entry a = iterX.hasNext() ? iterX.next() : null;
SparseArray.Entry b = iterY.hasNext() ? iterY.next() : null;
double js = 0.0;
while (a != null && b != null) {
if (a.i < b.i) {
double mi = a.x / 2;
js += a.x * Math.log(a.x / mi);
a = iterX.hasNext() ? iterX.next() : null;
} else if (a.i > b.i) {
double mi = b.x / 2;
js += b.x * Math.log(b.x / mi);
b = iterY.hasNext() ? iterY.next() : null;
} else {
double mi = (a.x + b.x) / 2;
js += a.x * Math.log(a.x / mi) + b.x * Math.log(b.x / mi);
a = iterX.hasNext() ? iterX.next() : null;
b = iterY.hasNext() ? iterY.next() : null;
}
}
return js / 2;
}
/**
* Jensen-Shannon divergence JS(P||Q) = (KL(P||M) + KL(Q||M)) / 2, where
* M = (P+Q)/2. The Jensen-Shannon divergence is a popular
* method of measuring the similarity between two probability distributions.
* It is also known as information radius or total divergence to the average.
* It is based on the Kullback-Leibler divergence, with the difference that
* it is always a finite value. The square root of the Jensen-Shannon divergence
* is a metric.
*/
public static double JensenShannonDivergence(double[] x, SparseArray y) {
return JensenShannonDivergence(y, x);
}
/**
* Jensen-Shannon divergence JS(P||Q) = (KL(P||M) + KL(Q||M)) / 2, where
* M = (P+Q)/2. The Jensen-Shannon divergence is a popular
* method of measuring the similarity between two probability distributions.
* It is also known as information radius or total divergence to the average.
* It is based on the Kullback-Leibler divergence, with the difference that
* it is always a finite value. The square root of the Jensen-Shannon divergence
* is a metric.
*/
public static double JensenShannonDivergence(SparseArray x, double[] y) {
if (x.isEmpty()) {
throw new IllegalArgumentException("List x is empty.");
}
Iterator iter = x.iterator();
double js = 0.0;
while (iter.hasNext()) {
SparseArray.Entry b = iter.next();
int i = b.i;
double mi = (b.x + y[i]) / 2;
js += b.x * Math.log(b.x / mi);
if (y[i] > 0) {
js += y[i] * Math.log(y[i] / mi);
}
}
return js / 2;
}
/**
* Returns the dot product between two binary sparse arrays,
* which are the indices of nonzero elements in ascending order.
*/
public static int dot(int[] x, int[] y) {
int sum = 0;
for (int p1 = 0, p2 = 0; p1 < x.length && p2 < y.length; ) {
int i1 = x[p1];
int i2 = y[p2];
if (i1 == i2) {
sum++;
p1++;
p2++;
} else if (i1 > i2) {
p2++;
} else {
p1++;
}
}
return sum;
}
/**
* Returns the dot product between two vectors.
*/
public static float dot(float[] x, float[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Arrays have different length.");
}
float sum = 0.0F;
for (int i = 0; i < x.length; i++) {
sum += x[i] * y[i];
}
return sum;
}
/**
* Returns the dot product between two vectors.
*/
public static double dot(double[] x, double[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Arrays have different length.");
}
double sum = 0.0;
for (int i = 0; i < x.length; i++) {
sum += x[i] * y[i];
}
return sum;
}
/**
* Returns the dot product between two sparse arrays.
*/
public static double dot(SparseArray x, SparseArray y) {
Iterator it1 = x.iterator();
Iterator it2 = y.iterator();
SparseArray.Entry e1 = it1.hasNext() ? it1.next() : null;
SparseArray.Entry e2 = it2.hasNext() ? it2.next() : null;
double sum = 0.0;
while (e1 != null && e2 != null) {
if (e1.i == e2.i) {
sum += e1.x * e2.x;
e1 = it1.hasNext() ? it1.next() : null;
e2 = it2.hasNext() ? it2.next() : null;
} else if (e1.i > e2.i) {
e2 = it2.hasNext() ? it2.next() : null;
} else {
e1 = it1.hasNext() ? it1.next() : null;
}
}
return sum;
}
/**
* Returns the pairwise dot product matrix of binary sparse vectors.
* @param x Rows of x correspond to observations, and columns correspond to variables.
* @return The dot product matrix.
*/
public static Matrix pdot(int[][] x) {
int n = x.length;
Matrix matrix = new Matrix(n, n);
matrix.uplo(UPLO.LOWER);
IntStream.range(0, n).parallel().forEach(j -> {
int[] xj = x[j];
for (int i = 0; i < n; i++) {
matrix.set(i, j, dot(x[i], xj));
}
});
return matrix;
}
/**
* Returns the pairwise dot product matrix of float vectors.
* @param x Rows of x correspond to observations, and columns correspond to variables.
* @return The dot product matrix.
*/
public static Matrix pdot(float[][] x) {
int n = x.length;
Matrix matrix = new Matrix(n, n);
matrix.uplo(UPLO.LOWER);
IntStream.range(0, n).parallel().forEach(j -> {
float[] xj = x[j];
for (int i = 0; i < n; i++) {
matrix.set(i, j, dot(x[i], xj));
}
});
return matrix;
}
/**
* Returns the pairwise dot product matrix of double vectors.
* @param x Rows of x correspond to observations, and columns correspond to variables.
* @return The dot product matrix.
*/
public static Matrix pdot(double[][] x) {
int n = x.length;
Matrix matrix = new Matrix(n, n);
matrix.uplo(UPLO.LOWER);
IntStream.range(0, n).parallel().forEach(j -> {
double[] xj = x[j];
for (int i = 0; i < n; i++) {
matrix.set(i, j, dot(x[i], xj));
}
});
return matrix;
}
/**
* Returns the pairwise dot product matrix of multiple vectors.
* @param x Rows of x correspond to observations, and columns correspond to variables.
* @return The dot product matrix.
*/
public static Matrix pdot(SparseArray[] x) {
int n = x.length;
Matrix matrix = new Matrix(n, n);
matrix.uplo(UPLO.LOWER);
IntStream.range(0, n).parallel().forEach(j -> {
SparseArray xj = x[j];
for (int i = 0; i < n; i++) {
matrix.set(i, j, dot(x[i], xj));
}
});
return matrix;
}
/**
* Returns the covariance between two vectors.
*/
public static double cov(int[] x, int[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Arrays have different length.");
}
if (x.length < 3) {
throw new IllegalArgumentException("array length has to be at least 3.");
}
double mx = mean(x);
double my = mean(y);
double Sxy = 0.0;
for (int i = 0; i < x.length; i++) {
double dx = x[i] - mx;
double dy = y[i] - my;
Sxy += dx * dy;
}
return Sxy / (x.length - 1);
}
/**
* Returns the covariance between two vectors.
*/
public static double cov(float[] x, float[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Arrays have different length.");
}
if (x.length < 3) {
throw new IllegalArgumentException("array length has to be at least 3.");
}
double mx = mean(x);
double my = mean(y);
double Sxy = 0.0;
for (int i = 0; i < x.length; i++) {
double dx = x[i] - mx;
double dy = y[i] - my;
Sxy += dx * dy;
}
return Sxy / (x.length - 1);
}
/**
* Returns the covariance between two vectors.
*/
public static double cov(double[] x, double[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Arrays have different length.");
}
if (x.length < 3) {
throw new IllegalArgumentException("array length has to be at least 3.");
}
double mx = mean(x);
double my = mean(y);
double Sxy = 0.0;
for (int i = 0; i < x.length; i++) {
double dx = x[i] - mx;
double dy = y[i] - my;
Sxy += dx * dy;
}
return Sxy / (x.length - 1);
}
/**
* Returns the sample covariance matrix.
*/
public static double[][] cov(double[][] data) {
return cov(data, colMeans(data));
}
/**
* Returns the sample covariance matrix.
* @param mu the known mean of data.
*/
public static double[][] cov(double[][] data, double[] mu) {
double[][] sigma = new double[data[0].length][data[0].length];
for (int i = 0; i < data.length; i++) {
for (int j = 0; j < mu.length; j++) {
for (int k = 0; k <= j; k++) {
sigma[j][k] += (data[i][j] - mu[j]) * (data[i][k] - mu[k]);
}
}
}
int n = data.length - 1;
for (int j = 0; j < mu.length; j++) {
for (int k = 0; k <= j; k++) {
sigma[j][k] /= n;
sigma[k][j] = sigma[j][k];
}
}
return sigma;
}
/**
* Returns the correlation coefficient between two vectors.
*/
public static double cor(int[] x, int[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Arrays have different length.");
}
if (x.length < 3) {
throw new IllegalArgumentException("array length has to be at least 3.");
}
double Sxy = cov(x, y);
double Sxx = var(x);
double Syy = var(y);
if (Sxx == 0 || Syy == 0) {
return Double.NaN;
}
return Sxy / sqrt(Sxx * Syy);
}
/**
* Returns the correlation coefficient between two vectors.
*/
public static double cor(float[] x, float[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Arrays have different length.");
}
if (x.length < 3) {
throw new IllegalArgumentException("array length has to be at least 3.");
}
double Sxy = cov(x, y);
double Sxx = var(x);
double Syy = var(y);
if (Sxx == 0 || Syy == 0) {
return Double.NaN;
}
return Sxy / sqrt(Sxx * Syy);
}
/**
* Returns the correlation coefficient between two vectors.
*/
public static double cor(double[] x, double[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Arrays have different length.");
}
if (x.length < 3) {
throw new IllegalArgumentException("array length has to be at least 3.");
}
double Sxy = cov(x, y);
double Sxx = var(x);
double Syy = var(y);
if (Sxx == 0 || Syy == 0) {
return Double.NaN;
}
return Sxy / sqrt(Sxx * Syy);
}
/**
* Returns the sample correlation matrix.
*/
public static double[][] cor(double[][] data) {
return cor(data, colMeans(data));
}
/**
* Returns the sample correlation matrix.
* @param mu the known mean of data.
*/
public static double[][] cor(double[][] data, double[] mu) {
double[][] sigma = cov(data, mu);
int n = data[0].length;
double[] sd = new double[n];
for (int i = 0; i < n; i++) {
sd[i] = sqrt(sigma[i][i]);
}
for (int i = 0; i < n; i++) {
for (int j = 0; j <= i; j++) {
sigma[i][j] /= sd[i] * sd[j];
sigma[j][i] = sigma[i][j];
}
}
return sigma;
}
/**
* Given a sorted array, replaces the elements by their rank, including
* midranking of ties, and returns as s the sum of f3 - f, where
* f is the number of elements in each tie.
*/
private static double crank(double[] w) {
int n = w.length;
double s = 0.0;
int j = 1;
while (j < n) {
if (w[j] != w[j - 1]) {
w[j - 1] = j;
++j;
} else {
int jt = j + 1;
while (jt <= n && w[jt - 1] == w[j - 1]) {
jt++;
}
double rank = 0.5 * (j + jt - 1);
for (int ji = j; ji <= (jt - 1); ji++) {
w[ji - 1] = rank;
}
double t = jt - j;
s += (t * t * t - t);
j = jt;
}
}
if (j == n) {
w[n - 1] = n;
}
return s;
}
/**
* The Spearman Rank Correlation Coefficient is a form of the Pearson
* coefficient with the data converted to rankings (ie. when variables
* are ordinal). It can be used when there is non-parametric data and hence
* Pearson cannot be used.
*/
public static double spearman(int[] x, int[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Input vector sizes are different.");
}
int n = x.length;
double[] wksp1 = new double[n];
double[] wksp2 = new double[n];
for (int j = 0; j < n; j++) {
wksp1[j] = x[j];
wksp2[j] = y[j];
}
QuickSort.sort(wksp1, wksp2);
crank(wksp1);
QuickSort.sort(wksp2, wksp1);
crank(wksp2);
return cor(wksp1, wksp2);
}
/**
* The Spearman Rank Correlation Coefficient is a form of the Pearson
* coefficient with the data converted to rankings (ie. when variables
* are ordinal). It can be used when there is non-parametric data and hence
* Pearson cannot be used.
*/
public static double spearman(float[] x, float[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Input vector sizes are different.");
}
int n = x.length;
double[] wksp1 = new double[n];
double[] wksp2 = new double[n];
for (int j = 0; j < n; j++) {
wksp1[j] = x[j];
wksp2[j] = y[j];
}
QuickSort.sort(wksp1, wksp2);
crank(wksp1);
QuickSort.sort(wksp2, wksp1);
crank(wksp2);
return cor(wksp1, wksp2);
}
/**
* The Spearman Rank Correlation Coefficient is a form of the Pearson
* coefficient with the data converted to rankings (ie. when variables
* are ordinal). It can be used when there is non-parametric data and hence
* Pearson cannot be used.
*/
public static double spearman(double[] x, double[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Input vector sizes are different.");
}
double[] wksp1 = x.clone();
double[] wksp2 = y.clone();
QuickSort.sort(wksp1, wksp2);
crank(wksp1);
QuickSort.sort(wksp2, wksp1);
crank(wksp2);
return cor(wksp1, wksp2);
}
/**
* The Kendall Tau Rank Correlation Coefficient is used to measure the
* degree of correspondence between sets of rankings where the measures
* are not equidistant. It is used with non-parametric data.
*/
public static double kendall(int[] x, int[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Input vector sizes are different.");
}
int is = 0, n2 = 0, n1 = 0, n = x.length;
double aa, a2, a1;
for (int j = 0; j < n - 1; j++) {
for (int k = j + 1; k < n; k++) {
a1 = x[j] - x[k];
a2 = y[j] - y[k];
aa = a1 * a2;
if (aa != 0.0) {
++n1;
++n2;
if (aa > 0) {
++is;
} else {
--is;
}
} else {
if (a1 != 0.0) {
++n1;
}
if (a2 != 0.0) {
++n2;
}
}
}
}
double tau = is / (sqrt(n1) * sqrt(n2));
return tau;
}
/**
* The Kendall Tau Rank Correlation Coefficient is used to measure the
* degree of correspondence between sets of rankings where the measures
* are not equidistant. It is used with non-parametric data.
*/
public static double kendall(float[] x, float[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Input vector sizes are different.");
}
int is = 0, n2 = 0, n1 = 0, n = x.length;
double aa, a2, a1;
for (int j = 0; j < n - 1; j++) {
for (int k = j + 1; k < n; k++) {
a1 = x[j] - x[k];
a2 = y[j] - y[k];
aa = a1 * a2;
if (aa != 0.0) {
++n1;
++n2;
if (aa > 0) {
++is;
} else {
--is;
}
} else {
if (a1 != 0.0) {
++n1;
}
if (a2 != 0.0) {
++n2;
}
}
}
}
double tau = is / (sqrt(n1) * sqrt(n2));
return tau;
}
/**
* The Kendall Tau Rank Correlation Coefficient is used to measure the
* degree of correspondence between sets of rankings where the measures
* are not equidistant. It is used with non-parametric data.
*/
public static double kendall(double[] x, double[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException("Input vector sizes are different.");
}
int is = 0, n2 = 0, n1 = 0, n = x.length;
double aa, a2, a1;
for (int j = 0; j < n - 1; j++) {
for (int k = j + 1; k < n; k++) {
a1 = x[j] - x[k];
a2 = y[j] - y[k];
aa = a1 * a2;
if (aa != 0.0) {
++n1;
++n2;
if (aa > 0) {
++is;
} else {
--is;
}
} else {
if (a1 != 0.0) {
++n1;
}
if (a2 != 0.0) {
++n2;
}
}
}
}
double tau = is / (sqrt(n1) * sqrt(n2));
return tau;
}
/**
* L1 vector norm.
*/
public static float norm1(float[] x) {
float norm = 0.0F;
for (float n : x) {
norm += abs(n);
}
return norm;
}
/**
* L1 vector norm.
*/
public static double norm1(double[] x) {
double norm = 0.0;
for (double n : x) {
norm += abs(n);
}
return norm;
}
/**
* L2 vector norm.
*/
public static float norm2(float[] x) {
float norm = 0.0F;
for (float n : x) {
norm += n * n;
}
norm = (float) sqrt(norm);
return norm;
}
/**
* L2 vector norm.
*/
public static double norm2(double[] x) {
double norm = 0.0;
for (double n : x) {
norm += n * n;
}
norm = sqrt(norm);
return norm;
}
/**
* L-infinity vector norm. Maximum absolute value.
*/
public static float normInf(float[] x) {
int n = x.length;
float f = abs(x[0]);
for (int i = 1; i < n; i++) {
f = Math.max(f, abs(x[i]));
}
return f;
}
/**
* L-infinity vector norm. Maximum absolute value.
*/
public static double normInf(double[] x) {
int n = x.length;
double f = abs(x[0]);
for (int i = 1; i < n; i++) {
f = Math.max(f, abs(x[i]));
}
return f;
}
/**
* L2 vector norm.
*/
public static float norm(float[] x) {
return norm2(x);
}
/**
* L2 vector norm.
*/
public static double norm(double[] x) {
return norm2(x);
}
/** Returns the cosine similarity. */
public static float cos(float[] x, float[] y) {
return dot(x, y) / (norm2(x) * norm2(y));
}
/** Returns the cosine similarity. */
public static double cos(double[] x, double[] y) {
return dot(x, y) / (norm2(x) * norm2(y));
}
/**
* Standardizes an array to mean 0 and variance 1.
*/
public static void standardize(double[] x) {
double mu = mean(x);
double sigma = sd(x);
if (isZero(sigma)) {
logger.warn("array has variance of 0.");
return;
}
for (int i = 0; i < x.length; i++) {
x[i] = (x[i] - mu) / sigma;
}
}
/**
* Scales each column of a matrix to range [0, 1].
*/
public static void scale(double[][] x) {
int n = x.length;
int p = x[0].length;
double[] min = colMin(x);
double[] max = colMax(x);
for (int j = 0; j < p; j++) {
double scale = max[j] - min[j];
if (!isZero(scale)) {
for (int i = 0; i < n; i++) {
x[i][j] = (x[i][j] - min[j]) / scale;
}
} else {
for (int i = 0; i < n; i++) {
x[i][j] = 0.5;
}
}
}
}
/**
* Standardizes each column of a matrix to 0 mean and unit variance.
*/
public static void standardize(double[][] x) {
int n = x.length;
int p = x[0].length;
double[] center = colMeans(x);
for (int i = 0; i < n; i++) {
for (int j = 0; j < p; j++) {
x[i][j] = x[i][j] - center[j];
}
}
double[] scale = new double[p];
for (int j = 0; j < p; j++) {
for (int i = 0; i < n; i++) {
scale[j] += sqr(x[i][j]);
}
scale[j] = sqrt(scale[j] / (n-1));
if (!isZero(scale[j])) {
for (int i = 0; i < n; i++) {
x[i][j] /= scale[j];
}
}
}
}
/**
* Unitizes each column of a matrix to unit length (L_2 norm).
*/
public static void normalize(double[][] x) {
normalize(x, false);
}
/**
* Unitizes each column of a matrix to unit length (L_2 norm).
* @param centerizing If true, centerize each column to 0 mean.
*/
public static void normalize(double[][] x, boolean centerizing) {
int n = x.length;
int p = x[0].length;
if (centerizing) {
double[] center = colMeans(x);
for (int i = 0; i < n; i++) {
for (int j = 0; j < p; j++) {
x[i][j] = x[i][j] - center[j];
}
}
}
double[] scale = new double[p];
for (int j = 0; j < p; j++) {
for (int i = 0; i < n; i++) {
scale[j] += sqr(x[i][j]);
}
scale[j] = sqrt(scale[j]);
}
for (int i = 0; i < n; i++) {
for (int j = 0; j < p; j++) {
if (!isZero(scale[j])) {
x[i][j] /= scale[j];
}
}
}
}
/**
* Unitize an array so that L2 norm of x = 1.
*
* @param x the array of double
*/
public static void unitize(double[] x) {
unitize2(x);
}
/**
* Unitize an array so that L1 norm of x is 1.
*
* @param x an array of non-negative double
*/
public static void unitize1(double[] x) {
double n = norm1(x);
for (int i = 0; i < x.length; i++) {
x[i] /= n;
}
}
/**
* Unitize an array so that L2 norm of x = 1.
*
* @param x the array of double
*/
public static void unitize2(double[] x) {
double n = norm(x);
for (int i = 0; i < x.length; i++) {
x[i] /= n;
}
}
/**
* Check if x element-wisely equals y with default epsilon 1E-7.
*/
public static boolean equals(float[] x, float[] y) {
return equals(x, y, 1.0E-7f);
}
/**
* Check if x element-wisely equals y.
*/
public static boolean equals(float[] x, float[] y, float epsilon) {
if (x.length != y.length) {
throw new IllegalArgumentException(String.format("Arrays have different length: x[%d], y[%d]", x.length, y.length));
}
for (int i = 0; i < x.length; i++) {
if (abs(x[i] - y[i]) > epsilon) {
return false;
}
}
return true;
}
/**
* Check if x element-wisely equals y with default epsilon 1E-10.
*/
public static boolean equals(double[] x, double[] y) {
return equals(x, y, 1.0E-10);
}
/**
* Check if x element-wisely equals y.
*/
public static boolean equals(double[] x, double[] y, double eps) {
if (x.length != y.length) {
throw new IllegalArgumentException(String.format("Arrays have different length: x[%d], y[%d]", x.length, y.length));
}
if (eps <= 0.0) {
throw new IllegalArgumentException("Invalid epsilon: " + eps);
}
for (int i = 0; i < x.length; i++) {
if (abs(x[i] - y[i]) > eps) {
return false;
}
}
return true;
}
/**
* Check if x element-wisely equals y with default epsilon 1E-7.
*/
public static boolean equals(float[][] x, float[][] y) {
return equals(x, y, 1.0E-7f);
}
/**
* Check if x element-wisely equals y.
*/
public static boolean equals(float[][] x, float[][] y, float epsilon) {
if (x.length != y.length || x[0].length != y[0].length) {
throw new IllegalArgumentException(String.format("Matrices have different rows: %d x %d vs %d x %d", x.length, x[0].length, y.length, y[0].length));
}
for (int i = 0; i < x.length; i++) {
for (int j = 0; j < x[i].length; j++) {
if (abs(x[i][j] - y[i][j]) > epsilon) {
return false;
}
}
}
return true;
}
/**
* Check if x element-wisely equals y with default epsilon 1E-10.
*/
public static boolean equals(double[][] x, double[][] y) {
return equals(x, y, 1.0E-10);
}
/**
* Check if x element-wisely equals y.
*/
public static boolean equals(double[][] x, double[][] y, double eps) {
if (x.length != y.length || x[0].length != y[0].length) {
throw new IllegalArgumentException(String.format("Matrices have different rows: %d x %d vs %d x %d", x.length, x[0].length, y.length, y[0].length));
}
if (eps <= 0.0) {
throw new IllegalArgumentException("Invalid epsilon: " + eps);
}
for (int i = 0; i < x.length; i++) {
for (int j = 0; j < x[i].length; j++) {
if (abs(x[i][j] - y[i][j]) > eps) {
return false;
}
}
}
return true;
}
/** Tests if a floating number is zero. */
public static boolean isZero(float x) {
return isZero(x, FLOAT_EPSILON);
}
/** Tests if a floating number is zero with given epsilon. */
public static boolean isZero(float x, float epsilon) {
return abs(x) < epsilon;
}
/** Tests if a floating number is zero. */
public static boolean isZero(double x) {
return isZero(x, EPSILON);
}
/** Tests if a floating number is zero with given epsilon. */
public static boolean isZero(double x, double epsilon) {
return abs(x) < epsilon;
}
/**
* Deep clone a two-dimensional array.
*/
public static int[][] clone(int[][] x) {
int[][] matrix = new int[x.length][];
for (int i = 0; i < x.length; i++) {
matrix[i] = x[i].clone();
}
return matrix;
}
/**
* Deep clone a two-dimensional array.
*/
public static float[][] clone(float[][] x) {
float[][] matrix = new float[x.length][];
for (int i = 0; i < x.length; i++) {
matrix[i] = x[i].clone();
}
return matrix;
}
/**
* Deep clone a two-dimensional array.
*/
public static double[][] clone(double[][] x) {
double[][] matrix = new double[x.length][];
for (int i = 0; i < x.length; i++) {
matrix[i] = x[i].clone();
}
return matrix;
}
/**
* Swap two elements of an array.
*/
public static void swap(int[] x, int i, int j) {
int s = x[i];
x[i] = x[j];
x[j] = s;
}
/**
* Swap two elements of an array.
*/
public static void swap(float[] x, int i, int j) {
float s = x[i];
x[i] = x[j];
x[j] = s;
}
/**
* Swap two elements of an array.
*/
public static void swap(double[] x, int i, int j) {
double s = x[i];
x[i] = x[j];
x[j] = s;
}
/**
* Swap two elements of an array.
*/
public static void swap(Object[] x, int i, int j) {
Object s = x[i];
x[i] = x[j];
x[j] = s;
}
/**
* Swap two arrays.
*/
public static void swap(int[] x, int[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException(String.format("Arrays have different length: x[%d], y[%d]", x.length, y.length));
}
for (int i = 0; i < x.length; i++) {
int s = x[i];
x[i] = y[i];
y[i] = s;
}
}
/**
* Swap two arrays.
*/
public static void swap(float[] x, float[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException(String.format("Arrays have different length: x[%d], y[%d]", x.length, y.length));
}
for (int i = 0; i < x.length; i++) {
float s = x[i];
x[i] = y[i];
y[i] = s;
}
}
/**
* Swap two arrays.
*/
public static void swap(double[] x, double[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException(String.format("Arrays have different length: x[%d], y[%d]", x.length, y.length));
}
for (int i = 0; i < x.length; i++) {
double s = x[i];
x[i] = y[i];
y[i] = s;
}
}
/**
* Swap two arrays.
*/
public static void swap(E[] x, E[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException(String.format("Arrays have different length: x[%d], y[%d]", x.length, y.length));
}
for (int i = 0; i < x.length; i++) {
E s = x[i];
x[i] = y[i];
y[i] = s;
}
}
/**
* Copy x into y.
*/
public static void copy(int[] x, int[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException(String.format("Arrays have different length: x[%d], y[%d]", x.length, y.length));
}
System.arraycopy(x, 0, y, 0, x.length);
}
/**
* Copy x into y.
*/
public static void copy(float[] x, float[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException(String.format("Arrays have different length: x[%d], y[%d]", x.length, y.length));
}
System.arraycopy(x, 0, y, 0, x.length);
}
/**
* Copy x into y.
*/
public static void copy(double[] x, double[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException(String.format("Arrays have different length: x[%d], y[%d]", x.length, y.length));
}
System.arraycopy(x, 0, y, 0, x.length);
}
/**
* Copy x into y.
*/
public static void copy(int[][] x, int[][] y) {
if (x.length != y.length || x[0].length != y[0].length) {
throw new IllegalArgumentException(String.format("Matrices have different rows: %d x %d vs %d x %d", x.length, x[0].length, y.length, y[0].length));
}
for (int i = 0; i < x.length; i++) {
System.arraycopy(x[i], 0, y[i], 0, x[i].length);
}
}
/**
* Copy x into y.
*/
public static void copy(float[][] x, float[][] y) {
if (x.length != y.length || x[0].length != y[0].length) {
throw new IllegalArgumentException(String.format("Matrices have different rows: %d x %d vs %d x %d", x.length, x[0].length, y.length, y[0].length));
}
for (int i = 0; i < x.length; i++) {
System.arraycopy(x[i], 0, y[i], 0, x[i].length);
}
}
/**
* Copy x into y.
*/
public static void copy(double[][] x, double[][] y) {
if (x.length != y.length || x[0].length != y[0].length) {
throw new IllegalArgumentException(String.format("Matrices have different rows: %d x %d vs %d x %d", x.length, x[0].length, y.length, y[0].length));
}
for (int i = 0; i < x.length; i++) {
System.arraycopy(x[i], 0, y[i], 0, x[i].length);
}
}
/**
* Element-wise sum of two arrays y = x + y.
*/
public static void add(double[] y, double[] x) {
if (x.length != y.length) {
throw new IllegalArgumentException(String.format("Arrays have different length: x[%d], y[%d]", x.length, y.length));
}
for (int i = 0; i < x.length; i++) {
y[i] += x[i];
}
}
/**
* Element-wise subtraction of two arrays y = y - x.
* @param y minuend matrix
* @param x subtrahend matrix
*/
public static void sub(double[] y, double[] x) {
if (x.length != y.length) {
throw new IllegalArgumentException(String.format("Arrays have different length: x[%d], y[%d]", x.length, y.length));
}
for (int i = 0; i < x.length; i++) {
y[i] -= x[i];
}
}
/**
* Scale each element of an array by a constant x = a * x.
*/
public static void scale(double a, double[] x) {
for (int i = 0; i < x.length; i++) {
x[i] *= a;
}
}
/**
* Scale each element of an array by a constant y = a * x.
*/
public static void scale(double a, double[] x, double[] y) {
for (int i = 0; i < x.length; i++) {
y[i] = a * x[i];
}
}
/**
* Update an array by adding a multiple of another array y = a * x + y.
*/
public static double[] axpy(double a, double[] x, double[] y) {
if (x.length != y.length) {
throw new IllegalArgumentException(String.format("Arrays have different length: x[%d], y[%d]", x.length, y.length));
}
for (int i = 0; i < x.length; i++) {
y[i] += a * x[i];
}
return y;
}
/**
* Raise each element of an array to a scalar power.
* @param x array
* @param n scalar exponent
* @return xn
*/
public static double[] pow(double[] x, double n) {
double[] y = new double[x.length];
for (int i = 0; i < x.length; i++) y[i] = Math.pow(x[i], n);
return y;
}
/**
* Find unique elements of vector.
* @param x an integer array.
* @return the same values as in x but with no repetitions.
*/
public static int[] unique(int[] x) {
return Arrays.stream(x).distinct().toArray();
}
/**
* Find unique elements of vector.
* @param x an array of strings.
* @return the same values as in x but with no repetitions.
*/
public static String[] unique(String[] x) {
return Arrays.stream(x).distinct().toArray(String[]::new);
}
/**
* Sorts each variable and returns the index of values in ascending order.
* Note that the order of original array is NOT altered.
*
* @param x a set of variables to be sorted. Each row is an instance. Each
* column is a variable.
* @return the index of values in ascending order
*/
public static int[][] sort(double[][] x) {
int n = x.length;
int p = x[0].length;
double[] a = new double[n];
int[][] index = new int[p][];
for (int j = 0; j < p; j++) {
for (int i = 0; i < n; i++) {
a[i] = x[i][j];
}
index[j] = QuickSort.sort(a);
}
return index;
}
/**
* Solve the tridiagonal linear set which is of diagonal dominance
*
*
* |bi| > |ai| + |ci|.
*
*
*
* | b0 c0 0 0 0 ... |
* | a1 b1 c1 0 0 ... |
* | 0 a2 b2 c2 0 ... |
* | ... |
* | ... a(n-2) b(n-2) c(n-2) |
* | ... 0 a(n-1) b(n-1) |
*
*
* @param a the lower part of tridiagonal matrix. a[0] is undefined and not
* referenced by the method.
* @param b the diagonal of tridiagonal matrix.
* @param c the upper of tridiagonal matrix. c[n-1] is undefined and not
* referenced by the method.
* @param r the right-hand side of linear equations.
* @return the solution.
*/
public static double[] solve(double[] a, double[] b, double[] c, double[] r) {
if (b[0] == 0.0) {
throw new IllegalArgumentException("Invalid value of b[0] == 0. The equations should be rewritten as a set of order n - 1.");
}
int n = a.length;
double[] u = new double[n];
double[] gam = new double[n];
double bet = b[0];
u[0] = r[0] / bet;
for (int j = 1; j < n; j++) {
gam[j] = c[j - 1] / bet;
bet = b[j] - a[j] * gam[j];
if (bet == 0.0) {
throw new IllegalArgumentException("The tridagonal matrix is not of diagonal dominance.");
}
u[j] = (r[j] - a[j] * u[j - 1]) / bet;
}
for (int j = (n - 2); j >= 0; j--) {
u[j] -= gam[j + 1] * u[j + 1];
}
return u;
}
}