All Downloads are FREE. Search and download functionalities are using the official Maven repository.

math.stats.distribution.fit.FastKolmogorovSmirnov Maven / Gradle / Ivy

There is a newer version: 0.9.5
Show newest version
/*
 * Class:        KolmogorovSmirnovDistQuick
 * Description:  Kolmogorov-Smirnov 2-sided 1-sample distribution
 * Environment:  Java
 * Software:     SSJ
 * Copyright (C) 2001  Pierre L'Ecuyer and Universite de Montreal
 * Organization: DIRO, Universite de Montreal
 * @author       Richard Simard
 * @since        January 2010
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package math.stats.distribution.fit;

import math.Arithmetic;
import math.FastMath;
import math.MathConsts;
import static math.MathConsts.SQRT_PI_HALF;
import static math.MathConsts.PI_SQUARED;

/**
 * Methods for the two-sided *Kolmogorov?Smirnov* distribution of the
 * {@code Kolmogorov?Smirnov} test statistic {@code D_n} given an ordered sample
 * of {@code n} independent uniforms {@code U_i} over {@code (0,1)}.
 * 

* https://www.jstatsoft.org/article/view/v039i11 *

* https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test */ final class FastKolmogorovSmirnov { private static final int NEXACT = 500; private static final double PI2 = PI_SQUARED; private static final double PI4 = PI2 * PI2; // for the Durbin matrix algorithm private static final double NORM = 1.0e140; private static final double INORM = 1.0e-140; private static final int LOGNORM = 140; /* * For n <= NEXACT, we use exact algorithms: the Durbin matrix and the * Pomeranz algorithms. For n > NEXACT, we use asymptotic methods except for * x close to 0 where we still use the method of Durbin for n <= NKOLMO. For * n > NKOLMO, we use asymptotic methods only and so the precision is less * for x close to 0. We could increase the limit NKOLMO to 10^6 to get * better precision for x close to 0, but at the price of a slower speed. */ private static final int NKOLMO = 100_000; /** * Computes the complementary *Kolmogorov?Smirnov* distribution * {@code P[D_n >= x]} with parameter {@code n}, in a form that is more * precise in the upper tail. *

* It returns at least 10 decimal digits of precision everywhere for all * {@code n <= 500}, at least 6 decimal digits of precision for * {@code 500 < n <= 200_000}, and a few correct decimal digits (1 to 5) for * {@code n > 200_000}. *

* Restriction: {@code n >= 1} */ static double barF(int n, double x) { double v = barFConnu(n, x); if (v >= 0.0) { return v; } final double w = n * x * x; if (n <= NEXACT) { if (w < 4.0) { return 1.0 - cdf(n, x); } else { return 2.0 * KolmogorovSmirnovP.kolmoSmirnovPlusBarUpper(n, x); } } if (w >= 2.65) { return 2.0 * KolmogorovSmirnovP.kolmoSmirnovPlusBarUpper(n, x); } return 1.0 - cdf(n, x); } /** * Computes the *Kolmogorov?Smirnov* distribution function * {@code u = P[D_n <= x]} with parameter {@code n}. *

* This method uses Pomeranz?s recursion algorithm and the Durbin matrix * algorithm for * {@code n <= 500}, which returns at least 13 decimal digits of precision. * It uses the Pelz-Good asymptotic expansion in the central part of the * range for {@code n > 500} and returns at least 7 decimal digits of * precision everywhere for * {@code 500 < n <= 100_000}. For {@code n > 100_000}, it returns at least * 5 decimal digits of precision for all {@code u > 10^(-16)}, and a few * correct decimals when {@code u <= 10^(-16)}. *

* Restriction: {@code n >= 1} */ static double cdf(int n, double x) { double u = cdfConnu(n, x); if (u >= 0.0) { return u; } final double w = n * x * x; if (n <= NEXACT) { if (w < 0.754693) { return durbinMatrix(n, x); } if (w < 4.0) { return pomeranz(n, x); } return 1.0 - barF(n, x); } if ((w * x * n <= 7.0) && (n <= NKOLMO)) { return durbinMatrix(n, x); } return pelz(n, x); } private static double barFConnu(int n, double x) { final double w = n * x * x; if ((w >= 370.0) || (x >= 1.0)) { return 0.0; } if ((w <= 0.0274) || (x <= 0.5 / n)) { return 1.0; } if (n == 1) { return 2.0 - 2.0 * x; } if (x <= 1.0 / n) { double t = 2.0 * x * n - 1.0; if (n <= NEXACT) { double v = Arithmetic.factoPow(n); return 1.0 - v * FastMath.pow(t, (double) n); } double v = Arithmetic.logFactorial(n) + n * Math.log(t / n); return 1.0 - FastMath.exp(v); } if (x >= 1.0 - 1.0 / n) { return 2.0 * FastMath.pow(1.0 - x, (double) n); } return -1.0; } private static double cdfConnu(int n, double x) { // For nx^2 > 18, barF(n, x) is smaller than 5e-16 if ((n * x * x >= 18.0) || (x >= 1.0)) { return 1.0; } if (x <= 0.5 / n) { return 0.0; } if (n == 1) { return 2.0 * x - 1.0; } if (x <= 1.0 / n) { double t = 2.0 * x * n - 1.0; if (n <= NEXACT) { double w = Arithmetic.factoPow(n); return w * FastMath.pow(t, (double) n); } double w = Arithmetic.logFactorial(n) + n * Math.log(t / n); return FastMath.exp(w); } if (x >= 1.0 - 1.0 / n) { return 1.0 - 2.0 * FastMath.pow(1.0 - x, (double) n); } return -1.0; } private static double pomeranz(int n, double x) { // The Pomeranz algorithm to compute the KS distribution final double EPS = 1.0e-15; final int ENO = 350; // for renormalization of V final double RENO = FastMath.scalb(1.0, ENO); final double t = n * x; final double[] A = new double[2 * n + 3]; final double[] Atflo = new double[2 * n + 3]; final double[] Atcei = new double[2 * n + 3]; final double[][] V = new double[2][n + 2]; final double[][] H = new double[4][n + 2]; // = pow(w, j) / factorial(j) calcFloorCeil(n, t, A, Atflo, Atcei); for (int j = 1; j <= n + 1; j++) { V[0][j] = 0.0; } for (int j = 2; j <= n + 1; j++) { V[1][j] = 0.0; } V[1][1] = RENO; // Precompute H[][] = (A[j] - A[j-1]^k / k! H[0][0] = 1.0; double w = 2.0 * A[2] / n; for (int j = 1; j <= n + 1; j++) { H[0][j] = w * H[0][j - 1] / j; } H[1][0] = 1.0; w = (1.0 - 2.0 * A[2]) / n; for (int j = 1; j <= n + 1; j++) { H[1][j] = w * H[1][j - 1] / j; } H[2][0] = 1.0; w = A[2] / n; for (int j = 1; j <= n + 1; j++) { H[2][j] = w * H[2][j - 1] / j; } H[3][0] = 1.0; for (int j = 1; j <= n + 1; j++) { H[3][j] = 0.0; } int coreno = 1; // counter: how many renormalizations int r1 = 0; // Indices i and i-1 for V[i][] int r2 = 1; double sum; for (int i = 2; i <= 2 * n + 2; i++) { int jlow = (int) (2 + Atflo[i]); if (jlow < 1) { jlow = 1; } int jup = (int) (Atcei[i]); if (jup > n + 1) { jup = n + 1; } int klow = (int) (2 + Atflo[i - 1]); if (klow < 1) { klow = 1; } int kup0 = (int) (Atcei[i - 1]); // find to which case it corresponds w = (A[i] - A[i - 1]) / n; int s = -1; for (int j = 0; j < 4; j++) { if (FastMath.abs(w - H[j][1]) <= EPS) { s = j; break; } } double minsum = RENO; r1 = (r1 + 1) & 1; // i - 1 r2 = (r2 + 1) & 1; // i for (int j = jlow; j <= jup; j++) { int kup = kup0; if (kup > j) { kup = j; } sum = 0.0; for (int k = kup; k >= klow; k--) { sum += V[r1][k] * H[s][j - k]; } V[r2][j] = sum; if (sum < minsum) { minsum = sum; } } if (minsum < 1.0e-280) { // V is too small: renormalize to avoid underflow of // probabilities for (int j = jlow; j <= jup; j++) { V[r2][j] *= RENO; } coreno++; // keep track of log of RENO } } sum = V[r2][n + 1]; w = Arithmetic.logFactorial(n) - coreno * ENO * MathConsts.LN_2 + Math.log(sum); if (w >= 0.) { return 1.0; } return FastMath.exp(w); } private static void calcFloorCeil(int n /* sample size */, double t /* = nx */, double[] A /* A_i */, double[] Atflo /* floor (A_i - t) */, double[] Atcei /* ceiling(A_i + t) */ ) { // Precompute A_i, floors, and ceilings for limits of sums in the // Pomeranz algorithm final int ell = (int) t; // floor(t) final double w = FastMath.ceil(t) - t; double z = t - ell; // t - floor(t) if (z > 0.5) { for (int i = 2; i <= 2 * n + 2; i += 2) { Atflo[i] = i / 2 - 2 - ell; } for (int i = 1; i <= 2 * n + 2; i += 2) { Atflo[i] = i / 2 - 1 - ell; } for (int i = 2; i <= 2 * n + 2; i += 2) { Atcei[i] = i / 2 + ell; } for (int i = 1; i <= 2 * n + 2; i += 2) { Atcei[i] = i / 2 + 1 + ell; } } else if (z > 0.0) { for (int i = 1; i <= 2 * n + 2; i++) { Atflo[i] = i / 2 - 1 - ell; } for (int i = 2; i <= 2 * n + 2; i++) { Atcei[i] = i / 2 + ell; } Atcei[1] = 1.0 + ell; } else { // z == 0.0 for (int i = 2; i <= 2 * n + 2; i += 2) { Atflo[i] = i / 2 - 1 - ell; } for (int i = 1; i <= 2 * n + 2; i += 2) { Atflo[i] = i / 2 - ell; } for (int i = 2; i <= 2 * n + 2; i += 2) { Atcei[i] = i / 2 - 1 + ell; } for (int i = 1; i <= 2 * n + 2; i += 2) { Atcei[i] = i / 2 + ell; } } if (w < z) { z = w; } A[0] = A[1] = 0.0; A[2] = z; A[3] = 1.0 - A[2]; for (int i = 4; i <= 2 * n + 1; i++) { A[i] = A[i - 2] + 1.0; } A[2 * n + 2] = n; } private static double durbinMatrix(int n, double d) { final int k = (int) (n * d) + 1; final int m = 2 * k - 1; final double h = k - n * d; final double[] H = new double[m * m]; final double[] Q = new double[m * m]; final int[] pQ = new int[1]; for (int i = 0; i < m; i++) { for (int j = 0; j < m; j++) { if (i - j + 1 < 0) { H[i * m + j] = 0; } else { H[i * m + j] = 1; } } } for (int i = 0; i < m; i++) { H[i * m] -= FastMath.pow(h, (double) (i + 1)); H[(m - 1) * m + i] -= FastMath.pow(h, (double) (m - i)); } H[(m - 1) * m] += (2.0 * h - 1.0 > 0.0 ? FastMath.pow(2.0 * h - 1.0, (double) m) : 0.0); for (int i = 0; i < m; i++) { for (int j = 0; j < m; j++) { if (i - j + 1 > 0) { for (int g = 1; g <= i - j + 1; g++) { H[i * m + j] /= g; } } } } mPower(H, 0, Q, pQ, m, n); double s = Q[(k - 1) * m + k - 1]; for (int i = 1; i <= n; i++) { s = s * (double) i / n; if (s < INORM) { s *= NORM; pQ[0] -= LOGNORM; } } s *= FastMath.pow(10.0, (double) pQ[0]); return s; } private static double pelz(int n, double x) { /* * Approximating the Lower Tail-Areas of the Kolmogorov-Smirnov * One-Sample Statistic * * Wolfgang Pelz and I. J. Good, Journal of the Royal Statistical * Society, Series B. Vol. 38, No. 2 (1976), pp. 152-156 */ final int JMAX = 20; final double EPS = 1.0e-10; final double RACN = Math.sqrt((double) n); final double z = RACN * x; final double z2 = z * z; final double z4 = z2 * z2; final double z6 = z4 * z2; final double w = PI2 / (2.0 * z * z); int j = 0; double term = 1.0; double sum = 0.0; while (j <= JMAX && term > EPS * sum) { double ti = j + 0.5; term = FastMath.exp(-ti * ti * w); sum += term; j++; } sum *= MathConsts.SQRT_TWO_PI / z; term = 1.0; double tom = 0.0; j = 0; while (j <= JMAX && FastMath.abs(term) > EPS * FastMath.abs(tom)) { double ti = j + 0.5; term = (PI2 * ti * ti - z2) * FastMath.exp(-ti * ti * w); tom += term; j++; } sum += tom * SQRT_PI_HALF / (RACN * 3.0 * z4); term = 1.0; tom = 0.0; j = 0; while (j <= JMAX && FastMath.abs(term) > EPS * FastMath.abs(tom)) { double ti = j + 0.5; term = 6.0 * z6 + 2.0 * z4 + PI2 * (2.0 * z4 - 5.0 * z2) * ti * ti + PI4 * (1.0 - 2.0 * z2) * ti * ti * ti * ti; term *= FastMath.exp(-ti * ti * w); tom += term; j++; } sum += tom * SQRT_PI_HALF / (n * 36.0 * z * z6); term = 1.0; tom = 0.0; j = 1; while (j <= JMAX && term > EPS * tom) { double ti = j; term = PI2 * ti * ti * FastMath.exp(-ti * ti * w); tom += term; j++; } sum -= tom * SQRT_PI_HALF / (n * 18.0 * z * z2); term = 1.0; tom = 0.0; j = 0; while (j <= JMAX && FastMath.abs(term) > EPS * FastMath.abs(tom)) { double ti = j + 0.5; ti = ti * ti; term = -30.0 * z6 - 90.0 * z6 * z2 + PI2 * (135.0 * z4 - 96.0 * z6) * ti + PI4 * (212.0 * z4 - 60.0 * z2) * ti * ti + PI2 * PI4 * ti * ti * ti * (5.0 - 30.0 * z2); term *= FastMath.exp(-ti * w); tom += term; j++; } sum += tom * SQRT_PI_HALF / (RACN * n * 3240.0 * z4 * z6); term = 1.0; tom = 0.0; j = 1; while (j <= JMAX && FastMath.abs(term) > EPS * FastMath.abs(tom)) { double ti = j * j; term = (3.0 * PI2 * ti * z2 - PI4 * ti * ti) * FastMath.exp(-ti * w); tom += term; j++; } sum += tom * SQRT_PI_HALF / (RACN * n * 108.0 * z6); return sum; } private static void mPower(double[] A, int eA, double[] V, int[] eV, int m, int n) { if (n == 1) { for (int i = 0; i < m * m; i++) { V[i] = A[i]; } eV[0] = eA; return; } mPower(A, eA, V, eV, m, n / 2); double[] B = new double[m * m]; int[] pB = new int[1]; mMultiply(V, V, B, m); pB[0] = 2 * (eV[0]); if (B[(m / 2) * m + (m / 2)] > NORM) { renormalize(B, m, pB); } if (n % 2 == 0) { for (int i = 0; i < m * m; i++) { V[i] = B[i]; } eV[0] = pB[0]; } else { mMultiply(A, B, V, m); eV[0] = eA + pB[0]; } if (V[(m / 2) * m + (m / 2)] > NORM) { renormalize(V, m, eV); } } private static void mMultiply(double[] A, double[] B, double[] C, int m) { for (int i = 0; i < m; i++) { for (int j = 0; j < m; j++) { double s = 0.0; for (int k = 0; k < m; k++) { s += A[i * m + k] * B[k * m + j]; } C[i * m + j] = s; } } } private static void renormalize(double[] V, int m, int[] p) { for (int i = 0; i < m * m; i++) { V[i] *= INORM; } p[0] += LOGNORM; } private FastKolmogorovSmirnov() { throw new AssertionError(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy