All downloads are free. The search and download functionality uses the official Maven repository.

umontreal.iro.lecuyer.gof.GofFormat Maven / Gradle / Ivy

Go to download

SSJ is a Java library for stochastic simulation, developed under the direction of Pierre L'Ecuyer, in the Département d'Informatique et de Recherche Opérationnelle (DIRO), at the Université de Montréal. It provides facilities for generating uniform and nonuniform random variates, computing different measures related to probability distributions, performing goodness-of-fit tests, applying quasi-Monte Carlo methods, collecting (elementary) statistics, and programming discrete-event simulations with both events and processes.

The newest version!


/*
 * Class:        GofFormat
 * Description:  
 * Environment:  Java
 * Software:     SSJ 
 * Copyright (C) 2001  Pierre L'Ecuyer and Université de Montréal
 * Organization: DIRO, Université de Montréal
 * @author       
 * @since

 * SSJ is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License (GPL) as published by the
 * Free Software Foundation, either version 3 of the License, or
 * any later version.

 * SSJ is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.

 * A copy of the GNU General Public License is available at
 * the GPL licence site (https://www.gnu.org/licenses/).
 */

package umontreal.iro.lecuyer.gof;
   import cern.colt.list.*;

import umontreal.iro.lecuyer.util.PrintfFormat;
import umontreal.iro.lecuyer.probdist.*;
import java.io.PrintWriter;

/**
 * This class contains methods used to format results of GOF
 * test statistics, or to apply a series of tests
 * simultaneously and format the results.
 * It is in fact a translation from C to Java of a set of functions that
 * were specially written for the implementation of TestU01, a software
 * package for testing uniform random number generators.
 * 
 * 

* Strictly speaking, applying several tests simultaneously makes the * p-values ``invalid'' in the sense that the probability of having * at least one p-value less than 0.01, say, is larger than 0.01. * One must therefore be careful with the interpretation of these * p-values (one could use, e.g., the Bonferroni inequality). * Applying simultaneous tests is convenient in some situations, such as in * screening experiments for detecting statistical deficiencies * in random number generators. In that context, rejection of the null * hypothesis typically occurs with extremely small p-values (e.g., less * than 10-15), and the interpretation is quite obvious in this case. * *

* The class also provides tools to plot an empirical or * theoretical distribution function, by creating a data file that * contains a graphic plot in a format compatible with the software * specified by the environment variable {@link #graphSoft graphSoft}. * NOTE: see also the more recent package * {@link umontreal.iro.lecuyer.charts charts}. * *

* Note: This class uses the Colt library. * */ public class GofFormat { private GofFormat() {} /** * Data file format used for plotting functions with Gnuplot. * */ public static final int GNUPLOT = 0; /** * Data file format used for creating graphics with Mathematica. * */ public static final int MATHEMATICA = 1; /** * Environment variable that selects the type of software to be * used for plotting the graphs of functions. * The data files produced by {@link #graphFunc graphFunc} and * {@link #graphDistUnif graphDistUnif} will be in a format suitable * for this selected software. * The default value is GNUPLOT. * To display a graphic in file f using gnuplot, for example, * one can use the command ``plot f with steps, x with lines'' * in gnuplot. * graphSoft can take the values {@link #GNUPLOT GNUPLOT} or {@link #MATHEMATICA MATHEMATICA}. * */ public static int graphSoft = GNUPLOT; private static String formatMath2 (double x, double y) { // Writes the pair (x, y) in file f, in a format understood // by Mathematica StringBuffer sb = new StringBuffer(); String S; sb.append (" { "); if ((x != 0.0) && (x < 0.1 || x > 1.0)) { S = PrintfFormat.E (16, 7, x); int exppos = S.indexOf ('E'); if (exppos != -1) S = S.substring (0, exppos) + "*10^(" + S.substring (exppos+1) + ")"; } else S = PrintfFormat.g (16, 8, x); sb.append (S + ", "); if (y != 0.0 && (y < 0.1 || y > 1.0)) { S = PrintfFormat.E (16, 7, y); int exppos = S.indexOf ('E'); if (exppos != -1) S = S.substring (0, exppos) + "*10^(" + S.substring (exppos+1) + ")"; } else S = PrintfFormat.g (16, 8, y); sb.append (S + " }"); return sb.toString(); } private static String graphFunc (ContinuousDistribution dist, double a, double b, int m, int mono, String desc) { // Renommer drawCDF en fixant mono = 1 et éliminant mono. 
int i; double yprec, y, x, h; StringBuffer sb = new StringBuffer(); String openComment = ""; String closeComment = ""; String openGraph = ""; String closeGraph = ""; if (mono != 1 && mono != -1) throw new IllegalArgumentException ("mono must be 1 or -1"); switch (graphSoft) { case GNUPLOT: openComment = "# "; closeComment = ""; openGraph = ""; closeGraph = PrintfFormat.NEWLINE; break; case MATHEMATICA: openComment = "(* "; closeComment = " *)"; openGraph = "points = { " + PrintfFormat.NEWLINE; closeGraph = "}" + PrintfFormat.NEWLINE; break; } sb.append (openComment + "----------------------------------" + closeComment + PrintfFormat.NEWLINE); sb.append (openComment + PrintfFormat.s (-70, desc) + closeComment + PrintfFormat.NEWLINE + PrintfFormat.NEWLINE); sb.append (openGraph); h = (b - a) / m; if (mono == 1) yprec = -Double.MAX_VALUE; else if (mono == -1) yprec = Double.MAX_VALUE; else yprec = 0.0; for (i = 0; i <= m; i++) { x = a + i*h; y = mono == 1 ? dist.cdf (x) : dist.barF (x); switch (graphSoft) { case MATHEMATICA: sb.append (formatMath2 (x, y)); if (i < m) sb.append (','); break; default: // Default and GNUPLOT sb.append (PrintfFormat.g (20, 14, x) + " " + PrintfFormat.g (20, 14, y)); } switch (mono) { case 1: if (y < yprec) sb.append (" " + openComment + " DECREASING" + closeComment); break; case -1: if (y > yprec) sb.append (" " + openComment + " INCREASING" + closeComment); break; default: break; } sb.append (PrintfFormat.NEWLINE); yprec = y; } sb.append (closeGraph); return sb.toString(); } /** * Formats data to plot the graph of the distribution function F over the * interval [a, b], and returns the result as a {@link String}. * The method dist.cdf(x) returns the value of F at x. * The {@link String} desc gives a short caption for the graphic plot. 
* The method computes the m + 1 points * (xiF(xi)), * where * xi = a + i(b - a)/m for * i = 0, 1,…, m, and formats these points * into a String in a format suitable for the * software specified by {@link #graphSoft graphSoft}. * NOTE: see also the more recent class * {@link umontreal.iro.lecuyer.charts.ContinuousDistChart ContinuousDistChart}. * * @param dist continuous distribution function to plot * * @param a lower bound of the interval to plot * * @param b upper bound of the interval to plot * * @param m number of points in the plot minus one * * @param desc short caption describing the plot * * @return a string representation of the plot data * */ public static String drawCdf (ContinuousDistribution dist, double a, double b, int m, String desc) { return graphFunc (dist, a, b, m, 1, desc); } /** * Formats data to plot the graph of the density f (x) over the interval [a, b], * and returns the result as a {@link String}. The method * dist.density(x) returns the value of f (x) at x. * The {@link String} desc gives a short caption for the graphic * plot. The method computes the m + 1 points * (xif (xi)), * where * xi = a + i(b - a)/m for * i = 0, 1,…, m, and formats these points * into a String in a format suitable for the * software specified by {@link #graphSoft graphSoft}. * NOTE: see also the more recent class * {@link umontreal.iro.lecuyer.charts.ContinuousDistChart ContinuousDistChart}. 
* * @param dist continuous density function to plot * * @param a lower bound of the interval to plot * * @param b upper bound of the interval to plot * * @param m number of points in the plot minus one * * @param desc short caption describing the plot * * @return a string representation of the plot data * */ public static String drawDensity (ContinuousDistribution dist, double a, double b, int m, String desc) { int i; double y, x, h; StringBuffer sb = new StringBuffer(); String openComment = ""; String closeComment = ""; String openGraph = ""; String closeGraph = ""; switch (graphSoft) { case GNUPLOT: openComment = "# "; closeComment = ""; openGraph = ""; closeGraph = PrintfFormat.NEWLINE; break; case MATHEMATICA: openComment = "(* "; closeComment = " *)"; openGraph = "points = { " + PrintfFormat.NEWLINE; closeGraph = "}" + PrintfFormat.NEWLINE; break; } sb.append (openComment + "----------------------------------" + closeComment + PrintfFormat.NEWLINE); sb.append (openComment + PrintfFormat.s (-70, desc) + closeComment + PrintfFormat.NEWLINE + PrintfFormat.NEWLINE); sb.append (openGraph); h = (b - a) / m; for (i = 0; i <= m; i++) { x = a + i*h; y = dist.density (x); switch (graphSoft) { case MATHEMATICA: sb.append (formatMath2 (x, y)); if (i < m) sb.append (','); break; default: // Default and GNUPLOT sb.append (PrintfFormat.g (16, 8, x) + " " + PrintfFormat.g (16, 8, y)); } sb.append (PrintfFormat.NEWLINE); } sb.append (closeGraph); return sb.toString(); } /** * Formats data to plot the empirical distribution of * * U(1),..., U(N), which are assumed to be in data[0...N-1], * and to compare it with the uniform distribution. The U(i) must be sorted. * The two endpoints (0, 0) and (1, 1) are always included in the plot. * The string desc gives a short caption for the graphic plot. * The data is printed in a format suitable for the * software specified by {@link #graphSoft graphSoft}. 
* NOTE: see also the more recent class * {@link umontreal.iro.lecuyer.charts.EmpiricalChart EmpiricalChart}. * * @param data array of observations to plot * * @param desc short caption describing the plot * * @return a string representation of the plot data * */ public static String graphDistUnif (DoubleArrayList data, String desc) { double[] u = data.elements(); int n = data.size(); int i; double unSurN = 1.0/n; StringBuffer sb = new StringBuffer(); switch (graphSoft) { case GNUPLOT: sb.append ("#----------------------------------" + PrintfFormat.NEWLINE); sb.append ("# " + PrintfFormat.s (-70, desc) + PrintfFormat.NEWLINE + PrintfFormat.NEWLINE); sb.append (PrintfFormat.g (16, 8, 0.0) + " " + PrintfFormat.g (16, 8, 0.0) + PrintfFormat.NEWLINE); for (i = 0; i < n; i++) sb.append (PrintfFormat.g (16, 8, u[i]) + " " + PrintfFormat.g (16, 8, (i + 1)*unSurN) + PrintfFormat.NEWLINE); sb.append (PrintfFormat.g (16, 8, 1.0) + " " + PrintfFormat.g (16, 8, 1.0) + PrintfFormat.NEWLINE + PrintfFormat.NEWLINE); break; case MATHEMATICA: sb.append ("(*----------------------------------*)" + PrintfFormat.NEWLINE); sb.append ("(* " + PrintfFormat.s (-70, desc) + PrintfFormat.NEWLINE + " *)" + PrintfFormat.NEWLINE + PrintfFormat.NEWLINE + "points = { " + PrintfFormat.NEWLINE); sb.append (formatMath2 (0.0, 0.0) + "," + PrintfFormat.NEWLINE); for (i = 0; i < n; i++) sb.append (formatMath2 (u[i], (i + 1)*unSurN) + "," + PrintfFormat.NEWLINE); sb.append (formatMath2 (1.0, 1.0) + PrintfFormat.NEWLINE); break; default: throw new IllegalArgumentException ("graphSoft unknown"); } return sb.toString(); } /** * Environment variable used in {@link #formatp0 formatp0} to determine * which p-values are too close to 0 or 1 to be printed explicitly. * If EPSILONP * = ε, then any p-value * (or significance level) less than ε or larger than * * 1 - ε is not written explicitly; * the program simply writes ``eps'' or ``1-eps''. * The default value is 10-15. 
* */ public static double EPSILONP = 1.0E-15; /** * Environment variable used in {@link #formatp1 formatp1} to determine * which p-values should be marked as suspect when printing test results. * If SUSPECTP = α, then any p-value * (or significance level) less than α or larger than * 1 - α is considered suspect and is * ``singled out'' by formatp1. * The default value is 0.01. * */ public static double SUSPECTP = 0.01; /** * Returns the significance level (or p-value) p of a test, * in the format ``1 - p'' if p is close to 1, and p otherwise. * Uses the environment variable {@link #EPSILONP EPSILONP} and replaces p * by ε when it is too small. * * @param p the p-value or significance level to be formated * * @return the string representation of the p-value * */ public static String formatp0 (double p) { // Formats the significance level of a test, without a descriptor if ((p >= 0.01) && (p <= 0.99)) return PrintfFormat.format (8, 2, 1, p); else if (p < EPSILONP) return " eps "; else if (p < 0.01) return PrintfFormat.format (8, 2, 2, p); else if (p >= 1.0 - EPSILONP) return " 1 - eps "; else return " 1 - " + PrintfFormat.g (8, 2, 1.0 - p); } /** * Returns the string ``Significance level of test : '', * then calls {@link #formatp0 formatp0} to print p, and adds * the marker ``****'' if p is considered suspect * (uses the environment variable RSUSPECTP for this). * * @param p the p-value or significance level to be formated * * @return the string representation of the significance level of test * */ public static String formatp1 (double p) { // Prints the significance level of a test, with a descriptor. StringBuffer sb = new StringBuffer(); sb.append ("Significance level of test :" + formatp0 (p)); if (p < SUSPECTP || p > 1.0 - SUSPECTP) sb.append (" *****"); sb.append (PrintfFormat.NEWLINE + PrintfFormat.NEWLINE); return sb.toString(); } /** * Returns x on a single line, then go to the next line * and calls {@link #formatp1 formatp1}. 
* * @param x value of the statistic for which the significance level is formated * * @param p the p-value or significance level to be formated * * @return the string representation of the significance level of test * */ public static String formatp2 (double x, double p) { // Prints the statistic x and its significance level p. return PrintfFormat.format (8, 2, 1, x) + PrintfFormat.NEWLINE + formatp1 (p); } /** * Formats the test statistic x for a test named testName * with p-value p. The first line of the returned string contains * the name of the test and the statistic whereas the second line contains * its significance level. The formated values of x and p are * aligned. * * @param testName name of the test that was performed * * @param x value of the test statistic * * @param p significance level (or p-value) of the test * * @return the string representation of the test result * */ public static String formatp3 (String testName, double x, double p) { final String SLT = "Significance level of test"; int l = Math.max (SLT.length(), testName.length()); PrintfFormat pf = new PrintfFormat(); pf.append (-l, testName).append (" : ").append (8, 2, 1, x).append (PrintfFormat.NEWLINE); pf.append (-l, SLT).append (" : ").append (formatp0 (p)); if (p < SUSPECTP || p > 1.0 - SUSPECTP) pf.append (" *****"); pf.append (PrintfFormat.NEWLINE + PrintfFormat.NEWLINE); return pf.toString(); } /** * Computes the p-value of the chi-square statistic * chi2 for a test with k intervals. Uses d decimal digits * of precision in the calculations. The result of the * test is returned as a string. The p-value is computed using * {@link GofStat#pDisc pDisc}. 
* * @param k number of subintervals for the chi-square test * * @param chi2 chi-square statistic * * @return the string representation of the test result and p-value * */ public static String formatChi2 (int k, int d, double chi2) { StringBuffer sb = new StringBuffer(); sb.append ("Chi2 statistic : " + PrintfFormat.format (8, 2, 1, chi2)); sb.append (PrintfFormat.NEWLINE + "p-value : " + formatp0 (GofStat.pDisc (ChiSquareDist.cdf (k - 1, d, chi2), ChiSquareDist.barF (k - 1, d, chi2)))); sb.append (PrintfFormat.NEWLINE + PrintfFormat.NEWLINE); return sb.toString(); } /** * Computes the p-values of the three Kolmogorov-Smirnov statistics * DN+, DN-, and DN, whose values are in dp, dm, d, * respectively, assuming a sample of size n. * Then formats these statistics and their p-values * using {@link #formatp2 formatp2} for each one. * * @param n sample size * * @param dp value of the DN+ statistic * * @param dm value of the DN- statistic * * @param d value of the DN statistic * * @return the string representation of the Kolmogorov-Smirnov statistics and their * significance levels * */ public static String formatKS (int n, double dp, double dm, double d) { // Prints the results of a Kolmogorov-Smirnov test return "Kolmogorov-Smirnov+ statistic = D+ :" + formatp2 (dp, KolmogorovSmirnovPlusDist.barF (n, dp)) + "Kolmogorov-Smirnov- statistic = D- :" + formatp2 (dm, KolmogorovSmirnovPlusDist.barF (n, dm)) + "Kolmogorov-Smirnov statistic = D :" + formatp2 (d, KolmogorovSmirnovDistQuick.barF (n, d)) + PrintfFormat.NEWLINE + PrintfFormat.NEWLINE; } /** * Computes the KS test statistics to compare the * empirical distribution of the observations in data * with the theoretical distribution dist and * formats the results. 
* * @param data array of observations to be tested * * @param dist assumed distribution of the observations * * @return the string representation of the Kolmogorov-Smirnov statistics and their * significance levels * */ public static String formatKS (DoubleArrayList data, ContinuousDistribution dist) { double[] v = data.elements(); int n = data.size(); DoubleArrayList dataUnif = GofStat.unifTransform (data, dist); dataUnif.quickSortFromTo (0, dataUnif.size() - 1); double[] ret = GofStat.kolmogorovSmirnov (dataUnif); return formatKS (n, ret[0], ret[1], ret[2]); } /** * Similar to {@link #formatKS(int,double,double,double) formatKS}, * but for the KS statistic DN+(a). * Writes a header, * computes the p-value and calls {@link #formatp2 formatp2}. * * @param n sample size * * @param a size of the jump * * @param dp value of DN+(a) * * @return the string representation of the Kolmogorov-Smirnov statistic and its significance level * */ public static String formatKSJumpOne (int n, double a, double dp) { double d = 1.0 - FDist.kolmogorovSmirnovPlusJumpOne (n, a, dp); return PrintfFormat.NEWLINE + "Kolmogorov-Smirnov+ statistic = D+ : " + PrintfFormat.g (8, 2, dp) + PrintfFormat.NEWLINE + formatp1 (d) + PrintfFormat.NEWLINE; } /** * Similar to {@link #formatKS(DoubleArrayList,ContinuousDistribution) formatKS}, * but for DN+(a). 
* * @param data array of observations to be tested * * @param dist assumed distribution of the data * * @param a size of the jump * * @return string representation of the Kolmogorov-Smirnov statistic and its significance level * */ public static String formatKSJumpOne (DoubleArrayList data, ContinuousDistribution dist, double a) { double[] v = data.elements(); int n = data.size(); DoubleArrayList dataUnif = GofStat.unifTransform (data, dist); dataUnif.quickSortFromTo (0, dataUnif.size() - 1); double[] ret = GofStat.kolmogorovSmirnovJumpOne (dataUnif, a); return formatKSJumpOne (n, a, ret[0]); } /** * Kolmogorov-Smirnov+ test * */ public static final int KSP = 0; /** * Kolmogorov-Smirnov- test * */ public static final int KSM = 1; /** * Kolmogorov-Smirnov test * */ public static final int KS = 2; /** * Anderson-Darling test * */ public static final int AD = 3; /** * Cramér-von Mises test * */ public static final int CM = 4; /** * Watson G test * */ public static final int WG = 5; /** * Watson U test * */ public static final int WU = 6; /** * Mean * */ public static final int MEAN = 7; /** * Correlation * */ public static final int COR = 8; /** * Total number of test types * */ public static final int NTESTTYPES = 9; /** * Name of each testType test. * Could be used for printing the test results, for example. * */ public static final String[] TESTNAMES = { "KolmogorovSmirnovPlus", "KolmogorovSmirnovMinus", "KolmogorovSmirnov", "Anderson-Darling", "CramerVon-Mises", "Watson G", "Watson U", "Mean", "Correlation" }; /** * The set of EDF tests that are to be performed when calling * the methods {@link #activeTests activeTests}, {@link #formatActiveTests formatActiveTests}, etc. * By default, this set contains KSP, KSM, * and AD. Note: MEAN and COR are always excluded * from this set of active tests. 
* The valid indices for this array are {@link #KSP KSP}, {@link #KSM KSM}, * {@link #KS KS}, {@link #AD AD}, {@link #CM CM}, {@link #WG WG}, * {@link #WU WU}, {@link #MEAN MEAN}, and {@link #COR COR}. * */ public static boolean[] activeTests = null; private static void initActiveTests() { activeTests = new boolean[NTESTTYPES]; for (int i = 0; i < activeTests.length; i++) activeTests[i] = false; activeTests[KSP] = activeTests[KSM] = true; activeTests[AD] = activeTests[MEAN] = activeTests[COR] = true; } static { initActiveTests(); } /** * Computes all EDF test statistics * to compare the empirical * distribution of * U(0),..., U(N-1) with the uniform distribution, * assuming that these sorted observations are in sortedData. * If N > 1, returns sVal with the values of the KS * statistics DN+, DN- and DN, of the Cramér-von Mises * statistic WN2, Watson's GN and UN2, Anderson-Darling's * AN2, and the average of the Ui's, respectively. * If N = 1, only puts 1 -sortedData.get (0) in sVal[KSP]. * Calling this method is more efficient than computing these statistics * separately by calling the corresponding methods in {@link GofStat}. * * @param sortedData array of sorted observations * * @param sVal array that will be filled with the results of the tests * * */ public static void tests (DoubleArrayList sortedData, double[] sVal) { double[] u = sortedData.elements(); int n = sortedData.size(); int i; double a2 = 0.0, w2, dm = 0.0, dp = 0.0, w; double u1, ui, d2, d1; double sumZ; double unSurN; if (n <= 0) throw new IllegalArgumentException ("n <= 0"); if (sVal.length != NTESTTYPES) throw new IllegalArgumentException ("sVal must " + "be of size NTESTTYPES."); // We assume that u is already sorted. 
if (n == 1) { sVal[KSP] = 1.0 - u[0]; sVal[MEAN] = u[0]; return; } unSurN = 1.0 / n; w2 = unSurN / 12.0; sumZ = 0.0; for (i = 0; i < n; i++) { // Statistics KS d1 = u[i] - i*unSurN; d2 = (i + 1)*unSurN - u[i]; if (d1 > dm) dm = d1; if (d2 > dp) dp = d2; // Watson U and G sumZ += u[i]; w = u[i] - (i + 0.5)*unSurN; w2 += w*w; // Anderson-Darling ui = u[i]; u1 = 1.0 - ui; if (ui < GofStat.EPSILONAD) ui = GofStat.EPSILONAD; else if (u1 < GofStat.EPSILONAD) u1 = GofStat.EPSILONAD; a2 += (2*i + 1) * Math.log (ui) + (1 + 2*(n - i - 1))*Math.log (u1); } if (dm > dp) sVal[KS] = dm; else sVal[KS] = dp; sVal[KSM] = dm; sVal[KSP] = dp; sumZ = sumZ * unSurN - 0.5; sVal[CM] = w2; sVal[WG] = Math.sqrt ((double) n) * (dp + sumZ); sVal[WU] = w2 - sumZ * sumZ * n; sVal[AD] = -n - a2 * unSurN; sVal[MEAN] = sumZ + 0.5; // Nouveau ... } /** * The observations V are in data, * not necessarily sorted, and their empirical * distribution is compared with the continuous distribution dist. * * If N = 1, only puts data.get (0) in sVal[MEAN], * and 1 -dist.cdf (data.get (0)) in sVal[KSP]. * * @param data array of observations to test * * @param dist assumed distribution of the observations * * @param sVal array that will be filled with the results of the tests * * */ public static void tests (DoubleArrayList data, ContinuousDistribution dist, double[] sVal) { double[] v = data.elements(); int n = data.size(); if (n <= 0) throw new IllegalArgumentException ("n <= 0"); DoubleArrayList sortedData = GofStat.unifTransform (data, dist); sortedData.quickSortFromTo (0, sortedData.size()-1); tests (sortedData, sVal); if (n == 1) sVal[MEAN] = v[0]; // On veut v[0], pas u[0]. } /** * Computes the EDF test statistics by calling * {@link #tests(DoubleArrayList,double[]) tests}, then computes the p-values of those * that currently belong to activeTests, * and return these quantities in sVal and pVal, respectively. 
* Assumes that * U(0),..., U(N-1) are in sortedData * and that we want to compare their empirical distribution * with the uniform distribution. * If N = 1, only puts 1 -sortedData.get (0) in * sVal[KSP], pVal[KSP], and pVal[MEAN]. * * @param sortedData array of sorted observations * * @param sVal array that will be filled with the results of the tests * * @param pVal array that will be filled with the p-values * * */ public static void activeTests (DoubleArrayList sortedData, double[] sVal, double[] pVal) { double[] u = sortedData.elements(); int n = sortedData.size(); if (n <= 0) throw new IllegalArgumentException ("n <= 0"); if (sVal.length != NTESTTYPES || pVal.length != NTESTTYPES) throw new IllegalArgumentException ("sVal and pVal must " + "be of length NTESTTYPES."); if (n == 1) { sVal[KSP] = 1.0 - u[0]; pVal[KSP] = 1.0 - u[0]; pVal[MEAN] = pVal[KSP]; return; } // We assume that u is already sorted. tests (sortedData, sVal); if (activeTests.length != NTESTTYPES) { initActiveTests(); System.err.println ("activeTests was invalid, it was reinitialized."); } if (activeTests[KSP]) pVal[KSP] = KolmogorovSmirnovPlusDist.barF (n, sVal[KSP]); if (activeTests[KSM]) pVal[KSM] = KolmogorovSmirnovPlusDist.barF (n, sVal[KSM]); if (activeTests[KS]) pVal[KS] = KolmogorovSmirnovDistQuick.barF (n, sVal[KS]); if (activeTests[AD]) pVal[AD] = AndersonDarlingDistQuick.barF (n, sVal[AD]); if (activeTests[CM]) pVal[CM] = CramerVonMisesDist.barF (n, sVal[CM]); if (activeTests[WG]) pVal[WG] = WatsonGDist.barF (n, sVal[WG]); if (activeTests[WU]) pVal[WU] = WatsonUDist.barF (n, sVal[WU]); } /** * The observations are in data, * not necessarily sorted, and we want to compare their empirical * distribution with the distribution dist. * If N = 1, only puts data.get(0) in sVal[MEAN], * and 1 -dist.cdf (data.get (0)) in sVal[KSP], pVal[KSP], * and pVal[MEAN]. 
* * @param data array of observations to test * * @param dist assumed distribution of the observations * * @param sVal array that will be filled with the results of the tests * * @param pVal array that will be filled with the p-values * * */ public static void activeTests (DoubleArrayList data, ContinuousDistribution dist, double[] sVal, double[] pVal) { double[] v = data.elements(); int n = data.size(); if (n <= 0) throw new IllegalArgumentException ("n <= 0"); DoubleArrayList sortedData = GofStat.unifTransform (data, dist); sortedData.quickSortFromTo (0, sortedData.size() - 1); activeTests (sortedData, sVal, pVal); if (n == 1) sVal[MEAN] = v[0]; } /** * Gets the p-values of the active EDF test statistics, * which are in activeTests. It is assumed that the values * of these statistics and their p-values are already computed, * in sVal and pVal, and that the sample size is n. * These statistics and p-values are formated * using {@link #formatp2 formatp2} for each one. * If n=1, prints only pVal[KSP] using {@link #formatp1 formatp1}. 
* * @param n sample size * * @param sVal array containing the results of the tests * * @param pVal array containing the p-values * * @return the results formated as a string * */ public static String formatActiveTests (int n, double[] sVal, double[] pVal) { if (activeTests.length != NTESTTYPES) { initActiveTests(); System.err.println ("activeTests was invalid, it was reinitialized."); } if (sVal.length != NTESTTYPES || pVal.length != NTESTTYPES) throw new IllegalArgumentException ("The length of " + "sVal and pVal must be NTESTTYPES."); if (n == 1) return formatp1 (pVal[KSP]);; StringBuffer sb = new StringBuffer (PrintfFormat.NEWLINE); if (activeTests[KSP]) sb.append ("Kolmogorov-Smirnov+ statistic = D+ :" + formatp2 (sVal[KSP], pVal[KSP])); if (activeTests[KSM]) sb.append ("Kolmogorov-Smirnov- statistic = D- :" + formatp2 (sVal[KSM], pVal[KSM])); if (activeTests[KS]) sb.append ("Kolmogorov-Smirnov statistic = D :" + formatp2 (sVal[KS], pVal[KS])); if (activeTests[AD]) sb.append ("Anderson-Darling statistic = A2 :" + formatp2 (sVal[AD], pVal[AD])); if (activeTests[CM]) sb.append ("Cramer-von Mises statistic = W2 :" + formatp2 (sVal[CM], pVal[CM])); if (activeTests[WG]) sb.append ("Watson statistic = G :" + formatp2 (sVal[WG], pVal[WG])); if (activeTests[WU]) sb.append ("Watson statistic = U2 :" + formatp2 (sVal[WU], pVal[WU])); sb.append (PrintfFormat.NEWLINE); return sb.toString(); } /** * Repeats the following k times: * Applies the {@link GofStat#iterateSpacings GofStat.iterateSpacings} * transformation to the * * U(0),..., U(N-1), assuming that these observations are in * sortedData, then computes the EDF test statistics and calls * {@link #activeTests(DoubleArrayList,double[],double[]) activeTests} after each transformation. * The function returns the original array sortedData (the * transformations are applied on a copy of sortedData). * If printval = true, stores all the values into the returned * {@link String} after each iteration. 
* If graph = true, calls {@link #graphDistUnif graphDistUnif} after each iteration * to print to stream f the data for plotting the distribution * function of the Ui. * * @param sortedData array containing the sorted observations * * @param k number of times the tests are applied * * @param printval if true, stores all the values of the observations at each iteration * * @param graph if true, the distribution of the Ui will be plotted after each * iteration * * @param f stream where the plots are written to * * @return a string representation of the test results * */ public static String iterSpacingsTests (DoubleArrayList sortedData, int k, boolean printval, boolean graph, PrintWriter f) { int n = sortedData.size(); DoubleArrayList sortedDataCopy = (DoubleArrayList)sortedData.clone(); DoubleArrayList diffArrayList = new DoubleArrayList(sortedData.size()+2); int j; int i; double[] sVal = new double[NTESTTYPES], pVal = new double[NTESTTYPES]; StringBuffer sb = new StringBuffer (PrintfFormat.NEWLINE); for (j = 1; j <= k; j++) { sb.append ("-----------------------------------" + PrintfFormat.NEWLINE + "EDF Tests after \"iterateSpacings\", level : " + PrintfFormat.d (2, j) + PrintfFormat.NEWLINE); GofStat.diff (sortedDataCopy, diffArrayList, 0, n - 1, 0.0, 1.0); GofStat.iterateSpacings (sortedDataCopy, diffArrayList); sortedDataCopy.quickSortFromTo (0, sortedDataCopy.size() - 1); activeTests (sortedDataCopy, sVal, pVal); sb.append (formatActiveTests (n, sVal, pVal)); String desc = "Values of Uniforms after iterateSpacings, level " + PrintfFormat.d (2, j); if (printval) { sb.append (desc + PrintfFormat.NEWLINE + "------------------------" + PrintfFormat.NEWLINE); sb.append (sortedDataCopy + PrintfFormat.NEWLINE); } if (graph && f != null) f.print (graphDistUnif (sortedDataCopy, desc)); else if (graph && f == null) sb.append (graphDistUnif (sortedDataCopy, desc)); } return sb.toString(); } /** * Similar to {@link #iterSpacingsTests iterSpacingsTests}, but with the * 
{@link GofStat#powerRatios GofStat.powerRatios} transformation. * * @param sortedData array containing the sorted observations * * @param k number of times the tests are applied * * @param printval if true, stores all the values of the observations at each iteration * * @param graph if true, the distribution of the Ui will be plotted after each * iteration * * @param f stream where the plots are written to * * @return a string representation of the test results */ public static String iterPowRatioTests (DoubleArrayList sortedData, int k, boolean printval, boolean graph, PrintWriter f) { int n = sortedData.size(); DoubleArrayList sortedDataCopy = (DoubleArrayList)sortedData.clone(); int i; int j; double[] sVal = new double[NTESTTYPES], pVal = new double[NTESTTYPES]; StringBuffer sb = new StringBuffer (PrintfFormat.NEWLINE); for (i = 1; i <= k; i++) { GofStat.powerRatios (sortedDataCopy); sb.append ("-----------------------------------" + PrintfFormat.NEWLINE + "EDF Tests after \"powerRatios\", level : " + PrintfFormat.d (2, i) + PrintfFormat.NEWLINE); sortedDataCopy.quickSortFromTo (0, sortedDataCopy.size() - 1); activeTests (sortedDataCopy, sVal, pVal); sb.append (formatActiveTests (n, sVal, pVal)); String desc = "Values of Uniforms after PowerRatios, level " + PrintfFormat.d (2, i); if (printval) { sb.append (desc + PrintfFormat.NEWLINE + "--------------------------" + PrintfFormat.NEWLINE); sb.append (sortedDataCopy + PrintfFormat.NEWLINE); } if (graph && f != null) f.print (graphDistUnif (sortedDataCopy, desc)); else if (graph && f == null) sb.append (graphDistUnif (sortedDataCopy, desc)); } return sb.toString(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy