cc.mallet.util.StatFunctions Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jcore-mallet-2.0.9 Show documentation
MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
The newest version!
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */




/** 
   @author Andrew McCallum [email protected]
 */

package cc.mallet.util;

import java.util.logging.*;

// Obtained from http://www.stat.vt.edu/~sundar/java/code/StatFunctions.html
// August 2002

/** * @(#)StatFunctions.java * * DAMAGE (c) 2000 by Sundar Dorai-Raj
  * * @author Sundar Dorai-Raj
  * * Email: [email protected]
  * * This program is free software; you can redistribute it and/or
  * * modify it under the terms of the GNU General Public License 
  * * as published by the Free Software Foundation; either version 2 
  * * of the License, or (at your option) any later version, 
  * * provided that any use properly credits the author. 
  * * This program is distributed in the hope that it will be useful,
  * * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  * * GNU General Public License for more details at http://www.gnu.org * * */

import cc.mallet.util.MalletLogger;

public final class StatFunctions {
	private static Logger logger = MalletLogger.getLogger(StatFunctions.class.getName());
  public static double cov(Univariate x,Univariate y) {
    double sumxy=0;
    int i,n=(x.size()>=y.size() ? x.size():y.size());
    try {
      for(i=0;i1)
      throw new IllegalArgumentException("Illegal argument "+p+" for qnorm(p).");
    double split=0.42,
           a0=  2.50662823884,
           a1=-18.61500062529,
           a2= 41.39119773534,
           a3=-25.44106049637,
           b1= -8.47351093090,
           b2= 23.08336743743,
           b3=-21.06224101826,
           b4=  3.13082909833,
           c0= -2.78718931138,
           c1= -2.29796479134,
           c2=  4.85014127135,
           c3=  2.32121276858,
           d1=  3.54388924762,
           d2=  1.63706781897,
           q=p-0.5;
    double r,ppnd;
    if(Math.abs(q)<=split) {
      r=q*q;
      ppnd=q*(((a3*r+a2)*r+a1)*r+a0)/((((b4*r+b3)*r+b2)*r+b1)*r+1);
    }
    else {
      r=p;
      if(q>0) r=1-p;
      if(r>0) {
        r=Math.sqrt(-Math.log(r));
        ppnd=(((c3*r+c2)*r+c1)*r+c0)/((d2*r+d1)*r+1);
        if(q<0) ppnd=-ppnd;
      }
      else {
        ppnd=0;
      }
    }
    if(upper) ppnd=1-ppnd;
    return(ppnd);
  }

  public static double qnorm(double p,boolean upper,double mu,double sigma2) {
    return(qnorm(p,upper)*Math.sqrt(sigma2)+mu);
  }

  public static double pnorm(double z,boolean upper) {
    /* Reference:
       I. D. Hill 
       Algorithm AS 66: "The Normal Integral"
       Applied Statistics
    */
    double ltone=7.0,
           utzero=18.66,
           con=1.28,
           a1 = 0.398942280444,
           a2 = 0.399903438504,
           a3 = 5.75885480458,
           a4 =29.8213557808,
           a5 = 2.62433121679,
           a6 =48.6959930692,
           a7 = 5.92885724438,
           b1 = 0.398942280385,
           b2 = 3.8052e-8,
           b3 = 1.00000615302,
           b4 = 3.98064794e-4,
           b5 = 1.986153813664,
           b6 = 0.151679116635,
           b7 = 5.29330324926,
           b8 = 4.8385912808,
           b9 =15.1508972451,
           b10= 0.742380924027,
           b11=30.789933034,
           b12= 3.99019417011;
    double y,alnorm;

    if(z<0) {
      upper=!upper;
      z=-z;
    }
    if(z<=ltone || upper && z<=utzero) {
      y=0.5*z*z;
      if(z>con) {
        alnorm=b1*Math.exp(-y)/(z-b2+b3/(z+b4+b5/(z-b6+b7/(z+b8-b9/(z+b10+b11/(z+b12))))));
      }
      else {
        alnorm=0.5-z*(a1-a2*y/(y+a3-a4/(y+a5+a6/(y+a7))));
      }
    }
    else {
      alnorm=0;
    }
    if(!upper) alnorm=1-alnorm;
    return(alnorm);
  }

  public static double pnorm(double x,boolean upper,double mu,double sigma2) {
    return(pnorm((x-mu)/Math.sqrt(sigma2),upper));
  }

  public static double qt(double p,double ndf,boolean lower_tail) {
    // Algorithm 396: Student's t-quantiles by
    // G.W. Hill CACM 13(10), 619-620, October 1970
    if(p<=0 || p>=1 || ndf<1) 
      throw new IllegalArgumentException("Invalid p or df in call to qt(double,double,boolean).");
    double eps=1e-12;
    double M_PI_2=1.570796326794896619231321691640; // pi/2
    boolean neg;
    double P,q,prob,a,b,c,d,y,x;
    if((lower_tail && p > 0.5) || (!lower_tail && p < 0.5)) {
       neg = false;
       P = 2 * (lower_tail ? (1 - p) : p);
     }
     else {
       neg = true;
       P = 2 * (lower_tail ? p : (1 - p));
     }

     if(Math.abs(ndf - 2) < eps) {   /* df ~= 2 */
       q = Math.sqrt(2 / (P * (2 - P)) - 2);
     }
     else if (ndf < 1 + eps) {   /* df ~= 1 */
       prob = P * M_PI_2;
       q = Math.cos(prob)/Math.sin(prob);
     }
     else {      /*-- usual case;  including, e.g.,  df = 1.1 */
       a = 1 / (ndf - 0.5);
       b = 48 / (a * a);
       c = ((20700 * a / b - 98) * a - 16) * a + 96.36;
       d = ((94.5 / (b + c) - 3) / b + 1) * Math.sqrt(a * M_PI_2) * ndf;
       y = Math.pow(d * P, 2 / ndf);
       if (y > 0.05 + a) {
         /* Asymptotic inverse expansion about normal */
         x = qnorm(0.5 * P,false);
         y = x * x;
         if (ndf < 5)
           c += 0.3 * (ndf - 4.5) * (x + 0.6);
         c = (((0.05 * d * x - 5) * x - 7) * x - 2) * x + b + c;
         y = (((((0.4 * y + 6.3) * y + 36) * y + 94.5) / c - y - 3) / b + 1) * x;
         y = a * y * y;
         if (y > 0.002)/* FIXME: This cutoff is machine-precision dependent*/
           y = Math.exp(y) - 1;
         else { /* Taylor of    e^y -1 : */
           y = (0.5 * y + 1) * y;
         }
       }
       else {
         y = ((1 / (((ndf + 6) / (ndf * y) - 0.089 * d - 0.822)
             * (ndf + 2) * 3) + 0.5 / (ndf + 4))
             * y - 1) * (ndf + 1) / (ndf + 2) + 1 / y;
       }
       q = Math.sqrt(ndf * y);
     }
     if(neg) q = -q;
     return q;
  }

  public static double pt(double t,double df) {
    // ALGORITHM AS 3  APPL. STATIST. (1968) VOL.17, P.189
    // Computes P(T=2) {
      for(k=ks;k<=im2;k+=2) {
        c=c*b*(fk-1)/fk;
        s+=c;
        if(s!=idf) {
          idf=s;
          fk+=2;
        }
      }
    }
    if(ioe!=1)
      return 0.5+0.5*a*Math.sqrt(b)*s;
    if(df==1) s=0;
    return 0.5+(a*b*s+Math.atan(a))*g1;
  }

  public double pchisq(double q,double df) {
    // Posten, H. (1989) American Statistician 43 p. 261-265
    double df2=df*.5;
    double q2=q*.5;
    int n=5,k;
    double tk,CFL,CFU,prob;
    if(q<=0 || df<=0)
      throw new IllegalArgumentException("Illegal argument "+q+" or "+df+" for qnorm(p).");
    if(q1;k--)
       tk=q2*(1-k-df2)/(df2+2*k-1+k*q2/(df2+2*k+tk));
     CFL=1-q2/(df2+1+q2/(df2+2+tk));
     prob=Math.exp(df2*Math.log(q2)-q2-Maths.logGamma(df2+1)-Math.log(CFL));
    }
    else {
      tk=(n-df2)/(q2+n);
      for(k=n-1;k>1;k--)
        tk=(k-df2)/(q2+k/(1+tk));
      CFU=1+(1-df2)/(q2+1/(1+tk));
      prob=1-Math.exp((df2-1)*Math.log(q2)-q2-Maths.logGamma(df2)-Math.log(CFU));
    }
    return prob;
  }
  
  public static double betainv(double x,double p,double q) {
    // ALGORITHM AS 63 APPL. STATIST. VOL.32, NO.1
    // Computes P(Beta>x)
    double beta=Maths.logBeta(p,q),acu=1E-14;
    double cx,psq,pp,qq,x2,term,ai,betain,ns,rx,temp;
    boolean indx;
    if(p<=0 || q<=0) return(-1.0);
    if(x<=0 || x>=1) return(-1.0);
    psq=p+q;
    cx=1-x;
    if(pacu && temp>acu*betain) {
      term=term*temp*rx/(pp+ai);
      betain=betain+term;
      temp=Math.abs(term);
      if(temp>acu && temp>acu*betain) {
        ai++;
        ns--;
        if(ns>=0) {
          temp=qq-ai;
          if(ns==0) rx=x2;
        }
        else {
          temp=psq;
          psq+=1;
        }
      }
    }
    betain*=Math.exp(pp*Math.log(x2)+(qq-1)*Math.log(cx)-beta)/pp;
    if(indx) betain=1-betain;
    return(betain);
  }

  public static double pf(double x,double df1,double df2) {
    // ALGORITHM AS 63 APPL. STATIST. VOL.32, NO.1
    // Computes P(F>x)
    return(betainv(df1*x/(df1*x+df2),0.5*df1,0.5*df2));
  }  
}