cc.mallet.util.Maths Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of mallet Show documentation
MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
The newest version!
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */




/** 
   @author Andrew McCallum [email protected]
 */

package cc.mallet.util;

// Math and statistics functions
public final class Maths {

	// From libbow, dirichlet.c
	// Written by Tom Minka 
	public static final double logGamma (double x)
	{
		double result, y, xnum, xden;
		int i;
		final double d1 = -5.772156649015328605195174e-1;
		final double p1[] = { 
			4.945235359296727046734888e0, 2.018112620856775083915565e2, 
			2.290838373831346393026739e3, 1.131967205903380828685045e4, 
			2.855724635671635335736389e4, 3.848496228443793359990269e4, 
			2.637748787624195437963534e4, 7.225813979700288197698961e3 
		};
		final double q1[] = {
			6.748212550303777196073036e1, 1.113332393857199323513008e3, 
			7.738757056935398733233834e3, 2.763987074403340708898585e4, 
			5.499310206226157329794414e4, 6.161122180066002127833352e4, 
			3.635127591501940507276287e4, 8.785536302431013170870835e3
		};
		final double d2 = 4.227843350984671393993777e-1;
		final double p2[] = {
			4.974607845568932035012064e0, 5.424138599891070494101986e2, 
			1.550693864978364947665077e4, 1.847932904445632425417223e5, 
			1.088204769468828767498470e6, 3.338152967987029735917223e6, 
			5.106661678927352456275255e6, 3.074109054850539556250927e6
		};
		final double q2[] = {
			1.830328399370592604055942e2, 7.765049321445005871323047e3, 
			1.331903827966074194402448e5, 1.136705821321969608938755e6, 
			5.267964117437946917577538e6, 1.346701454311101692290052e7, 
			1.782736530353274213975932e7, 9.533095591844353613395747e6
		};
		final double d4 = 1.791759469228055000094023e0;
		final double p4[] = {
			1.474502166059939948905062e4, 2.426813369486704502836312e6, 
			1.214755574045093227939592e8, 2.663432449630976949898078e9, 
			2.940378956634553899906876e10, 1.702665737765398868392998e11, 
			4.926125793377430887588120e11, 5.606251856223951465078242e11
		};
		final double q4[] = {
			2.690530175870899333379843e3, 6.393885654300092398984238e5, 
			4.135599930241388052042842e7, 1.120872109616147941376570e9, 
			1.488613728678813811542398e10, 1.016803586272438228077304e11, 
			3.417476345507377132798597e11, 4.463158187419713286462081e11
		};
		final double c[] = {
			-1.910444077728e-03, 8.4171387781295e-04, 
			-5.952379913043012e-04, 7.93650793500350248e-04, 
			-2.777777777777681622553e-03, 8.333333333333333331554247e-02, 
			5.7083835261e-03
		};
		final double a = 0.6796875;

		if((x <= 0.5) || ((x > a) && (x <= 1.5))) {
			if(x <= 0.5) {
				result = -Math.log(x);
				/*  Test whether X < machine epsilon. */
				if(x+1 == 1) {
					return result;
				}
			}
			else {
				result = 0;
				x = (x - 0.5) - 0.5;
			}
			xnum = 0;
			xden = 1;
			for(i=0;i<8;i++) {
				xnum = xnum * x + p1[i];
				xden = xden * x + q1[i];
			}
			result += x*(d1 + x*(xnum/xden));
		}
		else if((x <= a) || ((x > 1.5) && (x <= 4))) {
			if(x <= a) {
				result = -Math.log(x);
				x = (x - 0.5) - 0.5;
			}
			else {
				result = 0;
				x -= 2;
			}
			xnum = 0;
			xden = 1;
			for(i=0;i<8;i++) {
				xnum = xnum * x + p2[i];
				xden = xden * x + q2[i];
			}
			result += x*(d2 + x*(xnum/xden));
		}
		else if(x <= 12) {
			x -= 4;
			xnum = 0;
			xden = -1;
			for(i=0;i<8;i++) {
				xnum = xnum * x + p4[i];
				xden = xden * x + q4[i];
			}
			result = d4 + x*(xnum/xden);
		}
		/*  X > 12  */
		else {
			y = Math.log(x);
			result = x*(y - 1) - y*0.5 + .9189385332046727417803297;
			x = 1/x;
			y = x*x;
			xnum = c[6];
			for(i=0;i<6;i++) {
				xnum = xnum * y + c[i];
			}
			xnum *= x;
			result += xnum;
		}
		return result;
	}
	
	// This is from "Numeric Recipes in C"
  public static double oldLogGamma (double x) {
    int j;
    double y, tmp, ser;
    double [] cof = {76.18009172947146, -86.50532032941677 ,
                     24.01409824083091, -1.231739572450155 ,
                      0.1208650973866179e-2, -0.5395239384953e-5};
    y = x;
    tmp = x + 5.5 - (x + 0.5) * Math.log (x + 5.5);
    ser = 1.000000000190015;
    for (j = 0; j <= 5; j++)
      ser += cof[j] / ++y;
    return Math.log (2.5066282746310005 * ser / x) - tmp;
  }

  public static double logBeta (double a, double b) {
    return logGamma(a)+logGamma(b)-logGamma(a+b);
  }

  public static double beta (double a, double b) {
    return Math.exp (logBeta(a,b));
  }

  public static double gamma (double x) {
    return Math.exp (logGamma(x));
  }

  public static double factorial (int n) {
    return Math.exp (logGamma(n+1));
  }

  public static double logFactorial (int n) {
    return logGamma(n+1);
  }

  /**
   * Computes p(x;n,p) where x~B(n,p)
   */
  // Copied as the "classic" method from Catherine Loader.
  //  Fast and Accurate Computation of Binomial Probabilities.
  //   2001.  (This is not the fast and accurate version.)
  public static double logBinom (int x, int n, double p)
  {
    return logFactorial (n) - logFactorial (x) - logFactorial (n - x)
            + (x*Math.log (p)) + ((n-x)*Math.log (1-p));
  }

  /**
   * Vastly inefficient O(x) method to compute cdf of B(n,p)
   */
  public static double pbinom (int x, int n, double p) {
    double sum = Double.NEGATIVE_INFINITY;
    for (int i = 0; i <= x; i++) {
      sum = sumLogProb (sum, logBinom (i, n, p));
    }
    return Math.exp (sum);
  }

  public static double sigmod(double beta){
	return (double)1.0/(1.0+Math.exp(-beta));
  }

  public static double sigmod_rev(double sig){
	return (double)Math.log(sig/(1-sig));
  }

  public static double logit (double p)
  {
    return Math.log (p / (1 - p));
  }

  // Combination?
  public static double numCombinations (int n, int r) {
    return Math.exp (logFactorial(n)-logFactorial(r)-logFactorial(n-r));
  }

  // Permutation?
  public static double numPermutations (int n, int r) {
    return Math.exp (logFactorial(n)-logFactorial(r));
  }


	public static double cosh (double a)
	{
		if (a < 0)
			return 0.5 * (Math.exp(-a) + Math.exp(a));
		else
			return 0.5 * (Math.exp(a) + Math.exp(-a));
		}

	public static double tanh (double a)
	{
		return (Math.exp(a) - Math.exp(-a)) / (Math.exp(a) + Math.exp(-a));
	}

	/**
	 * Numbers that are closer than this are considered equal
	 * by almostEquals.
	 */
	public static double EPSILON = 0.000001;

	public static boolean almostEquals (double d1, double d2)
  {
		return almostEquals (d1, d2, EPSILON);
	}
  
   public static boolean almostEquals (double d1, double d2, double epsilon)
   {
		return Math.abs (d1 - d2) < epsilon;
	}

  public static boolean almostEquals (double[] d1, double[] d2, double eps)
 {
   for (int i = 0; i < d1.length; i++) {
     double v1 = d1[i];
     double v2 = d2[i];
     if (!almostEquals (v1, v2, eps)) return false;
   }
   return true;
 }

  // gsc
  /**
   * Checks if min <= value <= max.
   */
  public static boolean checkWithinRange(double value, double min, double max) {
    return (value > min || almostEquals(value, min, EPSILON)) &&
           (value < max || almostEquals(value, max, EPSILON));
  }

  public static final double log2 = Math.log(2);

  // gsc
  /**
   * Returns the KL divergence, K(p1 || p2).
   *
   * The log is w.r.t. base 2. 
   *
   * *Note*: If any value in p2 is 0.0 then the KL-divergence
   * is infinite. 
   * 
   */
  public static double klDivergence(double[] p1, double[] p2) {
    assert (p1.length == p2.length);
    double klDiv = 0.0;
    for (int i = 0; i < p1.length; ++i) {
      if (p1[i] == 0) {
        continue;
      }
      if (p2[i] == 0) {
        return Double.POSITIVE_INFINITY;
      }
      klDiv += p1[i] * Math.log(p1[i] / p2[i]);
    }
    return klDiv / log2; // moved this division out of the loop -DM
  }

  // gsc
  /**
   * Returns the Jensen-Shannon divergence.
   */
  public static double jensenShannonDivergence(double[] p1, double[] p2) {
    assert(p1.length == p2.length);
    double[] average = new double[p1.length];
    for (int i = 0; i < p1.length; ++i) {
      average[i] += (p1[i] + p2[i])/2;
    }
    return (klDivergence(p1, average) + klDivergence(p2, average))/2;
  }

  /**
	 *  Returns the sum of two doubles expressed in log space,
	 *   that is,
	 * 
	 *    sumLogProb = log (e^a + e^b)
   *               = log e^a(1 + e^(b-a))
   *               = a + log (1 + e^(b-a))
	 * 
	 *
	 * By exponentiating b-a, we obtain better numerical precision than
	 *  we would if we calculated e^a or e^b directly.
	 * 
	 * Note: This function is just like 
   *  {@link cc.mallet.fst.Transducer#sumNegLogProb sumNegLogProb}
	 *   in Transducer,
   *   except that the logs aren't negated.
	 */
	public static double sumLogProb (double a, double b)
	{
		if (a == Double.NEGATIVE_INFINITY) 
			return b;
		else if (b == Double.NEGATIVE_INFINITY)
			return a;
		else if (b < a)
			return a + Math.log (1 + Math.exp(b-a));
		else
			return b + Math.log (1 + Math.exp(a-b));
	}

  // Below from Stanford NLP package, SloppyMath.java

  private static final double LOGTOLERANCE = 30.0;

  /**
   * Sums an array of numbers log(x1)...log(xn).  This saves some of
   *  the unnecessary calls to Math.log in the two-argument version.
   * 

   * Note that this implementation IGNORES elements of the input
   *  array that are more than LOGTOLERANCE (currently 30.0) less
   *  than the maximum element.
   * 

   * Cursory testing makes me wonder if this is actually much faster than
   *  repeated use of the 2-argument version, however -cas.
   * @param vals An array log(x1), log(x2), ..., log(xn)
   * @return log(x1+x2+...+xn)
   */
  public static double sumLogProb (double[] vals)
  {
    double max = Double.NEGATIVE_INFINITY;
    int len = vals.length;
    int maxidx = 0;

    for (int i = 0; i < len; i++) {
      if (vals[i] > max) {
        max = vals[i];
        maxidx = i;
      }
    }

    boolean anyAdded = false;
    double intermediate = 0.0;
    double cutoff = max - LOGTOLERANCE;

    for (int i = 0; i < maxidx; i++) {
      if (vals[i] >= cutoff) {
        anyAdded = true;
        intermediate += Math.exp(vals[i] - max);
      }
    }
    for (int i = maxidx + 1; i < len; i++) {
      if (vals[i] >= cutoff) {
        anyAdded = true;
        intermediate += Math.exp(vals[i] - max);
      }
    }

    if (anyAdded) {
      return max + Math.log(1.0 + intermediate);
    } else {
      return max;
    }

  }

  /**
   *  Returns the difference of two doubles expressed in log space,
   *   that is,
   * 
   *    sumLogProb = log (e^a - e^b)
   *               = log e^a(1 - e^(b-a))
   *               = a + log (1 - e^(b-a))
   * 
   *
   * By exponentiating b-a, we obtain better numerical precision than
   *  we would if we calculated e^a or e^b directly.
   * 
   * Returns NaN if b > a (so that log(e^a - e^b) is undefined).
   */
  public static double subtractLogProb (double a, double b)
  {
    if (b == Double.NEGATIVE_INFINITY)
      return a;
    else
      return a + Math.log (1 - Math.exp(b-a));
  }

  public static double getEntropy(double[] dist) {
  	double entropy = 0;
  	for (int i = 0; i < dist.length; i++) {
  		if (dist[i] != 0) {
  			entropy -= dist[i] * Math.log(dist[i]);
  		}
  	}
  	return entropy;
  }
  
}