
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Andrew McCallum
*/
package cc.mallet.optimize.tests;
import junit.framework.*;
import java.util.logging.*;
import java.io.*;
import java.util.Random;
import cc.mallet.classify.*;
import cc.mallet.optimize.LineOptimizer;
import cc.mallet.optimize.Optimizable;
import cc.mallet.pipe.*;
import cc.mallet.pipe.iterator.*;
import cc.mallet.types.*;
import cc.mallet.util.*;
/**
* Contains static methods for testing implementations of
* Optimizable and Optimizable.ByGradientValue. Especially
* useful are the methods that verify the consistency of the value
* and gradient functions of an instance of
* Optimizable.ByGradientValue.
*/
public class TestOptimizable extends TestCase
{
private static Logger logger =
MalletLogger.getLogger(TestOptimizable.class.getName());
public TestOptimizable (String name) {
super (name);
}
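// A minimal usage sketch (illustration only; the concrete implementation is
// assumed): any Optimizable.ByGradientValue can be checked directly through
// the static methods below, e.g.
//
//   Optimizable.ByGradientValue maxable = ...;  // your implementation
//   TestOptimizable.testGetSetParameters (maxable);
//   TestOptimizable.testValueAndGradient (maxable);
//   TestOptimizable.testValueAndGradientRandomParameters (maxable, new java.util.Random (1));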
static private int numComponents = -1;
/**
* Sets the number of gradient components that will be checked.
* If negative, all will be checked.
*/
public static void setNumComponents (int n) { numComponents = n; }
/**
* Tests that parameters set by setParameters can be retrieved by
* getParameters.
* @param maxable Instance of an Optimizable that should be tested.
* Its current parameters will be overwritten.
*/
public static boolean testGetSetParameters (Optimizable maxable)
{
System.out.println ("TestMaximizable testGetSetParameters");
// Set all the parameters to unique values using setParameters()
double[] parameters = new double [maxable.getNumParameters()];
maxable.getParameters (parameters);
for (int i = 0; i < parameters.length; i++)
parameters[i] = (double)i;
maxable.setParameters (parameters);
// Test to make sure those parameters are there
MatrixOps.setAll (parameters, 0.0);
maxable.getParameters (parameters);
for (int i = 0; i < parameters.length; i++)
assertTrue (parameters[i] == (double)i);
return true;
}
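/**
* Tests that the value and gradient are consistent along a single search
* direction: takes a step of size epsilon along the (abs-normalized)
* direction and compares the resulting finite-difference slope with the
* dot product of the analytic gradient and that direction.
* Restores the original parameters before returning.
* @return the absolute difference between the two slopes.
*/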
public static double
testValueAndGradientInDirection (Optimizable.ByGradientValue maxable, double[] direction)
{
int numParameters = maxable.getNumParameters();
assert (numParameters == direction.length);
double[] oldParameters = new double[numParameters];
double[] parameters = new double[numParameters];
double[] normalizedDirection = direction.clone();
MatrixOps.absNormalize(normalizedDirection);
double value = maxable.getValue();
// the gradient from the optimizable function
double[] analyticGradient = new double[numParameters];
maxable.getParameters (parameters);
maxable.getParameters (oldParameters);
maxable.getValueGradient (analyticGradient);
// the gradient calculated from the slope of the value
// This setting of epsilon should make the individual elements of
// the analytical gradient and the empirical gradient equal. This
// simplifies the comparison of the individual dimensions of the
// gradient and thus makes debugging easier.
double directionGradient = MatrixOps.dotProduct (analyticGradient, normalizedDirection);
double epsilon = 0.1 / MatrixOps.absNorm(analyticGradient);
double tolerance = 0.00001 * directionGradient; // this was "epsilon * 5";
System.out.println ("epsilon = "+epsilon+" tolerance="+tolerance);
MatrixOps.plusEquals (parameters, normalizedDirection, epsilon);
//logger.fine ("Parameters:"); parameters.print();
maxable.setParameters (parameters);
double epsValue = maxable.getValue();
double slope = (epsValue - value) / epsilon;
System.out.println ("value="+value+" epsilon="+epsilon+" epsValue="+
epsValue+" slope = "+slope+" gradient="+directionGradient);
assert (!Double.isNaN (slope));
double slopeDifference = Math.abs(slope - directionGradient);
logger.info ("TestMaximizable "+
": slope tolerance = "+tolerance+
": gradient slope = "+directionGradient+
", value+epsilon slope = "+slope+
": slope difference = "+slopeDifference);
maxable.setParameters (oldParameters);
assert (Math.abs(slopeDifference) < tolerance) : "Slope difference "+slopeDifference+" is greater than tolerance "+tolerance;
return slopeDifference;
}
/**
* Tests that the value and gradient functions are consistent
* at the current parameters.
* Computes both the analytic gradient (the one given by
* maxable.getValueGradient) and the empirical gradient,
* which is (if x are the current parameters and f the function
* computed by maxable) (f(x + epsilon) - f(x)) / epsilon. Verifies
* that the angle between the empirical and analytic gradients
* is close to 0.
* @see #testValueAndGradient testValueAndGradient
* @see #testValueAndGradientRandomParameters testValueAndGradientRandomParameters
* @throws IllegalStateException If the angle is above the tolerance
*/
public static double
testValueAndGradientCurrentParameters (Optimizable.ByGradientValue maxable)
{
double[] parameters = new double [maxable.getNumParameters()];
double value = maxable.getValue();
// the gradient from the maximizable function
double[] analyticGradient = new double[maxable.getNumParameters()];
double[] empiricalGradient = new double[maxable.getNumParameters()];
maxable.getParameters (parameters);
maxable.getValueGradient (analyticGradient);
// the gradient calculated from the slope of the value
maxable.getValueGradient (empiricalGradient);
// This setting of epsilon should make the individual elements of
// the analytical gradient and the empirical gradient equal. This
// simplifies the comparison of the individual dimensions of the
// gradient and thus makes debugging easier.
// cas: However, avoid huge epsilon if norm of analytic gradient is
// close to 0.
// Next line used to be: double norm = Math.max (0.1, MatrixOps.twoNorm(analyticGradient));
// but if all the components of the analyticalGradient are very small, the squaring in the
// twoNorm causes epsilon to be too large. -AKM
double norm = Math.max (0.1, MatrixOps.absNorm(analyticGradient));
double epsilon = 0.1 / norm;
double tolerance = epsilon * 5;
System.out.println ("epsilon = "+epsilon+" tolerance="+tolerance);
int sampleParameterInterval = -1;
if (numComponents > 0) {
sampleParameterInterval = Math.max (1, parameters.length / numComponents);
logger.info ("Will check every "+sampleParameterInterval+"-th component.");
}
// Check each direction, perturb it, measure new value, and make
// sure it agrees with the gradient from
// maxable.getValueGradient()
for (int i = 0; i < parameters.length; i++) {
// { int i = 0; // Uncomment this line to debug one parameter at a time -cas
if ((parameters.length >= sampleParameterInterval) &&
(i % sampleParameterInterval != 0))
continue;
double param = parameters[i];
parameters[i] = param + epsilon;
//logger.fine ("Parameters:"); parameters.print();
maxable.setParameters (parameters);
double epsValue = maxable.getValue();
double slope = (epsValue - value) / epsilon;
System.out.println ("value="+value+" epsValue="+epsValue+" slope["+i+"] = "+slope+" gradient[]="+analyticGradient[i]);
assert (!Double.isNaN (slope));
logger.info ("TestMaximizable checking singleIndex "+i+
": gradient slope = "+analyticGradient[i]+
", value+epsilon slope = "+slope+
": slope difference = "+(slope - analyticGradient[i]));
// No negative below because the gradient points in the direction
// of maximizing the function.
empiricalGradient[i] = slope;
parameters[i] = param;
}
// Normalize both gradient vectors to unit L2 length
System.out.println ("analyticGradient.twoNorm = "+
MatrixOps.twoNorm(analyticGradient));
System.out.println ("empiricalGradient.twoNorm = "+
MatrixOps.twoNorm(empiricalGradient));
MatrixOps.timesEquals (analyticGradient,
1.0/MatrixOps.twoNorm(analyticGradient));
MatrixOps.timesEquals (empiricalGradient,
1.0/MatrixOps.twoNorm(empiricalGradient));
/*
System.out.println("N ANA EMP");
for (int i = 0; i < analyticGradient.length; i++) {
System.out.println(i+" "+analyticGradient[i]+" "+empiricalGradient[i]);
}
*/
// Return the angle between the two vectors, in radians
double dot = MatrixOps.dotProduct (analyticGradient,empiricalGradient);
if (Maths.almostEquals (dot, 1.0)) {
logger.info ("TestMaximizable angle is zero.");
return 0.0;
} else {
double angle = Math.acos (dot);
logger.info ("TestMaximizable angle = "+angle);
if (Math.abs(angle) > tolerance)
throw new IllegalStateException ("Gradient/Value mismatch: angle="+
angle + " tol: " + tolerance);
if (Double.isNaN (angle))
throw new IllegalStateException ("Gradient/Value error: angle is NaN!");
return angle;
}
}
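// A worked illustration of the check above (a sketch), using the SimplePoly
// defined later in this file, f(x) = 3x^2 - 5x + 2: at x = 0 the analytic
// gradient is -5 and the finite-difference slope is
// (f(epsilon) - f(0)) / epsilon = 3*epsilon - 5, e.g. -4.94 for
// epsilon = 0.02. In one dimension both normalized gradients are -1, so
// their dot product is 1 and the angle between them is 0.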
/**
* Tests that getValue and getValueGradient are consistent.
* Tests for consistency at params = 0 and at
* params = -0.0001 * grad(f)
* @see #testValueAndGradientCurrentParameters testValueAndGradientCurrentParameters
* @throws IllegalStateException If the test fails.
*/
public static boolean testValueAndGradient (Optimizable.ByGradientValue maxable)
{
double[] parameters = new double [maxable.getNumParameters()];
MatrixOps.setAll (parameters, 0.0);
maxable.setParameters (parameters);
testValueAndGradientCurrentParameters (maxable);
MatrixOps.setAll (parameters, 0.0);
double[] delta = new double[maxable.getNumParameters()];
maxable.getValueGradient (delta);
logger.info ("Gradient two-Norm = "+MatrixOps.twoNorm(delta));
logger.info (" max parameter change = "+(MatrixOps.infinityNorm(delta) * -0.001));
MatrixOps.timesEquals (delta, -0.0001);
MatrixOps.plusEquals (parameters, delta);
maxable.setParameters (parameters);
testValueAndGradientCurrentParameters (maxable);
return true;
}
/**
* Tests that getValue and getValueGradient are consistent
* at a random parameter setting.
* @see #testValueAndGradientCurrentParameters testValueAndGradientCurrentParameters
* @throws IllegalStateException If the test fails.
*/
public static boolean testValueAndGradientRandomParameters
(Optimizable.ByGradientValue maxable, Random r)
{
double[] params = new double [maxable.getNumParameters()];
for (int i = 0; i < params.length; i++) {
params[i] = r.nextDouble ();
if (r.nextBoolean ())
params [i] = -params[i];
}
maxable.setParameters (params);
testValueAndGradientCurrentParameters (maxable);
return true;
}
// Optimizable.ByGradientValue for f(x) = 3x^2 - 5x + 2
static class SimplePoly implements Optimizable.ByGradientValue {
double[] params = new double [1];
public void getParameters(double[] doubleArray) {
doubleArray [0] = params [0];
}
public int getNumParameters() { return 1; }
public double getParameter(int n) { return params [n]; }
public void setParameters(double[] doubleArray) {
params [0] = doubleArray [0];
}
public void setParameter(int n, double d) { params[n] = d; }
public double getValue () {
return 3*params[0]*params[0] - 5 * params[0] + 2;
}
public void getValueGradient (double[] buffer)
{
buffer [0] = 6*params [0] - 5; // d/dx (3x^2 - 5x + 2) = 6x - 5
}
}
static class WrongSimplePoly extends SimplePoly {
public void getValueGradient (double[] buffer)
{
buffer [0] = 3*params [0]; // WRONG: should be 6*params[0] - 5
}
}
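// Because the -5 term is dropped, WrongSimplePoly's analytic gradient at the
// origin is 0 while the finite-difference slope there is clearly nonzero, so
// testValueAndGradientCurrentParameters detects the mismatch and throws.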
public void testTestValueAndGradient ()
{
SimplePoly maxable = new SimplePoly ();
testValueAndGradient (maxable);
try {
WrongSimplePoly badMaxable = new WrongSimplePoly ();
testValueAndGradient (badMaxable);
fail ("WrongSimplyPoly should fail testMaxmiziable!");
} catch (Exception e) {}
}
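// A companion check in the same style (a minimal sketch): the get/set
// round trip of testGetSetParameters, exercised on SimplePoly.
public void testTestGetSetParameters ()
{
SimplePoly maxable = new SimplePoly ();
assertTrue (testGetSetParameters (maxable));
}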
public static Test suite ()
{
return new TestSuite (TestOptimizable.class);
}
protected void setUp ()
{
}
public static void main (String[] args)
{
junit.textui.TestRunner.run (suite());
}
}