
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Andrew McCallum
*/
package cc.mallet.optimize.tests;
import junit.framework.*;
import java.util.logging.*;
import java.io.*;
import java.util.Random;
import cc.mallet.classify.*;
import cc.mallet.optimize.LineOptimizer;
import cc.mallet.optimize.Optimizable;
import cc.mallet.pipe.*;
import cc.mallet.pipe.iterator.*;
import cc.mallet.types.*;
import cc.mallet.util.*;
/**
* Contains static methods for testing implementations of
* Optimizable and Optimizable.ByGradientValue. Especially
* useful are the methods that verify the consistency of the value
* and gradient functions of an instance of
* Optimizable.ByGradientValue.
*/
public class TestOptimizable extends TestCase
{
private static Logger logger =
MalletLogger.getLogger(TestOptimizable.class.getName());
public TestOptimizable (String name) {
super (name);
}
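// A minimal usage sketch (illustration only; the concrete implementation is
// assumed): any Optimizable.ByGradientValue can be checked directly through
// the static methods below, e.g.
//
//   Optimizable.ByGradientValue maxable = ...;  // your implementation
//   TestOptimizable.testGetSetParameters (maxable);
//   TestOptimizable.testValueAndGradient (maxable);
//   TestOptimizable.testValueAndGradientRandomParameters (maxable, new java.util.Random (1));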
static private int numComponents = -1;
/**
* Sets the number of gradient components that will be checked.
* If negative, all will be checked.
*/
public static void setNumComponents (int n) { numComponents = n; }
/**
* Tests that parameters set by setParameters can be retrieved by
* getParameters.
* @param maxable Instance of an Optimizable that should be tested.
* Its current parameters will be overwritten.
*/
public static boolean testGetSetParameters (Optimizable maxable)
{
System.out.println ("TestMaximizable testGetSetParameters");
// Set all the parameters to unique values using setParameters()
double[] parameters = new double [maxable.getNumParameters()];
maxable.getParameters (parameters);
for (int i = 0; i < parameters.length; i++)
parameters[i] = (double)i;
maxable.setParameters (parameters);
// Test to make sure those parameters are there
MatrixOps.setAll (parameters, 0.0);
maxable.getParameters (parameters);
for (int i = 0; i < parameters.length; i++)
assertTrue (parameters[i] == (double)i);
return true;
}
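/**
* Tests that the value and gradient are consistent along a single search
* direction: takes a step of size epsilon along the (abs-normalized)
* direction and compares the resulting finite-difference slope with the
* dot product of the analytic gradient and that direction.
* Restores the original parameters before returning.
* @return the absolute difference between the two slopes.
*/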
public static double
testValueAndGradientInDirection (Optimizable.ByGradientValue maxable, double[] direction)
{
int numParameters = maxable.getNumParameters();
assert (numParameters == direction.length);
double[] oldParameters = new double[numParameters];
double[] parameters = new double[numParameters];
double[] normalizedDirection = direction.clone();
MatrixOps.absNormalize(normalizedDirection);
double value = maxable.getValue();
// the gradient from the optimizable function
double[] analyticGradient = new double[numParameters];
maxable.getParameters (parameters);
maxable.getParameters (oldParameters);
maxable.getValueGradient (analyticGradient);
// the gradient calculated from the slope of the value
// This setting of epsilon should make the individual elements of
// the analytical gradient and the empirical gradient equal. This
// simplifies the comparison of the individual dimensions of the
// gradient and thus makes debugging easier.
double directionGradient = MatrixOps.dotProduct (analyticGradient, normalizedDirection);
double epsilon = 0.1 / MatrixOps.absNorm(analyticGradient);
double tolerance = 0.00001 * directionGradient; // this was "epsilon * 5";
System.out.println ("epsilon = "+epsilon+" tolerance="+tolerance);
MatrixOps.plusEquals (parameters, normalizedDirection, epsilon);
//logger.fine ("Parameters:"); parameters.print();
maxable.setParameters (parameters);
double epsValue = maxable.getValue();
double slope = (epsValue - value) / epsilon;
System.out.println ("value="+value+" epsilon="+epsilon+" epsValue="+
epsValue+" slope = "+slope+" gradient="+directionGradient);
assert (!Double.isNaN (slope));
double slopeDifference = Math.abs(slope - directionGradient);
logger.info ("TestMaximizable "+
": slope tolerance = "+tolerance+
": gradient slope = "+directionGradient+
", value+epsilon slope = "+slope+
": slope difference = "+slopeDifference);
maxable.setParameters (oldParameters);
assert (Math.abs(slopeDifference) < tolerance) : "Slope difference "+slopeDifference+" is greater than tolerance "+tolerance;
return slopeDifference;
}
/**
* Tests that the value and gradient functions are consistent
* at the current parameters.
* Computes both the analytic gradient (the one given by
* maxable.getValueGradient) and the empirical gradient,
* which is (if x are the current parameters and f the function
* computed by maxable) (f(x + epsilon) - f(x)) / epsilon. Verifies
* that the angle between the empirical and analytic gradients
* is close to 0.
* @see #testValueAndGradient testValueAndGradient
* @see #testValueAndGradientRandomParameters testValueAndGradientRandomParameters
* @throws IllegalStateException If the angle is above the tolerance
*/
public static double
testValueAndGradientCurrentParameters (Optimizable.ByGradientValue maxable)
{
double[] parameters = new double [maxable.getNumParameters()];
double value = maxable.getValue();
// the gradient from the maximizable function
double[] analyticGradient = new double[maxable.getNumParameters()];
double[] empiricalGradient = new double[maxable.getNumParameters()];
maxable.getParameters (parameters);
maxable.getValueGradient (analyticGradient);
// the gradient calculated from the slope of the value
maxable.getValueGradient (empiricalGradient);
// This setting of epsilon should make the individual elements of
// the analytical gradient and the empirical gradient equal. This
// simplifies the comparison of the individual dimensions of the
// gradient and thus makes debugging easier.
// cas: However, avoid huge epsilon if norm of analytic gradient is
// close to 0.
// Next line used to be: double norm = Math.max (0.1, MatrixOps.twoNorm(analyticGradient));
// but if all the components of the analyticalGradient are very small, the squaring in the
// twoNorm causes epsilon to be too large. -AKM
double norm = Math.max (0.1, MatrixOps.absNorm(analyticGradient));
double epsilon = 0.1 / norm;
double tolerance = epsilon * 5;
System.out.println ("epsilon = "+epsilon+" tolerance="+tolerance);
int sampleParameterInterval = -1;
if (numComponents > 0) {
sampleParameterInterval = Math.max (1, parameters.length / numComponents);
logger.info ("Will check every "+sampleParameterInterval+"-th component.");
}
// Check each direction, perturb it, measure new value, and make
// sure it agrees with the gradient from
// maxable.getValueGradient()
for (int i = 0; i < parameters.length; i++) {
// { int i = 0; // Uncomment this line to debug one parameter at a time -cas
if ((parameters.length >= sampleParameterInterval) &&
(i % sampleParameterInterval != 0))
continue;
double param = parameters[i];
parameters[i] = param + epsilon;
//logger.fine ("Parameters:"); parameters.print();
maxable.setParameters (parameters);
double epsValue = maxable.getValue();
double slope = (epsValue - value) / epsilon;
System.out.println ("value="+value+" epsValue="+epsValue+" slope["+i+"] = "+slope+" gradient[]="+analyticGradient[i]);
assert (!Double.isNaN (slope));
logger.info ("TestMaximizable checking singleIndex "+i+
": gradient slope = "+analyticGradient[i]+
", value+epsilon slope = "+slope+
": slope difference = "+(slope - analyticGradient[i]));
// No negative below because the gradient points in the direction
// of maximizing the function.
empiricalGradient[i] = slope;
parameters[i] = param;
}
// Normalize both gradient vectors to unit L2 length
System.out.println ("analyticGradient.twoNorm = "+
MatrixOps.twoNorm(analyticGradient));
System.out.println ("empiricalGradient.twoNorm = "+
MatrixOps.twoNorm(empiricalGradient));
MatrixOps.timesEquals (analyticGradient,
1.0/MatrixOps.twoNorm(analyticGradient));
MatrixOps.timesEquals (empiricalGradient,
1.0/MatrixOps.twoNorm(empiricalGradient));
/*
System.out.println("N ANA EMP");
for (int i = 0; i < analyticGradient.length; i++) {
System.out.println(i+" "+analyticGradient[i]+" "+empiricalGradient[i]);
}
*/
// Return the angle between the two vectors, in radians
double dot = MatrixOps.dotProduct (analyticGradient,empiricalGradient);
if (Maths.almostEquals (dot, 1.0)) {
logger.info ("TestMaximizable angle is zero.");
return 0.0;
} else {
double angle = Math.acos (dot);
logger.info ("TestMaximizable angle = "+angle);
if (Math.abs(angle) > tolerance)
throw new IllegalStateException ("Gradient/Value mismatch: angle="+
angle + " tol: " + tolerance);
if (Double.isNaN (angle))
throw new IllegalStateException ("Gradient/Value error: angle is NaN!");
return angle;
}
}
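// A worked illustration of the check above (a sketch), using the SimplePoly
// defined later in this file, f(x) = 3x^2 - 5x + 2: at x = 0 the analytic
// gradient is -5 and the finite-difference slope is
// (f(epsilon) - f(0)) / epsilon = 3*epsilon - 5, e.g. -4.94 for
// epsilon = 0.02. In one dimension both normalized gradients are -1, so
// their dot product is 1 and the angle between them is 0.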
/**
* Tests that getValue and getValueGradient are consistent.
* Tests for consistency at params = 0 and at
* params = -0.0001 * grad(f)
* @see #testValueAndGradientCurrentParameters testValueAndGradientCurrentParameters
* @throws IllegalStateException If the test fails.
*/
public static boolean testValueAndGradient (Optimizable.ByGradientValue maxable)
{
double[] parameters = new double [maxable.getNumParameters()];
MatrixOps.setAll (parameters, 0.0);
maxable.setParameters (parameters);
testValueAndGradientCurrentParameters (maxable);
MatrixOps.setAll (parameters, 0.0);
double[] delta = new double[maxable.getNumParameters()];
maxable.getValueGradient (delta);
logger.info ("Gradient two-Norm = "+MatrixOps.twoNorm(delta));
logger.info (" max parameter change = "+(MatrixOps.infinityNorm(delta) * -0.001));
MatrixOps.timesEquals (delta, -0.0001);
MatrixOps.plusEquals (parameters, delta);
maxable.setParameters (parameters);
testValueAndGradientCurrentParameters (maxable);
return true;
}
/**
* Tests that getValue and getValueGradient are consistent
* at a random parameter setting.
* @see #testValueAndGradientCurrentParameters testValueAndGradientCurrentParameters
* @throws IllegalStateException If the test fails.
*/
public static boolean testValueAndGradientRandomParameters
(Optimizable.ByGradientValue maxable, Random r)
{
double[] params = new double [maxable.getNumParameters()];
for (int i = 0; i < params.length; i++) {
params[i] = r.nextDouble ();
if (r.nextBoolean ())
params [i] = -params[i];
}
maxable.setParameters (params);
testValueAndGradientCurrentParameters (maxable);
return true;
}
// Optimizable.ByGradientValue for f(x) = 3x^2 - 5x + 2
static class SimplePoly implements Optimizable.ByGradientValue {
double[] params = new double [1];
public void getParameters(double[] doubleArray) {
doubleArray [0] = params [0];
}
public int getNumParameters() { return 1; }
public double getParameter(int n) { return params [n]; }
public void setParameters(double[] doubleArray) {
params [0] = doubleArray [0];
}
public void setParameter(int n, double d) { params[n] = d; }
public double getValue () {
return 3*params[0]*params[0] - 5 * params[0] + 2;
}
public void getValueGradient (double[] buffer)
{
buffer [0] = 6*params [0] - 5; // d/dx (3x^2 - 5x + 2) = 6x - 5
}
}
static class WrongSimplePoly extends SimplePoly {
public void getValueGradient (double[] buffer)
{
buffer [0] = 3*params [0]; // WRONG: should be 6*params[0] - 5
}
}
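// Because the -5 term is dropped, WrongSimplePoly's analytic gradient at the
// origin is 0 while the finite-difference slope there is clearly nonzero, so
// testValueAndGradientCurrentParameters detects the mismatch and throws.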
public void testTestValueAndGradient ()
{
SimplePoly maxable = new SimplePoly ();
testValueAndGradient (maxable);
try {
WrongSimplePoly badMaxable = new WrongSimplePoly ();
testValueAndGradient (badMaxable);
fail ("WrongSimplyPoly should fail testMaxmiziable!");
} catch (Exception e) {}
}
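// A companion check in the same style (a minimal sketch): the get/set
// round trip of testGetSetParameters, exercised on SimplePoly.
public void testTestGetSetParameters ()
{
SimplePoly maxable = new SimplePoly ();
assertTrue (testGetSetParameters (maxable));
}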
public static Test suite ()
{
return new TestSuite (TestOptimizable.class);
}
protected void setUp ()
{
}
public static void main (String[] args)
{
junit.textui.TestRunner.run (suite());
}
}