edu.stanford.nlp.maxent.CGRunner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
/**
* Title: Stanford JavaNLP.
* Description: A Maximum Entropy Toolkit.
* Copyright: Copyright (c) 2002. Kristina Toutanova, Stanford University
* Company: Stanford University, All Rights Reserved.
*/
package edu.stanford.nlp.maxent;
import java.util.Arrays;
import edu.stanford.nlp.maxent.iis.LambdaSolve;
import edu.stanford.nlp.optimization.*;
import edu.stanford.nlp.util.ReflectionLoading;
/**
* This class will call an optimization method such as Conjugate Gradient or
* Quasi-Newton on a LambdaSolve object to find
* optimal parameters, including imposing a Gaussian prior on those
* parameters.
*
* @author Kristina Toutanova
* @author Christopher Manning
*/
public class CGRunner {

  /** When true, the monitor saves the current lambdas to disk every 5 iterations. */
  private static final boolean SAVE_LAMBDAS_REGULARLY = false;

  private static final double DEFAULT_TOLERANCE = 1e-4;
  private static final double DEFAULT_SIGMASQUARED = 0.5;

  private final LambdaSolve prob;
  private final String filename;

  /**
   * Error tolerance passed to the minimizer.
   */
  private final double tol;

  private final boolean useGaussianPrior;

  /** Single prior variance shared by all features; only consulted when {@link #sigmaSquareds} is null. */
  private final double priorSigmaS;

  /** Per-feature prior variances, or null when a single shared value (or no prior) is used. */
  private final double[] sigmaSquareds;

  /**
   * Set up a LambdaSolve problem for solution by a Minimizer.
   * Uses a Gaussian prior with a sigma^2 of 0.5.
   *
   * @param prob     The problem to solve
   * @param filename Used (with extension) to save intermediate results.
   */
  public CGRunner(LambdaSolve prob, String filename) {
    this(prob, filename, DEFAULT_SIGMASQUARED);
  }

  /**
   * Set up a LambdaSolve problem for solution by a Minimizer,
   * specifying a value for sigma^2. The default tolerance (1e-4) is used.
   *
   * @param prob        The problem to solve
   * @param filename    Used (with extension) to save intermediate results.
   * @param priorSigmaS The prior sigma^2: this doubled will be
   *                    used to divide the lambda^2 values as the
   *                    prior penalty in the likelihood. A value of 0.0
   *                    or Double.POSITIVE_INFINITY
   *                    indicates to not use regularization.
   */
  public CGRunner(LambdaSolve prob, String filename, double priorSigmaS) {
    this(prob, filename, DEFAULT_TOLERANCE, priorSigmaS);
  }

  /**
   * Set up a LambdaSolve problem for solution by a Minimizer.
   *
   * @param prob        The problem to solve
   * @param filename    Used (with extension) to save intermediate results.
   * @param tol         Tolerance of errors (passed to the minimizer)
   * @param priorSigmaS The prior sigma^2: this doubled will be
   *                    used to divide the lambda^2 values as the
   *                    prior penalty. A value of 0.0
   *                    or Double.POSITIVE_INFINITY
   *                    indicates to not use regularization.
   */
  public CGRunner(LambdaSolve prob, String filename, double tol, double priorSigmaS) {
    this.prob = prob;
    this.filename = filename;
    this.tol = tol;
    this.useGaussianPrior = priorSigmaS != 0.0 && priorSigmaS != Double.POSITIVE_INFINITY;
    this.priorSigmaS = priorSigmaS;
    this.sigmaSquareds = null;
  }

  /**
   * Set up a LambdaSolve problem for solution by a Minimizer.
   *
   * @param prob          The problem to solve
   * @param filename      Used (with extension) to save intermediate results.
   * @param tol           Tolerance of errors (passed to the minimizer)
   * @param sigmaSquareds The prior sigma^2 for each feature: this doubled will be
   *                      used to divide the lambda^2 values as the
   *                      prior penalty. This array must have size the number of features;
   *                      a mismatch is reported with an IllegalArgumentException when one
   *                      of the solve methods is called.
   *                      If it is null, no regularization will be performed.
   */
  public CGRunner(LambdaSolve prob, String filename, double tol, double[] sigmaSquareds) {
    this.prob = prob;
    this.filename = filename;
    this.tol = tol;
    this.useGaussianPrior = sigmaSquareds != null;
    this.sigmaSquareds = sigmaSquareds;
    this.priorSigmaS = -1.0; // not used
  }

  /** Builds the (optionally penalized) objective for the configured problem. */
  private LikelihoodFunction newLikelihoodFunction() {
    return new LikelihoodFunction(prob, tol, useGaussianPrior, priorSigmaS, sigmaSquareds);
  }

  /**
   * Runs {@code minimizer} on {@code df} with every parameter started at 0.0,
   * stores the result in {@code prob.lambda} and prints a summary.
   *
   * @param minimizer The optimizer to run
   * @param df        The objective to minimize
   * @param monitor   The monitor handed to the minimizer, or null if there is none
   */
  private void minimizeFromZero(Minimizer<DiffFunction> minimizer, LikelihoodFunction df, MonitorFunction monitor) {
    prob.lambda = minimizer.minimize(df, tol, new double[df.domainDimension()]);
    printOptimizationResults(df, monitor);
  }

  /**
   * Prints the final objective value, the monitor's summary line (if a monitor
   * was used) and the number of non-zero parameters to stderr.
   *
   * @param df      The objective that was minimized
   * @param monitor The monitor used during minimization; may be null
   */
  private void printOptimizationResults(LikelihoodFunction df, MonitorFunction monitor) {
    double negLogLike = df.valueAt(prob.lambda);
    System.err.printf("After optimization neg (penalized) log cond likelihood: %1.2f%n", negLogLike);
    if (monitor != null) {
      // reportMonitoring only builds the message; it has to be printed here to be seen.
      System.err.println(monitor.reportMonitoring(negLogLike));
    }
    int total = prob.lambda.length;
    int numNonZero = 0;
    for (double weight : prob.lambda) {
      if (weight != 0.0) {
        // 0.0 == -0.0 in IEEE math!
        numNonZero++;
      }
    }
    // Avoid printing NaN% when there are no parameters at all.
    double percent = (total == 0) ? 0.0 : (100.0 * numNonZero) / total;
    System.err.printf("Non-zero parameters: %d/%d (%1.2f%%)%n", numNonZero, total, percent);
  }

  /**
   * Solves the problem using a quasi-newton method (L-BFGS). The solution
   * is stored in the {@code lambda} array of {@code prob}.
   */
  public void solveQN() {
    LikelihoodFunction df = newLikelihoodFunction();
    MonitorFunction monitor = new MonitorFunction(prob, df, filename);
    // NOTE(review): the second argument is presumably the L-BFGS history size - confirm against QNMinimizer.
    minimizeFromZero(new QNMinimizer(monitor, 10), df, monitor);
  }

  /**
   * Solves the problem using the quasi-newton minimizer with its OWL-QN mode
   * switched on. The solution is stored in the {@code lambda} array of {@code prob}.
   *
   * @param weight Passed to {@code QNMinimizer.useOWLQN}; presumably the L1
   *               regularization weight - confirm against QNMinimizer.
   */
  public void solveOWLQN2(double weight) {
    LikelihoodFunction df = newLikelihoodFunction();
    MonitorFunction monitor = new MonitorFunction(prob, df, filename);
    QNMinimizer qn = new QNMinimizer(monitor, 10);
    qn.useOWLQN(true, weight);
    minimizeFromZero(qn, df, monitor);
  }

  /**
   * Solves the problem using conjugate gradient (CG). The solution
   * is stored in the {@code lambda} array of {@code prob}.
   */
  public void solveCG() {
    LikelihoodFunction df = newLikelihoodFunction();
    MonitorFunction monitor = new MonitorFunction(prob, df, filename);
    minimizeFromZero(new CGMinimizer(monitor), df, monitor);
  }

  /**
   * Solves the problem using OWLQN. The solution
   * is stored in the {@code lambda} array of {@code prob}. Note that the
   * likelihood function will be a penalized L2 likelihood function unless you
   * have turned this off via setting the priorSigmaS to 0.0.
   *
   * @param weight Controls the sparseness/regularization of the L1 solution.
   *               The bigger the number the sparser the solution. Weights between
   *               0.01 and 1.0 typically give good performance.
   */
  public void solveL1(double weight) {
    LikelihoodFunction df = newLikelihoodFunction();
    // Loaded by name so that this class has no compile-time reference to OWLQNMinimizer.
    Minimizer<DiffFunction> owl = ReflectionLoading.loadByReflection("edu.stanford.nlp.optimization.OWLQNMinimizer", weight);
    minimizeFromZero(owl, df, null);
  }


  /**
   * The objective handed to the minimizers: the value returned by
   * {@code LambdaSolve.logLikelihoodScratch()} plus, when a Gaussian prior is
   * in use, the penalty sum of lambda[i]^2 / (2 * sigma^2[i]).
   */
  private static final class LikelihoodFunction implements DiffFunction {

    private final LambdaSolve model;
    private final double tol;
    private final boolean useGaussianPrior;
    /** One prior variance per parameter, or null when no prior is used. */
    private final double[] sigmaSquareds;
    private int valueAtCalls;
    private double likelihood;

    /**
     * @param m                The model whose likelihood is evaluated
     * @param tol              Tolerance used by {@link #derivativeAt} to decide whether the
     *                         model is already positioned at the requested point
     * @param useGaussianPrior Whether to add the Gaussian prior penalty
     * @param sigmaSquared     Variance used for every parameter when {@code sigmaSquareds} is null
     * @param sigmaSquareds    Per-parameter variances, or null; copied, not retained
     * @throws IllegalArgumentException if a prior is requested and {@code sigmaSquareds}
     *                                  is non-null but its length differs from the number of parameters
     */
    public LikelihoodFunction(LambdaSolve m, double tol, boolean useGaussianPrior, double sigmaSquared, double[] sigmaSquareds) {
      model = m;
      this.tol = tol;
      this.useGaussianPrior = useGaussianPrior;
      if ( ! useGaussianPrior) {
        this.sigmaSquareds = null;
      } else if (sigmaSquareds != null) {
        int numFeatures = model.lambda.length;
        if (sigmaSquareds.length != numFeatures) {
          // A short array would leave zero variances behind (NaN penalty at lambda = 0);
          // a long one would overflow the copy. Fail early with a clear message instead.
          throw new IllegalArgumentException("sigmaSquareds has " + sigmaSquareds.length +
                  " entries but the model has " + numFeatures + " features");
        }
        this.sigmaSquareds = sigmaSquareds.clone();
      } else {
        // keep separate prior on each parameter for flexibility
        double[] uniform = new double[model.lambda.length];
        Arrays.fill(uniform, sigmaSquared);
        this.sigmaSquareds = uniform;
      }
    }

    @Override
    public int domainDimension() {
      return model.lambda.length;
    }

    /** Returns the value computed by the most recent call to {@link #valueAt}. */
    public double likelihood() {
      return likelihood;
    }

    /** Returns how many times {@link #valueAt} has been called. */
    public int numCalls() {
      return valueAtCalls;
    }

    /**
     * Installs {@code lambda} as the model's parameter array (by reference) and
     * returns the model's likelihood value plus the prior penalty, if any.
     */
    @Override
    public double valueAt(double[] lambda) {
      valueAtCalls++;
      model.lambda = lambda;
      double lik = model.logLikelihoodScratch();
      if (useGaussianPrior) {
        for (int i = 0; i < lambda.length; i++) {
          lik += (lambda[i] * lambda[i]) / (2.0 * sigmaSquareds[i]);
        }
      }
      likelihood = lik;
      return lik;
    }

    /**
     * Returns the model's derivatives with the prior's gradient
     * (lambda[j] / sigma^2[j]) added in place when a prior is used.
     * If {@code lambda} differs from the model's current parameters by more
     * than {@code tol} in any coordinate, {@link #valueAt} is run first to
     * reposition the model.
     */
    @Override
    public double[] derivativeAt(double[] lambda) {
      // NOTE(review): valueAt stores the caller's array by reference, so a caller that
      // mutates that same array in place will not be detected here - confirm the
      // minimizers always pass the point they last evaluated.
      boolean samePoint = true;
      for (int j = 0; j < lambda.length; j++) {
        if (Math.abs(lambda[j] - model.lambda[j]) > tol) {
          samePoint = false;
          break;
        }
      }
      if ( ! samePoint) {
        System.err.println("derivativeAt: call with different value");
        valueAt(lambda);
      }

      double[] drvs = model.getDerivatives();
      if (useGaussianPrior) {
        // prior penalty
        for (int j = 0; j < lambda.length; j++) {
          drvs[j] += lambda[j] / sigmaSquareds[j];
        }
      }
      return drvs;
    }

  } // end static class LikelihoodFunction


  /**
   * This one is used in the monitor: the minimizer calls {@link #valueAt}
   * and this class uses that as a hook for progress reporting, optional
   * saving of lambdas, and periodic model checks.
   */
  private static final class MonitorFunction implements Function {

    private final LambdaSolve model;
    private final LikelihoodFunction lf;
    private final String filename;
    private int iterations; // = 0

    public MonitorFunction(LambdaSolve m, LikelihoodFunction lf, String filename) {
      this.model = m;
      this.lf = lf;
      this.filename = filename;
    }

    /**
     * Prints a progress line, saves lambdas every 5 iterations if
     * {@code SAVE_LAMBDAS_REGULARLY} is set, runs {@code model.checkCorrectness()}
     * every 30 iterations, and advances the iteration counter.
     *
     * @param lambda Ignored; the likelihood is read from the likelihood function instead
     * @return A constant, so that the monitor never causes premature termination
     */
    @Override
    @SuppressWarnings({"ConstantConditions", "PointlessBooleanExpression"})
    public double valueAt(double[] lambda) {
      double likelihood = lf.likelihood();
      // this line is printed in the middle of the normal line of QN minimization, so put println at beginning
      System.err.println();
      System.err.print(reportMonitoring(likelihood));

      if (SAVE_LAMBDAS_REGULARLY && iterations > 0 && iterations % 5 == 0) {
        model.save_lambdas(filename + '.' + iterations + ".lam");
      }

      if (iterations > 0 && iterations % 30 == 0) {
        model.checkCorrectness();
      }
      iterations++;

      return 42; // never cause premature termination.
    }

    /**
     * Builds (but does not print) a one-line summary with the current iteration
     * number, the given likelihood and the number of {@code valueAt} calls made
     * on the likelihood function so far.
     */
    public String reportMonitoring(double likelihood) {
      return "Iter. " + iterations + ": " + "neg. log cond. likelihood = " + likelihood + " [" + lf.numCalls() + " calls to valueAt]";
    }

    @Override
    public int domainDimension() {
      return lf.domainDimension();
    }

  } // end static class MonitorFunction

}