/**
* Title: Stanford JavaNLP.
* Description: A Maximum Entropy Toolkit.
* Copyright: Copyright (c) 2002. Kristina Toutanova, Stanford University
* Company: Stanford University, All Rights Reserved.
*/
package edu.stanford.nlp.maxent;
import java.util.Arrays;
import edu.stanford.nlp.maxent.iis.LambdaSolve;
import edu.stanford.nlp.optimization.*;
import edu.stanford.nlp.util.ReflectionLoading;
import edu.stanford.nlp.util.logging.Redwood;
/**
* This class will call an optimization method such as Conjugate Gradient or
* Quasi-Newton on a LambdaSolve object to find
* optimal parameters, including imposing a Gaussian prior on those
* parameters.
*
* @author Kristina Toutanova
* @author Christopher Manning
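*
* <p>A minimal usage sketch (this assumes an already-initialized
* {@code LambdaSolve}; the filename and sigma<sup>2</sup> values are
* illustrative, not prescriptive):
* <pre>{@code
* LambdaSolve prob = ...;  // problem with features and data loaded
* CGRunner runner = new CGRunner(prob, "myModel", 1e-4, 0.5);
* runner.solveQN();        // optimal weights are left in prob.lambda
* }</pre>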
*/
public class CGRunner {
/** A logger for this class */
private static final Redwood.RedwoodChannels log = Redwood.channels(CGRunner.class);
private static final boolean SAVE_LAMBDAS_REGULARLY = false;
private final LambdaSolve prob;
private final String filename;
/**
* Error tolerance passed to CGMinimizer
*/
private final double tol;
private final boolean useGaussianPrior;
private final double priorSigmaS;
private final double[] sigmaSquareds; // = null;
private static final double DEFAULT_TOLERANCE = 1e-4;
private static final double DEFAULT_SIGMASQUARED = 0.5;
/**
* Set up a LambdaSolve problem for solution by a Minimizer.
* Uses a Gaussian prior with a sigma<sup>2</sup> of 0.5.
*
* @param prob The problem to solve
* @param filename Used (with extension) to save intermediate results.
*/
public CGRunner(LambdaSolve prob, String filename) {
this(prob, filename, DEFAULT_SIGMASQUARED);
}
/**
* Set up a LambdaSolve problem for solution by a Minimizer,
* specifying a value for sigma<sup>2</sup>.
*
* @param prob The problem to solve
* @param filename Used (with extension) to save intermediate results.
* @param priorSigmaS The prior sigma<sup>2</sup>: twice this value is used to
* divide the lambda<sup>2</sup> values as the prior penalty in the
* likelihood, so each parameter contributes
* lambda<sup>2</sup> / (2 sigma<sup>2</sup>) to the negative log likelihood.
* A value of 0.0 or Double.POSITIVE_INFINITY turns off regularization.
*/
public CGRunner(LambdaSolve prob, String filename, double priorSigmaS) {
this(prob, filename, DEFAULT_TOLERANCE, priorSigmaS);
}
/**
* Set up a LambdaSolve problem for solution by a Minimizer.
*
* @param prob The problem to solve
* @param filename Used (with extension) to save intermediate results.
* @param tol Tolerance of errors (passed to CG)
* @param priorSigmaS The prior sigma<sup>2</sup>: twice this value is used to
* divide the lambda<sup>2</sup> values as the prior penalty.
* A value of 0.0 or Double.POSITIVE_INFINITY turns off regularization.
*/
public CGRunner(LambdaSolve prob, String filename, double tol, double priorSigmaS) {
this.prob = prob;
this.filename = filename;
this.tol = tol;
this.useGaussianPrior = priorSigmaS != 0.0 && priorSigmaS != Double.POSITIVE_INFINITY;
this.priorSigmaS = priorSigmaS;
this.sigmaSquareds = null;
}
/**
* Set up a LambdaSolve problem for solution by a Minimizer.
*
* @param prob The problem to solve
* @param filename Used (with extension) to save intermediate results.
* @param tol Tolerance of errors (passed to CG)
* @param sigmaSquareds The prior sigma<sup>2</sup> for each feature: twice each
* value is used to divide the corresponding lambda<sup>2</sup> value as the
* prior penalty. This array must have one entry per feature.
* If it is null, no regularization is performed.
*/
public CGRunner(LambdaSolve prob, String filename, double tol, double[] sigmaSquareds) {
this.prob = prob;
this.filename = filename;
this.tol = tol;
this.useGaussianPrior = sigmaSquareds != null;
this.sigmaSquareds = sigmaSquareds;
this.priorSigmaS = -1.0; // not used
}
private void printOptimizationResults(LikelihoodFunction df, MonitorFunction monitor) {
double negLogLike = df.valueAt(prob.lambda);
System.err.printf("After optimization neg (penalized) log cond likelihood: %1.2f%n", negLogLike);
if (monitor != null) {
monitor.reportMonitoring(negLogLike);
}
int numNonZero = 0;
for (int i = 0; i < prob.lambda.length; i++) {
if (prob.lambda[i] != 0.0) {
// 0.0 == -0.0 in IEEE math!
numNonZero++;
}
}
System.err.printf("Non-zero parameters: %d/%d (%1.2f%%)%n", numNonZero, prob.lambda.length,
(100.0 * numNonZero) / prob.lambda.length);
}
/**
* Solves the problem using a quasi-Newton method (L-BFGS). The solution
* is stored in the {@code lambda} array of {@code prob}.
*/
public void solveQN() {
LikelihoodFunction df = new LikelihoodFunction(prob, tol, useGaussianPrior, priorSigmaS, sigmaSquareds);
MonitorFunction monitor = new MonitorFunction(prob, df, filename);
Minimizer<DiffFunction> cgm = new QNMinimizer(monitor, 10);
// all parameters are started at 0.0
prob.lambda = cgm.minimize(df, tol, new double[df.domainDimension()]);
printOptimizationResults(df, monitor);
}
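/**
* Solves the problem using OWL-QN as implemented inside {@link QNMinimizer}
* (compare {@link #solveL1}, which loads the standalone OWLQNMinimizer by
* reflection). The solution is stored in the {@code lambda} array of
* {@code prob}.
*
* @param weight The L1 regularization weight
*/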
public void solveOWLQN2(double weight) {
LikelihoodFunction df = new LikelihoodFunction(prob, tol, useGaussianPrior, priorSigmaS, sigmaSquareds);
MonitorFunction monitor = new MonitorFunction(prob, df, filename);
QNMinimizer cgm = new QNMinimizer(monitor, 10);
cgm.useOWLQN(true, weight);
// all parameters are started at 0.0
prob.lambda = cgm.minimize(df, tol, new double[df.domainDimension()]);
printOptimizationResults(df, monitor);
}
/**
* Solves the problem using conjugate gradient (CG). The solution
* is stored in the {@code lambda} array of {@code prob}.
*/
public void solveCG() {
LikelihoodFunction df = new LikelihoodFunction(prob, tol, useGaussianPrior, priorSigmaS, sigmaSquareds);
MonitorFunction monitor = new MonitorFunction(prob, df, filename);
Minimizer<DiffFunction> cgm = new CGMinimizer(monitor);
// all parameters are started at 0.0
prob.lambda = cgm.minimize(df, tol, new double[df.domainDimension()]);
printOptimizationResults(df, monitor);
}
/**
* Solves the problem using OWL-QN. The solution
* is stored in the {@code lambda} array of {@code prob}. Note that the
* underlying likelihood function will still carry an L2 penalty unless you
* have turned that off by setting priorSigmaS to 0.0.
*
* @param weight Controls the sparseness/regularization of the L1 solution.
* The larger the weight, the sparser the solution. Weights between
* 0.01 and 1.0 typically give good performance.
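*
* <p>For example, given a previously constructed {@code runner} (the
* weight 0.1 is just an illustrative mid-range choice):
* <pre>{@code
* runner.solveL1(0.1);  // moderately sparse solution in prob.lambda
* }</pre>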
*/
public void solveL1(double weight) {
LikelihoodFunction df = new LikelihoodFunction(prob, tol, useGaussianPrior, priorSigmaS, sigmaSquareds);
Minimizer<DiffFunction> owl = ReflectionLoading.loadByReflection("edu.stanford.nlp.optimization.OWLQNMinimizer", weight);
prob.lambda = owl.minimize(df, tol, new double[df.domainDimension()]);
printOptimizationResults(df, null);
}
/**
* Wraps the LambdaSolve model as a DiffFunction, supplying the (optionally
* penalized) negative log conditional likelihood and its gradient to a
* Minimizer.
*/
private static final class LikelihoodFunction implements DiffFunction {
private final LambdaSolve model;
private final double tol;
private final boolean useGaussianPrior;
private final double[] sigmaSquareds;
private int valueAtCalls;
private double likelihood;
public LikelihoodFunction(LambdaSolve m, double tol, boolean useGaussianPrior, double sigmaSquared, double[] sigmaSquareds) {
model = m;
this.tol = tol;
this.useGaussianPrior = useGaussianPrior;
if (useGaussianPrior) {
// keep separate prior on each parameter for flexibility
this.sigmaSquareds = new double[model.lambda.length];
if (sigmaSquareds != null) {
System.arraycopy(sigmaSquareds, 0, this.sigmaSquareds, 0, sigmaSquareds.length);
} else {
Arrays.fill(this.sigmaSquareds, sigmaSquared);
}
} else {
this.sigmaSquareds = null;
}
}
@Override
public int domainDimension() {
return model.lambda.length;
}
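/** Returns the (penalized) negative log likelihood from the last call to valueAt. */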
public double likelihood() {
return likelihood;
}
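/** Returns the number of times valueAt has been called. */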
public int numCalls() {
return valueAtCalls;
}
@Override
public double valueAt(double[] lambda) {
valueAtCalls++;
model.lambda = lambda;
double lik = model.logLikelihoodScratch();
if (useGaussianPrior) {
// add the Gaussian prior penalty: lambda_i^2 / (2 * sigma_i^2)
for (int i = 0; i < lambda.length; i++) {
lik += (lambda[i] * lambda[i]) / (2.0 * sigmaSquareds[i]);
}
}
// log.info(valueAtCalls + " calls to valueAt;" +
// " penalized log likelihood is " + lik);
likelihood = lik;
return lik;
}
@Override
public double[] derivativeAt(double[] lambda) {
boolean eq = true;
for (int j = 0; j < lambda.length; j++) {
if (Math.abs(lambda[j] - model.lambda[j]) > tol) {
eq = false;
break;
}
}
if (!eq) {
log.info("derivativeAt: call with different value");
valueAt(lambda);
}
double[] drvs = model.getDerivatives();
// System.out.println("for lambdas "+lambda[0]+" "+lambda[1] +
// " derivatives "+drvs[0]+" "+drvs[1]);
if (useGaussianPrior) {
// add the gradient of the prior penalty:
// d/dlambda_j [ lambda_j^2 / (2 * sigma_j^2) ] = lambda_j / sigma_j^2
for (int j = 0; j < lambda.length; j++) {
drvs[j] += lambda[j] / sigmaSquareds[j];
}
}
//System.out.println("final derivatives "+drvs[0]+" "+drvs[1]);
return drvs;
}
} // end static class LikelihoodFunction
/**
* A Function handed to the Minimizer as a monitor: each call logs
* optimization progress and periodically checks the model for correctness.
*/
private static final class MonitorFunction implements Function {
private final LambdaSolve model;
private final LikelihoodFunction lf;
private final String filename;
private int iterations; // = 0
public MonitorFunction(LambdaSolve m, LikelihoodFunction lf, String filename) {
this.model = m;
this.lf = lf;
this.filename = filename;
}
@Override
@SuppressWarnings({"ConstantConditions", "PointlessBooleanExpression"})
public double valueAt(double[] lambda) {
double likelihood = lf.likelihood();
// QN minimization prints its progress mid-line, so start with a fresh log line
log.info();
log.info(reportMonitoring(likelihood));
if (SAVE_LAMBDAS_REGULARLY && iterations > 0 && iterations % 5 == 0) {
model.save_lambdas(filename + '.' + iterations + ".lam");
}
if (iterations > 0 && iterations % 30 == 0) {
model.checkCorrectness();
}
iterations++;
return 42; // never cause premature termination.
}
public String reportMonitoring(double likelihood) {
return "Iter. " + iterations + ": " + "neg. log cond. likelihood = " + likelihood + " [" + lf.numCalls() + " calls to valueAt]";
}
@Override
public int domainDimension() {
return lf.domainDimension();
}
} // end static class MonitorFunction
}