
cc.mallet.classify.MaxEntTrainer


MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
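
A minimal usage sketch of the trainer defined below (illustrative, not part of the distribution): it assumes "instances" is an InstanceList that has already been built through a MALLET pipe and whose targets are Labels. Types referenced are cc.mallet.classify.MaxEnt, cc.mallet.classify.Classification, and cc.mallet.types.InstanceList.

    MaxEntTrainer trainer = new MaxEntTrainer();            // default Gaussian (L2) prior
    MaxEnt maxent = trainer.train(instances);               // optimize to convergence
    Classification c = maxent.classify(instances.get(0));   // classify one instance
    System.out.println(c.getLabeling().getBestLabel());     // print the predicted label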

/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

package cc.mallet.classify;

import java.util.logging.*;
import java.io.*;

import cc.mallet.optimize.ConjugateGradient;
import cc.mallet.optimize.InvalidOptimizableException;
import cc.mallet.optimize.LimitedMemoryBFGS;
import cc.mallet.optimize.Optimizable;
import cc.mallet.optimize.OptimizationException;
import cc.mallet.optimize.Optimizer;
import cc.mallet.types.Alphabet;
import cc.mallet.types.InstanceList;
import cc.mallet.util.MalletLogger;
import cc.mallet.util.MalletProgressMessageLogger;

//Does not currently handle instances that are labeled with distributions
//instead of a single label.
/**
 * The trainer for a Maximum Entropy classifier.
 * @author Andrew McCallum [email protected]
 */

public class MaxEntTrainer extends ClassifierTrainer<MaxEnt>
	implements ClassifierTrainer.ByOptimization<MaxEnt>, Boostable, Serializable {

	private static Logger logger = MalletLogger.getLogger(MaxEntTrainer.class.getName());
	private static Logger progressLogger = MalletProgressMessageLogger.getLogger(MaxEntTrainer.class.getName()+"-pl");

	int numIterations = Integer.MAX_VALUE;

	//public static final String EXP_GAIN = "exp";
	//public static final String GRADIENT_GAIN = "grad";
	//public static final String INFORMATION_GAIN = "info";

	// xxx Why does TestMaximizable fail when this variance is very small?
	static final double DEFAULT_GAUSSIAN_PRIOR_VARIANCE = 1;
	static final double DEFAULT_L1_WEIGHT = 0.0;
	static final Class DEFAULT_MAXIMIZER_CLASS = LimitedMemoryBFGS.class;

	double gaussianPriorVariance = DEFAULT_GAUSSIAN_PRIOR_VARIANCE;
	double l1Weight = DEFAULT_L1_WEIGHT;

	Class maximizerClass = DEFAULT_MAXIMIZER_CLASS;

	InstanceList trainingSet = null;
	MaxEnt initialClassifier;

	MaxEntOptimizableByLabelLikelihood optimizable = null;
	Optimizer optimizer = null;

	// 
	// CONSTRUCTORS
	//

	public MaxEntTrainer () {}

	/** Construct a MaxEnt trainer using a trained classifier as
	 *   initial values.
	 */ 
	public MaxEntTrainer (MaxEnt theClassifierToTrain) {
		this.initialClassifier = theClassifierToTrain;
	}

	/** Constructs a trainer with a parameter to avoid overtraining.  1.0 is
	 * the default value. */
	public MaxEntTrainer (double gaussianPriorVariance) {
		this.gaussianPriorVariance = gaussianPriorVariance;
	}

	//
	//  CLASSIFIER OBJECT: stores parameters
	// 

	public MaxEnt getClassifier () {
		if (optimizable != null)
			return optimizable.getClassifier();
		return initialClassifier;
	}

	/**
	 *  Initialize parameters using the provided classifier. 
	 */
	public void setClassifier (MaxEnt theClassifierToTrain) {
		// Is this necessary?  What if the caller is about to set the training set to something different? -akm
		assert (trainingSet == null || Alphabet.alphabetsMatch(theClassifierToTrain, trainingSet));
		if (this.initialClassifier != theClassifierToTrain) {
			this.initialClassifier = theClassifierToTrain;
			optimizable = null;
			optimizer = null;
		}
	}
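
	// Illustrative warm-start sketch (not part of the original source): a previously
	// trained MaxEnt model can seed a new trainer, either through the MaxEnt constructor
	// above or through setClassifier(). "previousModel" and "moreData" are hypothetical.
	//
	//   MaxEntTrainer trainer = new MaxEntTrainer (previousModel);
	//   MaxEnt updated = trainer.train (moreData);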

	//
	//  OPTIMIZABLE OBJECT: implements value and gradient functions
	//

	public Optimizable getOptimizable () {
		return optimizable;
	}

	public MaxEntOptimizableByLabelLikelihood getOptimizable (InstanceList trainingSet) {
		return getOptimizable(trainingSet, getClassifier());
	}

	public MaxEntOptimizableByLabelLikelihood getOptimizable (InstanceList trainingSet, MaxEnt initialClassifier) {

		if (trainingSet != this.trainingSet || this.initialClassifier != initialClassifier) {

			this.trainingSet = trainingSet;
			this.initialClassifier = initialClassifier;

			if (optimizable == null || optimizable.trainingList != trainingSet) {
				optimizable = new MaxEntOptimizableByLabelLikelihood (trainingSet, initialClassifier);

				if (l1Weight == 0.0) {
					optimizable.setGaussianPriorVariance(gaussianPriorVariance);
				}
				else {
					// the prior term for L1-regularized classifiers 
					//  is implemented as part of the optimizer, 
					//  so don't include a prior calculation in the value and 
					//  gradient functions.
					optimizable.useNoPrior();
				}

				optimizer = null;
			}
		}

		return optimizable;
	}

	//
	//  OPTIMIZER OBJECT: maximizes value function
	//

	public Optimizer getOptimizer () {
		if (optimizer == null && optimizable != null) {
			optimizer = new ConjugateGradient(optimizable);
		}

		return optimizer;
	}

	/** This method is called by the train method. 
	 *   This is the main entry point for the optimizable and optimizer
	 *   components.
	 */
	public Optimizer getOptimizer (InstanceList trainingSet) {

		// If the data is not set, or has changed, 
		// initialize the optimizable object and 
		// replace the optimizer.
		if (trainingSet != this.trainingSet ||
			optimizable == null) {
			getOptimizable(trainingSet);
			optimizer = null;
		}

		// Build a new optimizer
		if (optimizer == null) {
			// If l1Weight is 0, this devolves to 
			//  standard L-BFGS, but the implementation
			//  may be faster.
			optimizer = new LimitedMemoryBFGS(optimizable); 
			//OrthantWiseLimitedMemoryBFGS(optimizable, l1Weight);
		}
		return optimizer;
	}

	/**
	 * Specifies the maximum number of iterations to run during a single call
	 * to train or trainWithFeatureInduction.  Not
	 * currently functional.
	 * @return This trainer
	 */
	// XXX Since we maximize before using numIterations, this doesn't work.
	// Is that a bug?  If so, should the default numIterations be higher?
	public MaxEntTrainer setNumIterations (int i) {
		numIterations = i;
		return this;
	}

	public int getIteration () {
		if (optimizable == null)
			return 0;
		else
		  return Integer.MAX_VALUE;
//			return optimizer.getIteration ();
	}

	/**
	 * Sets a parameter to prevent overtraining.  A smaller variance for the prior
	 * means that feature weights are expected to hover closer to 0, so extra
	 * evidence is required to set a higher weight.
	 * @return This trainer
	 */
	public MaxEntTrainer setGaussianPriorVariance (double gaussianPriorVariance) {
		this.gaussianPriorVariance = gaussianPriorVariance;
		return this;
	}

	/** 
	 *  Use an L1 prior. Larger values mean parameters will be closer to 0.
	 *   Note that this setting overrides any Gaussian prior.
	 */
	public MaxEntTrainer setL1Weight(double l1Weight) {
		this.l1Weight = l1Weight;
		return this;
	}
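
	// Illustrative configuration sketch (not part of the original source): the two priors
	// are mutually exclusive, because a nonzero l1Weight makes getOptimizable() call
	// useNoPrior() and leave the L1 term to the optimizer. The values shown are arbitrary.
	//
	//   MaxEntTrainer l2Trainer = new MaxEntTrainer().setGaussianPriorVariance(0.1); // tighter prior; weights stay nearer 0
	//   MaxEntTrainer l1Trainer = new MaxEntTrainer().setL1Weight(1.0);              // L1 penalty; encourages sparse weights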

	public MaxEnt train (InstanceList trainingSet) {
		return train (trainingSet, numIterations);
	}

	public MaxEnt train (InstanceList trainingSet, int numIterations)
	{
		logger.fine ("trainingSet.size() = "+trainingSet.size());
		getOptimizer (trainingSet);  // This will set this.optimizer, this.optimizable

		for (int i = 0; i < numIterations; i++) {
			try {
				finishedTraining = optimizer.optimize (1);
			} catch (InvalidOptimizableException e) {
				e.printStackTrace();
				logger.warning("Catching InvalidOptimizableException; saying converged.");
				finishedTraining = true;
			} catch (OptimizationException e) {
				e.printStackTrace();
				logger.info ("Catching OptimizationException; saying converged.");
				finishedTraining = true;
			}
			if (finishedTraining)
				break;
		}

		// Only run the second pass below when no iteration limit was set
		if (numIterations == Integer.MAX_VALUE) {
			// Run it again because in our and Sam Roweis' experience, BFGS can still
			// eke out more likelihood after first convergence by re-running without
			// being restricted by its gradient history.
			optimizer = null;
			getOptimizer(trainingSet);
			try {
				finishedTraining = optimizer.optimize ();
			} catch (InvalidOptimizableException e) {
				e.printStackTrace();
				logger.warning("Catching InvalidOptimizableException; saying converged.");
				finishedTraining = true;
			} catch (OptimizationException e) {
				e.printStackTrace();
				logger.info ("Catching OptimizationException; saying converged.");
				finishedTraining = true;
			}
		}
		//TestMaximizable.testValueAndGradientCurrentParameters (mt);
		progressLogger.info("\n"); //  progress messages are on one line; move on.
		//logger.info("MaxEnt ngetValueCalls:"+getValueCalls()+"\nMaxEnt ngetValueGradientCalls:"+getValueGradientCalls());
		return optimizable.getClassifier();
	}
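
	// Illustrative call patterns (not part of the original source), matching the two
	// branches above: with the default numIterations (Integer.MAX_VALUE) the optimizer
	// runs to convergence and L-BFGS is then restarted once; with an explicit cap it is not.
	//
	//   trainer.train (instances);       // optimize until converged, then restart once
	//   trainer.train (instances, 50);   // at most 50 optimization iterations, no restart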

	/**
	 * Trains a maximum entropy model using feature selection and feature induction
	 * (adding conjunctions of features as new features).
	 *
	 * @param trainingData A list of Instances whose data fields are binary,
	 * augmentable FeatureVectors, and whose target fields are Labels.
	 * @param validationData [not currently used] As trainingData, or null.
	 * @param testingData As trainingData, or null.
	 * @param evaluator The evaluator to track training progress and decide whether
	 * to continue, or null.
	 * @param totalIterations The maximum total number of training iterations,
	 * including those taken during feature induction.
	 * @param numIterationsBetweenFeatureInductions How many iterations to train
	 * between one round of feature induction and the next; this should usually
	 * be fairly small, like 5 or 10, to avoid overfitting with current features.
	 * @param numFeatureInductions How many rounds of feature induction to run
	 * before beginning normal training.
	 * @param numFeaturesPerFeatureInduction The maximum number of features to
	 * choose during each round of featureInduction.
	 *
	 * @return The trained MaxEnt classifier
	 */
	/*
	// added - [email protected]
	public Classifier trainWithFeatureInduction (InstanceList trainingData,
			int totalIterations,
			int numIterationsBetweenFeatureInductions,
			int numFeatureInductions,
			int numFeaturesPerFeatureInduction) {
		return trainWithFeatureInduction (trainingData,
				null,
				totalIterations,
				numIterationsBetweenFeatureInductions,
				numFeatureInductions,
				numFeaturesPerFeatureInduction,
				EXP_GAIN);
	}
	*/

	/**
	 * Like the other version of trainWithFeatureInduction, but
	 * allows some default options to be changed.
	 *
	 * @param maxent An initial partially-trained classifier (default null).
	 * This classifier may be modified during training.
	 * @param gainName The estimate of gain (log-likelihood increase) we want our chosen
	 * features to maximize. Should be one of MaxEntTrainer.EXP_GAIN,
	 * MaxEntTrainer.GRADIENT_GAIN, or MaxEntTrainer.INFORMATION_GAIN (default EXP_GAIN).
	 *
	 * @return The trained MaxEnt classifier
	 */
	/*
	// Temporarily removed until I figure out how to handle induceFeaturesFor (testData)
	public Classifier trainWithFeatureInduction (InstanceList trainingData,
			int totalIterations,
			int numIterationsBetweenFeatureInductions,
			int numFeatureInductions,
			int numFeaturesPerFeatureInduction,
			String gainName) {

		// XXX This ought to be a parameter, except that setting it to true can
		// crash training ("Jump too small").
		boolean saveParametersDuringFI = false;

		Alphabet inputAlphabet = trainingData.getDataAlphabet();
		Alphabet outputAlphabet = trainingData.getTargetAlphabet();

		int trainingIteration = 0;
		int numLabels = outputAlphabet.size();

		MaxEnt maxent = getClassifier();

		// Initialize feature selection
		FeatureSelection globalFS = trainingData.getFeatureSelection();
		if (globalFS == null) {
			// Mask out all features; some will be added later by FeatureInducer.induceFeaturesFor(.)
			globalFS = new FeatureSelection (trainingData.getDataAlphabet());
			trainingData.setFeatureSelection (globalFS);
		}
		//if (validationData != null) validationData.setFeatureSelection (globalFS);
		//if (testingData != null) testingData.setFeatureSelection (globalFS);
		getOptimizer(trainingData); // This will initialize this.me so getClassifier() below works
		maxent.setFeatureSelection(globalFS);

		// Run feature induction
		for (int featureInductionIteration = 0;
				featureInductionIteration < numFeatureInductions;
				featureInductionIteration++) {

			// Print out some feature information
			logger.info ("Feature induction iteration "+featureInductionIteration);

			// Train the model a little bit.  We don't care whether it converges; we
			// execute all feature induction iterations no matter what.
			if (featureInductionIteration != 0) {
				// Don't train until we have added some features
				setNumIterations(numIterationsBetweenFeatureInductions);
				train (trainingData);
			}
			trainingIteration += numIterationsBetweenFeatureInductions;

			logger.info ("Starting feature induction with "+(1+inputAlphabet.size())+
					" features over "+numLabels+" labels.");

			// Create the list of error tokens
			InstanceList errorInstances = new InstanceList (trainingData.getDataAlphabet(),
					trainingData.getTargetAlphabet());

			// This errorInstances.featureSelection will get examined by FeatureInducer,
			// so it can know how to add "new" singleton features
			errorInstances.setFeatureSelection (globalFS);
			List errorLabelVectors = new ArrayList(); // these are length-1 vectors

			for (int i = 0; i < trainingData.size(); i++) {
				Instance instance = trainingData.get(i);
				FeatureVector inputVector = (FeatureVector) instance.getData();
				Label trueLabel = (Label) instance.getTarget();

				// Having trained using just the current features, see how we classify
				// the training data now.
				Classification classification = maxent.classify(instance);

				if (!classification.bestLabelIsCorrect()) {
					errorInstances.add(inputVector, trueLabel, null, null);
					errorLabelVectors.add(classification.getLabelVector());
				}
			}
			logger.info ("Error instance list size = "+errorInstances.size());

			int s = errorLabelVectors.size();
			LabelVector[] lvs = new LabelVector[s];
			for (int i = 0; i < s; i++) {
				lvs[i] = (LabelVector) errorLabelVectors.get(i);
			}

			RankedFeatureVector.Factory gainFactory = null;
			if (gainName.equals (EXP_GAIN))
				gainFactory = new ExpGain.Factory (lvs, gaussianPriorVariance);
			else if (gainName.equals(GRADIENT_GAIN))
				gainFactory = new GradientGain.Factory (lvs);
			else if (gainName.equals(INFORMATION_GAIN))
				gainFactory = new InfoGain.Factory ();
			else
				throw new IllegalArgumentException("Unsupported gain name: "+gainName);

			FeatureInducer klfi = new FeatureInducer (gainFactory,
					errorInstances,
					numFeaturesPerFeatureInduction,
					2*numFeaturesPerFeatureInduction,
					2*numFeaturesPerFeatureInduction);

			// Note that this adds features globally, but not on a per-transition basis
			klfi.induceFeaturesFor (trainingData, false, false);
			if (testingData != null) klfi.induceFeaturesFor (testingData, false, false);
			logger.info ("MaxEnt FeatureSelection now includes "+globalFS.cardinality()+" features");
			klfi = null;

			double[] newParameters = new double[(1+inputAlphabet.size()) * outputAlphabet.size()];

			// XXX (Executing this block often causes an error during training; I don't know why.)
			if (saveParametersDuringFI) {
				// Keep current parameter values
				// XXX This relies on the implementation detail that the most recent features
				// added to an Alphabet get the highest indices.

				// Count parameters per output label
				int oldParamCount = maxent.parameters.length / outputAlphabet.size();
				int newParamCount = 1+inputAlphabet.size();
				// Copy params into the proper locations
				for (int i=0; i