
cc.mallet.topics.DMROptimizable Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jcore-mallet-2.0.9 Show documentation
MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
The newest version!
package cc.mallet.topics;
/** This class implements the value and gradient functions for
* Dirichlet-multinomial Regression. See Guimaraes and Lindrooth
* for a general introduction to DMR,
* and Mimno and McCallum (UAI, 2008) for an application to
* multinomial mixture models.
*/
import cc.mallet.optimize.Optimizable;
import cc.mallet.classify.MaxEnt;
import cc.mallet.types.InstanceList;
import cc.mallet.types.Instance;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.Dirichlet;
import cc.mallet.types.MatrixOps;
import cc.mallet.util.MalletLogger;
import cc.mallet.util.MalletProgressMessageLogger;
import java.util.logging.*;
import java.text.NumberFormat;
import java.text.DecimalFormat;
public class DMROptimizable implements Optimizable.ByGradientValue {
private static Logger logger = MalletLogger.getLogger(DMROptimizable.class.getName());
private static Logger progressLogger = MalletProgressMessageLogger.getLogger(DMROptimizable.class.getName()+"-pl");
MaxEnt classifier;
InstanceList trainingList;
int numGetValueCalls = 0;
int numGetValueGradientCalls = 0;
int numIterations = Integer.MAX_VALUE;
NumberFormat formatter = null;
static final double DEFAULT_GAUSSIAN_PRIOR_VARIANCE = 1;
static final double DEFAULT_LARGE_GAUSSIAN_PRIOR_VARIANCE = 100;
static final double DEFAULT_GAUSSIAN_PRIOR_MEAN = 0.0;
double gaussianPriorMean = DEFAULT_GAUSSIAN_PRIOR_MEAN;
double gaussianPriorVariance = DEFAULT_GAUSSIAN_PRIOR_VARIANCE;
// Allowing the default feature (the base level) to
// fluctuate more freely than the feature parameters leads
// to much better results.
double defaultFeatureGaussianPriorVariance = DEFAULT_LARGE_GAUSSIAN_PRIOR_VARIANCE;
double[] parameters;
double[] cachedGradient;
double cachedValue;
boolean cachedValueStale;
boolean cachedGradientStale;
int numLabels;
int numFeatures;
int defaultFeatureIndex;
public DMROptimizable () {}
public DMROptimizable (InstanceList instances, MaxEnt initialClassifier) {
this.trainingList = instances;
Alphabet alphabet = instances.getDataAlphabet();
Alphabet labelAlphabet = instances.getTargetAlphabet();
this.numLabels = labelAlphabet.size();
// Add one feature for the "default feature".
this.numFeatures = alphabet.size() + 1; // add a spot for the intercept term
//System.out.println("num features: " + numFeatures + " numLabels: " + numLabels);
this.defaultFeatureIndex = numFeatures - 1;
this.parameters = new double [numLabels * numFeatures];
//this.constraints = new double [numLabels * numFeatures];
this.cachedGradient = new double [numLabels * numFeatures];
if (initialClassifier != null) {
this.classifier = initialClassifier;
this.parameters = classifier.getParameters();
this.defaultFeatureIndex = classifier.getDefaultFeatureIndex();
assert (initialClassifier.getInstancePipe() == instances.getPipe());
}
else if (this.classifier == null) {
this.classifier =
new MaxEnt (instances.getPipe(), parameters);
}
formatter = new DecimalFormat("0.###E0");
cachedValueStale = true;
cachedGradientStale = true;
// Initialize the constraints
logger.fine("Number of instances in training list = " + trainingList.size());
for (Instance instance : trainingList) {
FeatureVector multinomialValues = (FeatureVector) instance.getTarget();
if (multinomialValues == null)
continue;
FeatureVector features = (FeatureVector) instance.getData();
assert (features.getAlphabet() == alphabet);
boolean hasNaN = false;
for (int i = 0; i < features.numLocations(); i++) {
if (Double.isNaN(features.valueAtLocation(i))) {
logger.info("NaN for feature " + alphabet.lookupObject(features.indexAtLocation(i)).toString());
hasNaN = true;
}
}
if (hasNaN) {
logger.info("NaN in instance: " + instance.getName());
}
}
//TestMaximizable.testValueAndGradientCurrentParameters (this);
}
/** Set the variance for the default features (aka intercept terms), generally
* larger than the variance for the regular features.
*/
public void setInterceptGaussianPriorVariance(double sigmaSquared) {
this.defaultFeatureGaussianPriorVariance = sigmaSquared;
}
/** Set the variance for regular (non default) features, generally
* smaller than the variance for the default features.
*/
public void setRegularGaussianPriorVariance(double sigmaSquared) {
this.gaussianPriorVariance = sigmaSquared;
}
public MaxEnt getClassifier () { return classifier; }
public double getParameter (int index) {
return parameters[index];
}
public void setParameter (int index, double v) {
cachedValueStale = true;
cachedGradientStale = true;
parameters[index] = v;
}
public int getNumParameters() {
return parameters.length;
}
public void getParameters (double[] buff) {
if (buff == null || buff.length != parameters.length) {
buff = new double [parameters.length];
}
System.arraycopy (parameters, 0, buff, 0, parameters.length);
}
public void setParameters (double [] buff) {
assert (buff != null);
cachedValueStale = true;
cachedGradientStale = true;
if (buff.length != parameters.length)
parameters = new double[buff.length];
System.arraycopy (buff, 0, parameters, 0, buff.length);
}
/** The log probability of the observed count vectors given the features. */
public double getValue () {
if (! cachedValueStale) { return cachedValue; }
numGetValueCalls++;
cachedValue = 0;
// Incorporate likelihood of data
double[] scores = new double[ trainingList.getTargetAlphabet().size() ];
double value = 0.0;
int instanceIndex = 0;
for (Instance instance: trainingList) {
FeatureVector multinomialValues = (FeatureVector) instance.getTarget();
if (multinomialValues == null) { continue; }
//System.out.println("L Now "+inputAlphabet.size()+" regular features.");
// Get the predicted probability of each class
// under the current model parameters
this.classifier.getUnnormalizedClassificationScores(instance, scores);
double sumScores = 0.0;
// Exponentiate the scores
for (int i=0; i
Related Artifacts
Related Groups
-->
© 2015 - 2025 Weber Informatics LLC | Privacy Policy