
cc.mallet.topics.DMROptimizable
MALLET is a Java-based package for statistical natural language processing,
document classification, clustering, topic modeling, information extraction,
and other machine learning applications to text.
package cc.mallet.topics;
/** This class implements the value and gradient functions for
 *  Dirichlet-multinomial Regression (DMR). See Guimaraes and Lindrooth
 *  for a general introduction to DMR, and Mimno and McCallum (UAI, 2008)
 *  for an application to multinomial mixture models.
 */
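/* Background sketch (cf. Mimno and McCallum, UAI 2008): in DMR, each
 * document d carries an observed feature vector x_d, and the Dirichlet
 * prior over its topic distribution is parameterized as
 *
 *     alpha_{d,t} = exp( lambda_t . x_d + b_t )
 *
 * where lambda_t holds the per-topic feature weights stored in the
 * `parameters` array of this class and b_t is the intercept ("default
 * feature") weight. This class exposes the log likelihood of the observed
 * topic counts under that prior, plus its gradient, so that a generic
 * gradient-based optimizer can fit lambda and b.
 */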
import cc.mallet.optimize.Optimizable;
import cc.mallet.classify.MaxEnt;
import cc.mallet.types.InstanceList;
import cc.mallet.types.Instance;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.Dirichlet;
import cc.mallet.types.MatrixOps;
import cc.mallet.util.MalletLogger;
import cc.mallet.util.MalletProgressMessageLogger;
import java.util.logging.*;
import java.util.*;
import java.text.NumberFormat;
import java.text.DecimalFormat;
import gnu.trove.TIntIntHashMap;
public class DMROptimizable implements Optimizable.ByGradientValue {

    private static Logger logger = MalletLogger.getLogger(DMROptimizable.class.getName());
    private static Logger progressLogger = MalletProgressMessageLogger.getLogger(DMROptimizable.class.getName() + "-pl");

    MaxEnt classifier;
    InstanceList trainingList;

    int numGetValueCalls = 0;
    int numGetValueGradientCalls = 0;
    int numIterations = Integer.MAX_VALUE;

    NumberFormat formatter = null;

    static final double DEFAULT_GAUSSIAN_PRIOR_VARIANCE = 1;
    static final double DEFAULT_LARGE_GAUSSIAN_PRIOR_VARIANCE = 100;
    static final double DEFAULT_GAUSSIAN_PRIOR_MEAN = 0.0;

    double gaussianPriorMean = DEFAULT_GAUSSIAN_PRIOR_MEAN;
    double gaussianPriorVariance = DEFAULT_GAUSSIAN_PRIOR_VARIANCE;

    // Allowing the default feature (the base level) to fluctuate more
    // freely than the feature parameters leads to much better results.
    double defaultFeatureGaussianPriorVariance = DEFAULT_LARGE_GAUSSIAN_PRIOR_VARIANCE;
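    // Sketch (an assumption about the value/gradient code, which is not shown
    // in full in this listing): the Gaussian prior term for a given parameter
    // would use the wide variance only at the intercept column, e.g.
    //
    //     double variance = (featureIndex == defaultFeatureIndex)
    //         ? defaultFeatureGaussianPriorVariance
    //         : gaussianPriorVariance;
    //     value -= (param - gaussianPriorMean) * (param - gaussianPriorMean)
    //              / (2 * variance);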
    double[] parameters;
    double[] cachedGradient;
    double cachedValue;
    boolean cachedValueStale;
    boolean cachedGradientStale;

    int numLabels;
    int numFeatures;
    int defaultFeatureIndex;
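    // Layout note (matching how the MaxEnt classifier below stores weights):
    // `parameters` is a flat, row-major array with one row per label, so the
    // weight for (label, feature) lives at parameters[label * numFeatures + feature],
    // with the intercept at parameters[label * numFeatures + defaultFeatureIndex].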
    public DMROptimizable () {}

    public DMROptimizable (InstanceList instances, MaxEnt initialClassifier) {

        this.trainingList = instances;

        Alphabet alphabet = instances.getDataAlphabet();
        Alphabet labelAlphabet = instances.getTargetAlphabet();

        this.numLabels = labelAlphabet.size();

        // Add one feature for the "default feature".
        this.numFeatures = alphabet.size() + 1; // add a spot for the intercept term

        //System.out.println("num features: " + numFeatures + " numLabels: " + numLabels);

        this.defaultFeatureIndex = numFeatures - 1;

        this.parameters = new double [numLabels * numFeatures];
        //this.constraints = new double [numLabels * numFeatures];
        this.cachedGradient = new double [numLabels * numFeatures];

        if (initialClassifier != null) {
            this.classifier = initialClassifier;
            this.parameters = classifier.getParameters();
            this.defaultFeatureIndex = classifier.getDefaultFeatureIndex();
            assert (initialClassifier.getInstancePipe() == instances.getPipe());
        }
        else if (this.classifier == null) {
            this.classifier = new MaxEnt (instances.getPipe(), parameters);
        }

        formatter = new DecimalFormat("0.###E0");

        cachedValueStale = true;
        cachedGradientStale = true;

        // Check the training data for NaN feature values.
        logger.fine("Number of instances in training list = " + trainingList.size());

        for (Instance instance : trainingList) {
            FeatureVector multinomialValues = (FeatureVector) instance.getTarget();
            if (multinomialValues == null)
                continue;

            FeatureVector features = (FeatureVector) instance.getData();
            assert (features.getAlphabet() == alphabet);

            boolean hasNaN = false;

            for (int i = 0; i < features.numLocations(); i++) {
                if (Double.isNaN(features.valueAtLocation(i))) {
                    logger.info("NaN for feature " + alphabet.lookupObject(features.indexAtLocation(i)).toString());
                    hasNaN = true;
                }
            }

            if (hasNaN) {
                logger.info("NaN in instance: " + instance.getName());
            }
        }

        //TestMaximizable.testValueAndGradientCurrentParameters (this);
    }
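    /* Usage sketch (hypothetical driver code, assuming a MALLET InstanceList
     * `instances` whose targets are topic-count FeatureVectors; the variance
     * values shown are illustrative, not mandated by this class):
     *
     *     DMROptimizable optimizable = new DMROptimizable(instances, null);
     *     optimizable.setRegularGaussianPriorVariance(0.5);
     *     optimizable.setInterceptGaussianPriorVariance(100.0);
     *
     *     // Any optimizer over Optimizable.ByGradientValue works; L-BFGS is typical.
     *     cc.mallet.optimize.LimitedMemoryBFGS optimizer =
     *         new cc.mallet.optimize.LimitedMemoryBFGS(optimizable);
     *     optimizer.optimize();
     *
     *     MaxEnt dmrParameters = optimizable.getClassifier();
     */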
    /** Set the variance for the default features (aka intercept terms), generally
     *  larger than the variance for the regular features.
     */
    public void setInterceptGaussianPriorVariance(double sigmaSquared) {
        this.defaultFeatureGaussianPriorVariance = sigmaSquared;
    }

    /** Set the variance for regular (non-default) features, generally
     *  smaller than the variance for the default features.
     */
    public void setRegularGaussianPriorVariance(double sigmaSquared) {
        this.gaussianPriorVariance = sigmaSquared;
    }
    public MaxEnt getClassifier () { return classifier; }

    public double getParameter (int index) {
        return parameters[index];
    }

    public void setParameter (int index, double v) {
        cachedValueStale = true;
        cachedGradientStale = true;
        parameters[index] = v;
    }

    public int getNumParameters() {
        return parameters.length;
    }
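    // Note on the method below: reassigning `buff` only rebinds the local
    // reference, so a caller that passes null (or a wrong-sized array) never
    // sees the copy. Callers should pass an array of length getNumParameters().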
    public void getParameters (double[] buff) {
        if (buff == null || buff.length != parameters.length) {
            buff = new double [parameters.length];
        }
        System.arraycopy (parameters, 0, buff, 0, parameters.length);
    }
    public void setParameters (double [] buff) {
        assert (buff != null);
        cachedValueStale = true;
        cachedGradientStale = true;
        if (buff.length != parameters.length)
            parameters = new double[buff.length];
        System.arraycopy (buff, 0, parameters, 0, buff.length);
    }
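    /* Worked form of the value function below. Writing alpha_t = exp(score_t)
     * for each label t, and N for the instance's total count, the standard
     * Dirichlet-multinomial log likelihood per instance is
     *
     *     log Gamma(sum_t alpha_t) - log Gamma(sum_t alpha_t + N)
     *       + sum_t [ log Gamma(alpha_t + n_t) - log Gamma(alpha_t) ]
     *
     * where n_t is the observed count for label t. The portion of the method
     * visible in this listing computes the alpha_t values and their sum.
     */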
    /** The log probability of the observed count vectors given the features. */
    public double getValue () {

        if (! cachedValueStale) { return cachedValue; }
        numGetValueCalls++;

        cachedValue = 0;

        // Incorporate likelihood of data
        double[] scores = new double[ trainingList.getTargetAlphabet().size() ];

        double value = 0.0;

        int instanceIndex = 0;

        for (Instance instance: trainingList) {

            FeatureVector multinomialValues = (FeatureVector) instance.getTarget();
            if (multinomialValues == null) { continue; }

            //System.out.println("L Now "+inputAlphabet.size()+" regular features.");

            // Get the (unnormalized, log-space) score of each label under the
            //  current model parameters; once exponentiated below, these
            //  become the Dirichlet parameters for this instance.
            this.classifier.getUnnormalizedClassificationScores(instance, scores);

            double sumScores = 0.0;

            // Exponentiate the scores
            for (int i = 0; i < scores.length; i++) {
                // Due to underflow, some of these scores may round to 0.0.
                scores[i] = Math.exp(scores[i]);
                sumScores += scores[i];
            }