
cc.mallet.topics.DMROptimizable Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jcore-mallet-2.0.9 Show documentation
MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
The newest version!
package cc.mallet.topics;
/** This class implements the value and gradient functions for
* Dirichlet-multinomial Regression. See Guimaraes and Lindrooth
* for a general introduction to DMR,
* and Mimno and McCallum (UAI, 2008) for an application to
* multinomial mixture models.
*/
import cc.mallet.optimize.Optimizable;
import cc.mallet.classify.MaxEnt;
import cc.mallet.types.InstanceList;
import cc.mallet.types.Instance;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.Dirichlet;
import cc.mallet.types.MatrixOps;
import cc.mallet.util.MalletLogger;
import cc.mallet.util.MalletProgressMessageLogger;
import java.util.logging.*;
import java.text.NumberFormat;
import java.text.DecimalFormat;
public class DMROptimizable implements Optimizable.ByGradientValue {
private static Logger logger = MalletLogger.getLogger(DMROptimizable.class.getName());
private static Logger progressLogger = MalletProgressMessageLogger.getLogger(DMROptimizable.class.getName()+"-pl");
MaxEnt classifier;
InstanceList trainingList;
int numGetValueCalls = 0;
int numGetValueGradientCalls = 0;
int numIterations = Integer.MAX_VALUE;
NumberFormat formatter = null;
static final double DEFAULT_GAUSSIAN_PRIOR_VARIANCE = 1;
static final double DEFAULT_LARGE_GAUSSIAN_PRIOR_VARIANCE = 100;
static final double DEFAULT_GAUSSIAN_PRIOR_MEAN = 0.0;
double gaussianPriorMean = DEFAULT_GAUSSIAN_PRIOR_MEAN;
double gaussianPriorVariance = DEFAULT_GAUSSIAN_PRIOR_VARIANCE;
// Allowing the default feature (the base level) to
// fluctuate more freely than the feature parameters leads
// to much better results.
double defaultFeatureGaussianPriorVariance = DEFAULT_LARGE_GAUSSIAN_PRIOR_VARIANCE;
double[] parameters;
double[] cachedGradient;
double cachedValue;
boolean cachedValueStale;
boolean cachedGradientStale;
int numLabels;
int numFeatures;
int defaultFeatureIndex;
public DMROptimizable () {}
public DMROptimizable (InstanceList instances, MaxEnt initialClassifier) {
this.trainingList = instances;
Alphabet alphabet = instances.getDataAlphabet();
Alphabet labelAlphabet = instances.getTargetAlphabet();
this.numLabels = labelAlphabet.size();
// Add one feature for the "default feature".
this.numFeatures = alphabet.size() + 1; // add a spot for the intercept term
//System.out.println("num features: " + numFeatures + " numLabels: " + numLabels);
this.defaultFeatureIndex = numFeatures - 1;
this.parameters = new double [numLabels * numFeatures];
//this.constraints = new double [numLabels * numFeatures];
this.cachedGradient = new double [numLabels * numFeatures];
if (initialClassifier != null) {
this.classifier = initialClassifier;
this.parameters = classifier.getParameters();
this.defaultFeatureIndex = classifier.getDefaultFeatureIndex();
assert (initialClassifier.getInstancePipe() == instances.getPipe());
}
else if (this.classifier == null) {
this.classifier =
new MaxEnt (instances.getPipe(), parameters);
}
formatter = new DecimalFormat("0.###E0");
cachedValueStale = true;
cachedGradientStale = true;
// Initialize the constraints
logger.fine("Number of instances in training list = " + trainingList.size());
for (Instance instance : trainingList) {
FeatureVector multinomialValues = (FeatureVector) instance.getTarget();
if (multinomialValues == null)
continue;
FeatureVector features = (FeatureVector) instance.getData();
assert (features.getAlphabet() == alphabet);
boolean hasNaN = false;
for (int i = 0; i < features.numLocations(); i++) {
if (Double.isNaN(features.valueAtLocation(i))) {
logger.info("NaN for feature " + alphabet.lookupObject(features.indexAtLocation(i)).toString());
hasNaN = true;
}
}
if (hasNaN) {
logger.info("NaN in instance: " + instance.getName());
}
}
//TestMaximizable.testValueAndGradientCurrentParameters (this);
}
/** Set the variance for the default features (aka intercept terms), generally
* larger than the variance for the regular features.
*/
public void setInterceptGaussianPriorVariance(double sigmaSquared) {
this.defaultFeatureGaussianPriorVariance = sigmaSquared;
}
/** Set the variance for regular (non default) features, generally
* smaller than the variance for the default features.
*/
public void setRegularGaussianPriorVariance(double sigmaSquared) {
this.gaussianPriorVariance = sigmaSquared;
}
public MaxEnt getClassifier () { return classifier; }
public double getParameter (int index) {
return parameters[index];
}
public void setParameter (int index, double v) {
cachedValueStale = true;
cachedGradientStale = true;
parameters[index] = v;
}
public int getNumParameters() {
return parameters.length;
}
public void getParameters (double[] buff) {
if (buff == null || buff.length != parameters.length) {
buff = new double [parameters.length];
}
System.arraycopy (parameters, 0, buff, 0, parameters.length);
}
public void setParameters (double [] buff) {
assert (buff != null);
cachedValueStale = true;
cachedGradientStale = true;
if (buff.length != parameters.length)
parameters = new double[buff.length];
System.arraycopy (buff, 0, parameters, 0, buff.length);
}
/** The log probability of the observed count vectors given the features. */
public double getValue () {
if (! cachedValueStale) { return cachedValue; }
numGetValueCalls++;
cachedValue = 0;
// Incorporate likelihood of data
double[] scores = new double[ trainingList.getTargetAlphabet().size() ];
double value = 0.0;
int instanceIndex = 0;
for (Instance instance: trainingList) {
FeatureVector multinomialValues = (FeatureVector) instance.getTarget();
if (multinomialValues == null) { continue; }
//System.out.println("L Now "+inputAlphabet.size()+" regular features.");
// Get the predicted probability of each class
// under the current model parameters
this.classifier.getUnnormalizedClassificationScores(instance, scores);
double sumScores = 0.0;
// Exponentiate the scores
for (int i=0; i
Related Artifacts
Related Groups
-->
© 2015 - 2025 Weber Informatics LLC | Privacy Policy