Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package opennlp.maxent;
import java.io.IOException;
import opennlp.model.DataIndexer;
import opennlp.model.EvalParameters;
import opennlp.model.EventStream;
import opennlp.model.MutableContext;
import opennlp.model.OnePassDataIndexer;
import opennlp.model.Prior;
import opennlp.model.UniformPrior;
/**
* An implementation of Generalized Iterative Scaling. The reference paper
* for this implementation was Adwait Ratnaparkhi's tech report at the
* University of Pennsylvania's Institute for Research in Cognitive Science,
* and is available at ftp://ftp.cis.upenn.edu/pub/ircs/tr/97-08.ps.Z.
*
* The slack parameter used in the above implementation has been removed by default
* from the computation and a method for updating with Gaussian smoothing has been
* added per Investigating GIS and Smoothing for Maximum Entropy Taggers, Clark and Curran (2002).
* http://acl.ldc.upenn.edu/E/E03/E03-1071.pdf
* The slack parameter can be used by setting useSlackParameter to true.
* Gaussian smoothing can be used by setting useGaussianSmoothing to true.
*
* A prior can be used to train models which converge to the distribution which minimizes the
* relative entropy between the distribution specified by the empirical constraints of the training
* data and the specified prior. By default, the uniform distribution is used as the prior.
*/
class GISTrainer {
/**
* Specifies whether unseen context/outcome pairs should be estimated as occur very infrequently.
*/
private boolean useSimpleSmoothing = false;
/**
* Specifies whether a slack parameter should be used in the model.
*/
private boolean useSlackParameter = false;
/**
* Specified whether parameter updates should prefer a distribution of parameters which
* is gaussian.
*/
private boolean useGaussianSmoothing = false;
private double sigma = 2.0;
// If we are using smoothing, this is used as the "number" of
// times we want the trainer to imagine that it saw a feature that it
// actually didn't see. Defaulted to 0.1.
private double _smoothingObservation = 0.1;
private boolean printMessages = false;
/**
* Number of unique events which occured in the event set.
*/
private int numUniqueEvents;
/**
* Number of predicates.
*/
private int numPreds;
/**
* Number of outcomes.
*/
private int numOutcomes;
/**
* Records the array of predicates seen in each event.
*/
private int[][] contexts;
/**
* The value associated with each context. If null then context values are assumes to be 1.
*/
private float[][] values;
/**
* List of outcomes for each event i, in context[i].
*/
private int[] outcomeList;
/**
* Records the num of times an event has been seen for each event i, in context[i].
*/
private int[] numTimesEventsSeen;
/**
* The number of times a predicate occured in the training data.
*/
private int[] predicateCounts;
private int cutoff;
/**
* Stores the String names of the outcomes. The GIS only tracks outcomes as
* ints, and so this array is needed to save the model to disk and thereby
* allow users to know what the outcome was in human understandable terms.
*/
private String[] outcomeLabels;
/**
* Stores the String names of the predicates. The GIS only tracks predicates
* as ints, and so this array is needed to save the model to disk and thereby
* allow users to know what the outcome was in human understandable terms.
*/
private String[] predLabels;
/**
* Stores the observed expected values of the features based on training data.
*/
private MutableContext[] observedExpects;
/**
* Stores the estimated parameter value of each predicate during iteration
*/
private MutableContext[] params;
/**
* Stores the expected values of the features based on the current models
*/
private MutableContext[] modelExpects;
/**
* This is the prior distribution that the model uses for training.
*/
private Prior prior;
/**
* Observed expectation of correction feature.
*/
private double cfObservedExpect;
/**
* A global variable for the models expected value of the correction feature.
*/
private double CFMOD;
private final double NEAR_ZERO = 0.01;
private final double LLThreshold = 0.0001;
/**
* Stores the number of features that get fired per event.
*/
int[] numfeats;
/**
* Initial probability for all outcomes.
*/
EvalParameters evalParams;
/**
* Creates a new GISTrainer instance which does not print
* progress messages about training to STDOUT.
*
*/
GISTrainer() {
super();
}
/**
* Creates a new GISTrainer instance.
*
* @param printMessages sends progress messages about training to
* STDOUT when true; trains silently otherwise.
*/
GISTrainer(boolean printMessages) {
this();
this.printMessages = printMessages;
}
/**
* Sets whether this trainer will use smoothing while training the model.
* This can improve model accuracy, though training will potentially take
* longer and use more memory. Model size will also be larger.
*
* @param smooth true if smoothing is desired, false if not
*/
public void setSmoothing(boolean smooth) {
useSimpleSmoothing = smooth;
}
/**
* Sets whether this trainer will use smoothing while training the model.
* This can improve model accuracy, though training will potentially take
* longer and use more memory. Model size will also be larger.
*
* @param timesSeen the "number" of times we want the trainer to imagine
* it saw a feature that it actually didn't see
*/
public void setSmoothingObservation(double timesSeen) {
_smoothingObservation = timesSeen;
}
/**
* Sets whether this trainer will use smoothing while training the model.
* This can improve model accuracy, though training will potentially take
* longer and use more memory. Model size will also be larger.
*
* @param smooth true if smoothing is desired, false if not
*/
public void setGaussianSigma(double sigmaValue) {
useGaussianSmoothing = true;
sigma = sigmaValue;
}
/**
* Trains a GIS model on the event in the specified event stream, using the specified number
* of iterations and the specified count cutoff.
* @param eventStream A stream of all events.
* @param iterations The number of iterations to use for GIS.
* @param cutoff The number of times a feature must occur to be included.
* @return A GIS model trained with specified
*/
public GISModel trainModel(EventStream eventStream, int iterations, int cutoff) throws IOException {
return trainModel(iterations, new OnePassDataIndexer(eventStream,cutoff),cutoff);
}
/**
* Train a model using the GIS algorithm.
*
* @param iterations The number of GIS iterations to perform.
* @param di The data indexer used to compress events in memory.
* @return The newly trained model, which can be used immediately or saved
* to disk using an opennlp.maxent.io.GISModelWriter object.
*/
public GISModel trainModel(int iterations, DataIndexer di, int cutoff) {
return trainModel(iterations,di,new UniformPrior(),cutoff);
}
/**
* Train a model using the GIS algorithm.
*
* @param iterations The number of GIS iterations to perform.
* @param di The data indexer used to compress events in memory.
* @param modelPrior The prior distribution used to train this model.
* @return The newly trained model, which can be used immediately or saved
* to disk using an opennlp.maxent.io.GISModelWriter object.
*/
public GISModel trainModel(int iterations, DataIndexer di, Prior modelPrior, int cutoff) {
/************** Incorporate all of the needed info ******************/
display("Incorporating indexed data for training... \n");
contexts = di.getContexts();
values = di.getValues();
this.cutoff = cutoff;
predicateCounts = di.getPredCounts();
numTimesEventsSeen = di.getNumTimesEventsSeen();
numUniqueEvents = contexts.length;
this.prior = modelPrior;
//printTable(contexts);
// determine the correction constant and its inverse
int correctionConstant = 1;
for (int ci = 0; ci < contexts.length; ci++) {
if (values == null || values[ci] == null) {
if (contexts[ci].length > correctionConstant) {
correctionConstant = contexts[ci].length;
}
}
else {
float cl = values[ci][0];
for (int vi=1;vi correctionConstant) {
correctionConstant=(int) Math.ceil(cl);
}
}
}
display("done.\n");
outcomeLabels = di.getOutcomeLabels();
outcomeList = di.getOutcomeList();
numOutcomes = outcomeLabels.length;
predLabels = di.getPredLabels();
prior.setLabels(outcomeLabels,predLabels);
numPreds = predLabels.length;
display("\tNumber of Event Tokens: " + numUniqueEvents + "\n");
display("\t Number of Outcomes: " + numOutcomes + "\n");
display("\t Number of Predicates: " + numPreds + "\n");
// set up feature arrays
float[][] predCount = new float[numPreds][numOutcomes];
for (int ti = 0; ti < numUniqueEvents; ti++) {
for (int j = 0; j < contexts[ti].length; j++) {
if (values != null && values[ti] != null) {
predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti]*values[ti][j];
}
else {
predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti];
}
}
}
//printTable(predCount);
di = null; // don't need it anymore
// A fake "observation" to cover features which are not detected in
// the data. The default is to assume that we observed "1/10th" of a
// feature during training.
final double smoothingObservation = _smoothingObservation;
// Get the observed expectations of the features. Strictly speaking,
// we should divide the counts by the number of Tokens, but because of
// the way the model's expectations are approximated in the
// implementation, this is cancelled out when we compute the next
// iteration of a parameter, making the extra divisions wasteful.
params = new MutableContext[numPreds];
modelExpects = new MutableContext[numPreds];
observedExpects = new MutableContext[numPreds];
// The model does need the correction constant and the correction feature. The correction constant
// is only needed during training, and the correction feature is not necessary.
// For compatibility reasons the model contains form now on a correction constant of 1,
// and a correction param 0.
evalParams = new EvalParameters(params,0,1,numOutcomes);
int[] activeOutcomes = new int[numOutcomes];
int[] outcomePattern;
int[] allOutcomesPattern= new int[numOutcomes];
for (int oi = 0; oi < numOutcomes; oi++) {
allOutcomesPattern[oi] = oi;
}
int numActiveOutcomes = 0;
for (int pi = 0; pi < numPreds; pi++) {
numActiveOutcomes = 0;
if (useSimpleSmoothing) {
numActiveOutcomes = numOutcomes;
outcomePattern = allOutcomesPattern;
}
else { //determine active outcomes
for (int oi = 0; oi < numOutcomes; oi++) {
if (predCount[pi][oi] > 0 && predicateCounts[pi] >= cutoff) {
activeOutcomes[numActiveOutcomes] = oi;
numActiveOutcomes++;
}
}
if (numActiveOutcomes == numOutcomes) {
outcomePattern = allOutcomesPattern;
}
else {
outcomePattern = new int[numActiveOutcomes];
for (int aoi=0;aoi 0) {
observedExpects[pi].setParameter(aoi, predCount[pi][oi]);
}
else if (useSimpleSmoothing) {
observedExpects[pi].setParameter(aoi,smoothingObservation);
}
}
}
// compute the expected value of correction
if (useSlackParameter) {
int cfvalSum = 0;
for (int ti = 0; ti < numUniqueEvents; ti++) {
for (int j = 0; j < contexts[ti].length; j++) {
int pi = contexts[ti][j];
if (!modelExpects[pi].contains(outcomeList[ti])) {
cfvalSum += numTimesEventsSeen[ti];
}
}
cfvalSum += (correctionConstant - contexts[ti].length) * numTimesEventsSeen[ti];
}
if (cfvalSum == 0) {
cfObservedExpect = Math.log(NEAR_ZERO); //nearly zero so log is defined
}
else {
cfObservedExpect = Math.log(cfvalSum);
}
}
predCount = null; // don't need it anymore
display("...done.\n");
numfeats = new int[numOutcomes];
/***************** Find the parameters ************************/
display("Computing model parameters...\n");
findParameters(iterations, correctionConstant);
/*************** Create and return the model ******************/
// To be compatible with old models the correction constant is always 1
return new GISModel(params, predLabels, outcomeLabels, 1, evalParams.getCorrectionParam());
}
/* Estimate and return the model parameters. */
private void findParameters(int iterations, int correctionConstant) {
double prevLL = 0.0;
double currLL = 0.0;
display("Performing " + iterations + " iterations.\n");
for (int i = 1; i <= iterations; i++) {
if (i < 10)
display(" " + i + ": ");
else if (i < 100)
display(" " + i + ": ");
else
display(i + ": ");
currLL = nextIteration(correctionConstant);
if (i > 1) {
if (prevLL > currLL) {
System.err.println("Model Diverging: loglikelihood decreased");
break;
}
if (currLL - prevLL < LLThreshold) {
break;
}
}
prevLL = currLL;
}
// kill a bunch of these big objects now that we don't need them
observedExpects = null;
modelExpects = null;
numTimesEventsSeen = null;
contexts = null;
}
//modeled on implementation in Zhang Le's maxent kit
private double gaussianUpdate(int predicate, int oid, int n, double correctionConstant) {
double param = params[predicate].getParameters()[oid];
double x = 0.0;
double x0 = 0.0;
double f;
double tmp;
double fp;
double modelValue = modelExpects[predicate].getParameters()[oid];
double observedValue = observedExpects[predicate].getParameters()[oid];
for (int i = 0; i < 50; i++) {
tmp = modelValue * Math.exp(correctionConstant * x0);
f = tmp + (param + x0) / sigma - observedValue;
fp = tmp * correctionConstant + 1 / sigma;
if (fp == 0) {
break;
}
x = x0 - f / fp;
if (Math.abs(x - x0) < 0.000001) {
x0 = x;
break;
}
x0 = x;
}
return x0;
}
/* Compute one iteration of GIS and retutn log-likelihood.*/
private double nextIteration(int correctionConstant) {
// compute contribution of p(a|b_i) for each feature and the new
// correction parameter
double[] modelDistribution = new double[numOutcomes];
double loglikelihood = 0.0;
CFMOD = 0.0;
int numEvents = 0;
int numCorrect = 0;
for (int ei = 0; ei < numUniqueEvents; ei++) {
if (values != null) {
prior.logPrior(modelDistribution,contexts[ei],values[ei]);
GISModel.eval(contexts[ei], values[ei], modelDistribution, evalParams);
}
else {
prior.logPrior(modelDistribution,contexts[ei]);
GISModel.eval(contexts[ei], modelDistribution, evalParams);
}
for (int j = 0; j < contexts[ei].length; j++) {
int pi = contexts[ei][j];
if (predicateCounts[pi] >= cutoff) {
int[] activeOutcomes = modelExpects[pi].getOutcomes();
for (int aoi=0;aoi modelDistribution[max]) {
max = oi;
}
}
if (max == outcomeList[ei]) {
numCorrect += numTimesEventsSeen[ei];
}
}
}
display(".");
// compute the new parameter values
for (int pi = 0; pi < numPreds; pi++) {
double[] observed = observedExpects[pi].getParameters();
double[] model = modelExpects[pi].getParameters();
int[] activeOutcomes = params[pi].getOutcomes();
for (int aoi=0;aoi 0.0 && useSlackParameter)
evalParams.setCorrectionParam(evalParams.getCorrectionParam() + (cfObservedExpect - Math.log(CFMOD)));
display(". loglikelihood=" + loglikelihood + "\t" + ((double) numCorrect / numEvents) + "\n");
return (loglikelihood);
}
private void display(String s) {
if (printMessages)
System.out.print(s);
}
}