/*
 * edu.stanford.nlp.tagger.maxent.LambdaSolveTagger — distributed via Maven / Gradle / Ivy
 * as part of the stanford-corenlp artifact.
 *
 * Stanford CoreNLP provides a set of natural language analysis tools which can take raw
 * English language text input and give the base forms of words, their parts of speech,
 * whether they are names of companies, people, etc.; normalize dates, times, and numeric
 * quantities; mark up the structure of sentences in terms of phrases and word dependencies;
 * and indicate which noun phrases refer to the same entities. It provides the foundational
 * building blocks for higher-level text understanding applications.
 */
package edu.stanford.nlp.tagger.maxent;
import edu.stanford.nlp.util.logging.Redwood;
import edu.stanford.nlp.maxent.Feature;
import edu.stanford.nlp.maxent.Problem;
import edu.stanford.nlp.maxent.iis.LambdaSolve;
import java.text.NumberFormat;
import java.io.DataInputStream;
/**
* This module does the working out of lambda parameters for binary tagger
* features. It can use either IIS or CG.
*
* @author Kristina Toutanova
* @version 1.0
*/
public class LambdaSolveTagger extends LambdaSolve {

  /** A logger for this class. */
  private static final Redwood.RedwoodChannels log = Redwood.channels(LambdaSolveTagger.class);

  /**
   * Suppress extraneous printouts
   */
  //@SuppressWarnings("unused")
  //private static final boolean VERBOSE = false;

  /**
   * Initialize a LambdaSolveTagger for training.
   * Allocates the conditional-probability tables and empirical expectations,
   * then initializes them via {@link #initCondsZlambdaEtc()}.
   *
   * @param p1 The maxent problem (training data plus feature functions); fields
   *           {@code p}, {@code lambda}, {@code probConds}, {@code zlambda} and
   *           {@code ftildeArr} inherited from {@code LambdaSolve} are sized from it
   * @param eps1 Convergence threshold for lambda updates
   * @param fnumArr Precomputed binary feature counts f#(x,y), stored directly (no copy)
   */
  LambdaSolveTagger(Problem p1, double eps1, byte[][] fnumArr) {
    p = p1;
    eps = eps1;
    // newtonerr = nerr1;
    lambda = new double[p1.fSize];
    // lambda_converged = new boolean[p1.fSize];
    // cdm 2008: Below line is memory hog. Is there anything we can do to avoid this square array allocation?
    probConds = new double[p1.data.xSize][p1.data.ySize];
    this.fnumArr = fnumArr;
    zlambda = new double[p1.data.xSize];
    ftildeArr = new double[p.fSize];
    initCondsZlambdaEtc();
    super.setBinary();
  }

  /* Unused.
  @SuppressWarnings({"UnusedDeclaration"})
  private void readOldLambdas(String filename, String oldfilename) {
    double[] lambdaold;
    lambdaold = read_lambdas(oldfilename);
    HashMap oldAssocs = GlobalHolder.readAssociations(oldfilename);
    HashMap newAssocs = GlobalHolder.readAssociations(filename);
    for (FeatureKey fk : oldAssocs.keySet()) {
      int numOld = GlobalHolder.getNum(fk, oldAssocs);
      int numNew = GlobalHolder.getNum(fk, newAssocs);
      if ((numOld > -1) && (numNew > -1)) {
        lambda[numNew] = lambdaold[numOld];
        updateConds(numNew, lambdaold[numOld]);
      }
    }
  }
  */

  /* --- unused
  LambdaSolveTagger(String filename) {
    this.readL(filename);
    super.setBinary();
  }
  --- */

  /** Initialize a trained LambdaSolveTagger.
   *  This is the version used when loading a saved tagger.
   *  Only the lambda array is used, and the rest is irrelevant, CDM thinks.
   *
   *  @param dataStream Stream to load lambda parameters from.
   */
  LambdaSolveTagger(DataInputStream dataStream) {
    lambda = read_lambdas(dataStream);
    super.setBinary();
  }

  /** Initialize a trained LambdaSolveTagger.
   *  This is the version used when creating a LambdaSolveTagger from
   *  a condensed lambda array.
   *  Only the lambda array is used, and the rest is irrelevant, CDM thinks.
   *
   *  @param lambda Array used as the lambda parameters (directly; no safety copy is made).
   */
  LambdaSolveTagger(double[] lambda) {
    this.lambda = lambda;
    super.setBinary();
  }

  /**
   * Initialize the conditional probabilities (uniform over tags), the
   * normalizers {@code zlambda}, and the empirical feature expectations
   * {@code ftildeArr}. Logs a warning for any feature with empirical
   * expectation 0, since such a feature can never be matched by the model.
   */
  void initCondsZlambdaEtc() {
    // init pcond: uniform distribution over the ySize tags for every history x
    for (int x = 0; x < p.data.xSize; x++) {
      for (int y = 0; y < p.data.ySize; y++) {
        probConds[x][y] = 1.0 / p.data.ySize;
      }
    }
    log.info(" pcond initialized ");
    // init zlambda: with all lambdas 0, each unnormalized score is exp(0)=1,
    // so the normalizer is just the number of tags
    for (int x = 0; x < p.data.xSize; x++) {
      zlambda[x] = p.data.ySize;
    }
    log.info(" zlambda initialized ");
    // init ftildeArr: empirical expectation of each feature under the training data
    for (int i = 0; i < p.fSize; i++) {
      ftildeArr[i] = p.functions.get(i).ftilde();
      if (ftildeArr[i] == 0) {
        log.info(" Empirical expectation 0 for feature " + i);
      }
    }
    log.info(" ftildeArr initialized ");
  }

  /* --- unused
   *
   * Iteration for lambda[index].
   *
   * @return true if this lambda hasn't converged.
   *
  boolean iterate(int index, double err, MutableDouble ret) {
    double deltaL = 0.0;
    deltaL = newton(deltaL, index, err);
    lambda[index] = lambda[index] + deltaL;
    if (!(deltaL == deltaL)) {
      log.info(" NaN " + index + ' ' + deltaL);
    }
    ret.set(deltaL);
    return (Math.abs(deltaL) >= eps);
  }
  --- */

  /* --- unused:
   *
   * Finds the root of an equation by Newton's method. This is my
   * implementation. It might be improved if we looked at some official
   * library for numerical methods.
   *
  double newton(double lambda0, int index, double err) {
    double lambdaN = lambda0;
    int i = 0;
    do {
      i++;
      double lambdaP = lambdaN;
      double gPrimeVal = gprime(lambdaP, index);
      if (!(gPrimeVal == gPrimeVal)) {
        log.info("gPrime of " + lambdaP + ' ' + index + " is NaN " + gPrimeVal);
      }
      double gVal = g(lambdaP, index);
      if (gPrimeVal == 0.0) {
        return 0.0;
      }
      lambdaN = lambdaP - gVal / gPrimeVal;
      if (!(lambdaN == lambdaN)) {
        log.info("the division of " + gVal + ' ' + gPrimeVal + ' ' + index + " is NaN " + lambdaN);
        return 0;
      }
      if (Math.abs(lambdaN - lambdaP) < err) {
        return lambdaN;
      }
      if (i > 100) {
        if (Math.abs(gVal) > 1) {
          return 0;
        }
        return lambdaN;
      }
    } while (true);
  }
  --- */

  /* --- unused:
   *
   * This method updates the conditional probabilities in the model, resulting from the
   * update of lambda[index] to lambda[index]+deltaL .
   *
  void updateConds(int index, double deltaL) {
    // for each x that (x,y)=true / exists y
    // recalculate pcond(y,x) for all y
    int yTag = ((TaggerFeature) (p.functions.get(index))).getYTag();
    for (int i = 0; i < p.functions.get(index).len(); i++) {
      // update for this x
      double s = 0;
      int x = (p.functions.get(index)).getX(i);
      double zlambdaX = zlambda[x] + pcond(yTag, x) * zlambda[x] * (Math.exp(deltaL) - 1);
      for (int y = 0; y < p.data.ySize; y++) {
        probConds[x][y] = (probConds[x][y] * zlambda[x]) / zlambdaX;
        s = s + probConds[x][y];
      }
      s = s - probConds[x][yTag];
      probConds[x][yTag] = probConds[x][yTag] * Math.exp(deltaL);
      s = s + probConds[x][yTag];
      zlambda[x] = zlambdaX;
    }
  }
  --- */

  /* --- unused:
  double pcondCalc(int y, int x) {
    double zlambdaX;
    zlambdaX = 0.0;
    for (int y1 = 0; y1 < p.data.ySize; y1++) {
      double s = 0.0;
      for (int i = 0; i < p.fSize; i++) {
        s = s + lambda[i] * p.functions.get(i).getVal(x, y1);
      }
      zlambdaX = zlambdaX + Math.exp(s);
    }
    double s = 0.0;
    for (int i = 0; i < p.fSize; i++) {
      s = s + lambda[i] * p.functions.get(i).getVal(x, y);
    }
    return (1 / zlambdaX) * Math.exp(s);
  }

  double fnumCalc(int x, int y) {
    double s = 0.0;
    for (int i = 0; i < p.fSize; i++) {
      //this is slow
      s = s + p.functions.get(i).getVal(x, y);
    }
    return s;
  }
  --- */

  /**
   * The IIS update function g for feature {@code index}, evaluated at a
   * candidate lambda increment {@code lambdaP}: the model expectation of the
   * feature scaled by exp(lambdaP * f#(x,y)), minus the empirical expectation.
   * Its root in {@code lambdaP} gives the lambda update.
   *
   * @param lambdaP Candidate increment for lambda[index]
   * @param index Index of the feature function
   * @return The value of g at lambdaP
   */
  double g(double lambdaP, int index) {
    Feature func = p.functions.get(index);
    // For a binary tagger feature the tag y is fixed across all active
    // histories, so the cast and getYTag() call are hoisted out of the loop.
    int y = ((TaggerFeature) func).getYTag();
    double s = 0.0;
    for (int i = 0; i < func.len(); i++) {
      int x = func.getX(i);
      s = s + p.data.ptildeX(x) * pcond(y, x) * Math.exp(lambdaP * fnum(x, y));
    }
    return s - ftildeArr[index];
  }

  /* --- unused
  double gprime(double lambdaP, int index) {
    double s = 0.0;
    for (int i = 0; i < p.functions.get(index).len(); i++) {
      int y = ((TaggerFeature) (p.functions.get(index))).getYTag();
      int x = (p.functions.get(index)).getX(i);
      s = s + p.data.ptildeX(x) * pcond(y, x) * 1 * Math.exp(lambdaP * fnum(x, y)) * fnum(x, y);
    }
    return s;
  }
  --- */

  /**
   * Model expectation of a (binary tagger) feature under the current
   * conditional distribution: sum over active histories x of ptilde(x) * p(y|x).
   *
   * @param f The feature (must be a {@code TaggerFeature})
   * @return The model expectation of f
   */
  double fExpected(Feature f) {
    TaggerFeature tF = (TaggerFeature) f;
    double s = 0.0;
    int y = tF.getYTag();
    for (int i = 0; i < f.len(); i++) {
      int x = tF.getX(i);
      s = s + p.data.ptildeX(x) * pcond(y, x);
    }
    return s;
  }

  /** Works out whether the model expectations match the empirical
   *  expectations.
   *  @return Whether the model is correct
   */
  @Override
  public boolean checkCorrectness() {
    log.info("Checking model correctness; x size " + p.data.xSize + ", ysize " + p.data.ySize);
    NumberFormat nf = NumberFormat.getNumberInstance();
    nf.setMaximumFractionDigits(4);
    boolean flag = true;
    // Warn about suspiciously large weights (possible non-convergence),
    // but do not fail the check for them.
    for (int f = 0; f < lambda.length; f++) {
      if (Math.abs(lambda[f]) > 100) {
        log.info(" Lambda too big " + lambda[f]);
        log.info(" empirical " + ftildeArr[f] + " expected " + fExpected(p.functions.get(f)));
      }
    }
    // Each constraint is satisfied when model and empirical expectations agree
    // to within 0.001. fExpected is O(len(f)), so compute it once per feature.
    for (int i = 0; i < ftildeArr.length; i++) {
      double expected = fExpected(p.functions.get(i));
      double diff = Math.abs(ftildeArr[i] - expected);
      if (diff > 0.001) {
        flag = false;
        log.info("Constraint " + i + " not satisfied emp " + nf.format(ftildeArr[i]) + " exp " + nf.format(expected) + " diff " + nf.format(diff) + " lambda " + nf.format(lambda[i]));
      }
    }
    // Sanity check: conditional probabilities must sum to 1 for every history.
    // This only logs; it does not affect the return value.
    for (int x = 0; x < p.data.xSize; x++) {
      double s = 0.0;
      for (int y = 0; y < p.data.ySize; y++) {
        s = s + probConds[x][y];
      }
      if (Math.abs(s - 1) > 0.0001) {
        for (int y = 0; y < p.data.ySize; y++) {
          log.info(y + " : " + probConds[x][y]);
        }
        log.info("probabilities do not sum to one " + x + ' ' + (float) s);
      }
    }
    return flag;
  }

  /* --- unused
  double ZAlfa(double alfa, Feature f, int x) {
    double s = 0.0;
    for (int y = 0; y < p.data.ySize; y++) {
      s = s + pcond(y, x) * Math.exp(alfa * f.getVal(x, y));
    }
    return s;
  }
  --- */

  /* ---
  private static double[] read_lambdas(String modelFilename) {
    if (VERBOSE) {
      log.info(" entering read");
    }
    try {
      double[] lambdaold;
      // InDataStreamFile rf=new InDataStreamFile(modelFilename+".holder.prob");
      // int xSize=rf.readInt();
      // int ySize=rf.readInt();
      // if (VERBOSE) log.info("x y "+xSize+" "+ySize);
      // //rf.seek(rf.getFilePointer()+xSize*ySize*8);
      // int funsize=rf.readInt();
      // lambdaold=new double[funsize];
      // byte[] b=new byte[funsize*8];
      // rf.read(b);
      // lambdaold=Convert.byteArrToDoubleArr(b);
      // rf.close();
      DataInputStream dis = new DataInputStream(new FileInputStream(modelFilename + ".holder.prob"));
      int xSize = dis.readInt();
      int ySize = dis.readInt();
      if (VERBOSE) {
        log.info("x y " + xSize + ' ' + ySize);
      }
      int funsize = dis.readInt();
      byte[] b = new byte[funsize * 8];
      if (dis.read(b) != funsize * 8) { log.info("Rewrite read_lambdas!"); }
      lambdaold = Convert.byteArrToDoubleArr(b);
      dis.close();
      return lambdaold;
    } catch (IOException e) {
      e.printStackTrace();
    }
    return null;
  }
  --- */

}