
cc.mallet.fst.confidence.MaxEntSequenceConfidenceEstimator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mallet Show documentation
Show all versions of mallet Show documentation
MALLET is a Java-based package for statistical natural language processing,
document classification, clustering, topic modeling, information extraction,
and other machine learning applications to text.
The newest version!
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Aron Culotta [email protected]
*/
package cc.mallet.fst.confidence;
import java.util.*;
import cc.mallet.classify.*;
import cc.mallet.fst.*;
import cc.mallet.pipe.*;
import cc.mallet.types.*;
/**
 * Estimates the confidence of a {@link Sequence} extracted by a {@link
 * Transducer} using a {@link MaxEnt} classifier to classify Sequences
 * as "correct" or "incorrect."
 *
 * <p>The classifier must be built with {@link #trainClassifier} before
 * {@link #estimateConfidenceFor} or {@link #rankPipedInstancesByConfidence}
 * is asked to score any instance.
 *
 * <p>xxx needs some interface work.
 */
public class MaxEntSequenceConfidenceEstimator extends TransducerSequenceConfidenceEstimator
{
	/** Trainer used by {@link #trainClassifier} to build the correct/incorrect classifier. */
	MaxEntTrainer meTrainer;
	/** Classifier produced by {@link #trainClassifier}; <code>null</code> until then. */
	MaxEnt meClassifier;
	/** Pipe of the instance list the classifier was trained on; <code>null</code> until trained. */
	Pipe pipe;
	/** Label names supplied to {@link #trainClassifier}. */
	String correct, incorrect;

	/**
	 * @param model            transducer handed to the superclass constructor
	 * @param gaussianVariance value passed to the {@link MaxEntTrainer} constructor
	 */
	public MaxEntSequenceConfidenceEstimator (Transducer model, double gaussianVariance) {
		super (model);
		meTrainer = new MaxEntTrainer (gaussianVariance);
	}

	/**
	 * Same as {@link #MaxEntSequenceConfidenceEstimator(Transducer, double)}
	 * with a <code>gaussianVariance</code> of 10.0.
	 *
	 * @param model transducer handed to the superclass constructor
	 */
	public MaxEntSequenceConfidenceEstimator (Transducer model) {
		this (model, 10.0);
	}

	/**
	 * @return the classifier built by {@link #trainClassifier}, or
	 *         <code>null</code> if it has not been called yet
	 */
	public MaxEnt getClassifier () { return this.meClassifier; }

	/**
	 * Train the underlying classifier on <code>ilist</code>. Assumes
	 * <code>ilist</code> has targets <code>correct</code> or
	 * <code>incorrect</code>.
	 *
	 * <p>Also remembers the pipe of <code>ilist</code> and both label names
	 * for later confidence estimation, and prints the (at most) 30
	 * highest-ranked {@link InfoGain} entries of <code>ilist</code> to
	 * standard output.
	 *
	 * @param ilist training list to build correct/incorrect classifier
	 * @param correct "correct" label
	 * @param incorrect "incorrect" label
	 * @return the newly trained classifier
	 */
	public MaxEnt trainClassifier (InstanceList ilist, String correct, String incorrect) {
		this.meClassifier = (MaxEnt) meTrainer.train (ilist);
		this.pipe = ilist.getPipe ();
		this.correct = correct;
		this.incorrect = incorrect;

		// Diagnostic output: the highest-ranked features by information gain.
		InfoGain ig = new InfoGain (ilist);
		int numToPrint = Math.min (30, ig.numLocations());
		for (int rank = 0; rank < numToPrint; rank++)
			System.out.println ("InfoGain["+ig.getObjectAtRank(rank)+"]="+ig.getValueAtRank(rank));

		return this.meClassifier;
	}

	/**
	 * Calculates the confidence in the tagging of an {@link Instance}.
	 *
	 * <p>The instance is wrapped in a {@link SequenceConfidenceInstance}
	 * before classification when its alphabets match those of the training
	 * pipe; otherwise it is classified as given. The tag arrays are not
	 * consulted by this implementation.
	 *
	 * @param instance  instance whose tagging is scored
	 * @param startTags unused here
	 * @param inTags    unused here
	 * @return the value the classifier's label vector assigns to the
	 *         "correct" label
	 * @throws IllegalStateException if {@link #trainClassifier} has not been called
	 */
	public double estimateConfidenceFor (Instance instance,
	                                     Object[] startTags, Object[] inTags) {
		requireTrained ();
		Classification c;
		if (Alphabet.alphabetsMatch (instance, this.pipe))
			c = this.meClassifier.classify (new SequenceConfidenceInstance (instance));
		else
			c = this.meClassifier.classify (instance);
		return c.getLabelVector().value (this.correct);
	}

	/**
	 * Scores every instance of <code>ilist</code> with
	 * {@link #estimateConfidenceFor} and returns the results sorted by the
	 * natural ordering of {@link PipedInstanceWithConfidence}.
	 *
	 * <p>An instance counts as correct when the string form of its target's
	 * best label equals the "correct" label given to
	 * {@link #trainClassifier}; that verdict is also reported on standard
	 * error for each instance.
	 *
	 * @param ilist        instances to score; each target must be a {@link Labeling}
	 * @param startTags    forwarded to {@link #estimateConfidenceFor}
	 * @param continueTags forwarded to {@link #estimateConfidenceFor}
	 * @return one entry per instance, sorted; a zero-length array when
	 *         <code>ilist</code> is empty (never an array containing
	 *         <code>null</code>)
	 */
	public PipedInstanceWithConfidence[] rankPipedInstancesByConfidence (InstanceList ilist,
	                                                                     Object[] startTags,
	                                                                     Object[] continueTags) {
		// Size the result exactly: handing a longer array to toArray() would
		// leave a trailing null slot when ilist is empty.
		final int numInstances = ilist.size ();
		PipedInstanceWithConfidence[] ranked = new PipedInstanceWithConfidence[numInstances];
		for (int i = 0; i < numInstances; i++) {
			Instance instance = ilist.get (i);
			String bestLabel = ((Labeling) instance.getTarget ()).getBestLabel ().toString ();
			boolean correctInstance = bestLabel.equals (this.correct);
			System.err.println ("Instance is " + (correctInstance ? "correct" : "incorrect"));
			ranked[i] = new PipedInstanceWithConfidence (instance,
			                                             estimateConfidenceFor (instance, startTags, continueTags),
			                                             correctInstance);
		}
		// Same natural-ordering sort that Collections.sort would apply to a list.
		Arrays.sort (ranked);
		return ranked;
	}

	/** Fails fast with a descriptive message when no classifier has been trained yet. */
	private void requireTrained () {
		if (this.meClassifier == null)
			throw new IllegalStateException (
				"trainClassifier must be called before confidence can be estimated");
	}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy