
cc.mallet.fst.confidence.MaxEntSequenceConfidenceEstimator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jcore-mallet-2.0.9 Show documentation
Show all versions of jcore-mallet-2.0.9 Show documentation
MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
The newest version!
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a>
*/
package cc.mallet.fst.confidence;
import java.util.*;
import cc.mallet.classify.*;
import cc.mallet.fst.*;
import cc.mallet.pipe.*;
import cc.mallet.types.*;
/**
 * Estimates the confidence of a {@link Sequence} extracted by a {@link
 * Transducer} using a {@link MaxEnt} classifier to classify Sequences
 * as "correct" or "incorrect."
 *
 * <p>The classifier has to be built with
 * {@link #trainClassifier(InstanceList, String, String)} before any
 * confidence can be estimated.
 *
 * <p>TODO: the public interface of this class still needs work.
 */
public class MaxEntSequenceConfidenceEstimator extends TransducerSequenceConfidenceEstimator
{
  /** Upper bound on the number of InfoGain entries printed after training. */
  private static final int MAX_INFO_GAIN_ENTRIES_PRINTED = 30;

  /** Trainer used to build {@link #meClassifier}; created in the constructor. */
  MaxEntTrainer meTrainer;
  /** Classifier produced by {@link #trainClassifier}; {@code null} until then. */
  MaxEnt meClassifier;
  /** Pipe of the instance list the classifier was trained on. */
  Pipe pipe;
  /** Label names passed to {@link #trainClassifier}. */
  String correct, incorrect;

  /**
   * Creates an estimator whose classifier trainer is constructed with the
   * given Gaussian variance.
   *
   * @param model transducer handed to the superclass
   * @param gaussianVariance value passed to the {@link MaxEntTrainer} constructor
   */
  public MaxEntSequenceConfidenceEstimator (Transducer model, double gaussianVariance) {
    super (model);
    meTrainer = new MaxEntTrainer (gaussianVariance);
  }

  /**
   * Creates an estimator using a Gaussian variance of 10.0.
   *
   * @param model transducer handed to the superclass
   */
  public MaxEntSequenceConfidenceEstimator (Transducer model) {
    this (model, 10.0);
  }

  /**
   * @return the classifier built by the last call to {@link #trainClassifier},
   *         or {@code null} if it has not been called yet
   */
  public MaxEnt getClassifier () {
    return this.meClassifier;
  }

  /**
   * Trains the underlying classifier on {@code ilist}. Assumes
   * {@code ilist} has targets {@code correct} or {@code incorrect}.
   *
   * <p>Besides training, this remembers the pipe of {@code ilist} and the two
   * label names for later use, and prints up to
   * {@value #MAX_INFO_GAIN_ENTRIES_PRINTED} top-ranked InfoGain entries of
   * {@code ilist} to {@code System.out}.
   *
   * @param ilist training list to build correct/incorrect classifier
   * @param correct "correct" label
   * @param incorrect "incorrect" label
   * @return the newly trained classifier
   */
  public MaxEnt trainClassifier (InstanceList ilist, String correct, String incorrect) {
    this.meClassifier = (MaxEnt) meTrainer.train (ilist);
    this.pipe = ilist.getPipe ();
    this.correct = correct;
    this.incorrect = incorrect;

    // Diagnostic output: the highest-ranked InfoGain entries of the training data.
    InfoGain infoGain = new InfoGain (ilist);
    int numToPrint = Math.min (MAX_INFO_GAIN_ENTRIES_PRINTED, infoGain.numLocations ());
    for (int rank = 0; rank < numToPrint; rank++) {
      System.out.println ("InfoGain["+infoGain.getObjectAtRank(rank)+"]="+infoGain.getValueAtRank(rank));
    }
    return this.meClassifier;
  }

  /**
   * Calculates the confidence in the tagging of an {@link Instance}.
   *
   * <p>The confidence is the value the classifier's label vector assigns to
   * the "correct" label. {@code startTags} and {@code inTags} are not used by
   * this implementation.
   *
   * @param instance instance whose tagging is scored
   * @param startTags unused here
   * @param inTags unused here
   * @return value of the "correct" label in the classification of {@code instance}
   * @throws IllegalStateException if {@link #trainClassifier} has not been called
   */
  public double estimateConfidenceFor (Instance instance,
                                       Object[] startTags, Object[] inTags) {
    if (this.meClassifier == null) {
      // Previously surfaced as an anonymous NullPointerException.
      throw new IllegalStateException (
          "trainClassifier must be called before estimating confidence");
    }
    Classification classification;
    if (Alphabet.alphabetsMatch (instance, this.pipe)) {
      // Alphabets agree with the training pipe: classify a
      // SequenceConfidenceInstance built from the instance.
      classification = this.meClassifier.classify (new SequenceConfidenceInstance (instance));
    } else {
      // Otherwise the instance is classified as given.
      classification = this.meClassifier.classify (instance);
    }
    return classification.getLabelVector ().value (this.correct);
  }

  /**
   * Scores every instance of {@code ilist} with
   * {@link #estimateConfidenceFor} and returns the results sorted by the
   * natural ordering of {@link PipedInstanceWithConfidence}.
   *
   * <p>An instance counts as correct when the string form of its target's
   * best label equals the "correct" label given to {@link #trainClassifier}.
   * One line per instance is written to {@code System.err}.
   *
   * @param ilist instances to score; their targets are read as {@link Labeling}s
   * @param startTags forwarded to {@link #estimateConfidenceFor}
   * @param continueTags forwarded to {@link #estimateConfidenceFor}
   * @return the sorted results; an empty array when {@code ilist} is empty
   */
  public PipedInstanceWithConfidence[] rankPipedInstancesByConfidence (InstanceList ilist,
                                                                       Object[] startTags,
                                                                       Object[] continueTags) {
    // Sized exactly, so an empty list yields an empty array rather than the
    // one-element {null} array that toArray(new T[1]) used to produce.
    PipedInstanceWithConfidence[] ranked = new PipedInstanceWithConfidence[ilist.size ()];
    for (int i = 0; i < ranked.length; i++) {
      Instance instance = ilist.get (i);
      boolean correctInstance = ((Labeling)instance.getTarget()).getBestLabel().toString().equals (this.correct);
      System.err.println ("Instance is " + (correctInstance ? "correct" : "incorrect"));
      ranked[i] = new PipedInstanceWithConfidence (instance,
                                                   estimateConfidenceFor (instance, startTags, continueTags),
                                                   correctInstance);
    }
    // Natural-order, stable sort: same ordering Collections.sort gave the list.
    Arrays.sort (ranked);
    return ranked;
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy