
cc.mallet.fst.confidence.NBestViterbiConfidenceEstimator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mallet Show documentation
Show all versions of mallet Show documentation
MALLET is a Java-based package for statistical natural language processing,
document classification, clustering, topic modeling, information extraction,
and other machine learning applications to text.
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Aron Culotta [email protected]
*/
package cc.mallet.fst.confidence;
import java.util.logging.*;
import java.util.*;
import cc.mallet.fst.*;
import cc.mallet.pipe.iterator.*;
import cc.mallet.types.*;
import cc.mallet.util.MalletLogger;
/**
Estimates the confidence of an entire sequence by the probability
that one of the Viterbi paths rank 2->N is correct. Note that
this is a strange definition of confidence, and is mainly used for
{@link MultipleChoiceCRFActiveLearner}, where we want to find
Instances that are mislabeled, but are likely to have a correct
labeling in the top N Viterbi paths.
*/
public class NBestViterbiConfidenceEstimator extends TransducerSequenceConfidenceEstimator
{
	/** total number of Viterbi paths */
	int N;

	private static final Logger logger = MalletLogger.getLogger(
		NBestViterbiConfidenceEstimator.class.getName());

	/**
	   Creates an estimator that compares the best Viterbi path against
	   the remaining paths of an N-best list.

	   @param model the transducer handed to the superclass and used to
	   build the lattices
	   @param N total number of Viterbi paths to request; must be at least 1
	   @throws IllegalArgumentException if <code>N</code> is smaller than 1
	 */
	public NBestViterbiConfidenceEstimator (Transducer model, int N) {
		super(model);
		// Fail fast: a non-positive N can only surface later as an opaque
		// array/index exception inside estimateConfidenceFor.
		if (N < 1) {
			throw new IllegalArgumentException ("N must be at least 1, but was " + N);
		}
		this.N = N;
	}

	/**
	   Calculates the confidence in the tagging of a {@link Instance}.

	   The returned value is the ratio
	   <code>exp(total - w[0]) / sum_{i=1..k-1} exp(total - w[i])</code>,
	   where <code>total</code> is the total weight of the sum lattice,
	   <code>w[i]</code> is the weight of the i-th best alignment and
	   <code>k</code> is the number of alignments actually used (at most
	   <code>N</code>).  When only one alignment is used the denominator
	   is <code>0.0</code>, so the result is whatever floating-point
	   division by zero yields (infinity, or NaN if the numerator is
	   also zero).

	   @param instance the instance whose data is cast to a {@link Sequence}
	   and scored
	   @param startTags not used by this estimator
	   @param inTags not used by this estimator
	   @return the ratio described above
	 */
	public double estimateConfidenceFor (Instance instance,
	                                     Object[] startTags,
	                                     Object[] inTags) {
		// Both lattices are built over the same input; fetch it once.
		Sequence input = (Sequence) instance.getData();
		SumLatticeDefault lattice = new SumLatticeDefault (model, input);
		List<SequencePairAlignment<Object,Object>> as =
			new MaxLatticeDefault (model, input).bestOutputAlignments(N);

		// Never index past what was actually returned, in case fewer than
		// N alignments are available.
		int numPaths = Math.min (N, as.size());

		double latticeCost = lattice.getTotalWeight();
		// NOTE(review): the exponent is (lattice total - path weight), as in
		// the original code; confirm this matches the sign convention of
		// the weights returned by the lattices.
		double prFirstIsCorrect = Math.exp (latticeCost - as.get(0).getWeight());
		double prOtherIsCorrect = 0.0;
		for (int i = 1; i < numPaths; i++) {
			prOtherIsCorrect += Math.exp (latticeCost - as.get(i).getWeight());
		}
		return prFirstIsCorrect / prOtherIsCorrect;
	}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy