All downloads are free. The search and download functionality uses the official Maven repository.

cc.mallet.pipe.Classification2ConfidencePredictingFeatureVector Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

The newest version!
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */




/** 
   @author Aron Culotta [email protected]
 */

package cc.mallet.pipe;

import cc.mallet.classify.*;
import cc.mallet.pipe.Pipe;
import cc.mallet.types.*;
import cc.mallet.util.PropertyList;

  /** Pipes features derived from an underlying classifier's
   * {@link Classification} into the instance list used for
   * confidence prediction.
   */
public class Classification2ConfidencePredictingFeatureVector extends Pipe
{
	/**
	 * Constructs the pipe, handing a newly created {@link Alphabet} and a
	 * newly created {@link LabelAlphabet} to the {@link Pipe} superclass
	 * constructor.
	 *
	 * NOTE(review): presumably these become the data and target
	 * dictionaries of this pipe -- confirm against Pipe's constructor.
	 */
	public Classification2ConfidencePredictingFeatureVector ()
	{
		super (new Alphabet(), new LabelAlphabet());
	}
	
	public Instance pipe (Instance carrier)
	{
		Classification classification = (Classification) carrier.getData();
		PropertyList features = null;
		LabelVector lv = classification.getLabelVector();
		Label bestLabel = lv.getBestLabel();
		Instance inst = (Instance)classification.getInstance();
		FeatureVector fv = (FeatureVector)inst.getData();
		Alphabet fdict = fv.getAlphabet();
		
		double winningThreshold = .990;
		double varianceThreshold = .15;
		double secondThreshold = .03;
		
		double winningScore = lv.getValueAtRank(0);
		double marginOfVictory = winningScore - lv.getValueAtRank(1);
		
		// attempts to use the confusion matrix of the training list
		// as some prior knowledge in training
		
		features = PropertyList.add ("winningScore", winningScore, features);
		features = PropertyList.add ("secondScore", lv.getValueAtRank(1), features);
		for(int i=0; i secondThreshold) {
				features = PropertyList.add ("SecondScoreAboveX", 1.0, features);
				secondScoreGreaterThanX++;			    
				}
*/			
			
			/*
			// all the confidence predicting features
			features = PropertyList.add ("winningScore", winningScore, features);
			
			features = PropertyList.add(bestLabel.toString()+"IsFirst", 1.0, features);
			features = PropertyList.add (lv.getLabelAtRank(1).toString() + "IsSecond", 1.0, features);			
			
			features = PropertyList.add ("secondScore", lv.getValueAtRank(1), features);

			for(int i=0; i secondThreshold) {
			        features = PropertyList.add ("SecondScoreAboveX", 1.0, features);
				secondScoreGreaterThanX++;			    
			}
			LabelAlphabet vocab = lv.getLabelAlphabet();
 			for(int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy