weka.gui.beans.PredictionAppender Maven / Gradle / Ivy
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* PredictionAppender.java
* Copyright (C) 2003-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.gui.beans;
import java.awt.BorderLayout;
import java.beans.EventSetDescriptor;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Vector;
import javax.swing.JPanel;
import weka.clusterers.DensityBasedClusterer;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
/**
* Bean that can accept batch or incremental classifier events and produce
* dataset or instance events which contain instances with predictions appended.
*
* @author Mark Hall
* @version $Revision: 10813 $
*/
public class PredictionAppender extends JPanel implements DataSource,
TrainingSetProducer, TestSetProducer, Visible, BeanCommon, EventConstraints,
BatchClassifierListener, IncrementalClassifierListener,
BatchClustererListener, Serializable {
/** for serialization */
private static final long serialVersionUID = -2987740065058976673L;
/**
* Objects listening for dataset events
*/
protected Vector m_dataSourceListeners =
new Vector();
/**
* Objects listening for instance events
*/
protected Vector m_instanceListeners =
new Vector();
/**
* Objects listening for training set events
*/
protected Vector m_trainingSetListeners =
new Vector();;
/**
* Objects listening for test set events
*/
protected Vector m_testSetListeners =
new Vector();
/**
* Non null if this object is a target for any events.
*/
protected Object m_listenee = null;
/**
* Format of instances to be produced. When non-null it is also handed to
* every listener at the moment that listener registers.
*/
protected Instances m_format;
/**
* Visual representation of this bean, initialised with the default static and
* animated PredictionAppender icons.
*/
protected BeanVisual m_visual = new BeanVisual("PredictionAppender",
BeanVisual.ICON_PATH + "PredictionAppender.gif", BeanVisual.ICON_PATH
+ "PredictionAppender_animated.gif");
/**
* Append classifier's predicted probabilities (if the class is discrete and
* the classifier is a distribution classifier)
*/
protected boolean m_appendProbabilities;
/**
* Logger used for log and status messages; may be null, so callers check
* before use. Transient, hence not serialized with the bean.
*/
protected transient weka.gui.Logger m_logger;
/**
* Indexes of the string attributes in the incoming stream structure,
* collected when a new incremental batch starts. Transient, so it can be null.
*/
protected transient List m_stringAttIndexes;
/**
 * Provides the global description of this bean.
 *
 * @return a short, human readable description of what the bean does
 */
public String globalInfo() {
  final String description =
    "Accepts batch or incremental classifier events and "
      + "produces a new data set with classifier predictions appended.";
  return description;
}
/**
* Creates a new PredictionAppender
instance.
*/
public PredictionAppender() {
setLayout(new BorderLayout());
add(m_visual, BorderLayout.CENTER);
}
/**
 * Sets a custom (descriptive) name for this bean by updating the text shown
 * on its visual.
 *
 * @param name the name to use
 */
@Override
public void setCustomName(String name) {
  final BeanVisual visual = m_visual;
  visual.setText(name);
}
/**
 * Gets the custom (descriptive) name for this bean, i.e. the text currently
 * shown on its visual.
 *
 * @return the custom name (or the default name)
 */
@Override
public String getCustomName() {
  final String currentName = m_visual.getText();
  return currentName;
}
/**
 * Returns a tip text for the appendPredictedProbabilities property, suitable
 * for displaying in a GUI.
 *
 * @return the tip text
 */
public String appendPredictedProbabilitiesTipText() {
  final String tip =
    "append probabilities rather than labels for discrete class "
      + "predictions";
  return tip;
}
/**
 * Tells whether predicted probabilities are appended rather than the
 * predicted class value.
 *
 * @return true if probabilities are to be appended
 */
public boolean getAppendPredictedProbabilities() {
  return this.m_appendProbabilities;
}
/**
 * Sets whether predicted probabilities are appended rather than the
 * predicted class value (for discrete class data sets).
 *
 * @param ap true if probabilities are to be appended
 */
public void setAppendPredictedProbabilities(boolean ap) {
  this.m_appendProbabilities = ap;
}
/**
 * Registers a training set listener. If an output format is already known it
 * is passed to the new listener straight away.
 *
 * @param tsl the TrainingSetListener to register
 */
@Override
public void addTrainingSetListener(TrainingSetListener tsl) {
  m_trainingSetListeners.add(tsl);
  if (m_format == null) {
    // nothing determined yet, so there is no format to pass on
    return;
  }
  TrainingSetEvent formatEvent = new TrainingSetEvent(this, m_format);
  tsl.acceptTrainingSet(formatEvent);
}
/**
 * Unregisters a training set listener.
 *
 * @param tsl the TrainingSetListener to remove
 */
@Override
public void removeTrainingSetListener(TrainingSetListener tsl) {
  m_trainingSetListeners.remove(tsl);
}
/**
 * Registers a test set listener. If an output format is already known it is
 * passed to the new listener straight away.
 *
 * @param tsl the TestSetListener to register
 */
@Override
public void addTestSetListener(TestSetListener tsl) {
  m_testSetListeners.add(tsl);
  if (m_format == null) {
    // nothing determined yet, so there is no format to pass on
    return;
  }
  TestSetEvent formatEvent = new TestSetEvent(this, m_format);
  tsl.acceptTestSet(formatEvent);
}
/**
 * Unregisters a test set listener.
 *
 * @param tsl the TestSetListener to remove
 */
@Override
public void removeTestSetListener(TestSetListener tsl) {
  m_testSetListeners.remove(tsl);
}
/**
 * Registers a data source listener. If an output format is already known it
 * is passed to the new listener straight away.
 *
 * @param dsl the DataSourceListener to register
 */
@Override
public synchronized void addDataSourceListener(DataSourceListener dsl) {
  m_dataSourceListeners.add(dsl);
  if (m_format == null) {
    // nothing determined yet, so there is no format to pass on
    return;
  }
  DataSetEvent formatEvent = new DataSetEvent(this, m_format);
  dsl.acceptDataSet(formatEvent);
}
/**
 * Unregisters a data source listener.
 *
 * @param dsl the DataSourceListener to remove
 */
@Override
public synchronized void removeDataSourceListener(DataSourceListener dsl) {
  m_dataSourceListeners.removeElement(dsl);
}
/**
 * Registers an instance listener. If an output format is already known it is
 * passed to the new listener straight away.
 *
 * @param dsl the InstanceListener to register
 */
@Override
public synchronized void addInstanceListener(InstanceListener dsl) {
  m_instanceListeners.add(dsl);
  if (m_format == null) {
    // nothing determined yet, so there is no format to pass on
    return;
  }
  InstanceEvent formatEvent = new InstanceEvent(this, m_format);
  dsl.acceptInstance(formatEvent);
}
/**
 * Unregisters an instance listener.
 *
 * @param dsl the InstanceListener to remove
 */
@Override
public synchronized void removeInstanceListener(InstanceListener dsl) {
  m_instanceListeners.removeElement(dsl);
}
/**
 * Replaces the visual used by this bean.
 *
 * @param newVisual the BeanVisual to use from now on
 */
@Override
public void setVisual(BeanVisual newVisual) {
  this.m_visual = newVisual;
}
/**
 * Gets the visual currently used by this bean.
 *
 * @return the BeanVisual in use
 */
@Override
public BeanVisual getVisual() {
  return this.m_visual;
}
/**
 * Reloads the default static and animated icons into the current visual.
 */
@Override
public void useDefaultVisual() {
  final String staticIcon = BeanVisual.ICON_PATH + "PredictionAppender.gif";
  final String animatedIcon =
    BeanVisual.ICON_PATH + "PredictionAppender_animated.gif";
  m_visual.loadIcons(staticIcon, animatedIcon);
}
/**
* Event object used for the current incremental stream: created when a new
* batch starts, updated and re-sent for each instance, and set back to null
* once the stream has finished.
*/
protected InstanceEvent m_instanceEvent;
/**
* Throughput tracker for the current incremental stream; created when a new
* batch starts. Transient, hence not serialized with the bean.
*/
protected transient StreamThroughput m_throughput;
/**
 * Accept and process an incremental classifier event.
 * <p>
 * A {@code NEW_BATCH} event builds the output header (the incoming structure
 * plus either one predicted-class attribute or one probability attribute per
 * class label), remembers which attributes are strings and forwards the
 * header to the instance listeners. Every following event carries one
 * instance: its values are copied, the prediction(s) appended and the result
 * forwarded. A null instance marks the end of the stream.
 * <p>
 * Events that arrive without a preceding, successfully processed
 * {@code NEW_BATCH} are ignored (with a log message) instead of failing with
 * a NullPointerException.
 *
 * @param e an IncrementalClassifierEvent value
 */
@Override
public void acceptClassifier(IncrementalClassifierEvent e) {
  weka.classifiers.Classifier classifier = e.getClassifier();
  Instance currentI = e.getCurrentInstance();
  int status = e.getStatus();

  if (status == IncrementalClassifierEvent.NEW_BATCH) {
    Instances incoming = e.getStructure();
    m_throughput = new StreamThroughput(statusMessagePrefix());
    m_instanceEvent = new InstanceEvent(this, null, 0);
    // create new header structure
    Instances oldStructure = new Instances(incoming, 0);

    // remember the string attributes so that their values can be
    // transferred to the output header for each streamed instance
    m_stringAttIndexes = new ArrayList<Integer>();
    for (int i = 0; i < incoming.numAttributes(); i++) {
      if (incoming.attribute(i).isString()) {
        m_stringAttIndexes.add(Integer.valueOf(i));
      }
    }

    String relationNameModifier = "_with predictions";
    boolean appendLabel =
      !m_appendProbabilities || oldStructure.classAttribute().isNumeric();
    try {
      if (appendLabel) {
        m_format =
          makeDataSetClass(oldStructure, oldStructure, classifier,
            relationNameModifier);
      } else {
        m_format =
          makeDataSetProbabilities(oldStructure, oldStructure, classifier,
            relationNameModifier);
      }
    } catch (Exception ex) {
      ex.printStackTrace();
      return;
    }

    // Pass on the structure
    m_instanceEvent.setStructure(m_format);
    notifyInstanceAvailable(m_instanceEvent);
    return;
  }

  if (m_instanceEvent == null) {
    // no stream is in progress (no NEW_BATCH seen, or stream already ended)
    if (m_logger != null) {
      m_logger.logMessage("[PredictionAppender] " + statusMessagePrefix()
        + "Received an instance event outside of a stream - ignoring.");
    }
    return;
  }

  if (currentI == null) {
    m_instanceEvent.setInstance(null); // end of stream
    // notify listeners
    notifyInstanceAvailable(m_instanceEvent);
  } else if (m_format == null) {
    // header construction failed earlier, so there is nothing to append to
    if (m_logger != null) {
      m_logger.logMessage("[PredictionAppender] " + statusMessagePrefix()
        + "No output format available - skipping instance.");
    }
  } else {
    if (m_throughput != null) {
      m_throughput.updateStart();
    }
    int oldNumAtts = currentI.dataset().numAttributes();
    double[] instanceVals = new double[m_format.numAttributes()];
    try {
      // process the actual instance
      for (int i = 0; i < oldNumAtts; i++) {
        instanceVals[i] = currentI.value(i);
      }
      if (!m_appendProbabilities
        || currentI.dataset().classAttribute().isNumeric()) {
        double predClass = classifier.classifyInstance(currentI);
        instanceVals[instanceVals.length - 1] = predClass;
      } else {
        double[] preds = classifier.distributionForInstance(currentI);
        for (int i = oldNumAtts; i < instanceVals.length; i++) {
          instanceVals[i] = preds[i - oldNumAtts];
        }
      }
    } catch (Exception ex) {
      // As before, the instance is still forwarded below with whatever
      // values were filled in before the failure.
      ex.printStackTrace();
    }

    Instance newInst = new DenseInstance(currentI.weight(), instanceVals);
    newInst.setDataset(m_format);
    // check for string attributes
    if (m_stringAttIndexes != null) {
      for (int i = 0; i < m_stringAttIndexes.size(); i++) {
        // explicit cast: works whether the list is declared raw or typed
        int index = ((Integer) m_stringAttIndexes.get(i)).intValue();
        m_format.attribute(index).setStringValue(
          currentI.stringValue(index));
      }
    }
    m_instanceEvent.setInstance(newInst);
    m_instanceEvent.setStatus(status);
    if (m_throughput != null) {
      m_throughput.updateEnd(m_logger);
    }
    // notify listeners
    notifyInstanceAvailable(m_instanceEvent);
  }

  if (status == IncrementalClassifierEvent.BATCH_FINISHED || currentI == null) {
    // clean up; this now also runs when the last instance failed to classify
    m_instanceEvent = null;
    if (m_throughput != null) {
      m_throughput.finished(m_logger);
    }
  }
}
/**
 * Accept and process a batch classifier event.
 * <p>
 * Nothing happens unless at least one data source, training set or test set
 * listener is registered and the event carries a test set with a class
 * attribute. Listeners first receive a header-only copy of the augmented
 * data, then the full data with predictions filled in. Training set listeners
 * get predictions for the training data; test set and data source listeners
 * get predictions for the test data.
 * <p>
 * Either a single predicted-class attribute is appended (probabilities not
 * requested, or the class is numeric) or one probability attribute per class
 * label. The two modes are mutually exclusive: a failure in the first no
 * longer falls through into the second.
 *
 * @param e a BatchClassifierEvent value
 */
@Override
public void acceptClassifier(BatchClassifierEvent e) {
  if (m_dataSourceListeners.size() == 0 && m_trainingSetListeners.size() == 0
    && m_testSetListeners.size() == 0) {
    return; // nobody to deliver results to
  }
  if (e.getTestSet() == null) {
    // can't append predictions
    return;
  }
  if ((e.getTestSet().isStructureOnly() || e.getTestSet().getDataSet()
    .numInstances() == 0)
    && e.getTestSet().getDataSet().classIndex() < 0) {
    return; // don't do anything or make a fuss if there is no class set in
    // a structure only data set
  }
  if (e.getTestSet().getDataSet().classIndex() < 0) {
    if (m_logger != null) {
      m_logger.logMessage("[PredictionAppender] " + statusMessagePrefix()
        + "No class attribute set in the data!");
      m_logger.statusMessage(statusMessagePrefix()
        + "ERROR: Can't append probablities - see log.");
    }
    stop();
    return;
  }

  Instances testSet = e.getTestSet().getDataSet();
  Instances trainSet = e.getTrainSet().getDataSet();
  int setNum = e.getSetNumber();
  int maxNum = e.getMaxSetNumber();
  weka.classifiers.Classifier classifier = e.getClassifier();
  String relationNameModifier =
    "_set_" + e.getSetNumber() + "_of_" + e.getMaxSetNumber();

  if (!m_appendProbabilities || testSet.classAttribute().isNumeric()) {
    // ---- append a single predicted class value ----
    try {
      Instances newTestSetInstances =
        makeDataSetClass(testSet, trainSet, classifier,
          relationNameModifier);
      Instances newTrainingSetInstances =
        makeDataSetClass(trainSet, trainSet, classifier,
          relationNameModifier);
      if (m_trainingSetListeners.size() > 0) {
        // header first ...
        TrainingSetEvent tse =
          new TrainingSetEvent(this, new Instances(newTrainingSetInstances,
            0));
        tse.m_setNumber = setNum;
        tse.m_maxSetNumber = maxNum;
        notifyTrainingSetAvailable(tse);
        // fill in predicted values
        for (int i = 0; i < trainSet.numInstances(); i++) {
          double predClass =
            classifier.classifyInstance(trainSet.instance(i));
          newTrainingSetInstances.instance(i).setValue(
            newTrainingSetInstances.numAttributes() - 1, predClass);
        }
        // ... then the data with predictions
        tse = new TrainingSetEvent(this, newTrainingSetInstances);
        tse.m_setNumber = setNum;
        tse.m_maxSetNumber = maxNum;
        notifyTrainingSetAvailable(tse);
      }
      if (m_testSetListeners.size() > 0) {
        TestSetEvent tse =
          new TestSetEvent(this, new Instances(newTestSetInstances, 0));
        tse.m_setNumber = setNum;
        tse.m_maxSetNumber = maxNum;
        notifyTestSetAvailable(tse);
      }
      if (m_dataSourceListeners.size() > 0) {
        notifyDataSetAvailable(new DataSetEvent(this, new Instances(
          newTestSetInstances, 0)));
      }
      if (e.getTestSet().isStructureOnly()) {
        m_format = newTestSetInstances;
      }
      if (m_dataSourceListeners.size() > 0 || m_testSetListeners.size() > 0) {
        // fill in predicted values
        for (int i = 0; i < testSet.numInstances(); i++) {
          Instance tempInst = testSet.instance(i);
          // if the class value is missing, then copy the instance
          // and set the data set to the training data. This is
          // just in case this test data was loaded from a CSV file
          // with all missing values for a nominal class (in this
          // case we have no information on the legal class values
          // in the test data)
          if (tempInst.isMissing(tempInst.classIndex())
            && !(classifier instanceof weka.classifiers.misc.InputMappedClassifier)) {
            tempInst = (Instance) testSet.instance(i).copy();
            tempInst.setDataset(trainSet);
          }
          double predClass = classifier.classifyInstance(tempInst);
          newTestSetInstances.instance(i).setValue(
            newTestSetInstances.numAttributes() - 1, predClass);
        }
      }
      // notify listeners
      if (m_testSetListeners.size() > 0) {
        TestSetEvent tse = new TestSetEvent(this, newTestSetInstances);
        tse.m_setNumber = setNum;
        tse.m_maxSetNumber = maxNum;
        notifyTestSetAvailable(tse);
      }
      if (m_dataSourceListeners.size() > 0) {
        notifyDataSetAvailable(new DataSetEvent(this, newTestSetInstances));
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  } else {
    // ---- append one probability per class label ----
    // (only reached when m_appendProbabilities is set and the class is not
    // numeric)
    try {
      Instances newTestSetInstances =
        makeDataSetProbabilities(testSet, trainSet, classifier,
          relationNameModifier);
      Instances newTrainingSetInstances =
        makeDataSetProbabilities(trainSet, trainSet, classifier,
          relationNameModifier);
      if (m_trainingSetListeners.size() > 0) {
        // header first ...
        TrainingSetEvent tse =
          new TrainingSetEvent(this, new Instances(newTrainingSetInstances,
            0));
        tse.m_setNumber = setNum;
        tse.m_maxSetNumber = maxNum;
        notifyTrainingSetAvailable(tse);
        // fill in predicted probabilities
        for (int i = 0; i < trainSet.numInstances(); i++) {
          double[] preds =
            classifier.distributionForInstance(trainSet.instance(i));
          for (int j = 0; j < trainSet.classAttribute().numValues(); j++) {
            newTrainingSetInstances.instance(i).setValue(
              trainSet.numAttributes() + j, preds[j]);
          }
        }
        // ... then the data with probabilities
        tse = new TrainingSetEvent(this, newTrainingSetInstances);
        tse.m_setNumber = setNum;
        tse.m_maxSetNumber = maxNum;
        notifyTrainingSetAvailable(tse);
      }
      if (m_testSetListeners.size() > 0) {
        TestSetEvent tse =
          new TestSetEvent(this, new Instances(newTestSetInstances, 0));
        tse.m_setNumber = setNum;
        tse.m_maxSetNumber = maxNum;
        notifyTestSetAvailable(tse);
      }
      if (m_dataSourceListeners.size() > 0) {
        notifyDataSetAvailable(new DataSetEvent(this, new Instances(
          newTestSetInstances, 0)));
      }
      if (e.getTestSet().isStructureOnly()) {
        m_format = newTestSetInstances;
      }
      if (m_dataSourceListeners.size() > 0 || m_testSetListeners.size() > 0) {
        // fill in predicted probabilities
        for (int i = 0; i < testSet.numInstances(); i++) {
          Instance tempInst = testSet.instance(i);
          // if the class value is missing, then copy the instance
          // and set the data set to the training data. This is
          // just in case this test data was loaded from a CSV file
          // with all missing values for a nominal class (in this
          // case we have no information on the legal class values
          // in the test data)
          if (tempInst.isMissing(tempInst.classIndex())
            && !(classifier instanceof weka.classifiers.misc.InputMappedClassifier)) {
            tempInst = (Instance) testSet.instance(i).copy();
            tempInst.setDataset(trainSet);
          }
          double[] preds = classifier.distributionForInstance(tempInst);
          for (int j = 0; j < tempInst.classAttribute().numValues(); j++) {
            newTestSetInstances.instance(i).setValue(
              testSet.numAttributes() + j, preds[j]);
          }
        }
      }
      // notify listeners
      if (m_testSetListeners.size() > 0) {
        TestSetEvent tse = new TestSetEvent(this, newTestSetInstances);
        tse.m_setNumber = setNum;
        tse.m_maxSetNumber = maxNum;
        notifyTestSetAvailable(tse);
      }
      if (m_dataSourceListeners.size() > 0) {
        notifyDataSetAvailable(new DataSetEvent(this, newTestSetInstances));
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }
}
/**
 * Accept and process a batch clusterer event.
 * <p>
 * Nothing happens unless at least one data source, training set or test set
 * listener is registered and the event carries a test set with data. Either
 * the assigned cluster is appended as one attribute, or, when probabilities
 * are requested and the clusterer is a DensityBasedClusterer, one cluster
 * membership probability per cluster.
 * <p>
 * In both modes listeners first receive a header-only copy and then the
 * filled data. Data source listeners always receive the result; training set
 * listeners only when {@code getTestOrTrain() > 0} and test set listeners only
 * when {@code getTestOrTrain() == 0}.
 *
 * @param e a BatchClustererEvent value
 */
@Override
public void acceptClusterer(BatchClustererEvent e) {
  if (m_dataSourceListeners.size() == 0 && m_trainingSetListeners.size() == 0
    && m_testSetListeners.size() == 0) {
    return; // nobody to deliver results to
  }
  if (e.getTestSet() == null || e.getTestSet().isStructureOnly()) {
    return; // no data to assign clusters to
  }

  Instances testSet = e.getTestSet().getDataSet();
  weka.clusterers.Clusterer clusterer = e.getClusterer();
  String test = (e.getTestOrTrain() == 0) ? "test" : "training";
  String relationNameModifier =
    "_" + test + "_" + e.getSetNumber() + "_of_" + e.getMaxSetNumber();
  boolean densityBased = clusterer instanceof DensityBasedClusterer;

  if (m_appendProbabilities && !densityBased) {
    System.err
      .println("Only density based clusterers can append probabilities. Instead cluster will be assigned for each instance.");
    if (m_logger != null) {
      m_logger
        .logMessage("[PredictionAppender] "
          + statusMessagePrefix()
          + " Only density based clusterers can "
          + "append probabilities. Instead cluster will be assigned for each "
          + "instance.");
      m_logger
        .statusMessage(statusMessagePrefix()
          + "WARNING: Only density based clusterers can append probabilities. "
          + "Instead cluster will be assigned for each instance.");
    }
  }

  try {
    Instances newInstances;
    if (!m_appendProbabilities || !densityBased) {
      newInstances =
        makeClusterDataSetClass(testSet, clusterer, relationNameModifier);
      // pass on the structure before the (possibly slow) assignment
      notifyClustererListeners(e, newInstances, true);
      // fill in predicted values
      for (int i = 0; i < testSet.numInstances(); i++) {
        double predCluster = clusterer.clusterInstance(testSet.instance(i));
        newInstances.instance(i).setValue(newInstances.numAttributes() - 1,
          predCluster);
      }
    } else {
      newInstances =
        makeClusterDataSetProbabilities(testSet, clusterer,
          relationNameModifier);
      // pass on the structure before the (possibly slow) assignment
      notifyClustererListeners(e, newInstances, true);
      // fill in predicted probabilities
      for (int i = 0; i < testSet.numInstances(); i++) {
        double[] probs =
          clusterer.distributionForInstance(testSet.instance(i));
        for (int j = 0; j < clusterer.numberOfClusters(); j++) {
          newInstances.instance(i).setValue(testSet.numAttributes() + j,
            probs[j]);
        }
      }
    }
    // notify listeners
    notifyClustererListeners(e, newInstances, false);
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}

/**
 * Delivers a clusterer result to the registered listeners: data source
 * listeners always, training set listeners for training data and test set
 * listeners for test data.
 *
 * @param e the event being processed (supplies set numbers and the
 *          test-or-train flag)
 * @param data the augmented instances
 * @param headerOnly if true, each listener type gets its own header-only
 *          copy of {@code data} instead of the data itself
 */
private void notifyClustererListeners(BatchClustererEvent e, Instances data,
  boolean headerOnly) {
  if (m_dataSourceListeners.size() > 0) {
    Instances payload = headerOnly ? new Instances(data, 0) : data;
    notifyDataSetAvailable(new DataSetEvent(this, payload));
  }
  if (m_trainingSetListeners.size() > 0 && e.getTestOrTrain() > 0) {
    Instances payload = headerOnly ? new Instances(data, 0) : data;
    TrainingSetEvent tse = new TrainingSetEvent(this, payload);
    tse.m_setNumber = e.getSetNumber();
    tse.m_maxSetNumber = e.getMaxSetNumber();
    notifyTrainingSetAvailable(tse);
  }
  if (m_testSetListeners.size() > 0 && e.getTestOrTrain() == 0) {
    Instances payload = headerOnly ? new Instances(data, 0) : data;
    TestSetEvent tse = new TestSetEvent(this, payload);
    tse.m_setNumber = e.getSetNumber();
    tse.m_maxSetNumber = e.getMaxSetNumber();
    notifyTestSetAvailable(tse);
  }
}
/**
 * Builds a copy of {@code insts} with one extra attribute per class label,
 * named {@code <classifier simple name>_prob_<label>}, appended at the end.
 * The class labels are taken from {@code format}, or from the model header
 * when the classifier is an InputMappedClassifier.
 *
 * @param insts the instances to copy and extend
 * @param format the data set whose class attribute supplies the labels
 * @param classifier the classifier that will produce the probabilities
 * @param relationNameModifier suffix appended to the relation name
 * @return the extended copy of {@code insts}
 * @throws Exception if the model header or the Add filter fails
 */
private Instances makeDataSetProbabilities(Instances insts, Instances format,
  weka.classifiers.Classifier classifier, String relationNameModifier)
  throws Exception {
  // adjust structure for InputMappedClassifier (if necessary)
  Instances labelSource = format;
  if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) {
    weka.classifiers.misc.InputMappedClassifier mapped =
      (weka.classifiers.misc.InputMappedClassifier) classifier;
    labelSource = mapped.getModelHeader(new Instances(format, 0));
  }

  // strip the package prefix from the classifier's class name
  String qualifiedName = classifier.getClass().getName();
  String simpleName =
    qualifiedName.substring(qualifiedName.lastIndexOf('.') + 1);

  Instances augmented = new Instances(insts);
  int numLabels = labelSource.classAttribute().numValues();
  for (int label = 0; label < numLabels; label++) {
    weka.filters.unsupervised.attribute.Add appendAttribute =
      new weka.filters.unsupervised.attribute.Add();
    appendAttribute.setAttributeIndex("last");
    appendAttribute.setAttributeName(simpleName + "_prob_"
      + labelSource.classAttribute().value(label));
    appendAttribute.setInputFormat(augmented);
    augmented = weka.filters.Filter.useFilter(augmented, appendAttribute);
  }

  augmented.setRelationName(insts.relationName() + relationNameModifier);
  return augmented;
}
private Instances makeDataSetClass(Instances insts, Instances structure,
weka.classifiers.Classifier classifier, String relationNameModifier)
throws Exception {
// adjust structure for InputMappedClassifier (if necessary)
if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) {
structure =
((weka.classifiers.misc.InputMappedClassifier) classifier)
.getModelHeader(new Instances(structure, 0));
}
weka.filters.unsupervised.attribute.Add addF =
new weka.filters.unsupervised.attribute.Add();
addF.setAttributeIndex("last");
String classifierName = classifier.getClass().getName();
classifierName =
classifierName.substring(classifierName.lastIndexOf('.') + 1,
classifierName.length());
addF.setAttributeName("class_predicted_by: " + classifierName);
if (structure.classAttribute().isNominal()) {
String classLabels = "";
Enumeration
© 2015 - 2025 Weber Informatics LLC | Privacy Policy