weka.gui.boundaryvisualizer.BoundaryPanelDistributed Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This version represents the developer version, the
"bleeding edge" of development, you could say. New functionality gets added
to this version.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* BoundaryPanelDistributed.java
* Copyright (C) 2003-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.gui.boundaryvisualizer;
import java.awt.BorderLayout;
import java.awt.Color;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.ObjectInputStream;
import java.rmi.Naming;
import java.util.ArrayList;
import java.util.Vector;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.core.Instances;
import weka.core.Utils;
import weka.experiment.Compute;
import weka.experiment.RemoteExperimentEvent;
import weka.experiment.RemoteExperimentListener;
import weka.experiment.TaskStatusInfo;
/**
* This class extends BoundaryPanel with code for distributing the processing
* necessary to create a visualization among a list of remote machines.
* Specifically, a visualization is broken down and processed row by row using
* the available remote computers.
*
* @author Mark Hall
* @version $Revision: 10222 $
* @since 1.0
* @see BoundaryPanel
*/
public class BoundaryPanelDistributed extends BoundaryPanel {
/** for serialization */
private static final long serialVersionUID = -1743284397893937776L;

/**
 * Listeners that are sent a RemoteExperimentEvent for every progress
 * message. Typed so that elements can be read back without a cast.
 */
protected Vector<RemoteExperimentListener> m_listeners =
  new Vector<RemoteExperimentListener>();

/** Holds the names of machines with remoteEngine servers running */
protected Vector<String> m_remoteHosts = new Vector<String>();

/** The queue of available hosts (Integer indices into m_remoteHosts) */
private weka.core.Queue m_remoteHostsQueue = new weka.core.Queue();

/** The status of each of the remote hosts (one of the status codes below) */
private int[] m_remoteHostsStatus;

/** The number of times tasks have failed on each remote host */
private int[] m_remoteHostFailureCounts;

/** Host status: idle; also the value a freshly allocated status array holds */
protected static final int AVAILABLE = 0;

/** Host status: a sub-task thread has been started for this host */
protected static final int IN_USE = 1;

/** Host status: an exception was thrown while talking to this host */
protected static final int CONNECTION_FAILED = 2;

/** Host status: the host reported that its task failed */
protected static final int SOME_OTHER_FAILURE = 3;

/** Failures tolerated per host before it is no longer re-queued */
protected static final int MAX_FAILURES = 3;

/**
 * Set to true if MAX_FAILURES exceeded on all hosts or connections fail on
 * all hosts or user aborts plotting
 */
private boolean m_plottingAborted = false;

/**
 * The number of hosts taken out of service, either because they exceeded
 * MAX_FAILURES or because communicating with them threw an exception
 */
private int m_removedHosts;

/** The count of failed sub-tasks */
private int m_failedCount;

/** The queue of sub-tasks (Integer row numbers) waiting to be processed */
private weka.core.Queue m_subExpQueue = new weka.core.Queue();

/**
 * Minimum number of milliseconds between two status polls of a server (the
 * value is handed to Thread.sleep)
 */
private final int m_minTaskPollTime = 1000;

/** Current polling interval, in milliseconds, for each remote host */
private int[] m_hostPollingTime;
/**
* Creates a new {@code BoundaryPanelDistributed} instance. Both arguments are
* passed unchanged to the superclass constructor.
*
* @param panelWidth width of the display
* @param panelHeight height of the display
*/
public BoundaryPanelDistributed(int panelWidth, int panelHeight) {
super(panelWidth, panelHeight);
}
/**
* Set a list of host names of machines to distribute processing to. The
* vector is stored by reference; no copy is made.
*
* @param remHosts a Vector of host names (Strings)
*/
public void setRemoteHosts(Vector remHosts) {
m_remoteHosts = remHosts;
}
/**
 * Registers an object that wants to receive the progress and log events
 * this panel emits while rows are being computed remotely.
 *
 * @param r the listener to append to the listener list
 */
public void addRemoteExperimentListener(RemoteExperimentListener r) {
  m_listeners.add(r);
}
/**
 * Resets all bookkeeping for a distributed run and builds the classifier and
 * the data generator locally.
 * <p>
 * After the superclass initialization this method clears the abort flag and
 * the failure/removal counters, marks every host as available (a new status
 * array is all zeros, i.e. AVAILABLE), queues every host index and every row
 * number (0 to m_panelHeight - 1), and then trains the classifier and builds
 * the data generator on the training data, weighting only on the x and y
 * attributes.
 * <p>
 * The JVM is terminated with exit status 1 if no hosts are configured or if
 * building the classifier or the generator throws.
 */
@Override
protected void initialize() {
  super.initialize();

  m_plottingAborted = false;
  m_failedCount = 0;
  // Must be cleared too: the host queue below is re-primed with every host,
  // and the completion test in availableHost() compares
  // (queued hosts + removed hosts) with the total number of hosts. A stale
  // count from an earlier run would make that test unsatisfiable.
  m_removedHosts = 0;

  // initialize all remote hosts to available
  final int numHosts = m_remoteHosts.size();
  m_remoteHostsStatus = new int[numHosts];
  m_remoteHostFailureCounts = new int[numHosts];
  m_remoteHostsQueue = new weka.core.Queue();

  if (numHosts == 0) {
    System.err.println("No hosts specified!");
    System.exit(1);
  }

  // prime the hosts queue
  m_hostPollingTime = new int[numHosts];
  for (int i = 0; i < numHosts; i++) {
    m_remoteHostsQueue.push(Integer.valueOf(i));
    m_hostPollingTime[i] = m_minTaskPollTime;
  }

  // set up sub tasks (just holds the row numbers to be processed)
  m_subExpQueue = new weka.core.Queue();
  for (int i = 0; i < m_panelHeight; i++) {
    m_subExpQueue.push(Integer.valueOf(i));
  }

  try {
    // need to build classifier and data generator
    m_classifier.buildClassifier(m_trainingData);
  } catch (Exception ex) {
    ex.printStackTrace();
    System.exit(1);
  }

  // build DataGenerator, weighting only on the two plotted dimensions
  boolean[] attsToWeightOn = new boolean[m_trainingData.numAttributes()];
  attsToWeightOn[m_xAttribute] = true;
  attsToWeightOn[m_yAttribute] = true;
  m_dataGenerator.setWeightingDimensions(attsToWeightOn);

  try {
    m_dataGenerator.buildGenerator(m_trainingData);
  } catch (Exception ex) {
    ex.printStackTrace();
    System.exit(1);
  }
}
/**
 * Checks that the panel is fully configured, prepares a new run and then
 * offers one waiting row to each host currently in the host queue.
 *
 * @exception Exception if the training data, classifier or data generator is
 *              missing, if either visualization attribute is nominal, or if
 *              the thread is interrupted while pausing between launches
 */
@Override
public void start() throws Exception {
  // The number of samples per generator is worked out in the sub task.
  m_stopReplotting = true;

  if (m_trainingData == null) {
    throw new Exception("No training data set (BoundaryPanel)");
  }
  if (m_classifier == null) {
    throw new Exception("No classifier set (BoundaryPanel)");
  }
  if (m_dataGenerator == null) {
    throw new Exception("No data generator set (BoundaryPanel)");
  }

  // The y attribute is only inspected when the x attribute is not nominal.
  boolean nominalAxis = m_trainingData.attribute(m_xAttribute).isNominal();
  if (!nominalAxis) {
    nominalAxis = m_trainingData.attribute(m_yAttribute).isNominal();
  }
  if (nominalAxis) {
    throw new Exception("Visualization dimensions must be numeric "
      + "(BoundaryPanel)");
  }

  computeMinMaxAtts();
  initialize();

  // launch tasks on all available hosts; the count is taken up front because
  // each call below may take a host off the queue
  for (int remaining = m_remoteHostsQueue.size(); remaining > 0; remaining--) {
    availableHost(-1);
    Thread.sleep(70);
  }
}
/**
 * Push a host back onto the list of available hosts and launch a waiting Task
 * (if any).
 * <p>
 * A returned host is only re-queued while its failure count is below
 * MAX_FAILURES; otherwise it is counted as removed and the listeners are
 * told. The method then, in this order:
 * <ol>
 * <li>aborts if the overall failure count equals MAX_FAILURES times the
 * number of hosts;</li>
 * <li>reports successful completion (after optionally plotting the training
 * data) if no rows are waiting and every host is either queued or removed;</li>
 * <li>stops if every host has status CONNECTION_FAILED;</li>
 * <li>reports the end of an aborted run once every host is queued or
 * removed;</li>
 * <li>otherwise, if a row and a host are both waiting, pops one of each and
 * launches the row on that host.</li>
 * </ol>
 *
 * @param hostNum the number of the host to return to the queue. -1 if no host
 *          to return.
 */
protected synchronized void availableHost(int hostNum) {
  if (hostNum >= 0) {
    if (m_remoteHostFailureCounts[hostNum] < MAX_FAILURES) {
      m_remoteHostsQueue.push(Integer.valueOf(hostNum));
    } else {
      notifyListeners(false, true, false, "Max failures exceeded for host "
        + (m_remoteHosts.elementAt(hostNum)) + ". Removed from host list.");
      m_removedHosts++;
    }
  }

  // check for all sub exp complete or all hosts failed or failed count
  // exceeded
  if (m_failedCount == (MAX_FAILURES * m_remoteHosts.size())) {
    m_plottingAborted = true;
    notifyListeners(false, true, true, "Plotting aborted! Max failures "
      + "exceeded on all remote hosts.");
    return;
  }

  if (m_subExpQueue.size() == 0
    && (m_remoteHosts.size() == (m_remoteHostsQueue.size() + m_removedHosts))) {
    if (m_plotTrainingData) {
      plotTrainingData();
    }
    notifyListeners(false, true, true, "Plotting completed successfully.");
    return;
  }

  if (checkForAllFailedHosts()) {
    return;
  }

  if (m_plottingAborted
    && (m_remoteHostsQueue.size() + m_removedHosts) == m_remoteHosts.size()) {
    notifyListeners(false, true, true, "Plotting aborted. All remote tasks "
      + "finished.");
  }

  // Nothing to dispatch unless a row and a host are both waiting and the run
  // has not been aborted.
  if (m_subExpQueue.empty() || m_plottingAborted
    || m_remoteHostsQueue.empty()) {
    return;
  }

  try {
    int availHost = ((Integer) m_remoteHostsQueue.pop()).intValue();
    int waitingTask = ((Integer) m_subExpQueue.pop()).intValue();
    launchNext(waitingTask, availHost);
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
/**
 * Inform all listeners of progress. Each registered listener receives its
 * own newly created RemoteExperimentEvent. If no listener is registered the
 * message is written to System.err instead.
 *
 * @param status true if this is a status type of message
 * @param log true if this is a log type of message
 * @param finished true if the remote task has finished
 * @param message the message.
 */
private synchronized void notifyListeners(boolean status, boolean log,
  boolean finished, String message) {
  if (m_listeners.isEmpty()) {
    System.err.println(message);
    return;
  }

  for (int i = 0; i < m_listeners.size(); i++) {
    // Explicit cast: without it the assignment does not compile when the
    // listener list is declared as a raw Vector.
    RemoteExperimentListener r = (RemoteExperimentListener) m_listeners
      .elementAt(i);
    r.remoteExperimentStatus(new RemoteExperimentEvent(status, log, finished,
      message));
  }
}
/**
 * Determines whether every entry of the host status array is
 * CONNECTION_FAILED. When that is the case the run is flagged as aborted and
 * the listeners are sent a final log message.
 *
 * @return true if no host has a status other than CONNECTION_FAILED
 */
private boolean checkForAllFailedHosts() {
  // Walk forward past the leading run of failed hosts.
  int idx = 0;
  while (idx < m_remoteHostsStatus.length
    && m_remoteHostsStatus[idx] == CONNECTION_FAILED) {
    idx++;
  }

  // Stopping before the end means a host with another status was found.
  if (idx < m_remoteHostsStatus.length) {
    return false;
  }

  m_plottingAborted = true;
  notifyListeners(false, true, true, "Plotting aborted! All connections "
    + "to remote hosts failed.");
  return true;
}
/**
* Called by the sub-task thread after a finished row has been received and
* plotted. The body is empty: no counter is kept in this class, so the call
* currently has no effect beyond acquiring the object's monitor.
*/
protected synchronized void incrementFinished() {
}
/**
 * Records one failed sub-task: bumps the overall failure total first and
 * then the failure tally of the host on which the task ran.
 *
 * @param hostNum the index of the host to increment failure count
 */
protected synchronized void incrementFailed(int hostNum) {
  m_failedCount += 1;
  m_remoteHostFailureCounts[hostNum] += 1;
}
/**
 * Push a row back on the queue of rows waiting to be processed. This only
 * queues the row; it does not launch it on a host.
 *
 * @param expNum the row number to push onto the queue
 */
protected synchronized void waitingTask(int expNum) {
  // Integer.valueOf replaces the deprecated Integer(int) constructor.
  m_subExpQueue.push(Integer.valueOf(expNum));
}
/**
 * Computes one row of the visualization on one remote host, using a new
 * thread started at minimum priority.
 * <p>
 * The thread marks the host IN_USE, packages the current panel settings into
 * a RemoteBoundaryVisualizerSubTask, looks up the host's "RemoteEngine" via
 * RMI, submits the task and then polls its status, sleeping at least
 * m_minTaskPollTime milliseconds between polls:
 * <ul>
 * <li>FINISHED: the row's probabilities are cached and plotted, the host's
 * polling interval becomes a quarter of the run time (at least one second)
 * and the host is handed back through availableHost.</li>
 * <li>FAILED: the failure is counted, the row is re-queued and the host is
 * handed back, since the failure may be temporary.</li>
 * <li>otherwise: progress is reported and the polling interval is adapted.</li>
 * </ul>
 * Any exception other than an interrupted sleep is treated as a connection
 * failure: the host is marked CONNECTION_FAILED and counted as removed, and
 * the row is re-queued and immediately offered to another waiting host.
 *
 * @param wtask the row number to compute
 * @param ah the index (into the list of remote hosts) of the host to use
 */
protected void launchNext(final int wtask, final int ah) {
  Thread subTaskThread;
  subTaskThread = new Thread() {
    @Override
    public void run() {
      m_remoteHostsStatus[ah] = IN_USE;
      RemoteBoundaryVisualizerSubTask vSubTask = new RemoteBoundaryVisualizerSubTask();
      vSubTask.setXAttribute(m_xAttribute);
      vSubTask.setYAttribute(m_yAttribute);
      vSubTask.setRowNumber(wtask);
      vSubTask.setPanelWidth(m_panelWidth);
      vSubTask.setPanelHeight(m_panelHeight);
      vSubTask.setPixHeight(m_pixHeight);
      vSubTask.setPixWidth(m_pixWidth);
      vSubTask.setClassifier(m_classifier);
      vSubTask.setDataGenerator(m_dataGenerator);
      vSubTask.setInstances(m_trainingData);
      vSubTask.setMinMaxX(m_minX, m_maxX);
      vSubTask.setMinMaxY(m_minY, m_maxY);
      vSubTask.setNumSamplesPerRegion(m_numOfSamplesPerRegion);
      vSubTask.setGeneratorSamplesBase(m_samplesBase);
      try {
        String name = "//" + (m_remoteHosts.elementAt(ah)) + "/RemoteEngine";
        Compute comp = (Compute) Naming.lookup(name);
        // assess the status of the sub-exp
        notifyListeners(false, true, false, "Starting row " + wtask
          + " on host " + (m_remoteHosts.elementAt(ah)));
        Object subTaskId = comp.executeTask(vSubTask);
        boolean finished = false;
        TaskStatusInfo is = null;
        long startTime = System.currentTimeMillis();
        while (!finished) {
          try {
            Thread.sleep(Math.max(m_minTaskPollTime, m_hostPollingTime[ah]));
            TaskStatusInfo cs = (TaskStatusInfo) comp.checkStatus(subTaskId);
            if (cs.getExecutionStatus() == TaskStatusInfo.FINISHED) {
              // next time, poll this host at a quarter of the observed run
              // time, but never more often than once a second
              long runTime = System.currentTimeMillis() - startTime;
              runTime /= 4;
              if (runTime < 1000) {
                runTime = 1000;
              }
              m_hostPollingTime[ah] = (int) runTime;

              // Extract the row from the result
              RemoteResult rr = (RemoteResult) cs.getTaskResult();
              double[][] probs = rr.getProbabilities();
              for (int i = 0; i < m_panelWidth; i++) {
                m_probabilityCache[wtask][i] = probs[i];
                // the last argument is true only for the final pixel of the
                // row
                plotPoint(i, wtask, probs[i], i == m_panelWidth - 1);
              }
              notifyListeners(false, true, false, cs.getStatusMessage());
              m_remoteHostsStatus[ah] = AVAILABLE;
              incrementFinished();
              // push host back onto queue and try launching any waiting
              // rows
              availableHost(ah);
              finished = true;
            } else if (cs.getExecutionStatus() == TaskStatusInfo.FAILED) {
              // a non connection related error---possibly host doesn't have
              // access to data sets or security policy is not set up
              // correctly or classifier(s) failed for some reason
              notifyListeners(false, true, false, cs.getStatusMessage());
              m_remoteHostsStatus[ah] = SOME_OTHER_FAILURE;
              notifyListeners(false, true, false,
                "Row " + wtask + " " + cs.getStatusMessage()
                  + ". Scheduling for execution on another host.");
              incrementFailed(ah);
              // push experiment back onto queue
              waitingTask(wtask);
              // push host back onto queue and try launching any waiting
              // Tasks. Host is pushed back on the queue as the
              // failure may be temporary.
              availableHost(ah);
              finished = true;
            } else {
              if (is == null) {
                // first poll that is neither finished nor failed
                is = cs;
                notifyListeners(false, true, false, cs.getStatusMessage());
              } else {
                RemoteResult rr = (RemoteResult) cs.getTaskResult();
                if (rr != null) {
                  int percentComplete = rr.getPercentCompleted();
                  String timeRemaining = "";
                  if (percentComplete > 0 && percentComplete < 100) {
                    double timeSoFar = (double) System.currentTimeMillis()
                      - (double) startTime;
                    double timeToGo = ((100.0 - percentComplete) / percentComplete)
                      * timeSoFar;
                    // do not sleep past the estimated completion time
                    if (timeToGo < m_hostPollingTime[ah]) {
                      m_hostPollingTime[ah] = (int) timeToGo;
                    }
                    String units = "seconds";
                    timeToGo /= 1000.0;
                    if (timeToGo > 60) {
                      units = "minutes";
                      timeToGo /= 60.0;
                    }
                    if (timeToGo > 60) {
                      units = "hours";
                      timeToGo /= 60.0;
                    }
                    timeRemaining = " (approx. time remaining "
                      + Utils.doubleToString(timeToGo, 1) + " " + units + ")";
                  }
                  if (percentComplete < 25) {
                    // early in the task: poll less often, capped at one
                    // minute
                    if (percentComplete > 0) {
                      m_hostPollingTime[ah] = (int) ((25.0 / percentComplete) * m_hostPollingTime[ah]);
                    } else {
                      m_hostPollingTime[ah] *= 2;
                    }
                    if (m_hostPollingTime[ah] > 60000) {
                      m_hostPollingTime[ah] = 60000;
                    }
                  }
                  notifyListeners(false, true, false, "Row " + wtask + " "
                    + percentComplete + "% complete" + timeRemaining + ".");
                } else {
                  notifyListeners(false, true, false, "Row " + wtask
                    + " queued on " + (m_remoteHosts.elementAt(ah)));
                  if (m_hostPollingTime[ah] < 60000) {
                    m_hostPollingTime[ah] *= 2;
                  }
                }
                is = cs;
              }
            }
          } catch (InterruptedException ie) {
            // NOTE(review): polling simply continues after an interrupted
            // sleep; no code visible in this class interrupts these threads.
            ie.printStackTrace();
          }
        }
      } catch (Exception ce) {
        m_remoteHostsStatus[ah] = CONNECTION_FAILED;
        // availableHost() reads this counter while holding the panel's
        // monitor, so update it under the same monitor.
        synchronized (BoundaryPanelDistributed.this) {
          m_removedHosts++;
        }
        System.err.println(ce);
        ce.printStackTrace();
        notifyListeners(false, true, false,
          "Connection to " + (m_remoteHosts.elementAt(ah))
            + " failed. Scheduling row " + wtask
            + " for execution on another host.");
        waitingTask(wtask);
        // Re-dispatch right away. Without this the re-queued row is never
        // picked up when all other hosts are already idle, because nothing
        // else would call availableHost() again. The call also performs the
        // "all connections failed" check exactly once.
        availableHost(-1);
      } finally {
        if (isInterrupted()) {
          System.err.println("Sub exp Interupted!");
        }
      }
    }
  };
  subTaskThread.setPriority(Thread.MIN_PRIORITY);
  subTaskThread.start();
}
/**
 * Main method for testing this class.
 * <p>
 * Reads one host name per line from "hosts.vis" in the current directory,
 * loads the data set, shows the panel in a frame and starts distributed
 * plotting. When a listener event reports that the run has finished, the
 * image is saved as a .jpg named after the classifier, relation and the two
 * attribute indices.
 * <p>
 * Positional arguments: data set file, class index, x attribute index, y
 * attribute index, generator samples base, samples per region (shown as
 * "# loc/pixel" in the usage text), kernel bandwidth, display width, display
 * height, classifier class name, then any classifier options.
 *
 * @param args a {@code String[]} value
 */
public static void main(String[] args) {
  try {
    // args[0] to args[9] are all read unconditionally below, so ten
    // arguments are the minimum; anything further is a classifier option.
    if (args.length < 10) {
      System.err.println("Usage : BoundaryPanelDistributed <dataset> "
        + "<class col> <xAtt> <yAtt> "
        + "<base> <# loc/pixel> <kernel bandwidth> "
        + "<display width> <display height> "
        + "<classifier [classifier options]>");
      System.exit(1);
    }

    Vector<String> hostNames = new Vector<String>();
    // try loading hosts file
    try {
      BufferedReader br = new BufferedReader(new FileReader("hosts.vis"));
      try {
        String hostName = br.readLine();
        while (hostName != null) {
          System.out.println("Adding host " + hostName);
          hostNames.add(hostName);
          hostName = br.readLine();
        }
      } finally {
        // close the file even if reading fails part way through
        br.close();
      }
    } catch (Exception ex) {
      System.err.println("No hosts.vis file - create this file in "
        + "the current directory with one host name "
        + "per line, or use BoundaryPanel instead.");
      System.exit(1);
    }

    final javax.swing.JFrame jf = new javax.swing.JFrame(
      "Weka classification boundary visualizer");
    jf.getContentPane().setLayout(new BorderLayout());

    System.err.println("Loading instances from : " + args[0]);
    final Instances data;
    java.io.Reader r = new java.io.BufferedReader(new java.io.FileReader(
      args[0]));
    try {
      data = new Instances(r);
    } finally {
      r.close();
    }
    data.setClassIndex(Integer.parseInt(args[1]));

    final int xatt = Integer.parseInt(args[2]);
    final int yatt = Integer.parseInt(args[3]);
    int base = Integer.parseInt(args[4]);
    int loc = Integer.parseInt(args[5]);
    int bandWidth = Integer.parseInt(args[6]);
    int panelWidth = Integer.parseInt(args[7]);
    int panelHeight = Integer.parseInt(args[8]);
    final String classifierName = args[9];

    final BoundaryPanelDistributed bv = new BoundaryPanelDistributed(
      panelWidth, panelHeight);
    bv.addRemoteExperimentListener(new RemoteExperimentListener() {
      @Override
      public void remoteExperimentStatus(RemoteExperimentEvent e) {
        if (e.m_experimentFinished) {
          // strip the package prefix from the classifier's class name
          String classifierNameNew = classifierName.substring(
            classifierName.lastIndexOf('.') + 1, classifierName.length());
          bv.saveImage(classifierNameNew + "_" + data.relationName() + "_X"
            + xatt + "_Y" + yatt + ".jpg");
        } else {
          System.err.println(e.m_messageString);
        }
      }
    });
    bv.setRemoteHosts(hostNames);

    jf.getContentPane().add(bv, BorderLayout.CENTER);
    jf.setSize(bv.getMinimumSize());
    jf.addWindowListener(new java.awt.event.WindowAdapter() {
      @Override
      public void windowClosing(java.awt.event.WindowEvent e) {
        jf.dispose();
        System.exit(0);
      }
    });
    jf.pack();
    jf.setVisible(true);
    bv.repaint();

    // everything after the classifier name is passed on as its options;
    // stays null when there are none
    String[] argsR = null;
    if (args.length > 10) {
      argsR = new String[args.length - 10];
      System.arraycopy(args, 10, argsR, 0, argsR.length);
    }
    Classifier c = AbstractClassifier.forName(args[9], argsR);

    KDDataGenerator dataGen = new KDDataGenerator();
    dataGen.setKernelBandwidth(bandWidth);
    bv.setDataGenerator(dataGen);
    bv.setNumSamplesPerRegion(loc);
    bv.setGeneratorSamplesBase(base);
    bv.setClassifier(c);
    bv.setTrainingData(data);
    bv.setXAttribute(xatt);
    bv.setYAttribute(yatt);

    try {
      // try and load a color map if one exists
      // NOTE(review): this uses Java deserialization on "colors.ser" from
      // the current directory; only run where that file is trusted.
      FileInputStream fis = new FileInputStream("colors.ser");
      try {
        ObjectInputStream ois = new ObjectInputStream(fis);
        @SuppressWarnings("unchecked")
        ArrayList<Color> colors = (ArrayList<Color>) ois.readObject();
        bv.setColors(colors);
      } finally {
        // closes the file even when the stream header or object is invalid
        fis.close();
      }
    } catch (Exception ex) {
      System.err.println("No color map file");
    }

    bv.start();
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy