All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.gui.boundaryvisualizer.BoundaryPanelDistributed Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *   BoundaryPanelDistributed.java
 *   Copyright (C) 2003-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.gui.boundaryvisualizer;

import java.awt.BorderLayout;
import java.awt.Color;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.ObjectInputStream;
import java.rmi.Naming;
import java.util.ArrayList;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.core.Instances;
import weka.core.Utils;
import weka.experiment.Compute;
import weka.experiment.RemoteExperimentEvent;
import weka.experiment.RemoteExperimentListener;
import weka.experiment.TaskStatusInfo;

/**
 * This class extends BoundaryPanel with code for distributing the processing
 * necessary to create a visualization among a list of remote machines.
 * Specifically, a visualization is broken down and processed row by row using
 * the available remote computers.
 * <p>
 * Each row is packaged as a RemoteBoundaryVisualizerSubTask and submitted to a
 * remote engine that is looked up through RMI under the name
 * {@code //<host>/RemoteEngine}. One polling thread per running row collects
 * the result, stores it in the probability cache and plots it.
 * 
 * @author Mark Hall
 * @version $Revision: 10222 $
 * @since 1.0
 * @see BoundaryPanel
 */
public class BoundaryPanelDistributed extends BoundaryPanel {

  /** for serialization */
  private static final long serialVersionUID = -1743284397893937776L;

  /** a list of RemoteExperimentListeners */
  protected Vector<RemoteExperimentListener> m_listeners = new Vector<RemoteExperimentListener>();

  /** Holds the names of machines with remoteEngine servers running */
  protected Vector<String> m_remoteHosts = new Vector<String>();

  /** The queue of available hosts (holds Integer indexes into m_remoteHosts) */
  private weka.core.Queue m_remoteHostsQueue = new weka.core.Queue();

  /** The status of each of the remote hosts */
  private int[] m_remoteHostsStatus;

  /** The number of times tasks have failed on each remote host */
  private int[] m_remoteHostFailureCounts;

  /** Host status: idle and ready to accept a row */
  protected static final int AVAILABLE = 0;

  /** Host status: currently processing a row */
  protected static final int IN_USE = 1;

  /** Host status: an exception occurred while talking to the host */
  protected static final int CONNECTION_FAILED = 2;

  /** Host status: the remote task itself reported failure */
  protected static final int SOME_OTHER_FAILURE = 3;

  /** Number of task failures after which a host is dropped from the queue */
  protected static final int MAX_FAILURES = 3;

  /**
   * Set to true if MAX_FAILURES exceeded on all hosts or connections fail on
   * all hosts or user aborts plotting
   */
  private boolean m_plottingAborted = false;

  /** The number of hosts removed due to exceeding max failures */
  private int m_removedHosts;

  /** The count of failed sub-tasks */
  private int m_failedCount;

  /** The queue of sub-tasks (Integer row numbers) waiting to be processed */
  private weka.core.Queue m_subExpQueue = new weka.core.Queue();

  /** minimum number of milliseconds between polls of a remote server */
  private final int m_minTaskPollTime = 1000;

  /** Current polling interval, in milliseconds, for each remote host */
  private int[] m_hostPollingTime;

  /**
   * Creates a new BoundaryPanelDistributed instance.
   * 
   * @param panelWidth width of the display
   * @param panelHeight height of the display
   */
  public BoundaryPanelDistributed(int panelWidth, int panelHeight) {
    super(panelWidth, panelHeight);
  }

  /**
   * Set a list of host names of machines to distribute processing to
   * 
   * @param remHosts a Vector of host names (Strings)
   */
  public void setRemoteHosts(Vector<String> remHosts) {
    m_remoteHosts = remHosts;
  }

  /**
   * Add an object to the list of those interested in receiving update
   * information from the RemoteExperiment
   * 
   * @param r a listener
   */
  public void addRemoteExperimentListener(RemoteExperimentListener r) {
    m_listeners.addElement(r);
  }

  /**
   * Resets all bookkeeping for a fresh plot: marks every host as available,
   * queues every row of the panel as a waiting sub-task, and builds the
   * classifier and the data generator on the training data.
   * <p>
   * Terminates the JVM with exit status 1 if no hosts have been set or if
   * building the classifier or the data generator fails.
   */
  @Override
  protected void initialize() {
    super.initialize();

    m_plottingAborted = false;
    m_failedCount = 0;
    // the host queue is rebuilt with every host below, so no host counts as
    // removed any more; without this reset a second start() would make the
    // completion check in availableHost() fire at the wrong time
    m_removedHosts = 0;

    int numHosts = m_remoteHosts.size();

    // initialize all remote hosts to available (AVAILABLE == 0)
    m_remoteHostsStatus = new int[numHosts];
    m_remoteHostFailureCounts = new int[numHosts];

    m_remoteHostsQueue = new weka.core.Queue();

    if (numHosts == 0) {
      System.err.println("No hosts specified!");
      System.exit(1);
    }

    // prime the hosts queue
    m_hostPollingTime = new int[numHosts];
    for (int i = 0; i < numHosts; i++) {
      m_remoteHostsQueue.push(Integer.valueOf(i));
      m_hostPollingTime[i] = m_minTaskPollTime;
    }

    // set up sub tasks (just holds the row numbers to be processed)
    m_subExpQueue = new weka.core.Queue();
    for (int i = 0; i < m_panelHeight; i++) {
      m_subExpQueue.push(Integer.valueOf(i));
    }

    try {
      // need to build classifier and data generator
      m_classifier.buildClassifier(m_trainingData);
    } catch (Exception ex) {
      ex.printStackTrace();
      System.exit(1);
    }

    // build DataGenerator, weighting only on the two plotted attributes
    boolean[] attsToWeightOn = new boolean[m_trainingData.numAttributes()];
    attsToWeightOn[m_xAttribute] = true;
    attsToWeightOn[m_yAttribute] = true;

    m_dataGenerator.setWeightingDimensions(attsToWeightOn);
    try {
      m_dataGenerator.buildGenerator(m_trainingData);
    } catch (Exception ex) {
      ex.printStackTrace();
      System.exit(1);
    }
  }

  /**
   * Start processing
   * 
   * @exception Exception if the training data, classifier or data generator
   *              has not been set, or if either visualization attribute is
   *              nominal
   */
  @Override
  public void start() throws Exception {
    m_stopReplotting = true;
    if (m_trainingData == null) {
      throw new Exception("No training data set (BoundaryPanel)");
    }
    if (m_classifier == null) {
      throw new Exception("No classifier set (BoundaryPanel)");
    }
    if (m_dataGenerator == null) {
      throw new Exception("No data generator set (BoundaryPanel)");
    }
    if (m_trainingData.attribute(m_xAttribute).isNominal()
      || m_trainingData.attribute(m_yAttribute).isNominal()) {
      throw new Exception("Visualization dimensions must be numeric "
        + "(BoundaryPanel)");
    }

    computeMinMaxAtts();
    initialize();

    // launch tasks on all available hosts; the size is captured up front
    // because each call pops a host off the queue
    int totalHosts = m_remoteHostsQueue.size();
    for (int i = 0; i < totalHosts; i++) {
      availableHost(-1);
      Thread.sleep(70);
    }
  }

  /**
   * Push a host back onto the list of available hosts and launch a waiting Task
   * (if any).
   * 
   * @param hostNum the number of the host to return to the queue. -1 if no host
   *          to return.
   */
  protected synchronized void availableHost(int hostNum) {
    if (hostNum >= 0) {
      if (m_remoteHostFailureCounts[hostNum] < MAX_FAILURES) {
        m_remoteHostsQueue.push(Integer.valueOf(hostNum));
      } else {
        notifyListeners(false, true, false, "Max failures exceeded for host "
          + (m_remoteHosts.elementAt(hostNum)) + ". Removed from host list.");
        m_removedHosts++;
      }
    }

    // check for all sub exp complete or all hosts failed or failed count
    // exceeded
    if (m_failedCount == (MAX_FAILURES * m_remoteHosts.size())) {
      m_plottingAborted = true;
      notifyListeners(false, true, true, "Plotting aborted! Max failures "
        + "exceeded on all remote hosts.");
      return;
    }

    // every host is either idle in the queue or removed, i.e. nothing is
    // still running
    boolean allHostsIdle = m_remoteHosts.size() == (m_remoteHostsQueue.size() + m_removedHosts);

    if (m_subExpQueue.size() == 0 && allHostsIdle) {
      if (m_plotTrainingData) {
        plotTrainingData();
      }
      notifyListeners(false, true, true, "Plotting completed successfully.");

      return;
    }

    if (checkForAllFailedHosts()) {
      return;
    }

    if (m_plottingAborted && allHostsIdle) {
      notifyListeners(false, true, true, "Plotting aborted. All remote tasks "
        + "finished.");
    }

    if (!m_subExpQueue.empty() && !m_plottingAborted
      && !m_remoteHostsQueue.empty()) {
      try {
        int availHost = ((Integer) m_remoteHostsQueue.pop()).intValue();
        int waitingTask = ((Integer) m_subExpQueue.pop()).intValue();
        launchNext(waitingTask, availHost);
      } catch (Exception ex) {
        ex.printStackTrace();
      }
    }
  }

  /**
   * Inform all listeners of progress. If no listener is registered the message
   * is written to standard error instead.
   * 
   * @param status true if this is a status type of message
   * @param log true if this is a log type of message
   * @param finished true if the remote task has finished
   * @param message the message.
   */
  private synchronized void notifyListeners(boolean status, boolean log,
    boolean finished, String message) {
    if (m_listeners.size() > 0) {
      for (int i = 0; i < m_listeners.size(); i++) {
        RemoteExperimentListener r = m_listeners.elementAt(i);
        r.remoteExperimentStatus(new RemoteExperimentEvent(status, log,
          finished, message));
      }
    } else {
      System.err.println(message);
    }
  }

  /**
   * Check to see if we have failed to connect to all hosts. If so, plotting is
   * flagged as aborted and the listeners are notified.
   * 
   * @return true if every host has status CONNECTION_FAILED
   */
  private boolean checkForAllFailedHosts() {
    boolean allbad = true;
    for (int hostStatus : m_remoteHostsStatus) {
      if (hostStatus != CONNECTION_FAILED) {
        allbad = false;
        break;
      }
    }
    if (allbad) {
      m_plottingAborted = true;
      notifyListeners(false, true, true, "Plotting aborted! All connections "
        + "to remote hosts failed.");
    }
    return allbad;
  }

  /**
   * Increment the number of successfully completed sub experiments. This
   * implementation keeps no such count and does nothing.
   */
  protected synchronized void incrementFinished() {
  }

  /**
   * Increment the overall number of failures and the number of failures for a
   * particular host
   * 
   * @param hostNum the index of the host to increment failure count
   */
  protected synchronized void incrementFailed(int hostNum) {
    m_failedCount++;
    m_remoteHostFailureCounts[hostNum]++;
  }

  /**
   * Push an experiment back on the queue of waiting experiments
   * 
   * @param expNum the index of the experiment to push onto the queue
   */
  protected synchronized void waitingTask(int expNum) {
    m_subExpQueue.push(Integer.valueOf(expNum));
  }

  /**
   * Builds the sub-task describing one row of the visualization, copying the
   * current panel configuration into it.
   * 
   * @param row the row number the sub-task should compute
   * @return the configured sub-task
   */
  private RemoteBoundaryVisualizerSubTask createSubTask(int row) {
    RemoteBoundaryVisualizerSubTask vSubTask = new RemoteBoundaryVisualizerSubTask();
    vSubTask.setXAttribute(m_xAttribute);
    vSubTask.setYAttribute(m_yAttribute);
    vSubTask.setRowNumber(row);
    vSubTask.setPanelWidth(m_panelWidth);
    vSubTask.setPanelHeight(m_panelHeight);
    vSubTask.setPixHeight(m_pixHeight);
    vSubTask.setPixWidth(m_pixWidth);
    vSubTask.setClassifier(m_classifier);
    vSubTask.setDataGenerator(m_dataGenerator);
    vSubTask.setInstances(m_trainingData);
    vSubTask.setMinMaxX(m_minX, m_maxX);
    vSubTask.setMinMaxY(m_minY, m_maxY);
    vSubTask.setNumSamplesPerRegion(m_numOfSamplesPerRegion);
    vSubTask.setGeneratorSamplesBase(m_samplesBase);
    return vSubTask;
  }

  /**
   * Formats an estimated remaining time for inclusion in a progress message,
   * switching from seconds to minutes to hours once a value exceeds 60.
   * 
   * @param millisToGo the estimated remaining time in milliseconds
   * @return the text " (approx. time remaining N unit)"
   */
  private static String formatTimeRemaining(double millisToGo) {
    String units = "seconds";
    double timeToGo = millisToGo / 1000.0;
    if (timeToGo > 60) {
      units = "minutes";
      timeToGo /= 60.0;
    }
    if (timeToGo > 60) {
      units = "hours";
      timeToGo /= 60.0;
    }
    return " (approx. time remaining " + Utils.doubleToString(timeToGo, 1)
      + " " + units + ")";
  }

  /**
   * Launches a row on a remote host in a new minimum-priority thread. The
   * thread submits the sub-task, then polls the host until the task reports
   * FINISHED (the row is cached and plotted and the host is returned to the
   * queue) or FAILED (the row is re-queued and the host's failure count is
   * incremented). Any exception is treated as a connection failure: the host
   * is counted as removed and the row is re-queued.
   * 
   * @param wtask the row number to process
   * @param ah the index (into the list of remote hosts) of the host to use
   */
  protected void launchNext(final int wtask, final int ah) {
    Thread subTaskThread = new Thread() {
      @Override
      public void run() {
        m_remoteHostsStatus[ah] = IN_USE;
        RemoteBoundaryVisualizerSubTask vSubTask = createSubTask(wtask);
        try {
          String name = "//" + (m_remoteHosts.elementAt(ah)) + "/RemoteEngine";
          Compute comp = (Compute) Naming.lookup(name);
          // assess the status of the sub-exp
          notifyListeners(false, true, false, "Starting row " + wtask
            + " on host " + (m_remoteHosts.elementAt(ah)));
          Object subTaskId = comp.executeTask(vSubTask);
          boolean finished = false;
          // status from the previous poll; null until the first
          // non-terminal status has been seen
          TaskStatusInfo is = null;
          long startTime = System.currentTimeMillis();
          while (!finished) {
            try {
              Thread.sleep(Math.max(m_minTaskPollTime, m_hostPollingTime[ah]));

              TaskStatusInfo cs = (TaskStatusInfo) comp.checkStatus(subTaskId);
              if (cs.getExecutionStatus() == TaskStatusInfo.FINISHED) {
                // next time poll this host at a quarter of the observed run
                // time, but never more often than once a second
                long runTime = (System.currentTimeMillis() - startTime) / 4;
                m_hostPollingTime[ah] = (int) Math.max(1000L, runTime);

                // Extract the row from the result
                RemoteResult rr = (RemoteResult) cs.getTaskResult();
                double[][] probs = rr.getProbabilities();

                for (int i = 0; i < m_panelWidth; i++) {
                  m_probabilityCache[wtask][i] = probs[i];
                  // only request a display update on the last pixel of the row
                  plotPoint(i, wtask, probs[i], i == m_panelWidth - 1);
                }
                notifyListeners(false, true, false, cs.getStatusMessage());
                m_remoteHostsStatus[ah] = AVAILABLE;
                incrementFinished();
                // push host back onto queue and try launching any waiting
                // sub-experiments
                availableHost(ah);
                finished = true;
              } else if (cs.getExecutionStatus() == TaskStatusInfo.FAILED) {
                // a non connection related error---possibly host doesn't have
                // access to data sets or security policy is not set up
                // correctly or classifier(s) failed for some reason
                notifyListeners(false, true, false, cs.getStatusMessage());
                m_remoteHostsStatus[ah] = SOME_OTHER_FAILURE;
                notifyListeners(false, true, false,
                  "Row " + wtask + " " + cs.getStatusMessage()
                    + ". Scheduling for execution on another host.");
                incrementFailed(ah);
                // push experiment back onto queue
                waitingTask(wtask);
                // push host back onto queue and try launching any waiting
                // Tasks. Host is pushed back on the queue as the
                // failure may be temporary.
                availableHost(ah);
                finished = true;
              } else if (is == null) {
                is = cs;
                notifyListeners(false, true, false, cs.getStatusMessage());
              } else {
                RemoteResult rr = (RemoteResult) cs.getTaskResult();
                if (rr != null) {
                  int percentComplete = rr.getPercentCompleted();
                  String timeRemaining = "";
                  if (percentComplete > 0 && percentComplete < 100) {
                    double timeSoFar = (double) System.currentTimeMillis()
                      - (double) startTime;
                    double timeToGo = ((100.0 - percentComplete) / percentComplete)
                      * timeSoFar;
                    // don't sleep past the estimated completion time
                    if (timeToGo < m_hostPollingTime[ah]) {
                      m_hostPollingTime[ah] = (int) timeToGo;
                    }
                    timeRemaining = formatTimeRemaining(timeToGo);
                  }
                  if (percentComplete < 25) {
                    // early in the task: back off, capped at one minute
                    if (percentComplete > 0) {
                      m_hostPollingTime[ah] = (int) ((25.0 / percentComplete) * m_hostPollingTime[ah]);
                    } else {
                      m_hostPollingTime[ah] *= 2;
                    }
                    if (m_hostPollingTime[ah] > 60000) {
                      m_hostPollingTime[ah] = 60000;
                    }
                  }
                  notifyListeners(false, true, false, "Row " + wtask + " "
                    + percentComplete + "% complete" + timeRemaining + ".");
                } else {
                  notifyListeners(false, true, false, "Row " + wtask
                    + " queued on " + (m_remoteHosts.elementAt(ah)));
                  if (m_hostPollingTime[ah] < 60000) {
                    m_hostPollingTime[ah] *= 2;
                  }
                }

                is = cs;
              }
            } catch (InterruptedException ie) {
              // NOTE(review): an interrupt only cuts the current sleep short;
              // polling carries on so that the row and host are not lost
              ie.printStackTrace();
            }
          }
        } catch (Exception ce) {
          m_remoteHostsStatus[ah] = CONNECTION_FAILED;
          // m_removedHosts is also read and written by the synchronized
          // availableHost(), so update it under the same monitor
          synchronized (BoundaryPanelDistributed.this) {
            m_removedHosts++;
          }
          System.err.println(ce);
          ce.printStackTrace();
          notifyListeners(false, true, false,
            "Connection to " + (m_remoteHosts.elementAt(ah))
              + " failed. Scheduling row " + wtask
              + " for execution on another host.");
          checkForAllFailedHosts();
          waitingTask(wtask);
        } finally {
          if (isInterrupted()) {
            System.err.println("Sub exp Interupted!");
          }
        }
      }
    };
    subTaskThread.setPriority(Thread.MIN_PRIORITY);
    subTaskThread.start();
  }

  /**
   * Main method for testing this class. Reads host names (one per line) from
   * the file "hosts.vis" in the current directory, optionally loads a color
   * map from "colors.ser", and saves the finished plot as a JPEG.
   * 
   * @param args &lt;dataset&gt; &lt;class col&gt; &lt;xAtt&gt; &lt;yAtt&gt;
   *          &lt;base&gt; &lt;# loc/pixel&gt; &lt;kernel bandwidth&gt;
   *          &lt;display width&gt; &lt;display height&gt; &lt;classifier
   *          [classifier options]&gt;
   */
  public static void main(String[] args) {
    try {
      // nine fixed parameters (args[0..8]) plus the classifier name (args[9])
      if (args.length < 10) {
        System.err.println("Usage : BoundaryPanelDistributed <dataset> "
          + "<class col> <xAtt> <yAtt> "
          + "<base> <# loc/pixel> <kernel bandwidth> "
          + "<display width> "
          + "<display height> <classifier [classifier options]>");
        System.exit(1);
      }

      Vector<String> hostNames = new Vector<String>();
      // try loading hosts file
      try {
        BufferedReader br = new BufferedReader(new FileReader("hosts.vis"));
        try {
          String hostName = br.readLine();
          while (hostName != null) {
            hostName = hostName.trim();
            // a blank line would otherwise become an unreachable empty host
            if (hostName.length() > 0) {
              System.out.println("Adding host " + hostName);
              hostNames.add(hostName);
            }
            hostName = br.readLine();
          }
        } finally {
          br.close();
        }
      } catch (Exception ex) {
        System.err.println("No hosts.vis file - create this file in "
          + "the current directory with one host name "
          + "per line, or use BoundaryPanel instead.");
        System.exit(1);
      }

      final javax.swing.JFrame jf = new javax.swing.JFrame(
        "Weka classification boundary visualizer");
      jf.getContentPane().setLayout(new BorderLayout());

      System.err.println("Loading instances from : " + args[0]);
      final Instances i;
      BufferedReader dataReader = new BufferedReader(new FileReader(args[0]));
      try {
        i = new Instances(dataReader);
      } finally {
        dataReader.close();
      }
      i.setClassIndex(Integer.parseInt(args[1]));

      final int xatt = Integer.parseInt(args[2]);
      final int yatt = Integer.parseInt(args[3]);
      int base = Integer.parseInt(args[4]);
      int loc = Integer.parseInt(args[5]);

      int bandWidth = Integer.parseInt(args[6]);
      int panelWidth = Integer.parseInt(args[7]);
      int panelHeight = Integer.parseInt(args[8]);

      final String classifierName = args[9];
      final BoundaryPanelDistributed bv = new BoundaryPanelDistributed(
        panelWidth, panelHeight);
      bv.addRemoteExperimentListener(new RemoteExperimentListener() {
        @Override
        public void remoteExperimentStatus(RemoteExperimentEvent e) {
          if (e.m_experimentFinished) {
            // strip the package prefix from the classifier name
            String classifierNameNew = classifierName.substring(classifierName
              .lastIndexOf('.') + 1);
            bv.saveImage(classifierNameNew + "_" + i.relationName() + "_X"
              + xatt + "_Y" + yatt + ".jpg");
          } else {
            System.err.println(e.m_messageString);
          }
        }
      });
      bv.setRemoteHosts(hostNames);

      jf.getContentPane().add(bv, BorderLayout.CENTER);
      jf.setSize(bv.getMinimumSize());
      jf.addWindowListener(new java.awt.event.WindowAdapter() {
        @Override
        public void windowClosing(java.awt.event.WindowEvent e) {
          jf.dispose();
          System.exit(0);
        }
      });

      jf.pack();
      jf.setVisible(true);
      bv.repaint();

      // everything after the classifier name is passed on as its options
      String[] argsR = null;
      if (args.length > 10) {
        argsR = new String[args.length - 10];
        System.arraycopy(args, 10, argsR, 0, argsR.length);
      }
      Classifier c = AbstractClassifier.forName(classifierName, argsR);
      KDDataGenerator dataGen = new KDDataGenerator();
      dataGen.setKernelBandwidth(bandWidth);
      bv.setDataGenerator(dataGen);
      bv.setNumSamplesPerRegion(loc);
      bv.setGeneratorSamplesBase(base);
      bv.setClassifier(c);
      bv.setTrainingData(i);
      bv.setXAttribute(xatt);
      bv.setYAttribute(yatt);

      try {
        // try and load a color map if one exists
        FileInputStream fis = new FileInputStream("colors.ser");
        try {
          ObjectInputStream ois = new ObjectInputStream(fis);
          @SuppressWarnings("unchecked")
          ArrayList<Color> colors = (ArrayList<Color>) ois.readObject();
          bv.setColors(colors);
        } finally {
          // closing the underlying stream also covers the case where the
          // ObjectInputStream could not be constructed
          fis.close();
        }
      } catch (Exception ex) {
        System.err.println("No color map file");
      }
      bv.start();
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy