All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.gui.beans.CrossValidationFoldMaker Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    CrossValidationFoldMaker.java
 *    Copyright (C) 2002-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.gui.beans;

import java.io.Serializable;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Instances;

/**
 * Bean for splitting instances into training and test sets according to a
 * cross validation.
 * <p>
 * An incoming data set (or training/test set, which is treated as a plain data
 * set) is copied and split into {@link #getFolds()} train/test pairs on a
 * worker thread. Each pair is handed to the registered training set and test
 * set listeners; the thread that delivered the incoming data is blocked until
 * the worker thread has finished.
 * 
 * @author Mark Hall
 * @version $Revision: 10220 $
 */
public class CrossValidationFoldMaker extends AbstractTrainAndTestSetProducer
  implements DataSourceListener, TrainingSetListener, TestSetListener,
  UserRequestAcceptor, EventConstraints, Serializable, StructureProducer {

  /** for serialization */
  private static final long serialVersionUID = -6350179298851891512L;

  /** the number of folds (train/test splits) to produce */
  private int m_numFolds = 10;

  /** the seed used for randomizing the data */
  private int m_randomSeed = 1;

  /** if true, the data is neither randomized nor stratified */
  private boolean m_preserveOrder = false;

  /**
   * The worker thread that produces the folds; null when no cross validation
   * is running. Setting it to null is also the signal for a running worker to
   * stop, so it is volatile to make that write visible across threads.
   */
  private transient volatile Thread m_foldThread = null;

  /** true if the upstream connection delivers "dataSet" events */
  private boolean m_dataProvider = false;

  /** true if the upstream connection delivers "trainingSet" events */
  private boolean m_trainingProvider = false;

  /** true if the upstream connection delivers "testSet" events */
  private boolean m_testProvider = false;

  /**
   * Creates the bean and sets up its icons and default label.
   */
  public CrossValidationFoldMaker() {
    m_visual.loadIcons(BeanVisual.ICON_PATH + "CrossValidationFoldMaker.gif",
      BeanVisual.ICON_PATH + "CrossValidationFoldMaker_animated.gif");
    m_visual.setText("CrossValidationFoldMaker");
  }

  /**
   * Ask the upstream bean for the structure of whatever it is currently
   * connected to deliver to us.
   * 
   * @return the upstream structure, or null if there is no upstream
   *         StructureProducer or no connection type has been recorded
   */
  private Instances getUpstreamStructure() {
    // instanceof is false for null, so no separate null check is needed
    if (m_listenee instanceof StructureProducer) {
      StructureProducer upstream = (StructureProducer) m_listenee;
      if (m_dataProvider) {
        return upstream.getStructure("dataSet");
      }
      if (m_trainingProvider) {
        return upstream.getStructure("trainingSet");
      }
      if (m_testProvider) {
        return upstream.getStructure("testSet");
      }
    }
    return null;
  }

  /**
   * Get the structure of the output encapsulated in the named event. If the
   * structure can't be determined in advance of seeing input, or this
   * StructureProducer does not generate the named event, null should be
   * returned.
   * 
   * @param eventName the name of the output event that encapsulates the
   *          requested output.
   * 
   * @return the structure of the output encapsulated in the named event or null
   *         if it can't be determined in advance of seeing input or the named
   *         event is not generated by this StructureProducer.
   */
  @Override
  public Instances getStructure(String eventName) {
    // constant-first comparisons so that a null event name yields null
    // instead of a NullPointerException
    boolean wantsTraining = "trainingSet".equals(eventName);
    boolean wantsTest = "testSet".equals(eventName);

    if (!wantsTraining && !wantsTest) {
      return null;
    }
    if (m_listenee == null) {
      return null;
    }

    if (wantsTraining && m_trainingListeners.size() == 0) {
      // downstream has asked for the structure of something that we
      // are not producing at the moment
      return null;
    }

    if (wantsTest && m_testListeners.size() == 0) {
      // downstream has asked for the structure of something that we
      // are not producing at the moment
      return null;
    }

    return getUpstreamStructure();
  }

  /**
   * Notify this object that it has been registered as a listener with a source
   * with respect to the supplied event name
   * 
   * @param eventName the event
   * @param source the source with which this object has been registered as a
   *          listener
   */
  @Override
  public synchronized void connectionNotification(String eventName,
    Object source) {
    super.connectionNotification(eventName, source);

    if (connectionAllowed(eventName)) {
      // remember which kind of event the upstream bean delivers; exactly one
      // of the three flags is set for a recognised event name
      if (eventName.equals("dataSet")) {
        m_dataProvider = true;
        m_trainingProvider = false;
        m_testProvider = false;
      } else if (eventName.equals("trainingSet")) {
        m_dataProvider = false;
        m_trainingProvider = true;
        m_testProvider = false;
      } else if (eventName.equals("testSet")) {
        m_dataProvider = false;
        m_trainingProvider = false;
        m_testProvider = true;
      }
    }
  }

  /**
   * Notify this object that it has been deregistered as a listener with a
   * source with respect to the supplied event name
   * 
   * @param eventName the event
   * @param source the source with which this object has been registered as a
   *          listener
   */
  @Override
  public synchronized void disconnectionNotification(String eventName,
    Object source) {
    super.disconnectionNotification(eventName, source);

    // only forget the connection type once there is no upstream bean left
    if (m_listenee == null) {
      m_dataProvider = false;
      m_trainingProvider = false;
      m_testProvider = false;
    }
  }

  /**
   * Set a custom (descriptive) name for this bean
   * 
   * @param name the name to use
   */
  @Override
  public void setCustomName(String name) {
    m_visual.setText(name);
  }

  /**
   * Get the custom (descriptive) name for this bean (if one has been set)
   * 
   * @return the custom name (or the default name)
   */
  @Override
  public String getCustomName() {
    return m_visual.getText();
  }

  /**
   * Global info for this bean
   * 
   * @return a String value
   */
  public String globalInfo() {
    return "Split an incoming data set into cross validation folds. "
      + "Separate train and test sets are produced for each of the k folds.";
  }

  /**
   * Accept a training set. The training set is re-wrapped as a data set event
   * and processed by {@link #acceptDataSet(DataSetEvent)}.
   * 
   * @param e a TrainingSetEvent value
   */
  @Override
  public void acceptTrainingSet(TrainingSetEvent e) {
    Instances trainingSet = e.getTrainingSet();
    DataSetEvent dse = new DataSetEvent(this, trainingSet);
    acceptDataSet(dse);
  }

  /**
   * Accept a test set. The test set is re-wrapped as a data set event and
   * processed by {@link #acceptDataSet(DataSetEvent)}.
   * 
   * @param e a TestSetEvent value
   */
  @Override
  public void acceptTestSet(TestSetEvent e) {
    Instances testSet = e.getTestSet();
    DataSetEvent dse = new DataSetEvent(this, testSet);
    acceptDataSet(dse);
  }

  /**
   * Accept a data set. A structure-only event is handed straight to the
   * notify methods. Otherwise, unless a cross validation is already running,
   * the data is copied and the folds are produced on a minimum-priority worker
   * thread while the calling thread blocks until that thread has finished.
   * 
   * @param e a DataSetEvent value
   */
  @Override
  public void acceptDataSet(DataSetEvent e) {
    if (e.isStructureOnly()) {
      // Pass on structure to training and test set listeners.
      // NOTE(review): both notify methods stop delivering as soon as
      // m_foldThread is null and no fold thread is started on this path, so
      // the structure only reaches listeners while a cross validation is
      // already running - confirm whether that is intended.
      TrainingSetEvent tse = new TrainingSetEvent(this, e.getDataSet());
      TestSetEvent tsee = new TestSetEvent(this, e.getDataSet());
      notifyTrainingSetProduced(tse);
      notifyTestSetProduced(tsee);
      return;
    }
    if (m_foldThread == null) {
      // work on a copy so that the incoming data is left untouched
      final Instances dataSet = new Instances(e.getDataSet());
      m_foldThread = new Thread() {
        @Override
        public void run() {
          boolean errorOccurred = false;
          try {
            // read the settings once so that every fold of this run is
            // produced with the same configuration, even if a setter is
            // called while the run is in progress
            final int numFolds = getFolds();
            final int seed = getSeed();
            final boolean preserveOrder = m_preserveOrder;

            Random random = new Random(seed);
            if (!preserveOrder) {
              dataSet.randomize(random);
            }
            if (dataSet.classIndex() >= 0
              && dataSet.attribute(dataSet.classIndex()).isNominal()
              && !preserveOrder) {
              dataSet.stratify(numFolds);
              if (m_logger != null) {
                m_logger.logMessage("[" + getCustomName() + "] "
                  + "stratifying data");
              }
            }

            for (int i = 0; i < numFolds; i++) {
              // stop() signals cancellation by clearing m_foldThread
              if (m_foldThread == null) {
                if (m_logger != null) {
                  m_logger.logMessage("[" + getCustomName()
                    + "] Cross validation has been canceled!");
                }
                // exit gracefully
                break;
              }
              Instances train = preserveOrder ? dataSet.trainCV(numFolds, i)
                : dataSet.trainCV(numFolds, i, random);
              Instances test = dataSet.testCV(numFolds, i);

              // inform all training set listeners; the event source is the
              // bean itself (a bare "this" here would be the worker thread)
              TrainingSetEvent tse = new TrainingSetEvent(
                CrossValidationFoldMaker.this, train);
              tse.m_setNumber = i + 1;
              tse.m_maxSetNumber = numFolds;
              String msg = getCustomName() + "$"
                + CrossValidationFoldMaker.this.hashCode() + "|";
              if (m_logger != null) {
                m_logger.statusMessage(msg + "seed: " + seed + " folds: "
                  + numFolds + "|Training fold " + (i + 1));
              }
              if (m_foldThread != null) {
                notifyTrainingSetProduced(tse);
              }

              // inform all test set listeners
              TestSetEvent teste = new TestSetEvent(
                CrossValidationFoldMaker.this, test);
              teste.m_setNumber = i + 1;
              teste.m_maxSetNumber = numFolds;

              if (m_logger != null) {
                m_logger.statusMessage(msg + "seed: " + seed + " folds: "
                  + numFolds + "|Test fold " + (i + 1));
              }
              if (m_foldThread != null) {
                notifyTestSetProduced(teste);
              }
            }
          } catch (Exception ex) {
            // stop all processing
            errorOccurred = true;
            if (m_logger != null) {
              m_logger.logMessage("[" + getCustomName()
                + "] problem during fold creation. " + ex.getMessage());
            }
            ex.printStackTrace();
            CrossValidationFoldMaker.this.stop();
          } finally {
            // mark the bean as idle before waking up the blocked caller
            m_foldThread = null;

            if (errorOccurred) {
              if (m_logger != null) {
                m_logger.statusMessage(getCustomName() + "$"
                  + CrossValidationFoldMaker.this.hashCode() + "|"
                  + "ERROR (See log for details).");
              }
            } else if (isInterrupted()) {
              String msg = "[" + getCustomName()
                + "] Cross validation interrupted";
              if (m_logger != null) {
                m_logger.logMessage(msg);
                m_logger.statusMessage(getCustomName() + "$"
                  + CrossValidationFoldMaker.this.hashCode() + "|"
                  + "INTERRUPTED");
              } else {
                System.err.println(msg);
              }
            } else {
              String msg = getCustomName() + "$"
                + CrossValidationFoldMaker.this.hashCode() + "|";
              if (m_logger != null) {
                m_logger.statusMessage(msg + "Finished.");
              }
            }
            // release the thread waiting in acceptDataSet
            block(false);
          }
        }
      };
      m_foldThread.setPriority(Thread.MIN_PRIORITY);
      m_foldThread.start();

      // wait here until the worker thread has finished
      block(true);
      m_foldThread = null;
    }
  }

  /**
   * Notify all test set listeners of a TestSet event. The listeners are
   * notified from a snapshot of the listener list taken under this object's
   * lock; delivery stops as soon as m_foldThread is null.
   * 
   * @param tse a TestSetEvent value
   */
  @SuppressWarnings("unchecked")
  private void notifyTestSetProduced(TestSetEvent tse) {
    Vector<TestSetListener> l;
    synchronized (this) {
      l = (Vector<TestSetListener>) m_testListeners.clone();
    }
    for (int i = 0; i < l.size(); i++) {
      if (m_foldThread == null) {
        break;
      }
      l.elementAt(i).acceptTestSet(tse);
    }
  }

  /**
   * Notify all listeners of a TrainingSet event. The listeners are notified
   * from a snapshot of the listener list taken under this object's lock;
   * delivery stops as soon as m_foldThread is null.
   * 
   * @param tse a TrainingSetEvent value
   */
  @SuppressWarnings("unchecked")
  protected void notifyTrainingSetProduced(TrainingSetEvent tse) {
    Vector<TrainingSetListener> l;
    synchronized (this) {
      l = (Vector<TrainingSetListener>) m_trainingListeners.clone();
    }
    for (int i = 0; i < l.size(); i++) {
      if (m_foldThread == null) {
        break;
      }
      l.elementAt(i).acceptTrainingSet(tse);
    }
  }

  /**
   * Set the number of folds for the cross validation
   * 
   * @param numFolds an int value
   */
  public void setFolds(int numFolds) {
    m_numFolds = numFolds;
  }

  /**
   * Get the currently set number of folds
   * 
   * @return an int value
   */
  public int getFolds() {
    return m_numFolds;
  }

  /**
   * Tip text for this property
   * 
   * @return a String value
   */
  public String foldsTipText() {
    return "The number of train and test splits to produce";
  }

  /**
   * Set the seed
   * 
   * @param randomSeed an int value
   */
  public void setSeed(int randomSeed) {
    m_randomSeed = randomSeed;
  }

  /**
   * Get the currently set seed
   * 
   * @return an int value
   */
  public int getSeed() {
    return m_randomSeed;
  }

  /**
   * Tip text for this property
   * 
   * @return a String value
   */
  public String seedTipText() {
    return "The randomization seed";
  }

  /**
   * Returns true if the order of the incoming instances is to be preserved
   * under cross-validation (no randomization or stratification is done in this
   * case).
   * 
   * @return true if the order of the incoming instances is to be preserved.
   */
  public boolean getPreserveOrder() {
    return m_preserveOrder;
  }

  /**
   * Sets whether the order of the incoming instances is to be preserved under
   * cross-validation (no randomization or stratification is done in this case).
   * 
   * @param p true if the order is to be preserved.
   */
  public void setPreserveOrder(boolean p) {
    m_preserveOrder = p;
  }

  /**
   * Returns true if, at this time, the bean is busy with some task (i.e.
   * perhaps a worker thread is performing some calculation).
   * 
   * @return true if the bean is busy.
   */
  @Override
  public boolean isBusy() {
    return (m_foldThread != null);
  }

  /**
   * Stop any action. The upstream bean is asked to stop first (if it is a
   * BeanCommon); then a running fold thread is signalled to stop by clearing
   * m_foldThread and interrupting it.
   */
  @Override
  @SuppressWarnings("deprecation")
  public void stop() {
    // tell the listenee (upstream bean) to stop
    if (m_listenee instanceof BeanCommon) {
      ((BeanCommon) m_listenee).stop();
    }

    // stop the fold thread; read the field once because the worker clears it
    // itself when it finishes
    Thread worker = m_foldThread;
    if (worker != null) {
      m_foldThread = null;
      worker.interrupt();
      try {
        worker.stop();
      } catch (UnsupportedOperationException ex) {
        // Thread.stop() is no longer supported on recent JVMs (it throws
        // instead of stopping the thread). The worker has already been
        // interrupted and m_foldThread cleared, so it will exit at its next
        // cancellation check.
      }
    }
  }

  /**
   * Function used to stop code that calls acceptDataSet. This is needed as
   * cross validation is performed inside a separate thread of execution.
   * 
   * @param tf true to wait until the fold thread has finished, false to wake
   *          up any thread that is waiting
   */
  private synchronized void block(boolean tf) {
    if (tf) {
      try {
        // Re-check the condition after every wakeup so that a spurious wakeup
        // does not release the caller early. The field is read into a local
        // because the worker clears it without holding this lock.
        Thread worker = m_foldThread;
        while (worker != null && worker.isAlive()) {
          wait();
          worker = m_foldThread;
        }
      } catch (InterruptedException ex) {
        // stop waiting, but keep the interrupt visible to the caller
        Thread.currentThread().interrupt();
      }
    } else {
      notifyAll();
    }
  }

  /**
   * Return an enumeration of user requests. "Stop" is offered only while a
   * fold thread is registered.
   * 
   * @return an Enumeration value
   */
  @Override
  public Enumeration<String> enumerateRequests() {
    Vector<String> newVector = new Vector<String>(0);
    if (m_foldThread != null) {
      newVector.addElement("Stop");
    }
    return newVector.elements();
  }

  /**
   * Perform the named request
   * 
   * @param request a String value
   * @exception IllegalArgumentException if the request is not "Stop"
   */
  @Override
  public void performRequest(String request) {
    // constant-first comparison: a null request is reported as unsupported
    // rather than causing a NullPointerException
    if ("Stop".equals(request)) {
      stop();
    } else {
      throw new IllegalArgumentException(request
        + " not supported (CrossValidation)");
    }
  }

  /**
   * Returns true, if at the current time, the named event could be generated.
   * Assumes that the supplied event name is an event that could be generated by
   * this bean
   * 
   * @param eventName the name of the event in question
   * @return true if the named event could be generated at this point in time
   */
  @Override
  public boolean eventGeneratable(String eventName) {
    if (m_listenee == null) {
      return false;
    }

    if (m_listenee instanceof EventConstraints) {
      // we can produce output if the upstream bean can deliver any of the
      // three kinds of input that we accept
      EventConstraints upstream = (EventConstraints) m_listenee;
      return upstream.eventGeneratable("dataSet")
        || upstream.eventGeneratable("trainingSet")
        || upstream.eventGeneratable("testSet");
    }
    return true;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy