weka.knowledgeflow.BaseExecutionEnvironment Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of weka-stable Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.
There is a newer version: 3.8.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see .
 */

/*
 *    BaseExecutionEnvironment.java
 *    Copyright (C) 2015 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.knowledgeflow;

import weka.core.Defaults;
import weka.core.Environment;
import weka.core.Settings;
import weka.core.WekaException;
import weka.gui.Logger;
import weka.core.PluginManager;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

/**
 * Base class for execution environments
 *
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 */
public class BaseExecutionEnvironment implements ExecutionEnvironment {

  /** Description of the default execution environment */
  public static final String DESCRIPTION = "Default execution environment";

  static {
    // register the default execution environment
    PluginManager.addPlugin(BaseExecutionEnvironment.class.getCanonicalName(),
      DESCRIPTION, BaseExecutionEnvironment.class.getCanonicalName());
  }

  /**
   * The FlowExecutor that will be running the flow. This is not guaranteed to
   * be available from this class until the flow is initialized
   */
  protected FlowExecutor m_flowExecutor;

  /** Whether the execution environment is headless or not */
  protected boolean m_headless;

  /** The environment variables to use */
  protected transient Environment m_envVars = Environment.getSystemWide();

  /** The knowledge flow settings to use */
  protected transient Settings m_settings;

  /**
   * This is the main executor service, used by the execution environment to
   * invoke processIncoming() on a step when batch data is passed to it - i.e. a
   * step receiving data will execute in its own thread. This is not used,
   * however, when streaming data is processed, as it usually the case that
   * processing is lightweight in this case and the gains of running a separate
   * thread for each piece of data probably do not outweigh the overheads
   * involved.
   */
  protected transient ExecutorService m_executorService;

  /**
   * An executor service that steps can use to do work in parallel (if desired)
   * by using {@code StepTask instances}. This executor service is intended for
   * high cpu load tasks, and the number of threads used by it should be kept <=
   * number of physical CPU cores
   */
  protected transient ExecutorService m_clientExecutorService;

  /** The log */
  protected transient Logger m_log;

  /** Log handler to wrap log in */
  protected transient LogManager m_logHandler;

  /** The level to log at */
  protected LoggingLevel m_loggingLevel = LoggingLevel.BASIC;

  /**
   * Get a description of this execution environment
   *
   * @return a description of this execution environemtn
   */
  @Override
  public String getDescription() {
    return DESCRIPTION;
  }

  /**
   * Get whether this execution environment is headless
   *
   * @return true if this execution environment is headless
   */
  @Override
  public boolean isHeadless() {
    return m_headless;
  }

  /**
   * Set whether this execution environment is headless
   * 
   * @param headless true if the execution environment is headless
   */
  @Override
  public void setHeadless(boolean headless) {
    m_headless = headless;
  }

  /**
   * Get environment variables for this execution environment
   * 
   * @return the environment variables for this execution environment
   */
  @Override
  public Environment getEnvironmentVariables() {
    return m_envVars;
  }

  /**
   * Set environment variables for this execution environment
   * 
   * @param env the environment variables to use
   */
  @Override
  public void setEnvironmentVariables(Environment env) {
    m_envVars = env;
  }

  @Override
  public void setSettings(Settings settings) {
    m_settings = settings;

    m_logHandler.setLoggingLevel(m_settings.getSetting(
      KFDefaults.MAIN_PERSPECTIVE_ID, KFDefaults.LOGGING_LEVEL_KEY,
      KFDefaults.LOGGING_LEVEL));
  }

  @Override
  public Settings getSettings() {
    if (m_settings == null) {
      m_settings = new Settings("weka", KFDefaults.APP_ID);
    }

    return m_settings;
  }

  /**
   * Get the log in use
   * 
   * @return the log in use
   */
  @Override
  public Logger getLog() {
    return m_log;
  }

  /**
   * Set the log to use
   * 
   * @param log the log to use
   */
  @Override
  public void setLog(Logger log) {
    m_log = log;
    if (m_logHandler == null) {
      m_logHandler = new LogManager(m_log);
      m_logHandler.m_statusMessagePrefix =
        "BaseExecutionEnvironment$" + hashCode() + "|";
    }
    m_logHandler.setLog(m_log);
  }

  /**
   * Get the logging level in use
   *
   * @return the logging level in use
   */
  @Override
  public LoggingLevel getLoggingLevel() {
    return m_loggingLevel;
  }

  /**
   * Set the logging level to use
   *
   * @param level the logging level to use
   */
  @Override
  public void setLoggingLevel(LoggingLevel level) {
    m_loggingLevel = level;
  }

  /**
   * Submit a task to be run by the execution environment. The default execution
   * environment uses an ExecutorService to run tasks in parallel. Client steps
   * are free to use this service or to just do their processing locally within
   * their own code.
   * 
   * @param stepTask the StepTask encapsulating the code to be run
   * @return the Future holding the status and result when complete
   */
  @Override
  public  Future> submitTask(StepTask stepTask)
    throws WekaException {

    m_logHandler.logDebug("Submitting " + stepTask.toString()
      + (stepTask.isResourceIntensive() ? " (resource intensive)" : ""));
    return stepTask.isResourceIntensive() ? m_clientExecutorService
      .submit(stepTask) : m_executorService.submit(stepTask);
  }

  /**
   * The main point at which to request stop processing of a flow. This will
   * request the FlowExecutor to stop and then shutdown the executor service
   */
  @Override
  public void stopProcessing() {
    if (getFlowExecutor() != null) {
      getFlowExecutor().stopProcessing();
    }
    if (m_executorService != null) {
      m_executorService.shutdownNow();
      m_executorService = null;
    }
  }

  /**
   * Get the executor that will actually be responsible for running the flow.
   * This is not guaranteed to be available from this execution environment
   * until the flow is actually running (or at least initialized)
   * 
   * @return the executor that will be running the flow
   */
  protected FlowExecutor getFlowExecutor() {
    return m_flowExecutor;
  }

  /**
   * Set the executor that will actually be responsible for running the flow.
   * This is not guaranteed to be available from this execution environment
   * until the flow is actually running (or at least initialized)
   * 
   * @param executor the executor that will be running the flow
   */
  protected void setFlowExecutor(FlowExecutor executor) {
    m_flowExecutor = executor;
  }

  /**
   * Start the main step executor service and the high cpu load executor service
   * for clients to use to execute {@code StepTask} instances in this execution
   * environment. Client steps are free to use this service or to just do their
   * processing locally within their own code (in which case the main step
   * executor service is used).
   * 
   * @param numThreadsMain the number of threads to use (level of parallelism).
   *          <= 0 indicates no limit on parallelism
   * @param numThreadsHighLoad the number of threads to use for the high cpu
   *          load executor service (executes {@code StepTask} instances)
   */
  protected void startClientExecutionService(int numThreadsMain,
    int numThreadsHighLoad) {
    if (m_executorService != null) {
      m_executorService.shutdownNow();
    }

    m_logHandler
      .logDebug("Requested number of threads for main step executor: "
        + numThreadsMain);
    m_logHandler
      .logDebug("Requested number of threads for high load executor: "
        + (numThreadsHighLoad > 0 ? numThreadsHighLoad : Runtime.getRuntime()
          .availableProcessors()));
    m_executorService =
      numThreadsMain > 0 ? Executors.newFixedThreadPool(numThreadsMain)
        : Executors.newCachedThreadPool();

    m_clientExecutorService =
      numThreadsHighLoad > 0 ? Executors.newFixedThreadPool(numThreadsHighLoad)
        : Executors.newFixedThreadPool(Runtime.getRuntime()
          .availableProcessors());
  }

  /**
   * Stop the client executor service
   */
  protected void stopClientExecutionService() {
    if (m_executorService != null) {
      m_executorService.shutdown();
    }

    if (m_clientExecutorService != null) {
      m_clientExecutorService.shutdown();
    }
  }

  /**
   * Launches a Step (via the startStep() method) in either than standard step
   * executor service or the resource intensive executor service. Does not check
   * that the step is actually a start point.
   *
   * @param startPoint the step to launch as a start point
   * @throws WekaException if a problem occurs
   */
  protected void launchStartPoint(final StepManagerImpl startPoint)
    throws WekaException {

    m_logHandler.logDebug("Submitting " + startPoint.getName()
      + (startPoint.stepIsResourceIntensive() ? " (resource intensive)" : ""));
    if (startPoint.stepIsResourceIntensive()) {
      submitTask(new StepTask(null) {

        /** For serialization */
        private static final long serialVersionUID = -5466021103296024455L;

        @Override
        public void process() throws Exception {
          startPoint.startStep();
        }
      });
    } else {
      m_executorService.submit(new Runnable() {
        @Override
        public void run() {
          startPoint.startStep();
        }
      });
    }
  }

  /**
   * Send the supplied data to the specified step. Base implementation just
   * calls processIncoming() on the step directly. Subclasses may opt to do
   * something different (e.g. wrap data and target step to be executed, and
   * then execute remotely).
   * 
   * @param data the data to input to the target step
   * @param step the step to receive data
   * @throws WekaException if a problem occurs
   */
  protected void sendDataToStep(final StepManagerImpl step, final Data... data)
    throws WekaException {

    if (data != null) {
      if (data.length == 1
        && (StepManagerImpl.connectionIsIncremental(data[0]))) {
        // we don't want the overhead of spinning up a thread for single
        // instance (streaming) connections.
        step.processIncoming(data[0]);
      } else {
        m_logHandler.logDebug("Submitting " + step.getName()
          + (step.stepIsResourceIntensive() ? " (resource intensive)" : ""));
        if (step.stepIsResourceIntensive()) {
          m_clientExecutorService.submit(new Runnable() {
            @Override
            public void run() {
              for (Data d : data) {
                step.processIncoming(d);
              }
            }
          });
        } else {
          m_executorService.submit(new Runnable() {
            @Override
            public void run() {
              for (Data d : data) {
                step.processIncoming(d);
              }
            }
          });
        }
      }
    }
  }

  /**
   * Get default settings for the base execution environment
   *
   * @return the default settings
   */
  @Override
  public Defaults getDefaultSettings() {
    return new BaseExecutionEnvironmentDefaults();
  }

  /**
   * Defaults for the base execution environment
   */
  public static class BaseExecutionEnvironmentDefaults extends Defaults {

    public static final Settings.SettingKey STEP_EXECUTOR_SERVICE_NUM_THREADS_KEY =
      new Settings.SettingKey(KFDefaults.APP_ID + ".stepExecutorNumThreads",
        "Number of threads to use in the main step executor service", "");
    public static final int STEP_EXECUTOR_SERVICE_NUM_THREADS = 50;

    public static final Settings.SettingKey RESOURCE_INTENSIVE_EXECUTOR_SERVICE_NUM_THREADS_KEY =
      new Settings.SettingKey(KFDefaults.APP_ID
        + ".highResourceExecutorNumThreads",
        "Number of threads to use in the resource intensive executor service",
        "This executor service is used for executing StepTasks and
"
          + "Steps that are marked as resource intensive. 0 = use as many
"
          + "threads as there are cpu processors.");

    /** Default (0) means use as many threads as there are cpu processors */
    public static final int RESOURCE_INTENSIVE_EXECUTOR_SERVICE_NUM_THREADS = 0;
    private static final long serialVersionUID = -3386792058002464330L;

    public BaseExecutionEnvironmentDefaults() {
      super(KFDefaults.APP_ID);

      m_defaults.put(STEP_EXECUTOR_SERVICE_NUM_THREADS_KEY,
        STEP_EXECUTOR_SERVICE_NUM_THREADS);
      m_defaults.put(RESOURCE_INTENSIVE_EXECUTOR_SERVICE_NUM_THREADS_KEY,
        RESOURCE_INTENSIVE_EXECUTOR_SERVICE_NUM_THREADS);
    }
  }
}