weka.knowledgeflow.steps.Job Maven / Gradle / Ivy

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the developer version, the "bleeding edge" of development: new functionality is added to this version first.

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    Job.java
 *    Copyright (C) 2016 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.knowledgeflow.steps;

import weka.core.Environment;
import weka.core.OptionMetadata;
import weka.core.Settings;
import weka.core.WekaException;
import weka.gui.FilePropertyMetadata;
import weka.gui.knowledgeflow.KFGUIConsts;
import weka.knowledgeflow.BaseExecutionEnvironment;
import weka.knowledgeflow.Data;
import weka.knowledgeflow.Flow;
import weka.knowledgeflow.FlowExecutor;
import weka.knowledgeflow.FlowRunner;
import weka.knowledgeflow.JSONFlowLoader;
import weka.knowledgeflow.JobEnvironment;
import weka.knowledgeflow.KFDefaults;
import weka.knowledgeflow.LogManager;
import weka.knowledgeflow.LoggingLevel;
import weka.knowledgeflow.StepManager;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;

/**
 * Step that executes another flow as a "job". Typically, you would parameterize
 * the flow to be executed with variables (in steps that support variables) and
 * then use a data source connected to a SetVariables step to change the
 * values of the variables dynamically at runtime. For example, a DataGrid could
 * be used to define a set of instances with a string variable containing paths
 * to ARFF files to process; SetVariables can be used to transfer these file
 * paths from the incoming instances generated by DataGrid to the values of
 * variables; then the Job step can execute its sub-flow for each configuration
 * of variables received, thus processing a different ARFF file (if the subflow
 * uses an ArffLoader step).
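 * <p>
 * As a concrete (hypothetical) illustration: the sub-flow's ArffLoader could
 * have its source file configured as "${sourceFile}", and the outer flow's
 * SetVariables step would map an incoming string attribute to the
 * "sourceFile" variable before each execution of this Job step.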
 * 
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 * @version $Revision: $
 */
@KFStep(name = "Job", category = "Flow",
  toolTipText = "Execute a flow as a 'job' and wait for it to finish",
  iconPath = KFGUIConsts.BASE_ICON_PATH + "Job.gif")
public class Job extends BaseStep {
  private static final long serialVersionUID = -8684065684979500325L;

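  /** The file containing the flow to execute */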
  protected File m_flowToRun = new File("--NONE--");

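  /** Executor for running the sub-flow */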
  protected transient FlowExecutor m_flowExecutor;

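  /** The logging level to use */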
  protected LoggingLevel m_logLevel = LoggingLevel.BASIC;

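  /**
   * Set the file containing the flow to execute
   *
   * @param flowFile the flow file to execute
   */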
  @FilePropertyMetadata(fileChooserDialogType = KFGUIConsts.OPEN_DIALOG,
    directoriesOnly = false)
  @OptionMetadata(displayName = "Flow file",
    description = "The flow to execute", displayOrder = 0)
  public void setFlowFile(File flowFile) {
    m_flowToRun = flowFile;
  }

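  /**
   * Get the file containing the flow to execute
   *
   * @return the flow file to execute
   */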
  public File getFlowFile() {
    return m_flowToRun;
  }

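  /**
   * Initialize the step. Resolves the flow file (first on the file system,
   * then as a classpath resource), configures a flow executor with the
   * application settings and a copy of the current environment variables, and
   * redirects the sub-flow's log output to this step's log.
   *
   * @throws WekaException if no flow file has been specified, or if the flow
   *           or the application settings cannot be loaded
   */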
  @Override
  public void stepInit() throws WekaException {
    String fileName = m_flowToRun.toString();
    if (fileName.equals("--NONE--")) {
      throw new WekaException("No flow to execute specified!");
    }
    fileName = getStepManager().environmentSubstitute(fileName);
    m_flowExecutor =
      ((BaseExecutionEnvironment) getStepManager().getExecutionEnvironment())
        .getDefaultFlowExecutor();
    Settings settings = new Settings("weka", KFDefaults.APP_ID);
    try {
      settings.loadSettings();
    } catch (IOException ex) {
      throw new WekaException(ex);
    }
    settings.applyDefaults(new KFDefaults());
    m_flowExecutor.setSettings(settings);
    // copy the current environment variables and set them on the
    // flow executor's execution environment
    Environment env =
      new Environment(getStepManager().getExecutionEnvironment()
        .getEnvironmentVariables());
    m_flowExecutor.getExecutionEnvironment().setEnvironmentVariables(env);
    Flow flowToRun = null;
    if (new File(fileName).exists()) {
      flowToRun = Flow.loadFlow(new File(fileName), getStepManager().getLog());
    } else {
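      // not found on the file system - try loading the flow as a classpath
      // resource (stored in JSON format)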
      String fileNameWithCorrectSeparators =
        fileName.replace(File.separatorChar, '/');

      if (this.getClass().getClassLoader()
        .getResource(fileNameWithCorrectSeparators) != null) {
        flowToRun = Flow.loadFlow(
          this.getClass().getClassLoader()
            .getResourceAsStream(fileNameWithCorrectSeparators),
          new JSONFlowLoader());
      }
    }
    if (flowToRun == null) {
      throw new WekaException("Unable to load the flow: " + fileName);
    }
    m_flowExecutor.setFlow(flowToRun);
    final String flowToRunName = flowToRun.getFlowName();
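    // forward the sub-flow's log output to this step's log, mapping the
    // embedded level tags to the corresponding logging methods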
    m_flowExecutor.setLogger(new FlowRunner.SimpleLogger() {
      @Override
      public void logMessage(String lm) {
        if (lm.contains("[Low]")) {
          getStepManager().logLow(
            lm.replace("[Low]", ""));
        } else if (lm.contains("[Basic]")) {
          getStepManager().logBasic(
            lm.replace("[Basic]", ""));
        } else if (lm.contains("[Detailed]")) {
          getStepManager().logDetailed(
            lm.replace("[Detailed]", ""));
        } else if (lm.contains("[Debugging]")) {
          getStepManager().logDebug(
            lm.replace("[Debugging]", ""));
        } else if (lm.contains("[Warning]")) {
          getStepManager().logWarning(
            lm.replace("[Warning]", ""));
        } else {
          getStepManager().logBasic("" + lm);
        }
      }
    });
  }

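  /**
   * Start processing. Only acts as a start point if there are no incoming
   * connections, in which case the sub-flow is executed once without any
   * variables, properties or results to set.
   *
   * @throws WekaException if a problem occurs
   */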
  @Override
  public void start() throws WekaException {
    if (getStepManager().numIncomingConnections() == 0) {
      getStepManager().logBasic("Launching as a start point");
      runFlow(null, null, null);
    }
  }

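  /**
   * Process an incoming data payload. Any environment variables, step
   * properties and results carried in the payload are extracted and passed to
   * the sub-flow, which is then executed. The end-of-stream marker is ignored.
   *
   * @param data the data to process
   * @throws WekaException if a problem occurs
   */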
  @Override
  public void processIncoming(Data data) throws WekaException {
    if (!getStepManager().isStreamFinished(data)) {
      Map<String, String> varsToSet =
        data.getPayloadElement(StepManager.CON_AUX_DATA_ENVIRONMENT_VARIABLES);
      Map<String, Map<String, String>> propsToSet =
        data.getPayloadElement(StepManager.CON_AUX_DATA_ENVIRONMENT_PROPERTIES);
      Map<String, List<Data>> results =
        data.getPayloadElement(StepManager.CON_AUX_DATA_ENVIRONMENT_RESULTS);
      if (varsToSet != null) {
        getStepManager().logBasic(
          "Received variables (" + varsToSet.size() + " key-value pairs)");
      }
      if (propsToSet != null) {
        getStepManager().logBasic(
          "Received properties (" + propsToSet.size() + " target steps)");
      }
      if (results != null) {
        getStepManager()
          .logBasic(
            "Received results containing " + results.size()
              + " connection types");
      }
      getStepManager().logBasic("Launching sub-flow");
      runFlow(varsToSet, propsToSet, results);
    }
  }

  /**
   * Run the sub-flow using the supplied environment variables (if any)
   *
   * @param varsToSet variables to set before executing the sub-flow. Can be
   *          null.
   * @param propsToSet property values for target steps (only scheme-based steps
   *          can be targets)
   * @param results results (if any) to pass in to the sub-flow
   * @throws WekaException if a problem occurs
   */
  protected void runFlow(Map<String, String> varsToSet,
    Map<String, Map<String, String>> propsToSet,
    Map<String, List<Data>> results) throws WekaException {
    getStepManager().processing();

    JobEnvironment env =
      new JobEnvironment(getStepManager().getExecutionEnvironment()
        .getEnvironmentVariables());
    m_flowExecutor.getExecutionEnvironment().setEnvironmentVariables(env);
    if (varsToSet != null) {
      for (Map.Entry<String, String> e : varsToSet.entrySet()) {
        env.addVariable(e.getKey(), e.getValue());
      }
    }
    if (propsToSet != null) {
      env.addToStepProperties(propsToSet);
    }
    if (results != null) {
      env.addAllResults(results);
    }

    getStepManager().statusMessage(
      "Executing flow '" + m_flowExecutor.getFlow().getFlowName() + "'");
    try {
      m_flowExecutor.runParallel();
      m_flowExecutor.waitUntilFinished();
      // just give the executor a bit longer in order to
      // complete shutdown of executor services
      try {
        Thread.sleep(200);
      } catch (InterruptedException e) {
        // ignore
      }
      Data success =
        new Data(StepManager.CON_JOB_SUCCESS, getName()
          + " completed successfully.");
      success.setPayloadElement(StepManager.CON_AUX_DATA_IS_INCREMENTAL, true);
      if (varsToSet != null) {
        success.setPayloadElement(
          StepManager.CON_AUX_DATA_ENVIRONMENT_VARIABLES, varsToSet);
      }

      if (propsToSet != null) {
        success.setPayloadElement(
          StepManager.CON_AUX_DATA_ENVIRONMENT_PROPERTIES, propsToSet);
      }

      JobEnvironment flowEnv =
        (JobEnvironment) m_flowExecutor.getExecutionEnvironment()
          .getEnvironmentVariables();

      if (flowEnv.getResultData() != null) {
        success.setPayloadElement(StepManager.CON_AUX_DATA_ENVIRONMENT_RESULTS,
          flowEnv.getResultData());
      }

      getStepManager().outputData(success);
    } catch (WekaException ex) {
      Data failure =
        new Data(StepManager.CON_JOB_FAILURE, LogManager.stackTraceToString(ex));
      failure.setPayloadElement(StepManager.CON_AUX_DATA_IS_INCREMENTAL, true);
      if (varsToSet != null) {
        failure.setPayloadElement(
          StepManager.CON_AUX_DATA_ENVIRONMENT_VARIABLES, varsToSet);
      }

      if (propsToSet != null) {
        failure.setPayloadElement(
          StepManager.CON_AUX_DATA_ENVIRONMENT_PROPERTIES, propsToSet);
      }

      getStepManager().outputData(failure);
    }
    getStepManager().finished();
  }

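  /**
   * Get a list of incoming connection types that this step can accept. Only a
   * single incoming connection (environment, job success or job failure) is
   * allowed, so an empty list is returned once one is present.
   *
   * @return a list of incoming connection types
   */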
  @Override
  public List<String> getIncomingConnectionTypes() {
    List<String> result = new ArrayList<>();
    if (getStepManager().numIncomingConnections() == 0) {
      return Arrays.asList(StepManager.CON_ENVIRONMENT,
        StepManager.CON_JOB_SUCCESS, StepManager.CON_JOB_FAILURE);
    }

    return result;
  }

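  /**
   * Get a list of outgoing connection types that this step can produce (job
   * success or job failure).
   *
   * @return a list of outgoing connection types
   */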
  @Override
  public List<String> getOutgoingConnectionTypes() {
    return Arrays.asList(StepManager.CON_JOB_SUCCESS,
      StepManager.CON_JOB_FAILURE);
  }

  /**
   * Get the custom editor for this step
   *
   * @return the fully qualified class name of the custom editor for this step
   */
  @Override
  public String getCustomEditorForStep() {
    return "weka.gui.knowledgeflow.steps.JobStepEditorDialog";
  }
}