// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// weka.knowledgeflow.steps.StorePropertiesInEnvironment Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    StorePropertiesInEnvironment.java
 *    Copyright (C) 2016 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.knowledgeflow.steps;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import weka.core.Attribute;
import weka.core.Environment;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.WekaException;
import weka.gui.ProgrammaticProperty;
import weka.gui.knowledgeflow.KFGUIConsts;
import weka.knowledgeflow.Data;
import weka.knowledgeflow.JobEnvironment;
import weka.knowledgeflow.StepManager;

/**
 * Stores property values specified in incoming instances in the flow
 * environment.
 * 
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 * @version $Revision: $
 */
@KFStep(
  name = "StorePropertiesInEnvironment",
  category = "Flow",
  toolTipText = "Store property settings for a particular algorithm-based step "
    + "(eg Classifier, Clusterer etc) in the flow environment. When connected "
    + "to a downstream Job step, the sub-flow executed by the Job can use a "
    + "SetPropertiesFromEnvironment step to access the stored properties and "
    + "set them on the underlying scheme in an algorithm-based step. Each property "
    + "is configured by specifying the attribute in the incoming instance to obtain "
    + "its value from, the target scheme-based step (in the sub-flow) that will "
    + "receive it, the property name/path to set on the target step and a default "
    + "property value (optional) to use if the value is missing in the incoming "
    + "instance. If the property/path field is left blank, then it is assumed that "
    + "the value is actually a scheme + options spec in command-line form; otherwise, "
    + "the value is set by processing the property path - e.g. if our target step "
    + "to receive property settings was Bagging (itself with default settings), and "
    + "the property path to set was 'classifier.maxDepth', then the classifier property "
    + "of Bagging would yield a REPTree base classifier and the maxDepth property of "
    + "REPTree would be set. Note that the SetPropertiesFromEnvironment step will "
    + "process property settings in the order that they are defined by this step. This "
    + "means that it is possible to set the entire base learner for a Classifier step"
    + "with one property setting and then drill down to a particular option in the "
    + "base learner using a second property setting.",
  iconPath = KFGUIConsts.BASE_ICON_PATH + "StorePropertiesInEnvironment.gif")
public class StorePropertiesInEnvironment extends BaseStep {
  private static final long serialVersionUID = -1526289154505863542L;

  /** Separators for internal variable specification */
  public static final String SEP1 = "@@vv@@";
  public static final String SEP2 = "@a@a";

  /**
   * Map of properties to set based on the values of attributes in incoming
   * instances. Keyed by attribute name/index. List contains target step name,
   * property path (can be empty string to indicate a command line spec for a
   * complete base-scheme config), default property value. If an incoming
   * attribute value is missing, and no default property value is available, an
   * exception will be generated.
   */
  protected Map> m_propsToSetFromIncomingInstances =
    new LinkedHashMap<>();

  /** True if the structure has been checked */
  protected boolean m_structureCheckComplete;

  /**
   * OK if there is at least one specified attribute in the incoming instance
   * structure
   */
  protected boolean m_structureOK;

  /** Internal string-based representation of property configs */
  protected String m_internalRep = "";

  protected boolean m_raiseErrorWhenValueMissing;

  @ProgrammaticProperty
  public void setPropsInternalRep(String rep) {
    m_internalRep = rep;
  }

  public String getPropsInternalRep() {
    return m_internalRep;
  }

  @Override
  public void stepInit() throws WekaException {
    m_structureCheckComplete = false;
    m_structureOK = false;
    m_propsToSetFromIncomingInstances = internalDynamicToMap(m_internalRep);

    Environment currentEnv =
      getStepManager().getExecutionEnvironment().getEnvironmentVariables();
    if (currentEnv == null) {
      throw new WekaException(
        "The execution environment doesn't seem to have any support for variables");
    }

    if (!(currentEnv instanceof JobEnvironment)) {
      currentEnv = new JobEnvironment(currentEnv);
      getStepManager().getExecutionEnvironment().setEnvironmentVariables(
        currentEnv);
    }

    if (getStepManager().numIncomingConnections() > 0
      && m_propsToSetFromIncomingInstances.size() == 0) {
      getStepManager().logWarning(
        "Incoming data detected, but no properties to "
          + "set from incoming instances have been defined.");
    }
  }

  @Override
  public void processIncoming(Data data) throws WekaException {
    if (!m_structureCheckComplete) {
      m_structureCheckComplete = true;
      Instances structure = null;
      if (data.getConnectionName().equals(StepManager.CON_INSTANCE)) {
        structure = ((Instance) data.getPrimaryPayload()).dataset();
      } else if (data.getConnectionName().equals(StepManager.CON_ENVIRONMENT)) {
        structure =
          ((Instance) data.getPayloadElement(StepManager.CON_AUX_DATA_INSTANCE))
            .dataset();
      } else {
        structure = data.getPrimaryPayload();
      }

      checkStructure(structure);
    }

    getStepManager().processing();

    if (data.getConnectionName().equals(StepManager.CON_INSTANCE)
      || data.getConnectionName().equals(StepManager.CON_ENVIRONMENT)) {
      if (isStopRequested()) {
        getStepManager().interrupted();
        return;
      }
      if (getStepManager().isStreamFinished(data)) {
        Data finished = new Data(StepManager.CON_ENVIRONMENT);
        if (data.getConnectionName().equals(StepManager.CON_ENVIRONMENT)) {
          finished
            .setPayloadElement(
              StepManager.CON_AUX_DATA_ENVIRONMENT_VARIABLES,
              data
                .getPayloadElement(StepManager.CON_AUX_DATA_ENVIRONMENT_VARIABLES));
          finished
            .setPayloadElement(
              StepManager.CON_AUX_DATA_ENVIRONMENT_PROPERTIES,
              data
                .getPayloadElement(StepManager.CON_AUX_DATA_ENVIRONMENT_PROPERTIES));
        }
        getStepManager().throughputFinished(finished);
        return;
      }
      Instance toProcess =
        (Instance) (data.getConnectionName().equals(StepManager.CON_INSTANCE) ? data
          .getPrimaryPayload() : data
          .getPayloadElement(StepManager.CON_AUX_DATA_INSTANCE));
      getStepManager().throughputUpdateStart();
      processInstance(toProcess,
        data.getConnectionName().equals(StepManager.CON_ENVIRONMENT) ? data
          : null);
      getStepManager().throughputUpdateEnd();
    } else {
      Instances insts = data.getPrimaryPayload();
      for (int i = 0; i < insts.numInstances(); i++) {
        if (isStopRequested()) {
          break;
        }
        processInstance(insts.instance(i), null);
        Data finished = new Data(StepManager.CON_ENVIRONMENT);
        getStepManager().throughputFinished(finished);
      }
      if (isStopRequested()) {
        getStepManager().interrupted();
      }
    }
  }

  protected void processInstance(Instance inst, Data existingEnv)
    throws WekaException {
    Map> props = new HashMap<>();

    for (Map.Entry> e : m_propsToSetFromIncomingInstances
      .entrySet()) {
      String attName = environmentSubstitute(e.getKey());
      Attribute current = inst.dataset().attribute(attName);
      int index = -1;
      if (current != null) {
        index = current.index();
      } else {
        // try as a 1-based index
        try {
          index = Integer.parseInt(attName);
          index--; // make zero-based
        } catch (NumberFormatException ex) {
          // ignore
        }
      }

      if (index != -1) {
        String stepName = environmentSubstitute(e.getValue().get(0));
        String propToSet = environmentSubstitute(e.getValue().get(1));
        String val = environmentSubstitute(e.getValue().get(2));

        if (inst.isMissing(index)) {
          if (val.length() == 0 && m_raiseErrorWhenValueMissing) {
            throw new WekaException("Value of attribute '"
              + inst.attribute(index).name()
              + "' was missing in current instance and no default value has "
              + "been specified");
          }
        } else {
          val = inst.stringValue(index);
        }
        Map propsForStep = props.get(stepName);
        if (propsForStep == null) {
          propsForStep = new LinkedHashMap<>();
          props.put(stepName, propsForStep);
        }
        propsForStep.put(propToSet, val);
        getStepManager().logDebug(
          "Storing property '" + propToSet + "' for step " + "'" + stepName
            + "' with value '" + val + "'");
      }
    }

    JobEnvironment env =
      (JobEnvironment) getStepManager().getExecutionEnvironment()
        .getEnvironmentVariables();
    env.addToStepProperties(props);

    if (existingEnv != null) {
      Map> existingProps =
        existingEnv
          .getPayloadElement(StepManager.CON_AUX_DATA_ENVIRONMENT_PROPERTIES);
      if (existingProps != null) {
        props.putAll(existingProps);
      }
    }
    Data output = new Data(StepManager.CON_ENVIRONMENT);
    output.setPayloadElement(StepManager.CON_AUX_DATA_ENVIRONMENT_PROPERTIES,
      props);
    if (existingEnv != null) {
      output.setPayloadElement(StepManager.CON_AUX_DATA_ENVIRONMENT_VARIABLES,
        existingEnv
          .getPayloadElement(StepManager.CON_AUX_DATA_ENVIRONMENT_VARIABLES));
    }
    output.setPayloadElement(StepManager.CON_AUX_DATA_INSTANCE, inst);
    output.setPayloadElement(StepManager.CON_AUX_DATA_IS_INCREMENTAL, true);
    getStepManager().outputData(output);
  }

  protected void checkStructure(Instances structure) {
    List notFoundInIncoming = new ArrayList<>();
    for (String attName : m_propsToSetFromIncomingInstances.keySet()) {
      if (structure.attribute(attName) == null) {
        notFoundInIncoming.add(attName);
      } else {
        m_structureOK = true;
      }
    }

    if (notFoundInIncoming.size() == m_propsToSetFromIncomingInstances.size()) {
      getStepManager().logWarning(
        "None of the specified attributes appear to be "
          + "in the incoming instance structure");
      return;
    }

    for (String s : notFoundInIncoming) {
      getStepManager().logWarning(
        "Attribute '" + s + "' was not found in the "
          + "incoming instance structure");
    }
  }

  @Override
  public List getIncomingConnectionTypes() {

    if (getStepManager().numIncomingConnections() == 0) {
      return Arrays.asList(StepManager.CON_DATASET,
        StepManager.CON_TRAININGSET, StepManager.CON_TESTSET,
        StepManager.CON_INSTANCE, StepManager.CON_ENVIRONMENT);
    }

    return new ArrayList<>();
  }

  @Override
  public List getOutgoingConnectionTypes() {
    if (getStepManager().numIncomingConnections() != 0) {
      return Arrays.asList(StepManager.CON_ENVIRONMENT);
    }

    return new ArrayList<>();
  }

  /**
   * Return the fully qualified name of a custom editor component (JComponent)
   * to use for editing the properties of the step. This method can return null,
   * in which case the system will dynamically generate an editor using the
   * GenericObjectEditor
   *
   * @return the fully qualified name of a step editor component
   */
  @Override
  public String getCustomEditorForStep() {
    return "weka.gui.knowledgeflow.steps.StorePropertiesInEnvironmentStepEditorDialog";
  }

  public static Map> internalDynamicToMap(
    String internalRep) {
    Map> propsToSet = new LinkedHashMap<>();
    if (internalRep != null && internalRep.length() > 0) {
      String[] parts = internalRep.split(SEP1);
      for (String p : parts) {
        String[] attVal = p.split(SEP2);
        if (attVal.length == 4) {
          String attName = attVal[0].trim();
          String stepName = attVal[1].trim();
          String propName = attVal[2].trim();
          String defVal = attVal[3].trim();

          if (attName.length() > 0 && stepName.length() > 0) {
            List stepAndDefL = new ArrayList<>();
            stepAndDefL.add(stepName);
            stepAndDefL.add(propName);
            stepAndDefL.add(defVal);
            propsToSet.put(attName, stepAndDefL);
          }
        }
      }
    }

    return propsToSet;
  }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy