All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.core.pmml.TransformationDictionary Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    TransformationDictionary.java
 *    Copyright (C) 2008-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.core.pmml;

import java.io.Serializable;
import java.util.ArrayList;

import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import weka.core.Attribute;
import weka.core.Instances;
import weka.core.SerializedObject;

/**
 * Class encapsulating the TransformationDictionary element. Contains a list of
 * DefineFunctions and DerivedFields (if any).
 * 
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 * @version $Revision 1.0 $
 */
class TransformationDictionary implements Serializable {

  /** ID added to avoid warning */
  private static final long serialVersionUID = -4649092421002319829L;

  /** The defined functions (if any), in the order they were encountered */
  protected ArrayList<DefineFunction> m_defineFunctions =
    new ArrayList<DefineFunction>();

  /** The derived fields (if any), in the order they were encountered */
  protected ArrayList<DerivedFieldMetaInfo> m_derivedFields =
    new ArrayList<DerivedFieldMetaInfo>();

  /**
   * Construct a new TransformationDictionary by scanning the direct children
   * of the supplied element for DerivedField and DefineFunction elements.
   * Other child nodes (text, comments, unrecognised elements) are ignored.
   * 
   * @param dictionary the Element containing the dictionary
   * @param dataDictionary the data dictionary as an Instances object. It is
   *          not consulted during construction; field definitions are
   *          supplied later via one of the setFieldDefsForDerivedFields
   *          methods.
   * @throws Exception if there is a problem constructing the transformation
   *           dictionary
   */
  protected TransformationDictionary(Element dictionary,
    Instances dataDictionary) throws Exception {

    // Field definitions are deliberately NOT handed to the derived fields
    // and functions here (null is passed instead). They are set once the
    // complete field structure is known - see setFieldDefsForDerivedFields().
    NodeList children = dictionary.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
      Node child = children.item(i);
      if (child.getNodeType() != Node.ELEMENT_NODE) {
        continue;
      }

      Element childElement = (Element) child;
      String tagName = childElement.getTagName();
      if (tagName.equals("DerivedField")) {
        m_derivedFields.add(new DerivedFieldMetaInfo(childElement, null, null));
      } else if (tagName.equals("DefineFunction")) {
        m_defineFunctions.add(new DefineFunction(childElement, null));
      }
    }
  }

  /**
   * Set the field definitions for the derived fields. Usually called once the
   * structure of the mining schema + derived fields has been determined.
   * Calling this method with an array list of field definitions in the order of
   * attributes in the mining schema + derived fields will allow the expressions
   * used in the derived fields to access the correct attribute values from the
   * incoming instance (also allows for derived fields to reference other
   * derived fields). This is necessary because the order of fields in the data
   * dictionary is not guaranteed to be the same as the order in the mining
   * schema.
   * 
   * IMPORTANT: for derived field x to be able to reference derived field y, y
   * must have been declared before x in the PMML file. This is because the
   * process of constructing an input vector of values to the model proceeds in
   * a linear left-to-right fashion - so any referenced derived field (e.g.
   * field y), must have already computed its value when x is evaluated.
   * 
   * @param fieldDefs the definition of the incoming fields as an array list of
   *          attributes
   * @throws Exception if a problem occurs
   */
  protected void setFieldDefsForDerivedFields(ArrayList<Attribute> fieldDefs)
    throws Exception {
    for (DerivedFieldMetaInfo derivedField : m_derivedFields) {
      derivedField.setFieldDefs(fieldDefs);
    }

    // Refresh the define functions - force them to pass on their parameter
    // definitions as field defs to their encapsulated expression. Parameter
    // defs were not passed on by expressions encapsulated in DefineFunctions
    // at construction time because the encapsulated expression does not know
    // whether it is contained in a DefineFunction or a DerivedField. Since
    // we delay passing on field definitions until all derived fields are
    // loaded (in order to allow derived fields to reference other derived
    // fields), we must tell DefineFunctions to pass on their parameter
    // definitions now.
    for (DefineFunction function : m_defineFunctions) {
      function.pushParameterDefs();
    }
  }

  /**
   * Set the field definitions for the derived fields. Usually called once the
   * structure of the mining schema has been determined. Calling this method
   * with field definitions in the order of attributes in the mining schema
   * will allow the expressions used in the derived fields to access the
   * correct attribute values from the incoming instances. This is a
   * convenience overload that copies the attributes of the supplied Instances
   * object, in index order, into a list and delegates to
   * {@link #setFieldDefsForDerivedFields(ArrayList)}.
   * 
   * @param fieldDefs the definition of the incoming fields as an Instances
   *          object
   * @throws Exception if a problem occurs
   */
  protected void setFieldDefsForDerivedFields(Instances fieldDefs)
    throws Exception {
    ArrayList<Attribute> tempDefs = new ArrayList<Attribute>();
    for (int i = 0; i < fieldDefs.numAttributes(); i++) {
      tempDefs.add(fieldDefs.attribute(i));
    }
    setFieldDefsForDerivedFields(tempDefs);
  }

  /**
   * Get the derived fields held by this dictionary.
   * 
   * @return a new list containing the derived fields. The list itself is a
   *         copy (adding to or removing from it does not affect this
   *         dictionary), but the elements are the same objects that this
   *         dictionary holds.
   */
  protected ArrayList<DerivedFieldMetaInfo> getDerivedFields() {
    return new ArrayList<DerivedFieldMetaInfo>(m_derivedFields);
  }

  /**
   * Get a named DefineFunction. Returns a deep copy of the function. If more
   * than one function carries the requested name, the first one declared is
   * used.
   * 
   * @param functionName the name of the function to get
   * @return the named function or null if it cannot be found
   * @throws Exception if there is a problem deep copying the function
   */
  protected DefineFunction getFunction(String functionName) throws Exception {
    for (DefineFunction candidate : m_defineFunctions) {
      if (candidate.getName().equals(functionName)) {
        // Copy via serialization so that callers never share state with the
        // function stored in this dictionary.
        SerializedObject so = new SerializedObject(candidate, false);
        return (DefineFunction) so.getObject();
      }
    }

    return null;
  }

  /**
   * Produce a textual summary listing the derived fields (as attributes) and
   * the defined functions. Sections with no entries are omitted.
   * 
   * @return a description of this transformation dictionary
   */
  @Override
  public String toString() {
    // Local, single-threaded buffer - no need for StringBuffer's locking.
    StringBuilder buff = new StringBuilder();

    buff.append("Transformation dictionary:\n");

    if (!m_derivedFields.isEmpty()) {
      buff.append("derived fields:\n");
      for (DerivedFieldMetaInfo d : m_derivedFields) {
        buff.append(d.getFieldAsAttribute()).append("\n");
      }
    }

    if (!m_defineFunctions.isEmpty()) {
      buff.append("\nfunctions:\n");
      for (DefineFunction f : m_defineFunctions) {
        buff.append(f.toString("  ")).append("\n");
      }
    }

    buff.append("\n");

    return buff.toString();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy