weka.core.pmml.TransformationDictionary Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This version represents the developer version, the
"bleeding edge" of development, you could say. New functionality gets added
to this version.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* TransformationDictionary.java
* Copyright (C) 2008-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.core.pmml;
import java.io.Serializable;
import java.util.ArrayList;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import weka.core.Attribute;
import weka.core.Instances;
import weka.core.SerializedObject;
/**
* Class encapsulating the TransformationDictionary element. Contains a list of
* DefineFunctions and DerivedFields (if any).
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
* @version $Revision 1.0 $
*/
class TransformationDictionary implements Serializable {

  /** ID added to avoid warning */
  private static final long serialVersionUID = -4649092421002319829L;

  /** The defined functions (if any) */
  protected ArrayList<DefineFunction> m_defineFunctions =
    new ArrayList<DefineFunction>();

  /** The derived fields (if any) */
  protected ArrayList<DerivedFieldMetaInfo> m_derivedFields =
    new ArrayList<DerivedFieldMetaInfo>();

  /**
   * Construct a new TransformationDictionary by collecting every
   * DerivedField and DefineFunction child element of the supplied element.
   * Child nodes that are not elements, and elements with any other tag name,
   * are ignored.
   *
   * @param dictionary the Element containing the dictionary
   * @param dataDictionary the data dictionary as an Instances object (not
   *          consulted by this constructor)
   * @throws Exception if there is a problem constructing the transformation
   *           dictionary
   */
  protected TransformationDictionary(Element dictionary,
    Instances dataDictionary) throws Exception {

    // Field definitions are deliberately NOT handed to the derived fields and
    // functions here (null is passed instead). They are supplied later via
    // setFieldDefsForDerivedFields(), once all derived fields are loaded, so
    // that derived fields are able to reference other derived fields.
    NodeList children = dictionary.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
      Node child = children.item(i);
      if (child.getNodeType() != Node.ELEMENT_NODE) {
        continue;
      }

      Element element = (Element) child;
      String tagName = element.getTagName();
      if (tagName.equals("DerivedField")) {
        m_derivedFields.add(new DerivedFieldMetaInfo(element,
          null /* incomingFieldDefs */, null));
      } else if (tagName.equals("DefineFunction")) {
        m_defineFunctions.add(new DefineFunction(element, null));
      }
    }
  }

  /**
   * Set the field definitions for the derived fields. Usually called once the
   * structure of the mining schema + derived fields has been determined.
   * Calling this method with an array list of field definitions in the order of
   * attributes in the mining schema + derived fields will allow the expressions
   * used in the derived fields to access the correct attribute values from the
   * incoming instance (also allows for derived fields to reference other
   * derived fields). This is necessary because construction of the
   * TransformationDictionary uses the data dictionary to reference fields (the
   * order of fields in the data dictionary is not guaranteed to be the same as
   * the order in the mining schema).
   *
   * IMPORTANT: for derived field x to be able to reference derived field y, y
   * must have been declared before x in the PMML file. This is because the
   * process of constructing an input vector of values to the model proceeds in
   * a linear left-to-right fashion - so any referenced derived field (e.g.
   * field y), must have already computed its value when x is evaluated.
   *
   * @param fieldDefs the definition of the incoming fields as an array list of
   *          attributes
   * @throws Exception if a problem occurs
   */
  protected void setFieldDefsForDerivedFields(ArrayList<Attribute> fieldDefs)
    throws Exception {

    for (DerivedFieldMetaInfo derivedField : m_derivedFields) {
      derivedField.setFieldDefs(fieldDefs);
    }

    // Refresh the define functions - force them to pass on their parameter
    // definitions as field defs to their encapsulated expression. Parameter
    // defs were not passed on by expressions encapsulated in DefineFunctions
    // at construction time because the encapsulated expression does not know
    // whether it is contained in a DefineFunction or a DerivedField. Since
    // we delay passing on field definitions until all derived fields are
    // loaded (in order to allow derived fields to reference other derived
    // fields), we must tell DefineFunctions to pass on their parameter
    // definitions.
    for (DefineFunction function : m_defineFunctions) {
      function.pushParameterDefs();
    }
  }

  /**
   * Set the field definitions for the derived fields. Usually called once the
   * structure of the mining schema has been determined. Calling this method
   * with field definitions in the order of attributes in the mining schema
   * will allow the expressions used in the derived fields to access the
   * correct attribute values from the incoming instances. This is necessary
   * because construction of the TransformationDictionary uses the data
   * dictionary to reference fields (the order of fields in the data
   * dictionary is not guaranteed to be the same as the order in the mining
   * schema).
   *
   * The attributes of the supplied Instances object are copied, in index
   * order, into a list that is then passed to
   * {@link #setFieldDefsForDerivedFields(ArrayList)}.
   *
   * @param fieldDefs the definition of the incoming fields as an Instances
   *          object
   * @throws Exception if a problem occurs
   */
  protected void setFieldDefsForDerivedFields(Instances fieldDefs)
    throws Exception {

    int numAttributes = fieldDefs.numAttributes();
    ArrayList<Attribute> tempDefs = new ArrayList<Attribute>(numAttributes);
    for (int i = 0; i < numAttributes; i++) {
      tempDefs.add(fieldDefs.attribute(i));
    }
    setFieldDefsForDerivedFields(tempDefs);
  }

  /**
   * Get the derived fields held by this dictionary.
   *
   * @return a new list containing the derived fields; the list itself is a
   *         copy, but its elements are the same objects held internally
   */
  protected ArrayList<DerivedFieldMetaInfo> getDerivedFields() {
    return new ArrayList<DerivedFieldMetaInfo>(m_derivedFields);
  }

  /**
   * Get a named DefineFunction. The first function whose name equals the
   * supplied name is copied via a SerializedObject and the copy is returned.
   *
   * @param functionName the name of the function to get
   * @return a copy of the named function or null if it cannot be found
   * @throws Exception if there is a problem deep copying the function
   */
  protected DefineFunction getFunction(String functionName) throws Exception {
    for (DefineFunction candidate : m_defineFunctions) {
      if (candidate.getName().equals(functionName)) {
        // Hand back a copy rather than the instance stored in this dictionary
        SerializedObject so = new SerializedObject(candidate, false);
        return (DefineFunction) so.getObject();
      }
    }
    return null;
  }

  /**
   * Build a textual description listing the derived fields followed by the
   * defined functions; either section is omitted when it is empty.
   *
   * @return a description of this transformation dictionary
   */
  @Override
  public String toString() {
    StringBuilder buff = new StringBuilder();

    buff.append("Transformation dictionary:\n");

    if (!m_derivedFields.isEmpty()) {
      buff.append("derived fields:\n");
      for (DerivedFieldMetaInfo d : m_derivedFields) {
        buff.append(d.getFieldAsAttribute()).append("\n");
      }
    }

    if (!m_defineFunctions.isEmpty()) {
      buff.append("\nfunctions:\n");
      for (DefineFunction f : m_defineFunctions) {
        buff.append(f.toString(" ")).append("\n");
      }
    }

    buff.append("\n");
    return buff.toString();
  }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy