weka.core.pmml.NormContinuous Maven / Gradle / Ivy
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
/*
* NormContinuous.java
* Copyright (C) 2008-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.core.pmml;
import java.util.ArrayList;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import weka.core.Attribute;
import weka.core.Utils;
/**
* Class encapsulating a NormContinuous Expression.
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
* @version $Revision 1.0 $
*/
public class NormContinuous extends Expression {
/**
* For serialization
*/
private static final long serialVersionUID = 4714332374909851542L;
/** The name of the field to use */
protected String m_fieldName;
/** The index of the field */
protected int m_fieldIndex;
/** True if a replacement for missing values has been specified */
protected boolean m_mapMissingDefined = false;
/** The value of the missing value replacement (if defined) */
protected double m_mapMissingTo;
/** Outlier treatment method (default = asIs) */
protected MiningFieldMetaInfo.Outlier m_outlierTreatmentMethod =
MiningFieldMetaInfo.Outlier.ASIS;
/** original values for the LinearNorm entries */
protected double[] m_linearNormOrig;
/** norm values for the LinearNorm entries */
protected double[] m_linearNormNorm;
public NormContinuous(Element normCont, FieldMetaInfo.Optype opType, ArrayList fieldDefs)
throws Exception {
super(opType, fieldDefs);
if (opType != FieldMetaInfo.Optype.CONTINUOUS) {
throw new Exception("[NormContinuous] can only have a continuous optype");
}
m_fieldName = normCont.getAttribute("field");
String mapMissing = normCont.getAttribute("mapMissingTo");
if (mapMissing != null && mapMissing.length() > 0) {
m_mapMissingTo = Double.parseDouble(mapMissing);
m_mapMissingDefined = true;
}
String outliers = normCont.getAttribute("outliers");
if (outliers != null && outliers.length() > 0) {
for (MiningFieldMetaInfo.Outlier o : MiningFieldMetaInfo.Outlier.values()) {
if (o.toString().equals(outliers)) {
m_outlierTreatmentMethod = o;
break;
}
}
}
// get the LinearNorm elements
NodeList lnL = normCont.getElementsByTagName("LinearNorm");
if (lnL.getLength() < 2) {
throw new Exception("[NormContinuous] Must be at least 2 LinearNorm elements!");
}
m_linearNormOrig = new double[lnL.getLength()];
m_linearNormNorm = new double[lnL.getLength()];
for (int i = 0; i < lnL.getLength(); i++) {
Node lnN = lnL.item(i);
if (lnN.getNodeType() == Node.ELEMENT_NODE) {
Element lnE = (Element)lnN;
String orig = lnE.getAttribute("orig");
m_linearNormOrig[i] = Double.parseDouble(orig);
String norm = lnE.getAttribute("norm");
m_linearNormNorm[i] = Double.parseDouble(norm);
}
}
if (fieldDefs != null) {
setUpField();
}
}
/**
* Set the field definitions for this Expression to use
*
* @param fieldDefs the field definitions to use
* @throws Exception if there is a problem setting the field definitions
*/
public void setFieldDefs(ArrayList fieldDefs) throws Exception {
super.setFieldDefs(fieldDefs);
setUpField();
}
private void setUpField() throws Exception {
m_fieldIndex = -1;
if (m_fieldDefs != null) {
m_fieldIndex = getFieldDefIndex(m_fieldName);
// System.err.println("NormCont... index of " + m_fieldName + " " + m_fieldIndex);
if (m_fieldIndex < 0) {
throw new Exception("[NormContinuous] Can't find field " + m_fieldName
+ " in the supplied field definitions.");
}
Attribute field = m_fieldDefs.get(m_fieldIndex);
if (!field.isNumeric()) {
throw new Exception("[NormContinuous] reference field " + m_fieldName
+" must be continuous.");
}
}
}
/**
* Return the structure of the result of applying this Expression
* as an Attribute.
*
* @return the structure of the result of applying this Expression as an
* Attribute.
*/
protected Attribute getOutputDef() {
return new Attribute(m_fieldName + "_normContinuous");
}
/**
* Get the result of evaluating the expression. In the case
* of a continuous optype, a real number is returned; in
* the case of a categorical/ordinal optype, the index of the nominal
* value is returned as a double.
*
* @param incoming the incoming parameter values
* @return the result of normalizing the input field
* @throws Exception if there is a problem computing the result
*/
public double getResult(double[] incoming) throws Exception {
double[] a = m_linearNormOrig;
double[] b = m_linearNormNorm;
return computeNorm(a, b, incoming);
}
/**
* Compute the inverse of the normalization (i.e. map back to a unormalized value).
*
* @param incoming the incoming parameter values
* @return the unormalized value
*/
public double getResultInverse(double[] incoming) {
double[] a = m_linearNormNorm;
double[] b = m_linearNormOrig;
return computeNorm(a, b, incoming);
}
private double computeNorm(double[] a, double[] b, double[] incoming) {
double result = 0.0;
if (Utils.isMissingValue(incoming[m_fieldIndex])) {
if (m_mapMissingDefined) {
result = m_mapMissingTo;
} else {
result = incoming[m_fieldIndex]; // just return the missing value
}
} else {
double x = incoming[m_fieldIndex];
/*System.err.println("NormCont (index): " + m_fieldIndex);
System.err.println("NormCont (input val): " + x); */
// boundary cases first
if (x < a[0]) {
if (m_outlierTreatmentMethod == MiningFieldMetaInfo.Outlier.ASIS) {
double slope = (b[1] - b[0]) /
(a[1] - a[0]);
double offset = b[0] - (slope * a[0]);
result = slope * x + offset;
} else if (m_outlierTreatmentMethod == MiningFieldMetaInfo.Outlier.ASEXTREMEVALUES) {
result = b[0];
} else {
// map to missing replacement value
result = m_mapMissingTo;
}
} else if (x > a[a.length - 1]) {
int length = a.length;
if (m_outlierTreatmentMethod == MiningFieldMetaInfo.Outlier.ASIS) {
double slope = (b[length - 1] - b[length - 2]) /
(a[length - 1] - a[length - 2]);
double offset = b[length - 1] - (slope * a[length - 1]);
result = slope * x + offset;
} else if (m_outlierTreatmentMethod == MiningFieldMetaInfo.Outlier.ASEXTREMEVALUES) {
result = b[length - 1];
} else {
// map to missing replacement value
result = m_mapMissingTo;
}
} else {
// find the segment that this value falls in to
for (int i = 1; i < a.length; i++) {
if (x <= a[i]) {
result = b[i - 1];
result += ((x - a[i - 1])/(a[i] - a[i - 1]) *
(b[i] - b[i - 1]));
break;
}
}
}
}
return result;
}
/**
* Always throws an Exception since the result of NormContinuous must
* be continuous.
*
* @param incoming the incoming parameter values
* @throws Exception always
*/
public String getResultCategorical(double[] incoming) throws Exception {
throw new Exception("[NormContinuous] Can't return the result as a categorical value!");
}
public String toString(String pad) {
StringBuffer buff = new StringBuffer();
buff.append(pad + "NormContinuous (" + m_fieldName + "):\n" + pad + "linearNorm: ");
for (int i = 0; i < m_linearNormOrig.length; i++) {
buff.append("" + m_linearNormOrig[i] + ":" + m_linearNormNorm[i] + " ");
}
buff.append("\n" + pad);
buff.append("outlier treatment: " + m_outlierTreatmentMethod.toString());
if (m_mapMissingDefined) {
buff.append("\n" + pad);
buff.append("map missing values to: " + m_mapMissingTo);
}
return buff.toString();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy