weka.core.pmml.MiningFieldMetaInfo Maven / Gradle / Ivy
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
/*
* MiningFieldMetaInfo.java
* Copyright (C) 2008-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.core.pmml;
import java.io.Serializable;
import org.w3c.dom.Element;
import weka.core.Attribute;
import weka.core.Instances;
import weka.core.Utils;
/**
* Class encapsulating information about a MiningField.
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
* @version $Revision: 8034 $
*/
public class MiningFieldMetaInfo extends FieldMetaInfo implements Serializable {
/** for serialization */
private static final long serialVersionUID = -1256774332779563185L;
enum Usage {
ACTIVE ("active"),
PREDICTED ("predicted"),
SUPPLEMENTARY ("supplementary"),
GROUP ("group"),
ORDER ("order");
private final String m_stringVal;
Usage(String name) {
m_stringVal = name;
}
public String toString() {
return m_stringVal;
}
}
/** usage type */
Usage m_usageType = Usage.ACTIVE;
enum Outlier {
ASIS ("asIs"),
ASMISSINGVALUES ("asMissingValues"),
ASEXTREMEVALUES ("asExtremeValues");
private final String m_stringVal;
Outlier(String name){
m_stringVal = name;
}
public String toString() {
return m_stringVal;
}
}
/** outlier treatmemnt method */
protected Outlier m_outlierTreatmentMethod = Outlier.ASIS;
/** outlier low value */
protected double m_lowValue;
/** outlier high value */
protected double m_highValue;
enum Missing {
ASIS ("asIs"),
ASMEAN ("asMean"),
ASMODE ("asMode"),
ASMEDIAN ("asMedian"),
ASVALUE ("asValue");
private final String m_stringVal;
Missing(String name) {
m_stringVal = name;
}
public String toString() {
return m_stringVal;
}
}
/** missing values treatment method */
protected Missing m_missingValueTreatmentMethod = Missing.ASIS;
/** actual missing value replacements (if specified) */
protected String m_missingValueReplacementNominal;
protected double m_missingValueReplacementNumeric;
/** optype overrides (override data dictionary type - NOT SUPPORTED AT PRESENT) */
protected FieldMetaInfo.Optype m_optypeOverride = FieldMetaInfo.Optype.NONE;
/** the index of the field in the mining schema Instances */
protected int m_index;
/** importance (if defined) */
protected double m_importance;
/** mining schema (needed for toString method) */
Instances m_miningSchemaI = null;
// TO-DO: invalid values?
/**
* Set the Instances that represent the mining schema. Needed so that
* the toString() method for this class can output attribute names
* and values.
*
* @param miningSchemaI the mining schema as an Instances object
*/
protected void setMiningSchemaInstances(Instances miningSchemaI) {
m_miningSchemaI = miningSchemaI;
}
/**
* Get the usage type of this field.
*
* @return the usage type of this field
*/
public Usage getUsageType() {
return m_usageType;
}
/**
* Return a textual representation of this MiningField.
*
* @return a String describing this mining field
*/
public String toString() {
StringBuffer temp = new StringBuffer();
temp.append(m_miningSchemaI.attribute(m_index));
temp.append("\n\tusage: " + m_usageType
+ "\n\toutlier treatment: " + m_outlierTreatmentMethod);
if (m_outlierTreatmentMethod == Outlier.ASEXTREMEVALUES) {
temp.append(" (lowValue = " + m_lowValue + " highValue = " + m_highValue + ")");
}
temp.append("\n\tmissing value treatment: "
+ m_missingValueTreatmentMethod);
if (m_missingValueTreatmentMethod != Missing.ASIS) {
temp.append(" (replacementValue = "
+ ((m_missingValueReplacementNominal != null)
? m_missingValueReplacementNominal
: Utils.doubleToString(m_missingValueReplacementNumeric, 4))
+ ")");
}
return temp.toString();
}
/**
* Set the index of this field in the mining schema Instances
*
* @param index the index of the attribute in the mining schema Instances
* that this field represents
*/
public void setIndex(int index) {
m_index = index;
}
/**
* Get the name of this field.
*
* @return the name of this field
*/
public String getName() {
return m_fieldName;
}
/**
* Get the outlier treatment method used for this field.
*
* @return the outlier treatment method
*/
public Outlier getOutlierTreatmentMethod() {
return m_outlierTreatmentMethod;
}
/**
* Get the missing value treatment method for this field.
*
* @return the missing value treatment method
*/
public Missing getMissingValueTreatmentMethod() {
return m_missingValueTreatmentMethod;
}
/**
* Apply the missing value treatment method for this field.
*
* @param value the incoming value to apply the treatment to
* @return the value after applying the missing value treatment (if any)
* @throws Exception if there is a problem
*/
public double applyMissingValueTreatment(double value) throws Exception {
double newVal = value;
if (m_missingValueTreatmentMethod != Missing.ASIS &&
Utils.isMissingValue(value)) {
if (m_missingValueReplacementNominal != null) {
Attribute att = m_miningSchemaI.attribute(m_index);
int valIndex = att.indexOfValue(m_missingValueReplacementNominal);
if (valIndex < 0) {
throw new Exception("[MiningSchema] Nominal missing value replacement value doesn't "
+ "exist in the mining schema Instances!");
}
newVal = valIndex;
} else {
newVal = m_missingValueReplacementNumeric;
}
}
return newVal;
}
/**
* Apply the outlier treatment method for this field.
*
* @param value the incoming value to apply the treatment to
* @return the value after applying the treatment (if any)
* @throws Exception if there is a problem
*/
public double applyOutlierTreatment(double value) throws Exception {
double newVal = value;
if (m_outlierTreatmentMethod != Outlier.ASIS) {
if (m_outlierTreatmentMethod == Outlier.ASMISSINGVALUES) {
newVal = applyMissingValueTreatment(value);
} else {
if (value < m_lowValue) {
newVal = m_lowValue;
} else if (value > m_highValue) {
newVal = m_highValue;
}
}
}
return newVal;
}
/**
* Return this mining field as an Attribute.
*
* @return an Attribute for this field.
*/
public Attribute getFieldAsAttribute() {
return m_miningSchemaI.attribute(m_index);
}
/**
* Constructs a new MiningFieldMetaInfo object.
*
* @param field the Element that contains the field information
* @throws Exception if there is a problem during construction
*/
public MiningFieldMetaInfo(Element field) throws Exception {
super(field);
// m_fieldName = field.getAttribute("name");
// get the usage type
String usage = field.getAttribute("usageType");
for (MiningFieldMetaInfo.Usage u : Usage.values()) {
if (u.toString().equals(usage)) {
m_usageType = u;
break;
}
}
// optype override
/*String optype = field.getAttribute("optype");
if (optype.length() > 0) {
if (optype.equals("continuous")) {
m_optypeOverride = FieldMetaInfo.Optype.CONTINUOUS;
} else if (optype.equals("categorical")) {
m_optypeOverride = FieldMetaInfo.Optype.CATEGORICAL;
} else if (optype.equals("ordinal")) {
m_optypeOverride = FieldMetaInfo.Optype.ORDINAL;
}
}*/
// importance
String importance = field.getAttribute("importance");
if (importance.length() > 0) {
m_importance = Double.parseDouble(importance);
}
// outliers
String outliers = field.getAttribute("outliers");
for (MiningFieldMetaInfo.Outlier o : Outlier.values()) {
if (o.toString().equals(outliers)) {
m_outlierTreatmentMethod = o;
break;
}
}
if (outliers.length() > 0 && m_outlierTreatmentMethod == Outlier.ASEXTREMEVALUES) {
// low and high values are required for as extreme values handling
String lowValue = field.getAttribute("lowValue");
if (lowValue.length() > 0) {
m_lowValue = Double.parseDouble(lowValue);
} else {
throw new Exception("[MiningFieldMetaInfo] as extreme values outlier treatment "
+ "specified, but no low value defined!");
}
String highValue = field.getAttribute("highValue");
if (highValue.length() > 0) {
m_highValue = Double.parseDouble(highValue);
} else {
throw new Exception("[MiningFieldMetaInfo] as extreme values outlier treatment "
+ "specified, but no high value defined!");
}
}
// missing values
String missingReplacement = field.getAttribute("missingValueReplacement");
if (missingReplacement.length() > 0) {
// try and parse it as a number
try {
m_missingValueReplacementNumeric = Double.parseDouble(missingReplacement);
} catch (IllegalArgumentException ex) {
// must be numeric
m_missingValueReplacementNominal = missingReplacement;
}
// treatment type
String missingTreatment = field.getAttribute("missingValueTreatment");
for (MiningFieldMetaInfo.Missing m : Missing.values()) {
if (m.toString().equals(missingTreatment)) {
m_missingValueTreatmentMethod = m;
break;
}
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy