// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price: only 1 $.
// You can buy this project and download/modify it as often as you want.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* MergeNominalValues.java
* Copyright (C) 2013 University of Waikato, Hamilton, New Zealand
*
*/
package weka.filters.supervised.attribute;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.filters.SimpleBatchFilter;
import weka.filters.SupervisedFilter;
/**
* Merges values of all nominal attributes among the
* specified attributes, excluding the class attribute, using the CHAID method,
* but without considering re-splitting of merged subsets. It implements Steps 1 and
* 2 described by Kass (1980), see
*
* Gordon V. Kass (1980). An Exploratory Technique for Investigating Large
* Quantities of Categorical Data. Applied Statistics. 29(2):119-127.
*
* Once attribute values have been merged, a chi-squared test using the
* Bonferroni correction is applied to check if the resulting attribute is a
* valid predictor, based on the Bonferroni multiplier in Equation 3.2 in Kass
* (1980). If an attribute does not pass this test, all remaining values (if
* any) are merged. Nevertheless, useless predictors can slip through without
* being fully merged, e.g. identifier attributes.
*
* The code applies the Yates correction when the chi-squared statistic is
* computed.
*
* Note that the algorithm is quadratic in the number of attribute values for an
* attribute.
*
*
*
* Valid options are:
*
*
*
* -D
* Turns on output of debugging information.
*
*
*
* -L <double>
* The significance level (default: 0.05).
*
*
*
* -R <range>
* Sets list of attributes to act on (or its inverse). 'first' and 'last' are accepted as well.
* E.g.: first-5,7,9,20-last
* (default: first-last)
*
*
*
* -V
* Invert matching sense (i.e. act on all attributes not specified in list)
*
*
*
* -O
* Use short identifiers for merged subsets.
*
*
*
*
* @author Eibe Frank
* @version $Revision: 14508 $
*/
public class MergeNominalValues extends SimpleBatchFilter implements
SupervisedFilter, WeightedInstancesHandler, WeightedAttributesHandler, TechnicalInformationHandler {
/** Version identifier used for serialization. */
static final long serialVersionUID = 7447337831221353842L;
/** The significance level (initialised to 0.05). */
protected double m_SigLevel = 0.05;
/** Stores which attributes to operate on (or not); initialised to "first-last". */
protected Range m_SelectCols = new Range("first-last");
/** Stores the indexes of the selected attributes in order. */
protected int[] m_SelectedAttributes;
/** Indicators for which attributes need to be changed. */
protected boolean[] m_AttToBeModified;
/**
 * The indicators used to map the old values.
 * NOTE(review): presumably indexed by attribute, then by old value index - confirm against usage.
 */
protected int[][] m_Indicators;
/** Whether to use short identifiers for merged subsets (off by default). */
protected boolean m_UseShortIdentifiers = false;
/**
 * Provides the human-readable description of this filter, including the
 * literature reference obtained from {@link #getTechnicalInformation()}.
 *
 * @return a description of the filter suitable for displaying in the
 *         explorer/experimenter gui
 */
@Override
public String globalInfo() {
  // What the filter does, leading into the literature reference.
  final String overview =
    "Merges values of all nominal attributes among the specified attributes, excluding "
      + "the class attribute, using the CHAID method, but without considering re-splitting of "
      + "merged subsets. It implements Steps 1 and 2 described by Kass (1980), see\n\n";

  // The reference itself is rendered from the technical information of this class.
  final String citation = getTechnicalInformation().toString();

  // How the merged attribute is validated afterwards.
  final String significanceTest =
    "Once attribute values have been merged, a chi-squared test using the Bonferroni "
      + "correction is applied to check if the resulting attribute is a valid predictor, "
      + "based on the Bonferroni multiplier in Equation 3.2 in Kass (1980). If an attribute does "
      + "not pass this test, all remaining values (if any) are merged. Nevertheless, useless "
      + "predictors can slip through without being fully merged, e.g. identifier attributes.\n\n";

  final String yatesNote =
    "The code applies the Yates correction when the chi-squared statistic is computed.\n\n";

  final String complexityNote =
    "Note that the algorithm is quadratic in the number of attribute values for an attribute.";

  return overview + citation + "\n\n" + significanceTest + yatesNote + complexityNote;
}
/**
 * Builds the bibliographic record (an article entry for Kass, 1980) that
 * describes the technical background of this class.
 *
 * @return the technical information about this class
 */
@Override
public TechnicalInformation getTechnicalInformation() {
  final TechnicalInformation article = new TechnicalInformation(Type.ARTICLE);

  article.setValue(Field.AUTHOR, "Gordon V. Kass");
  article.setValue(Field.TITLE,
    "An Exploratory Technique for Investigating Large Quantities of Categorical Data");
  article.setValue(Field.JOURNAL, "Applied Statistics");
  article.setValue(Field.YEAR, "1980");
  article.setValue(Field.VOLUME, "29");
  article.setValue(Field.NUMBER, "2");
  article.setValue(Field.PAGES, "119-127");

  return article;
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
@Override
public Enumeration