All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.filters.supervised.attribute.NominalToBinary Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    NominalToBinary.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters.supervised.attribute;

import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Vector;

import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.filters.Filter;
import weka.filters.SupervisedFilter;

/**
 *  Converts all nominal attributes into binary numeric
 * attributes. An attribute with k values is transformed into k binary
 * attributes if the class is nominal (using the one-attribute-per-value
 * approach). Binary attributes are left binary if option '-A' is not given. If
 * the class is numeric, k - 1 new binary attributes are generated in the manner
 * described in "Classification and Regression Trees" by Breiman et al. (i.e.
 * by taking the average class value associated with each attribute value into
 * account)
 * <br/>
 * <br/>
 * For more information, see:<br/>
 * <br/>
 * L. Breiman, J.H. Friedman, R.A. Olshen, C.J. Stone (1984). Classification and
 * Regression Trees. Wadsworth Inc.
 * <p/>
 *
 * BibTeX:
 *
 * <pre>
 * &#64;book{Breiman1984,
 *    author = {L. Breiman and J.H. Friedman and R.A. Olshen and C.J. Stone},
 *    publisher = {Wadsworth Inc},
 *    title = {Classification and Regression Trees},
 *    year = {1984},
 *    ISBN = {0412048418}
 * }
 * </pre>
 * <p/>
 *
 * Valid options are:
 * <p/>
 *
 * <pre>
 * -N
 *  Sets if binary attributes are to be coded as nominal ones.
 * </pre>
 *
 * <pre>
 * -A
 *  For each nominal value a new attribute is created,
 *  not only if there are more than 2 values.
 * </pre>
 *
 * <pre>
 * -spread-attribute-weight
 *  When generating binary attributes, spread weight of old
 *  attribute across new attributes. Do not give each new attribute the old weight.
 * </pre>
* * * * @author Eibe Frank ([email protected]) * @version $Revision: 14509 $ */ public class NominalToBinary extends Filter implements SupervisedFilter, OptionHandler, TechnicalInformationHandler, WeightedAttributesHandler, WeightedInstancesHandler { /** for serialization */ static final long serialVersionUID = -5004607029857673950L; /** The sorted indices of the attribute values. */ private int[][] m_Indices = null; /** Are the new attributes going to be nominal or numeric ones? */ private boolean m_Numeric = true; /** Are all values transformed into new attributes? */ private boolean m_TransformAll = false; /** Whether we need to transform at all */ private boolean m_needToTransform = false; /** Whether to spread attribute weight when creating binary attributes */ protected boolean m_SpreadAttributeWeight = false; /** * Returns a string describing this filter * * @return a description of the filter suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Converts all nominal attributes into binary numeric attributes. An " + "attribute with k values is transformed into k binary attributes if " + "the class is nominal (using the one-attribute-per-value approach). " + "Binary attributes are left binary if option '-A' is not given. " + "If the class is numeric, k - 1 new binary attributes are generated " + "in the manner described in \"Classification and Regression " + "Trees\" by Breiman et al. (i.e., by taking the average class value associated " + "with each attribute value into account).\n\n" + "For more information, see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing detailed * information about the technical background of this class, e.g., paper * reference or book this class is based on. 
* * @return the technical information about this class */ @Override public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.BOOK); result.setValue(Field.AUTHOR, "L. Breiman and J.H. Friedman and R.A. Olshen and C.J. Stone"); result.setValue(Field.TITLE, "Classification and Regression Trees"); result.setValue(Field.YEAR, "1984"); result.setValue(Field.PUBLISHER, "Wadsworth Inc"); result.setValue(Field.ISBN, "0412048418"); return result; } /** * Returns the Capabilities of this filter. * * @return the capabilities of this object * @see Capabilities */ @Override public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); result.disableAll(); // attributes result.enableAllAttributes(); result.enable(Capability.MISSING_VALUES); // class result.enable(Capability.NUMERIC_CLASS); result.enable(Capability.DATE_CLASS); result.enable(Capability.NOMINAL_CLASS); result.enable(Capability.MISSING_CLASS_VALUES); return result; } /** * Sets the format of the input instances. * * @param instanceInfo an Instances object containing the input instance * structure (any instances contained in the object are ignored - * only the structure is required). * @return true if the outputFormat may be collected immediately * @throws Exception if the input format can't be set successfully */ @Override public boolean setInputFormat(Instances instanceInfo) throws Exception { super.setInputFormat(instanceInfo); if (instanceInfo.classIndex() < 0) { throw new UnassignedClassException( "No class has been assigned to the instances"); } setOutputFormat(); m_Indices = null; if (instanceInfo.classAttribute().isNominal()) { return true; } else { return false; } } /** * Input an instance for filtering. Filter requires all training instances be * read before producing output. * * @param instance the input instance * @return true if the filtered instance may now be collected with output(). 
* @throws IllegalStateException if no input format has been set
   */
  @Override
  public boolean input(Instance instance) {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    // First instance of a new batch: reset the queue before continuing.
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    // No indices yet and the class is not nominal: the instance can only be
    // buffered for now.
    if ((m_Indices == null)
      && !getInputFormat().classAttribute().isNominal()) {
      bufferInput(instance);
      return false;
    }

    // Otherwise convert a copy of the instance straight away.
    convertInstance((Instance) instance.copy());
    return true;
  }

  /**
   * Signify that this batch of input to the filter is finished. If no value
   * indices exist yet and the class attribute is numeric, the average class
   * values are computed, the output format is set, and all instances held
   * by the input format are converted.
   *
   * @return true if there are instances pending output
   * @throws IllegalStateException if no input structure has been defined
   */
  @Override
  public boolean batchFinished() {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    boolean indicesMissing = (m_Indices == null);
    if (indicesMissing && getInputFormat().classAttribute().isNumeric()) {

      computeAverageClassValues();
      setOutputFormat();

      // Convert pending input instances
      for (int pos = 0; pos < getInputFormat().numInstances(); pos++) {
        convertInstance(getInputFormat().instance(pos));
      }
    }

    flushInput();
    m_NewBatch = true;

    int pending = numPendingOutput();
    return pending != 0;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy