All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.filters.unsupervised.attribute.RemoveUseless Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    RemoveUseless.java
 *    Copyright (C) 2002-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters.unsupervised.attribute;

import java.util.Enumeration;
import java.util.Vector;

import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

/**
 *  This filter removes attributes that do not vary at
 * all or that vary too much. All constant attributes are deleted automatically,
 * along with any that exceed the maximum percentage of variance parameter. The
 * maximum variance test is only applied to nominal attributes.
 * 

* * * Valid options are: *

* *

 * -M <max variance %>
 *  Maximum variance percentage allowed (default 99). Specifically, if
 *  (number_of_distinct_values / total_number_of_values * 100)
 *  is greater than this value, then the attribute will be removed.
 * 
* * * * @author Richard Kirkby ([email protected]) * @version $Revision: 14508 $ */ public class RemoveUseless extends Filter implements UnsupervisedFilter, OptionHandler, WeightedInstancesHandler, WeightedAttributesHandler { /** for serialization */ static final long serialVersionUID = -8659417851407640038L; /** The filter used to remove attributes */ protected Remove m_removeFilter = null; /** The type of attribute to delete */ protected double m_maxVariancePercentage = 99.0; /** * Returns the Capabilities of this filter. * * @return the capabilities of this object * @see Capabilities */ @Override public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); // attributes result.enable(Capability.NOMINAL_ATTRIBUTES); result.enable(Capability.NUMERIC_ATTRIBUTES); result.enable(Capability.DATE_ATTRIBUTES); result.enable(Capability.STRING_ATTRIBUTES); result.enable(Capability.MISSING_VALUES); // class result.enableAllClasses(); result.enable(Capability.MISSING_CLASS_VALUES); result.enable(Capability.NO_CLASS); return result; } /** * Sets the format of the input instances. * * @param instanceInfo an Instances object containing the input instance * structure (any instances contained in the object are ignored - * only the structure is required). * @return true if the outputFormat may be collected immediately * @throws Exception if the inputFormat can't be set successfully */ @Override public boolean setInputFormat(Instances instanceInfo) throws Exception { super.setInputFormat(instanceInfo); m_removeFilter = null; return false; } /** * Input an instance for filtering. * * @param instance the input instance * @return true if the filtered instance may now be collected with output(). 
*/ @Override public boolean input(Instance instance) { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } if (m_removeFilter != null) { m_removeFilter.input(instance); Instance processed = m_removeFilter.output(); copyValues(processed, false, instance.dataset(), outputFormatPeek()); push(processed, false); // No need to copy return true; } bufferInput(instance); return false; } /** * Signify that this batch of input to the filter is finished. * * @return true if there are instances pending output * @throws Exception if no input format defined */ @Override public boolean batchFinished() throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_removeFilter == null) { // establish attributes to remove from first batch Instances toFilter = getInputFormat(); int[] attsToDelete = new int[toFilter.numAttributes()]; int numToDelete = 0; for (int i = 0; i < toFilter.numAttributes(); i++) { if (i == toFilter.classIndex()) { continue; // skip class } AttributeStats stats = toFilter.attributeStats(i); if (stats.missingCount == toFilter.numInstances()) { attsToDelete[numToDelete++] = i; } else if (stats.distinctCount < 2) { // remove constant attributes attsToDelete[numToDelete++] = i; } else if (toFilter.attribute(i).isNominal()) { // remove nominal attributes that vary too much double variancePercent = (double) stats.distinctCount / (double) (stats.totalCount - stats.missingCount) * 100.0; if (variancePercent > m_maxVariancePercentage) { attsToDelete[numToDelete++] = i; } } } int[] finalAttsToDelete = new int[numToDelete]; System.arraycopy(attsToDelete, 0, finalAttsToDelete, 0, numToDelete); m_removeFilter = new Remove(); m_removeFilter.setAttributeIndicesArray(finalAttsToDelete); m_removeFilter.setInvertSelection(false); m_removeFilter.setInputFormat(toFilter); for (int i = 0; i < 
toFilter.numInstances(); i++) { m_removeFilter.input(toFilter.instance(i)); } m_removeFilter.batchFinished(); Instance processed; Instances outputDataset = m_removeFilter.getOutputFormat(); // restore old relation name to hide attribute filter stamp outputDataset.setRelationName(toFilter.relationName()); setOutputFormat(outputDataset); while ((processed = m_removeFilter.output()) != null) { processed.setDataset(outputDataset); push(processed, false); // No need to copy } } flushInput(); m_NewBatch = true; return (numPendingOutput() != 0); } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ @Override public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy