All downloads are free. Search and download functionality uses the official Maven repository.

weka.filters.unsupervised.instance.RemoveFrequentValues Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    RemoveFrequentValues.java
 *    Copyright (C) 2004-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters.unsupervised.instance;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Vector;

import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

/**
 *  Determines which values (frequent or infrequent
 * ones) of an (nominal) attribute are retained and filters the instances
 * accordingly. In case of values with the same frequency, they are kept in the
 * way they appear in the original instances object. E.g. if you have the values
 * "1,2,3,4" with the frequencies "10,5,5,3" and you chose to keep the 2 most
 * common values, the values "1,2" would be returned, since the value "2" comes
 * before "3", even though they have the same frequency.
 * 

 * <p>
 *
 * Valid options are:
 * <p>
 *
 * <pre>
 * -C &lt;num&gt;
 *  Choose attribute to be used for selection.
 * </pre>
 *
 * <pre>
 * -N &lt;num&gt;
 *  Number of values to retain for the specified attribute,
 *  i.e. the ones with the most instances (default 2).
 * </pre>
 *
 * <pre>
 * -L
 *  Instead of values with the most instances the ones with the
 *  least are retained.
 * </pre>
 *
 * <pre>
 * -H
 *  When selecting on nominal attributes, removes header
 *  references to excluded values.
 * </pre>
 *
 * <pre>
 * -V
 *  Invert matching sense.
 * </pre>
 *
* * * * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 14508 $ */ public class RemoveFrequentValues extends Filter implements OptionHandler, UnsupervisedFilter, WeightedAttributesHandler { /** for serialization */ static final long serialVersionUID = -2447432930070059511L; /** The attribute's index setting. */ private final SingleIndex m_AttIndex = new SingleIndex("last"); /** the number of values to retain. */ protected int m_NumValues = 2; /** whether to retain values with least instances instead of most. */ protected boolean m_LeastValues = false; /** whether to invert the matching sense. */ protected boolean m_Invert = false; /** Modify header for nominal attributes? */ protected boolean m_ModifyHeader = false; /** If m_ModifyHeader, stores a mapping from old to new indexes */ protected int[] m_NominalMapping; /** contains the values to retain */ protected HashSet m_Values = null; /** * Returns a string describing this filter * * @return a description of the classifier suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Determines which values (frequent or infrequent ones) of an " + "(nominal) attribute are retained and filters the instances " + "accordingly. In case of values with the same frequency, they are " + "kept in the way they appear in the original instances object. E.g. " + "if you have the values \"1,2,3,4\" with the frequencies \"10,5,5,3\" " + "and you chose to keep the 2 most common values, the values \"1,2\" " + "would be returned, since the value \"2\" comes before \"3\", even " + "though they have the same frequency."; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ @Override public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy