weka.attributeSelection.CfsSubsetEval Maven / Gradle / Ivy

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    CfsSubsetEval.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import java.util.BitSet;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Set;
import java.util.Vector;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;

import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.ContingencyTables;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.ThreadSafe;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.supervised.attribute.Discretize;

/**
 * CfsSubsetEval :
 *
 * Evaluates the worth of a subset of attributes by considering the individual
 * predictive ability of each feature along with the degree of redundancy
 * between them.
 *
 * Subsets of features that are highly correlated with the class while having
 * low intercorrelation are preferred.
 *
 * For more information see:
 *
 * M. A. Hall (1998). Correlation-based Feature Subset Selection for Machine
 * Learning. Hamilton, New Zealand.
 *
 * BibTeX:
 *
 * @phdthesis{Hall1998,
 *    address = {Hamilton, New Zealand},
 *    author = {M. A. Hall},
 *    school = {University of Waikato},
 *    title = {Correlation-based Feature Subset Selection for Machine Learning},
 *    year = {1998}
 * }
 *
 * Valid options are:
 *
 * -M
 *  Treat missing values as a separate value.
 *
 * -L
 *  Don't include locally predictive attributes.
 *
 * -Z
 *  Precompute the full correlation matrix at the outset, rather than compute
 *  correlations lazily (as needed) during the search. Use this in conjunction
 *  with parallel processing in order to speed up a backward search.
 *
 * -P <int>
 *  The size of the thread pool, for example, the number of cores in the CPU.
 *  (default 1)
 *
 * -E <int>
 *  The number of threads to use, which should be >= size of the thread pool.
 *  (default 1)
 *
 * -D
 *  Output debugging info.
 *
 * @author Mark Hall ([email protected])
 * @version $Revision: 11852 $
 * @see Discretize
 */
public class CfsSubsetEval extends ASEvaluation implements SubsetEvaluator,
  ThreadSafe, OptionHandler, TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = 747878400813276317L;

  /** The training instances */
  private Instances m_trainInstances;

  /** Discretise attributes when class in nominal */
  private Discretize m_disTransform;

  /** The class index */
  private int m_classIndex;

  /** Is the class numeric */
  private boolean m_isNumeric;

  /** Number of attributes in the training data */
  private int m_numAttribs;

  /** Number of instances in the training data */
  private int m_numInstances;

  /** Treat missing values as separate values */
  private boolean m_missingSeparate;

  /** Include locally predictive attributes */
  private boolean m_locallyPredictive;

  /** Holds the matrix of attribute correlations */
  // private Matrix m_corr_matrix;
  private float[][] m_corr_matrix;

  /** Standard deviations of attributes (when using pearsons correlation) */
  private double[] m_std_devs;

  /** Threshold for admitting locally predictive features */
  private double m_c_Threshold;

  /** Output debugging info */
  protected boolean m_debug;

  /** Number of entries in the correlation matrix */
  protected int m_numEntries;

  /** Number of correlations actually computed */
  protected AtomicInteger m_numFilled;

  protected boolean m_preComputeCorrelationMatrix;

  /**
   * The number of threads used to compute the correlation matrix. Used when
   * correlation matrix is precomputed
   */
  protected int m_numThreads = 1;

  /**
   * The size of the thread pool. Usually set equal to the number of CPUs or
   * CPU cores available
   */
  protected int m_poolSize = 1;

  /** Thread pool */
  protected transient ExecutorService m_pool = null;

  /**
   * Returns a string describing this attribute evaluator
   *
   * @return a description of the evaluator suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String globalInfo() {
    return "CfsSubsetEval :\n\nEvaluates the worth of a subset of attributes "
      + "by considering the individual predictive ability of each feature "
      + "along with the degree of redundancy between them.\n\n"
      + "Subsets of features that are highly correlated with the class "
      + "while having low intercorrelation are preferred.\n\n"
      + "For more information see:\n\n" + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing detailed
   * information about the technical background of this class, e.g., paper
   * reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  @Override
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.PHDTHESIS);
    result.setValue(Field.AUTHOR, "M. A. Hall");
    result.setValue(Field.YEAR, "1998");
    result.setValue(Field.TITLE,
      "Correlation-based Feature Subset Selection for Machine Learning");
    result.setValue(Field.SCHOOL, "University of Waikato");
    result.setValue(Field.ADDRESS, "Hamilton, New Zealand");

    return result;
  }

  /**
   * Constructor
   */
  public CfsSubsetEval() {
    resetOptions();
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   **/
  @Override
  public Enumeration
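For reference, this evaluator is normally driven through weka.attributeSelection.AttributeSelection together with a search strategy such as BestFirst. The sketch below is a minimal, hypothetical usage example, not part of this source file: the ARFF path and the thread settings are placeholders, and it assumes the standard setters (setPoolSize, setNumThreads) that back the -P and -E options documented above.

import java.util.Arrays;

import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.BestFirst;
import weka.attributeSelection.CfsSubsetEval;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CfsSubsetEvalExample {

  public static void main(String[] args) throws Exception {
    // Load a dataset; "iris.arff" is a placeholder path.
    Instances data = DataSource.read("iris.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Configure the evaluator; the thread settings mirror the -P and -E options.
    CfsSubsetEval eval = new CfsSubsetEval();
    eval.setPoolSize(2);   // -P: size of the thread pool (assumed setter for m_poolSize)
    eval.setNumThreads(2); // -E: number of threads (assumed setter for m_numThreads)

    // Pair the evaluator with a subset search strategy.
    BestFirst search = new BestFirst();

    AttributeSelection selector = new AttributeSelection();
    selector.setEvaluator(eval);
    selector.setSearch(search);
    selector.SelectAttributes(data);

    // Indices are 0-based; the class index is appended as the last element.
    int[] selected = selector.selectedAttributes();
    System.out.println("Selected attributes: " + Arrays.toString(selected));
  }
}

As noted in the option list, a backward search can additionally benefit from precomputing the full correlation matrix (the -Z option) in combination with the thread settings shown here.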



