All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.classifiers.meta.ClassificationViaClustering Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * ClassificationViaClustering.java
 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
 */

package weka.classifiers.meta;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.classifiers.rules.ZeroR;
import weka.clusterers.AbstractClusterer;
import weka.clusterers.ClusterEvaluation;
import weka.clusterers.Clusterer;
import weka.clusterers.SimpleKMeans;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;

/**
 *  A simple meta-classifier that uses a clusterer for
 * classification. For cluster algorithms that use a fixed number of clusterers,
 * like SimpleKMeans, the user has to make sure that the number of clusters to
 * generate are the same as the number of class labels in the dataset in order
 * to obtain a useful model.
*
 * Note: at prediction time, a missing value is returned if no cluster is found
 * for the instance.
*
 * The code is based on the 'clusters to classes' functionality of the
 * weka.clusterers.ClusterEvaluation class by Mark Hall.

 * Valid options are:

* *

 * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * 
* *
 * -W
 *  Full name of clusterer.
 *  (default: weka.clusterers.SimpleKMeans)
 * 
* *
 * Options specific to clusterer weka.clusterers.SimpleKMeans:
 * 
* *
 * -N &lt;num&gt;
 *  number of clusters.
 *  (default 2).
 * 
* *
 * -V
 *  Display std. deviations for centroids.
 * 
* *
 * -M
 *  Replace missing values with mean/mode.
 * 
* *
 * -S &lt;num&gt;
 *  Random number seed.
 *  (default 10)
 * 
* * * * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision: 14257 $ */ public class ClassificationViaClustering extends AbstractClassifier { /** for serialization */ private static final long serialVersionUID = -5687069451420259135L; /** the cluster algorithm used (template) */ protected Clusterer m_Clusterer; /** the actual cluster algorithm being used */ protected Clusterer m_ActualClusterer; /** the original training data header */ protected Instances m_OriginalHeader; /** the modified training data header */ protected Instances m_ClusteringHeader; /** the mapping between clusters and classes */ protected double[] m_ClustersToClasses; /** the default model */ protected Classifier m_ZeroR; /** * default constructor */ public ClassificationViaClustering() { super(); m_Clusterer = new SimpleKMeans(); } /** * Returns a string describing classifier * * @return a description suitable for displaying in the explorer/experimenter * gui */ public String globalInfo() { return "A simple meta-classifier that uses a clusterer for classification. " + "For cluster algorithms that use a fixed number of clusterers, like " + "SimpleKMeans, the user has to make sure that the number of clusters " + "to generate are the same as the number of class labels in the dataset " + "in order to obtain a useful model.\n" + "\n" + "Note: at prediction time, a missing value is returned if no cluster " + "is found for the instance.\n" + "\n" + "The code is based on the 'clusters to classes' functionality of the " + "weka.clusterers.ClusterEvaluation class by Mark Hall."; } /** * Gets an enumeration describing the available options. * * @return an enumeration of all the available options. */ @Override public Enumeration




© 2015 - 2025 Weber Informatics LLC | Privacy Policy