weka.classifiers.lazy.KStar Maven / Gradle / Ivy
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* KStar.java
* Copyright (C) 1995-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.lazy;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.UpdateableClassifier;
import weka.classifiers.lazy.kstar.KStarCache;
import weka.classifiers.lazy.kstar.KStarConstants;
import weka.classifiers.lazy.kstar.KStarNominalAttribute;
import weka.classifiers.lazy.kstar.KStarNumericAttribute;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
/**
* K* is an instance-based classifier, that is the class of a test instance is based upon the class of those training instances similar to it, as determined by some similarity function. It differs from other instance-based learners in that it uses an entropy-based distance function.
*
* For more information on K*, see
*
* John G. Cleary, Leonard E. Trigg: K*: An Instance-based Learner Using an Entropic Distance Measure. In: 12th International Conference on Machine Learning, 108-114, 1995.
*
*
* BibTeX:
* <pre>
* &#64;inproceedings{Cleary1995,
* author = {John G. Cleary and Leonard E. Trigg},
* booktitle = {12th International Conference on Machine Learning},
* pages = {108-114},
* title = {K*: An Instance-based Learner Using an Entropic Distance Measure},
* year = {1995}
* }
* </pre>
*
*
*
* Valid options are:
*
* -B &lt;num&gt;
* Manual blend setting (default 20%)
*
*
* -E
* Enable entropic auto-blend setting (symbolic class only)
*
*
* -M &lt;char&gt;
* Specify the missing value treatment mode (default a)
* Valid options are: a(verage), d(elete), m(axdiff), n(ormal)
*
*
*
* @author Len Trigg ([email protected])
* @author Abdelaziz Mahoui ([email protected]) - Java port
* @version $Revision: 10141 $
*/
public class KStar
extends AbstractClassifier
implements KStarConstants, UpdateableClassifier, TechnicalInformationHandler {
/** Fixed serialization version identifier for this class. */
static final long serialVersionUID = 332458330800479083L;
/**
 * The training instances used for classification. Assigned in
 * buildClassifier and extended by updateClassifier.
 */
protected Instances m_Train;
/** The number of instances in the dataset. */
protected int m_NumInstances;
/**
 * The number of class values. Determines the length of the probability
 * array allocated in distributionForInstance.
 */
protected int m_NumClasses;
/** The number of attributes. */
protected int m_NumAttributes;
/**
 * The class attribute type. Compared against Attribute.NOMINAL and
 * Attribute.NUMERIC when predictions are computed.
 */
protected int m_ClassType;
/** Table of random class value columns. */
protected int [][] m_RandClassCols;
/**
 * Flag turning on and off the computation of random class columns.
 * NOTE(review): ON is not declared in the visible part of this file;
 * presumably it is inherited from KStarConstants -- confirm.
 */
protected int m_ComputeRandomCols = ON;
/** Flag turning on and off the initialisation of config variables. */
protected int m_InitFlag = ON;
/**
 * A custom data structure for caching distinct attribute values
 * and their scale factor or stop parameter.
 */
protected KStarCache [] m_Cache;
/**
 * Missing value treatment mode (the -M option described in the class
 * documentation); defaults to averaging.
 */
protected int m_MissingMode = M_AVERAGE;
/** Blend method: 0 = use specified blend, 1 = entropic blend setting. */
protected int m_BlendMethod = B_SPHERE;
/**
 * Default sphere of influence blend setting, expressed as a percentage
 * (the -B option described in the class documentation defaults to 20%).
 */
protected int m_GlobalBlend = 20;
/**
 * The selectable missing value handling methods, each pairing a mode
 * constant with its human-readable description. The four entries line up
 * with the d(elete), m(axdiff), n(ormal) and a(verage) choices of the -M
 * option listed in the class documentation.
 */
public static final Tag [] TAGS_MISSING = {
new Tag(M_DELETE, "Ignore the instances with missing values"),
new Tag(M_MAXDIFF, "Treat missing values as maximally different"),
new Tag(M_NORMAL, "Normalize over the attributes"),
new Tag(M_AVERAGE, "Average column entropy curves")
};
/**
* Returns a string describing classifier
* @return a description suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
  // Build the description incrementally: first the fixed summary text,
  // then the textual form of the literature reference.
  final StringBuilder description = new StringBuilder();
  description.append("K* is an instance-based classifier, that is the class of a test ");
  description.append("instance is based upon the class of those training instances ");
  description.append("similar to it, as determined by some similarity function. It differs ");
  description.append("from other instance-based learners in that it uses an entropy-based ");
  description.append("distance function.\n\n");
  description.append("For more information on K*, see\n\n");
  description.append(getTechnicalInformation().toString());
  return description.toString();
}
/**
* Returns an instance of a TechnicalInformation object, containing
* detailed information about the technical background of this class,
* e.g., paper reference or book this class is based on.
*
* @return the technical information about this class
*/
public TechnicalInformation getTechnicalInformation() {
  // The K* paper is recorded as a conference-proceedings entry.
  final TechnicalInformation paper = new TechnicalInformation(Type.INPROCEEDINGS);

  paper.setValue(Field.AUTHOR, "John G. Cleary and Leonard E. Trigg");
  paper.setValue(Field.TITLE, "K*: An Instance-based Learner Using an Entropic Distance Measure");
  paper.setValue(Field.BOOKTITLE, "12th International Conference on Machine Learning");
  paper.setValue(Field.YEAR, "1995");
  paper.setValue(Field.PAGES, "108-114");

  return paper;
}
/**
* Returns default capabilities of the classifier.
*
* @return the capabilities of this classifier
*/
public Capabilities getCapabilities() {
  final Capabilities caps = super.getCapabilities();
  // Clear whatever the superclass enabled so that only the entries
  // listed below end up switched on.
  caps.disableAll();

  // Enabled in this exact order: attribute capabilities first, then
  // class capabilities.
  final Capability[] supported = {
    // attributes
    Capability.NOMINAL_ATTRIBUTES,
    Capability.NUMERIC_ATTRIBUTES,
    Capability.DATE_ATTRIBUTES,
    Capability.MISSING_VALUES,
    // class
    Capability.NOMINAL_CLASS,
    Capability.NUMERIC_CLASS,
    Capability.DATE_CLASS,
    Capability.MISSING_CLASS_VALUES
  };
  for (Capability capability : supported) {
    caps.enable(capability);
  }

  // instances
  caps.setMinimumNumberInstances(0);
  return caps;
}
/**
* Generates the classifier.
*
* @param instances set of instances serving as training data
* @throws Exception if the classifier has not been generated successfully
*/
public void buildClassifier(Instances instances) throws Exception {
  // Refuse data this classifier cannot handle.
  getCapabilities().testWithFail(instances);

  // Work on a copy so that dropping instances without a class value
  // does not modify the dataset handed in by the caller.
  final Instances labelled = new Instances(instances);
  labelled.deleteWithMissingClass();

  // Every remaining instance becomes part of the training set.
  m_Train = new Instances(labelled, 0, labelled.numInstances());

  // Set up the attribute-related state of this classifier.
  init_m_Attributes();
}
/**
* Adds the supplied instance to the training set
*
* @param instance the instance to add
* @throws Exception if instance could not be incorporated successfully
*/
public void updateClassifier(Instance instance) throws Exception {
  // The new instance must share the header of the training data.
  if (!m_Train.equalHeaders(instance.dataset())) {
    throw new Exception("Incompatible instance types\n"
      + m_Train.equalHeadersMsg(instance.dataset()));
  }

  // Instances without a class value are silently skipped.
  if (!instance.classIsMissing()) {
    m_Train.add(instance);
    // update relevant attributes ...
    update_m_Attributes();
  }
}
/**
* Calculates the class membership probabilities for the given test instance.
*
* @param instance the instance to be classified
* @return predicted class probability distribution
* @throws Exception if an error occurred during the prediction
*/
public double [] distributionForInstance(Instance instance) throws Exception {
double transProb = 0.0, temp = 0.0;
double [] classProbability = new double[m_NumClasses];
double [] predictedValue = new double[1];
// initialization ...
for (int i=0; i enu = m_Train.enumerateInstances();
while ( enu.hasMoreElements() ) {
trainInstance = (Instance)enu.nextElement();
transProb = instanceTransformationProbability(instance, trainInstance);
switch ( m_ClassType )
{
case Attribute.NOMINAL:
classProbability[(int)trainInstance.classValue()] += transProb;
break;
case Attribute.NUMERIC:
predictedValue[0] += transProb * trainInstance.classValue();
temp += transProb;
break;
}
}
if (m_ClassType == Attribute.NOMINAL) {
double sum = Utils.sum(classProbability);
if (sum <= 0.0)
for (int i=0; i listOptions() {
Vector
© 2015 - 2025 Weber Informatics LLC | Privacy Policy