/*
 * weka.classifiers.lazy.kstar.KStarNumericAttribute
 *
 * Artifact: weka-stable -- The Waikato Environment for Knowledge Analysis
 * (WEKA), a machine learning workbench. This is the stable version; apart
 * from bugfixes, this version does not receive any other updates.
 *
 * NOTE(review): the text above is repository-page boilerplate captured when
 * this file was scraped; it is wrapped in a comment so the file parses.
 */
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* KStarNumericAttribute.java
* Copyright (C) 1995 Univeristy of Waikato
* Java port to Weka by Abdelaziz Mahoui ([email protected]).
*
*/
package weka.classifiers.lazy.kstar;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
/**
 * A custom class which provides the environment for computing the
 * transformation probability of a specified test instance numeric
 * attribute to a specified train instance numeric attribute.
 *
 * @author Len Trigg ([email protected])
 * @author Abdelaziz Mahoui ([email protected])
 * @version $Revision: 1.0 $
 */
public class KStarNumericAttribute
implements KStarConstants, RevisionHandler {
/** The training instances used for classification. */
protected Instances m_TrainSet;
/** The test instance. */
protected Instance m_Test;
/** The train instance. */
protected Instance m_Train;
/** The index of the attribute in both the test and train instances. */
protected int m_AttrIndex;
/** The scale parameter of the transformation probability function. */
protected double m_Scale = 1.0;
/** Probability of the test attribute transforming into a train attribute
with a missing value. */
protected double m_MissingProb = 1.0;
/** Average probability of the test attribute transforming into a train
attribute. */
protected double m_AverageProb = 1.0;
/** Smallest probability of the test attribute transforming into a train
attribute. */
protected double m_SmallestProb = 1.0;
/** The set of distances from the test attribute to the set of train
attributes; a negative entry marks a train instance whose value is
missing (see calculateSphereSize / calculateEntropy). */
protected double [] m_Distances;
/** Set of columns: each column is a randomised version of the train
dataset's class column; the last column (index NUM_RAND_COLS) holds the
original instance-to-class mapping. */
protected int [][] m_RandClassCols;
/** The number of train instances with no missing attribute values. */
protected int m_ActualCount = 0;
/** A cache mapping attribute values to their corresponding scale
parameters and missing-value probabilities. */
protected KStarCache m_Cache;
/** The number of instances in the dataset. */
protected int m_NumInstances;
/** The number of class values. */
protected int m_NumClasses;
/** The number of attributes. */
protected int m_NumAttributes;
/** The class attribute type. */
protected int m_ClassType;
/** Missing value treatment (one of the M_* constants). */
protected int m_MissingMode = M_AVERAGE;
/** Blend method: B_SPHERE = use the specified blend factor,
B_ENTROPY = entropic blend setting. */
protected int m_BlendMethod = B_SPHERE ;
/** Default sphere-of-influence blend setting (a percentage). */
protected int m_BlendFactor = 20;
/**
 * Creates the environment for computing the transformation probability of
 * a numeric attribute of a test instance into the same attribute of a
 * train instance.
 *
 * @param test the test instance
 * @param train the train instance
 * @param attrIndex the index of the numeric attribute in both instances
 * @param trainSet the training data
 * @param randClassCols randomised versions of the training class column
 *        (the last column holds the original mapping)
 * @param cache cache of scale factors keyed by attribute value
 */
public KStarNumericAttribute(Instance test, Instance train, int attrIndex,
                             Instances trainSet,
                             int [][] randClassCols,
                             KStarCache cache) {
  m_TrainSet = trainSet;
  m_RandClassCols = randClassCols;
  m_Cache = cache;
  m_Test = test;
  m_Train = train;
  m_AttrIndex = attrIndex;
  // derive dataset summary statistics once, up front
  init();
}
/**
 * Caches dataset summary statistics from the training set: the instance,
 * class and attribute counts, and the class attribute's type.
 */
private void init() {
  try {
    final Instances data = m_TrainSet;
    m_NumInstances = data.numInstances();
    m_NumClasses = data.numClasses();
    m_NumAttributes = data.numAttributes();
    m_ClassType = data.classAttribute().type();
  } catch (Exception e) {
    // best-effort: report the problem and continue with defaults
    e.printStackTrace();
  }
}
/**
 * Calculates the transformation probability of the attribute indexed
 * "m_AttrIndex" in the test instance "m_Test" to the same attribute in
 * the train instance "m_Train".
 *
 * @return the probability value
 */
public double transProb() {
  final double testVal = m_Test.value(m_AttrIndex);
  // Reuse a previously computed (scale, missing-probability) pair when
  // this attribute value has been seen before; otherwise compute the
  // scale factor and cache the result.
  if (m_Cache.containsKey(testVal)) {
    KStarCache.TableEntry entry = m_Cache.getCacheValues(testVal);
    m_Scale = entry.value;
    m_MissingProb = entry.pmiss;
  } else {
    m_Scale = (m_BlendMethod == B_ENTROPY)
        ? scaleFactorUsingEntropy()
        : scaleFactorUsingBlend();   // default is B_SPHERE
    m_Cache.store(testVal, m_Scale, m_MissingProb);
  }
  // A missing train value transforms with the precomputed probability.
  if (m_Train.isMissing(m_AttrIndex)) {
    return m_MissingProb;
  }
  final double distance = Math.abs(testVal - m_Train.value(m_AttrIndex));
  return PStar(distance, m_Scale);
}
/**
* Calculates the scale factor for the attribute indexed
* "m_AttrIndex" in test instance "m_Test" using a global
* blending factor (default value is 20%).
*
* @return the scale factor value
*/
private double scaleFactorUsingBlend() {
String debug = "(KStarNumericAttribute.scaleFactorUsingBlend)";
int i, j, lowestcount = 0, count = 0;
double lowest = -1.0, nextlowest = -1.0;
double root, broot, up, bot;
double aimfor, min_val = 9e300, scale = 1.0;
double avgprob = 0.0, minprob = 0.0, min_pos = 0.0;
KStarWrapper botvals = new KStarWrapper();
KStarWrapper upvals = new KStarWrapper();
KStarWrapper vals = new KStarWrapper();
m_Distances = new double [m_NumInstances];
for (j=0; j 0) { // Couldn't include that few,
// going for min possible
min_pos = up;
avgprob = upvals.avgProb;
minprob = upvals.minProb;
}
else {
// Root finding Algorithm starts here !
for (;;) {
calculateSphereSize(root, vals);
vals.sphere -= aimfor;
if ( Math.abs(vals.sphere) < min_val ) {
min_val = Math.abs(vals.sphere);
min_pos = root;
avgprob = vals.avgProb;
minprob = vals.minProb;
}
if ( Math.abs(vals.sphere) <= ROOT_FINDER_ACCURACY ) {
break; // converged to a solution, done!
}
if (vals.sphere > 0.0) {
broot = (root + up) / 2.0;
bot = root;
root = broot;
}
else {
broot = (root + bot) / 2.0;
up = root;
root = broot;
}
i++;
if (i > ROOT_FINDER_MAX_ITER) {
// System.err.println("Warning: "+debug+"
// ROOT_FINDER_MAX_ITER exceeded");
root = min_pos;
break;
}
}
}
m_SmallestProb = minprob;
m_AverageProb = avgprob;
// Set the probability of transforming to a missing value
switch ( m_MissingMode )
{
case M_DELETE:
m_MissingProb = 0.0;
break;
case M_NORMAL:
m_MissingProb = 1.0;
break;
case M_MAXDIFF:
m_MissingProb = m_SmallestProb;
break;
case M_AVERAGE:
m_MissingProb = m_AverageProb;
break;
}
// set the scale factor value
scale = min_pos;
return scale;
}
}
/**
 * Calculates the size of the "sphere of influence" defined as:
 *   sphere = sum(P)^2 / sum(P^2)
 * where P(i) = root * exp(-2 * i * root). Since there are n different
 * training instances, each P(i) is weighted by 1/n.
 *
 * @param scale the scale parameter to evaluate
 * @param params wrapper receiving sphere, avgProb and minProb
 */
private void calculateSphereSize(double scale, KStarWrapper params) {
  double minprob = 1.0;
  double pstarSum = 0.0;       // sum(P*)
  double pstarSquareSum = 0.0; // sum(P*^2)
  for (int i = 0; i < m_NumInstances; i++) {
    if (m_Distances[i] < 0) {
      continue; // train instance with a missing value
    }
    double pstar = PStar(m_Distances[i], scale); // P*(b|a)
    if (pstar < minprob) {
      minprob = pstar;
    }
    double inc = pstar / m_ActualCount;
    pstarSum += inc;
    pstarSquareSum += inc * inc;
  }
  double sphereSize = (pstarSquareSum == 0)
      ? 0
      : (pstarSum * pstarSum) / pstarSquareSum;
  // return the values
  params.sphere = sphereSize;
  params.avgProb = pstarSum;
  params.minProb = minprob;
}
/**
* Calculates the scale factor using entropy.
*
* @return the scale factor value
*/
private double scaleFactorUsingEntropy() {
String debug = "(KStarNumericAttribute.scaleFactorUsingEntropy)";
if ( m_ClassType != Attribute.NOMINAL ) {
System.err.println("Error: "+debug+" attribute class must be nominal!");
System.exit(1);
}
int i,j, lowestcount = 0, count, itcount;
double lowest = -1.0, nextlowest = -1.0;
double root, up, bot, stepsize, delta;
double actentropy = 0.0, randentropy = 0.0, actscale, randscale;
double minrand = 0.0, minact = 0.0, maxrand = 0.0, maxact = 0.0;
double bestdiff, bestroot, currentdiff, lastdiff;
double bestpsum, bestminprob, scale = 1.0;
KStarWrapper botvals = new KStarWrapper();
KStarWrapper upvals = new KStarWrapper();
KStarWrapper vals = new KStarWrapper();
m_Distances = new double [m_NumInstances];
for (j=0; j= up) {
root = up;
currentdiff = 0.0;
delta = -1.0;
}
else {
calculateEntropy(root, vals);
// Normalise entropies
vals.randEntropy = (vals.randEntropy - upvals.randEntropy) /
randscale;
vals.actEntropy = (vals.actEntropy - upvals.actEntropy) /
randscale;
currentdiff = vals.randEntropy - vals.actEntropy;
if (currentdiff < FLOOR1) {
currentdiff = FLOOR1;
if (stepsize < 0) {
// If we've hit the end and turned around we can't
// have found any peaks
bestdiff = currentdiff;
bestroot = bot;
bestpsum = botvals.avgProb;
bestminprob = botvals.minProb;
break;
}
}
delta = currentdiff - lastdiff;
}
if (currentdiff > bestdiff) {
bestdiff = currentdiff;
bestroot = root;
bestminprob = vals.minProb;
bestpsum = vals.avgProb;
}
if (delta < 0) {
if (Math.abs(stepsize) < ROOT_FINDER_ACCURACY) {
break;
}
else {
stepsize /= -4.0;
}
}
if (itcount > ROOT_FINDER_MAX_ITER) {
// System.err.println("Warning: "+debug+" ROOT_FINDER_MAX_ITER
// exceeded");
break;
}
} // while
m_SmallestProb = bestminprob;
m_AverageProb = bestpsum;
// Set the probability of transforming to a missing value
switch ( m_MissingMode )
{
case M_DELETE:
m_MissingProb = 0.0;
break;
case M_NORMAL:
m_MissingProb = 1.0;
break;
case M_MAXDIFF:
m_MissingProb = m_SmallestProb;
break;
case M_AVERAGE:
m_MissingProb = m_AverageProb;
break;
}
// set scale factor
scale = bestroot;
} // else
return scale;
}
/**
 * For a given scale factor, computes several quantities besides the
 * entropy: the actual class entropy, a "random" entropy averaged over the
 * randomised class columns, and the average and smallest transformation
 * probabilities (used for the missing-value case).
 *
 * @param scale the scale parameter to evaluate
 * @param params wrapper receiving actEntropy, randEntropy, avgProb, minProb
 */
private void calculateEntropy(double scale, KStarWrapper params) {
  double avgprob = 0.0, minprob = 1.0;
  // probs[k][c]: accumulated transformation-probability mass for class c
  // under column k; column NUM_RAND_COLS is the original class mapping.
  // (Java arrays are zero-initialised, so no explicit reset is needed.)
  double [][] probs = new double[NUM_RAND_COLS + 1][m_NumClasses];
  for (int i = 0; i < m_NumInstances; i++) {
    if (m_Distances[i] < 0) {
      continue; // train instance has a missing value
    }
    double pstar = PStar(m_Distances[i], scale);
    double tprob = pstar / m_ActualCount;
    avgprob += tprob;
    if (pstar < minprob) {
      minprob = pstar;
    }
    // accumulate per-class mass under every (randomised) class column
    for (int k = 0; k <= NUM_RAND_COLS; k++) {
      probs[k][ m_RandClassCols[k][i] ] += tprob;
    }
  }
  // actual entropy from the original class mapping (column NUM_RAND_COLS)
  double actent = 0.0;
  for (int j = m_NumClasses - 1; j >= 0; j--) {
    double p = probs[NUM_RAND_COLS][j] / avgprob;
    if (p > 0) {
      actent -= p * Math.log(p) / LOG2;
    }
  }
  // random entropy averaged over the randomised columns only
  double randent = 0.0;
  for (int k = 0; k < NUM_RAND_COLS; k++) {
    for (int c = m_NumClasses - 1; c >= 0; c--) {
      double p = probs[k][c] / avgprob;
      if (p > 0) {
        randent -= p * Math.log(p) / LOG2;
      }
    }
  }
  randent /= NUM_RAND_COLS;
  // return the values
  params.actEntropy = actent;
  params.randEntropy = randent;
  params.avgProb = avgprob;
  params.minProb = minprob;
}
/**
 * Evaluates the transformation probability function:
 *   P*(x) = scale * exp(-2.0 * x * scale)
 *
 * @param x input value (a distance)
 * @param scale the scale factor
 * @return the value of P*(x)
 */
private double PStar(double x, double scale) {
  final double exponent = -2.0 * x * scale;
  return scale * Math.exp(exponent);
}
/**
 * Sets all options at once.
 *
 * @param missingmode the missing value treatment to use
 * @param blendmethod the blending method to use
 * @param blendfactor the level of blending to use
 */
public void setOptions(int missingmode, int blendmethod, int blendfactor) {
  // delegate to the individual setters
  setMissingMode(missingmode);
  setBlendMethod(blendmethod);
  setBlendFactor(blendfactor);
}
/**
 * Sets the missing value treatment mode.
 *
 * @param mode one of the M_* missing-value treatment constants
 */
public void setMissingMode(int mode) {
  this.m_MissingMode = mode;
}
/**
 * Sets the blending method.
 *
 * @param method the blending method to use (e.g. B_SPHERE or B_ENTROPY)
 */
public void setBlendMethod(int method) {
  this.m_BlendMethod = method;
}
/**
 * Sets the blending factor.
 *
 * @param factor the level of blending to use
 */
public void setBlendFactor(int factor) {
  this.m_BlendFactor = factor;
}
/**
 * Returns the revision string.
 *
 * @return the revision
 */
public String getRevision() {
  final String revision = "$Revision: 1.7 $";
  return RevisionUtils.extract(revision);
}
} // class
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy (scraper page footer, not part of the source)