All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.core.neighboursearch.balltrees.MedianOfWidestDimension Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.

There is a newer version: 3.8.6
Show newest version
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * MedianOfWidestDimension.java
 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
 */

package weka.core.neighboursearch.balltrees;

import weka.core.EuclideanDistance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

import java.util.Enumeration;
import java.util.Vector;

/**
 
 * Class that splits a BallNode of a ball tree based on the median value of the widest dimension of the points in the ball. It essentially implements Omohundro's  KD construction algorithm.
 * 

* * BibTeX: *

 * @techreport{Omohundro1989,
 *    author = {Stephen M. Omohundro},
 *    institution = {International Computer Science Institute},
 *    month = {December},
 *    number = {TR-89-063},
 *    title = {Five Balltree Construction Algorithms},
 *    year = {1989}
 * }
 * 
*

* * Valid options are:

* *

 -N
 *  Normalize dimensions' widths.
* * * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @version $Revision: 1.2 $ */ public class MedianOfWidestDimension extends BallSplitter implements OptionHandler, TechnicalInformationHandler { /** for serialization. */ private static final long serialVersionUID = 3054842574468790421L; /** * Should we normalize the widths(ranges) of the dimensions (attributes) * before selecting the widest one. */ protected boolean m_NormalizeDimWidths = true; /** * Constructor. */ public MedianOfWidestDimension() { } /** * Constructor. * @param instList The master index array. * @param insts The instances on which the tree * is (or is to be) built. * @param e The Euclidean distance function to * use for splitting. */ public MedianOfWidestDimension(int[] instList, Instances insts, EuclideanDistance e) { super(instList, insts, e); } /** * Returns a string describing this nearest neighbour search algorithm. * * @return a description of the algorithm for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Class that splits a BallNode of a ball tree based on the " + "median value of the widest dimension of the points in the ball. " + "It essentially implements Omohundro's KD construction algorithm."; } /** * Returns an instance of a TechnicalInformation object, containing detailed * information about the technical background of this class, e.g., paper * reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.TECHREPORT); result.setValue(Field.AUTHOR, "Stephen M. Omohundro"); result.setValue(Field.YEAR, "1989"); result.setValue(Field.TITLE, "Five Balltree Construction Algorithms"); result.setValue(Field.MONTH, "December"); result.setValue(Field.NUMBER, "TR-89-063"); result.setValue(Field.INSTITUTION, "International Computer Science Institute"); return result; } /** * Splits a ball into two. * @param node The node to split. * @param numNodesCreated The number of nodes that so far have been * created for the tree, so that the newly created nodes are * assigned correct/meaningful node numbers/ids. * @throws Exception If there is some problem in splitting the * given node. */ public void splitNode(BallNode node, int numNodesCreated) throws Exception { correctlyInitialized(); //int[] instList = getNodesInstsList(node); double[][] ranges = m_DistanceFunction.initializeRanges(m_Instlist, node.m_Start, node.m_End); int splitAttrib = widestDim(ranges, m_DistanceFunction.getRanges()); //In this case median is defined to be either the middle value (in case of //odd number of values) or the left of the two middle values (in case of //even number of values). int medianIdxIdx = node.m_Start + (node.m_End-node.m_Start)/2; //the following finds the median and also re-arranges the array so all //elements to the left are < median and those to the right are > median. int medianIdx = select(splitAttrib, m_Instlist, node.m_Start, node.m_End, (node.m_End-node.m_Start)/2+1); //Utils.select(array, indices, node.m_Start, node.m_End, (node.m_End-node.m_Start)/2+1); //(int) (node.m_NumInstances/2D+0.5D); Instance pivot; node.m_SplitAttrib = splitAttrib; node.m_SplitVal = m_Instances.instance(m_Instlist[medianIdx]) .value(splitAttrib); node.m_Left = new BallNode(node.m_Start, medianIdxIdx, numNodesCreated+1, (pivot=BallNode.calcCentroidPivot(node.m_Start, medianIdxIdx, m_Instlist, m_Instances)), BallNode.calcRadius(node.m_Start, medianIdxIdx, m_Instlist, m_Instances, pivot, m_DistanceFunction) ); node.m_Right = new BallNode(medianIdxIdx+1, node.m_End, numNodesCreated+2, (pivot=BallNode.calcCentroidPivot(medianIdxIdx+1, node.m_End, m_Instlist, m_Instances)), BallNode.calcRadius(medianIdxIdx+1, node.m_End, m_Instlist, m_Instances, pivot, m_DistanceFunction) ); } /** * Partitions the instances around a pivot. Used by quicksort and * kthSmallestValue. * * @param attIdx The attribution/dimension based on which the * instances should be partitioned. * @param index The master index array containing indices of the * instances. * @param l The begining index of the portion of master index * array that should be partitioned. * @param r The end index of the portion of master index array * that should be partitioned. * @return the index of the middle element (in the master * index array, i.e. index of the index of middle element). */ protected int partition(int attIdx, int[] index, int l, int r) { double pivot = m_Instances.instance(index[(l + r) / 2]).value(attIdx); int help; while (l < r) { while ((m_Instances.instance(index[l]).value(attIdx) < pivot) && (l < r)) { l++; } while ((m_Instances.instance(index[r]).value(attIdx) > pivot) && (l < r)) { r--; } if (l < r) { help = index[l]; index[l] = index[r]; index[r] = help; l++; r--; } } if ((l == r) && (m_Instances.instance(index[r]).value(attIdx) > pivot)) { r--; } return r; } /** * Implements computation of the kth-smallest element according * to Manber's "Introduction to Algorithms". * * @param attIdx The dimension/attribute of the instances in * which to find the kth-smallest element. * @param indices The master index array containing indices of * the instances. * @param left The begining index of the portion of the master * index array in which to find the kth-smallest element. * @param right The end index of the portion of the master index * array in which to find the kth-smallest element. * @param k The value of k * @return The index of the kth-smallest element */ public int select(int attIdx, int[] indices, int left, int right, int k) { if (left == right) { return left; } else { int middle = partition(attIdx, indices, left, right); if ((middle - left + 1) >= k) { return select(attIdx, indices, left, middle, k); } else { return select(attIdx, indices, middle + 1, right, k - (middle - left + 1)); } } } /** * Returns the widest dimension. The width of each * dimension (for the points inside the node) is * normalized, if m_NormalizeNodeWidth is set to * true. * @param nodeRanges The attributes' range of the * points inside the node that is to be split. * @param universe The attributes' range for the * whole point-space. * @return The index of the attribute/dimension * in which the points of the node have widest * spread. */ protected int widestDim(double[][] nodeRanges, double[][] universe) { final int classIdx = m_Instances.classIndex(); double widest = 0.0; int w = -1; if (m_NormalizeDimWidths) { for (int i = 0; i < nodeRanges.length; i++) { double newWidest = nodeRanges[i][m_DistanceFunction.R_WIDTH] / universe[i][m_DistanceFunction.R_WIDTH]; if (newWidest > widest) { if(i == classIdx) continue; widest = newWidest; w = i; } } } else { for (int i = 0; i < nodeRanges.length; i++) { if (nodeRanges[i][m_DistanceFunction.R_WIDTH] > widest) { if(i == classIdx) continue; widest = nodeRanges[i][m_DistanceFunction.R_WIDTH]; w = i; } } } return w; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String normalizeDimWidthsTipText() { return "Whether to normalize the widths(ranges) of the dimensions " + "(attributes) before selecting the widest one."; } /** * Should we normalize the widths(ranges) of the dimensions (attributes) * before selecting the widest one. * @param normalize Should be true if the widths are to be * normalized. */ public void setNormalizeDimWidths(boolean normalize) { m_NormalizeDimWidths = normalize; } /** * Whether we are normalizing the widths(ranges) of the dimensions (attributes) * or not. * @return true if widths are being normalized. */ public boolean getNormalizeDimWidths() { return m_NormalizeDimWidths; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector newVector = new Vector(); newVector.addElement(new Option( "\tNormalize dimensions' widths.", "N", 0, "-N")); return newVector.elements(); } /** * Parses a given list of options. * * Valid options are:

* *

 -N
   *  Normalize dimensions' widths.
* * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { setNormalizeDimWidths(Utils.getFlag('N', options)); } /** * Gets the current settings. * @return An array of strings suitable for passing to * setOptions or to be displayed by a * GenericObjectEditor. */ public String[] getOptions() { Vector result; result = new Vector(); if (getNormalizeDimWidths()) result.add("-N"); return result.toArray(new String[result.size()]); } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 1.2 $"); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy