![JAR search and dependency download from the Maven repository](/logo.png)
moa.classifiers.lazy.neighboursearch.kdtrees.MedianOfWidestDimension Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of moa Show documentation
Show all versions of moa Show documentation
Massive On-line Analysis is an environment for massive data mining. MOA
provides a framework for data stream mining and includes tools for evaluation
and a collection of machine learning algorithms. Related to the WEKA project,
also written in Java, while scaling to more demanding problems.
The newest version!
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
/*
* MedianOfWidestDimension.java
* Copyright (C) 2007-2012 University of Waikato, Hamilton, New Zealand
*/
package moa.classifiers.lazy.neighboursearch.kdtrees;
/**
* The class that splits a KDTree node based on the median value of a dimension in which the node's points have the widest spread.
*
* For more information see also:
*
* Jerome H. Friedman, Jon Luis Bentley, Raphael Ari Finkel (1977). An Algorithm for Finding Best Matches in Logarithmic Expected Time. ACM Transactions on Mathematics Software. 3(3):209-226.
*
*
* BibTeX:
*
* @article{Friedman1977,
* author = {Jerome H. Friedman and Jon Luis Bentley and Raphael Ari Finkel},
* journal = {ACM Transactions on Mathematics Software},
* month = {September},
* number = {3},
* pages = {209-226},
* title = {An Algorithm for Finding Best Matches in Logarithmic Expected Time},
* volume = {3},
* year = {1977}
* }
*
*
*
*
* @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
* @version $Revision: 8034 $
*/
public class MedianOfWidestDimension
extends KDTreeNodeSplitter {
/** for serialization. */
private static final long serialVersionUID = 1383443320160540663L;
/**
* Returns a string describing this nearest neighbour search algorithm.
*
* @return a description of the algorithm for displaying in the
* explorer/experimenter gui
*/
public String globalInfo() {
return
"The class that splits a KDTree node based on the median value of "
+ "a dimension in which the node's points have the widest spread.\n\n"
+ "For more information see also:\n\n";
}
/**
* Splits a node into two based on the median value of the dimension
* in which the points have the widest spread. After splitting two
* new nodes are created and correctly initialised. And, node.left
* and node.right are set appropriately.
*
* @param node The node to split.
* @param numNodesCreated The number of nodes that so far have been
* created for the tree, so that the newly created nodes are
* assigned correct/meaningful node numbers/ids.
* @param nodeRanges The attributes' range for the points inside
* the node that is to be split.
* @param universe The attributes' range for the whole
* point-space.
* @throws Exception If there is some problem in splitting the
* given node.
*/
public void splitNode(KDTreeNode node, int numNodesCreated,
double[][] nodeRanges, double[][] universe) throws Exception {
correctlyInitialized();
int splitDim = widestDim(nodeRanges, universe);
//In this case median is defined to be either the middle value (in case of
//odd number of values) or the left of the two middle values (in case of
//even number of values).
int medianIdxIdx = node.m_Start + (node.m_End-node.m_Start)/2;
//the following finds the median and also re-arranges the array so all
//elements to the left are < median and those to the right are > median.
int medianIdx = select(splitDim, m_InstList, node.m_Start, node.m_End, (node.m_End-node.m_Start)/2+1);
node.m_SplitDim = splitDim;
node.m_SplitValue = m_Instances.instance(m_InstList[medianIdx]).value(splitDim);
node.m_Left = new KDTreeNode(numNodesCreated+1, node.m_Start, medianIdxIdx,
m_EuclideanDistance.initializeRanges(m_InstList, node.m_Start, medianIdxIdx));
node.m_Right = new KDTreeNode(numNodesCreated+2, medianIdxIdx+1, node.m_End,
m_EuclideanDistance.initializeRanges(m_InstList, medianIdxIdx+1, node.m_End));
}
/**
* Partitions the instances around a pivot. Used by quicksort and
* kthSmallestValue.
*
* @param attIdx The attribution/dimension based on which the
* instances should be partitioned.
* @param index The master index array containing indices of the
* instances.
* @param l The begining index of the portion of master index
* array that should be partitioned.
* @param r The end index of the portion of master index array
* that should be partitioned.
* @return the index of the middle element
*/
protected int partition(int attIdx, int[] index, int l, int r) {
double pivot = m_Instances.instance(index[(l + r) / 2]).value(attIdx);
int help;
while (l < r) {
while ((m_Instances.instance(index[l]).value(attIdx) < pivot) && (l < r)) {
l++;
}
while ((m_Instances.instance(index[r]).value(attIdx) > pivot) && (l < r)) {
r--;
}
if (l < r) {
help = index[l];
index[l] = index[r];
index[r] = help;
l++;
r--;
}
}
if ((l == r) && (m_Instances.instance(index[r]).value(attIdx) > pivot)) {
r--;
}
return r;
}
/**
* Implements computation of the kth-smallest element according
* to Manber's "Introduction to Algorithms".
*
* @param attIdx The dimension/attribute of the instances in
* which to find the kth-smallest element.
* @param indices The master index array containing indices of
* the instances.
* @param left The begining index of the portion of the master
* index array in which to find the kth-smallest element.
* @param right The end index of the portion of the master index
* array in which to find the kth-smallest element.
* @param k The value of k
* @return The index of the kth-smallest element
*/
public int select(int attIdx, int[] indices, int left, int right, int k) {
if (left == right) {
return left;
} else {
int middle = partition(attIdx, indices, left, right);
if ((middle - left + 1) >= k) {
return select(attIdx, indices, left, middle, k);
} else {
return select(attIdx, indices, middle + 1, right, k - (middle - left + 1));
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy