
weka.core.neighboursearch.balltrees.MiddleOutConstructor Maven / Gradle / Ivy
Go to download
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This is the stable version. Apart from bugfixes, this version
does not receive any other updates.
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* MiddleOutConstructor.java
* Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
*/
package weka.core.neighboursearch.balltrees;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Randomizable;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
/**
* The class that builds a BallTree middle out.
*
* For more information see also:
*
* Andrew W. Moore: The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data. In: UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence, San Francisco, CA, USA, 397-405, 2000.
*
* Ashraf Masood Kibriya (2007). Fast Algorithms for Nearest Neighbour Search. Hamilton, New Zealand.
*
*
* BibTeX:
*
* @inproceedings{Moore2000,
* address = {San Francisco, CA, USA},
* author = {Andrew W. Moore},
* booktitle = {UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence},
* pages = {397-405},
* publisher = {Morgan Kaufmann Publishers Inc.},
* title = {The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data},
* year = {2000}
* }
*
* @mastersthesis{Kibriya2007,
* address = {Hamilton, New Zealand},
* author = {Ashraf Masood Kibriya},
* school = {Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato},
* title = {Fast Algorithms for Nearest Neighbour Search},
* year = {2007}
* }
*
*
*
* Valid options are:
*
* -S <num>
* The seed for the random number generator used
* in selecting random anchor.
* (default: 1)
*
* -R
* Use randomly chosen initial anchors.
*
*
* @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
* @version $Revision: 1.3 $
*/
public class MiddleOutConstructor
extends BallTreeConstructor
implements Randomizable, TechnicalInformationHandler {
/** Serialization version identifier. */
private static final long serialVersionUID = -8523314263062524462L;
/** Seed for the random number generator (initially 1). */
protected int m_RSeed = 1;
/**
* The random number generator used by getRandomAnchor() to pick the
* first anchor point when it is selected randomly. It is created here
* from the value m_RSeed holds at construction time.
*/
protected Random rand = new Random(m_RSeed);
/**
* Radius of the ball around the centroid of all the data points, as
* computed by buildTree(). The value -1 marks it as not yet computed;
* setInstances() and setInstanceList() reset it to -1.
*/
private double rootRadius = -1;
/**
* True if the initial anchor is chosen randomly. False if it is the furthest
* point from the mean/centroid.
*/
protected boolean m_RandomInitialAnchor = true;
/**
* Creates a new instance of MiddleOutConstructor.
*/
public MiddleOutConstructor() {
  // Nothing to set up beyond the field initialisers of this class.
  super();
}
/**
* Returns a string describing this nearest neighbour search algorithm.
*
* @return a description of the algorithm for displaying in the
* explorer/experimenter gui
*/
public String globalInfo() {
  // Fixed introduction followed by the textual form of the references
  // supplied by getTechnicalInformation().
  final String references = getTechnicalInformation().toString();
  final String intro =
    "The class that builds a BallTree middle out.\n\n"
    + "For more information see also:\n\n";
  return intro + references;
}
/**
* Returns an instance of a TechnicalInformation object, containing detailed
* information about the technical background of this class, e.g., paper
* reference or book this class is based on.
*
* @return the technical information about this class
*/
public TechnicalInformation getTechnicalInformation() {
  // Primary entry: the conference paper on the anchors hierarchy.
  final TechnicalInformation paper = new TechnicalInformation(Type.INPROCEEDINGS);
  paper.setValue(Field.AUTHOR, "Andrew W. Moore");
  paper.setValue(Field.TITLE, "The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data");
  paper.setValue(Field.YEAR, "2000");
  paper.setValue(Field.BOOKTITLE, "UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence");
  paper.setValue(Field.PAGES, "397-405");
  paper.setValue(Field.PUBLISHER, "Morgan Kaufmann Publishers Inc.");
  paper.setValue(Field.ADDRESS, "San Francisco, CA, USA");

  // Additional entry, attached to the primary one: the master's thesis.
  final TechnicalInformation thesis = paper.add(Type.MASTERSTHESIS);
  thesis.setValue(Field.AUTHOR, "Ashraf Masood Kibriya");
  thesis.setValue(Field.TITLE, "Fast Algorithms for Nearest Neighbour Search");
  thesis.setValue(Field.YEAR, "2007");
  thesis.setValue(Field.SCHOOL, "Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato");
  thesis.setValue(Field.ADDRESS, "Hamilton, New Zealand");

  return paper;
}
/**
* Builds a ball tree middle out.
* @return The root node of the tree.
* @throws Exception If there is problem building
* the tree.
*/
public BallNode buildTree() throws Exception {
  // Reset the construction counters before building.
  m_NumLeaves = 0;
  m_MaxDepth = 0;
  m_NumNodes = 0;

  // -1 is the "not yet computed" marker for the root radius; compute it
  // around the centroid of the whole index list when it is still unset.
  final boolean radiusUnset = (rootRadius == -1);
  if (radiusUnset) {
    rootRadius = BallNode.calcRadius(
      m_InstList,
      m_Instances,
      BallNode.calcCentroidPivot(m_InstList, m_Instances),
      m_DistanceFunction);
  }

  // Build over the full range of instance positions.
  final int lastIdx = m_Instances.numInstances() - 1;
  return buildTreeMiddleOut(0, lastIdx);
}
/**
* Builds a ball tree middle out from the
* portion of the master index array given
* by supplied start and end index.
* @param startIdx The start of the portion
* in master index array.
* @param endIdx the end of the portion in
* master index array.
* @return The root node of the built tree.
* @throws Exception If there is some
* problem building the tree.
*/
protected BallNode buildTreeMiddleOut(int startIdx, int endIdx)
  throws Exception {
  final int count = endIdx - startIdx + 1;
  // The number of anchors is the rounded square root of the portion size.
  final int anchorCount = (int) Math.round(Math.sqrt(count));

  // Every outcome needs the centroid pivot and radius of this portion.
  final Instance centre =
    BallNode.calcCentroidPivot(startIdx, endIdx, m_InstList, m_Instances);
  final double ballRadius =
    BallNode.calcRadius(startIdx, endIdx, m_InstList, m_Instances,
                        centre, m_DistanceFunction);

  // A single node (no anchors hierarchy) is returned when there would be at
  // most one anchor, when the portion already fits in a leaf, or when the
  // ball is small enough relative to the root ball (a zero root radius
  // always counts as small enough).
  final boolean tooFewForAnchors = anchorCount <= 1;
  final boolean fitsInLeaf = count <= m_MaxInstancesInLeaf;
  final boolean relativelySmall =
    rootRadius == 0 || ballRadius / rootRadius < m_MaxRelLeafRadius;
  if (tooFewForAnchors || fitsInLeaf || relativelySmall) {
    return new BallNode(startIdx, endIdx, m_NumNodes, centre, ballRadius);
  }

  // Otherwise: create the anchors, merge them into one tree and then apply
  // the same procedure to the leaves of that tree.
  final Vector anchorNodes = new Vector(anchorCount);
  createAnchorsHierarchy(anchorNodes, anchorCount, startIdx, endIdx);
  final BallNode top = mergeNodes(anchorNodes, startIdx, endIdx);
  buildLeavesMiddleOut(top);
  return top;
}
/**
* Creates an anchors hierarchy from a portion
* of master index array.
*
* @param anchors The vector for putting the anchors
* into.
* @param numAnchors The number of anchors to create.
* @param startIdx The start of the portion of master
* index array.
* @param endIdx The end of the portion of master
* index array.
* @throws Exception If there is some problem in creating
* the hierarchy.
*/
protected void createAnchorsHierarchy(Vector anchors, final int numAnchors,
final int startIdx, final int endIdx)
throws Exception {
TempNode anchr1 = m_RandomInitialAnchor ?
getRandomAnchor(startIdx, endIdx) :
getFurthestFromMeanAnchor(startIdx, endIdx);
TempNode amax = anchr1; //double maxradius = anchr1.radius;
TempNode newAnchor;
Vector anchorDistances = new Vector(numAnchors-1);
anchors.add(anchr1);
//creating anchors
while(anchors.size() < numAnchors) {
//create new anchor
newAnchor = new TempNode();
newAnchor.points = new MyIdxList();
Instance newpivot = m_Instances.instance(((ListNode)amax.points.getFirst()).idx);
newAnchor.anchor = newpivot;
newAnchor.idx = ((ListNode)amax.points.getFirst()).idx;
setInterAnchorDistances(anchors, newAnchor, anchorDistances);
if(stealPoints(newAnchor, anchors, anchorDistances)) //if points stolen
newAnchor.radius = ((ListNode)newAnchor.points.getFirst()).distance;
else
newAnchor.radius = 0.0;
anchors.add(newAnchor);
//find new amax
amax = (TempNode)anchors.elementAt(0);
for(int i=1; i amax.radius)
amax = newAnchor;
}//end for
}//end while
}
/**
* Applies the middle out build procedure to
* the leaves of the tree. The leaf nodes
* should be the ones that were created by
* createAnchorsHierarchy(). The process
* continues recursively for the leaves
* created for each leaf of the given tree
* until for some leaf node <=
* m_MaxInstancesInLeaf instances remain
* in the leaf.
*
* @param node The root of the tree.
* @throws Exception If there is some problem
* in building the tree leaves.
*/
protected void buildLeavesMiddleOut(BallNode node) throws Exception {
  final boolean hasLeft = node.m_Left != null;
  final boolean hasRight = node.m_Right != null;

  // Exactly one child is never valid.
  if (hasLeft != hasRight) {
    throw new Exception("Invalid leaf assignment. Please check code");
  }

  // Internal node: descend into both children.
  if (hasLeft) {
    buildLeavesMiddleOut(node.m_Left);
    buildLeavesMiddleOut(node.m_Right);
    return;
  }

  // Leaf: try to build a subtree over the leaf's own portion.
  final BallNode subtree = buildTreeMiddleOut(node.m_Start, node.m_End);
  final boolean subLeft = subtree.m_Left != null;
  final boolean subRight = subtree.m_Right != null;
  if (subLeft != subRight) {
    throw new Exception("Invalid leaf assignment. Please check code");
  }
  if (subLeft) {
    // The portion was split: adopt the children and continue on this node.
    // The recursion ends once buildTreeMiddleOut returns an unsplit node,
    // in which case nothing is adopted and no further call is made.
    node.m_Left = subtree.m_Left;
    node.m_Right = subtree.m_Right;
    buildLeavesMiddleOut(node);
  }
}
/**
* Merges nodes created by createAnchorsHierarchy()
* into one top node.
*
* @param list List of anchor nodes.
* @param startIdx The start of the portion of
* master index array containing these anchor
* nodes.
* @param endIdx The end of the portion of master
* index array containing these anchor nodes.
* @return The top/root node after merging
* the given anchor nodes.
* @throws Exception If there is some problem in
* merging.
*/
protected BallNode mergeNodes(Vector list, int startIdx, int endIdx)
throws Exception {
// NOTE(review): the text of this listing is damaged from here on. Each
// "for(int i=0; i" below breaks off at the '<' of its loop condition and
// the text resumes only after a later '>'. The loop bodies, the local
// declarations (e.g. minRadius, anchor, temp, tmpr) and the header of the
// method that owns the trailing statements are therefore missing.
// Restore this span from the upstream sources before compiling.
for(int i=0; i 1) { //main merging loop
// Reset the smallest radius seen so far before the next scan.
minRadius=Double.POSITIVE_INFINITY;
for(int i=0; i anchor.radius) {
// Record the point at position i of the master index array, together
// with tmpr, as the anchor.
anchor.idx = m_InstList[i];
anchor.anchor = temp;
anchor.radius = tmpr;
}
}
// Fill the anchor's point list from the portion startIdx..endIdx.
setPoints(anchor, startIdx, endIdx, m_InstList);
return anchor;
}
/**
* Returns a random anchor point/instance from a
* given set of points/instances.
*
* @param startIdx The start index of the points
* for which anchor is required.
* @param endIdx The end index of the points for
* which anchor is required.
* @return The random anchor point/instance
* for the given set of points/instances.
*/
protected TempNode getRandomAnchor(int startIdx, int endIdx) {
  final TempNode chosen = new TempNode();

  // Pick a uniformly random position inside [startIdx, endIdx].
  final int span = endIdx - startIdx + 1;
  final int position = startIdx + rand.nextInt(span);
  chosen.idx = m_InstList[position];
  chosen.anchor = m_Instances.instance(chosen.idx);

  // Attach every point of the portion together with its distance to the anchor.
  setPoints(chosen, startIdx, endIdx, m_InstList);

  // The radius is the distance stored with the first list entry
  // (presumably the largest one, given insertReverseSorted - TODO confirm).
  final ListNode head = (ListNode) chosen.points.getFirst();
  chosen.radius = head.distance;
  return chosen;
}
/**
* Sets the points of an anchor node. It takes the
* indices of points from the given portion of
* an index array and stores those indices, together
* with their distances to the given anchor node,
* in the point index list of the anchor node.
*
* @param node The node in which the points are
* needed to be set.
* @param startIdx The start of the portion in
* the given index array (the master index
* array).
* @param endIdx The end of the portion in the
* given index array.
* @param indices The index array.
*/
public void setPoints(TempNode node, int startIdx, int endIdx, int[] indices) {
  // Start from a fresh list; any previous list of the node is dropped.
  node.points = new MyIdxList();

  int pos = startIdx;
  while (pos <= endIdx) {
    final int instIdx = indices[pos];
    final Instance member = m_Instances.instance(instIdx);
    // Store the index with its distance to the node's anchor.
    final double toAnchor = m_DistanceFunction.distance(node.anchor, member);
    node.points.insertReverseSorted(instIdx, toAnchor);
    pos++;
  }
}
/**
* Sets the distances of a supplied new
* anchor to all the rest of the
* previous anchor points.
* @param anchors The old anchor points.
* @param newAnchor The new anchor point.
* @param anchorDistances The vector to
* store the distances of newAnchor to
* each of the old anchors.
* @throws Exception If there is some
* problem in calculating the distances.
*/
public void setInterAnchorDistances(Vector anchors, TempNode newAnchor,
Vector anchorDistances) throws Exception {
// One slot per anchor already in the vector.
double[] distArray = new double[anchors.size()];
// NOTE(review): the text of this listing is damaged from here on. Each
// loop header below breaks off at the '<' of its condition and the text
// resumes only after a later '>'. The rest of this method and the header
// of the method that owns the trailing "return radius" are missing.
// Restore this span from the upstream sources before compiling.
for(int i=0; iradius)
radius = dist;
}
for(int j=0; jradius)
radius = dist;
}
return radius;
}
/**
* Adds an instance to the tree. This implementation of
* MiddleOutConstructor doesn't support addition of
* instances to already built tree, hence it always
* throws an exception.
* @param node The root of the tree to which the
* instance is to be added.
* @param inst The instance to add to the tree.
* @return The updated master index array after
* adding the instance.
* @throws Exception Always as this implementation of
* MiddleOutConstructor doesn't support addition of
* instances after batch construction of the tree.
*/
public int[] addInstance(BallNode node, Instance inst) throws Exception {
  // Incremental addition is not supported: always refuse.
  final String reason =
    "Addition of instances after the tree is built, not "
    + "possible with MiddleOutConstructor.";
  throw new Exception(reason);
}
/**
* Sets the maximum number of instances allowed in a leaf.
* @param num The maximum number of instances allowed in
* a leaf.
* @throws Exception If the num is < 2, as the method
* cannot work for < 2 instances.
*/
public void setMaxInstancesInLeaf(int num) throws Exception {
  // Values below 2 are rejected before the superclass setter is called.
  final boolean acceptable = num >= 2;
  if (!acceptable) {
    final String complaint =
      "The maximum number of instances in a leaf for "
      + "using MiddleOutConstructor must be >=2.";
    throw new Exception(complaint);
  }
  super.setMaxInstancesInLeaf(num);
}
/**
* Sets the instances on which the tree is to be built.
* @param insts The instances on which to build the
* ball tree.
*/
public void setInstances(Instances insts) {
  super.setInstances(insts);
  // Mark the root radius as unset so that buildTree() recalculates it.
  this.rootRadius = -1;
}
/**
* Sets the master index array that points to
* instances in m_Instances, so that only this array
* is manipulated, and m_Instances is left
* untouched.
* @param instList The master index array.
*/
public void setInstanceList(int[] instList) {
  super.setInstanceList(instList);
  // Mark the root radius as unset so that buildTree() recalculates it.
  this.rootRadius = -1;
}
/**
* Returns the tip text for this property.
*
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String initialAnchorRandomTipText() {
  // Tip text for the initialAnchorRandom property.
  final String tip = "Whether the initial anchor is chosen randomly.";
  return tip;
}
/**
* Gets whether the initial anchor is chosen randomly.
* @return true if the initial anchor is a random one.
*/
public boolean isInitialAnchorRandom() {
  // Plain accessor for the random-initial-anchor flag.
  return this.m_RandomInitialAnchor;
}
/**
* Sets whether the initial anchor is chosen randomly. If not,
* the initial anchor is the furthest point from the mean/centroid.
* @param randomInitialAnchor Should be true if the first
* anchor is to be chosen randomly.
*/
public void setInitialAnchorRandom(boolean randomInitialAnchor) {
  // Plain mutator for the random-initial-anchor flag.
  this.m_RandomInitialAnchor = randomInitialAnchor;
}
/**
* Returns the tip text for this property.
*
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String seedTipText() {
  // Tip text for the seed property.
  final String tip = "The seed value for the random number generator.";
  return tip;
}
/**
* Returns the seed for random number generator.
* @return The random number seed.
*/
public int getSeed() {
  // Plain accessor for the stored seed value.
  return this.m_RSeed;
}
/**
* Sets the seed for random number generator
* (that is used for selecting the first anchor
* point randomly).
* @param seed The seed.
*/
public void setSeed(int seed) {
  m_RSeed = seed;
  // The generator is created once, at field initialisation, from the
  // default seed. Without re-creating it here a new seed would be stored
  // but never used when picking the random initial anchor.
  rand = new Random(seed);
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
  final Vector<Option> described = new Vector<Option>();

  // -S takes one argument; the synopsis names it <num>, matching the
  // option list in the class documentation.
  described.addElement(new Option(
    "\tThe seed for the random number generator used\n"
    + "\tin selecting random anchor.\n"
    + "(default: 1)",
    "S", 1, "-S <num>"));

  // -R is a flag without arguments.
  described.addElement(new Option(
    "\tUse randomly chosen initial anchors.",
    "R", 0, "-R"));

  return described.elements();
}
/**
* Parses a given list of options.
*
* Valid options are:
*
* -S <num>
* The seed for the random number generator used
* in selecting random anchor.
* (default: 1)
*
* -R
* Use randomly chosen initial anchors.
*
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
**/
public void setOptions(String[] options)
  throws Exception {
  // Let the superclass process the options first.
  super.setOptions(options);

  // -S <num>: seed; falls back to 1 when the option is absent.
  final String seedText = Utils.getOption('S', options);
  final int seed = (seedText.length() > 0) ? Integer.parseInt(seedText) : 1;
  setSeed(seed);

  // -R: flag selecting a randomly chosen initial anchor.
  final boolean randomAnchor = Utils.getFlag('R', options);
  setInitialAnchorRandom(randomAnchor);
}
/**
* Gets the current settings of this BallTree MiddleOutConstructor.
*
* @return an array of strings suitable for passing to setOptions
*/
public String[] getOptions() {
  // The element type is required: with a raw Vector, toArray(String[]) has
  // static type Object[] and cannot be returned as String[].
  final Vector<String> result = new Vector<String>();

  // Options of the superclass come first.
  final String[] inherited = super.getOptions();
  for (int i = 0; i < inherited.length; i++) {
    result.add(inherited[i]);
  }

  // The seed is always reported.
  result.add("-S");
  result.add("" + getSeed());

  // The flag is reported only when set.
  if (isInitialAnchorRandom()) {
    result.add("-R");
  }

  return result.toArray(new String[result.size()]);
}
/**
* Checks whether the points in an index list
* are in some specified portion of the master index array.
* @param list The point list.
* @param startidx The start of the portion in
* master index array.
* @param endidx The end of the portion in master
* index array.
* @throws Exception If some point in the point
* list is not in the specified portion of master
* index array.
*/
public void checkIndicesList(MyIdxList list, int startidx, int endidx)
throws Exception {
boolean found;
ListNode node;
for(int i=0; i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy