All downloads are free. Search and download functionality uses the official Maven repository.

weka.core.neighboursearch.balltrees.MiddleOutConstructor Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.

There is a newer version: 3.8.6
Show newest version
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * MiddleOutConstructor.java
 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
 */

package weka.core.neighboursearch.balltrees;

import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Randomizable;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

/**
 * The class that builds a BallTree middle out.<br/>
 * <br/>
 * For more information see also:<br/>
 * <br/>
 * Andrew W. Moore: The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data. In: UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence, San Francisco, CA, USA, 397-405, 2000.<br/>
 * <br/>
 * Ashraf Masood Kibriya (2007). Fast Algorithms for Nearest Neighbour Search. Hamilton, New Zealand.
 * <p/>
 *
 * BibTeX:
 * <pre>
 * &#64;inproceedings{Moore2000,
 *    address = {San Francisco, CA, USA},
 *    author = {Andrew W. Moore},
 *    booktitle = {UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence},
 *    pages = {397-405},
 *    publisher = {Morgan Kaufmann Publishers Inc.},
 *    title = {The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data},
 *    year = {2000}
 * }
 *
 * &#64;mastersthesis{Kibriya2007,
 *    address = {Hamilton, New Zealand},
 *    author = {Ashraf Masood Kibriya},
 *    school = {Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato},
 *    title = {Fast Algorithms for Nearest Neighbour Search},
 *    year = {2007}
 * }
 * </pre>
 * <p/>
 *
 * Valid options are: <p/>
 *
 * <pre> -S &lt;num&gt;
 *  The seed for the random number generator used
 *  in selecting random anchor.
 * (default: 1)</pre>
 *
 * <pre> -R
 *  Use randomly chosen initial anchors.</pre>
* * * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @version $Revision: 1.3 $ */ public class MiddleOutConstructor extends BallTreeConstructor implements Randomizable, TechnicalInformationHandler { /** for serialization. */ private static final long serialVersionUID = -8523314263062524462L; /** Seed form random number generator. */ protected int m_RSeed = 1; /** * The random number generator for selecting * the first anchor point randomly * (if selecting randomly). */ protected Random rand = new Random(m_RSeed); /** * The radius of the smallest ball enclosing all the data points. */ private double rootRadius = -1; /** * True if the initial anchor is chosen randomly. False if it is the furthest * point from the mean/centroid. */ protected boolean m_RandomInitialAnchor = true; /** * Creates a new instance of MiddleOutConstructor. */ public MiddleOutConstructor() { } /** * Returns a string describing this nearest neighbour search algorithm. * * @return a description of the algorithm for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "The class that builds a BallTree middle out.\n\n" + "For more information see also:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing detailed * information about the technical background of this class, e.g., paper * reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; TechnicalInformation additional; result = new TechnicalInformation(Type.INPROCEEDINGS); result.setValue(Field.AUTHOR, "Andrew W. 
Moore"); result.setValue(Field.TITLE, "The Anchors Hierarchy: Using the Triangle Inequality to Survive High Dimensional Data"); result.setValue(Field.YEAR, "2000"); result.setValue(Field.BOOKTITLE, "UAI '00: Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence"); result.setValue(Field.PAGES, "397-405"); result.setValue(Field.PUBLISHER, "Morgan Kaufmann Publishers Inc."); result.setValue(Field.ADDRESS, "San Francisco, CA, USA"); additional = result.add(Type.MASTERSTHESIS); additional.setValue(Field.AUTHOR, "Ashraf Masood Kibriya"); additional.setValue(Field.TITLE, "Fast Algorithms for Nearest Neighbour Search"); additional.setValue(Field.YEAR, "2007"); additional.setValue(Field.SCHOOL, "Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato"); additional.setValue(Field.ADDRESS, "Hamilton, New Zealand"); return result; } /** * Builds a ball tree middle out. * @return The root node of the tree. * @throws Exception If there is problem building * the tree. */ public BallNode buildTree() throws Exception { m_NumNodes = m_MaxDepth = m_NumLeaves = 0; if(rootRadius == -1) { rootRadius = BallNode.calcRadius(m_InstList, m_Instances, BallNode.calcCentroidPivot(m_InstList, m_Instances), m_DistanceFunction); } BallNode root = buildTreeMiddleOut(0, m_Instances.numInstances()-1); return root; } /** * Builds a ball tree middle out from the * portion of the master index array given * by supplied start and end index. * @param startIdx The start of the portion * in master index array. * @param endIdx the end of the portion in * master index array. * @return The root node of the built tree. * @throws Exception If there is some * problem building the tree. 
*/
  protected BallNode buildTreeMiddleOut(int startIdx, int endIdx)
    throws Exception {

    int numInsts = endIdx - startIdx + 1;
    // The number of anchors is the rounded square root of the range size.
    int numAnchors = (int) Math.round(Math.sqrt(numInsts));

    // Pivot and radius are needed whether the range becomes a leaf or is
    // split further, so compute them once up front.
    Instance pivot = BallNode.calcCentroidPivot(startIdx, endIdx,
                                                m_InstList, m_Instances);
    double radius = BallNode.calcRadius(startIdx, endIdx, m_InstList,
                                        m_Instances, pivot,
                                        m_DistanceFunction);

    // A single anchor cannot form a hierarchy.
    boolean tooFewAnchors = numAnchors <= 1;
    // Stop splitting on few instances, or on a small radius relative to the
    // root's; a zero root radius is treated as "small enough".
    boolean smallEnough = numInsts <= m_MaxInstancesInLeaf
                          || rootRadius == 0
                          || radius / rootRadius < m_MaxRelLeafRadius;

    if (tooFewAnchors || smallEnough) {
      // just make a leaf, don't make an anchors hierarchy
      return new BallNode(startIdx, endIdx, m_NumNodes, pivot, radius);
    }

    // create the anchors hierarchy, merge it into one tree and then
    // recursively refine the leaves of that tree
    Vector anchors = new Vector(numAnchors);
    createAnchorsHierarchy(anchors, numAnchors, startIdx, endIdx);
    BallNode node = mergeNodes(anchors, startIdx, endIdx);
    buildLeavesMiddleOut(node);
    return node;
  }

  /**
   * Creates an anchors hierarchy from a portion of master index array.
   *
   * @param anchors The vector for putting the anchors into.
   * @param numAnchors The number of anchors to create.
   * @param startIdx The start of the portion of master index array.
   * @param endIdx The end of the portion of master index array.
   * @throws Exception If there is some problem in creating the hierarchy.
   */
  protected void createAnchorsHierarchy(Vector anchors, final int numAnchors,
      final int startIdx, final int endIdx) throws Exception {
    TempNode anchr1 = m_RandomInitialAnchor ?
getRandomAnchor(startIdx, endIdx) : getFurthestFromMeanAnchor(startIdx, endIdx); TempNode amax = anchr1; //double maxradius = anchr1.radius; TempNode newAnchor; Vector anchorDistances = new Vector(numAnchors-1); anchors.add(anchr1); //creating anchors while(anchors.size() < numAnchors) { //create new anchor newAnchor = new TempNode(); newAnchor.points = new MyIdxList(); Instance newpivot = m_Instances.instance(((ListNode)amax.points.getFirst()).idx); newAnchor.anchor = newpivot; newAnchor.idx = ((ListNode)amax.points.getFirst()).idx; setInterAnchorDistances(anchors, newAnchor, anchorDistances); if(stealPoints(newAnchor, anchors, anchorDistances)) //if points stolen newAnchor.radius = ((ListNode)newAnchor.points.getFirst()).distance; else newAnchor.radius = 0.0; anchors.add(newAnchor); //find new amax amax = (TempNode)anchors.elementAt(0); for(int i=1; i amax.radius) amax = newAnchor; }//end for }//end while } /** * Applies the middle out build procedure to * the leaves of the tree. The leaf nodes * should be the ones that were created by * createAnchorsHierarchy(). The process * continues recursively for the leaves * created for each leaf of the given tree * until for some leaf node <= * m_MaxInstancesInLeaf instances remain * in the leaf. * * @param node The root of the tree. * @throws Exception If there is some problem * in building the tree leaves. */ protected void buildLeavesMiddleOut(BallNode node) throws Exception { if(node.m_Left!=null && node.m_Right!=null) { //if an internal node buildLeavesMiddleOut(node.m_Left); buildLeavesMiddleOut(node.m_Right); } else if(node.m_Left!=null || node.m_Right!=null) { throw new Exception("Invalid leaf assignment. 
Please check code"); } else { //if node is a leaf BallNode n2 = buildTreeMiddleOut(node.m_Start, node.m_End); if(n2.m_Left!=null && n2.m_Right!=null) { node.m_Left = n2.m_Left; node.m_Right = n2.m_Right; buildLeavesMiddleOut(node); //the stopping condition in buildTreeMiddleOut will stop the recursion, //where it won't split a node at all, and we won't recurse here. } else if(n2.m_Left!=null || n2.m_Right!=null) throw new Exception("Invalid leaf assignment. Please check code"); } } /** * Merges nodes created by createAnchorsHierarchy() * into one top node. * * @param list List of anchor nodes. * @param startIdx The start of the portion of * master index array containing these anchor * nodes. * @param endIdx The end of the portion of master * index array containing these anchor nodes. * @return The top/root node after merging * the given anchor nodes. * @throws Exception IF there is some problem in * merging. */ protected BallNode mergeNodes(Vector list, int startIdx, int endIdx) throws Exception { for(int i=0; i 1) { //main merging loop minRadius=Double.POSITIVE_INFINITY; for(int i=0; i anchor.radius) { anchor.idx = m_InstList[i]; anchor.anchor = temp; anchor.radius = tmpr; } } setPoints(anchor, startIdx, endIdx, m_InstList); return anchor; } /** * Returns a random anchor point/instance from a * given set of points/instances. * * @param startIdx The start index of the points * for which anchor is required. * @param endIdx The end index of the points for * which anchor is required. * @return The random anchor point/instance * for the given set of */ protected TempNode getRandomAnchor(int startIdx, int endIdx) { TempNode anchr1 = new TempNode(); anchr1.idx = m_InstList[startIdx+rand.nextInt((endIdx-startIdx+1))]; anchr1.anchor = m_Instances.instance(anchr1.idx); setPoints(anchr1, startIdx, endIdx, m_InstList); anchr1.radius = ((ListNode)anchr1.points.getFirst()).distance; return anchr1; } /** * Sets the points of an anchor node. 
It takes the * indices of points from the given portion of * an index array and stores those indices, together * with their distances to the given anchor node, * in the point index list of the anchor node. * * @param node The node in which the points are * needed to be set. * @param startIdx The start of the portion in * the given index array (the master index * array). * @param endIdx The end of the portion in the * given index array. * @param indices The index array. */ public void setPoints(TempNode node, int startIdx, int endIdx, int[] indices) { node.points = new MyIdxList(); Instance temp; double dist; for(int i=startIdx; i<=endIdx; i++) { temp = m_Instances.instance(indices[i]); dist = m_DistanceFunction.distance(node.anchor, temp); node.points.insertReverseSorted(indices[i], dist); } } /** * Sets the distances of a supplied new * anchor to all the rest of the * previous anchor points. * @param anchors The old anchor points. * @param newAnchor The new anchor point. * @param anchorDistances The vector to * store the distances of newAnchor to * each of the old anchors. * @throws Exception If there is some * problem in calculating the distances. */ public void setInterAnchorDistances(Vector anchors, TempNode newAnchor, Vector anchorDistances) throws Exception { double[] distArray = new double[anchors.size()]; for(int i=0; iradius) radius = dist; } for(int j=0; jradius) radius = dist; } return radius; } /** * Adds an instance to the tree. This implementation of * MiddleOutConstructor doesn't support addition of * instances to already built tree, hence it always * throws an exception. * @param node The root of the tree to which the * instance is to be added. * @param inst The instance to add to the tree. * @return The updated master index array after * adding the instance. * @throws Exception Always as this implementation of * MiddleOutConstructor doesn't support addition of * instances after batch construction of the tree. 
*/ public int[] addInstance(BallNode node, Instance inst) throws Exception { throw new Exception("Addition of instances after the tree is built, not " + "possible with MiddleOutConstructor."); } /** * Sets the maximum number of instances allowed in a leaf. * @param num The maximum number of instances allowed in * a leaf. * @throws Exception If the num is < 2, as the method * cannot work for < 2 instances. */ public void setMaxInstancesInLeaf(int num) throws Exception { if(num<2) throw new Exception("The maximum number of instances in a leaf for " + "using MiddleOutConstructor must be >=2."); super.setMaxInstancesInLeaf(num); } /** * Sets the instances on which the tree is to be built. * @param inst The instances on which to build the * ball tree. */ public void setInstances(Instances insts) { super.setInstances(insts); rootRadius = -1; //this needs to be re-calculated by buildTree() } /** * Sets the master index array that points to * instances in m_Instances, so that only this array * is manipulated, and m_Instances is left * untouched. * @param instList The master index array. */ public void setInstanceList(int[] instList) { super.setInstanceList(instList); rootRadius = -1; //this needs to be re-calculated by buildTree() } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String initialAnchorRandomTipText() { return "Whether the initial anchor is chosen randomly."; } /** * Gets whether if the initial anchor is chosen randomly. * @return true if the initial anchor is a random one. */ public boolean isInitialAnchorRandom() { return m_RandomInitialAnchor; } /** * Sets whether if the initial anchor is chosen randomly. If not * then if it is the furthest point from the mean/centroid. * @param randomInitialAnchor Should be true if the first * anchor is to be chosen randomly. 
*/
  public void setInitialAnchorRandom(boolean randomInitialAnchor) {
    m_RandomInitialAnchor = randomInitialAnchor;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for displaying in the
   * explorer/experimenter gui
   */
  public String seedTipText() {
    return "The seed value for the random number generator.";
  }

  /**
   * Returns the seed for random number generator.
   *
   * @return The random number seed.
   */
  public int getSeed() {
    return m_RSeed;
  }

  /**
   * Sets the seed for random number generator (that is used for selecting
   * the first anchor point randomly) and re-creates the generator from it.
   *
   * @param seed The seed.
   */
  public void setSeed(int seed) {
    m_RSeed = seed;
    // The generator is created once at field initialisation, so storing
    // the seed alone would never affect the random anchor selection.
    rand = new Random(seed);
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector<Option> newVector = new Vector<Option>();

    newVector.addElement(new Option(
        "\tThe seed for the random number generator used\n"
        + "\tin selecting random anchor.\n"
        + "(default: 1)",
        "S", 1, "-S <num>"));

    newVector.addElement(new Option(
        "\tUse randomly chosen initial anchors.",
        "R", 0, "-R"));

    // NOTE(review): setOptions()/getOptions() delegate to the superclass but
    // this method does not list the superclass options - confirm whether
    // that is intended.
    return newVector.elements();
  }

  /**
   * Parses a given list of options.
   *
   * Valid options are: <p/>

*
   * <pre> -S &lt;num&gt;
   *  The seed for the random number generator used
   *  in selecting random anchor.
   * (default: 1)</pre>
   *
   * <pre> -R
   *  Use randomly chosen initial anchors.</pre>
*
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options)
    throws Exception {

    super.setOptions(options);

    // -S <num>: seed; falls back to 1 when the option is absent
    String temp = Utils.getOption('S', options);
    if (temp.length() > 0) {
      setSeed(Integer.parseInt(temp));
    }
    else {
      setSeed(1);
    }

    // -R: random initial anchor; absent flag switches it off
    setInitialAnchorRandom(Utils.getFlag('R', options));
  }

  /**
   * Gets the current settings of this BallTree MiddleOutConstructor: the
   * superclass options followed by -S with the seed and, if the initial
   * anchor is random, -R.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    // Typed vector: with a raw Vector, toArray(String[]) yields Object[]
    // and cannot be returned as String[].
    Vector<String> result = new Vector<String>();

    String[] options = super.getOptions();
    for (int i = 0; i < options.length; i++) {
      result.add(options[i]);
    }

    result.add("-S");
    result.add("" + getSeed());

    if (isInitialAnchorRandom()) {
      result.add("-R");
    }

    return result.toArray(new String[result.size()]);
  }

  /**
   * Checks whether the points in an index list are in some specified
   * portion of the master index array.
   *
   * @param list The point list.
   * @param startidx The start of the portion in master index array.
   * @param endidx The end of the portion in master index array.
   * @throws Exception If some point in the point list is not in the
   * specified portion of master index array.
   */
  public void checkIndicesList(MyIdxList list, int startidx, int endidx) throws Exception { boolean found; ListNode node; for(int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy