All downloads are free. The search and download functionality uses the official Maven repository.

weka.core.neighboursearch.CoverTree Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * CoverTree.java
 * Copyright (C) 2006 Alina Beygelzimer and Sham Kakade and John Langford
 */

package weka.core.neighboursearch;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.Serializable;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Vector;

import weka.core.DistanceFunction;
import weka.core.EuclideanDistance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.converters.CSVLoader;
import weka.core.neighboursearch.covertrees.Stack;

/**
 *  Class implementing the CoverTree datastructure.
 * The class is very much a translation of the C source code made available by
 * the authors.
*
* For more information and original source code see:
*
 * Alina Beygelzimer, Sham Kakade, John Langford: Cover trees for nearest
 * neighbor. In: ICML'06: Proceedings of the 23rd international conference on
 * Machine learning, New York, NY, USA, 97-104, 2006.
 *

 *
 * BibTeX:
 *

 * @inproceedings{Beygelzimer2006,
 *    address = {New York, NY, USA},
 *    author = {Alina Beygelzimer and Sham Kakade and John Langford},
 *    booktitle = {ICML'06: Proceedings of the 23rd international conference on Machine learning},
 *    pages = {97-104},
 *    publisher = {ACM Press},
 *    title = {Cover trees for nearest neighbor},
 *    year = {2006},
 *    location = {Pittsburgh, Pennsylvania},
 *    HTTP = {http://hunch.net/\~jl/projects/cover_tree/cover_tree.html}
 * }
 * 
*

 *
 * Valid options are:
 *

* *

 * -B <value>
 *  Set base of the expansion constant
 *  (default = 1.3).
 * 
* * * * @author Alina Beygelzimer (original C++ code) * @author Sham Kakade (original C++ code) * @author John Langford (original C++ code) * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * (Java port) * @version $Revision: 10203 $ */ public class CoverTree extends NearestNeighbourSearch implements TechnicalInformationHandler { /** for serialization. */ private static final long serialVersionUID = 7617412821497807586L; /** * class representing a node of the cover tree. * * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @version $Revision: 10203 $ */ public class CoverTreeNode implements Serializable, RevisionHandler { /** for serialization. */ private static final long serialVersionUID = 1808760031169036512L; /** Index of the instance represented by this node in the index array. */ private Integer idx; /** The distance of the furthest descendant of the node. */ private double max_dist; // The maximum distance to any grandchild. /** The distance to the nodes parent. */ private double parent_dist; // The distance to the parent. /** The children of the node. */ private Stack children; /** The number of children node has. */ private int num_children; // The number of children. /** The min i that makes base^i <= max_dist. */ private int scale; // Essentially, an upper bound on the distance to any // child. /** Constructor for the class. */ public CoverTreeNode() { } /** * Constructor. * * @param i The index of the Instance this node is associated with. * @param md The distance of the furthest descendant. * @param pd The distance of the node to its parent. * @param childs Children of the node in a stack. * @param numchilds The number of children of the node. * @param s The scale/level of the node in the tree. 
*/ public CoverTreeNode(Integer i, double md, double pd, Stack childs, int numchilds, int s) { idx = i; max_dist = md; parent_dist = pd; children = childs; num_children = numchilds; scale = s; } /** * Returns the instance represented by the node. * * @return The instance represented by the node. */ public Instance p() { return m_Instances.instance(idx); } /** * Returns whether if the node is a leaf or not. * * @return true if the node is a leaf node. */ public boolean isALeaf() { return num_children == 0; } /** * Returns the revision string. * * @return the revision */ @Override public String getRevision() { return RevisionUtils.extract("$Revision: 10203 $"); } } /** * Private class holding a point's distance to the current reference point p. * * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @version $Revision: 10203 $ */ private class DistanceNode implements RevisionHandler { /** * The last distance is to the current reference point (potential current * parent). The previous ones are to reference points that were previously * looked at (all potential ancestors). */ Stack dist; /** The index of the instance represented by this node. */ Integer idx; /** * Returns the instance represent by this DistanceNode. * * @return The instance represented by this node. */ public Instance q() { return m_Instances.instance(idx); } /** * Returns the revision string. * * @return the revision */ @Override public String getRevision() { return RevisionUtils.extract("$Revision: 10203 $"); } } /** The euclidean distance function to use. */ protected EuclideanDistance m_EuclideanDistance; { // to make sure we have only one object of EuclideanDistance if (m_DistanceFunction instanceof EuclideanDistance) { m_EuclideanDistance = (EuclideanDistance) m_DistanceFunction; } else { m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(); } } /** The root node. */ protected CoverTreeNode m_Root; /** * Array holding the distances of the nearest neighbours. 
It is filled up both * by nearestNeighbour() and kNearestNeighbours(). */ protected double[] m_DistanceList; /** Number of nodes in the tree. */ protected int m_NumNodes, m_NumLeaves, m_MaxDepth; /** Tree Stats variables. */ protected TreePerformanceStats m_TreeStats = null; /** * The base of our expansion constant. In other words the 2 in 2^i used in * covering tree and separation invariants of a cover tree. P.S.: In paper * it's suggested the separation invariant is relaxed in batch construction. */ protected double m_Base = 1.3; /** * if we have base 2 then this can be viewed as 1/ln(2), which can be used * later on to do il2*ln(d) instead of ln(d)/ln(2), to get log2(d), in * get_scale method. */ protected double il2 = 1.0 / Math.log(m_Base); /** * default constructor. */ public CoverTree() { super(); if (getMeasurePerformance()) { m_Stats = m_TreeStats = new TreePerformanceStats(); } } /** * Returns a string describing this nearest neighbour search algorithm. * * @return a description of the algorithm for displaying in the * explorer/experimenter gui */ @Override public String globalInfo() { return "Class implementing the CoverTree datastructure.\n" + "The class is very much a translation of the c source code made " + "available by the authors.\n\n" + "For more information and original source code see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing detailed * information about the technical background of this class, e.g., paper * reference or book this class is based on. 
*
   * The entry is of type INPROCEEDINGS and carries the author, title,
   * book title, pages, year, publisher, address, location and URL fields.
   *
   * @return the technical information about this class
   */
  @Override
  public TechnicalInformation getTechnicalInformation() {
    final TechnicalInformation info =
      new TechnicalInformation(Type.INPROCEEDINGS);

    // Fields are set in the same order as before; only the local name changed.
    info.setValue(Field.AUTHOR,
      "Alina Beygelzimer and Sham Kakade and John Langford");
    info.setValue(Field.TITLE, "Cover trees for nearest neighbor");
    info.setValue(Field.BOOKTITLE,
      "ICML'06: Proceedings of the 23rd international conference on Machine learning");
    info.setValue(Field.PAGES, "97-104");
    info.setValue(Field.YEAR, "2006");
    info.setValue(Field.PUBLISHER, "ACM Press");
    info.setValue(Field.ADDRESS, "New York, NY, USA");
    info.setValue(Field.LOCATION, "Pittsburgh, Pennsylvania");
    info.setValue(Field.HTTP,
      "http://hunch.net/~jl/projects/cover_tree/cover_tree.html");

    return info;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy