All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.core.neighboursearch.NearestNeighbourSearch Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    NearestNeighbourSearch.java
 *    Copyright (C) 1999-2012 University of Waikato
 */

package weka.core.neighboursearch;

import java.io.Serializable;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.core.AdditionalMeasureProducer;
import weka.core.DistanceFunction;
import weka.core.EuclideanDistance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;

/**
 * Abstract class for nearest neighbour search. All algorithms (classes) that do
 * nearest neighbour search should extend this class.
 * 
 * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
 * @version $Revision: 10203 $
 */
public abstract class NearestNeighbourSearch implements Serializable,
  OptionHandler, AdditionalMeasureProducer, RevisionHandler {

  /** ID to avoid warning */
  private static final long serialVersionUID = 7516898393890379876L;

  /**
   * A class for a heap to store the nearest k neighbours to an instance. The
   * heap also takes care of cases where multiple neighbours are the same
   * distance away. i.e. the minimum size of the heap is k.
   * <p>
   * The heap is ordered so that the element with the largest distance sits at
   * the top. Elements that tie with the k-th nearest distance are kept in a
   * separate overflow array (see {@link #putKthNearest(int, double)}).
   * 
   * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
   * @version $Revision: 10203 $
   */
  protected class MyHeap implements RevisionHandler {

    /**
     * the heap. Slot 0 is a bookkeeping element whose <code>index</code> field
     * holds the current number of elements; the elements themselves occupy
     * slots 1..size().
     */
    MyHeapElement m_heap[] = null;

    /** the kth nearest ones. */
    MyHeapElement m_KthNearest[] = null;

    /** The number of kth nearest elements. */
    int m_KthNearestSize = 0;

    /** the capacity used when (re)allocating the array of kth nearest ones. */
    int initSize = 10;

    /**
     * constructor.
     * 
     * @param maxSize the maximum size of the heap; an even value is rounded up
     *          to the next odd value
     */
    public MyHeap(int maxSize) {
      if ((maxSize % 2) == 0) {
        maxSize++;
      }

      // one extra slot for the bookkeeping element at position 0
      m_heap = new MyHeapElement[maxSize + 1];
      m_heap[0] = new MyHeapElement(0, 0);
    }

    /**
     * returns the size of the heap.
     * 
     * @return the size
     */
    public int size() {
      return m_heap[0].index;
    }

    /**
     * peeks at the first element.
     * 
     * @return the first element (the one with the largest distance), or null
     *         if the heap holds no elements
     */
    public MyHeapElement peek() {
      return m_heap[1];
    }

    /**
     * returns the first element and removes it from the heap.
     * 
     * @return the first element
     * @throws Exception if no elements in heap
     */
    public MyHeapElement get() throws Exception {
      int last = m_heap[0].index;
      if (last == 0) {
        throw new Exception("No elements present in the heap");
      }
      MyHeapElement r = m_heap[1];
      m_heap[1] = m_heap[last];
      // Clear the vacated slot so that no removed element lingers in the
      // array; in particular peek() must not hand out an element that was
      // already removed once the heap has become empty.
      m_heap[last] = null;
      m_heap[0].index--;
      downheap();
      return r;
    }

    /**
     * adds the value to the heap.
     * 
     * @param i the index
     * @param d the distance
     * @throws Exception if the heap gets too large
     */
    public void put(int i, double d) throws Exception {
      if ((m_heap[0].index + 1) > (m_heap.length - 1)) {
        throw new Exception("the number of elements cannot exceed the "
          + "initially set maximum limit");
      }
      m_heap[0].index++;
      m_heap[m_heap[0].index] = new MyHeapElement(i, d);
      upheap();
    }

    /**
     * Puts an element by substituting it in place of the top most element.
     * <p>
     * If the new top has the same distance as the removed head, the removed
     * head is remembered as an additional kth nearest element. If the new top
     * is closer than the removed head, all remembered kth nearest elements are
     * discarded.
     * 
     * @param i the index
     * @param d the distance
     * @throws Exception if the heap is empty, or if the given distance is
     *           larger than that of the head element (in which case the heap
     *           is left unchanged)
     */
    public void putBySubstitute(int i, double d) throws Exception {
      final String misuse = "The substituted element is smaller than the "
        + "head element. put() should have been called "
        + "in place of putBySubstitute()";

      // Validate before touching the heap: an element farther away than the
      // current head would end up as the new head and trigger the error below
      // only after the old head had already been lost.
      if (m_heap[0].index > 0 && d > m_heap[1].distance) {
        throw new Exception(misuse);
      }

      MyHeapElement head = get();
      put(i, d);
      if (head.distance == m_heap[1].distance) {
        putKthNearest(head.index, head.distance);
      } else if (head.distance > m_heap[1].distance) {
        m_KthNearest = null;
        m_KthNearestSize = 0;
        initSize = 10;
      } else if (head.distance < m_heap[1].distance) {
        // kept as a safety net for heaps whose ordering was already violated
        throw new Exception(misuse);
      }
    }

    /**
     * returns the number of k nearest.
     * 
     * @return the number of k nearest
     * @see #m_KthNearestSize
     */
    public int noOfKthNearest() {
      return m_KthNearestSize;
    }

    /**
     * Stores kth nearest elements (if there are more than one).
     * 
     * @param i the index
     * @param d the distance
     */
    public void putKthNearest(int i, double d) {
      if (m_KthNearest == null) {
        m_KthNearest = new MyHeapElement[initSize];
      }
      if (m_KthNearestSize >= m_KthNearest.length) {
        // array is full: double its capacity
        initSize += initSize;
        MyHeapElement temp[] = new MyHeapElement[initSize];
        System.arraycopy(m_KthNearest, 0, temp, 0, m_KthNearest.length);
        m_KthNearest = temp;
      }
      m_KthNearest[m_KthNearestSize++] = new MyHeapElement(i, d);
    }

    /**
     * returns the kth nearest element or null if none there. The most
     * recently stored element is returned first.
     * 
     * @return the kth nearest element
     */
    public MyHeapElement getKthNearest() {
      if (m_KthNearestSize == 0) {
        return null;
      }
      m_KthNearestSize--;
      return m_KthNearest[m_KthNearestSize];
    }

    /**
     * performs upheap operation for the heap to maintain its properties:
     * moves the last element up while it is farther away than its parent.
     */
    protected void upheap() {
      int i = m_heap[0].index;
      MyHeapElement temp;
      while (i > 1 && m_heap[i].distance > m_heap[i / 2].distance) {
        temp = m_heap[i];
        m_heap[i] = m_heap[i / 2];
        i = i / 2;
        m_heap[i] = temp; // this is i/2 done here to avoid another division.
      }
    }

    /**
     * performs downheap operation for the heap to maintain its properties:
     * moves the top element down while one of its children is farther away.
     */
    protected void downheap() {
      final int size = m_heap[0].index;
      int i = 1;
      while (true) {
        int left = 2 * i;
        int right = left + 1;
        boolean leftFarther = left <= size
          && m_heap[i].distance < m_heap[left].distance;
        boolean rightFarther = right <= size
          && m_heap[i].distance < m_heap[right].distance;
        if (!leftFarther && !rightFarther) {
          break;
        }

        // with two children pick the left one only if it is strictly farther
        // away than the right one; with a single child it is the left one
        int child = left;
        if (right <= size
          && !(m_heap[left].distance > m_heap[right].distance)) {
          child = right;
        }

        MyHeapElement temp = m_heap[i];
        m_heap[i] = m_heap[child];
        m_heap[child] = temp;
        i = child;
      }
    }

    /**
     * returns the total size.
     * 
     * @return the number of elements in the heap plus the number of stored
     *         kth nearest elements
     */
    public int totalSize() {
      return size() + noOfKthNearest();
    }

    /**
     * Returns the revision string.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 10203 $");
    }
  }

  /**
   * A heap entry: pairs an integer index with a distance value.
   * 
   * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
   * @version $Revision: 10203 $
   */
  protected class MyHeapElement implements RevisionHandler {

    /** the index of this element. */
    public int index;

    /** the distance of this element. */
    public double distance;

    /**
     * Creates an element from an index and a distance.
     * 
     * @param i the index to store
     * @param d the distance to store
     */
    public MyHeapElement(int i, double d) {
      this.index = i;
      this.distance = d;
    }

    /**
     * Returns the revision string.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
      final String keyword = "$Revision: 10203 $";
      return RevisionUtils.extract(keyword);
    }
  }

  /**
   * A singly linked list node holding a neighbouring instance, its distance
   * and a reference to the following node.
   * 
   * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
   * @version $Revision: 10203 $
   */
  // better to change this into a heap element
  protected class NeighborNode implements RevisionHandler {

    /** The neighbor instance. */
    public Instance m_Instance;

    /** The distance from the current instance to this neighbor. */
    public double m_Distance;

    /** A link to the next neighbor instance. */
    public NeighborNode m_Next;

    /**
     * Creates a node that is linked to a successor.
     * 
     * @param distance the distance to the neighbor
     * @param instance the neighbor instance
     * @param next the node that follows this one (may be null)
     */
    public NeighborNode(double distance, Instance instance, NeighborNode next) {
      this.m_Instance = instance;
      this.m_Distance = distance;
      this.m_Next = next;
    }

    /**
     * Creates a node without a successor.
     * 
     * @param distance the distance to the neighbor
     * @param instance the neighbor instance
     */
    public NeighborNode(double distance, Instance instance) {
      this.m_Instance = instance;
      this.m_Distance = distance;
      this.m_Next = null;
    }

    /**
     * Returns the revision string.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
      final String keyword = "$Revision: 10203 $";
      return RevisionUtils.extract(keyword);
    }
  }

  /**
   * A linked list, sorted by ascending distance, that stores the nearest k
   * neighbours to an instance. A list is used so that neighbours tying with
   * the k'th distance can all be kept, i.e. the list may grow beyond k.
   * 
   * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
   * @version $Revision: 10203 $
   */
  // better to change this into a heap
  protected class NeighborList implements RevisionHandler {

    /** The first node in the list. */
    protected NeighborNode m_First;

    /** The last node in the list. */
    protected NeighborNode m_Last;

    /** The number of nodes to attempt to maintain in the list. */
    protected int m_Length = 1;

    /**
     * Creates the neighborlist with a desired length.
     * 
     * @param length the length of list to attempt to maintain
     */
    public NeighborList(int length) {
      this.m_Length = length;
    }

    /**
     * Tells whether the list has no nodes.
     * 
     * @return true if list is empty
     */
    public boolean isEmpty() {
      return null == m_First;
    }

    /**
     * Counts the nodes by walking the list from the first node.
     * 
     * @return the current length of the list
     */
    public int currentLength() {
      int count = 0;
      for (NeighborNode node = m_First; node != null; node = node.m_Next) {
        count++;
      }
      return count;
    }

    /**
     * Inserts an instance neighbor into the list, keeping the list sorted by
     * distance, and afterwards cuts the list after the m_Length'th node unless
     * the following nodes share that node's distance.
     * 
     * @param distance the distance to the instance
     * @param instance the neighboring instance
     */
    public void insertSorted(double distance, Instance instance) {

      if (isEmpty()) {
        NeighborNode only = new NeighborNode(distance, instance);
        m_Last = only;
        m_First = only;
        return;
      }

      if (distance < m_First.m_Distance) {
        // strictly closer than the current head: becomes the new head
        m_First = new NeighborNode(distance, instance, m_First);
      } else {
        // advance to the last node whose successor is not strictly closer
        NeighborNode predecessor = m_First;
        while ((predecessor.m_Next != null)
          && (predecessor.m_Next.m_Distance < distance)) {
          predecessor = predecessor.m_Next;
        }
        NeighborNode inserted = new NeighborNode(distance, instance,
          predecessor.m_Next);
        predecessor.m_Next = inserted;
        if (predecessor.equals(m_Last)) {
          m_Last = inserted;
        }
      }

      // Walk the list until k nodes have been seen (or more, while the
      // following node has the same distance) and drop everything after.
      int seen = 0;
      NeighborNode node = m_First;
      while (node.m_Next != null) {
        seen++;
        boolean enough = seen >= m_Length;
        if (enough && (node.m_Distance != node.m_Next.m_Distance)) {
          m_Last = node;
          node.m_Next = null;
          break;
        }
        node = node.m_Next;
      }
    }

    /**
     * Prunes the list to contain the k nearest neighbors. If there are multiple
     * neighbors at the k'th distance, all will be kept.
     * 
     * @param k the number of neighbors to keep in the list; values below 1
     *          are treated as 1
     */
    public void pruneToK(int k) {

      if (isEmpty()) {
        return;
      }
      final int keep = (k < 1) ? 1 : k;

      int visited = 0;
      NeighborNode node = m_First;
      while (node.m_Next != null) {
        visited++;
        if ((visited >= keep) && (node.m_Distance != node.m_Next.m_Distance)) {
          m_Last = node;
          node.m_Next = null;
          break;
        }
        node = node.m_Next;
      }
    }

    /**
     * Prints out the contents of the neighborlist to standard output.
     */
    public void printList() {

      if (isEmpty()) {
        System.out.println("Empty list");
        return;
      }
      for (NeighborNode node = m_First; node != null; node = node.m_Next) {
        System.out.println("Node: instance " + node.m_Instance
          + ", distance " + node.m_Distance);
      }
      System.out.println();
    }

    /**
     * returns the first element in the list.
     * 
     * @return the first element
     */
    public NeighborNode getFirst() {
      return this.m_First;
    }

    /**
     * returns the last element in the list.
     * 
     * @return the last element
     */
    public NeighborNode getLast() {
      return this.m_Last;
    }

    /**
     * Returns the revision string.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
      final String keyword = "$Revision: 10203 $";
      return RevisionUtils.extract(keyword);
    }
  }

  /** The neighbourhood of instances to find neighbours in. */
  protected Instances m_Instances;

  /** The number of neighbours to find. */
  protected int m_kNN;

  /** the distance function used; initialised to a new EuclideanDistance. */
  protected DistanceFunction m_DistanceFunction = new EuclideanDistance();

  /** Performance statistics; initialised to null. */
  protected PerformanceStats m_Stats = null;

  /** Should we measure Performance; initialised to false. */
  protected boolean m_MeasurePerformance = false;

  /**
   * Default constructor. Creates the performance statistics object only when
   * performance measurement is switched on.
   */
  public NearestNeighbourSearch() {
    // NOTE(review): m_MeasurePerformance is initialised to false at its
    // declaration, so this guard returns unless that initial value changes.
    if (!m_MeasurePerformance) {
      return;
    }
    m_Stats = new PerformanceStats();
  }

  /**
   * Constructor. Runs the default constructor and then stores the supplied
   * instances.
   * 
   * @param insts The set of instances that constitute the neighbourhood.
   */
  public NearestNeighbourSearch(Instances insts) {
    this();
    this.m_Instances = insts;
  }

  /**
   * Returns a string describing this nearest neighbour search algorithm.
   * 
   * @return a description of the algorithm for displaying in the
   *         explorer/experimenter gui
   */
  public String globalInfo() {
    final String description = "Abstract class for nearest neighbour search. "
      + "All algorithms (classes) that do nearest neighbour search "
      + "should extend this class.";
    return description;
  }

  /**
   * Returns an enumeration describing the available options.
   * 
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration




© 2015 - 2025 Weber Informatics LLC | Privacy Policy