All downloads are free. Search and download functionality uses the official Maven repository.

smile.neighbor.BKTree Maven / Gradle / Ivy

The newest version!
/*******************************************************************************
 * Copyright (c) 2010 Haifeng Li
 *   
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *  
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

package smile.neighbor;

import java.util.List;
import java.util.ArrayList;
import java.util.Collection;
import smile.math.distance.Metric;

/**
 * A BK-tree is a metric tree specifically adapted to discrete metric spaces.
 * For simplicity, let us consider integer discrete metric d(x,y). Then, BK-tree
 * is defined in the following way. An arbitrary element a is selected as root
 * root. Root may have zero or more subtrees. The k-th subtree is
 * recursively built of all elements b such that d(a,b) = k. BK-trees can be
 * used for approximate string matching in a dictionary.
 * 
 * 

References

*
    *
  1. W. Burkhard and R. Keller. Some approaches to best-match file searching. CACM, 1973.
  2. *
* * @param the type of data objects in the tree. * * @author Haifeng Li */ public class BKTree implements RNNSearch { /** * The root in the BK-tree. */ class Node { /** * The datum object. */ E object; /** * The index of datum in the dataset. */ int index; /** * The children nodes. Note that the i-th root's distance to * the parent is i. */ ArrayList children; /** * Constructor. * @param object the datum object. * @param index the index of datum in the dataset. */ Node(int index, E object) { this.index = index; this.object = object; } /** * Add a datum into the subtree. * @param datum the datum object. */ private void add(E datum) { int d = (int) distance.d(object, datum); if (d == 0) { return; } if (children == null) { children = new ArrayList(); } while (children.size() <= d) { children.add(null); } Node child = children.get(d); if (child == null) { Node node = new Node(count++, datum); children.set(d, node); } else { child.add(datum); } } } /** * The root root of BK-tree. */ private Node root; /** * The distance metric. Note that the metric must be a discrete distance, * e.g. edit distance, Hamming distance, Lee distance, Jaccard distance, * and taxonomic distance, etc. */ private Metric distance; /** * The number of nodes in the tree. */ private int count = 0; /** * Whether to exclude query object self from the neighborhood. */ private boolean identicalExcluded = true; /** * Constructor. * @param distance the metric used to build BK-tree. Note that the metric * must be a discrete distance, e.g. edit distance, Hamming distance, Lee * distance, Jaccard distance, and taxonomic distance, etc. */ public BKTree(Metric distance) { this.distance = distance; } /** * Add a dataset into BK-tree. * @param data the dataset to insert into the BK-tree. */ public void add(E[] data) { for (E datum : data) { add(datum); } } /** * Add a dataset into BK-tree. * @param data the dataset to insert into the BK-tree. 
*/ public void add(Collection data) { for (E datum : data) { add(datum); } } @Override public String toString() { return String.format("BK-Tree (%s)", distance); } /** * Add a datum into the BK-tree. */ public void add(E datum) { if (root == null) { root = new Node(count++, datum); } else { root.add(datum); } } /** * Set if exclude query object self from the neighborhood. */ public BKTree setIdenticalExcluded(boolean excluded) { identicalExcluded = excluded; return this; } /** * Get whether if query object self be excluded from the neighborhood. */ public boolean isIdenticalExcluded() { return identicalExcluded; } /** * Do a range search in the given subtree. * @param node the root of subtree. * @param q the query object. * @param k the range of query. * @param neighbors the returned results of which d(x, target) ≤ k. */ private void search(Node node, E q, int k, List> neighbors) { int d = (int) distance.d(node.object, q); if (d <= k) { if (d > 0 || !identicalExcluded) { neighbors.add(new Neighbor(node.object, node.object, node.index, d)); } } if (node.children != null) { int start = Math.max(1, d-k); int end = Math.min(node.children.size(), d+k+1); for (int i = start; i < end; i++) { Node child = node.children.get(i); if (child != null) { search(child, q, k, neighbors); } } } } @Override public void range(E q, double radius, List> neighbors) { if (radius != (int) radius) { throw new IllegalArgumentException("The parameter radius has to be an integer: " + radius); } search(root, q, (int) radius, neighbors); } /** * Search the neighbors in the given radius of query object, i.e. * d(q, v) ≤ radius. * * @param q the query object. * @param radius the radius of search range from target. * @param neighbors the list to store found neighbors in the given range on output. */ public void range(E q, int radius, List> neighbors) { search(root, q, radius, neighbors); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy