All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ch.ethz.globis.phtree.v11.PhQueryKnnMbbPP Maven / Gradle / Ivy

There is a newer version: 2.8.1
Show newest version
/*
 * Copyright 2011-2016 ETH Zurich. All Rights Reserved.
 *
 * This software is the proprietary information of ETH Zurich.
 * Use is subject to license terms.
 */
package ch.ethz.globis.phtree.v11;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.NoSuchElementException;

import ch.ethz.globis.phtree.PhDistance;
import ch.ethz.globis.phtree.PhEntry;
import ch.ethz.globis.phtree.PhEntryDist;
import ch.ethz.globis.phtree.PhFilterDistance;
import ch.ethz.globis.phtree.PhTree.PhExtent;
import ch.ethz.globis.phtree.PhTree.PhKnnQuery;

/**
 * kNN query implementation that uses preprocessors and distance functions.
 * 
 * The algorithm works as follows:
 * 
 * First we drill down in the tree to find an entry that is 'close' to
 * desired center of the kNN query. A 'close' entry is one that is in the same node
 * where the center would be, or in one of its sub-nodes. Note that we do not use
 * the center-point itself in case it exists in the tree. The result of the first step is 
 * a guess at the initial search distance (this would be 0 if we used the center itself). 
 * 
 * We then use a combination of rectangle query (center +/- initDistance) and distance-query. 
 * The query traverses only nodes and values that lie in the query rectangle and that satisfy the
 * distance requirement (circular distance when using euclidean space).
 * 
 * While iterating through the query result, we regularly sort the returned entries 
 * to see which distance would suffice to return 'k' result. If the new distance is smaller,
 * we adjust the query rectangle and the distance function before continuing the
 * query. As a result, when the query returns no more entries, we are guaranteed to
 * have all closest neighbours.
 * 
 * The only thing that can go wrong is that we may get less than 'k' neighbours if the
 * initial distance was too small. In that case we multiply the initial distance by 10
 * and run the algorithm again. Not that multiplying the distance by 10 means a 10^k fold
 * increase in the search volume. 
 *   
 *   
 * WARNING:
 * The query rectangle is calculated using the PhDistance.toMBB() method.
 * The implementation of this method may not work with non-euclidean spaces! 
 * 
 * @param  value type
 */
public class PhQueryKnnMbbPP implements PhKnnQuery {

	private final int dims;
	private int nMin;
	private PhTree11 pht;
	private PhDistance distance;
	private final ArrayList> entries = new ArrayList<>();
	private int resultSize = 0;
	private int currentPos = -1;
	private final long[] mbbMin;
	private final long[] mbbMax;
	private final PhIteratorNoGC iter;
	private final PhFilterDistance checker;

	public PhQueryKnnMbbPP(PhTree11 pht) {
		this.dims = pht.getDim();
		this.mbbMin = new long[dims];
		this.mbbMax = new long[dims];
		this.pht = pht;
		this.checker = new PhFilterDistance();
		this.iter = new PhIteratorNoGC<>(pht, checker);
	}

	@Override
	public long[] nextKey() {
		return nextEntryReuse().getKey();
	}

	@Override
	public T nextValue() {
		return nextEntryReuse().getValue();
	}

	@Override
	public PhEntryDist nextEntry() {
		return new PhEntryDist<>(nextEntryReuse());
	} 

	@Override
	public PhEntryDist nextEntryReuse() {
		if (currentPos >= resultSize) {
			throw new NoSuchElementException();
		}
		return entries.get(currentPos++);
	}

	@Override
	public boolean hasNext() {
		return currentPos < resultSize;
	}

	@Override
	public T next() {
		return nextValue();
	}

	@Override
	public PhKnnQuery reset(int nMin, PhDistance dist, long... center) {
		this.distance = dist == null ? this.distance : dist;
		this.nMin = nMin;
		clearEntries();

		if (nMin > 0) {
			nearestNeighbourBinarySearch(center, nMin);
		}

		currentPos = 0;
		return this;
	}

	private void findKnnCandidate(long[] center, long[] ret) {
		findKnnCandidate(center, pht.getRoot(), ret);
	}

	private long[] findKnnCandidate(long[] key, Node node, long[] ret) {
		Object v = node.doIfMatching(key, true, null, null, null, pht);
		if (v == null) {
			//Okay, there is no perfect match:
			//just perform a query on the current node and return the first value that we find.
			return returnAnyValue(ret, key, node);
		}
		if (v instanceof Node) {
			return findKnnCandidate(key, (Node) v, ret);
		}

		//so we have a perfect match!
		//But we should return it only if nMin=1, otherwise our search area is too small.
		if (nMin == 1) {
			//Never return closest key if we look for nMin>1 keys!
			//now return the key, even if it may not be an exact match (we don't check)
			System.arraycopy(key, 0, ret, 0, key.length);
			return ret;
		}
		//Okay just perform a query on the current node and return the first value that we find.
		return returnAnyValue(ret, key, node);
	}

	private long[] returnAnyValue(long[] ret, long[] key, Node node) {
		//First, get correct prefix.
		long mask = (-1L) << (node.getPostLen()+1);
		for (int i = 0; i < dims; i++) {
			ret[i] = key[i] & mask;
		}
		
		NodeIteratorFullNoGC ni = new NodeIteratorFullNoGC<>(dims, ret);
		//This allows writing the result directly into 'ret'
		PhEntry result = new PhEntry<>(ret, null);
		ni.init(node, null);
		while (ni.increment(result)) {
			if (result.hasNodeInternal()) {
				//traverse sub node
				ni.init((Node) result.getNodeInternal(), null);
			} else {
				//Never return closest key if we look for nMin>1 keys!
				if (nMin > 1 && Arrays.equals(key, result.getKey())) {
					//Never return a perfect match if we look for nMin>1 keys!
					//otherwise the distance is too small.
					//This check should be cheap and will not be executed more than once anyway.
					continue;
				}
				return ret;
			}
		}
		throw new IllegalStateException();
	}

	/**
	 * This approach applies binary search to queries.
	 * It start with a query that covers the whole tree. Then whenever it finds an entry (the first)
	 * it discards the query and starts a smaller one with half the distance to the search-point.
	 * This effectively reduces the volume by 2^k.
	 * Once a query returns no result, it uses the previous query to traverse all results
	 * and find the nearest result.
	 * As an intermediate step, it may INCREASE the query size until a non-empty query appears.
	 * Then it could decrease again, like a true binary search.
	 * 
	 * When looking for nMin > 1, one could search for queries with at least nMin results...
	 * 
	 * @param val
	 * @param nMin
	 */
	private void nearestNeighbourBinarySearch(long[] val, int nMin) {
		//special case with minDist = 0
		if (nMin == 1 && pht.contains(val)) {
			addEntry(new PhEntry(val, pht.get(val)), val);
			return;
		}

		//special case with size() <= nMin
		if (pht.size() <= nMin) {
			PhExtent itEx = pht.queryExtent();
			while (itEx.hasNext()) {
				PhEntry e = itEx.nextEntryReuse();
				addEntry(e, val);
			}

			sortEntries();
			return;
		}

		//estimate initial distance
		long[] cand = new long[dims];
		findKnnCandidate(val, cand);
		double currentDist = distance.dist(val, cand);

		while (!findNeighbours(currentDist, nMin, val)) {
			currentDist *= 10;
		}
	}

	private final boolean findNeighbours(double maxDist, int nMin, long[] val) {
		//Epsilon for calculating the distance depends on DIM, the magnitude of the values and
		//the precision of the Double mantissa.
		final double EPS = dims * maxDist / (double)(1L << 51);//2^(53-2));
		final int CONSOLIDATION_INTERVAL = 10;
		clearEntries();
		checker.set(val, distance, maxDist);
		distance.toMBB(maxDist, val, mbbMin, mbbMax);
		iter.reset(mbbMin, mbbMax);

		// Get nMin results
		while (iter.hasNext() && resultSize < nMin) {
			PhEntry en = iter.nextEntryReuse();
			addEntry(en, val);
		}
		sortEntries();

		if (resultSize < nMin) {
			//too small, we need a bigger range
			return false;
		}
		if (!iter.hasNext()) {
			//perfect fit!
			return true;
		}

		//get distance of farthest entry and continue query with this new distance
		maxDist = entries.get(nMin-1).dist();
		checker.set(val, distance, maxDist);
		distance.toMBB(maxDist, val, mbbMin, mbbMax);
		iter.adjustMinMax();

		// we continue the query but reduce the range maximum range 
		int cnt = 0;
		while (iter.hasNext()) {
			PhEntry e = iter.nextEntryReuse();
			addEntry(e, val);
			cnt++;
			if (cnt % CONSOLIDATION_INTERVAL == 0) {
				maxDist = consolidate(nMin, EPS, maxDist);
				//update query-dist
				checker.set(val, distance, maxDist);
				distance.toMBB(maxDist, val, mbbMin, mbbMax);
				iter.adjustMinMax();
			}
		}
		// no more elements in tree
		consolidate(nMin, EPS, maxDist);
		return true;
	}

	private double consolidate(int nMin, double eps, double max) {
		sortEntries();
		double maxDnew = entries.get(nMin-1).dist();
		if (maxDnew < max+eps) { //TODO epsilon?
			max = maxDnew;
			for (int i2 = nMin; i2 < resultSize; i2++) {
				//purge 
				if (entries.get(i2).dist() + eps > max) {
					resultSize = i2;
					break;
				}
			}
		}
		return max;
	}


	private void addEntry(PhEntry e, long[] center) {
		double dist = distance.dist(center, e.getKey());
		if (resultSize < entries.size()) {
			entries.get(resultSize).set(e, dist);
		} else {
			PhEntryDist de = new PhEntryDist<>(e, dist);
			entries.add(de);
		}
		resultSize++;
	}

	private void clearEntries() {
		resultSize = 0;
		for (int i = 0; i < entries.size(); i++) {
			entries.get(i).clear();
		}
	}

	private void sortEntries() {
		entries.sort(PhEntryDist.COMP);
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy