All downloads are free. Search and download functionality uses the official Maven repository.

org.openimaj.knn.pq.DoubleProductQuantiser Maven / Gradle / Ivy

/*
	AUTOMATICALLY GENERATED BY jTemp FROM
	/Users/jsh2/Work/openimaj/target/checkout/machine-learning/nearest-neighbour/src/main/jtemp/org/openimaj/knn/pq/#T#ProductQuantiser.jtemp
*/
/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
  
package org.openimaj.knn.pq;

import java.util.Arrays;

import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.knn.DoubleNearestNeighboursExact;
import org.openimaj.knn.NearestNeighbours;

/**
 * Implementation of a Product Quantiser for vectors/arrays of doubles. Product
 * Quantisers quantise data into a very large number of clusters (large enough
 * that the centroids could not possibly fit into memory - i.e. 2^64 centroids).
 * The Product Quantiser can be used to create compressed representations of
 * high-dimensional vectors, and also as a means to perform efficient
 * nearest-neighbour search over large collections of vectors (which have been
 * effectively compressed using the product quantiser).
 * <p>
 * This is achieved by breaking down the input vectors into non-overlapping
 * sub-vectors, and applying quantisation to these sub-vectors individually. The
 * number of bins (cluster centroids) for the sub-vectors is small (up to 256 in
 * this implementation), but when combined over all sub-vectors, the number of
 * bins is much larger as it accounts for all combinations of bins across
 * sub-vectors. As only a small set of centroids needs to be held for the
 * sub-vectors, the memory requirements are quite modest. The output of the
 * quantisation action in this implementation is an array of bytes corresponding
 * to the index of the matching centroid for each sub-vector (index numbers are
 * offset by -128 so that 256 centroids indexes can fit in a single byte). The
 * bit-pattern of this byte array could be interpreted as a numeric value of
 * global cluster index, however in practice this is not useful.
 * <p>
 * Typically the product quantiser is "trained" so that it adapts to the data
 * that it is being applied to. The standard approach to this is to use
 * K-Means, however, this is not required. Insofar as this implementation is
 * concerned, any set of compatible {@link NearestNeighbours} implementations
 * can be provided to the constructor. Each of the {@link NearestNeighbours}
 * could even potentially have a different number of dimensions (corresponding
 * to the sub-vector lengths).
 * <p>
 * In the standard case, where you just want to use K-Means to train the Product
 * Quantiser, a set of utility methods can be found in the
 * org.openimaj.knn.pq.DoubleProductQuantiserUtilities class which can be found in
 * the clustering sub-project (due to the dependence on the K-Means algorithm).
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 *
 */
@Reference(
		type = ReferenceType.Article,
		author = { "Jegou, Herve", "Douze, Matthijs", "Schmid, Cordelia" },
		title = "Product Quantization for Nearest Neighbor Search",
		year = "2011",
		journal = "IEEE Trans. Pattern Anal. Mach. Intell.",
		pages = { "117", "", "128" },
		url = "http://dx.doi.org/10.1109/TPAMI.2010.57",
		month = "January",
		number = "1",
		publisher = "IEEE Computer Society",
		volume = "33",
		customData = {
				"issn", "0162-8828",
				"numpages", "12",
				"doi", "10.1109/TPAMI.2010.57",
				"acmid", "1916695",
				"address", "Washington, DC, USA",
				"keywords", "High-dimensional indexing, High-dimensional indexing, image indexing, very large databases, approximate search., approximate search., image indexing, very large databases"
		})
public class DoubleProductQuantiser {
	/**
	 * Largest number of centroids a single assigner may hold: each centroid
	 * index is stored in one byte, so only 256 distinct values exist.
	 */
	private static final int MAX_CENTROIDS = 256;

	/**
	 * Offset subtracted from a centroid index when encoding (and added back
	 * when decoding) so that indexes 0..255 map onto the signed byte range
	 * -128..127.
	 */
	private static final int INDEX_OFFSET = 128;

	protected DoubleNearestNeighboursExact[] assigners;
	protected int ndims;

	/**
	 * Construct a {@link DoubleProductQuantiser} with the given
	 * nearest-neighbour assigners. The number of dimensions of the assigners
	 * determines how long each sub-vector is. There is a one-to-one mapping
	 * between the order of the assigners and the order of the sub-vectors.
	 *
	 * @param assigners
	 *            the nearest-neighbour assigners.
	 * @throws IllegalArgumentException
	 *             if any assigner holds more than 256 centroids; such indexes
	 *             cannot be represented in the single byte used per
	 *             sub-vector and would silently wrap around.
	 */
	public DoubleProductQuantiser(DoubleNearestNeighboursExact[] assigners) {
		this.assigners = assigners;

		for (final DoubleNearestNeighboursExact nn : assigners) {
			final int ncentroids = nn.getPoints().length;
			if (ncentroids > MAX_CENTROIDS) {
				throw new IllegalArgumentException("Each assigner may hold at most " + MAX_CENTROIDS
						+ " centroids, but one holds " + ncentroids);
			}

			// the total vector length is the sum of the sub-vector lengths
			ndims += nn.numDimensions();
		}
	}

	/**
	 * Quantise the given data using this Product Quantiser. The output is an
	 * array of bytes corresponding to the index of the matching centroid for
	 * each sub-vector (index numbers are offset by -128 so that 256 centroids
	 * indexes can fit in a single byte).
	 * <p>
	 * Only the leading elements covered by the assigners are read; any
	 * trailing elements of the input are ignored.
	 *
	 * @param data
	 *            the data to quantise
	 * @return the quantised data.
	 * @throws IllegalArgumentException
	 *             if the data has fewer elements than the sum of the assigners'
	 *             dimensions. Without this check the missing elements would be
	 *             silently treated as zeros.
	 */
	public byte[] quantise(double[] data) {
		if (data.length < ndims) {
			throw new IllegalArgumentException("Expected a vector with at least " + ndims
					+ " elements, but got " + data.length);
		}

		final byte[] quantised = new byte[assigners.length];

		// single-element buffers reused across all sub-vector searches
		final int[] idx = { 0 };
		final double[] dst = { 0 };
		final double[][] qus = new double[1][];

		for (int i = 0, from = 0; i < assigners.length; i++) {
			final int len = assigners[i].numDimensions();

			// cut out the i-th sub-vector and look up its nearest centroid
			qus[0] = Arrays.copyOfRange(data, from, from + len);
			assigners[i].searchNN(qus, idx, dst);

			// shift 0..255 down to -128..127 so that it fits in a byte
			quantised[i] = (byte) (idx[0] - INDEX_OFFSET);

			from += len;
		}

		return quantised;
	}

	/**
	 * Decompress the quantised data by replacing each encoded index with the
	 * actual centroid subvector.
	 *
	 * @param qdata
	 *            the quantised data
	 *
	 * @return the (approximate) decompressed feature
	 * @throws ArrayIndexOutOfBoundsException
	 *             if the quantised data has fewer entries than there are
	 *             assigners, or if an entry decodes to an index beyond the
	 *             points held by the corresponding assigner.
	 */
	public double[] decompress(byte[] qdata) {
		final double[] data = new double[ndims];

		for (int i = 0, from = 0; i < assigners.length; i++) {
			final int len = assigners[i].numDimensions();

			// undo the -128 offset applied when the index was encoded
			final int index = qdata[i] + INDEX_OFFSET;

			System.arraycopy(assigners[i].getPoints()[index], 0, data, from, len);
			from += len;
		}

		return data;
	}
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy