All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openimaj.image.feature.local.aggregate.VLAD Maven / Gradle / Ivy

Go to download

Methods for the extraction of local features. Local features are descriptions of regions of images (SIFT, ...) selected by detectors (Difference of Gaussian, Harris, ...).

There is a newer version: 1.3.8
Show newest version
/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.image.feature.local.aggregate;

import java.lang.reflect.Array;
import java.util.List;

import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.feature.ArrayFeatureVector;
import org.openimaj.feature.MultidimensionalFloatFV;
import org.openimaj.feature.local.LocalFeature;
import org.openimaj.image.FImage;
import org.openimaj.ml.clustering.CentroidsProvider;
import org.openimaj.ml.clustering.assignment.HardAssigner;

/**
 * Implementation of VLAD, the "Vector of Locally Aggregated Descriptors"
 * algorithm. VLAD aggregates descriptors into a vector form based on locality
 * in feature space. Like in the {@link BagOfVisualWords}, features are assigned
 * to centroids in a codebook (usually learnt through k-means), but instead of
 * creating a vector of codeword occurances, VLAD accumulates the residual
 * vector between each input vector and its assigned centroid. Fundamentally,
 * VLAD describes the distribution of the image vectors with respect to their
 * assigned centroids. VLAD is closely related to the more complex
 * {@link FisherVector}.
 * 

* For a given number of centroids, K, and vector length, D, the VLAD descriptor * has dimension K*D. This is obviously longer than the K-dimensional vector * produced by {@link BagOfVisualWords}. However, the VLAD descriptor is can be * useful with a much smaller K (i.e. of the order of 16-64 dimensions versus up * to 1 million (or more) for {@link BagOfVisualWords}). * * @author Jonathon Hare ([email protected]) * * @param * Primitive array type of the {@link ArrayFeatureVector}s used by * the {@link LocalFeature}s that will be processed. */ @Reference( type = ReferenceType.Inproceedings, author = { "Jegou, H.", "Douze, M.", "Schmid, C.", "Perez, P." }, title = "Aggregating local descriptors into a compact image representation", year = "2010", booktitle = "Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on", pages = { "3304 ", "3311" }, month = "june", customData = { "doi", "10.1109/CVPR.2010.5540039", "ISSN", "1063-6919" }) public class VLAD implements VectorAggregator, MultidimensionalFloatFV> { private HardAssigner assigner; private T[] centroids; private boolean normalise; /** * Construct with the given assigner and the centroids associated with the * assigner. * * @param assigner * the assigner * @param centroids * the centroids associated with the assigner * @param normalise * if true then output feature is l2 normalised */ public VLAD(HardAssigner assigner, T[] centroids, boolean normalise) { this.assigner = assigner; this.centroids = centroids; this.normalise = normalise; } /** * Construct with the given assigner and the centroids associated with the * assigner. * * @param assigner * the assigner * @param centroids * the centroids associated with the assigner * @param normalise * if true then output feature is l2 normalised */ public VLAD(HardAssigner assigner, CentroidsProvider centroids, boolean normalise) { this(assigner, centroids.getCentroids(), normalise); } @Override public MultidimensionalFloatFV aggregate(List>> features) { if (features == null || features.size() <= 0) return null; final int K = this.centroids.length; final int D = features.get(0).getFeatureVector().length(); final float[][] vector = new float[K][D]; for (final LocalFeature> f : features) { final T x = f.getFeatureVector().values; final int i = assigner.assign(x); for (int j = 0; j < D; j++) { vector[i][j] += (float) (Array.getDouble(x, j) - Array.getDouble(centroids[i], j)); } } final MultidimensionalFloatFV out = new MultidimensionalFloatFV(vector); if (normalise) { // l2 norm double sumsq = 0; for (int i = 0; i < out.values.length; i++) { sumsq += (out.values[i] * out.values[i]); } final float norm = (float) (1.0 / Math.sqrt(sumsq)); for (int i = 0; i < out.values.length; i++) { out.values[i] *= norm; } } return out; } /** * Generate a visualisation of the feature. This is primarily designed for * descriptors that originated from the aggregation of SIFT. * * @param descr * the feature * @param nterms * the number of centroids used to create the feature * @param nSpatialBins * the number of spatial bins used in the feature * @param nOriBins * the number of orientation bins * @return an image depicting the feature */ public static FImage drawDescriptor(float[] descr, int nterms, int nSpatialBins, int nOriBins) { final FImage image = new FImage(40 * nterms, 40); for (int z = 0; z < nterms; z++) { for (int y = 0; y < nSpatialBins; y++) { for (int x = 0; x < nSpatialBins; x++) { for (int i = 0; i < nOriBins; i++) { final int xx = 5 + (10 * x); final int yy = 5 + (10 * y); final int length = (int) (descr[(4 * 4 * 8 * z) + (4 * 4 * y) + (4 * x) + i] * 100); final double theta = i * 2 * Math.PI / 8; image.drawLine(xx + 40 * z, yy, theta, length, 1F); } } } } return image; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy