org.openimaj.image.feature.global.Gist

Methods for the extraction of low-level image features, including global image features and pixel/patch classification models.
/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.image.feature.global;

import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.feature.FloatFV;
import org.openimaj.image.FImage;
import org.openimaj.image.Image;
import org.openimaj.image.MBFImage;
import org.openimaj.image.analyser.ImageAnalyser;
import org.openimaj.image.colour.ColourMap;
import org.openimaj.image.colour.ColourSpace;
import org.openimaj.image.colour.RGBColour;
import org.openimaj.image.processing.algorithm.FourierTransform;
import org.openimaj.image.processing.convolution.FourierConvolve;
import org.openimaj.image.processing.convolution.GaborFilters;
import org.openimaj.image.processing.resize.ResizeProcessor;
import org.openimaj.image.processor.SinglebandImageProcessor;

import edu.emory.mathcs.jtransforms.fft.FloatFFT_2D;

/**
 * Implementation of the "Gist" spatial envelope feature. Based on (and tested
 * against) the original Matlab implementation from http://people.csail.mit.edu/torralba/code/spatialenvelope/, and designed
 * to produce comparable features.
 * <p>
 * The Gist or Spatial Envelope is a very low dimensional representation of the
 * scene. Gist encodes a set of perceptual dimensions (naturalness, openness,
 * roughness, expansion, ruggedness) that represents the dominant spatial
 * structure of a scene. These perceptual dimensions are reliably estimated
 * using coarsely localized spectral information from the image.
 * <p>
 * Implementation notes: This class supports both fixed and variable size Gist.
 * In the fixed size mode, the image is resized and centre-cropped to a
 * pre-defined size (allowing the Gabor filters to be pre-computed and reused).
 * In the variable size mode the Gabor filters will be re-computed as necessary
 * if the image size changes. For computing features to compare images, fixed
 * size mode makes more sense.
 * <p>
 * Example usage for image comparison:
 * 
 * <pre>
 * FImage i1, i2 = ...
 * Gist&lt;FImage&gt; fsg = new Gist&lt;FImage&gt;(256, 256);
 * 
 * fsg.analyseImage(i1);
 * FloatFV f1 = fsg.getResponse();
 * 
 * fsg.analyseImage(i2);
 * FloatFV f2 = fsg.getResponse();
 * 
 * double d = FloatFVComparison.SUM_SQUARE.compare(f1, f2);
 * </pre>
 * <p>
 * The ability to generate a visualisation of the current descriptor in the
 * same way as provided by the original Matlab code is given by the
 * {@link #visualiseDescriptor(int)} method.
 * 
 * @author Jonathon Hare ([email protected])
 * 
 * @param <IMAGE>
 *            The type of image. {@link MBFImage} and {@link FImage} are
 *            supported; other types might not work.
 */
@Reference(
		type = ReferenceType.Article,
		author = { "Oliva, Aude", "Torralba, Antonio" },
		title = "Modeling the Shape of the Scene: A Holistic Representation of the Spatial Envelope",
		year = "2001",
		journal = "Int. J. Comput. Vision",
		pages = { "145", "", "175" },
		url = "http://dx.doi.org/10.1023/A:1011139631724",
		month = "May",
		number = "3",
		publisher = "Kluwer Academic Publishers",
		volume = "42",
		customData = {
				"issn", "0920-5691",
				"numpages", "31",
				"doi", "10.1023/A:1011139631724",
				"acmid", "598462",
				"address", "Hingham, MA, USA",
				"keywords", "energy spectrum, natural images, principal components, scene recognition, spatial layout"
		})
public class Gist<IMAGE extends Image<?, IMAGE> & SinglebandImageProcessor.Processable<Float, FImage, IMAGE>>
		implements
		ImageAnalyser<IMAGE>
{
	/**
	 * The default number of filter orientations per scale (from HF to LF)
	 */
	public static final int[] DEFAULT_ORIENTATIONS_PER_SCALE = { 8, 8, 8, 8 };

	/**
	 * The default number of spatial blocks
	 */
	public static final int DEFAULT_NUMBER_OF_BLOCKS = 4;

	/**
	 * The default number of cycles per image for the pre-filter Gaussian
	 */
	public static final int DEFAULT_PREFILTER_FC = 4;

	/**
	 * The default amount of padding to apply before convolving with the Gabor
	 * filters
	 */
	public static final int DEFAULT_BOUNDARY_EXTENSION = 32;

	/**
	 * The number of blocks in each direction
	 */
	public int numberOfBlocks = DEFAULT_NUMBER_OF_BLOCKS;

	/**
	 * The number of cycles per image for the pre-filter Gaussian
	 */
	public int prefilterFC = DEFAULT_PREFILTER_FC;

	/**
	 * The amount of padding to add before convolving with the Gabor functions
	 */
	public int boundaryExtension = DEFAULT_BOUNDARY_EXTENSION;

	/**
	 * Default image size (both height and width) for fixed size Gist
	 */
	public static final int DEFAULT_SIZE = 128;

	protected FImage[] gaborFilters;
	protected FloatFV response;
	protected int[] orientationsPerScale;
	protected boolean fixedSize;
	protected int imageWidth;
	protected int imageHeight;

	/**
	 * Construct a fixed size Gist extractor using the default values.
	 */
	public Gist() {
		this(true);
	}

	/**
	 * Construct a variable or fixed size Gist extractor using the default
	 * values.
	 * 
	 * @param fixedSize
	 *            if true the images should be resized and cropped
	 */
	public Gist(boolean fixedSize) {
		this(DEFAULT_SIZE, DEFAULT_SIZE, DEFAULT_ORIENTATIONS_PER_SCALE, fixedSize);
	}

	/**
	 * Construct a fixed-size Gist extractor, with the given image size and the
	 * default number of orientations per scale for the Gabor jet.
	 * 
	 * @param width
	 *            the image width
	 * @param height
	 *            the image height
	 */
	public Gist(int width, int height) {
		this(width, height, DEFAULT_ORIENTATIONS_PER_SCALE);
	}

	/**
	 * Construct a fixed-size Gist extractor, with the given image size and
	 * number of orientations per scale for the Gabor jet.
	 * 
	 * @param width
	 *            the image width
	 * @param height
	 *            the image height
	 * @param orientationsPerScale
	 *            the number of Gabor orientations per scale (from HF to LF)
	 */
	public Gist(int width, int height, int[] orientationsPerScale) {
		this(width, height, orientationsPerScale, true);
	}

	/**
	 * Construct a variable or fixed size Gist extractor with the given number
	 * of orientations per scale for the Gabor jet.
	 * 
	 * @param orientationsPerScale
	 *            the number of Gabor orientations per scale (from HF to LF)
	 * @param fixedSize
	 *            if true the images should be resized and cropped
	 */
	public Gist(int[] orientationsPerScale, boolean fixedSize) {
		this(DEFAULT_SIZE, DEFAULT_SIZE, orientationsPerScale, fixedSize);
	}

	/**
	 * Construct a variable or fixed size Gist extractor with the given number
	 * of orientations per scale for the Gabor jet. The given height and width
	 * are ignored in variable size mode.
	 * 
	 * @param width
	 *            the image width
	 * @param height
	 *            the image height
	 * @param orientationsPerScale
	 *            the number of Gabor orientations per scale (from HF to LF)
	 * @param fixedSize
	 *            if true the images should be resized and cropped
	 */
	public Gist(int width, int height, int[] orientationsPerScale, boolean fixedSize) {
		this.fixedSize = fixedSize;
		this.orientationsPerScale = orientationsPerScale;
		this.imageWidth = width;
		this.imageHeight = height;

		if (fixedSize)
			this.gaborFilters = GaborFilters.createGaborJets(width + 2 * this.boundaryExtension,
					height + 2 * this.boundaryExtension, orientationsPerScale);
	}

	@Override
	public void analyseImage(IMAGE image) {
		if (fixedSize) {
			final double sc = Math.max((double) imageWidth / (double) image.getWidth(),
					(double) imageHeight / (double) image.getHeight());

			final IMAGE resized = image.process(new ResizeProcessor((float) sc));
			final IMAGE roi = resized.extractCenter(imageWidth, imageHeight);

			extractGist(roi);
		} else {
			if (gaborFilters == null || gaborFilters[0].width != image.getWidth()
					|| gaborFilters[0].height != image.getHeight())
			{
				gaborFilters = GaborFilters.createGaborJets(image.getWidth() + 2 * this.boundaryExtension,
						image.getHeight() + 2 * this.boundaryExtension, orientationsPerScale);
			}

			extractGist(image.clone()); // clone to stop side effects from
										// normalisation further down
		}
	}

	protected Gist(int[] orientationsPerScale) {
		this.orientationsPerScale = orientationsPerScale;
	}

	protected void extractGist(IMAGE image) {
		MBFImage mbfimage;

		if (image instanceof FImage) {
			mbfimage = new MBFImage((FImage) image);
		} else if (image instanceof MBFImage) {
			mbfimage = (MBFImage) image;
		} else {
			throw new UnsupportedOperationException("Image type " + image.getClass()
					+ " is not currently supported. Please file a bug report.");
		}

		final MBFImage o = prefilter(mbfimage.normalise());
		this.response = gistGabor(o);
	}

	private MBFImage prefilter(MBFImage img) {
		final int w = 5;
		final double s1 = this.prefilterFC / Math.sqrt(Math.log(2));

		final int sw = img.getWidth() + 2 * w;
		final int sh = img.getHeight() + 2 * w;
		int n = Math.max(sw, sh);
		n = n + n % 2;
		img = img.paddingSymmetric(w, w, w + n - sw, w + n - sh);

		final FImage filter = new FImage(2 * n, n);
		for (int j = 0; j < n; j++) {
			final int fy = j - n / 2;

			for (int i = 0; i < n * 2; i += 2) {
				final int fx = (i / 2) - n / 2;

				filter.pixels[j][i] = (float) Math.exp(-(fx * fx + fy * fy) / (s1 * s1));
			}
		}

		final MBFImage output = new MBFImage();
		for (int b = 0; b < img.numBands(); b++) {
			final FImage band = img.getBand(b);

			for (int y = 0; y < band.height; y++) {
				for (int x = 0; x < band.width; x++) {
					band.pixels[y][x] = (float) Math.log(1 + band.pixels[y][x] * 255);
				}
			}

			output.bands.add(band.subtractInplace(FourierConvolve.convolvePrepared(band, filter, true)));
		}

		final FImage mean = output.flatten();
		final FImage meansq = mean.multiply(mean);
		final FImage localstd = FourierConvolve.convolvePrepared(meansq, filter, true);

		for (int b = 0; b < img.numBands(); b++) {
			final FImage band = output.getBand(b);

			for (int y = 0; y < localstd.height; y++)
				for (int x = 0; x < localstd.width; x++)
					band.pixels[y][x] = (float) (band.pixels[y][x]
							/ (0.2 + Math.sqrt(Math.abs(localstd.pixels[y][x]))));
		}

		return output.extractROI(w, w, sw - w - w, sh - w - w);
	}

	private FloatFV gistGabor(MBFImage img) {
		final int blocksPerFilter = computeNumberOfSamplingBlocks();
		final int nFeaturesPerBand = gaborFilters.length * blocksPerFilter;
		final int nFilters = this.gaborFilters.length;

		// pad the image
		img = img.paddingSymmetric(boundaryExtension, boundaryExtension, boundaryExtension, boundaryExtension);

		final int cols = img.getCols();
		final int rows = img.getRows();
		final FloatFFT_2D fft = new FloatFFT_2D(rows, cols);
		final float[][] workingSpace = new float[rows][cols * 2];
		final FloatFV fv = new FloatFV(nFeaturesPerBand * img.numBands());

		for (int b = 0; b < img.numBands(); b++) {
			final FImage band = img.bands.get(b);

			final float[][] preparedImage = FourierTransform.prepareData(band.pixels, rows, cols, true);
			fft.complexForward(preparedImage);

			for (int i = 0; i < nFilters; i++) {
				// convolve with the filter
				FImage ig = performConv(fft, preparedImage, workingSpace, this.gaborFilters[i], rows, cols);

				// remove padding
				ig = ig.extractROI(boundaryExtension, boundaryExtension, band.width - 2 * boundaryExtension,
						band.height - 2 * boundaryExtension);

				sampleResponses(ig, fv.values, b * nFeaturesPerBand + i * blocksPerFilter);
			}
		}

		return fv;
	}

	/**
	 * Compute the number of sampling blocks that are used for every filter. The
	 * default implementation returns {@link #numberOfBlocks} *
	 * {@link #numberOfBlocks}, but can be overridden in combination with
	 * {@link #sampleResponses(FImage, float[], int)} in a subclass to support
	 * different spatial sampling strategies.
	 * 
	 * @return the number of sampling blocks per filter.
	 */
	protected int computeNumberOfSamplingBlocks() {
		return numberOfBlocks * numberOfBlocks;
	}

	/**
	 * Sample the average response from each of the blocks in the image and
	 * insert into the vector. This method could be overridden to support
	 * different spatial aggregation strategies (in which case
	 * {@link #computeNumberOfSamplingBlocks()} should also be overridden).
	 * 
	 * @param image
	 *            the image to sample
	 * @param v
	 *            the vector to write into
	 * @param offset
	 *            the offset from which to sample
	 */
	protected void sampleResponses(FImage image, float[] v, int offset) {
		final int gridWidth = image.width / this.numberOfBlocks;
		final int gridHeight = image.height / this.numberOfBlocks;

		for (int iy = 0; iy < this.numberOfBlocks; iy++) {
			final int starty = gridHeight * iy;
			final int stopy = Math.min(starty + gridHeight, image.height);

			for (int ix = 0; ix < this.numberOfBlocks; ix++) {
				final int startx = gridWidth * ix;
				final int stopx = Math.min(startx + gridWidth, image.width);

				float avg = 0;
				for (int y = starty; y < stopy; y++) {
					for (int x = startx; x < stopx; x++) {
						avg += image.pixels[y][x];
					}
				}
				avg /= ((stopx - startx) * (stopy - starty));

				// note y and x transposed to conform to the matlab
				// implementation
				v[offset + iy + ix * this.numberOfBlocks] = avg;
			}
		}
	}

	/*
	 * Perform convolution in the frequency domain and reconstruct the resultant
	 * image as the magnitudes of the complex components from the ifft.
	 */
	private FImage performConv(FloatFFT_2D fft, float[][] preparedImage, float[][] workingSpace, FImage filterfft,
			int rows, int cols)
	{
		final float[][] preparedKernel = filterfft.pixels;

		for (int y = 0; y < rows; y++) {
			for (int x = 0; x < cols; x++) {
				final float reImage = preparedImage[y][x * 2];
				final float imImage = preparedImage[y][1 + x * 2];
				final float reKernel = preparedKernel[y][x * 2];
				final float imKernel = preparedKernel[y][1 + x * 2];

				final float re = reImage * reKernel - imImage * imKernel;
				final float im = reImage * imKernel + imImage * reKernel;

				workingSpace[y][x * 2] = re;
				workingSpace[y][1 + x * 2] = im;
			}
		}

		fft.complexInverse(workingSpace, true);

		final FImage out = new FImage(cols, rows);
		for (int r = 0; r < rows; r++) {
			for (int c = 0; c < cols; c++) {
				out.pixels[r][c] = (float) Math.sqrt(workingSpace[r][c * 2] * workingSpace[r][c * 2]
						+ workingSpace[r][1 + c * 2] * workingSpace[r][1 + c * 2]);
			}
		}
		return out;
	}

	/**
	 * Compute the descriptor visualisation in the same form as the original
	 * matlab code. The resultant image illustrates the dominant filter
	 * responses for each spatial bin. The filter scales are drawn with
	 * differing hues, and brightness is proportional to response value.
	 * 
	 * @param height
	 *            the desired height of the produced image
	 * @return the visualisation
	 * 
	 *         TODO: colour descriptors
	 */
	public MBFImage visualiseDescriptor(int height) {
		final Float[][] C = ColourMap.HSV.generateColours(orientationsPerScale.length);
		final FImage[] G = new FImage[this.gaborFilters.length];

		for (int i = 0; i < gaborFilters.length; i++) {
			G[i] = new FImage(this.gaborFilters[i].width / 2, this.gaborFilters[i].height);
			FourierTransform.unprepareData(gaborFilters[i].pixels, G[i], false);
			G[i] = ResizeProcessor.halfSize(G[i]);
			G[i].addInplace(G[i].clone().flipY().flipX());
		}

		final int blocksPerFilter = computeNumberOfSamplingBlocks();
		final int nFeaturesPerBand = gaborFilters.length * blocksPerFilter;
		final int nBands = this.response.values.length / nFeaturesPerBand;

		final MBFImage output = new MBFImage(nBands * height, height, ColourSpace.RGB);
		output.fill(RGBColour.WHITE);

		for (int b = 0; b < nBands; b++) {
			float max = 0;
			final MBFImage[] blockImages = new MBFImage[numberOfBlocks * numberOfBlocks];

			for (int y = 0, k = 0; y < numberOfBlocks; y++) {
				for (int x = 0; x < numberOfBlocks; x++, k++) {
					blockImages[k] = new MBFImage(G[0].width, G[0].height, 3);

					for (int s = 0, j = 0; s < orientationsPerScale.length; s++) {
						for (int i = 0; i < orientationsPerScale[s]; i++, j++) {
							final MBFImage col = new MBFImage(G[0].width, G[0].height, 3);
							col.fill(C[s]).multiplyInplace(
									this.response.values[y + x * numberOfBlocks + j * numberOfBlocks * numberOfBlocks]);

							blockImages[k].addInplace(G[j].toRGB().multiply(col));
						}
					}

					for (int i = 0; i < blockImages[k].numBands(); i++)
						max = Math.max(max, blockImages[k].bands.get(i).max());
				}
			}

			final int ts = (height / 4);
			final ResizeProcessor rp = new ResizeProcessor(ts, ts, true);
			for (int y = 0, k = 0; y < numberOfBlocks; y++) {
				for (int x = 0; x < numberOfBlocks; x++, k++) {
					blockImages[k].divideInplace(max);

					final MBFImage tmp = blockImages[k].process(rp);
					tmp.drawLine(0, 0, 0, ts - 1, 1, RGBColour.WHITE);
					tmp.drawLine(0, 0, ts - 1, 0, 1, RGBColour.WHITE);
					tmp.drawLine(ts - 1, 0, ts - 1, ts - 1, 1, RGBColour.WHITE);
					tmp.drawLine(0, ts - 1, ts - 1, ts - 1, 1, RGBColour.WHITE);

					output.drawImage(tmp, x * ts + b * height, y * ts);
				}
			}
		}

		return output;
	}

	/**
	 * Get the response vector from the previous call to
	 * {@link #analyseImage(Image)}.
	 * 
	 * @return the response
	 */
	public FloatFV getResponse() {
		return response;
	}
}
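
A minimal usage sketch (not part of the source file above), showing variable-size Gist extraction and the descriptor visualisation. The input filename is hypothetical, and ImageUtilities/DisplayUtilities are assumed to be the standard OpenIMAJ helpers for reading and displaying images.

import java.io.File;
import java.io.IOException;

import org.openimaj.feature.FloatFV;
import org.openimaj.image.DisplayUtilities; // assumed OpenIMAJ display helper
import org.openimaj.image.ImageUtilities;   // assumed OpenIMAJ image loading helper
import org.openimaj.image.MBFImage;
import org.openimaj.image.feature.global.Gist;

public class GistExample {
	public static void main(String[] args) throws IOException {
		// Hypothetical input file; substitute any RGB image
		final MBFImage query = ImageUtilities.readMBF(new File("scene.jpg"));

		// Variable-size mode: the Gabor filter bank is rebuilt to match each image's size
		final Gist<MBFImage> gist = new Gist<MBFImage>(false);
		gist.analyseImage(query);

		// With the defaults: 4x4 blocks x 32 filters x 3 bands = 1536 values
		final FloatFV descriptor = gist.getResponse();
		System.out.println("Descriptor length: " + descriptor.values.length);

		// Visualise the dominant filter responses per spatial bin, as in the Matlab code
		DisplayUtilities.display(gist.visualiseDescriptor(256));
	}
}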




