/**
* Copyright (c) 2011, The University of Southampton and the individual contributors.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of the University of Southampton nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.openimaj.image.feature.global;
import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.feature.FloatFV;
import org.openimaj.image.FImage;
import org.openimaj.image.Image;
import org.openimaj.image.MBFImage;
import org.openimaj.image.analyser.ImageAnalyser;
import org.openimaj.image.colour.ColourMap;
import org.openimaj.image.colour.ColourSpace;
import org.openimaj.image.colour.RGBColour;
import org.openimaj.image.processing.algorithm.FourierTransform;
import org.openimaj.image.processing.convolution.FourierConvolve;
import org.openimaj.image.processing.convolution.GaborFilters;
import org.openimaj.image.processing.resize.ResizeProcessor;
import org.openimaj.image.processor.SinglebandImageProcessor;
import edu.emory.mathcs.jtransforms.fft.FloatFFT_2D;
/**
* Implementation of the "Gist" spatial envelope feature. Based on (and tested
* against) the original Matlab implementation from http://people.csail.mit.edu/torralba/code/spatialenvelope/, and designed
* to produce comparable features.
*
 * The Gist or Spatial Envelope is a very low-dimensional representation of the
 * scene. Gist encodes a set of perceptual dimensions (naturalness, openness,
 * roughness, expansion, ruggedness) that represent the dominant spatial
 * structure of a scene. These perceptual dimensions are reliably estimated
 * using coarsely localised spectral information from the image.
*
* Implementation notes: This class supports both fixed and variable size
* Gist. In the fixed size mode, the image is resized and centre-cropped to a
* pre-defined size (allowing the Gabor filters to be pre-computed and reused).
* In the variable size mode the Gabor filters will be re-computed as necessary
* if the image size changes. For computing features to compare images, fixed
* size mode makes more sense.
*
 * Example usage for image comparison:
 *
 * <pre>
 * {@code
 * FImage i1, i2 = ...
 * Gist<FImage> fsg = new Gist<FImage>(256, 256);
 *
 * fsg.analyseImage(i1);
 * FloatFV f1 = fsg.getResponse();
 *
 * fsg.analyseImage(i2);
 * FloatFV f2 = fsg.getResponse();
 *
 * double d = FloatFVComparison.SUM_SQUARE.compare(f1, f2);
 * }
 * </pre>
 *
*
* The ability to generate a visualisation of the current descriptor in the same
* way as provided by the original Matlab code is given by the
* {@link #visualiseDescriptor(int)} method.
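 * <p>
 * For example, to show the visualisation of the most recently analysed image
 * (an illustrative sketch, not taken from the original code; it assumes an
 * {@link MBFImage} called {@code image} and uses OpenIMAJ's
 * {@link org.openimaj.image.DisplayUtilities} for display):
 *
 * <pre>
 * {@code
 * Gist<MBFImage> gist = new Gist<MBFImage>(256, 256);
 * gist.analyseImage(image);
 * DisplayUtilities.display(gist.visualiseDescriptor(256));
 * }
 * </pre>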
*
* @author Jonathon Hare ([email protected])
*
 * @param <IMAGE>
 *            The type of image. {@link MBFImage} and {@link FImage} are
 *            supported; other types might not work.
*/
@Reference(
type = ReferenceType.Article,
author = { "Oliva, Aude", "Torralba, Antonio" },
title = "Modeling the Shape of the Scene: A Holistic Representation of the Spatial Envelope",
year = "2001",
journal = "Int. J. Comput. Vision",
pages = { "145", "", "175" },
url = "http://dx.doi.org/10.1023/A:1011139631724",
month = "May",
number = "3",
publisher = "Kluwer Academic Publishers",
volume = "42",
customData = {
"issn", "0920-5691",
"numpages", "31",
"doi", "10.1023/A:1011139631724",
"acmid", "598462",
"address", "Hingham, MA, USA",
"keywords", "energy spectrum, natural images, principal components, scene recognition, spatial layout"
})
public class Gist<IMAGE extends Image<?, IMAGE> & SinglebandImageProcessor.Processable<Float, FImage, IMAGE>>
		implements
		ImageAnalyser<IMAGE>
{
/**
* The default number of filter orientations per scale (from HF to LF)
*/
public static final int[] DEFAULT_ORIENTATIONS_PER_SCALE = { 8, 8, 8, 8 };
/**
	 * The default number of spatial blocks
*/
public static final int DEFAULT_NUMBER_OF_BLOCKS = 4;
/**
* The default number of cycles per image for the pre-filter Gaussian
*/
public static final int DEFAULT_PREFILTER_FC = 4;
/**
* The default amount of padding to apply before convolving with the Gabor
* filters
*/
public static final int DEFAULT_BOUNDARY_EXTENSION = 32;
/**
* The number of blocks in each direction
*/
public int numberOfBlocks = DEFAULT_NUMBER_OF_BLOCKS;
/**
* The number of cycles per image for the pre-filter Gaussian
*/
public int prefilterFC = DEFAULT_PREFILTER_FC;
/**
* The amount of padding to add before convolving with the Gabor functions
*/
public int boundaryExtension = DEFAULT_BOUNDARY_EXTENSION;
/**
* Default image size (both height and width) for fixed size Gist
*/
public static final int DEFAULT_SIZE = 128;
protected FImage[] gaborFilters;
protected FloatFV response;
protected int[] orientationsPerScale;
protected boolean fixedSize;
protected int imageWidth;
protected int imageHeight;
/**
* Construct a fixed size Gist extractor using the default values.
*/
public Gist() {
this(true);
}
/**
* Construct a variable or fixed size Gist extractor using the default
* values.
*
* @param fixedSize
* if true the images should be resized and cropped
*/
public Gist(boolean fixedSize) {
this(DEFAULT_SIZE, DEFAULT_SIZE, DEFAULT_ORIENTATIONS_PER_SCALE, fixedSize);
}
/**
* Construct a fixed-size Gist extractor, with the given image size and the
* default number of orientations per scale for the Gabor jet.
*
* @param width
* the image width
* @param height
* the image height
*/
public Gist(int width, int height) {
this(width, height, DEFAULT_ORIENTATIONS_PER_SCALE);
}
/**
* Construct a fixed-size Gist extractor, with the given image size and
* number of orientations per scale for the Gabor jet.
*
* @param width
* the image width
* @param height
* the image height
* @param orientationsPerScale
* the number of Gabor orientations per scale (from HF to LF)
*/
public Gist(int width, int height, int[] orientationsPerScale) {
this(width, height, orientationsPerScale, true);
}
/**
* Construct a variable or fixed size Gist extractor with the given number
* of orientations per scale for the Gabor jet.
*
* @param orientationsPerScale
* the number of Gabor orientations per scale (from HF to LF)
* @param fixedSize
* if true the images should be resized and cropped
*/
public Gist(int[] orientationsPerScale, boolean fixedSize) {
this(DEFAULT_SIZE, DEFAULT_SIZE, orientationsPerScale, fixedSize);
}
/**
* Construct a variable or fixed size Gist extractor with the given number
* of orientations per scale for the Gabor jet. The given height and width
* are ignored in variable size mode.
*
* @param width
* the image width
* @param height
* the image height
* @param orientationsPerScale
* the number of Gabor orientations per scale (from HF to LF)
* @param fixedSize
* if true the images should be resized and cropped
*/
public Gist(int width, int height, int[] orientationsPerScale, boolean fixedSize) {
this.fixedSize = fixedSize;
this.orientationsPerScale = orientationsPerScale;
this.imageWidth = width;
this.imageHeight = height;
if (fixedSize)
			this.gaborFilters = GaborFilters.createGaborJets(width + 2 * this.boundaryExtension,
					height + 2 * this.boundaryExtension, orientationsPerScale);
}
@Override
public void analyseImage(IMAGE image) {
if (fixedSize) {
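			// scale the image so that it completely covers the target size, then
			// take the centre crop at the target dimensions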
final double sc = Math.max((double) imageWidth / (double) image.getWidth(), (double) imageHeight
/ (double) image.getHeight());
final IMAGE resized = image.process(new ResizeProcessor((float) sc));
final IMAGE roi = resized.extractCenter(imageWidth, imageHeight);
extractGist(roi);
} else {
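			// in variable-size mode, (re)create the Gabor filter jet as necessary
			// when the incoming image size changes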
if (gaborFilters == null || gaborFilters[0].width != image.getWidth()
|| gaborFilters[0].height != image.getHeight())
{
gaborFilters = GaborFilters.createGaborJets(image.getWidth() + 2 * this.boundaryExtension,
image.getHeight() + 2 * this.boundaryExtension, orientationsPerScale);
}
extractGist(image.clone()); // clone to stop side effects from
// normalisation further down
}
}
protected Gist(int[] orientationsPerScale) {
this.orientationsPerScale = orientationsPerScale;
}
protected void extractGist(IMAGE image) {
MBFImage mbfimage;
if (image instanceof FImage) {
mbfimage = new MBFImage((FImage) image);
} else if (image instanceof MBFImage) {
mbfimage = (MBFImage) image;
} else {
throw new UnsupportedOperationException("Image type " + image.getClass()
+ " is not currently supported. Please file a bug report.");
}
final MBFImage o = prefilter(mbfimage.normalise());
this.response = gistGabor(o);
}
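	/*
	 * Pre-filtering, following the original Matlab code: take the log of the
	 * intensities, whiten each band by subtracting a Gaussian low-pass filtered
	 * copy, then apply local contrast normalisation.
	 */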
private MBFImage prefilter(MBFImage img) {
final int w = 5;
final double s1 = this.prefilterFC / Math.sqrt(Math.log(2));
final int sw = img.getWidth() + 2 * w;
final int sh = img.getHeight() + 2 * w;
int n = Math.max(sw, sh);
n = n + n % 2;
img = img.paddingSymmetric(w, w, w + n - sw, w + n - sh);
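		// build the Gaussian low-pass filter directly in the packed (interleaved
		// real/imaginary) layout expected by FourierConvolve.convolvePrepared;
		// only the real parts are non-zero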
final FImage filter = new FImage(2 * n, n);
for (int j = 0; j < n; j++) {
final int fy = j - n / 2;
for (int i = 0; i < n * 2; i += 2) {
final int fx = (i / 2) - n / 2;
filter.pixels[j][i] = (float) Math.exp(-(fx * fx + fy * fy) / (s1 * s1));
}
}
final MBFImage output = new MBFImage();
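		// log-transform each band and subtract its low-pass filtered version,
		// keeping only the whitened (high-frequency) component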
for (int b = 0; b < img.numBands(); b++) {
final FImage band = img.getBand(b);
for (int y = 0; y < band.height; y++) {
for (int x = 0; x < band.width; x++) {
band.pixels[y][x] = (float) Math.log(1 + band.pixels[y][x] * 255);
}
}
output.bands.add(band.subtractInplace(FourierConvolve.convolvePrepared(band, filter, true)));
}
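		// estimate the local energy across all bands and use it to locally
		// normalise the contrast of every band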
final FImage mean = output.flatten();
final FImage meansq = mean.multiply(mean);
final FImage localstd = FourierConvolve.convolvePrepared(meansq, filter, true);
for (int b = 0; b < img.numBands(); b++) {
final FImage band = output.getBand(b);
for (int y = 0; y < localstd.height; y++)
for (int x = 0; x < localstd.width; x++)
band.pixels[y][x] = (float) (band.pixels[y][x] / (0.2 + Math
.sqrt(Math.abs(localstd.pixels[y][x]))));
}
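		// crop away the symmetric padding added at the start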
return output.extractROI(w, w, sw - w - w, sh - w - w);
}
private FloatFV gistGabor(MBFImage img) {
final int blocksPerFilter = computeNumberOfSamplingBlocks();
final int nFeaturesPerBand = gaborFilters.length * blocksPerFilter;
final int nFilters = this.gaborFilters.length;
// pad the image
img = img.paddingSymmetric(boundaryExtension, boundaryExtension, boundaryExtension, boundaryExtension);
final int cols = img.getCols();
final int rows = img.getRows();
final FloatFFT_2D fft = new FloatFFT_2D(rows, cols);
final float[][] workingSpace = new float[rows][cols * 2];
final FloatFV fv = new FloatFV(nFeaturesPerBand * img.numBands());
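		// each band is transformed to the frequency domain once; every Gabor
		// filter is then applied by point-wise complex multiplication (performConv)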
for (int b = 0; b < img.numBands(); b++) {
final FImage band = img.bands.get(b);
final float[][] preparedImage =
FourierTransform.prepareData(band.pixels, rows, cols, true);
fft.complexForward(preparedImage);
for (int i = 0; i < nFilters; i++) {
// convolve with the filter
FImage ig = performConv(fft, preparedImage, workingSpace, this.gaborFilters[i], rows, cols);
// remove padding
				ig = ig.extractROI(boundaryExtension, boundaryExtension,
						band.width - 2 * boundaryExtension, band.height - 2 * boundaryExtension);
sampleResponses(ig, fv.values, b * nFeaturesPerBand + i * blocksPerFilter);
}
}
return fv;
}
/**
* Compute the number of sampling blocks that are used for every filter. The
* default implementation returns {@link #numberOfBlocks}*
* {@link #numberOfBlocks}, but can be overridden in combination with
* {@link #sampleResponses(FImage, float[], int)} in a subclass to support
* different spatial sampling strategies.
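	 * <p>
	 * As a rough, illustrative sketch (a hypothetical subclass, not part of the
	 * library), a {@code GlobalPoolingGist} that pools each filter response over
	 * the whole image rather than over a grid of blocks might look like:
	 *
	 * <pre>
	 * {@code
	 * class GlobalPoolingGist extends Gist<FImage> {
	 * 	protected int computeNumberOfSamplingBlocks() {
	 * 		return 1; // a single block covering the entire image
	 * 	}
	 *
	 * 	protected void sampleResponses(FImage image, float[] v, int offset) {
	 * 		float sum = 0;
	 * 		for (int y = 0; y < image.height; y++)
	 * 			for (int x = 0; x < image.width; x++)
	 * 				sum += image.pixels[y][x];
	 * 		v[offset] = sum / (image.width * image.height);
	 * 	}
	 * }
	 * }
	 * </pre>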
*
* @return the number of sampling blocks per filter.
*/
protected int computeNumberOfSamplingBlocks() {
return numberOfBlocks * numberOfBlocks;
}
/**
* Sample the average response from each of the blocks in the image and
* insert into the vector. This method could be overridden to support
* different spatial aggregation strategies (in which case
* {@link #computeNumberOfSamplingBlocks()} should also be overridden).
*
* @param image
* the image to sample
* @param v
* the vector to write into
* @param offset
	 *            the offset into the vector at which to start writing
*/
protected void sampleResponses(FImage image, float[] v, int offset) {
final int gridWidth = image.width / this.numberOfBlocks;
final int gridHeight = image.height / this.numberOfBlocks;
for (int iy = 0; iy < this.numberOfBlocks; iy++) {
final int starty = gridHeight * iy;
final int stopy = Math.min(starty + gridHeight, image.height);
for (int ix = 0; ix < this.numberOfBlocks; ix++) {
final int startx = gridWidth * ix;
final int stopx = Math.min(startx + gridWidth, image.width);
float avg = 0;
for (int y = starty; y < stopy; y++) {
for (int x = startx; x < stopx; x++) {
avg += image.pixels[y][x];
}
}
avg /= ((stopx - startx) * (stopy - starty));
// note y and x transposed to conform to the matlab
// implementation
v[offset + iy + ix * this.numberOfBlocks] = avg;
}
}
}
/*
	 * Perform convolution in the frequency domain and reconstruct the resultant
* image as the magnitudes of the complex components from the ifft.
*/
private FImage performConv(FloatFFT_2D fft, float[][] preparedImage, float[][] workingSpace, FImage filterfft,
int rows, int cols)
{
final float[][] preparedKernel = filterfft.pixels;
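		// point-wise complex multiplication of the transformed image with the
		// filter's frequency response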
for (int y = 0; y < rows; y++) {
for (int x = 0; x < cols; x++) {
final float reImage = preparedImage[y][x * 2];
final float imImage = preparedImage[y][1 + x * 2];
final float reKernel = preparedKernel[y][x * 2];
final float imKernel = preparedKernel[y][1 + x * 2];
final float re = reImage * reKernel - imImage * imKernel;
final float im = reImage * imKernel + imImage * reKernel;
workingSpace[y][x * 2] = re;
workingSpace[y][1 + x * 2] = im;
}
}
fft.complexInverse(workingSpace, true);
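		// the filter response is the magnitude of the complex inverse transform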
final FImage out = new FImage(cols, rows);
for (int r = 0; r < rows; r++) {
for (int c = 0; c < cols; c++) {
out.pixels[r][c] = (float) Math.sqrt(workingSpace[r][c * 2] * workingSpace[r][c * 2]
+ workingSpace[r][1 + c * 2]
* workingSpace[r][1 + c * 2]);
}
}
return out;
}
/**
* Compute the descriptor visualisation in the same form as the original
	 * Matlab code. The resultant image illustrates the dominant filter
* responses for each spatial bin. The filter scales are drawn with
* differing hues, and brightness is proportional to response value.
*
* @param height
* the desired height of the produced image
	 * @return the visualisation
	 */
	public MBFImage visualiseDescriptor(int height) {
		// TODO: colour descriptors
final Float[][] C = ColourMap.HSV.generateColours(orientationsPerScale.length);
final FImage[] G = new FImage[this.gaborFilters.length];
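		// build a displayable version of each Gabor filter by unpacking its
		// frequency-domain representation and symmetrising it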
for (int i = 0; i < gaborFilters.length; i++) {
G[i] = new FImage(this.gaborFilters[i].width / 2, this.gaborFilters[i].height);
FourierTransform.unprepareData(gaborFilters[i].pixels, G[i], false);
G[i] = ResizeProcessor.halfSize(G[i]);
G[i].addInplace(G[i].clone().flipY().flipX());
}
final int blocksPerFilter = computeNumberOfSamplingBlocks();
final int nFeaturesPerBand = gaborFilters.length * blocksPerFilter;
final int nBands = this.response.values.length / nFeaturesPerBand;
final MBFImage output = new MBFImage(nBands * height, height, ColourSpace.RGB);
output.fill(RGBColour.WHITE);
for (int b = 0; b < nBands; b++) {
float max = 0;
final MBFImage[] blockImages = new MBFImage[numberOfBlocks * numberOfBlocks];
for (int y = 0, k = 0; y < numberOfBlocks; y++) {
for (int x = 0; x < numberOfBlocks; x++, k++) {
blockImages[k] = new MBFImage(G[0].width, G[0].height, 3);
for (int s = 0, j = 0; s < orientationsPerScale.length; s++) {
for (int i = 0; i < orientationsPerScale[s]; i++, j++) {
final MBFImage col = new MBFImage(G[0].width, G[0].height, 3);
col.fill(C[s]).multiplyInplace(
this.response.values[y + x * numberOfBlocks + j * numberOfBlocks * numberOfBlocks]);
blockImages[k].addInplace(G[j].toRGB().multiply(col));
}
}
for (int i = 0; i < blockImages[k].numBands(); i++)
max = Math.max(max, blockImages[k].bands.get(i).max());
}
}
			final int ts = (height / numberOfBlocks);
final ResizeProcessor rp = new ResizeProcessor(ts, ts, true);
for (int y = 0, k = 0; y < numberOfBlocks; y++) {
for (int x = 0; x < numberOfBlocks; x++, k++) {
blockImages[k].divideInplace(max);
final MBFImage tmp = blockImages[k].process(rp);
tmp.drawLine(0, 0, 0, ts - 1, 1, RGBColour.WHITE);
tmp.drawLine(0, 0, ts - 1, 0, 1, RGBColour.WHITE);
tmp.drawLine(ts - 1, 0, ts - 1, ts - 1, 1, RGBColour.WHITE);
tmp.drawLine(0, ts - 1, ts - 1, ts - 1, 1, RGBColour.WHITE);
output.drawImage(tmp, x * ts + b * height, y * ts);
}
}
}
return output;
}
/**
* Get the response vector from the previous call to
* {@link #analyseImage(Image)}.
*
* @return the response
*/
public FloatFV getResponse() {
return response;
}
}