// boofcv.alg.segmentation.ms.SegmentMeanShiftSearch Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of feature Show documentation
// BoofCV is an open source Java library for real-time computer vision and robotics applications.
// There is a newer version: 0.26
/*
 * Copyright (c) 2011-2014, Peter Abeles. All Rights Reserved.
 *
 * This file is part of BoofCV (http://boofcv.org).
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package boofcv.alg.segmentation.ms;

import boofcv.struct.image.ImageBase;
import boofcv.struct.image.ImageSInt32;
import boofcv.struct.image.ImageType;
import georegression.struct.point.Point2D_I32;
import org.ddogleg.struct.FastQueue;
import org.ddogleg.struct.GrowQueue_I32;

/**
 * <p>
 * Performs the search step in mean-shift image segmentation [1].  The mode of a pixel is the point at which mean-shift
 * converges when initialized at that pixel.  Pixels which have the same mode belong to the same segment. The weight
 * kernel G(|x-y|^2/h) has independent normalization factors h for spacial and color components.  A precomputed
 * Normal distribution is used for the weight kernel.
 * </p>
 * <p>
 * Output is provided in the form of an image where each pixel contains the index of the region the pixel belongs to.
 * Three other lists provide the color value of the region, the number of pixels in the region, and the location
 * of the mean-shift peak for that region.  This output is unlikely to be the final processing step since it will
 * over-segment the image.  Merging of similar modes and pruning of small regions is a common next step.
 * </p>
 *
 * <p>
 * An approximation of running mean-shift on each pixel is performed if the 'fast' flag is set to true.  The
 * approximation is about 5x faster and works by saving the mean-shift trajectory [2].  All points along the trajectory
 * are given the same mode. When performing mean-shift, if a pixel is encountered which has already been assigned a
 * mode, the search stops. This approximation tends to produce more regions and reduces clustering quality in high
 * texture regions.
 * </p>
 *
 * <p>
 * NOTES:
 * </p>
 * <ul>
 * <li>Spacial distance is normalized by dividing the found Euclidean distance squared by the maximum possible
 * Euclidean distance squared, thus ensuring it will be between 0 and 1.</li>
 * <li>Color distance is normalized by dividing it by the maximum allowed Euclidean distance squared.  If its distance
 * is more than the maximum allowed value then G() will be zero.</li>
 * <li>Image edges are handled by truncating the spacial kernel.  This truncation
 * will create an asymmetric kernel, but there is really no good way to handle image edges.</li>
 * </ul>
 *
 * <p>
 * CITATIONS:
 * </p>
 * <ol>
 * <li>Comaniciu, Dorin, and Peter Meer. "Mean shift analysis and applications." Computer Vision, 1999.
 * The Proceedings of the Seventh IEEE International Conference on. Vol. 2. IEEE, 1999.</li>
 * <li>Christoudias, Christopher M., Bogdan Georgescu, and Peter Meer. "Synergism in low level vision."
 * Pattern Recognition, 2002. Proceedings. 16th International Conference on. Vol. 4. IEEE, 2002.</li>
 * </ol>
 *
 * @author Peter Abeles
 */
public abstract class SegmentMeanShiftSearch<T extends ImageBase> {

	// used to detect convergence of mean-shift
	protected int maxIterations;
	protected float convergenceTol;

	// specifies the size of the mean-shift kernel in spacial pixels
	protected int radiusX,radiusY;
	// full kernel width along each axis, 2*radius + 1
	protected int widthX,widthY;
	// specifies the maximum Euclidean distance squared for the color components
	protected float maxColorDistanceSq;

	// converts a pixel location into the index of the mode that mean-shift converged to
	protected ImageSInt32 pixelToMode = new ImageSInt32(1,1);

	// Quick look up for the index of a mode from an image pixel.  It is possible for a pixel that is a mode
	// to have mean-shift converge to a different pixel
	protected ImageSInt32 quickMode = new ImageSInt32(1,1);

	// location of each peak in image pixel indexes
	protected FastQueue<Point2D_I32> modeLocation = new FastQueue<Point2D_I32>(Point2D_I32.class,true);

	// number of members in this peak
	protected GrowQueue_I32 modeMemberCount = new GrowQueue_I32();

	// storage for segment colors.  Not assigned in this class; presumably created by the concrete subclass
	protected FastQueue<float[]> modeColor;

	// quick lookup for spacial distance.  Row-major over the kernel, normalized so the corner value is 1
	protected float[] spacialTable;

	// quick lookup for Gaussian kernel.  Entry i stores exp(-i/(length-1)), i.e. inputs 0 to 1 inclusive
	protected float[] weightTable = new float[100];

	// If true it will use the fast approximation of mean-shift
	boolean fast;

	// The input image
	protected T image;

	// mode of mean-shift
	protected float modeX, modeY;

	/**
	 * Configures mean-shift segmentation
	 *
	 * @param maxIterations Maximum number of mean-shift iterations.  Try 30
	 * @param convergenceTol When the change is less than this amount stop.  Try 0.005
	 * @param radiusX Spacial kernel radius x-axis.  Must be &ge; 0.
	 * @param radiusY Spacial kernel radius y-axis.  Must be &ge; 0.
	 * @param maxColorDistance Maximum allowed Euclidean distance for the color component.  It is squared
	 *                         internally before being stored.
	 * @param fast Improve runtime by approximating running mean-shift on each pixel. Try true.
	 * @throws IllegalArgumentException if either radius is negative
	 */
	public SegmentMeanShiftSearch(int maxIterations, float convergenceTol,
								  int radiusX , int radiusY , float maxColorDistance ,
								  boolean fast ) {
		// a negative radius would produce a negative (or silently wrong) kernel size below
		if( radiusX < 0 || radiusY < 0 )
			throw new IllegalArgumentException(
					"Kernel radius must be >= 0.  radiusX = "+radiusX+" radiusY = "+radiusY);

		this.maxIterations = maxIterations;
		this.convergenceTol = convergenceTol;
		this.fast = fast;

		this.radiusX = radiusX;
		this.radiusY = radiusY;
		this.widthX = radiusX*2+1;
		this.widthY = radiusY*2+1;

		this.maxColorDistanceSq = maxColorDistance*maxColorDistance;

		// precompute the distance each pixel is from the sample point
		// normalize the values such that the maximum distance will be 1
		spacialTable = new float[widthX*widthY];
		int indexKernel = 0;
		float maxRadius = radiusX*radiusX + radiusY*radiusY;
		for( int y = -radiusY; y <= radiusY; y++ ) {
			for( int x = -radiusX; x <= radiusX; x++ ) {
				// with a 1x1 kernel maxRadius is zero; store 0 instead of letting 0/0 produce NaN
				spacialTable[indexKernel++] = maxRadius == 0 ? 0f : (x*x + y*y)/maxRadius;
			}
		}

		// precompute the weight table for inputs from 0 to 1, inclusive
		for( int i = 0; i < weightTable.length; i++ ) {
			weightTable[i] = (float)Math.exp(-i/(float)(weightTable.length-1));
		}
	}

	/**
	 * Performs mean-shift clustering on the input image
	 *
	 * @param image Input image
	 */
	public abstract void process( T image );

	/**
	 * Returns the Euclidean distance squared between the two vectors.  Only the first a.length elements
	 * are compared, so b must be at least as long as a.
	 *
	 * @param a First vector
	 * @param b Second vector
	 * @return Sum of the squared element-wise differences
	 */
	public static float distanceSq( float[] a , float[]b ) {
		float ret = 0;
		for( int i = 0; i < a.length; i++ ) {
			float d = a[i] - b[i];
			ret += d*d;
		}
		return ret;
	}

	/**
	 * Returns the weight given the normalized distance.  Instead of computing the kernel distance every time
	 * a lookup table with linear interpolation is used.  The distance has a domain from 0 to 1, inclusive.
	 * Distances of 1 or more return the last table entry.
	 *
	 * @param distance Normalized Euclidean distance squared.  From 0 to 1.
	 * @return Weight.
	 */
	protected float weight( float distance ) {
		// The table samples the kernel at i/(length-1), so the same scale must be used for the look up.
		// Scaling by the table length instead would sample slightly past the requested distance.
		int last = weightTable.length-1;
		float findex = distance*last;
		int index = (int)findex;

		if( index >= last )
			return weightTable[last];

		float sample0 = weightTable[index];
		float sample1 = weightTable[index+1];

		// linear interpolation between the two neighboring samples
		float w = findex-index;
		return sample0*(1f-w) + sample1*w;
	}

	/**
	 * Image which converts a pixel location into the index of the mode (region) it was assigned to
	 */
	public ImageSInt32 getPixelToRegion() {
		return pixelToMode;
	}

	/**
	 * Location of each peak in the image
	 */
	public FastQueue<Point2D_I32> getModeLocation() {
		return modeLocation;
	}

	/**
	 * Number of pixels which each peak has as a member
	 */
	public GrowQueue_I32 getRegionMemberCount() {
		return modeMemberCount;
	}

	/**
	 * Storage for the color of each segment
	 */
	public FastQueue<float[]> getModeColor() {
		return modeColor;
	}

	/**
	 * Returns the {@link ImageType} for the image type T.  Implemented by the concrete subclass.
	 */
	public abstract ImageType<T> getImageType();
}