boofcv.alg.tracker.circulant.CirculantTracker Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of boofcv-recognition Show documentation
BoofCV is an open source Java library for real-time computer vision and robotics applications.
There is a newer version: 1.1.7
/*
 * Copyright (c) 2011-2017, Peter Abeles. All Rights Reserved.
 *
 * This file is part of BoofCV (http://boofcv.org).
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package boofcv.alg.tracker.circulant;

import boofcv.abst.feature.detect.peak.SearchLocalPeak;
import boofcv.abst.transform.fft.DiscreteFourierTransform;
import boofcv.alg.interpolate.InterpolatePixelS;
import boofcv.alg.misc.PixelMath;
import boofcv.alg.transform.fft.DiscreteFourierTransformOps;
import boofcv.factory.feature.detect.peak.FactorySearchLocalPeak;
import boofcv.misc.BoofMiscOps;
import boofcv.struct.image.GrayF64;
import boofcv.struct.image.ImageGray;
import boofcv.struct.image.InterleavedF64;
import georegression.struct.shapes.RectangleLength2D_F32;

import java.util.Random;

/**
 * <p>
 * Tracker that uses the theory of Circulant matrices, Discrete Fourier Transform (DFT), and linear classifiers to track
 * a target and learn its changes in appearance [1].  The target is assumed to be rectangular and has fixed size and
 * location.  A dense local search is performed around the most recent target location.  The search is done quickly
 * using the DFT.
 * </p>
 *
 * <p>
 * Tracking is performed using texture information.  Since only one description of the target is saved, tracks can
 * drift over time.  Tracking performance seems to improve if the object has distinctive edges.
 * </p>
 *
 * <p>
 * CHANGES FROM PAPER:
 * </p>
 * <ul>
 * <li>Input image is sampled into a square work region of constant size to improve runtime speed of FFT.</li>
 * <li>Peak of response is found using mean-shift.  Provides sub-pixel precision.</li>
 * <li>Pixels outside the image are assigned random values to prevent the tracker from fitting to them. Ideally they
 * wouldn't be processed, but that is complex to implement.</li>
 * </ul>
 *
 * <p>
 * [1] Henriques, Joao F., et al. "Exploiting the circulant structure of tracking-by-detection with kernels."
 * Computer Vision–ECCV 2012. Springer Berlin Heidelberg, 2012. 702-715.
 * </p>
 *
 * @author Peter Abeles
 */
public class CirculantTracker> {

	// --- Tuning parameters
	// spatial bandwidth (proportional to target)
	private double output_sigma_factor;

	// gaussian kernel bandwidth
	private double sigma;

	// regularization term
	private double lambda;
	// linear interpolation term.  Adjusts how fast it can learn
	private double interp_factor;

	// the maximum pixel value
	private double maxPixelValue;

	// extra padding around the selected region
	private double padding;

	//----- Internal variables
	// Input image width and height
	private int imageWidth,imageHeight;

	// computes the FFT
	private DiscreteFourierTransform fft = DiscreteFourierTransformOps.createTransformF64();

	// storage for subimage of input image
	protected GrayF64 templateNew = new GrayF64(1,1);
	// storage for the subimage of the previous frame
	protected GrayF64 template = new GrayF64(1,1);

	// cosine window used to reduce artifacts from FFT
	protected GrayF64 cosine = new GrayF64(1,1);

	// Storage for the kernel's response
	private GrayF64 k = new GrayF64(1,1);
	private InterleavedF64 kf = new InterleavedF64(1,1,2);

	// Learn values.  used to compute weight in linear classifier
	private InterleavedF64 alphaf = new InterleavedF64(1,1,2);
	private InterleavedF64 newAlphaf = new InterleavedF64(1,1,2);

	// location of target
	protected RectangleLength2D_F32 regionTrack = new RectangleLength2D_F32();
	protected RectangleLength2D_F32 regionOut = new RectangleLength2D_F32();

	// Used for computing the gaussian kernel
	protected GrayF64 gaussianWeight = new GrayF64(1,1);
	protected InterleavedF64 gaussianWeightDFT = new InterleavedF64(1,1,2);

	// detector response
	private GrayF64 response = new GrayF64(1,1);

	// storage for storing temporary results
	private GrayF64 tmpReal0 = new GrayF64(1,1);
	private GrayF64 tmpReal1 = new GrayF64(1,1);

	private InterleavedF64 tmpFourier0 = new InterleavedF64(1,1,2);
	private InterleavedF64 tmpFourier1 = new InterleavedF64(1,1,2);
	private InterleavedF64 tmpFourier2 = new InterleavedF64(1,1,2);

	// interpolation used when sampling input image into work space
	private InterpolatePixelS interp;

	// used to compute sub-pixel location
	private SearchLocalPeak localPeak =
			FactorySearchLocalPeak.meanShiftUniform(5, 1e-4f, GrayF64.class);

	// adjustment from sub-pixel
	protected float offX,offY;

	// size of the work space in pixels
	private int workRegionSize;
	// conversion from workspace to image pixels
	private float stepX,stepY;

	// used to fill the area outside of the image with unstructured data.
	private Random rand = new Random(234);

	/**
	 * Configure tracker
	 *
	 * @param output_sigma_factor  spatial bandwidth (proportional to target) Try 1.0/16.0
	 * @param sigma Sigma for Gaussian kernel in linear classifier.  Try 0.2
	 * @param lambda Try 1e-2
	 * @param interp_factor Try 0.075
	 * @param padding Padding added around the selected target.  Try 1
	 * @param workRegionSize Size of work region. Best if power of 2.  Try 64
	 * @param maxPixelValue Maximum pixel value.  Typically 255
	 */
	public CirculantTracker(double output_sigma_factor, double sigma, double lambda, double interp_factor,
							double padding ,
							int workRegionSize ,
							double maxPixelValue,
							InterpolatePixelS interp ) {
		if( workRegionSize < 3 )
			throw new IllegalArgumentException("Minimum size of work region is 3 pixels.");

		this.output_sigma_factor = output_sigma_factor;
		this.sigma = sigma;
		this.lambda = lambda;
		this.interp_factor = interp_factor;
		this.maxPixelValue = maxPixelValue;
		this.interp = interp;

		this.padding = padding;
		this.workRegionSize = workRegionSize;

		resizeImages(workRegionSize);
		computeCosineWindow(cosine);
		computeGaussianWeights(workRegionSize);

		localPeak.setImage(response);
	}

	/**
	 * Initializes tracking around the specified rectangle region
	 * @param image Image to start tracking from
	 * @param x0 top-left corner of region
	 * @param y0 top-left corner of region
	 * @param regionWidth region's width
	 * @param regionHeight region's height
	 */
	public void initialize( T image , int x0 , int y0 , int regionWidth , int regionHeight ) {

		this.imageWidth = image.width;
		this.imageHeight = image.height;

		setTrackLocation(x0,y0,regionWidth,regionHeight);

		initialLearning(image);
	}

	/**
	 * Used to change the track's location. If this method is used it is assumed that tracking is active and that
	 * the appearance of the target has not changed
	 * @param x0 top-left corner of region
	 * @param y0 top-left corner of region
	 * @param regionWidth region's width
	 * @param regionHeight region's height
	 */
	public void setTrackLocation( int x0 , int y0 , int regionWidth , int regionHeight ) {
		if( imageWidth < regionWidth || imageHeight < regionHeight)
			throw new IllegalArgumentException("Track region is larger than input image: "+regionWidth+" "+regionHeight);

		regionOut.width = regionWidth;
		regionOut.height = regionHeight;

		// adjust for padding
		int w = (int)(regionWidth*(1+padding));
		int h = (int)(regionHeight*(1+padding));
		int cx = x0 + regionWidth/2;
		int cy = y0 + regionHeight/2;

		// save the track location
		this.regionTrack.width = w;
		this.regionTrack.height = h;
		this.regionTrack.x0 = cx-w/2;
		this.regionTrack.y0 = cy-h/2;

		stepX = (w-1)/(float)(workRegionSize-1);
		stepY = (h-1)/(float)(workRegionSize-1);

		updateRegionOut();
	}


	/**
	 * Learn the target's appearance.
	 */
	protected void initialLearning( T image ) {
		// get subwindow at current estimated target position, to train classifier
		get_subwindow(image, template);

		// Kernel Regularized Least-Squares, calculate alphas (in Fourier domain)
		//	k = dense_gauss_kernel(sigma, x);
		dense_gauss_kernel(sigma, template, template,k);
		fft.forward(k, kf);

		// new_alphaf = yf ./ (fft2(k) + lambda);   %(Eq. 7)
		computeAlphas(gaussianWeightDFT, kf, lambda, alphaf);
	}

	/**
	 * Computes the cosine window
	 */
	protected static void computeCosineWindow( GrayF64 cosine ) {
		double cosX[] = new double[ cosine.width ];
		for( int x = 0; x < cosine.width; x++ ) {
			cosX[x] = 0.5*(1 - Math.cos( 2.0*Math.PI*x/(cosine.width-1) ));
		}
		for( int y = 0; y < cosine.height; y++ ) {
			int index = cosine.startIndex + y*cosine.stride;
			double cosY = 0.5*(1 - Math.cos( 2.0*Math.PI*y/(cosine.height-1) ));
			for( int x = 0; x < cosine.width; x++ ) {
				cosine.data[index++] = cosX[x]*cosY;
			}
		}
	}

	/**
	 * Computes the weights used in the gaussian kernel
	 *
	 * This isn't actually symmetric for even widths.  These weights are used has label in the learning phase.  Closer
	 * to one the more likely it is the true target.  It should be a peak in the image center.  If it is not then
	 * it will learn an incorrect model.
	 */
	protected void computeGaussianWeights( int width ) {
		// desired output (gaussian shaped), bandwidth proportional to target size
		double output_sigma = Math.sqrt(width*width) * output_sigma_factor;

		double left = -0.5/(output_sigma*output_sigma);

		int radius = width/2;

		for( int y = 0; y < gaussianWeight.height; y++ ) {
			int index = gaussianWeight.startIndex + y*gaussianWeight.stride;

			double ry = y-radius;

			for( int x = 0; x < width; x++ ) {
				double rx = x-radius;

				gaussianWeight.data[index++] = Math.exp(left * (ry * ry + rx * rx));
			}
		}

		fft.forward(gaussianWeight,gaussianWeightDFT);
	}


	protected void resizeImages( int workRegionSize ) {
		templateNew.reshape(workRegionSize, workRegionSize);
		template.reshape(workRegionSize, workRegionSize);
		cosine.reshape(workRegionSize,workRegionSize);
		k.reshape(workRegionSize,workRegionSize);
		kf.reshape(workRegionSize,workRegionSize);
		alphaf.reshape(workRegionSize,workRegionSize);
		newAlphaf.reshape(workRegionSize,workRegionSize);
		response.reshape(workRegionSize,workRegionSize);
		tmpReal0.reshape(workRegionSize,workRegionSize);
		tmpReal1.reshape(workRegionSize,workRegionSize);
		tmpFourier0.reshape(workRegionSize,workRegionSize);
		tmpFourier1.reshape(workRegionSize,workRegionSize);
		tmpFourier2.reshape(workRegionSize,workRegionSize);
		gaussianWeight.reshape(workRegionSize,workRegionSize);
		gaussianWeightDFT.reshape(workRegionSize,workRegionSize);
	}

	/**
	 * Search for the track in the image and
	 *
	 * @param image Next image in the sequence
	 */
	public void performTracking( T image ) {
		if( image.width != imageWidth || image.height != imageHeight )
			throw new IllegalArgumentException("Tracking image size is not the same as " +
					"input image. Expected "+imageWidth+" x "+imageHeight);
		updateTrackLocation(image);
		if( interp_factor != 0 )
			performLearning(image);
	}

	/**
	 * Find the target inside the current image by searching around its last known location
	 */
	protected void updateTrackLocation(T image) {
		get_subwindow(image, templateNew);

		// calculate response of the classifier at all locations
		// matlab: k = dense_gauss_kernel(sigma, x, z);
		dense_gauss_kernel(sigma, templateNew, template,k);

		fft.forward(k,kf);

		// response = real(ifft2(alphaf .* fft2(k)));   %(Eq. 9)
		DiscreteFourierTransformOps.multiplyComplex(alphaf, kf, tmpFourier0);
		fft.inverse(tmpFourier0, response);

		// find the pixel with the largest response
		int N = response.width*response.height;
		int indexBest = -1;
		double valueBest = -1;
		for( int i = 0; i < N; i++ ) {
			double v = response.data[i];
			if( v > valueBest ) {
				valueBest = v;
				indexBest = i;
			}
		}

		int peakX = indexBest % response.width;
		int peakY = indexBest / response.width;

		// sub-pixel peak estimation
		subpixelPeak(peakX, peakY);

		// peak in region's coordinate system
		float deltaX = (peakX+offX) - templateNew.width/2;
		float deltaY = (peakY+offY) - templateNew.height/2;

		// convert peak location into image coordinate system
		regionTrack.x0 = regionTrack.x0 + deltaX*stepX;
		regionTrack.y0 = regionTrack.y0 + deltaY*stepY;

		updateRegionOut();
	}

	/**
	 * Refine the local-peak using a search algorithm for sub-pixel accuracy.
	 */
	protected void subpixelPeak(int peakX, int peakY) {
		// this function for r was determined empirically by using work regions of 32,64,128
		int r = Math.min(2,response.width/25);
		if( r < 0 )
			return;

		localPeak.setSearchRadius(r);
		localPeak.search(peakX,peakY);

		offX = localPeak.getPeakX() - peakX;
		offY = localPeak.getPeakY() - peakY;
	}

	private void updateRegionOut() {
		regionOut.x0 = (regionTrack.x0+((int)regionTrack.width)/2)-((int)regionOut.width)/2;
		regionOut.y0 = (regionTrack.y0+((int)regionTrack.height)/2)-((int)regionOut.height)/2;
	}

	/**
	 * Update the alphas and the track's appearance
	 */
	public void performLearning(T image) {
		// use the update track location
		get_subwindow(image, templateNew);

		// Kernel Regularized Least-Squares, calculate alphas (in Fourier domain)
		//	k = dense_gauss_kernel(sigma, x);
		dense_gauss_kernel(sigma, templateNew, templateNew, k);
		fft.forward(k,kf);

		// new_alphaf = yf ./ (fft2(k) + lambda);   %(Eq. 7)
		computeAlphas(gaussianWeightDFT, kf, lambda, newAlphaf);

		// subsequent frames, interpolate model
		// alphaf = (1 - interp_factor) * alphaf + interp_factor * new_alphaf;
		int N = alphaf.width*alphaf.height*2;
		for( int i = 0; i < N; i++ ) {
			alphaf.data[i] = (1-interp_factor)*alphaf.data[i] + interp_factor*newAlphaf.data[i];
		}

		// Set the previous image to be an interpolated version
		//		z = (1 - interp_factor) * z + interp_factor * new_z;
		N = templateNew.width* templateNew.height;
		for( int i = 0; i < N; i++ ) {
			template.data[i] = (1-interp_factor)* template.data[i] + interp_factor*templateNew.data[i];
		}
	}

	/**
	 * Gaussian Kernel with dense sampling.
	 *  Evaluates a gaussian kernel with bandwidth SIGMA for all displacements
	 *  between input images X and Y, which must both be MxN. They must also
	 *  be periodic (ie., pre-processed with a cosine window). The result is
	 *  an MxN map of responses.
	 *
	 * @param sigma Gaussian kernel bandwidth
	 * @param x Input image
	 * @param y Input image
	 * @param k Output containing Gaussian kernel for each element in target region
	 */
	public void dense_gauss_kernel(double sigma , GrayF64 x , GrayF64 y , GrayF64 k ) {

		InterleavedF64 xf=tmpFourier0,yf,xyf=tmpFourier2;
		GrayF64 xy = tmpReal0;
		double yy;

		// find x in Fourier domain
		fft.forward(x, xf);
		double xx = imageDotProduct(x);

		if( x != y ) {
			// general case, x and y are different
			yf = tmpFourier1;
			fft.forward(y,yf);
			yy = imageDotProduct(y);
		} else {
			// auto-correlation of x, avoid repeating a few operations
			yf = xf;
			yy = xx;
		}

		//----   xy = invF[ F(x)*F(y) ]
		// cross-correlation term in Fourier domain
		elementMultConjB(xf,yf,xyf);
		// convert to spatial domain
		fft.inverse(xyf,xy);
		circshift(xy,tmpReal1);

		// calculate gaussian response for all positions
		gaussianKernel(xx, yy, tmpReal1, sigma, k);
	}

	public static void circshift(GrayF64 a, GrayF64 b ) {
		int w2 = a.width/2;
		int h2 = b.height/2;

		for( int y = 0; y < a.height; y++ ) {
			int yy = (y+h2)%a.height;

			for( int x = 0; x < a.width; x++ ) {
				int xx = (x+w2)%a.width;

				b.set( xx , yy , a.get(x,y));
			}
		}

	}

	/**
	 * Computes the dot product of the image with itself
	 */
	public static double imageDotProduct(GrayF64 a) {

		double total = 0;

		int N = a.width*a.height;
		for( int index = 0; index < N; index++ ) {
			double value = a.data[index];
			total += value*value;
		}

		return total;
	}

	/**
	 * Element-wise multiplication of 'a' and the complex conjugate of 'b'
	 */
	public static void elementMultConjB( InterleavedF64 a , InterleavedF64 b , InterleavedF64 output ) {
		for( int y = 0; y < a.height; y++ ) {

			int index = a.startIndex + y*a.stride;

			for( int x = 0; x < a.width; x++, index += 2 ) {

				double realA = a.data[index];
				double imgA = a.data[index+1];
				double realB = b.data[index];
				double imgB = b.data[index+1];

				output.data[index] = realA*realB + imgA*imgB;
				output.data[index+1] = -realA*imgB + imgA*realB;
			}
		}
	}

	/**
	 * new_alphaf = yf ./ (fft2(k) + lambda);   %(Eq. 7)
	 */
	protected static void computeAlphas( InterleavedF64 yf , InterleavedF64 kf , double lambda ,
										 InterleavedF64 alphaf ) {

		for( int y = 0; y < kf.height; y++ ) {

			int index = yf.startIndex + y*yf.stride;

			for( int x = 0; x < kf.width; x++, index += 2 ) {
				double a = yf.data[index];
				double b = yf.data[index+1];

				double c = kf.data[index] + lambda;
				double d = kf.data[index+1];

				double bottom = c*c + d*d;

				alphaf.data[index] = (a*c + b*d)/bottom;
				alphaf.data[index+1] = (b*c - a*d)/bottom;
			}
		}
	}

	/**
	 * Computes the output of the Gaussian kernel for each element in the target region
	 *
	 * k = exp(-1 / sigma^2 * max(0, (xx + yy - 2 * xy) / numel(x)));
	 *
	 * @param xx ||x||^2
	 * @param yy ||y||^2
	 */
	protected static void gaussianKernel(double xx , double yy , GrayF64 xy , double sigma  , GrayF64 output ) {
		double sigma2 = sigma*sigma;
		double N = xy.width*xy.height;

		for( int y = 0; y < xy.height; y++ ) {
			int index = xy.startIndex + y*xy.stride;

			for( int x = 0; x < xy.width; x++ , index++ ) {

				// (xx + yy - 2 * xy) / numel(x)
				double value = (xx + yy - 2*xy.data[index])/N;

				double v = Math.exp(-Math.max(0, value) / sigma2);

				output.data[index] = v;
			}
		}
	}

	/**
	 * Copies the target into the output image and applies the cosine window to it.
	 */
	protected void get_subwindow( T image , GrayF64 output ) {

		// copy the target region

		interp.setImage(image);
		int index = 0;
		for( int y = 0; y < workRegionSize; y++ ) {
			float yy = regionTrack.y0 + y*stepY;

			for( int x = 0; x < workRegionSize; x++ ) {
				float xx = regionTrack.x0 + x*stepX;

				if( interp.isInFastBounds(xx,yy))
					output.data[index++] = interp.get_fast(xx,yy);
				else if( BoofMiscOps.checkInside(image, xx, yy))
					output.data[index++] = interp.get(xx, yy);
				else {
					// randomize to make pixels outside the image poorly correlate.  It will then focus on matching
					// what's inside the image since it has structure
					output.data[index++] = rand.nextFloat()*maxPixelValue;
				}
			}
		}

		// normalize values to be from -0.5 to 0.5
		PixelMath.divide(output, maxPixelValue, output);
		PixelMath.plus(output, -0.5f, output);
		// apply the cosine window to it
		PixelMath.multiply(output,cosine,output);
	}

	/**
	 * The location of the target in the image
	 */
	public RectangleLength2D_F32 getTargetLocation() {
		return regionOut;
	}

	/**
	 * Visual appearance of the target
	 */
	public GrayF64 getTargetTemplate() {
		return template;
	}

	public GrayF64 getResponse() {
		return response;
	}
}