/*
 * Copyright (c) 2021, Peter Abeles. All Rights Reserved.
 *
 * This file is part of BoofCV (http://boofcv.org).
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package boofcv.alg.tracker.circulant;

import boofcv.abst.feature.detect.peak.SearchLocalPeak;
import boofcv.abst.transform.fft.DiscreteFourierTransform;
import boofcv.alg.interpolate.InterpolatePixelS;
import boofcv.alg.misc.PixelMath;
import boofcv.alg.transform.fft.DiscreteFourierTransformOps;
import boofcv.factory.feature.detect.peak.ConfigMeanShiftSearch;
import boofcv.factory.feature.detect.peak.FactorySearchLocalPeak;
import boofcv.misc.BoofMiscOps;
import boofcv.struct.image.GrayF64;
import boofcv.struct.image.ImageGray;
import boofcv.struct.image.InterleavedF64;
import georegression.struct.shapes.RectangleLength2D_F32;

import java.util.Random;

/**
 * <p>
 * Tracker that uses the theory of circulant matrices, the Discrete Fourier Transform (DFT), and linear classifiers
 * to track a target and learn its changes in appearance [1]. The target is assumed to be rectangular with a fixed
 * size and location. A dense local search is performed around the most recent target location. The search is done
 * quickly using the DFT.
 * </p>
 *
 * <p>
 * Tracking is performed using texture information. Since only one description of the target is saved, tracks can
 * drift over time. Tracking performance seems to improve if the object has distinctive edges.
 * </p>
 *
 * <p>CHANGES FROM PAPER:</p>
 * <ul>
 * <li>The input image is sampled into a square work region of constant size to improve the runtime speed of the FFT.</li>
 * <li>The peak of the response is found using mean-shift, which provides sub-pixel precision.</li>
 * <li>Pixels outside the image are assigned random values to keep the tracker from fitting to them. Ideally they
 * wouldn't be processed at all, but that is complex to implement.</li>
 * </ul>
 *
 * <p>
 * [1] Henriques, Joao F., et al. "Exploiting the circulant structure of tracking-by-detection with kernels."
 * Computer Vision - ECCV 2012. Springer Berlin Heidelberg, 2012. 702-715.
 * </p>
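 *
 * <p>
 * A minimal usage sketch is shown below. It is illustrative only: the factory call used to create the
 * interpolator and the constants passed to the constructor are plausible values taken from the constructor's
 * Javadoc, not settings tuned for any particular sequence, and {@code frame0}/{@code frame1} stand in for
 * consecutive video frames.
 * </p>
 * <pre>{@code
 * InterpolatePixelS<GrayF64> interp =
 *     FactoryInterpolation.bilinearPixelS(GrayF64.class, BorderType.EXTENDED);
 * CirculantTracker<GrayF64> tracker = new CirculantTracker<>(
 *     1.0/16.0, 0.2, 1e-2, 0.075, 1.0, 64, 255, interp);
 * tracker.initialize(frame0, x0, y0, width, height); // select the target in the first frame
 * tracker.performTracking(frame1);                   // call once for each new frame
 * RectangleLength2D_F32 found = tracker.getTargetLocation();
 * }</pre>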
 *
 * @author Peter Abeles
 */
public class CirculantTracker<T extends ImageGray<T>> {

    // --- Tuning parameters

    // spatial bandwidth (proportional to target)
    private final double output_sigma_factor;

    // gaussian kernel bandwidth
    private final double sigma;

    // regularization term
    private final double lambda;

    // linear interpolation term. Adjusts how fast it can learn
    private final double interp_factor;

    // the maximum pixel value
    private final double maxPixelValue;

    // extra padding around the selected region
    private final double padding;

    //----- Internal variables

    // Input image width and height
    private int imageWidth, imageHeight;

    // computes the FFT
    private final DiscreteFourierTransform<GrayF64, InterleavedF64> fft =
            DiscreteFourierTransformOps.createTransformF64();

    // storage for subimage of input image
    protected GrayF64 templateNew = new GrayF64(1, 1);
    // storage for the subimage of the previous frame
    protected GrayF64 template = new GrayF64(1, 1);

    // cosine window used to reduce artifacts from FFT
    protected GrayF64 cosine = new GrayF64(1, 1);

    // Storage for the kernel's response
    private final GrayF64 k = new GrayF64(1, 1);
    private final InterleavedF64 kf = new InterleavedF64(1, 1, 2);

    // Learned values. Used to compute weights in the linear classifier
    private final InterleavedF64 alphaf = new InterleavedF64(1, 1, 2);
    private final InterleavedF64 newAlphaf = new InterleavedF64(1, 1, 2);

    // location of target
    protected RectangleLength2D_F32 regionTrack = new RectangleLength2D_F32();
    protected RectangleLength2D_F32 regionOut = new RectangleLength2D_F32();

    // Used for computing the gaussian kernel
    protected GrayF64 gaussianWeight = new GrayF64(1, 1);
    protected InterleavedF64 gaussianWeightDFT = new InterleavedF64(1, 1, 2);

    // detector response
    private final GrayF64 response = new GrayF64(1, 1);

    // storage for temporary results
    private final GrayF64 tmpReal0 = new GrayF64(1, 1);
    private final GrayF64 tmpReal1 = new GrayF64(1, 1);

    private final InterleavedF64 tmpFourier0 = new InterleavedF64(1, 1, 2);
    private final InterleavedF64 tmpFourier1 = new InterleavedF64(1, 1, 2);
    private final InterleavedF64 tmpFourier2 = new InterleavedF64(1, 1, 2);

    // interpolation used when sampling input image into work space
    private final InterpolatePixelS<T> interp;

    // used to compute sub-pixel location
    private final SearchLocalPeak<GrayF64> localPeak =
            FactorySearchLocalPeak.meanShiftUniform(new ConfigMeanShiftSearch(5, 1e-4), GrayF64.class);

    // adjustment from sub-pixel
    protected float offX, offY;

    // size of the work space in pixels
    private final int workRegionSize;
    // conversion from workspace to image pixels
    private float stepX, stepY;

    // used to fill the area outside of the image with unstructured data.
    private final Random rand = new Random(234);

    /**
     * Configure tracker
     *
     * @param output_sigma_factor spatial bandwidth (proportional to target) Try 1.0/16.0
     * @param sigma Sigma for Gaussian kernel in linear classifier. Try 0.2
     * @param lambda Try 1e-2
     * @param interp_factor Try 0.075
     * @param padding Padding added around the selected target. Try 1
     * @param workRegionSize Size of work region. Best if power of 2. Try 64
     * @param maxPixelValue Maximum pixel value. Typically 255
     */
    public CirculantTracker( double output_sigma_factor, double sigma, double lambda, double interp_factor,
                             double padding, int workRegionSize, double maxPixelValue,
                             InterpolatePixelS<T> interp ) {
        if (workRegionSize < 3)
            throw new IllegalArgumentException("Minimum size of work region is 3 pixels.");

        this.output_sigma_factor = output_sigma_factor;
        this.sigma = sigma;
        this.lambda = lambda;
        this.interp_factor = interp_factor;
        this.maxPixelValue = maxPixelValue;
        this.interp = interp;

        this.padding = padding;
        this.workRegionSize = workRegionSize;

        resizeImages(workRegionSize);
        computeCosineWindow(cosine);
        computeGaussianWeights(workRegionSize);

        localPeak.setImage(response);
    }

    /**
     * Initializes tracking around the specified rectangle region
     *
     * @param image Image to start tracking from
     * @param x0 top-left corner of region
     * @param y0 top-left corner of region
     * @param regionWidth region's width
     * @param regionHeight region's height
     */
    public void initialize( T image, int x0, int y0, int regionWidth, int regionHeight ) {
        this.imageWidth = image.width;
        this.imageHeight = image.height;

        setTrackLocation(x0, y0, regionWidth, regionHeight);

        initialLearning(image);
    }

    /**
     * Used to change the track's location. If this method is used it is assumed that tracking is active and that
     * the appearance of the target has not changed
     *
     * @param x0 top-left corner of region
     * @param y0 top-left corner of region
     * @param regionWidth region's width
     * @param regionHeight region's height
     */
    public void setTrackLocation( int x0, int y0, int regionWidth, int regionHeight ) {
        if (imageWidth < regionWidth || imageHeight < regionHeight)
            throw new IllegalArgumentException("Track region is larger than input image: "
                    + regionWidth + " " + regionHeight);

        regionOut.width = regionWidth;
        regionOut.height = regionHeight;

        // adjust for padding
        int w = (int)(regionWidth*(1 + padding));
        int h = (int)(regionHeight*(1 + padding));
        int cx = x0 + regionWidth/2;
        int cy = y0 + regionHeight/2;

        // save the track location
        this.regionTrack.width = w;
        this.regionTrack.height = h;
        this.regionTrack.x0 = cx - w/2;
        this.regionTrack.y0 = cy - h/2;

        stepX = (w - 1)/(float)(workRegionSize - 1);
        stepY = (h - 1)/(float)(workRegionSize - 1);

        updateRegionOut();
    }
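    // Worked example of the region math above (illustrative numbers, not from the source):
    // a 40x30 selection with padding = 1 becomes an 80x60 track region centered on the
    // selection; with workRegionSize = 64 it is resampled onto a 64x64 grid using
    // stepX = 79/63 ~ 1.254 and stepY = 59/63 ~ 0.937 image pixels per work-region pixel.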
    /**
     * Learn the target's appearance.
     */
    protected void initialLearning( T image ) {
        // get subwindow at current estimated target position, to train classifier
        get_subwindow(image, template);

        // Kernel Regularized Least-Squares, calculate alphas (in Fourier domain)
        // k = dense_gauss_kernel(sigma, x);
        dense_gauss_kernel(sigma, template, template, k);
        fft.forward(k, kf);

        // new_alphaf = yf ./ (fft2(k) + lambda);   %(Eq. 7)
        computeAlphas(gaussianWeightDFT, kf, lambda, alphaf);
    }

    /**
     * Computes the cosine window
     */
    protected static void computeCosineWindow( GrayF64 cosine ) {
        double[] cosX = new double[cosine.width];
        for (int x = 0; x < cosine.width; x++) {
            cosX[x] = 0.5*(1 - Math.cos(2.0*Math.PI*x/(cosine.width - 1)));
        }
        for (int y = 0; y < cosine.height; y++) {
            int index = cosine.startIndex + y*cosine.stride;
            double cosY = 0.5*(1 - Math.cos(2.0*Math.PI*y/(cosine.height - 1)));
            for (int x = 0; x < cosine.width; x++) {
                cosine.data[index++] = cosX[x]*cosY;
            }
        }
    }
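    // Note on the window above: it is the outer product of two 1-D Hann windows,
    //   w(x,y) = 0.25*(1 - cos(2*pi*x/(width-1)))*(1 - cos(2*pi*y/(height-1))),
    // which tapers the template towards zero at its borders so the periodic extension
    // implied by the FFT does not introduce strong artificial edges.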
    /**
     * Computes the weights used in the gaussian kernel.
     *
     * This isn't actually symmetric for even widths. These weights are used as labels in the learning phase.
     * The closer a weight is to one, the more likely that location is the true target. There should be a peak
     * in the image center. If there is not, it will learn an incorrect model.
     */
    protected void computeGaussianWeights( int width ) {
        // desired output (gaussian shaped), bandwidth proportional to target size
        double output_sigma = Math.sqrt(width*width)*output_sigma_factor;

        double left = -0.5/(output_sigma*output_sigma);

        int radius = width/2;

        for (int y = 0; y < gaussianWeight.height; y++) {
            int index = gaussianWeight.startIndex + y*gaussianWeight.stride;
            double ry = y - radius;

            for (int x = 0; x < width; x++) {
                double rx = x - radius;
                gaussianWeight.data[index++] = Math.exp(left*(ry*ry + rx*rx));
            }
        }

        fft.forward(gaussianWeight, gaussianWeightDFT);
    }
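    // The labels above follow y(px,py) = exp(-((px - r)^2 + (py - r)^2)/(2*s^2)) with
    // r = width/2 and s = output_sigma_factor*width. Math.sqrt(width*width) reduces to
    // just width here; the form appears to mirror sqrt(prod(target_sz)) in the authors'
    // MATLAB code, where the target region need not be square.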
    protected void resizeImages( int workRegionSize ) {
        templateNew.reshape(workRegionSize, workRegionSize);
        template.reshape(workRegionSize, workRegionSize);
        cosine.reshape(workRegionSize, workRegionSize);
        k.reshape(workRegionSize, workRegionSize);
        kf.reshape(workRegionSize, workRegionSize);
        alphaf.reshape(workRegionSize, workRegionSize);
        newAlphaf.reshape(workRegionSize, workRegionSize);
        response.reshape(workRegionSize, workRegionSize);
        tmpReal0.reshape(workRegionSize, workRegionSize);
        tmpReal1.reshape(workRegionSize, workRegionSize);
        tmpFourier0.reshape(workRegionSize, workRegionSize);
        tmpFourier1.reshape(workRegionSize, workRegionSize);
        tmpFourier2.reshape(workRegionSize, workRegionSize);
        gaussianWeight.reshape(workRegionSize, workRegionSize);
        gaussianWeightDFT.reshape(workRegionSize, workRegionSize);
    }

    /**
     * Search for the track in the image and update its location.
     *
     * @param image Next image in the sequence
     */
    public void performTracking( T image ) {
        if (image.width != imageWidth || image.height != imageHeight)
            throw new IllegalArgumentException("Tracking image size is not the same as " +
                    "input image. Expected " + imageWidth + " x " + imageHeight);
        updateTrackLocation(image);
        if (interp_factor != 0)
            performLearning(image);
    }

    /**
     * Find the target inside the current image by searching around its last known location
     */
    protected void updateTrackLocation( T image ) {
        get_subwindow(image, templateNew);

        // calculate response of the classifier at all locations
        // matlab: k = dense_gauss_kernel(sigma, x, z);
        dense_gauss_kernel(sigma, templateNew, template, k);
        fft.forward(k, kf);

        // response = real(ifft2(alphaf .* fft2(k)));   %(Eq. 9)
        DiscreteFourierTransformOps.multiplyComplex(alphaf, kf, tmpFourier0);
        fft.inverse(tmpFourier0, response);

        // find the pixel with the largest response
        int N = response.width*response.height;
        int indexBest = -1;
        double valueBest = -1;
        for (int i = 0; i < N; i++) {
            double v = response.data[i];
            if (v > valueBest) {
                valueBest = v;
                indexBest = i;
            }
        }

        int peakX = indexBest%response.width;
        int peakY = indexBest/response.width;

        // sub-pixel peak estimation
        subpixelPeak(peakX, peakY);

        // peak in region's coordinate system
        float deltaX = (peakX + offX) - templateNew.width/2;
        float deltaY = (peakY + offY) - templateNew.height/2;

        // convert peak location into image coordinate system
        regionTrack.x0 = regionTrack.x0 + deltaX*stepX;
        regionTrack.y0 = regionTrack.y0 + deltaY*stepY;

        updateRegionOut();
    }

    /**
     * Refine the local-peak using a search algorithm for sub-pixel accuracy.
     */
    protected void subpixelPeak( int peakX, int peakY ) {
        // this function for r was determined empirically by using work regions of 32,64,128
        int r = Math.min(2, response.width/25);
        if (r < 0)
            return;

        localPeak.setSearchRadius(r);
        localPeak.search(peakX, peakY);

        offX = localPeak.getPeakX() - peakX;
        offY = localPeak.getPeakY() - peakY;
    }

    private void updateRegionOut() {
        regionOut.x0 = (regionTrack.x0 + ((int)regionTrack.width)/2) - ((int)regionOut.width)/2;
        regionOut.y0 = (regionTrack.y0 + ((int)regionTrack.height)/2) - ((int)regionOut.height)/2;
    }

    /**
     * Update the alphas and the track's appearance
     */
    public void performLearning( T image ) {
        // use the updated track location
        get_subwindow(image, templateNew);

        // Kernel Regularized Least-Squares, calculate alphas (in Fourier domain)
        // k = dense_gauss_kernel(sigma, x);
        dense_gauss_kernel(sigma, templateNew, templateNew, k);
        fft.forward(k, kf);

        // new_alphaf = yf ./ (fft2(k) + lambda);   %(Eq. 7)
        computeAlphas(gaussianWeightDFT, kf, lambda, newAlphaf);

        // subsequent frames, interpolate model
        // alphaf = (1 - interp_factor) * alphaf + interp_factor * new_alphaf;
        int N = alphaf.width*alphaf.height*2;
        for (int i = 0; i < N; i++) {
            alphaf.data[i] = (1 - interp_factor)*alphaf.data[i] + interp_factor*newAlphaf.data[i];
        }

        // Set the previous image to be an interpolated version
        // z = (1 - interp_factor) * z + interp_factor * new_z;
        N = templateNew.width*templateNew.height;
        for (int i = 0; i < N; i++) {
            template.data[i] = (1 - interp_factor)*template.data[i] + interp_factor*templateNew.data[i];
        }
    }
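    // Identity exploited by dense_gauss_kernel below: for a Gaussian kernel,
    //   k(x,z) = exp(-||x - z||^2/sigma^2)  with  ||x - z||^2 = ||x||^2 + ||z||^2 - 2<x,z>.
    // The inner products between x and every cyclic shift of z form the circular
    // cross-correlation of the two images, which is computed for all shifts at once as
    // invF[ F(x) .* conj(F(z)) ], turning the dense local search into a few FFTs.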
    /**
     * Gaussian Kernel with dense sampling.
     * Evaluates a gaussian kernel with bandwidth SIGMA for all displacements between input images X and Y,
     * which must both be MxN. They must also be periodic (i.e., pre-processed with a cosine window).
     * The result is an MxN map of responses.
     *
     * @param sigma Gaussian kernel bandwidth
     * @param x Input image
     * @param y Input image
     * @param k Output containing Gaussian kernel for each element in target region
     */
    public void dense_gauss_kernel( double sigma, GrayF64 x, GrayF64 y, GrayF64 k ) {
        InterleavedF64 xf = tmpFourier0, yf, xyf = tmpFourier2;
        GrayF64 xy = tmpReal0;
        double yy;

        // find x in Fourier domain
        fft.forward(x, xf);
        double xx = imageDotProduct(x);

        if (x != y) {
            // general case, x and y are different
            yf = tmpFourier1;
            fft.forward(y, yf);
            yy = imageDotProduct(y);
        } else {
            // auto-correlation of x, avoid repeating a few operations
            yf = xf;
            yy = xx;
        }

        //---- xy = invF[ F(x)*F(y) ]
        // cross-correlation term in Fourier domain
        elementMultConjB(xf, yf, xyf);
        // convert to spatial domain
        fft.inverse(xyf, xy);

        circshift(xy, tmpReal1);

        // calculate gaussian response for all positions
        gaussianKernel(xx, yy, tmpReal1, sigma, k);
    }

    public static void circshift( GrayF64 a, GrayF64 b ) {
        int w2 = a.width/2;
        int h2 = b.height/2;

        for (int y = 0; y < a.height; y++) {
            int yy = (y + h2)%a.height;

            for (int x = 0; x < a.width; x++) {
                int xx = (x + w2)%a.width;

                b.set(xx, yy, a.get(x, y));
            }
        }
    }

    /**
     * Computes the dot product of the image with itself
     */
    public static double imageDotProduct( GrayF64 a ) {
        double total = 0;

        int N = a.width*a.height;
        for (int index = 0; index < N; index++) {
            double value = a.data[index];
            total += value*value;
        }

        return total;
    }

    /**
     * Element-wise multiplication of 'a' and the complex conjugate of 'b'
     */
    public static void elementMultConjB( InterleavedF64 a, InterleavedF64 b, InterleavedF64 output ) {
        for (int y = 0; y < a.height; y++) {
            int index = a.startIndex + y*a.stride;

            for (int x = 0; x < a.width; x++, index += 2) {
                double realA = a.data[index];
                double imgA = a.data[index + 1];
                double realB = b.data[index];
                double imgB = b.data[index + 1];

                output.data[index] = realA*realB + imgA*imgB;
                output.data[index + 1] = -realA*imgB + imgA*realB;
            }
        }
    }

    /**
     * new_alphaf = yf ./ (fft2(k) + lambda);   %(Eq. 7)
     */
    protected static void computeAlphas( InterleavedF64 yf, InterleavedF64 kf, double lambda,
                                         InterleavedF64 alphaf ) {
        for (int y = 0; y < kf.height; y++) {
            int index = yf.startIndex + y*yf.stride;

            for (int x = 0; x < kf.width; x++, index += 2) {
                double a = yf.data[index];
                double b = yf.data[index + 1];
                double c = kf.data[index] + lambda;
                double d = kf.data[index + 1];

                double bottom = c*c + d*d;

                alphaf.data[index] = (a*c + b*d)/bottom;
                alphaf.data[index + 1] = (b*c - a*d)/bottom;
            }
        }
    }
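    // Complex arithmetic used above, written out: elementMultConjB computes
    //   (a + b*i)*(c - d*i) = (a*c + b*d) + (b*c - a*d)*i,
    // and computeAlphas divides per frequency, with lambda added to the real part of
    // the denominator as ridge regularization so the division stays well conditioned:
    //   (a + b*i)/(c + d*i) = ((a*c + b*d) + (b*c - a*d)*i)/(c*c + d*d).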
    /**
     * Computes the output of the Gaussian kernel for each element in the target region
     *
     * k = exp(-1 / sigma^2 * max(0, (xx + yy - 2 * xy) / numel(x)));
     *
     * @param xx ||x||^2
     * @param yy ||y||^2
     */
    protected static void gaussianKernel( double xx, double yy, GrayF64 xy, double sigma, GrayF64 output ) {
        double sigma2 = sigma*sigma;
        double N = xy.width*xy.height;

        for (int y = 0; y < xy.height; y++) {
            int index = xy.startIndex + y*xy.stride;

            for (int x = 0; x < xy.width; x++, index++) {
                // (xx + yy - 2 * xy) / numel(x)
                double value = (xx + yy - 2*xy.data[index])/N;

                double v = Math.exp(-Math.max(0, value)/sigma2);

                output.data[index] = v;
            }
        }
    }

    /**
     * Copies the target into the output image and applies the cosine window to it.
     */
    protected void get_subwindow( T image, GrayF64 output ) {
        // copy the target region
        interp.setImage(image);
        int index = 0;
        for (int y = 0; y < workRegionSize; y++) {
            float yy = regionTrack.y0 + y*stepY;

            for (int x = 0; x < workRegionSize; x++) {
                float xx = regionTrack.x0 + x*stepX;

                if (interp.isInFastBounds(xx, yy))
                    output.data[index++] = interp.get_fast(xx, yy);
                else if (BoofMiscOps.isInside(image, xx, yy))
                    output.data[index++] = interp.get(xx, yy);
                else {
                    // randomize to make pixels outside the image poorly correlate. It will then focus on matching
                    // what's inside the image since it has structure
                    output.data[index++] = rand.nextFloat()*maxPixelValue;
                }
            }
        }

        // normalize values to be from -0.5 to 0.5
        PixelMath.divide(output, maxPixelValue, output);
        PixelMath.plus(output, -0.5f, output);

        // apply the cosine window to it
        PixelMath.multiply(output, cosine, output);
    }

    /**
     * The location of the target in the image
     */
    public RectangleLength2D_F32 getTargetLocation() {
        return regionOut;
    }

    /**
     * Visual appearance of the target
     */
    public GrayF64 getTargetTemplate() {
        return template;
    }

    public GrayF64 getResponse() {
        return response;
    }
}