boofcv.alg.tracker.tld.TldTracker Maven / Gradle / Ivy
/*
* Copyright (c) 2021, Peter Abeles. All Rights Reserved.
*
* This file is part of BoofCV (http://boofcv.org).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package boofcv.alg.tracker.tld;
import boofcv.abst.filter.derivative.ImageGradient;
import boofcv.alg.interpolate.InterpolatePixelS;
import boofcv.alg.tracker.klt.PyramidKltTracker;
import boofcv.factory.tracker.FactoryTrackerAlg;
import boofcv.factory.transform.pyramid.FactoryPyramid;
import boofcv.struct.ImageRectangle;
import boofcv.struct.image.ImageGray;
import boofcv.struct.pyramid.ConfigDiscreteLevels;
import boofcv.struct.pyramid.PyramidDiscrete;
import georegression.struct.shapes.Rectangle2D_F64;
import org.ddogleg.struct.DogArray;
import org.jetbrains.annotations.Nullable;
import java.util.Objects;
import java.util.Random;
/**
*
* Main class for Tracking-Learning-Detection (TLD) [1] (a.k.a Predator) object tracker for video sequences.
* TLD tracks an object which is specified by a user using a rectangle. The description of the object is
* dynamically updated using P and N constraints.
*
*
*
* To start tracking {@link #initialize(ImageGray, int, int, int, int)} must first be called
* to specify the region being tracked. Then each time a new image in the sequences arrives
* {@link #track(ImageGray)} is called. Be sure to check its return value to see if tracking
* was successful or not. If tracking fails one frame it can recover. This is often the case where an object
* becomes obscured and then visible again.
*
*
*
* NOTE: This implementation is based the description found in [1]. The spirit of the original algorithm is replicated,
* but there are a several algorithmic changes. The most significant modifications are as follow; 1) The KLT tracker
* used to update the rectangle does not use NCC features to validate a track or the median based outlier removal.
* Instead a robust model matching algorithm finds the best fit motion. 2) The non-maximum suppression algorithm has
* been changed so that it computes a more accurate local maximum and only uses local rectangles to
* compute the average response. 3) Fern selection is done by selecting the N best using a likelihood ratio
* conditional on the current image. 4) Learning only happens when a track is considered strong.
* See code for more details. Note, this is not a port of the OpenTLD project.
*
*
* [1] Zdenek Kalal, "Tracking-Learning-Detection" University of Surrey, April 2011 Phd Thesis.
*
*
* @author Peter Abeles
*/
@SuppressWarnings({"NullAway.Init"})
public class TldTracker, D extends ImageGray> {
// specified configuration parameters for the tracker
private final ConfigTld config;
// selected region for output
private final Rectangle2D_F64 targetRegion = new Rectangle2D_F64();
// region selected by KLT tracker
// NOTE: The tracker updates a pointing point region. Rounding to the closest integer rectangle introduces errors
// which can build up.
private final Rectangle2D_F64 trackerRegion = new Rectangle2D_F64();
private final ImageRectangle trackerRegion_I32 = new ImageRectangle();
// Region used inside detection cascade
private final DogArray cascadeRegions = new DogArray<>(ImageRectangle::new);
// Image pyramid of input image
private PyramidDiscrete imagePyramid;
// Tracks features inside the current region
private final TldRegionTracker tracking;
// Adjusts the region using track information
private final TldAdjustRegion adjustRegion;
// Detects rectangles: Removes candidates which lack texture
private final TldVarianceFilter variance;
// Detects rectangles: Removes candidates don't match the fern descriptors
private final TldFernClassifier fern;
// Detects rectangles: Removes candidates don't match NCC descriptors
private final TldTemplateMatching template;
// code for detection cascade
private final TldDetection detection;
// did tracking totally fail and it needs to reacquire a track?
private boolean reacquiring;
// Is the region hypothesis valid and can be used for learning?
private boolean valid;
// is the current track considered a strong match and learning can occur?
private boolean strongMatch;
// area of the previous track before it lost track
private double previousTrackArea;
private final TldLearning learning;
// is learning on or off
private boolean performLearning = true;
/**
* Configures the TLD tracker
*
* @param config Configuration class which specifies the tracker's behavior
*/
public TldTracker( ConfigTld config,
InterpolatePixelS interpolate, ImageGradient gradient,
Class imageType, Class derivType ) {
this.config = config;
Random rand = new Random(config.randomSeed);
PyramidKltTracker tracker = FactoryTrackerAlg.kltPyramid(config.trackerConfig, imageType, derivType);
tracking = new TldRegionTracker<>(config.trackerGridWidth, config.trackerFeatureRadius,
config.maximumErrorFB, gradient, tracker, imageType, derivType);
adjustRegion = new TldAdjustRegion(config.motionIterations);
variance = new TldVarianceFilter<>(imageType);
template = new TldTemplateMatching<>(interpolate);
fern = new TldFernClassifier<>(
rand, config.numFerns, config.fernSize, 20, 0.5f, interpolate);
detection = new TldDetection<>(fern, template, variance, config);
learning = new TldLearning<>(rand, config, template, variance, fern, detection);
}
/**
* Starts tracking the rectangular region.
*
* @param image First image in the sequence.
* @param x0 Top-left corner of rectangle. x-axis
* @param y0 Top-left corner of rectangle. y-axis
* @param x1 Bottom-right corner of rectangle. x-axis
* @param y1 Bottom-right corner of rectangle. y-axis
*/
public void initialize( T image, int x0, int y0, int x1, int y1 ) {
if (imagePyramid == null ||
imagePyramid.getInputWidth() != image.width || imagePyramid.getInputHeight() != image.height) {
int minSize = (config.trackerFeatureRadius*2 + 1)*5;
ConfigDiscreteLevels configLevels = ConfigDiscreteLevels.minSize(minSize);
imagePyramid = FactoryPyramid.discreteGaussian(configLevels, -1, 1, true, image.getImageType());
}
imagePyramid.process(image);
reacquiring = false;
targetRegion.setTo(x0, y0, x1, y1);
createCascadeRegion(image.width, image.height);
template.reset();
fern.reset();
tracking.initialize(imagePyramid);
variance.setImage(image);
template.setImage(image);
fern.setImage(image);
adjustRegion.init(image.width, image.height);
learning.initialLearning(targetRegion, cascadeRegions);
strongMatch = true;
previousTrackArea = targetRegion.area();
}
/**
* Used to set the location of the track without changing any appearance history.
*
* Move the track region but keep the same aspect ratio as it had before
* So scale the region and re-center it
*/
public void setTrackerLocation( int x0, int y0, int x1, int y1 ) {
int width = x1 - x0;
int height = y1 - y0;
// change change in scale
double scale = (width/targetRegion.getWidth() + height/targetRegion.getHeight())/2.0;
// new center location
double centerX = (x0 + x1)/2.0;
double centerY = (y0 + y1)/2.0;
targetRegion.p0.x = centerX - scale*targetRegion.getWidth()/2.0;
targetRegion.p1.x = targetRegion.p0.x + scale*targetRegion.getWidth();
targetRegion.p0.y = centerY - scale*targetRegion.getHeight()/2.0;
targetRegion.p1.y = targetRegion.p0.y + scale*targetRegion.getHeight();
}
/**
* Creates a list containing all the regions which need to be tested
*/
private void createCascadeRegion( int imageWidth, int imageHeight ) {
cascadeRegions.reset();
int rectWidth = (int)(targetRegion.getWidth() + 0.5);
int rectHeight = (int)(targetRegion.getHeight() + 0.5);
for (int scaleInt = -config.scaleSpread; scaleInt <= config.scaleSpread; scaleInt++) {
// try several scales as specified in the paper
double scale = Math.pow(1.2, scaleInt);
// the actual rectangular region being tested at this scale
int actualWidth = (int)(rectWidth*scale);
int actualHeight = (int)(rectHeight*scale);
// see if the region is too small or too large
if (actualWidth < config.detectMinimumSide || actualHeight < config.detectMinimumSide)
continue;
if (actualWidth >= imageWidth || actualHeight >= imageHeight)
continue;
// step size at this scale
int stepWidth = (int)(rectWidth*scale*0.1);
int stepHeight = (int)(rectHeight*scale*0.1);
if (stepWidth < 1) stepWidth = 1;
if (stepHeight < 1) stepHeight = 1;
// maximum allowed values
int maxX = imageWidth - actualWidth;
int maxY = imageHeight - actualHeight;
// start at (1,1). Otherwise a more complex algorithm needs to be used for integral images
for (int y0 = 1; y0 < maxY; y0 += stepHeight) {
for (int x0 = 1; x0 < maxX; x0 += stepWidth) {
ImageRectangle r = cascadeRegions.grow();
r.x0 = x0;
r.y0 = y0;
r.x1 = x0 + actualWidth;
r.y1 = y0 + actualHeight;
}
}
}
}
/**
* Updates track region.
*
* @param image Next image in the sequence.
* @return true if the object could be found and false if not
*/
public boolean track( T image ) {
boolean success = true;
valid = false;
imagePyramid.process(image);
template.setImage(image);
variance.setImage(image);
fern.setImage(image);
if (reacquiring) {
// It can reinitialize if there is a single detection
detection.detectionCascade(cascadeRegions);
if (detection.isSuccess() && !detection.isAmbiguous()) {
TldRegion region = Objects.requireNonNull(detection.getBest());
reacquiring = false;
valid = false;
// set it to the detected region
ImageRectangle r = region.rect;
targetRegion.setTo(r.x0, r.y0, r.x1, r.y1);
// get tracking running again
tracking.initialize(imagePyramid);
checkNewTrackStrong(region.confidence);
} else {
success = false;
}
} else {
detection.detectionCascade(cascadeRegions);
// update the previous track region using the tracker
trackerRegion.setTo(targetRegion);
boolean trackingWorked = tracking.process(imagePyramid, trackerRegion);
trackingWorked &= adjustRegion.process(tracking.getPairs(), trackerRegion);
TldHelperFunctions.convertRegion(trackerRegion, trackerRegion_I32);
if (hypothesisFusion(trackingWorked, detection.isSuccess())) {
// if it found a hypothesis and it is valid for learning, then learn
if (valid && performLearning) {
learning.updateLearning(targetRegion);
}
} else {
reacquiring = true;
success = false;
}
}
if (strongMatch) {
previousTrackArea = targetRegion.area();
}
return success;
}
private void checkNewTrackStrong( double confidence ) {
// see if there is very high confidence of a match
strongMatch = confidence > config.confidenceThresholdStrong;
// otherwise see if it's the expected shape
if (!strongMatch) {
double similarity = Math.abs((targetRegion.area() - previousTrackArea)/previousTrackArea);
strongMatch = similarity <= config.thresholdSimilarArea;
}
}
/**
* Combines hypotheses from tracking and detection.
*
* @param trackingWorked If the sequential tracker updated the track region successfully or not
* @return true a hypothesis was found, false if it failed to find a hypothesis
*/
protected boolean hypothesisFusion( boolean trackingWorked, boolean detectionWorked ) {
valid = false;
boolean uniqueDetection = detectionWorked && !detection.isAmbiguous();
double confidenceTarget;
if (trackingWorked) {
@Nullable TldRegion detectedRegion = detection.getBest();
// get the scores from tracking and detection
double scoreTrack = template.computeConfidence(trackerRegion_I32);
double scoreDetected = 0;
if (uniqueDetection) {
scoreDetected = Objects.requireNonNull(detectedRegion).confidence;
}
double adjustment = strongMatch ? 0.07 : 0.02;
if (uniqueDetection && scoreDetected > scoreTrack + adjustment) {
Objects.requireNonNull(detectedRegion);
// if there is a unique detection and it has higher confidence than the
// track region, use the detected region
TldHelperFunctions.convertRegion(detectedRegion.rect, targetRegion);
confidenceTarget = detectedRegion.confidence;
// if it's far away from the current track, re-evaluate if it's a strongMatch
checkNewTrackStrong(scoreDetected);
} else {
// Otherwise use the tracker region
targetRegion.setTo(trackerRegion);
confidenceTarget = scoreTrack;
strongMatch |= confidenceTarget > config.confidenceThresholdStrong;
// see if the most likely detected region overlaps the track region
if (strongMatch && confidenceTarget >= config.confidenceThresholdLower) {
valid = true;
}
}
} else if (uniqueDetection) {
// just go with the best detected region
TldRegion detectedRegion = Objects.requireNonNull(detection.getBest());
TldHelperFunctions.convertRegion(detectedRegion.rect, targetRegion);
confidenceTarget = detectedRegion.confidence;
strongMatch = confidenceTarget > config.confidenceThresholdStrong;
} else {
return false;
}
return confidenceTarget >= config.confidenceAccept;
}
/**
* Selects the scale for the image pyramid based on image size and feature size
*
* @return scales for image pyramid
*/
public static int[] selectPyramidScale( int imageWidth, int imageHeight, int minSize ) {
int w = Math.max(imageWidth, imageHeight);
int maxScale = w/minSize;
int n = 1;
int scale = 1;
while (scale*2 < maxScale) {
n++;
scale *= 2;
}
int[] ret = new int[n];
scale = 1;
for (int i = 0; i < n; i++) {
ret[i] = scale;
scale *= 2;
}
return ret;
}
public boolean isPerformLearning() {
return performLearning;
}
public void setPerformLearning( boolean performLearning ) {
this.performLearning = performLearning;
}
public TldTemplateMatching getTemplateMatching() {
return template;
}
/**
* Returns the estimated location of the target in the current image
*
* @return Location of the target
*/
public Rectangle2D_F64 getTargetRegion() {
return targetRegion;
}
public Rectangle2D_F64 getTrackerRegion() {
return trackerRegion;
}
public ConfigTld getConfig() {
return config;
}
public TldDetection getDetection() {
return detection;
}
}