
/*
* Copyright (c) 2024, Peter Abeles. All Rights Reserved.
*
* This file is part of BoofCV (http://boofcv.org).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package boofcv.alg.sfm.d3;
import boofcv.abst.feature.associate.AssociateDescription2D;
import boofcv.abst.feature.describe.DescribePointRadiusAngle;
import boofcv.abst.geo.Triangulate2ViewsMetric;
import boofcv.abst.tracker.PointTrack;
import boofcv.abst.tracker.PointTracker;
import boofcv.alg.feature.associate.StereoConsistencyCheck;
import boofcv.alg.geo.PerspectiveOps;
import boofcv.alg.sfm.d3.structure.VisOdomBundleAdjustment;
import boofcv.alg.sfm.d3.structure.VisOdomBundleAdjustment.BFrame;
import boofcv.alg.sfm.d3.structure.VisOdomBundleAdjustment.BTrack;
import boofcv.factory.distort.LensDistortionFactory;
import boofcv.factory.geo.ConfigTriangulation;
import boofcv.factory.geo.FactoryMultiView;
import boofcv.struct.calib.StereoParameters;
import boofcv.struct.feature.AssociatedIndex;
import boofcv.struct.feature.TupleDesc;
import boofcv.struct.image.ImageBase;
import boofcv.struct.sfm.Stereo2D3D;
import georegression.struct.point.Point2D_F64;
import georegression.struct.point.Point3D_F64;
import georegression.struct.point.Point4D_F64;
import georegression.struct.se.Se3_F64;
import georegression.transform.se.SePointOps_F64;
import lombok.Getter;
import lombok.Setter;
import org.ddogleg.fitting.modelset.ModelFitter;
import org.ddogleg.fitting.modelset.ModelMatcher;
import org.ddogleg.struct.DogArray;
import org.ddogleg.struct.DogArray_I32;
import org.ddogleg.struct.FastAccess;
import org.ddogleg.struct.FastArray;
import org.jetbrains.annotations.Nullable;
import java.util.ArrayList;
import java.util.List;
/**
* Stereo visual odometry algorithm which relies on tracking features independently in the left and right images
* and then matching those tracks together. The idea behind this tracker is that the expensive task of associating
* features between the left and right cameras only needs to be done once, when a track is spawned. Triangulation
* is used to estimate each feature's 3D location. Motion is estimated robustly using a RANSAC type algorithm
* provided by the user which internally uses a {@link boofcv.abst.geo.Estimate1ofPnP PnP} type algorithm.
*
* Estimated motion is relative to the left camera.
*
* FUTURE WORK: Save visual tracks without stereo matches and do monocular tracking on them. This is useful for stereo
* systems with only a little bit of overlap.
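*
* <p>A minimal usage sketch. This is hypothetical glue code: the trackers, descriptor, association,
* triangulation, robust matcher, and video source are all assumed to have been configured elsewhere,
* e.g. via BoofCV's factories.</p>
* <pre>{@code
* VisOdomDualTrackPnP<GrayU8, TupleDesc_F64> alg = new VisOdomDualTrackPnP<>(
*     1.5, trackerLeft, trackerRight, describe, assocL2R, triangulate2, matcher, null);
* alg.setCalibration(stereoParameters);
* while (video.hasNext()) {
*     if (!alg.process(video.nextLeft(), video.nextRight()))
*         System.err.println("Motion estimate failed at frame " + alg.getFrameID());
* }
* }</pre>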
*
* @author Peter Abeles
*/
@SuppressWarnings({"NullAway.Init"})
public class VisOdomDualTrackPnP<T extends ImageBase<T>, TD extends TupleDesc<TD>>
extends VisOdomBundlePnPBase<VisOdomDualTrackPnP.TrackInfo> {
// TODO must modify so that tracks can exist in only one camera after the initial spawn. Requiring tracks always
// be mutual greatly increases the number of dropped tracks when there is motion blur
// TODO Apply a rigid constraints in SBA when that feature has been added
// TODO Add more checks to stereo association. Similar to what's done in greedy now
// indexes of the left and right cameras in the camera list
public static final int CAMERA_LEFT = 0;
public static final int CAMERA_RIGHT = 1;
// Left and right input images
private T inputLeft;
private T inputRight;
// computes camera motion
private @Getter final ModelMatcher<Se3_F64, Stereo2D3D> matcher;
private @Getter final @Nullable ModelFitter<Se3_F64, Stereo2D3D> modelRefiner;
// trackers for left and right cameras
private final PointTracker<T> trackerLeft;
private final PointTracker<T> trackerRight;
/** Used to describe tracks so that they can be matched between the two cameras */
private final DescribePointRadiusAngle<T, TD> describe;
/** Radius of a descriptor's region */
private @Getter @Setter double describeRadius = 11.0;
// Data structures used when associating left and right cameras
private final FastArray<Point2D_F64> pointsLeft = new FastArray<>(Point2D_F64.class);
private final FastArray<Point2D_F64> pointsRight = new FastArray<>(Point2D_F64.class);
private final DogArray<TD> descLeft;
private final DogArray<TD> descRight;
// matches features between left and right images
private final AssociateDescription2D<TD> assocL2R;
/** Triangulates points from the two stereo correspondences */
private final Triangulate2ViewsMetric triangulate2;
//----- Data structures for Bundle Adjustment and Track Information
private BFrame currentLeft, currentRight;
private BFrame previousLeft;
// Ensures that the epipolar constraint still applies to the tracks
private final StereoConsistencyCheck stereoCheck;
// Transform from the world frame into the previous left camera frame
private final Se3_F64 world_to_prev = new Se3_F64();
/** List of tracks from left image that remain after geometric filters have been applied */
private @Getter final List<PointTrack> candidates = new ArrayList<>();
// Internal profiling
private @Getter double timeTracking, timeEstimate, timeBundle, timeDropUnused, timeSceneMaintenance, timeSpawn;
//---------------------------------------------------------------------------------------------------
//----------- Internal Work Space
DogArray<Stereo2D3D> listStereo2D3D = new DogArray<>(Stereo2D3D::new);
private final Se3_F64 left_to_right = new Se3_F64();
private final Se3_F64 right_to_left = new Se3_F64();
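// Homogeneous storage for a track's world location after transforming it into the previous frame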
Point4D_F64 prevLoc4 = new Point4D_F64();
// storage for the triangulated location in the camera frame
Point3D_F64 cameraP3 = new Point3D_F64();
// Normalized image coordinate for pixel track observations
Point2D_F64 normLeft = new Point2D_F64();
Point2D_F64 normRight = new Point2D_F64();
/**
* Specifies internal algorithms and parameters
*
* @param epipolarTol Tolerance in pixels for enforcing the epipolar constraint
* @param trackerLeft Tracker used for left camera
* @param trackerRight Tracker used for right camera
* @param describe Describes features in tracks
* @param assocL2R Association from left to right
* @param triangulate2 Triangulation for estimating 3D location from stereo pair
* @param matcher Robust motion model estimation with outlier rejection
* @param modelRefiner Non-linear refinement of motion model
*/
public VisOdomDualTrackPnP( double epipolarTol,
PointTracker<T> trackerLeft, PointTracker<T> trackerRight,
DescribePointRadiusAngle<T, TD> describe,
AssociateDescription2D<TD> assocL2R,
Triangulate2ViewsMetric triangulate2,
ModelMatcher<Se3_F64, Stereo2D3D> matcher,
@Nullable ModelFitter<Se3_F64, Stereo2D3D> modelRefiner ) {
if (!assocL2R.uniqueSource() || !assocL2R.uniqueDestination())
throw new IllegalArgumentException("Association must ensure both a unique source and destination");
this.describe = describe;
this.trackerLeft = trackerLeft;
this.trackerRight = trackerRight;
this.assocL2R = assocL2R;
this.triangulate2 = triangulate2;
this.matcher = matcher;
this.modelRefiner = modelRefiner;
descLeft = new DogArray<>(describe::createDescription);
descRight = new DogArray<>(describe::createDescription);
stereoCheck = new StereoConsistencyCheck(epipolarTol, epipolarTol);
bundleViso = new VisOdomBundleAdjustment<>(TrackInfo::new);
// TODO would be best if this reduced pixel error and not geometric error
// TODO remove and replace with calibrated homogeneous coordinates when it exists
ConfigTriangulation config = new ConfigTriangulation();
config.type = ConfigTriangulation.Type.GEOMETRIC;
config.converge.maxIterations = 10;
triangulateN = FactoryMultiView.triangulateNViewMetric(config);
}
/**
* Specifies the stereo parameters. Note that classes passed into the constructor are maintained externally.
* For example, the RANSAC distance model might need to have the stereo parameters passed to it separately
* since there's no generic way to handle that here.
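*
* <p>A sketch of a typical call sequence (hypothetical; whether and how a distance model is
* configured depends entirely on how the {@code matcher} was constructed):</p>
* <pre>{@code
* StereoParameters param = CalibrationIO.load(new File("stereo.yaml"));
* // e.g. a PnP reprojection distance used inside RANSAC may also need the parameters
* distanceModel.setStereoParameters(param);
* visualOdometry.setCalibration(param);
* }</pre>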
*/
public void setCalibration( StereoParameters param ) {
right_to_left.setTo(param.right_to_left);
param.right_to_left.invert(left_to_right);
CameraModel left = new CameraModel();
left.pixelToNorm = LensDistortionFactory.narrow(param.left).undistort_F64(true, false);
CameraModel right = new CameraModel();
right.pixelToNorm = LensDistortionFactory.narrow(param.right).undistort_F64(true, false);
stereoCheck.setCalibration(param);
cameraModels.add(left);
cameraModels.add(right);
bundleViso.addCamera(param.left);
bundleViso.addCamera(param.right);
}
/**
* Resets the algorithm into its original state
*/
@Override
public void reset() {
super.reset();
trackerLeft.reset();
trackerRight.reset();
}
/**
* Updates the motion estimate using the stereo pair.
*
* @param left Image from left camera
* @param right Image from right camera
* @return true if motion estimate was updated and false if not
*/
public boolean process( T left, T right ) {
if (verbose != null) {
verbose.println("----------- Process --------------");
verbose.println("Scene: Frames=" + bundleViso.frames.size + " Tracks=" + bundleViso.tracks.size);
for (int frameIdx = 0; frameIdx < bundleViso.frames.size; frameIdx++) {
BFrame bf = bundleViso.frames.get(frameIdx);
verbose.printf(" frame[%2d] cam=%d tracks=%d\n", frameIdx, bf.camera.index, bf.tracks.size);
}
}
this.inputLeft = left;
this.inputRight = right;
//=============================================================================================
//========== Visually track features
double time0 = System.nanoTime();
inlierTracks.clear();
visibleTracks.clear();
initialVisible.clear();
candidates.clear();
// Create a new frame for the current image
currentLeft = bundleViso.addFrame(CAMERA_LEFT, trackerLeft.getFrameID());
currentRight = bundleViso.addFrame(CAMERA_RIGHT, trackerRight.getFrameID());
// TODO in the future when bundle adjustment supports rigid relationships between two views use that here
// Track objects given the new images
trackerLeft.process(left);
trackerRight.process(right);
double time1 = System.nanoTime();
//=============================================================================================
//========== Initialize VO from the first image and return
if (first) {
first = false;
frameManager.initialize(bundleViso.cameras);
addNewTracks();
// The left camera is the world frame right now
currentLeft.frame_to_world.reset();
currentRight.frame_to_world.setTo(right_to_left);
return true;
}
// This will be used as a reference for motion estimation
// tail(3) since the two visible frames (left + right) were just added
previousLeft = bundleViso.frames.getTail(3);
// If one tracker dropped a track then drop the same track in the other camera
mutualTrackDrop();
// Find tracks which pass a geometric test and put into candidates list
selectCandidateStereoTracks();
// Robustly estimate motion using features in candidates list
if (!estimateMotion()) {
if (verbose != null) verbose.println("!!! Motion Failed !!!");
removedBundleTracks.clear();
bundleViso.removeFrame(currentRight, removedBundleTracks);
bundleViso.removeFrame(currentLeft, removedBundleTracks);
return false;
}
addInlierObservationsToScene();
removeOldUnusedVisibleTracks();
//=============================================================================================
//========== Refine the scene's state estimate
double time2 = System.nanoTime();
optimizeTheScene();
double time3 = System.nanoTime();
//=============================================================================================
//========== Perform maintenance by dropping elements from the scene
dropBadBundleTracks();
long time4 = System.nanoTime();
boolean droppedCurrentFrame = performKeyFrameMaintenance(trackerLeft, 2);
long time5 = System.nanoTime();
if (!droppedCurrentFrame) {
if (verbose != null) verbose.println("Saving new key frames");
// We are keeping the current frame! Spawn new tracks inside of it
addNewTracks();
}
long time6 = System.nanoTime();
//=============================================================================================
//========== Summarize profiling results
timeTracking = (time1 - time0)*1e-6;
timeEstimate = (time2 - time1)*1e-6;
timeBundle = (time3 - time2)*1e-6;
timeDropUnused = (time4 - time3)*1e-6;
timeSceneMaintenance = (time5 - time4)*1e-6;
timeSpawn = (time6 - time5)*1e-6;
if (profileOut != null) {
double timeTotal = (time6 - time0)*1e-6;
profileOut.printf("TIME: TRK %5.1f Est %5.1f Bun %5.1f DU %5.1f Scene %5.1f Spn %5.1f TOTAL %5.1f\n",
timeTracking, timeEstimate, timeBundle, timeDropUnused, timeSceneMaintenance, timeSpawn, timeTotal);
}
return true;
}
/**
* Runs bundle adjustment and updates the state of views and features
*/
private void optimizeTheScene() {
// Update the state estimate
if (bundleViso.isOptimizeActive()) {
bundleViso.optimize(verbose);
triangulateNotSelectedBundleTracks();
}
// Save the output
current_to_world.setTo(currentLeft.frame_to_world);
}
/**
* Given the set of active tracks, robustly estimates the camera's motion
*
* @return true if successful
*/
private boolean estimateMotion() {
CameraModel leftCM = cameraModels.get(CAMERA_LEFT);
CameraModel rightCM = cameraModels.get(CAMERA_RIGHT);
// Perform motion estimation relative to the most recent key frame
previousLeft.frame_to_world.invert(world_to_prev);
// Put observation and prior knowledge into a format the model matcher will understand
listStereo2D3D.reserve(candidates.size());
listStereo2D3D.reset();
for (int candidateIdx = 0; candidateIdx < candidates.size(); candidateIdx++) {
PointTrack l = candidates.get(candidateIdx);
Stereo2D3D stereo = listStereo2D3D.grow();
// Get the track location
TrackInfo bt = l.getCookie();
PointTrack r = bt.visualRight;
// Get the 3D coordinate of the point in the 'previous' frame
SePointOps_F64.transform(world_to_prev, bt.worldLoc, prevLoc4);
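// Convert from homogeneous to 3D. The thresholds bound how far away a point can be,
// guarding against points at or near infinity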
PerspectiveOps.homogeneousTo3dPositiveZ(prevLoc4, 1e8, 1e-8, stereo.location);
// compute normalized image coordinate for track in left and right image
leftCM.pixelToNorm.compute(l.pixel.x, l.pixel.y, stereo.leftObs);
rightCM.pixelToNorm.compute(r.pixel.x, r.pixel.y, stereo.rightObs);
// TODO Could this transform be done just once?
}
// Robustly estimate left camera motion
if (!matcher.process(listStereo2D3D.toList()))
return false;
if (modelRefiner != null) {
modelRefiner.fitModel(matcher.getMatchSet(), matcher.getModelParameters(), previous_to_current);
} else {
previous_to_current.setTo(matcher.getModelParameters());
}
// Convert the found transforms back to world
previous_to_current.invert(current_to_previous);
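// frame_to_world of the current left view: map current -> previous, then previous -> world
// The right view's pose then follows from the fixed stereo baseline: right -> left -> world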
current_to_previous.concat(previousLeft.frame_to_world, currentLeft.frame_to_world);
right_to_left.concat(currentLeft.frame_to_world, currentRight.frame_to_world);
return true;
}
private void addInlierObservationsToScene() {
// mark tracks that are in the inlier set and add their observations to the scene
int N = matcher.getMatchSet().size();
if (verbose != null) verbose.println("Total Inliers " + N + " / " + candidates.size());
for (int i = 0; i < N; i++) {
int index = matcher.getInputIndex(i);
TrackInfo bt = candidates.get(index).getCookie();
if (bt.visualTrack == null) throw new RuntimeException("BUG!");
bt.lastInlier = getFrameID();
bt.hasBeenInlier = true;
PointTrack l = bt.visualTrack;
PointTrack r = bt.visualRight;
bundleViso.addObservation(currentLeft, bt, l.pixel.x, l.pixel.y);
bundleViso.addObservation(currentRight, bt, r.pixel.x, r.pixel.y);
inlierTracks.add(bt);
}
}
/**
* If a track was dropped in one image, make sure it is dropped in the other image
*/
private void mutualTrackDrop() {
int total = 0;
for (PointTrack t : trackerLeft.getDroppedTracks(null)) { // lint:forbidden ignore_line
TrackInfo bt = t.getCookie();
trackerRight.dropTrack(bt.visualRight);
bt.visualTrack = null; // This tells the scene that it is no longer in the visual tracker
total++;
}
for (PointTrack t : trackerRight.getDroppedTracks(null)) { // lint:forbidden ignore_line
TrackInfo bt = t.getCookie();
if (bt.visualTrack != null) {
trackerLeft.dropTrack(bt.visualTrack);
bt.visualTrack = null;
total++;
}
}
if (verbose != null) verbose.println("Dropped Tracks Mutual: " + total);
}
/**
* Searches for tracks which are active and meet the epipolar constraints
*/
private void selectCandidateStereoTracks() {
final long frameID = getFrameID();
// mark tracks in right frame that are active
List<PointTrack> activeRight = trackerRight.getActiveTracks(null);
for (PointTrack t : activeRight) { // lint:forbidden ignore_line
TrackInfo bt = t.getCookie();
// If the visual track is null then it got dropped earlier
if (bt.visualTrack == null)
continue;
bt.lastSeenRightFrame = frameID;
initialVisible.add(bt);
}
List<PointTrack> activeLeft = trackerLeft.getActiveTracks(null);
candidates.clear();
for (PointTrack left : activeLeft) { // lint:forbidden ignore_line
TrackInfo bt = left.getCookie();
if (bt.lastSeenRightFrame != frameID) {
continue;
}
if (bt.visualTrack == null)
throw new RuntimeException("BUG!!! Should have been skipped over in the right camera");
// check epipolar constraint and see if it is still valid
if (stereoCheck.checkPixel(bt.visualTrack.pixel, bt.visualRight.pixel)) {
bt.lastStereoFrame = frameID;
candidates.add(left);
}
}
if (verbose != null)
verbose.println("Visual Tracks: Left: " + activeLeft.size() + " Right: " + activeRight.size() + " Candidates: " + candidates.size());
}
/**
* Removes tracks which have not been included in the inlier set recently from the visual tracker
*/
private void removeOldUnusedVisibleTracks() {
long currentFrameID = getFrameID();
// Drop unused tracks from the left camera
trackerLeft.dropTracks(track -> {
TrackInfo bt = track.getCookie();
if (bt == null) throw new RuntimeException("BUG!");
if (currentFrameID - bt.lastInlier >= thresholdRetireTracks) {
// System.out.println("Removing visible track due to lack of inlier");
bt.visualTrack = null;
return true;
}
return false;
});
// remove unused tracks from the right camera. Since the tracks are coupled
// there should be no surprises here
trackerRight.dropTracks(track -> {
TrackInfo bt = track.getCookie();
if (bt == null) throw new RuntimeException("BUG!");
if (bt.visualTrack == null)
return true;
if (currentFrameID - bt.lastInlier >= thresholdRetireTracks) {
throw new RuntimeException("BUG! Should have already been dropped by left camera");
}
return false;
});
}
/**
* Spawns tracks in each image and associates features together.
*/
private void addNewTracks() {
CameraModel leftCM = cameraModels.get(CAMERA_LEFT);
CameraModel rightCM = cameraModels.get(CAMERA_RIGHT);
final long frameID = getFrameID();
trackerLeft.spawnTracks();
trackerRight.spawnTracks();
List<PointTrack> spawnedLeft = trackerLeft.getNewTracks(null);
List<PointTrack> spawnedRight = trackerRight.getNewTracks(null);
// get a list of new tracks and their descriptions
describeSpawnedTracks(inputLeft, spawnedLeft, pointsLeft, descLeft);
describeSpawnedTracks(inputRight, spawnedRight, pointsRight, descRight);
// associate using L2R
assocL2R.setSource(pointsLeft, descLeft);
assocL2R.setDestination(pointsRight, descRight);
assocL2R.associate();
FastAccess<AssociatedIndex> matches = assocL2R.getMatches();
int total = 0;
for (int i = 0; i < matches.size; i++) {
AssociatedIndex m = matches.get(i);
PointTrack trackL = spawnedLeft.get(m.src);
PointTrack trackR = spawnedRight.get(m.dst);
TrackInfo bt = bundleViso.tracks.grow();
// convert pixel observations into normalized image coordinates
leftCM.pixelToNorm.compute(trackL.pixel.x, trackL.pixel.y, normLeft);
rightCM.pixelToNorm.compute(trackR.pixel.x, trackR.pixel.y, normRight);
// triangulate 3D coordinate in the current camera frame
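// (triangulate2 reports the point in the left camera frame, given the known left-to-right extrinsics)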
if (triangulate2.triangulate(normLeft, normRight, left_to_right, cameraP3)) {
// put the track into the world coordinate system
SePointOps_F64.transform(currentLeft.frame_to_world, cameraP3, cameraP3);
bt.worldLoc.setTo(cameraP3.x, cameraP3.y, cameraP3.z, 1.0);
// Finalize the track data structure
bt.id = trackL.featureId;
bt.visualTrack = trackL;
bt.visualRight = trackR;
bt.lastStereoFrame = bt.lastSeenRightFrame = frameID;
trackL.cookie = bt;
trackR.cookie = bt;
bundleViso.addObservation(currentLeft, bt, trackL.pixel.x, trackL.pixel.y);
bundleViso.addObservation(currentRight, bt, trackR.pixel.x, trackR.pixel.y);
visibleTracks.add(bt);
total++;
} else {
// triangulation failed, drop track
trackerLeft.dropTrack(trackL);
trackerRight.dropTrack(trackR);
bundleViso.tracks.removeTail();
}
}
if (verbose != null)
verbose.println("New Tracks: left=" + spawnedLeft.size() + " right=" + spawnedRight.size() + " stereo=" + total);
// drop visual tracks that were not associated
DogArray_I32 unassignedRight = assocL2R.getUnassociatedDestination();
for (int i = 0; i < unassignedRight.size; i++) {
int index = unassignedRight.get(i);
trackerRight.dropTrack(spawnedRight.get(index));
}
DogArray_I32 unassignedLeft = assocL2R.getUnassociatedSource();
for (int i = 0; i < unassignedLeft.size; i++) {
int index = unassignedLeft.get(i);
trackerLeft.dropTrack(spawnedLeft.get(index));
}
// Let the frame manager know how many tracks were just spawned
frameManager.handleSpawnedTracks(trackerLeft, bundleViso.cameras.get(CAMERA_LEFT));
frameManager.handleSpawnedTracks(trackerRight, bundleViso.cameras.get(CAMERA_RIGHT));
}
/**
* Given list of new visual tracks, describe the region around each track using a descriptor
*/
private void describeSpawnedTracks( T image, List<PointTrack> tracks,
FastArray<Point2D_F64> points, DogArray<TD> descs ) {
describe.setImage(image);
points.reset();
descs.reset();
for (int i = 0; i < tracks.size(); i++) {
PointTrack t = tracks.get(i);
// ignoring the return value. most descriptors never return false and the ones that do will rarely do so
describe.process(t.pixel.x, t.pixel.y, 0, describeRadius, descs.grow());
points.add(t.pixel);
}
}
@Override
public long getFrameID() {
return trackerLeft.getFrameID();
}
/**
* If there are no candidates then a fault happened.
*
* @return true if fault. false if no fault
*/
public boolean isFault() {
return candidates.isEmpty();
}
@Override
protected void dropVisualTrack( PointTrack left ) {
TrackInfo info = left.getCookie();
PointTrack right = info.visualRight;
trackerLeft.dropTrack(left);
trackerRight.dropTrack(right);
}
/**
* A coupled track between the left and right cameras.
*/
@SuppressWarnings({"NullAway.Init"})
public static class TrackInfo extends BTrack {
// Image based tracks in left and right camera
public PointTrack visualRight;
public long lastStereoFrame;
// last time it was in the inlier list
public long lastInlier;
// the last frame it was seen in
public long lastSeenRightFrame;
@SuppressWarnings({"NullAway"})
@Override public void reset() {
super.reset();
visualRight = null;
lastStereoFrame = -1;
lastInlier = -1;
lastSeenRightFrame = -1;
}
}
}