boofcv.alg.sfm.structure.ThreeViewEstimateMetricScene Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of boofcv-sfm Show documentation
Show all versions of boofcv-sfm Show documentation
BoofCV is an open source Java library for real-time computer vision and robotics applications.
/*
* Copyright (c) 2011-2019, Peter Abeles. All Rights Reserved.
*
* This file is part of BoofCV (http://boofcv.org).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package boofcv.alg.sfm.structure;
import boofcv.abst.geo.Estimate1ofTrifocalTensor;
import boofcv.abst.geo.RefineThreeViewProjective;
import boofcv.abst.geo.bundle.BundleAdjustment;
import boofcv.abst.geo.bundle.PruneStructureFromSceneMetric;
import boofcv.abst.geo.bundle.SceneObservations;
import boofcv.abst.geo.bundle.SceneStructureMetric;
import boofcv.alg.geo.GeometricResult;
import boofcv.alg.geo.MultiViewOps;
import boofcv.alg.geo.PerspectiveOps;
import boofcv.alg.geo.bundle.cameras.BundlePinholeSimplified;
import boofcv.alg.geo.selfcalib.EstimatePlaneAtInfinityGivenK;
import boofcv.alg.geo.selfcalib.SelfCalibrationLinearDualQuadratic;
import boofcv.factory.geo.*;
import boofcv.misc.ConfigConverge;
import boofcv.struct.calib.CameraPinhole;
import boofcv.struct.geo.AssociatedTriple;
import boofcv.struct.geo.TrifocalTensor;
import georegression.struct.point.Point3D_F64;
import georegression.struct.point.Vector3D_F64;
import georegression.struct.se.Se3_F64;
import org.ddogleg.fitting.modelset.ransac.Ransac;
import org.ddogleg.optimization.lm.ConfigLevenbergMarquardt;
import org.ejml.data.DMatrixRMaj;
import org.ejml.dense.row.CommonOps_DDRM;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import static boofcv.alg.geo.MultiViewOps.triangulatePoints;
/**
* Estimates the metric scene's structure given a set of sparse features associations from three views. This is
* intended to give the best possible solution from the sparse set of matching features. Its internal
* methods are updated as better strategies are found.
*
* Assumptions:
*
* - Principle point is zero
* - Zero skew
* - fx = fy approximately
*
*
* The zero principle point is enforced prior to calling {@link #process} by subtracting the image center from
* each pixel observations.
*
* Steps:
*
* - Fit Trifocal tensor using RANSAC
* - Get and refine camera matrices
* - Compute dual absolute quadratic
* - Estimate intrinsic parameters from DAC
* - Estimate metric scene structure
* - Sparse bundle adjustment
* - Tweak parameters and sparse bundle adjustment again
*
*
* @author Peter Abeles
*/
public class ThreeViewEstimateMetricScene {
// Make all configurations public for ease of manipulation
public ConfigRansac configRansac = new ConfigRansac();
public ConfigTrifocal configTriRansac = new ConfigTrifocal();
public ConfigTrifocal configTriFit = new ConfigTrifocal();
public ConfigTrifocalError configError = new ConfigTrifocalError();
public ConfigLevenbergMarquardt configLM = new ConfigLevenbergMarquardt();
public ConfigBundleAdjustment configSBA = new ConfigBundleAdjustment();
public ConfigConverge convergeSBA = new ConfigConverge(1e-6,1e-6,100);
// estimating the trifocal tensor and storing which observations are in the inlier set
public Ransac ransac;
public List inliers;
public Estimate1ofTrifocalTensor trifocalEstimator;
// how much and where it should print to
private PrintStream verbose;
private int verboseLevel;
// Projective camera matrices
protected DMatrixRMaj P1 = CommonOps_DDRM.identity(3,4);
protected DMatrixRMaj P2 = new DMatrixRMaj(3,4);
protected DMatrixRMaj P3 = new DMatrixRMaj(3,4);
// storage for pinhole cameras
protected List listPinhole = new ArrayList<>();
// Refines the structure
public BundleAdjustment bundleAdjustment;
// Bundle adjustment data structure and tuning parameters
public SceneStructureMetric structure;
public SceneObservations observations;
// If a positive number the focal length will be assumed to be that
public double manualFocalLength=-1;
// How many features it will keep when pruning
public double pruneFraction = 0.7;
// shape of input images.
private int width, height; // TODO Get size for each image individually
// metric location of each camera. The first view is always identity
protected List worldToView = new ArrayList<>();
/**
* Sets configurations to their default value
*/
public ThreeViewEstimateMetricScene() {
configRansac.maxIterations = 500;
configRansac.inlierThreshold = 1;
configError.model = ConfigTrifocalError.Model.REPROJECTION_REFINE;
configTriFit.which = EnumTrifocal.ALGEBRAIC_7;
configTriFit.converge.maxIterations = 100;
configLM.dampeningInitial = 1e-3;
configLM.hessianScaling = false;
configSBA.configOptimizer = configLM;
for (int i = 0; i < 3; i++) {
worldToView.add( new Se3_F64());
}
}
/**
* Determines the metric scene. The principle point is assumed to be zero in the passed in pixel coordinates.
* Typically this is done by subtracting the image center from each pixel coordinate for each view.
*
* @param associated List of associated features from 3 views. pixels
* @param width width of all images
* @param height height of all images
* @return true if successful or false if it failed
*/
public boolean process(List associated , int width , int height ) {
init(width, height);
// Fit a trifocal tensor to the input observations
if (!robustFitTrifocal(associated) )
return false;
// estimate the scene's structure
if( !estimateProjectiveScene())
return false;
if( !projectiveToMetric() )
return false;
// Run bundle adjustment while make sure a valid solution is found
setupMetricBundleAdjustment(inliers);
bundleAdjustment = FactoryMultiView.bundleSparseMetric(configSBA);
findBestValidSolution(bundleAdjustment);
// Prune outliers and run bundle adjustment one last time
pruneOutliers(bundleAdjustment);
return true;
}
private void init( int width , int height) {
this.width = width;
this.height = height;
ransac = FactoryMultiViewRobust.trifocalRansac(configTriRansac,configError,configRansac);
trifocalEstimator = FactoryMultiView.trifocal_1(configTriFit);
structure = null;
observations = null;
}
/**
* Fits a trifocal tensor to the list of matches features using a robust method
*/
private boolean robustFitTrifocal(List associated) {
// Fit a trifocal tensor to the observations robustly
ransac.process(associated);
inliers = ransac.getMatchSet();
TrifocalTensor model = ransac.getModelParameters();
if( verbose != null )
verbose.println("Remaining after RANSAC "+inliers.size()+" / "+associated.size());
// estimate using all the inliers
// No need to re-scale the input because the estimator automatically adjusts the input on its own
if( !trifocalEstimator.process(inliers,model) ) {
if( verbose != null ) {
verbose.println("Trifocal estimator failed");
}
return false;
}
return true;
}
/**
* Prunes the features with the largest reprojection error
*/
private void pruneOutliers(BundleAdjustment bundleAdjustment) {
// see if it's configured to not prune
if( pruneFraction == 1.0 )
return;
PruneStructureFromSceneMetric pruner = new PruneStructureFromSceneMetric(structure,observations);
pruner.pruneObservationsByErrorRank(pruneFraction);
pruner.pruneViews(10);
pruner.prunePoints(1);
bundleAdjustment.setParameters(structure,observations);
bundleAdjustment.optimize(structure);
if( verbose != null ) {
verbose.println("\nCamera");
for (int i = 0; i < structure.cameras.size; i++) {
verbose.println(structure.cameras.data[i].getModel().toString());
}
verbose.println("\n\nworldToView");
for (int i = 0; i < structure.views.size; i++) {
verbose.println(structure.views.data[i].worldToView.toString());
}
verbose.println("Fit Score: " + bundleAdjustment.getFitScore());
}
}
/**
* Tries a bunch of stuff to ensure that it can find the best solution which is physically possible
*/
private void findBestValidSolution(BundleAdjustment bundleAdjustment) {
// prints out useful debugging information that lets you know how well it's converging
if( verbose != null && verboseLevel > 0 )
bundleAdjustment.setVerbose(verbose,0);
// Specifies convergence criteria
bundleAdjustment.configure(convergeSBA.ftol, convergeSBA.gtol, convergeSBA.maxIterations);
bundleAdjustment.setParameters(structure,observations);
bundleAdjustment.optimize(structure);
// ensure that the points are in front of the camera and are a valid solution
if( checkBehindCamera(structure) ) {
if( verbose != null )
verbose.println(" flipping view");
flipAround(structure,observations);
bundleAdjustment.setParameters(structure,observations);
bundleAdjustment.optimize(structure);
}
double bestScore = bundleAdjustment.getFitScore();
List bestPose = new ArrayList<>();
List bestCameras = new ArrayList<>();
for (int i = 0; i < structure.views.size; i++) {
BundlePinholeSimplified c = structure.cameras.data[i].getModel();
bestPose.add(structure.views.data[i].worldToView.copy());
bestCameras.add( c.copy());
}
for (int i = 0; i < structure.cameras.size; i++) {
BundlePinholeSimplified c = structure.cameras.data[i].getModel();
c.f = listPinhole.get(i).fx;
c.k1 = c.k2 = 0;
}
// flip rotation assuming that it was done wrong
for (int i = 1; i < structure.views.size; i++) {
CommonOps_DDRM.transpose(structure.views.data[i].worldToView.R);
}
triangulatePoints(structure,observations);
bundleAdjustment.setParameters(structure,observations);
bundleAdjustment.optimize(structure);
if( checkBehindCamera(structure) ) {
if( verbose != null )
verbose.println(" flipping view");
flipAround(structure,observations);
bundleAdjustment.setParameters(structure,observations);
bundleAdjustment.optimize(structure);
}
// revert to old settings
if( verbose != null )
verbose.println(" ORIGINAL / NEW = " + bestScore+" / "+bundleAdjustment.getFitScore());
if( bundleAdjustment.getFitScore() > bestScore ) {
if( verbose != null )
verbose.println(" recomputing old structure");
for (int i = 0; i < structure.cameras.size; i++) {
BundlePinholeSimplified c = structure.cameras.data[i].getModel();
c.set(bestCameras.get(i));
structure.views.data[i].worldToView.set(bestPose.get(i));
}
triangulatePoints(structure,observations);
bundleAdjustment.setParameters(structure,observations);
bundleAdjustment.optimize(structure);
if( verbose != null )
verbose.println(" score = "+bundleAdjustment.getFitScore());
}
}
/**
* Estimate the projective scene from the trifocal tensor
*/
private boolean estimateProjectiveScene() {
List inliers = ransac.getMatchSet();
TrifocalTensor model = ransac.getModelParameters();
MultiViewOps.extractCameraMatrices(model,P2,P3);
// Most of the time this makes little difference, but in some edges cases this enables it to
// converge correctly
RefineThreeViewProjective refineP23 = FactoryMultiView.threeViewRefine(null);
if( !refineP23.process(inliers,P2,P3,P2,P3) ) {
if( verbose != null ) {
verbose.println("Can't refine P2 and P3!");
}
return false;
}
return true;
}
/**
* Using the initial metric reconstruction, provide the initial configurations for bundle adjustment
*/
private void setupMetricBundleAdjustment(List inliers) {
// Construct bundle adjustment data structure
structure = new SceneStructureMetric(false);
observations = new SceneObservations(3);
structure.initialize(3,3,inliers.size());
for (int i = 0; i < listPinhole.size(); i++) {
CameraPinhole cp = listPinhole.get(i);
BundlePinholeSimplified bp = new BundlePinholeSimplified();
bp.f = cp.fx;
structure.setCamera(i,false,bp);
structure.setView(i,i==0,worldToView.get(i));
structure.connectViewToCamera(i,i);
}
for (int i = 0; i < inliers.size(); i++) {
AssociatedTriple t = inliers.get(i);
observations.getView(0).add(i,(float)t.p1.x,(float)t.p1.y);
observations.getView(1).add(i,(float)t.p2.x,(float)t.p2.y);
observations.getView(2).add(i,(float)t.p3.x,(float)t.p3.y);
structure.connectPointToView(i,0);
structure.connectPointToView(i,1);
structure.connectPointToView(i,2);
}
// Initial estimate for point 3D locations
triangulatePoints(structure,observations);
}
/**
* Estimates the transform from projective to metric geometry
* @return true if successful
*/
boolean projectiveToMetric() {
// homography from projective to metric
DMatrixRMaj H = new DMatrixRMaj(4,4);
listPinhole.clear();
if( manualFocalLength <= 0 ) {
// Estimate calibration parameters
SelfCalibrationLinearDualQuadratic selfcalib = new SelfCalibrationLinearDualQuadratic(1.0);
selfcalib.addCameraMatrix(P1);
selfcalib.addCameraMatrix(P2);
selfcalib.addCameraMatrix(P3);
GeometricResult result = selfcalib.solve();
if (GeometricResult.SOLVE_FAILED != result && selfcalib.getSolutions().size() == 3) {
for (int i = 0; i < 3; i++) {
SelfCalibrationLinearDualQuadratic.Intrinsic c = selfcalib.getSolutions().get(i);
CameraPinhole p = new CameraPinhole(c.fx, c.fy, 0, 0, 0, width, height);
listPinhole.add(p);
}
} else {
// TODO Handle this better
System.out.println("Self calibration failed!");
for (int i = 0; i < 3; i++) {
CameraPinhole p = new CameraPinhole(width / 2, width / 2, 0, 0, 0, width, height);
listPinhole.add(p);
}
}
// convert camera matrix from projective to metric
if( !MultiViewOps.absoluteQuadraticToH(selfcalib.getQ(),H) ) {
if( verbose != null ) {
verbose.println("Projective to metric failed");
}
return false;
}
} else {
// Assume all cameras have a fixed known focal length
EstimatePlaneAtInfinityGivenK estimateV = new EstimatePlaneAtInfinityGivenK();
estimateV.setCamera1(manualFocalLength,manualFocalLength,0,0,0);
estimateV.setCamera2(manualFocalLength,manualFocalLength,0,0,0);
Vector3D_F64 v = new Vector3D_F64(); // plane at infinity
if( !estimateV.estimatePlaneAtInfinity(P2,v))
throw new RuntimeException("Failed!");
DMatrixRMaj K = PerspectiveOps.pinholeToMatrix(manualFocalLength,manualFocalLength,0,0,0);
MultiViewOps.createProjectiveToMetric(K,v.x,v.y,v.z,1,H);
for (int i = 0; i < 3; i++) {
CameraPinhole p = new CameraPinhole(manualFocalLength,manualFocalLength, 0, 0, 0, width, height);
listPinhole.add(p);
}
}
if( verbose != null ) {
for (int i = 0; i < 3; i++) {
CameraPinhole r = listPinhole.get(i);
verbose.println("fx=" + r.fx + " fy=" + r.fy + " skew=" + r.skew);
}
verbose.println("Projective to metric");
}
DMatrixRMaj K = new DMatrixRMaj(3,3);
// ignore K since we already have that
MultiViewOps.projectiveToMetric(P1,H,worldToView.get(0),K);
MultiViewOps.projectiveToMetric(P2,H,worldToView.get(1),K);
MultiViewOps.projectiveToMetric(P3,H,worldToView.get(2),K);
// scale is arbitrary. Set max translation to 1
double maxT = 0;
for( Se3_F64 p : worldToView ) {
maxT = Math.max(maxT,p.T.norm());
}
for( Se3_F64 p : worldToView ) {
p.T.scale(1.0/maxT);
if( verbose != null ) {
verbose.println(p);
}
}
return true;
}
/**
* Checks to see if a solution was converged to where the points are behind the camera. This is
* pysically impossible
*/
private boolean checkBehindCamera(SceneStructureMetric structure ) {
int totalBehind = 0;
Point3D_F64 X = new Point3D_F64();
for (int i = 0; i < structure.points.size; i++) {
structure.points.data[i].get(X);
if( X.z < 0 )
totalBehind++;
}
if( verbose != null ) {
verbose.println("points behind "+totalBehind+" / "+structure.points.size);
}
return totalBehind > structure.points.size/2;
}
/**
* Flip the camera pose around. This seems to help it converge to a valid solution if it got it backwards
* even if it's not technically something which can be inverted this way
*/
private static void flipAround(SceneStructureMetric structure, SceneObservations observations) {
// The first view will be identity
for (int i = 1; i < structure.views.size; i++) {
Se3_F64 w2v = structure.views.data[i].worldToView;
w2v.set(w2v.invert(null));
}
triangulatePoints(structure,observations);
}
public void setVerbose( PrintStream verbose , int level ) {
this.verbose = verbose;
this.verboseLevel = level;
}
public SceneStructureMetric getStructure() {
return structure;
}
}