boofcv.alg.sfm.d3.direct.VisOdomDirectColorDepth Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of boofcv-sfm Show documentation
Show all versions of boofcv-sfm Show documentation
BoofCV is an open source Java library for real-time computer vision and robotics applications.
/*
* Copyright (c) 2011-2019, Peter Abeles. All Rights Reserved.
*
* This file is part of BoofCV (http://boofcv.org).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package boofcv.alg.sfm.d3.direct;
import boofcv.abst.filter.derivative.ImageGradient;
import boofcv.abst.sfm.ImagePixelTo3D;
import boofcv.alg.InputSanityCheck;
import boofcv.alg.filter.derivative.DerivativeType;
import boofcv.alg.interpolate.InterpolatePixelS;
import boofcv.alg.interpolate.InterpolationType;
import boofcv.core.image.FactoryGImageMultiBand;
import boofcv.core.image.GImageMultiBand;
import boofcv.factory.filter.derivative.FactoryDerivative;
import boofcv.factory.interpolate.FactoryInterpolation;
import boofcv.struct.border.BorderType;
import boofcv.struct.image.ImageGray;
import boofcv.struct.image.ImageType;
import boofcv.struct.image.Planar;
import georegression.struct.point.Point2D_F32;
import georegression.struct.point.Point3D_F32;
import georegression.struct.se.Se3_F32;
import georegression.transform.se.SePointOps_F32;
import georegression.transform.twist.TwistCoordinate_F32;
import georegression.transform.twist.TwistOps_F32;
import org.ddogleg.struct.FastQueue;
import org.ejml.data.DMatrixRMaj;
import org.ejml.dense.row.factory.LinearSolverFactory_DDRM;
import org.ejml.interfaces.linsol.LinearSolverDense;
/**
* TODO Fill in
*
* @author Peter Abeles
*/
// TODO Handle pathological situations that will basically never happen in real life
// anything that makes the A matrix singular. dx or dy being zero will do the trick
@SuppressWarnings("unchecked")
public class VisOdomDirectColorDepth, D extends ImageGray>
{
// Type of input images
private ImageType> imageType;
private ImageType> derivType;
private LinearSolverDense solver;
private DMatrixRMaj A = new DMatrixRMaj(1,6);
private DMatrixRMaj y = new DMatrixRMaj(1,1);
private DMatrixRMaj twistMatrix = new DMatrixRMaj(6,1);
private ImageGradient,Planar> computeD;
private InterpolatePixelS interpI;
private InterpolatePixelS interpDX;
private InterpolatePixelS interpDY;
private GImageMultiBand wrapI;
// gradient of the current frame
Planar derivX, derivY;
// storage for pixel information in the key frame
FastQueue keypixels;
// Estimated motion from key frame to current frame
private Se3_F32 keyToCurrent = new Se3_F32();
// estimated motion from twist parameters for current iterations
Se3_F32 motionTwist = new Se3_F32();
private Se3_F32 tmp = new Se3_F32(); // work space
/** focal length along x and y axis (units: pixels) */
private float fx,fy;
/** image center (units: pixels) */
private float cx,cy;
private float convergeTol = 1e-6f;
private int maxIterations = 10;
// average optical error per pixel and band
private float errorOptical;
// number of valid pixels used to compute error
private int inboundsPixels = 0;
// work space
Point3D_F32 S = new Point3D_F32();
private TwistCoordinate_F32 twist = new TwistCoordinate_F32();
// used to compute spatial diveresity of tracked features
FeatureSpatialDiversity_F32 diversity = new FeatureSpatialDiversity_F32();
/**
* Declares internal data structures and specifies the type of input images to expect
* @param imageType Input image type
* @param derivType Type of image to store the derivative in
*/
public VisOdomDirectColorDepth(final int numBands , Class imageType , Class derivType ) {
this.imageType = ImageType.pl(numBands,imageType);
this.derivType = ImageType.pl(numBands,derivType);
wrapI = FactoryGImageMultiBand.create(this.imageType);
// min/max doesn't matter for bilinear interpolation
setInterpolation(0,0,0,0, InterpolationType.BILINEAR);
derivX = this.derivType.createImage(1,1);
derivY = this.derivType.createImage(1,1);
keypixels = new FastQueue(Pixel.class,true) {
@Override
protected Pixel createInstance() {
return new Pixel(numBands);
}
};
computeD = FactoryDerivative.gradient(DerivativeType.THREE, this.imageType, this.derivType);
}
/**
* Specifies intrinsic camera parameters. Must be called.
* @param fx focal length x (pixels)
* @param fy focal length y (pixels)
* @param cx principle point x (pixels)
* @param cy principle point y (pixels)
* @param width Width of the image
* @param height Height of the image
*/
public void setCameraParameters( float fx , float fy , float cx , float cy ,
int width , int height ) {
this.fx = fx;
this.fy = fy;
this.cx = cx;
this.cy = cy;
derivX.reshape(width, height);
derivY.reshape(width, height);
// set these to the maximum possible size
int N = width*height*imageType.getNumBands();
A.reshape(N,6);
y.reshape(N,1);
}
/**
* Used to change interpolation method. Probably don't want to do this.
* @param inputMin min value for input pixels. 0 is typical
* @param inputMax max value for input pixels. 255 is typical
* @param derivMin min value for the derivative of input pixels
* @param derivMax max value for the derivative of input pixels
* @param type Type of interpolation method to use
*/
public void setInterpolation( double inputMin , double inputMax, double derivMin , double derivMax ,
InterpolationType type) {
interpI = FactoryInterpolation.createPixelS(inputMin,inputMax,type, BorderType.EXTENDED, imageType.getImageClass());
interpDX = FactoryInterpolation.createPixelS(derivMin,derivMax,type, BorderType.EXTENDED, derivType.getImageClass());
interpDY = FactoryInterpolation.createPixelS(derivMin,derivMax,type, BorderType.EXTENDED, derivType.getImageClass());
}
/**
* Specifies convergence parameters
*
* @param convergenceTol When change in error is less than this fraction stop. Try 1e-6
* @param maxIterations When this number of iterations has been exceeded stop. Try 10
*/
public void setConvergence( float convergenceTol , int maxIterations ) {
this.convergeTol = convergenceTol;
this.maxIterations = maxIterations;
}
/**
* Set's the keyframe. This is the image which motion is estimated relative to. The 3D location of points in
* the keyframe must be known.
*
* @param input Image which is to be used as the key frame
* @param pixelTo3D Used to compute 3D points from pixels in key frame
*/
void setKeyFrame(Planar input, ImagePixelTo3D pixelTo3D) {
InputSanityCheck.checkSameShape(derivX,input);
wrapI.wrap(input);
keypixels.reset();
for (int y = 0; y < input.height; y++) {
for (int x = 0; x < input.width; x++) {
// See if there's a valid 3D point at this location
if( !pixelTo3D.process(x,y) ) {
continue;
}
float P_x = (float)pixelTo3D.getX();
float P_y = (float)pixelTo3D.getY();
float P_z = (float)pixelTo3D.getZ();
float P_w = (float)pixelTo3D.getW();
// skip point if it's at infinity or has a negative value
if( P_w <= 0 )
continue;
// save the results
Pixel p = keypixels.grow();
p.valid = true;
wrapI.get(x,y,p.bands);
p.x = x;
p.y = y;
p.p3.set(P_x/P_w,P_y/P_w,P_z/P_w);
}
}
}
/**
* Computes the diversity of valid pixels in keyframe to the location in the current frame.
* @return Angular spread along the smallest axis in radians
*/
public double computeFeatureDiversity(Se3_F32 keyToCurrent ) {
diversity.reset();
for (int i = 0; i < keypixels.size(); i++) {
Pixel p = keypixels.data[i];
if( !p.valid )
continue;
SePointOps_F32.transform(keyToCurrent, p.p3, S);
diversity.addPoint(S.x, S.y, S.z);
}
diversity.process();
return diversity.getSpread();
}
/**
* Estimates the motion relative to the key frame.
* @param input Next image in the sequence
* @param hintKeyToInput estimated transform from keyframe to the current input image
* @return true if it was successful at estimating the motion or false if it failed for some reason
*/
public boolean estimateMotion(Planar input , Se3_F32 hintKeyToInput ) {
InputSanityCheck.checkSameShape(derivX,input);
initMotion(input);
keyToCurrent.set(hintKeyToInput);
boolean foundSolution = false;
float previousError = Float.MAX_VALUE;
for (int i = 0; i < maxIterations; i++) {
constructLinearSystem(input, keyToCurrent);
if (!solveSystem())
break;
if( Math.abs(previousError-errorOptical)/previousError < convergeTol )
break;
else {
// update the estimated motion from the computed twist
previousError = errorOptical;
keyToCurrent.concat(motionTwist, tmp);
keyToCurrent.set(tmp);
foundSolution = true;
}
}
return foundSolution;
}
/**
* Initialize motion related data structures
*/
void initMotion(Planar input) {
if( solver == null ) {
solver = LinearSolverFactory_DDRM.qr(input.width*input.height*input.getNumBands(),6);
}
// compute image derivative and setup interpolation functions
computeD.process(input,derivX,derivY);
}
/**
* Given the set of points in the key frame and their current observations
* @param g initial transform applied to pixel locations. keyframe to current frame
*/
void constructLinearSystem(Planar input , Se3_F32 g ) {
int numBands = imageType.getNumBands();
// first precompute everything that does not depend on pixel values
inboundsPixels = 0;
for (int i = 0; i < keypixels.size(); i++) {
Pixel p = keypixels.data[i];
// Apply the known warp
SePointOps_F32.transform(g, p.p3, S);
if( S.z <= 0 ) {
p.valid = false;
continue;
}
// Compute projected warped pixel coordinate on image I_1
p.proj.x = (S.x / S.z) * fx + cx;
p.proj.y = (S.y / S.z) * fy + cy;
// make sure it's in the bounds
if (p.proj.x < 0 || p.proj.x > input.width - 1 || p.proj.y < 0 || p.proj.y > input.height - 1) {
p.valid = false;
continue;
} else {
p.valid = true;
}
inboundsPixels++;
// pi matrix derivative relative to t at S
float ZZ = S.z * S.z;
p.dP11 = fx / S.z;
p.dP13 = -S.x * fx / ZZ;
p.dP22 = fy / S.z;
p.dP23 = -S.y * fy / ZZ;
}
// how compute the components which require
errorOptical = 0;
int row = 0;
for (int band = 0; band < numBands; band++) {
interpDX.setImage(derivX.getBand(band));
interpDY.setImage(derivY.getBand(band));
interpI.setImage(input.getBand(band));
for (int i = 0; i < keypixels.size(); i++) {
Pixel p = keypixels.data[i];
if( !p.valid )
continue;
// Apply the known warp
// TODO precompute?
SePointOps_F32.transform(g, p.p3, S);
// sample pixel values at warped location in I_1
// NOTE: This could be highly optimized. Compute and save interpolation weights once per input
// instead of for each band and image (current,dx,dy)
// TODO create a special bilinear class for this?
float current = interpI.get( p.proj.x, p.proj.y);
float dx = interpDX.get(p.proj.x, p.proj.y);
float dy = interpDY.get(p.proj.x, p.proj.y);
// B = grad^T * dPI/dt = shape(1,3)
float b1 = dx*p.dP11;
float b2 = dy*p.dP22;
float b3 = dx*p.dP13 + dy*p.dP23;
// C * A(S'(x)) = shape(1,6)
int indexA = row * 6;
A.data[indexA++] = -b2*S.z + b3*S.y;
A.data[indexA++] = b1*S.z - b3*S.x;
A.data[indexA++] = -b1*S.y + b2*S.x;
A.data[indexA++] = b1;
A.data[indexA++] = b2;
A.data[indexA] = b3;
float error = -(current - p.bands[band]);
y.data[row] = error;
errorOptical += Math.abs(error);
row += 1;
}
}
errorOptical /= row;
A.numRows = row;
y.numRows = row;
}
boolean solveSystem() {
if( !solver.setA(A))
return false;
solver.solve(y,twistMatrix);
twist.set((float)twistMatrix.data[0], (float)twistMatrix.data[1], (float)twistMatrix.data[2],
(float)twistMatrix.data[3], (float)twistMatrix.data[4], (float)twistMatrix.data[5]);
// theta is 1 because of how this solution was formulated. See derivation
TwistOps_F32.exponential(twist,1.0f, motionTwist );
return true;
}
public float getErrorOptical() {
return errorOptical;
}
public int getInboundsPixels() {
return inboundsPixels;
}
public int getKeyframePixels() {
return keypixels.size;
}
static class Pixel {
float bands[]; // pixel intensity in each band
int x,y; // pixel coordinate
Point3D_F32 p3 = new Point3D_F32(); // world coordinate
Point2D_F32 proj = new Point2D_F32(); // projected location of point
boolean valid; // if this is visible after apply the estimated warp
// the pi matrix
float dP11,dP13,dP22,dP23;
public Pixel( int numBands ) {
bands = new float[numBands];
}
}
public Se3_F32 getKeyToCurrent() {
return keyToCurrent;
}
public ImageType> getImageType() {
return imageType;
}
public ImageType> getDerivType() {
return derivType;
}
}