
boofcv.alg.geo.pose.CompatibleProjectiveHomography


BoofCV is an open source Java library for real-time computer vision and robotics applications.

/*
 * Copyright (c) 2021, Peter Abeles. All Rights Reserved.
 *
 * This file is part of BoofCV (http://boofcv.org).
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package boofcv.alg.geo.pose;

import boofcv.alg.geo.PerspectiveOps;
import boofcv.misc.ConfigConverge;
import georegression.geometry.GeometryMath_F64;
import georegression.struct.point.Point2D_F64;
import georegression.struct.point.Point3D_F64;
import georegression.struct.point.Point4D_F64;
import org.ddogleg.optimization.FactoryOptimization;
import org.ddogleg.optimization.UnconstrainedLeastSquares;
import org.ddogleg.optimization.UtilOptimize;
import org.ddogleg.optimization.functions.FunctionNtoM;
import org.ddogleg.optimization.lm.ConfigLevenbergMarquardt;
import org.ddogleg.struct.DogArray;
import org.ejml.data.DMatrixRMaj;
import org.ejml.dense.row.CommonOps_DDRM;
import org.ejml.dense.row.NormOps_DDRM;
import org.ejml.dense.row.RandomMatrices_DDRM;
import org.ejml.dense.row.factory.LinearSolverFactory_DDRM;
import org.ejml.dense.row.linsol.svd.SolveNullSpaceSvd_DDRM;
import org.ejml.dense.row.linsol.svd.SolvePseudoInverseSvd_DDRM;
import org.ejml.interfaces.SolveNullSpace;
import org.ejml.interfaces.linsol.LinearSolverDense;

import java.util.List;
import java.util.Random;

/**
 * Algorithms for finding a 4x4 homography which can convert between two camera matrices of the same view that
 * differ only by a projective ambiguity. This is needed when extending a projective scene by independently
 * computing the solutions for sets of 2, 3, or more views. It provides a mechanism for "stitching" the solutions
 * together using their common views.
 *
 * <p>Projection of world point 'X' to pixel 'x': x = P*X</p>
 *
 * <p>Two projective frames of the same view are related by the 4x4 homography H. 3D points are related by H too:</p>
 *
 * <p>P1[i]*H = P2[i]</p>
 *
 * <p>X1 = H*X2</p>
 *
 * <p>P is a 3x4 camera matrix and has a projective ambiguity: x = P*inv(H)*H*X, so P' = P*inv(H) and X' = H*X</p>
 *
 * <ol>
 * <li>Fitzgibbon, Andrew W., and Andrew Zisserman. "Automatic camera recovery for closed or open image sequences."
 * European Conference on Computer Vision. Springer, Berlin, Heidelberg, 1998.</li>
 * <li>P. Abeles, "BoofCV Technical Report: Automatic Camera Calibration" 2020-1</li>
 * </ol>
 *
 * @author Peter Abeles
 */
public class CompatibleProjectiveHomography {

	// Linear solver. Change to SVD if a more robust one is needed
	public LinearSolverDense<DMatrixRMaj> solver = LinearSolverFactory_DDRM.pseudoInverse(true);
	public SolveNullSpace<DMatrixRMaj> nullspace = new SolveNullSpaceSvd_DDRM();
	// in the paper it doesn't specify, so it probably uses a simpler pseudo inverse: A+ = A'*inv(A*A')
	public SolvePseudoInverseSvd_DDRM solvePInv = new SolvePseudoInverseSvd_DDRM();

	// workspace variables for A*X=B
	DMatrixRMaj A = new DMatrixRMaj(1, 1);
	DMatrixRMaj B = new DMatrixRMaj(1, 1);

	private final Point4D_F64 a = new Point4D_F64();

	// Scrambled versions of input camera matrices. See inline comments below
	private final DogArray<DMatrixRMaj> copyA = new DogArray<>(() -> new DMatrixRMaj(3, 4));
	private final DogArray<DMatrixRMaj> copyB = new DogArray<>(() -> new DMatrixRMaj(3, 4));

	// RM = random matrix. Used to scramble inputs. See code comments below for why it's orthogonal
	private final DMatrixRMaj RM = RandomMatrices_DDRM.orthogonal(4, 4, new Random(3245));
	private final DMatrixRMaj tmp4x4 = new DMatrixRMaj(4, 4);

	// PinvP = pinv(P)*P
	private final DMatrixRMaj PinvP = new DMatrixRMaj(4, 4);
	// storage for null space of P
	private final DMatrixRMaj h = new DMatrixRMaj(1, 1);

	// Distance functions for non-linear optimization
	private DistanceWorldEuclidean distanceWorld = new DistanceWorldEuclidean();
	private DistanceReprojection distanceRepojection = new DistanceReprojection();

	// Non-linear optimization function
	public UnconstrainedLeastSquares<DMatrixRMaj> lm;

	// Convergence criteria
	public final ConfigConverge configConverge = new ConfigConverge(1e-8, 1e-8, 500);

	public CompatibleProjectiveHomography() {
		ConfigLevenbergMarquardt config = new ConfigLevenbergMarquardt();
		lm = FactoryOptimization.levenbergMarquardt(config, false);
	}

	/**
	 * Finds the homography H by minimizing algebraic error. Modified version of the algorithm described in [1].
	 * See [2] for implementation details. The solution is found by finding the null space. A minimum of 5 points
	 * are required to solve for the 15 degrees of freedom in H.
	 *
	 * <p>X1 = H*X2</p>
	 *
	 * <p>NOTE: Fails when all points lie exactly on the same plane</p>
	 *
	 * @param points1 Set of points from view A but in projective frame 1. Recommended that they have an f-norm of 1
	 * @param points2 Set of points from view A but in projective frame 2. Recommended that they have an f-norm of 1
	 * @param H (Output) 4x4 homography relating the views. P1*H = P2, X1 = H*X2
	 * @return true if successful or false if it fails
	 */
	public boolean fitPoints( List<Point4D_F64> points1, List<Point4D_F64> points2, DMatrixRMaj H ) {
		if (points1.size() != points2.size())
			throw new IllegalArgumentException("Must have the same number in each list");
		if (points1.size() < 5)
			throw new IllegalArgumentException("At least 5 points required");

		final int size = points1.size();
		A.reshape(size*3, 16);

		for (int i = 0; i < size; i++) {
			Point4D_F64 a = points1.get(i);
			Point4D_F64 b = points2.get(i);

			double alpha = -(a.x + a.y + a.z + a.w);

			for (int j = 0; j < 3; j++) {
				int idx = 16*(3*i + j);
				double va = a.getIdx(j);
				for (int k = 0; k < 4; k++) {
					A.data[idx++] = va*b.x;
					A.data[idx++] = va*b.y;
					A.data[idx++] = va*b.z;
					A.data[idx++] = va*b.w;
				}
			}

			for (int j = 0; j < 3; j++) {
				int idx = 16*(3*i + j) + 4*j;
				A.data[idx] += b.x*alpha;
				A.data[idx + 1] += b.y*alpha;
				A.data[idx + 2] += b.z*alpha;
				A.data[idx + 3] += b.w*alpha;
			}
		}

		if (!nullspace.process(A, 1, H))
			return false;

		H.reshape(4, 4);

		return true;
	}

	/**
	 * Computes the homography which relates common projective camera transforms to each other by solving a linear
	 * system. A Direct Linear Transform is used due to scale ambiguity. Non-linear refinement is recommended
	 * before bundle adjustment. Two or more camera matrices are required.
	 *
	 * <p>P1[i] = P2[i]*inv(H)</p>
	 *
	 * <p>NOTE: This will work with planar scenes</p>
	 *
	 * @param cameras1 list of camera matrices
	 * @param cameras2 list of camera matrices, same views as cameras1
	 * @param H (Output) 4x4 homography relating the views. P1*H = P2, X1 = H*X2
	 * @return true if successful or false if it failed
	 */
	public boolean fitCameras( List<DMatrixRMaj> cameras1, List<DMatrixRMaj> cameras2, DMatrixRMaj H ) {
		if (cameras1.size() != cameras2.size())
			throw new IllegalArgumentException("Must have the same number in each list");
		if (cameras1.size() < 2)
			throw new IllegalArgumentException("At least two cameras are required");

		final int size = cameras1.size();

		// If any of the camera matrices has all zeros in one of its columns (typically column 3) that can
		// cause a singularity. This is especially problematic when only two views are available.
		// To get around that problem we scramble the camera matrices so that it's extremely unlikely for there
		// to be zeros in any of the columns.
		//   P1*RM*HH = P2*RM
		//   H = RM*HH*inv(RM) = RM*HH*RM'
		// RM is a random orthogonal matrix. This is desirable since it won't change the scaling and its inverse is
		// simply its transpose
		copyA.reset();
		copyB.reset();
		for (int i = 0; i < size; i++) {
			CommonOps_DDRM.mult(cameras1.get(i), RM, copyA.grow());
			CommonOps_DDRM.mult(cameras2.get(i), RM, copyB.grow());
		}

		// A solution is found in both directions because if a column is zero or nearly zero there are two
		// singular values in that column, potentially resulting in an invalid solution. A brute force approach is
		// used to get around that problem by finding H in both directions and picking the one with the smallest error
		H.reshape(4, 4);
		h.reshape(4, 1);
		A.reshape(size*3, 4);

		for (int col = 0; col < 4; col++) {
			for (int i = 0; i < size; i++) {
				DMatrixRMaj P1 = copyA.get(i);
				DMatrixRMaj P2 = copyB.get(i);

				cameraCrossProductMatrix(P1, P2, col, i*3);
			}
			if (!nullspace.process(A, 1, h))
				return false;

			CommonOps_DDRM.insert(h, H, 0, col);
		}

		// Fix the scale for each column now
		resolveColumnScale(copyA.toList(), copyB.toList(), H);

		// Undo the earlier scrambling. See equations above.
		CommonOps_DDRM.mult(RM, H, tmp4x4);
		CommonOps_DDRM.multTransB(tmp4x4, RM, H);

		// NOTE: This does seem a bit overly complex. Is there a way to simplify the code? Maybe solve for all of H
		// at once and not have to deal with column scales?

		return true;
	}

	/**
	 * Since each column was solved for independently the columns will have different scales. We need to make
	 * them have the same scale and be careful about sign.
	 */
	private void resolveColumnScale( List<DMatrixRMaj> cameras1, List<DMatrixRMaj> cameras2, DMatrixRMaj H ) {
		B.reshape(3, 1);
		h.zero();

		// Carefully select a pair of camera matrices to determine the scale from. Need to avoid the situation where
		// a column is all zeros. However, since the scale can float, a simple test to see if it is near zero
		// won't work. Instead we look for the camera matrix with the best worst-case column
		int selected = -1;
		double smallestRatio = Double.MAX_VALUE;
		for (int cameraIdx = 0; cameraIdx < cameras1.size(); cameraIdx++) {
			CommonOps_DDRM.mult(cameras1.get(cameraIdx), H, A);

			// A ratio close to the max value of 1.0 is less desirable
			double worstRatio = 0.0;
			for (int col = 0; col < 4; col++) {
				CommonOps_DDRM.extractColumn(A, col, B);
				double maxValueAbs = CommonOps_DDRM.elementMaxAbs(B);
				double minValueAbs = CommonOps_DDRM.elementMinAbs(B);

				// avoid divide by zero error
				double ratio = maxValueAbs != 0.0 ? minValueAbs/maxValueAbs : 1.0;
				worstRatio = Math.max(worstRatio, ratio);
			}

			// this selects the matrix with the best worst-case scenario
			if (worstRatio < smallestRatio) {
				smallestRatio = worstRatio;
				selected = cameraIdx;
			}
		}

		// compute the scale for each column
		CommonOps_DDRM.mult(cameras1.get(selected), H, A);
		for (int col = 0; col < 4; col++) {
			CommonOps_DDRM.extractColumn(A, col, B);
			double found = NormOps_DDRM.normF(B);

			// by using the norm to compute scale we lose sign information. To determine the sign look at the
			// element with the largest absolute value. this avoids noise dominating the sign
			int indexMaxAbs = -1;
			double maxAbs = 0;
			for (int i = 0; i < 3; i++) {
				if (Math.abs(B.data[i]) > maxAbs) {
					maxAbs = Math.abs(B.data[i]);
					indexMaxAbs = i;
				}
			}
			double foundSign = Math.signum(B.data[indexMaxAbs]);

			CommonOps_DDRM.extractColumn(cameras2.get(selected), col, B);
			double expected = NormOps_DDRM.normF(B);

			double scale = expected/found;
			if (foundSign != Math.signum(B.data[indexMaxAbs]))
				scale *= -1;

			for (int row = 0; row < 4; row++) {
				H.set(row, col, H.get(row, col)*scale);
			}
		}
	}

	/**
	 * Apply cross product. P2(:,col) cross P1*H = 0
	 *
	 * @param column Which column is being solved for
	 * @param rowInA First row in A it should write to
	 */
	void cameraCrossProductMatrix( DMatrixRMaj P1, DMatrixRMaj P2, int column, int rowInA ) {
		double b1 = P2.unsafe_get(0, column);
		double b2 = P2.unsafe_get(1, column);
		double b3 = P2.unsafe_get(2, column);

		double a11 = P1.unsafe_get(0, 0), a12 = P1.unsafe_get(0, 1), a13 = P1.unsafe_get(0, 2), a14 = P1.unsafe_get(0, 3);
		double a21 = P1.unsafe_get(1, 0), a22 = P1.unsafe_get(1, 1), a23 = P1.unsafe_get(1, 2), a24 = P1.unsafe_get(1, 3);
		double a31 = P1.unsafe_get(2, 0), a32 = P1.unsafe_get(2, 1), a33 = P1.unsafe_get(2, 2), a34 = P1.unsafe_get(2, 3);

		// row 1
		A.data[rowInA*4] = b2*a31 - b3*a21;
		A.data[rowInA*4 + 1] = b2*a32 - b3*a22;
		A.data[rowInA*4 + 2] = b2*a33 - b3*a23;
		A.data[rowInA*4 + 3] = b2*a34 - b3*a24;

		// row 2
		rowInA += 1;
		A.data[rowInA*4] = b3*a11 - b1*a31;
		A.data[rowInA*4 + 1] = b3*a12 - b1*a32;
		A.data[rowInA*4 + 2] = b3*a13 - b1*a33;
		A.data[rowInA*4 + 3] = b3*a14 - b1*a34;

		// row 3
		rowInA += 1;
		A.data[rowInA*4] = b1*a21 - b2*a11;
		A.data[rowInA*4 + 1] = b1*a22 - b2*a12;
		A.data[rowInA*4 + 2] = b1*a23 - b2*a13;
		A.data[rowInA*4 + 3] = b1*a24 - b2*a14;
	}

	/**
	 * Solves for the transform H using one view and 2 or more points. The two camera matrices associated with the
	 * same view constrain 11 out of the 15 DOF. Each point provides another 3 linear constraints.
	 *
	 * <p>H(v) = pinv(P)*P' + h*v^T</p>
	 *
	 * <p>where 'h' is the null space of P and 'v' is an unknown 4-vector that is solved for with a linear
	 * equation.</p>
	 *
	 * <p>NOTE: While 2 is the minimum number of points, during testing it seemed to require 4 points with perfect
	 * data. The math was double checked and no fundamental flaw could be found in the test. More investigation is
	 * needed. There is a large difference in scales that could be contributing to this problem.</p>
	 *
	 * <p>NOTE: Produces poor results when the scene is planar</p>
	 *
	 * @param camera1 Camera matrix
	 * @param camera2 Camera matrix of the same view but in another projective space
	 * @param points1 Observations from camera1
	 * @param points2 Observations from camera2
	 * @param H (Output) 4x4 homography relating the views. P1*H = P2, X1 = H*X2
	 * @return true if successful
	 */
	public boolean fitCameraPoints( DMatrixRMaj camera1, DMatrixRMaj camera2,
									List<Point4D_F64> points1, List<Point4D_F64> points2, DMatrixRMaj H ) {
		if (points1.size() != points2.size())
			throw new IllegalArgumentException("Lists must be the same size");
		if (points1.size() < 2)
			throw new IllegalArgumentException("A minimum of two points are required");

		// NOTE: Potential improvement. Normalize the input data somehow. Get the two camera matrices at the same
		//       scale and do the same to the points

		// yes the SVD is computed twice. This can be optimized later, right now it isn't worth it. not a bottleneck
		if (!solvePInv.setA(camera1))
			return false;
		if (!nullspace.process(camera1, 1, h))
			return false;

		// PinvP = pinv(P)*P'
		solvePInv.solve(camera2, PinvP);

		final int size = points1.size();
		A.reshape(size*3, 4);
		B.reshape(size*3, 1);

		for (int i = 0, idxA = 0, idxB = 0; i < size; i++) {
			Point4D_F64 p1 = points1.get(i);
			Point4D_F64 p2 = points2.get(i);

			// a = P+P'X'
			GeometryMath_F64.mult(PinvP, p2, a);

//			double a_sum = a.w;
//			double h_sum = h.data[3];
//			double x_sum = p1.w;

			// Use the sum instead of a[4], ... so that points at infinity can be handled
			double a_sum = a.x + a.y + a.z + a.w;
			double h_sum = h.data[0] + h.data[1] + h.data[2] + h.data[3];
			double x_sum = p1.x + p1.y + p1.z + p1.w;

			for (int k = 0; k < 3; k++) {
				// b[k] = h[k]*X[4] - h[4]*X[k]
				double b_k = h.data[k]*x_sum - h_sum*p1.getIdx(k);
				// c[k] = X[k]*a[4] - X[4]*a[k]
				double c_k = p1.getIdx(k)*a_sum - x_sum*a.getIdx(k);

				// b*X'^T*v = c
				A.data[idxA++] = b_k*p2.x;
				A.data[idxA++] = b_k*p2.y;
				A.data[idxA++] = b_k*p2.z;
				A.data[idxA++] = b_k*p2.w;
				B.data[idxB++] = c_k;
			}
		}

//		A.print();

		// Solve for v
		if (!solver.setA(A))
			return false;
		H.reshape(4, 1);
		solver.solve(B, H);

		// copy v into 'a'
		a.setTo(H.data[0], H.data[1], H.data[2], H.data[3]);

		// H = P+P' + h*v^T
		H.reshape(4, 4);
		for (int i = 0; i < 4; i++) {
			for (int j = 0; j < 4; j++) {
				H.data[i*4 + j] = PinvP.get(i, j) + h.data[i]*a.getIdx(j);
			}
		}

		return true;
	}

	/**
	 * Refines the initial estimate of H by reducing the Euclidean distance between points.
	 *
	 * <p>NOTE: The parameterization can be improved. It optimizes 16-DOF when only 15 are needed and the scale
	 * isn't bounded.</p>
	 *
	 * @param scene1 List of points in world coordinates
	 * @param scene2 List of points in world coordinates, but in a different projective frame
	 * @param H (Output) 4x4 homography that relates the two sets of camera matrices. P1*H = P2
	 */
	public void refineWorld( List<Point3D_F64> scene1, List<Point3D_F64> scene2, DMatrixRMaj H ) {
		if (H.numCols != 4 || H.numRows != 4)
			throw new IllegalArgumentException("Expected 4x4 matrix for H");

		distanceWorld.scene1 = scene1;
		distanceWorld.scene2 = scene2;

		lm.setFunction(distanceWorld, null);
		lm.initialize(H.data, 1e-8, 1e-8);

		UtilOptimize.process(lm, configConverge.maxIterations);

		System.arraycopy(lm.getParameters(), 0, H.data, 0, 16);
	}

	/**
	 * Refines the initial estimate of H by minimizing reprojection error in the first projective frame.
	 */
	public void refineReprojection( List<DMatrixRMaj> cameras1, List<Point4D_F64> scene1, List<Point4D_F64> scene2,
									DMatrixRMaj H ) {
		if (H.numCols != 4 || H.numRows != 4)
			throw new IllegalArgumentException("Expected 4x4 matrix for H");
		if (scene1.size() != scene2.size() || scene1.size() <= 0)
			throw new IllegalArgumentException("Lists must have equal size and not be empty");
		if (cameras1.isEmpty())
			throw new IllegalArgumentException("Cameras must not be empty");

		distanceRepojection.cameras1 = cameras1;
		distanceRepojection.scene1 = scene1;
		distanceRepojection.scene2 = scene2;

		lm.setFunction(distanceRepojection, null);
		lm.initialize(H.data, configConverge.ftol, configConverge.gtol);

		UtilOptimize.process(lm, configConverge.maxIterations);

		System.arraycopy(lm.getParameters(), 0, H.data, 0, 16);
	}

	/**
	 * Euclidean error for matching up points in the two projective frames
	 *
	 * <p>||X1 - H*X2||</p>
	 */
	@SuppressWarnings({"NullAway.Init"})
	private static class DistanceWorldEuclidean implements FunctionNtoM {
		List<Point3D_F64> scene1;
		List<Point3D_F64> scene2;

		// adjusted point
		Point3D_F64 ba = new Point3D_F64();

		// homography
		DMatrixRMaj H = new DMatrixRMaj(4, 4);

		@Override public void process( double[] input, double[] output ) {
			H.data = input;

			for (int i = 0, idx = 0; i < scene1.size(); i++) {
				Point3D_F64 a = scene1.get(i);
				Point3D_F64 b = scene2.get(i);

				GeometryMath_F64.mult4(H, b, ba);

				output[idx++] = a.x - ba.x;
				output[idx++] = a.y - ba.y;
				output[idx++] = a.z - ba.z;
			}
		}

		@Override public int getNumOfInputsN() {
			return 16; // HMM only 15 DOF, scale invariant. This will float
		}

		@Override public int getNumOfOutputsM() {
			return scene1.size()*3;
		}
	}

	/**
	 * Euclidean reprojection error in projective frame 1 only.
	 */
	@SuppressWarnings({"NullAway.Init"})
	private static class DistanceReprojection implements FunctionNtoM {
		List<DMatrixRMaj> cameras1;
		List<Point4D_F64> scene1;
		List<Point4D_F64> scene2;

		// adjusted point
		Point4D_F64 ba = new Point4D_F64();

		// homography
		DMatrixRMaj H = new DMatrixRMaj(4, 4);

		Point2D_F64 pixel1 = new Point2D_F64();
		Point2D_F64 pixel2 = new Point2D_F64();

		@Override public void process( double[] input, double[] output ) {
			H.data = input;

			int outputIdx = 0;
			for (int viewIdx = 0; viewIdx < cameras1.size(); viewIdx++) {
				DMatrixRMaj P = cameras1.get(viewIdx);

				for (int pointIdx = 0; pointIdx < scene1.size(); pointIdx++) {
					Point4D_F64 a = scene1.get(pointIdx);
					Point4D_F64 b = scene2.get(pointIdx);

					// move the point from projective frame 2 into frame 1
					GeometryMath_F64.mult(H, b, ba);

					PerspectiveOps.renderPixel(P, a, pixel1); // NOTE: This could be cached
					PerspectiveOps.renderPixel(P, ba, pixel2);

					output[outputIdx++] = pixel1.x - pixel2.x;
					output[outputIdx++] = pixel1.y - pixel2.y;
				}
			}
		}

		@Override public int getNumOfInputsN() {
			return 16; // HMM only 15 DOF, scale invariant. This will float
		}

		@Override public int getNumOfOutputsM() {
			return cameras1.size()*scene1.size()*2;
		}
	}
}
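
Usage sketch (not part of the library): the example below shows how this class might be used to stitch together two independently computed projective reconstructions that share the same views, first with the linear fitCameras() estimate and then with non-linear refinement. The class StitchProjectiveScenesExample, the method alignFrames, and its inputs (camerasA, camerasB, pointsA, pointsB) are hypothetical placeholders.

// Illustrative usage only; not part of BoofCV.
import boofcv.alg.geo.pose.CompatibleProjectiveHomography;
import georegression.struct.point.Point4D_F64;
import org.ejml.data.DMatrixRMaj;

import java.util.List;

public class StitchProjectiveScenesExample {
	/**
	 * Estimates the 4x4 homography H with P1*H = P2 and X1 = H*X2 from the shared views, then refines it by
	 * minimizing reprojection error in frame A. Returns null if the linear estimate fails.
	 */
	public static DMatrixRMaj alignFrames( List<DMatrixRMaj> camerasA, List<DMatrixRMaj> camerasB,
											List<Point4D_F64> pointsA, List<Point4D_F64> pointsB ) {
		CompatibleProjectiveHomography alg = new CompatibleProjectiveHomography();
		DMatrixRMaj H = new DMatrixRMaj(4, 4);

		// Linear estimate from two or more camera matrices of the same views: P1[i]*H = P2[i]
		if (!alg.fitCameras(camerasA, camerasB, H))
			return null;

		// Non-linear refinement by minimizing reprojection error in frame A, recommended before
		// handing the merged scene off to bundle adjustment
		alg.refineReprojection(camerasA, pointsA, pointsB, H);

		return H;
	}
}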
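
A second sketch, assuming only matched homogeneous scene points are available and no shared camera matrices: fitPoints() needs at least five point pairs, and its javadoc recommends scaling each point to an f-norm of 1 first. FitPointsExample and normalizeToUnitNorm are illustrative names, not BoofCV API.

// Illustrative usage only; not part of BoofCV.
import boofcv.alg.geo.pose.CompatibleProjectiveHomography;
import georegression.struct.point.Point4D_F64;
import org.ejml.data.DMatrixRMaj;

import java.util.List;

public class FitPointsExample {
	/** Scales each homogeneous point to unit f-norm, as the fitPoints() javadoc recommends. Illustrative helper. */
	static void normalizeToUnitNorm( List<Point4D_F64> points ) {
		for (Point4D_F64 p : points) {
			double n = Math.sqrt(p.x*p.x + p.y*p.y + p.z*p.z + p.w*p.w);
			if (n != 0.0)
				p.setTo(p.x/n, p.y/n, p.z/n, p.w/n);
		}
	}

	/** Estimates H with X1 = H*X2 from five or more matched homogeneous points. */
	static boolean estimate( List<Point4D_F64> pointsInFrame1, List<Point4D_F64> pointsInFrame2, DMatrixRMaj H ) {
		normalizeToUnitNorm(pointsInFrame1);
		normalizeToUnitNorm(pointsInFrame2);

		CompatibleProjectiveHomography alg = new CompatibleProjectiveHomography();
		return alg.fitPoints(pointsInFrame1, pointsInFrame2, H);
	}
}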
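
Finally, a minimal sketch for the single-view case handled by fitCameraPoints(), where one view's camera matrix is known in both projective frames along with two or more matched points; the resulting H can then transfer points between frames via X1 = H*X2. All names here are again hypothetical.

// Illustrative usage only; not part of BoofCV.
import boofcv.alg.geo.pose.CompatibleProjectiveHomography;
import georegression.geometry.GeometryMath_F64;
import georegression.struct.point.Point4D_F64;
import org.ejml.data.DMatrixRMaj;

import java.util.List;

public class FitCameraPointsExample {
	/** Estimates H (P1*H = P2, X1 = H*X2) from one view seen in both frames plus matched homogeneous points. */
	static boolean estimate( DMatrixRMaj cameraInFrame1, DMatrixRMaj cameraInFrame2,
							 List<Point4D_F64> pointsInFrame1, List<Point4D_F64> pointsInFrame2,
							 DMatrixRMaj H ) {
		// The shared view constrains 11 of H's 15 degrees of freedom; each point pair adds 3 linear constraints
		CompatibleProjectiveHomography alg = new CompatibleProjectiveHomography();
		return alg.fitCameraPoints(cameraInFrame1, cameraInFrame2, pointsInFrame1, pointsInFrame2, H);
	}

	/** Transfers a point from projective frame 2 into frame 1 using X1 = H*X2. */
	static Point4D_F64 transferToFrame1( DMatrixRMaj H, Point4D_F64 pointInFrame2 ) {
		Point4D_F64 out = new Point4D_F64();
		GeometryMath_F64.mult(H, pointInFrame2, out);
		return out;
	}
}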



