
ptolemaeus.math.linear.schur.ComplexSchurDecomposition Maven / Gradle / Ivy
/**
* MIT License
*
* Copyright (c) 2023 Lockheed Martin Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
* documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
* WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package ptolemaeus.math.linear.schur;
import java.util.Arrays;
import java.util.Optional;
import java.util.function.Consumer;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.hipparchus.complex.Complex;
import org.hipparchus.linear.AnyMatrix;
import org.hipparchus.util.FastMath;
import ptolemaeus.commons.Parallelism;
import ptolemaeus.commons.Validation;
import ptolemaeus.math.CountableDoubleIterable;
import ptolemaeus.math.NumericalMethodsException;
import ptolemaeus.math.commons.NumericalMethodsUtils;
import ptolemaeus.math.functions.MultivariateMatrixValuedFunction;
import ptolemaeus.math.functions.VectorField;
import ptolemaeus.math.levenbergmarquardt.LevenbergMarquardt;
import ptolemaeus.math.levenbergmarquardt.LevenbergMarquardtConstants;
import ptolemaeus.math.levenbergmarquardt.LevenbergMarquardtParameter;
import ptolemaeus.math.levenbergmarquardt.LevenbergMarquardtProblem;
import ptolemaeus.math.linear.complex.ComplexMatrix;
import ptolemaeus.math.linear.complex.ComplexVector;
import ptolemaeus.math.linear.householderreductions.ComplexHessenbergReduction;
import ptolemaeus.math.linear.householderreductions.Householder;
import ptolemaeus.math.linear.real.NVector;
import ptolemaeus.math.weightedleastsquares.WeightedLeastSquaresSolution;
// CHECKSTYLE.OFF: LineLength - the references make the lines too long
/** See {@link SchurHelper} - the workhorse of this package - for implementation details
*
*
*
*
* Compute the {@link ComplexSchurDecomposition} of a square {@link ComplexMatrix} {@code A = QUQ}H, where
* {@code Q} is unitary and {@code U} is upper-triangular. Because {@code Q} is a unitary similarity transformation,
* {@code A} and {@code U} have the same Eigenvalues.
*
*
* At a high-level, QR algorithms compute the Eigenvalues of the input matrix and accumulate unitary transformations
* that were used to get to that point in {@code Q}, and compute {@code U} at the end as
* {@code U = Q}H{@code AQ}.
*
*
* These algorithms don't compute {@code U} directly because we use deflations (see {@link QRDeflations}) to reduce
* "deflate" the problem into smaller sub problems, which makes the difference between tractable and intractable for
* even moderately large matrices.
*
*
* TODO Ryan Moser: 2024-08-21: make the {@link Parallelism} configurable from the {@link ComplexSchurDecomposition}
* constructor or higher.
*
* TODO Ryan Moser, 2024-08-19: implement optional balancing. Again, see the noted references. [2] in particular has a
* nice algorithm description.
*
* TODO Ryan Moser, 2024-08-19: implement aggressive early deflation. It should be easy enough. Follow the algorithm
* specifications in [2] and [4] below. Ask me - RM - for a starting point, because I have some of this done already
*
*
* TODO Ryan Moser, 2024-08-19: determine why the multi-bulge multi-shift QR sweep (MBMSQRS) was so slow.
* Similar to above, ask me - RM - for a starting point if you want to tackle this. I implemented it, but it was much
* slower than the large-bulge QR sweep that's implemented here. To re-implement with what I have saved off is not a lot
* of work.
* The MBMSQRS definitely required fewer {@link Complex} operations - being near optimal according to the references -
* but the actual runtime was much worse. Poor memory access optimization? Maybe it's only faster for large (n
* > 1000) dense matrices? Requires investigation.
*
*
*
*
* Algorithm description:
*
*
*
*
* The first step is to ensure that the input matrix is already upper-Hessenberg, or reduce it to upper-Hessenberg using
* a full {@link ComplexHessenbergReduction}. Upper-Hessenberg matrices have the form
*
*
* H =
*
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . .
* . . . . . . . . . . . . .
* . . . . . . . . . . . .
* . . . . . . . . . . .
* . . . . . . . . . .
* . . . . . . . . .
* . . . . . . . .
* . . . . . . .
* . . . . . .
* . . . . .
* . . . .
* . . .
* . .
*
*
* where {@code .}s represent arbitrary {@link Complex} elements and empty spots are zeros.
*
*
*
*
* Now in upper-Hessenberg form, we perform an iteration that implicitly computes a shifted QR step (see references for
* details, I don't want to put it all here).
*
*
*
*
* Here, we do a large-bulge multi-shift QR sweep to create a single large "shift" transformation that results in a
* single large bulge on the first sub-diagonal of the Hessenberg form. Empirically - according to [1] and [3], using
* more than 10 shifts with a large-bulge sweep iteration results in significant so-called shift-blurring, severely
* impacting the rate of convergence. Thus {@link QRShifts#computeNumShiftsPerBulge(int)} does not return more than
* {@code 10}.
*
*
* The shift transformation is a {@link Householder} matrix for the first column of the matrix polynomial shift-function
* {@code P(U)}:
*
*
*
*
* {@code P(U) = (U - }λm{@code I) .
* (U - }λm-1{@code I) .
* (U - }λm-2{@code I)
* ...
* (U - }λ2{@code I) .
* (U - }λ1{@code I)}
*
*
* where {@code m} is the number of "shifts" to apply, but we do it implicitly, thus avoiding the O((n^3)^m)
* cost of evaluating the full that polynomial. See
* {@link QRShifts#computeFirstColumnOfShiftPolynomial(ComplexMatrix, Complex[])} details
*
*
* We only need the non-trivial (m+1)x(m+1) block of the Householder matrix to apply it to {@code U}, because the
* Householder is really just an identity matrix with a (m+1)x(m+1) non-identity block on its diagonal and, in this
* case, in the first place on the diagonal, and we can use
* {@link ComplexMatrix#preMultiplyInPlace(ComplexMatrix, ComplexMatrix, int, int)} and
* {@link ComplexMatrix#postMultiplyInPlace(ComplexMatrix, ComplexMatrix, int, int)} to do this efficiently:
*
*
*
* . . .
* . . .
* . . .
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
*
*
* Applying this unitary similarity transformation to {@code H} yields a "bulge" (the elements which do not conform to
* the upper-Hessenberg form):
*
*
* H =
*
* m m m
* . . . . . . . . . . . . . . . . m
* b b b . . . . . . . . . . . . . m
* b b b . . . . . . . . . . . . . m
* b b b . . . . . . . . . . . . .
* . . . . . . . . . . . . .
* . . . . . . . . . . . .
* . . . . . . . . . . .
* . . . . . . . . . .
* . . . . . . . . .
* . . . . . . . .
* . . . . . . .
* . . . . . .
* . . . . .
* . . . .
* . . .
* . .
*
*
* Now we need to use a series of unitary similarity transforms to bring {@code H} back into upper-Hessenberg form. This
* process is called "bulge chasing". It's called this because at each step of the reduction (as one can see when using
* an evaluation monitor) the bulge is "shifted down" one place along the first sub-diagonal until it "falls off":
*
*
*
* - in this example, we're using two shifts. This has a special name: the implicit double-shift QR algorithm
*
-
*
- {@code m}s mark the columns and rows modified in each step of {@link ComplexHessenbergReduction} (note that .'s
* that are in these entries are not unchanged, they're just not relevant to the illustration)
* - {@code b}s are used to mark the elements of a (2+1)x(2+1) == 3x3 "bulge" as it moves after each
* multiplication
*
*
*
* m m m
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . . . m
* b b b . . . . . . . . . . . . m
* b b b . . . . . . . . . . . . m
* b b b . . . . . . . . . . . .
* . . . . . . . . . . . .
* . . . . . . . . . . .
* . . . . . . . . . .
* . . . . . . . . .
* . . . . . . . .
* . . . . . . .
* . . . . . .
* . . . . .
* . . . .
* . . .
* . .
*
*
*
* m m m
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . . m
* b b b . . . . . . . . . . . m
* b b b . . . . . . . . . . . m
* b b b . . . . . . . . . . .
* . . . . . . . . . . .
* . . . . . . . . . .
* . . . . . . . . .
* . . . . . . . .
* . . . . . . .
* . . . . . .
* . . . . .
* . . . .
* . . .
* . .
*
*
*
* m m m
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . m
* b b b . . . . . . . . . . m
* b b b . . . . . . . . . . m
* b b b . . . . . . . . . .
* . . . . . . . . . .
* . . . . . . . . .
* . . . . . . . .
* . . . . . . .
* . . . . . .
* . . . . .
* . . . .
* . . .
* . .
*
*
* etc.
*
*
* m m m
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . .
* . . . . . . . . . . . . .
* . . . . . . . . . . . .
* . . . . . . . . . . .
* . . . . . . . . . .
* . . . . . . . . .
* . . . . . . . .
* . . . . . . .
* . . . . . . m
* b b b . . m
* b b b . . m
* b b b . .
* . .
*
*
*
* m m m
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . .
* . . . . . . . . . . . . .
* . . . . . . . . . . . .
* . . . . . . . . . . .
* . . . . . . . . . .
* . . . . . . . . .
* . . . . . . . .
* . . . . . . .
* . . . . . .
* . . . . . m
* b b b . m
* b b b . m
* b b b .
*
*
*
* m m m
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . .
* . . . . . . . . . . . . .
* . . . . . . . . . . . .
* . . . . . . . . . . .
* . . . . . . . . . .
* . . . . . . . . .
* . . . . . . . .
* . . . . . . .
* . . . . . .
* . . . . .
* . . . . m
* b b b m
* b b b m
*
*
*
* m m
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . .
* . . . . . . . . . . . . .
* . . . . . . . . . . . .
* . . . . . . . . . . .
* . . . . . . . . . .
* . . . . . . . . .
* . . . . . . . .
* . . . . . . .
* . . . . . .
* . . . . .
* . . . .
* . . . m
* b b m
*
*
* And now we're back to upper-Hessenberg form!
*
*
*
*
* After each sweep, we check whether we can "deflate" the problem; i.e., break the problem into two (or more, in
* general) sub-problems that can be solved independently, which allows us to ignore updating large portions of the
* matrix. See {@link QRDeflations} for details.
*
*
* This proceeds until we've deflated to a series of small upper-Hessenberg matrices along the diagonal of {@code H},
* each of which we solve in the same way. This recursive procedure continues until we deflate totally into {@code 1x1}
* and {@code 2x2} matrices along the main diagonal of {@code H}. The {@code 1x1}s are Eigenvalues themselves, and the
* {@code 2x2}s are matrices with complex conjugate pairs of Eigenvalues of {@code H}, which we can reduce in closed
* form.
*
*
* Book/paper references:
*
*
* - [1] Numerical Methods for General and Structured Eigenvalue Problems, Kressner
* - [2] Matrix Computations, 4th Edition, Golub and Van Load
* - [3]
* The Multishift QR Algorithm. Part I: Maintaining Well-Focused Shifts and Level 3 Performance
* - [4]
* The Multishift QR Algorithm. Part II: Aggressive Early Deflation
* - [5] A New Deflation Criterion for the QR
* Algorithm
*
*
* LAPACK v3.12.0 references:
*
*
*
* @author Ryan Moser, [email protected]
*
* @see SchurHelper
* @see QRShifts
* @see QRDeflations
* @see SchurMode */
// CHECKSTYLE.ON: LineLength
public class ComplexSchurDecomposition {
/** A {@link Logger} for logging */
private static final Logger LOGGER = LogManager.getLogger(ComplexSchurDecomposition.class);
/** A seed for {@link CountableDoubleIterable} if we need to apply a perturbation to an Eigenvector guess */
private static final long SEED = 867_5309;
/** We use this constant so that it's easier to experiment with setting it on and off, as it's not something that
* should be user-configurable. LAPACK operates in a way equivalent to this being enabled. */
static final boolean SET_EXACT_ZEROES = true;
/** We multiply the dimension of the square input matrix by this to compute the max number of iterations to
* consider. This is the technique used by each reference I've read, but there is not necessarily a convergence
* guarantee, thus the value is configurable. */
private static final int MAX_ITERS_SCALE = 30;
/** a 1x1 identity matrix */
private static final ComplexMatrix ID_1X1 = ComplexMatrix.identity(1);
/** a 2x2 identity matrix */
private static final ComplexMatrix ID_2X2 = ComplexMatrix.identity(2);
/** the {@link ComplexMatrix} for which we're computing a {@link ComplexSchurDecomposition} */
private final ComplexMatrix A;
/** the {@link SchurMode} in which we computed a {@link ComplexSchurDecomposition} */
private final SchurMode mode;
/** The unitary {@link ComplexMatrix} {@code Q} in {@code A = QUQ}-1{@code == QUQ}H */
private final ComplexMatrix Q;
/** The upper-triangular, unitarily similar to the original {@code A} in
* {@code A = QUQ}-1{@code == QUQ}H */
private final ComplexMatrix U;
/** The vector of Eigenvalues for the original {@link ComplexMatrix} that's been decomposed. Ideally, these will be
* be identical to the diagonal of {@link #getU()}, but practically they will be slightly different due to numerical
* imprecision. */
private final Complex[] eigenvalues;
/** Constructor
*
* @param A the {@link ComplexMatrix} for which we computed a {@link ComplexSchurDecomposition}
* @param mode the {@link SchurMode} in which we computed a {@link ComplexSchurDecomposition}
* @param Q the unitary {@link ComplexMatrix} {@code Q} from {@code A = QUQ}-1{@code == QUQ}H
* @param U the upper-triangular {@link ComplexMatrix} {@code U} from
* {@code A = QUQ}-1{@code == QUQ}H
* @param eigenvalues the Eigenvalues of the original {@link ComplexMatrix} {@code A = QUQ}H */
ComplexSchurDecomposition(final ComplexMatrix A,
final SchurMode mode,
final ComplexMatrix Q,
final ComplexMatrix U,
final Complex... eigenvalues) {
this.A = A;
this.mode = mode;
this.Q = Q;
this.U = U;
this.eigenvalues = eigenvalues;
}
/** @return the {@link ComplexMatrix matrix} for which we computed {@code this} {@link ComplexSchurDecomposition};
* i.e., {@code A = QUQ}-1{@code == QUQ}H */
public ComplexMatrix getA() {
return this.A.copy();
}
/** @return the {@link SchurMode} used to compute {@code this} {@link ComplexSchurDecomposition} */
public SchurMode getSchurMode() {
return this.mode;
}
/** @return an {@link Optional} holding the unitary {@code Q} from the Schur decomposition
* {@code A = QUQ}-1{@code == QUQ}H. If {@link SchurMode#EIGENVALUES} was used, this
* {@link Optional} will be empty. */
public Optional getQ() {
return Optional.ofNullable(this.Q).map(ComplexMatrix::copy);
}
/** @return an {@link Optional} holding the upper-triangular {@code U} from the Schur decomposition
* {@code A = QUQ}-1{@code == QUQ}H. If {@link SchurMode#EIGENVALUES} was used, this
* {@link Optional} will be empty. If {@link SchurMode#BLOCK_SCHUR_FORM} was used, this will be in block
* Schur form. See {@link SchurMode#BLOCK_SCHUR_FORM} for details.
*
* @implNote {@code U} is actually computed after the fact as {@code U == Q}H{@code AQ}, so the diagonal
* entries of {@code U} may not match {@link #getEigenvalues()} exactly */
public Optional getU() {
return Optional.ofNullable(this.U).map(ComplexMatrix::copy);
}
/** @return a reference to the array of Eigenvalues of {@link #getA() A}. These are always computed. */
Complex[] getEigenvaluesRef() {
return this.eigenvalues;
}
/** @return the array of Eigenvalues of {@link #getA() A}. These are always computed. */
public Complex[] getEigenvaluesArr() {
return this.eigenvalues.clone();
}
/** @return the Eigenvalues of {@link #getA() A}. These are always computed. */
public ComplexVector getEigenvalues() {
return new ComplexVector(this.eigenvalues);
}
/** @return if {@link #Q} and {@link #U} are present, compute {@code QUQ}H, which should be equal (within
* tolerance) to the original matrix that was decomposed. Otherwise, throw an exception.
*
* @apiNote this is only {@code package-private} because it's not useful outside of testing */
ComplexMatrix computeQUQH() {
// below, ensure that Q and U are present. You can't have one without the other.
final ComplexMatrix notOptionalQHere = this.getQ().orElseThrow();
return notOptionalQHere.times(this.U).timesConjugateTransposeOf(notOptionalQHere);
}
/** Compute the {@link ComplexSchurDecomposition} of the given {@link ComplexMatrix A}. If
* {@link ComplexMatrix#isReal(double) A has exclusively real entries}, then we return the
* {@link SchurMode#BLOCK_SCHUR_FORM real Schur form} of {@code A}. Else, we return the {@link SchurMode#SCHUR_FORM
* completely upper-Triangular Schur form}.
*
* @param A the {@link ComplexMatrix} for which we want a {@link ComplexSchurDecomposition}
* @return the {@link ComplexSchurDecomposition} of {@code A} */
public static ComplexSchurDecomposition of(final ComplexMatrix A) {
final SchurMode schurMode = A.isReal(0.0) ? SchurMode.BLOCK_SCHUR_FORM : SchurMode.SCHUR_FORM;
return of(A, schurMode, chooseMaxIters(A), null);
}
/** Compute the {@link ComplexSchurDecomposition} of the given {@link ComplexMatrix A}
*
* @param A the {@link ComplexMatrix} for which we want a {@link ComplexSchurDecomposition}
* @param schurMode the solve mode to use. See {@link SchurMode} for details.
* @return the {@link ComplexSchurDecomposition} of {@code A} */
public static ComplexSchurDecomposition of(final ComplexMatrix A, final SchurMode schurMode) {
return of(A, schurMode, chooseMaxIters(A), null);
}
/** Compute the {@link ComplexSchurDecomposition} of the given {@link ComplexMatrix A}, with additional fine-tuning
* available.
*
* @param A the {@link ComplexMatrix} for which we want a {@link ComplexSchurDecomposition}
* @param schurMode the solve mode to use. See {@link SchurMode} for details.
* @param maxIters the maximum iterations we will consider at the top level. Recursive calls use a value computed
* as-needed ({@link #chooseMaxIters(AnyMatrix)}).
* @param evaluationMonitor a {@link ComplexMatrix} {@link Consumer} that's updated with changes to the {@code U}
* matrix. An evaluation monitor is a debugging/maintenance tool and should not be used by default, as it
* will be very performance intensive.
* @return the {@link ComplexSchurDecomposition} of {@code A}. */
public static ComplexSchurDecomposition of(final ComplexMatrix A,
final SchurMode schurMode,
final int maxIters,
final Consumer evaluationMonitor) {
final SchurHelper schur = new SchurHelper(A, schurMode, maxIters, evaluationMonitor, SET_EXACT_ZEROES, 0);
boolean alreadyUpperHessenberg = true;
for (int i = 2; alreadyUpperHessenberg && i < schur.dim; i++) {
for (int j = 0; alreadyUpperHessenberg && j < i - 1; j++) {
alreadyUpperHessenberg &= A.getEntry(i, j).equals(Complex.ZERO);
}
}
return solve(schur, alreadyUpperHessenberg);
}
/** Choose a max iterations we should use when computing the {@link ComplexSchurDecomposition} of the given
* {@link AnyMatrix}.
*
* @param A the matrix
* @return the max default max iterations for a matrix of the given size */
public static int chooseMaxIters(final AnyMatrix A) {
return MAX_ITERS_SCALE * A.getRowDimension();
}
/** @param schur the {@link SchurHelper} instance that holds the data that's updated as we iterate; i.e., this is
* updated in place!
* @param alreadyUpperHessenberg the {@code true} whether {@link SchurHelper#currentU schur.currentU} is already
* upper-Hessenberg, {@code false} otherwise.
* @return the {@link ComplexSchurDecomposition} of {@link SchurHelper#currentU schur.currentU}
* in-place! */
// CHECKSTYLE.OFF: MethodLength
static ComplexSchurDecomposition solve(final SchurHelper schur, final boolean alreadyUpperHessenberg) {
if (schur.dim == 1) { // 1x1
return new ComplexSchurDecomposition(schur.originalA,
schur.schurMode,
ID_1X1,
schur.originalA,
schur.originalA.getEntry(0, 0));
// no need to update the evaluation monitor
}
if (schur.dim == 2) { // 2x2
final ComplexSchurDecomposition answer2x2 = compute2x2ClosedForm(schur);
if (schur.hasEvalMonitor) {
schur.updateUBlock(answer2x2.U, 0, 0);
}
return answer2x2;
}
// at this point, the shape (non-zero structure) of schur.currentU could be anything
// (3+)x(3+)
if (!alreadyUpperHessenberg) {
schur.doHessenbergReduction();
}
// @formatter:off
/** Once here, schur.currentU is necessarily upper-Hessenberg:
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . .
* . . . . . . . . . . . . .
* . . . . . . . . . . . .
* . . . . . . . . . . .
* . . . . . . . . . .
* . . . . . . . . .
* . . . . . . . .
* . . . . . . .
* . . . . . .
* . . . . .
* . . . .
* . . .
* . . */
// @formatter:on
for (int count = 0; count < schur.maxIters; count++) {
final ComplexMatrix[] deflations = QRDeflations.computeDeflations(schur.currentU,
schur.numShiftsPerBulge,
schur.machEpsFrobA);
if (deflations == null) { // no deflation opportunities detected
schur.applyShiftBulge(); // apply bulge
// @formatter:off
/* . . . . . . . . . . . . . . . .
* b b b b . . . . . . . . . . . . <--- bulge entries
* b b b b . . . . . . . . . . . . "
* b b b b . . . . . . . . . . . . "
* b b b b . . . . . . . . . . . . "
* . . . . . . . . . . . .
* . . . . . . . . . . .
* . . . . . . . . . .
* . . . . . . . . .
* . . . . . . . .
* . . . . . . .
* . . . . . .
* . . . . .
* . . . .
* . . .
* . . */
schur.chaseShiftBulge(); // Back to upper-Hessenberg
/* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . . .
* . . . . . . . . . . . . . .
* . . . . . . . . . . . . .
* . . . . . . . . . . . .
* . . . . . . . . . . .
* . . . . . . . . . <-- if we're lucky, we'll find a deflation opportunity
* . . . . . . . . .
* . . . . . . . .
* . . . . . . .
* . . . . . .
* . . . . .
* . . . .
* . . .
* . . */
// @formatter:on
}
else {
if (schur.hasEvalMonitor) {
/* If using an evaluation monitor, we assume that the best possible performance is not required, so
* we do some extra work. Once we enter this else-statement, currentU is only partially updated
* (that's the whole point of the deflation), so we can no longer use it, thus it's o.k. to modify
* it for reporting purposes. */
// @formatter:off
/** stackedDecomposed has the form of two decoupled upper-Hessenberg matrices; e.g.,
* . . . . .
* . . . . .
* . . . .
* . . .
* . .
* . . . .
* . . . .
* . . .
* . .
*/
// @formatter:on
final ComplexMatrix stackedDecomposed = ComplexMatrix.blockDiagonalConcatenate(deflations);
schur.updateUBlock(stackedDecomposed, 0, 0);
}
final int numDeflations = deflations.length; // == 2
/* currently, this is always exactly 2, but everything here is easily generalized to K > 2 deflations
* (create the Helper instances sequentially, solve in parallel, construct solution sequentially). In
* fact, that's how I (RM) initially wrote it, solving the deflated problems in parallel. I found,
* however, that it is almost never the case that more than one deflation opportunity appears at once
* and, indeed, this a typical assumption in descriptions of the implicitly-shifted QR algorithms */
final Complex[] eigenvalues = new Complex[schur.dim];
int currDiagIndex = 0;
for (int i = 0; i < numDeflations; i++) {
final ComplexMatrix deflation = deflations[i];
final Consumer subEvalMont;
if (schur.hasEvalMonitor) {
/* these Consumers update A with the iterations of this algorithm applied to its deflations, and
* it does this nested arbitrarily many times (or at least until there's a stack overflow, but
* that's infeasible). */
final int fCurrDiagIndex = currDiagIndex;
subEvalMont = subUpdate -> {
schur.updateUBlock(subUpdate, fCurrDiagIndex, fCurrDiagIndex);
};
}
else {
subEvalMont = null;
}
final SchurHelper deflatedSchur = new SchurHelper(deflation,
schur.schurMode,
chooseMaxIters(deflation),
subEvalMont,
SET_EXACT_ZEROES,
schur.recursiveDepth + 1);
// recurse:
final ComplexSchurDecomposition subDecomp = solve(deflatedSchur, true);
// modifies Q data!
schur.possiblyPostMultiplyQ(subDecomp.Q, currDiagIndex); // right-update for Q
// modifies Q data!
for (int eigCounter = 0; eigCounter < subDecomp.eigenvalues.length; eigCounter++) {
eigenvalues[currDiagIndex + eigCounter] = subDecomp.eigenvalues[eigCounter];
}
currDiagIndex += deflation.getColumnDimension();
}
if (schur.schurMode == SchurMode.EIGENVALUES) {
return new ComplexSchurDecomposition(schur.originalA, schur.schurMode, null, null, eigenvalues);
}
// we only need to compute U when we're about to return from the first call in the stack
final ComplexMatrix Q = schur.currentQ;
final ComplexMatrix U =
schur.recursiveDepth != 0 ? null : Q.conjugateTransposeMultiply(schur.originalA).times(Q);
return new ComplexSchurDecomposition(schur.originalA, schur.schurMode, schur.currentQ, U, eigenvalues);
}
}
throw NumericalMethodsException.formatted(
"The QR algorithm failed to converge in %d iterations for the following matrix:%n%s",
schur.maxIters,
NumericalMethodsUtils.toJavaCode(schur.originalA)).get();
}
// CHECKSTYLE.ON: MethodLength
/** Compute and return a {@link ComplexSchurDecomposition} for the given {@code 2x2} {@link ComplexMatrix}
*
*
* See {@link #compute2x2ClosedForm(ComplexMatrix, SchurMode, boolean)}
*
* @param schur the {@link SchurHelper} for which we want to compute a closed-form solution
* @return the closed-form solution */
private static ComplexSchurDecomposition compute2x2ClosedForm(final SchurHelper schur) {
return compute2x2ClosedForm(schur.currentU, schur.schurMode, schur.hasEvalMonitor);
}
/** Compute and return a {@link ComplexSchurDecomposition} for the given {@code 2x2} {@link ComplexMatrix}
*
*
* We compute (construct, really) a closed-form expression using an Eigenvector of {@code M} and a vector orthogonal
* to that Eigenvector. These two vectors allow us to construct a solution because - when concatenated as column
* vectors of a matrix - they necessarily result in a unitary (orthogonal) triangularization of {@code M}; i.e., a
* Schur decomposition.
*
*
* See this PDF
* for an example of this construction.
*
* TODO Ryan Moser, 2024-08-07: see TODO in {@link QRShifts#computeEigenTrailingPrincipal2x2(ComplexMatrix)}
*
* @param M the {@code 2x2} matrix for which we want the {@link ComplexSchurDecomposition}
* @param schurMode the {@link SchurMode}, which determines how much work to do here
* @param hasEvalMonitor {@code true} if we need to consider an evaluation monitor, {@code false} otherwise
* @return the {@link ComplexSchurDecomposition} */
static ComplexSchurDecomposition compute2x2ClosedForm(final ComplexMatrix M,
final SchurMode schurMode,
final boolean hasEvalMonitor) {
final Complex[] eigenValues2x2 = QRShifts.computeEigenTrailingPrincipal2x2(M);
if (schurMode == SchurMode.EIGENVALUES && !hasEvalMonitor) {
return new ComplexSchurDecomposition(M, schurMode, null, null, eigenValues2x2);
}
if (schurMode == SchurMode.BLOCK_SCHUR_FORM) {
return new ComplexSchurDecomposition(M, schurMode, ID_2X2, M, eigenValues2x2);
}
final Complex l1 = eigenValues2x2[1]; // choose an Eigenvalue - this can probably be done intelligently
final Complex c = M.getEntry(1, 0);
final Complex[] eigenvectorArr; // first, construct the 2D Eigenvector associated with the given Eigenvalue
if (c.isZero()) {
final Complex b = M.getEntry(0, 1);
if (b.isZero()) { // b == c == 0
eigenvectorArr = new Complex[] { Complex.ONE, Complex.ZERO };
}
else { // c == 0
final Complex a = M.getEntry(0, 0);
eigenvectorArr = new Complex[] { b, l1.subtract(a) };
}
}
else { // c != 0
final Complex d = M.getEntry(1, 1);
eigenvectorArr = new Complex[] {l1.subtract(d), c};
}
ComplexVector eigenvector = new ComplexVector(eigenvectorArr[0], eigenvectorArr[1]);
final Complex hermitianNorm = Complex.valueOf(eigenvector.hermitianNorm());
eigenvector = eigenvector.dividedBy(hermitianNorm);
/* eVecOrth1 is equal to the following, but is more efficient and avoids any potential precision loss due to the
* double negation of {@code b}: eigenvectorArr[1].conjugate().negate();
*
* Proof: -conj(a + b i) == -(a - b i) == -a + b i */
final Complex eVecOrth1 = // vector orthogonal to eigenvector
Complex.valueOf(-eigenvectorArr[1].getRealPart(), // -a
eigenvectorArr[1].getImaginaryPart()); // b i
final Complex eVecOrth2 = eigenvectorArr[0].conjugate();
ComplexVector orth2Eigenv = new ComplexVector(eVecOrth1, eVecOrth2);
orth2Eigenv = orth2Eigenv.dividedBy(hermitianNorm);
final Complex[][] QArr = new Complex[][] {
{ eigenvector.getEntry(0), orth2Eigenv.getEntry(0) },
{ eigenvector.getEntry(1), orth2Eigenv.getEntry(1) }
}; // ^ an Eigenvector, ^ orthogonal to that Eigenvector
/* by construction, Q is a unitary matrix (Q^-1 == Q^H) */
final ComplexMatrix Q = new ComplexMatrix(QArr);
final ComplexMatrix U = Q.conjugateTransposeMultiply(M).times(Q);
return new ComplexSchurDecomposition(M, schurMode, Q, U, eigenValues2x2);
}
/** Compute the Eigenvectors of {@code A} - the original {@link ComplexMatrix} for which {@code this} is a
* {@link ComplexSchurDecomposition} - using a {@link LevenbergMarquardt least-squares optimization}.
*
*
* If {@code this} was not created with {@link SchurMode#SCHUR_FORM}, we return an {@link Optional empty Optional}
*
*
*
* The {@link #getEigenvalues()} and the Eigenvectors computed here are indexed the same way; e.g., to get the
* {@code i}th Eigenvalue and Eigenvector pair:
*
*
* final ComplexVector eigenvalues = schur.getEigenvalues();
* final ComplexMatrix eigenvectors = schur.computeEigenvectors().orElseThrow();
*
* final Complex eigenvalue = eigenvalues.getEntry(i);
* final ComplexVector eigenvector = new ComplexVector(eigenvectors.getColumn(i));
*
*
* This is not a standard approach as far as I can tell, and it is very slow for large matrices, but it
* works really rather well as a temporary implementation until I (RM) can come back and implement a more
* standardized approach, and it's just as fast as {@link ComplexSchurDecomposition} for small, square matrices
* (e.g., {@code n < 32}).
*
*
* As with all Eigenvector algorithms, it is possible that we fail to compute all of them, because (e.g.) we find
* duplicates, fail to properly converge, etc. To mitigate this as best we can, the initial guesses are the mutually
* orthogonal column vectors of {@link #getQ()} ({@code Q} is unitary, which implies orthogonal column vectors).
*
* @param eigvecFailMode the {@link EigenvectorFailureMode} that dictates how we'll handle cases where we fail to
* compute an Eigenvector
*
* @return an {@link Optional empty Optional} if {@code this} was not created with {@link SchurMode#SCHUR_FORM}.
* Otherwise, return a {@link ComplexMatrix} whose columns are the Eigenvectors of {@code A}.
*
* @see #computeEigenvector(Complex, ComplexVector, double, EigenvectorFailureMode) */
public Optional computeEigenvectors(final EigenvectorFailureMode eigvecFailMode) {
if (this.mode != SchurMode.SCHUR_FORM) {
return Optional.empty();
}
/* epsilon is a small number around the noise level of the system due to numerical imprecision. The 10x factor
* is somewhat arbitrary, as it was chosen for working well empirically, but using the Frobenius norm x machEps
* is standard and, in the QR algorithm, a small integer times this values maintains so-called backwards
* stability. */
final double epsilon = 10 * this.A.computeFrobeniusNorm() * NumericalMethodsUtils.MACHINE_EPSILON;
final int n = this.Q.getColumnDimension();
ComplexMatrix eigenvectorMatrix = ComplexMatrix.zero(n, n);
for (int i = 0; i < n; i++) {
final Complex eigenvalue = this.eigenvalues[i];
final ComplexVector initialGuessVec = new ComplexVector(this.Q.getColumn(i));
final ComplexVector eigenvector = computeEigenvectorLevenbergMarquardt(this.A,
eigenvalue,
initialGuessVec,
epsilon,
eigvecFailMode);
eigenvectorMatrix.setColumnVector(i, eigenvector);
}
return Optional.of(eigenvectorMatrix);
}
/** Compute an Eigenvector which corresponds to the given Eigenvalue for {@link #getA()}. This comes with all of the
* caveats listed in {@link #computeEigenvectors(EigenvectorFailureMode)}.
*
* @param eigenvalue the Eigenvalue for which we want an Eigenvector
* @param initialGuessVec the initial guess
* @param epsilon the epsilon for checking convergence and whether the solution is degenerate. We only compute it
* externally and pass it in so that we only have to do it once.
* @param eigvecFailMode the {@link EigenvectorFailureMode} that dictates how we'll handle cases where we fail to
* compute an Eigenvector
*
* @return an {@link Optional empty Optional} if {@code this} was not created with {@link SchurMode#SCHUR_FORM}.
*
*
* In all cases, if we successfully compute an Eigenvector, we return it. All
* successful results will be normalized.
*
*
* If we fail to compute an Eigenvector and {@code isRetry == false}, we return {@code null}.
* {@link #computeEigenvectorLevenbergMarquardt(ComplexMatrix, Complex, ComplexVector,
* double, EigenvectorFailureMode)}
* relies on this functionality.
*
*
* If we fail to compute an Eigenvector and {@code isRetry == true}, the result is determined by the
* given {@link EigenvectorFailureMode}. */
public Optional computeEigenvector(final Complex eigenvalue,
final ComplexVector initialGuessVec,
final double epsilon,
final EigenvectorFailureMode eigvecFailMode) {
if (this.mode != SchurMode.SCHUR_FORM) {
return Optional.empty();
}
final ComplexVector eigenvector =
computeEigenvectorLevenbergMarquardt(this.A, eigenvalue, initialGuessVec, epsilon, eigvecFailMode);
return Optional.of(eigenvector);
}
/** Compute an Eigenvector for the given Eigenvalue
*
* @param A the {@link ComplexMatrix} for which we want Eigenvectors
* @param eigenvalue the Eigenvalue for which we want an Eigenvector
* @param initialGuessVec the initial guess
* @param epsilon the epsilon for checking convergence and whether the solution is degenerate. We only compute it
* externally and pass it in so that we only have to do it once. Must be greater than zero.
* @param eigvecFailMode the {@link EigenvectorFailureMode} that dictates how we'll handle cases where we fail to
* compute an Eigenvector
* @return an Eigenvector of {@code A}. In all cases, if we successfully compute an Eigenvector, we return it. All
* successful results will be normalized.
*
*
* If we fail to compute an Eigenvector and {@code isRetry == false}, we return {@code null}.
* {@link #computeEigenvectorLevenbergMarquardt(ComplexMatrix, Complex, ComplexVector,
* double, EigenvectorFailureMode)}
* relies on this functionality.
*
*
* If we fail to compute an Eigenvector and {@code isRetry == true}, the result is determined by the
* given {@link EigenvectorFailureMode}. */
public static ComplexVector computeEigenvectorLevenbergMarquardt(final ComplexMatrix A,
final Complex eigenvalue,
final ComplexVector initialGuessVec,
final double epsilon,
final EigenvectorFailureMode eigvecFailMode) {
final ComplexVector firstTry = // possibly null because isRetry == false
computeEigenvectorLevenbergMarquardt(A, eigenvalue, initialGuessVec, epsilon, eigvecFailMode, false);
if (firstTry != null) {
return firstTry;
}
final ComplexVector secondTry = // never null because isRetry == true, so we defer to the failure mode
computeEigenvectorLevenbergMarquardt(A, eigenvalue, initialGuessVec, epsilon, eigvecFailMode, true);
return secondTry;
}
/** Compute an Eigenvector for the given Eigenvalue.
*
* @param A the {@link ComplexMatrix} for which we want Eigenvectors
* @param eigenvalue the Eigenvalue for which we want an Eigenvector
* @param initialGuessVec the initial guess
* @param epsilon the epsilon for checking convergence and whether the solution is degenerate. We only compute it
* externally and pass it in so that we only have to do it once. Must be greater than zero.
* @param eigvecFailMode the {@link EigenvectorFailureMode} that dictates how we'll handle cases where we fail to
* compute an Eigenvector
* @param isRetry {@code true} if this is a retry, {@code false} otherwise. On a retry, we apply a small
* perturbation to the initial guess.
* @return an Eigenvector of {@code A}. In all cases, if we successfully compute an Eigenvector, we return it. All
* successful results will be normalized.
*
*
* If we fail to compute an Eigenvector and {@code isRetry == false}, we return {@code null}.
* {@link #computeEigenvectorLevenbergMarquardt(ComplexMatrix, Complex, ComplexVector,
* double, EigenvectorFailureMode)}
* relies on this functionality.
*
*
* If we fail to compute an Eigenvector and {@code isRetry == true}, the result is determined by the
* given {@link EigenvectorFailureMode}. */
private static ComplexVector computeEigenvectorLevenbergMarquardt(final ComplexMatrix A,
final Complex eigenvalue,
final ComplexVector initialGuessVec,
final double epsilon,
final EigenvectorFailureMode eigvecFailMode,
final boolean isRetry) {
Validation.requireGreaterThan(epsilon, 0.0, "epsilon");
if (eigenvalue.norm() < epsilon) {
return ComplexVector.zero(A.getColumnDimension());
}
final int n = A.getColumnDimension();
final boolean isReal = FastMath.abs(eigenvalue.getImaginary()) <= epsilon && A.isReal(epsilon);
final int flattenedDim = isReal ? n : 2 * n;
/* The function f takes as an input a vector of length n OR 2n (determined by isReal; see flatten and unflatten)
* which is flattened into a vector with complex entries, v, we then compute Av - lv (the left-hand side of the
* Eigenvalue problem) before re-flattening the vector and returning. */
final VectorField f = new VectorField(input -> {
final ComplexVector v = unflatten(input, isReal);
final ComplexVector Av = A.operate(v);
final ComplexVector lv = v.times(eigenvalue);
final ComplexVector delta = Av.minus(lv);
return flatten(delta, isReal);
}, flattenedDim, flattenedDim);
final double[] initGuessFlat = flatten(initialGuessVec, isReal);
if (isRetry) {
/* the scaling factor cbrt(machEps) is a bit arbitrary, but it was chosen for a named constant and for
* working well in problematic cases */
final double perturbNorm = initialGuessVec.hermitianNorm()
* NumericalMethodsUtils.CUBE_ROOT_MACHINE_EPSILON;
applyPerturbation(initGuessFlat, perturbNorm);
}
final double[] targetValues = new double[flattenedDim]; // all zeroes
final double[] weights = new double[flattenedDim];
Arrays.fill(weights, 1.0);
final LevenbergMarquardtConstants lmConstants =
new LevenbergMarquardtConstants(flattenedDim,
weights,
epsilon,
20, // max total iterations
5, // max unimproved iterations
LevenbergMarquardtParameter.getDefault(),
LevenbergMarquardtConstants.DEFAULT_ALPHA_EASY,
LevenbergMarquardtConstants.DEFAULT_H);
/* Maybe TODO Ryan Moser 2024-08-28: closed form? The problem being solved should have some very nice
* cancellations due to how linear it is. Also, compare to the so-called Inverse Iteration method. */
final MultivariateMatrixValuedFunction numericalJacobian =
NumericalMethodsUtils.getJacobianFunction(f,
Optional.empty(),
flattenedDim,
flattenedDim,
Parallelism.PARALLEL);
final LevenbergMarquardtProblem problem = new LevenbergMarquardtProblem(f,
numericalJacobian,
initGuessFlat,
targetValues,
lmConstants);
final WeightedLeastSquaresSolution wlss = LevenbergMarquardt.solve(problem, true);
final double[] flattenedSolnPt = wlss.getPoint();
final ComplexVector notNormalized = unflatten(flattenedSolnPt, isReal);
final NVector solnVector = new NVector(flattenedSolnPt);
if (solnVector.getNorm() < epsilon) { /* solnVector has norm (length) similar to the "noise" level of the system
* and is, thus, of dubious use */
if (!isRetry) { // we haven't yet done a retry, so just return null for now, which flags the retry
return null;
}
LOGGER.warn(() -> getWarningLogMessage(A, eigenvalue, initialGuessVec, epsilon));
return eigvecFailMode.handleFailure(notNormalized);
}
return notNormalized.dividedBy(Complex.valueOf(notNormalized.hermitianNorm()));
}
/** Apply a perturbation of size {@code perturbationNorm} to the given array. Note: the array is modified in place.
*
* @param initGuessFlat the flattened initial guess
* @param perturbationNorm the norm of the perturbation to apply
* @implNote this handles the {@code isReal} special-case without modification */
private static void applyPerturbation(final double[] initGuessFlat, final double perturbationNorm) {
final double[] randomComponents = CountableDoubleIterable.monteCarlo(0.0, 1.0, SEED, initGuessFlat.length)
.stream()
.toArray();
final double[] perturb = new NVector(randomComponents).normalized().times(perturbationNorm).getDataRef();
for (int i = 0; i < initGuessFlat.length; i++) {
initGuessFlat[i] = initGuessFlat[i] + perturb[i];
}
}
/** Given a {@link ComplexVector} of dimension {@code n}, "flatten" it into a {@code double[]}.
*
* The array will have length {@code n == v.getDimension()} if {@code isReal == true} and will have length
* {@code n == 2 * v.getDimension()} if {@code !isReal}.
*
* If {@code !isReal}, every even-indexed element (starting at zero) is the real portion of a {@link ComplexVector}
* entry, and every odd element is an imaginary one.
*
* @param v the vector to flatten
* @param isReal {@code true} to flatten the {@link ComplexVector} into an array of strictly the real components of
* {@code v} with length {@code n == v.getDimension()}, {@code false} to flatten into an array with both the
* real and imaginary components of {@code v} with length {@code n == 2 * v.getDimension()}
* @return the flattened vector */
static double[] flatten(final ComplexVector v, final boolean isReal) {
final int n = v.getDimension();
final int flatenedDim = isReal ? n : 2 * n; // see JavaDoc
final double[] answer = new double[flatenedDim];
for (int i = 0; i < n; i++) {
final Complex ithElt = v.getEntry(i);
if (isReal) {
answer[i] = ithElt.getReal();
}
else {
answer[2 * i] = ithElt.getReal();
answer[2 * i + 1] = ithElt.getImaginary();
}
}
return answer;
}
/** Reverse the process of {@link #flatten(ComplexVector, boolean)}; i.e., given a flattened {@code double[]}
* (vector) of length {@code 2n} map each pair of elements (starting at zero) to a new {@link Complex} in the
* returned answer.
*
* @param flattenedVector the flattened {@code double[]} of length {@code 2n}
* @param isReal {@code true} if we're un-flattening into a real vector wrapped in a {@link ComplexVector},
* {@code false} if the vector is necessarily {@link Complex}
* @return the un-flattened {@link ComplexVector} */
static ComplexVector unflatten(final double[] flattenedVector, final boolean isReal) {
final int n = isReal ? flattenedVector.length : flattenedVector.length / 2;
final ComplexVector v = ComplexVector.zero(n);
for (int i = 0; i < n; i++) {
if (isReal) {
v.setEntry(i, Complex.valueOf(flattenedVector[i]));
}
else {
final int index = 2 * i;
v.setEntry(i, Complex.valueOf(flattenedVector[index], flattenedVector[index + 1]));
}
}
return v;
}
/** Get the {@link Level#WARN} {@link Logger logging} message for when computing an Eigenvector fails
*
* @param A the {@link ComplexMatrix} for which we attempted to compute an Eigenvector
* @param eigenvalue the Eigenvalue for which we attempted to compute a corresponding Eigenvector
* @param initialGuessVec the initial guess at the Eigenvector
* @param epsilon the convergence epsilon
* @return the warning string */
static String getWarningLogMessage(final ComplexMatrix A,
final Complex eigenvalue,
final ComplexVector initialGuessVec,
final double epsilon) {
final String matrixStr = NumericalMethodsUtils.toJavaCode(A, "A");
final String vectorStr = NumericalMethodsUtils.toJavaCode(ComplexMatrix.fromColumns(initialGuessVec),
"initialGuessVec");
final String fmt = """
Computed Eigenvector has Hermitian norm near the noise level of the system, proceed with caution.
Consider submitting a bug report with
Complex Schur input-matrix:
%s
Eigenvector guess:
%s
Eigenvalue: %s
And epsilon: %e
""";
return fmt.formatted(matrixStr, vectorStr, eigenvalue, epsilon);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy