All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ptolemaeus.math.linear.schur.ComplexSchurDecomposition Maven / Gradle / Ivy

/**
 * MIT License
 *
 * Copyright (c) 2023 Lockheed Martin Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
 * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
 * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package ptolemaeus.math.linear.schur;

import java.util.Arrays;
import java.util.Optional;
import java.util.function.Consumer;

import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import org.hipparchus.complex.Complex;
import org.hipparchus.linear.AnyMatrix;
import org.hipparchus.util.FastMath;

import ptolemaeus.commons.Parallelism;
import ptolemaeus.commons.Validation;
import ptolemaeus.math.CountableDoubleIterable;
import ptolemaeus.math.NumericalMethodsException;
import ptolemaeus.math.commons.NumericalMethodsUtils;
import ptolemaeus.math.functions.MultivariateMatrixValuedFunction;
import ptolemaeus.math.functions.VectorField;
import ptolemaeus.math.levenbergmarquardt.LevenbergMarquardt;
import ptolemaeus.math.levenbergmarquardt.LevenbergMarquardtConstants;
import ptolemaeus.math.levenbergmarquardt.LevenbergMarquardtParameter;
import ptolemaeus.math.levenbergmarquardt.LevenbergMarquardtProblem;
import ptolemaeus.math.linear.complex.ComplexMatrix;
import ptolemaeus.math.linear.complex.ComplexVector;
import ptolemaeus.math.linear.householderreductions.ComplexHessenbergReduction;
import ptolemaeus.math.linear.householderreductions.Householder;
import ptolemaeus.math.linear.real.NVector;
import ptolemaeus.math.weightedleastsquares.WeightedLeastSquaresSolution;

// CHECKSTYLE.OFF: LineLength - the references make the lines too long
/** See {@link SchurHelper} - the workhorse of this package - for implementation details
 * 
 * 
*
* * Compute the {@link ComplexSchurDecomposition} of a square {@link ComplexMatrix} {@code A = QUQ}H, where * {@code Q} is unitary and {@code U} is upper-triangular. Because {@code Q} is a unitary similarity transformation, * {@code A} and {@code U} have the same Eigenvalues.
*
* * At a high-level, QR algorithms compute the Eigenvalues of the input matrix and accumulate unitary transformations * that were used to get to that point in {@code Q}, and compute {@code U} at the end as * {@code U = Q}H{@code AQ}.
*
* * These algorithms don't compute {@code U} directly because we use deflations (see {@link QRDeflations}) to reduce * "deflate" the problem into smaller sub problems, which makes the difference between tractable and intractable for * even moderately large matrices.
*
* * TODO Ryan Moser: 2024-08-21: make the {@link Parallelism} configurable from the {@link ComplexSchurDecomposition} * constructor or higher. * * TODO Ryan Moser, 2024-08-19: implement optional balancing. Again, see the noted references. [2] in particular has a * nice algorithm description. * * TODO Ryan Moser, 2024-08-19: implement aggressive early deflation. It should be easy enough. Follow the algorithm * specifications in [2] and [4] below. Ask me - RM - for a starting point, because I have some of this done already
*
* * TODO Ryan Moser, 2024-08-19: determine why the multi-bulge multi-shift QR sweep (MBMSQRS) was so slow.
* Similar to above, ask me - RM - for a starting point if you want to tackle this. I implemented it, but it was much * slower than the large-bulge QR sweep that's implemented here. To re-implement with what I have saved off is not a lot * of work.
* The MBMSQRS definitely required fewer {@link Complex} operations - being near optimal according to the references - * but the actual runtime was much worse. Poor memory access optimization? Maybe it's only faster for large (n * > 1000) dense matrices? Requires investigation. * *
*
* * Algorithm description: * *
*
* * The first step is to ensure that the input matrix is already upper-Hessenberg, or reduce it to upper-Hessenberg using * a full {@link ComplexHessenbergReduction}. Upper-Hessenberg matrices have the form * *
 * H = 
 * 
 * . . . . . . . . . . . . . . . .
 * . . . . . . . . . . . . . . . .
 *   . . . . . . . . . . . . . . .
 *     . . . . . . . . . . . . . .
 *       . . . . . . . . . . . . .
 *         . . . . . . . . . . . .
 *           . . . . . . . . . . .
 *             . . . . . . . . . .
 *               . . . . . . . . .
 *                 . . . . . . . .
 *                   . . . . . . .
 *                     . . . . . .
 *                       . . . . .
 *                         . . . .
 *                           . . .
 *                             . .
 * 
* * where {@code .}s represent arbitrary {@link Complex} elements and empty spots are zeros. * *
*
* * Now in upper-Hessenberg form, we perform an iteration that implicitly computes a shifted QR step (see references for * details, I don't want to put it all here). * *
*
* * Here, we do a large-bulge multi-shift QR sweep to create a single large "shift" transformation that results in a * single large bulge on the first sub-diagonal of the Hessenberg form. Empirically - according to [1] and [3], using * more than 10 shifts with a large-bulge sweep iteration results in significant so-called shift-blurring, severely * impacting the rate of convergence. Thus {@link QRShifts#computeNumShiftsPerBulge(int)} does not return more than * {@code 10}.
*
* * The shift transformation is a {@link Householder} matrix for the first column of the matrix polynomial shift-function * {@code P(U)}: * *
*
* * {@code P(U) = (U - }λm{@code I) . * (U - }λm-1{@code I) . * (U - }λm-2{@code I) * ... * (U - }λ2{@code I) . * (U - }λ1{@code I)}
*
* * where {@code m} is the number of "shifts" to apply, but we do it implicitly, thus avoiding the O((n^3)^m) * cost of evaluating the full that polynomial. See * {@link QRShifts#computeFirstColumnOfShiftPolynomial(ComplexMatrix, Complex[])} details
*
* * We only need the non-trivial (m+1)x(m+1) block of the Householder matrix to apply it to {@code U}, because the * Householder is really just an identity matrix with a (m+1)x(m+1) non-identity block on its diagonal and, in this * case, in the first place on the diagonal, and we can use * {@link ComplexMatrix#preMultiplyInPlace(ComplexMatrix, ComplexMatrix, int, int)} and * {@link ComplexMatrix#postMultiplyInPlace(ComplexMatrix, ComplexMatrix, int, int)} to do this efficiently:
*
* *
 * . . .
 * . . .
 * . . .
 *       1 
 *         1 
 *           1 
 *             1 
 *               1 
 *                 1 
 *                   1 
 *                     1 
 *                       1
 *                         1
 *                           1
 *                             1
 *                               1
 * 
* * Applying this unitary similarity transformation to {@code H} yields a "bulge" (the elements which do not conform to * the upper-Hessenberg form): * *
 * H = 
 * 
 * m m m
 * . . . . . . . . . . . . . . . .  m
 * b b b . . . . . . . . . . . . .  m
 * b b b . . . . . . . . . . . . .  m
 * b b b . . . . . . . . . . . . .
 *       . . . . . . . . . . . . .
 *         . . . . . . . . . . . .
 *           . . . . . . . . . . .
 *             . . . . . . . . . .
 *               . . . . . . . . .
 *                 . . . . . . . .
 *                   . . . . . . .
 *                     . . . . . .
 *                       . . . . .
 *                         . . . .
 *                           . . .
 *                             . .
 * 
* * Now we need to use a series of unitary similarity transforms to bring {@code H} back into upper-Hessenberg form. This * process is called "bulge chasing". It's called this because at each step of the reduction (as one can see when using * an evaluation monitor) the bulge is "shifted down" one place along the first sub-diagonal until it "falls off":
*
* *
    *
  • in this example, we're using two shifts. This has a special name: the implicit double-shift QR algorithm *
  • *
  • {@code m}s mark the columns and rows modified in each step of {@link ComplexHessenbergReduction} (note that .'s * that are in these entries are not unchanged, they're just not relevant to the illustration)
  • *
  • {@code b}s are used to mark the elements of a (2+1)x(2+1) == 3x3 "bulge" as it moves after each * multiplication
  • *
* *
 *   m m m
 * . . . . . . . . . . . . . . . .
 * . . . . . . . . . . . . . . . .  m
 *   b b b . . . . . . . . . . . .  m
 *   b b b . . . . . . . . . . . .  m
 *   b b b . . . . . . . . . . . .
 *         . . . . . . . . . . . .
 *           . . . . . . . . . . .
 *             . . . . . . . . . .
 *               . . . . . . . . .
 *                 . . . . . . . .
 *                   . . . . . . .
 *                     . . . . . .
 *                       . . . . .
 *                         . . . .
 *                           . . .
 *                             . .
 * 
* *
 *     m m m
 * . . . . . . . . . . . . . . . .
 * . . . . . . . . . . . . . . . .
 *   . . . . . . . . . . . . . . .  m
 *     b b b . . . . . . . . . . .  m
 *     b b b . . . . . . . . . . .  m
 *     b b b . . . . . . . . . . .
 *           . . . . . . . . . . .
 *             . . . . . . . . . .
 *               . . . . . . . . .
 *                 . . . . . . . .
 *                   . . . . . . .
 *                     . . . . . .
 *                       . . . . .
 *                         . . . .
 *                           . . .
 *                             . .
 * 
* *
 *       m m m
 * . . . . . . . . . . . . . . . .
 * . . . . . . . . . . . . . . . .
 *   . . . . . . . . . . . . . . .
 *     . . . . . . . . . . . . . .  m
 *       b b b . . . . . . . . . .  m
 *       b b b . . . . . . . . . .  m
 *       b b b . . . . . . . . . .
 *             . . . . . . . . . .
 *               . . . . . . . . .
 *                 . . . . . . . .
 *                   . . . . . . .
 *                     . . . . . .
 *                       . . . . .
 *                         . . . .
 *                           . . .
 *                             . .
 * 
* * etc. * *
 *                       m m m
 * . . . . . . . . . . . . . . . .
 * . . . . . . . . . . . . . . . .
 *   . . . . . . . . . . . . . . .
 *     . . . . . . . . . . . . . .
 *       . . . . . . . . . . . . .
 *         . . . . . . . . . . . .
 *           . . . . . . . . . . .
 *             . . . . . . . . . .
 *               . . . . . . . . .
 *                 . . . . . . . .
 *                   . . . . . . .
 *                     . . . . . .  m
 *                       b b b . .  m
 *                       b b b . .  m
 *                       b b b . .
 *                             . .
 * 
* *
 *                         m m m
 * . . . . . . . . . . . . . . . .
 * . . . . . . . . . . . . . . . .
 *   . . . . . . . . . . . . . . .
 *     . . . . . . . . . . . . . .
 *       . . . . . . . . . . . . .
 *         . . . . . . . . . . . .
 *           . . . . . . . . . . .
 *             . . . . . . . . . .
 *               . . . . . . . . .
 *                 . . . . . . . .
 *                   . . . . . . .
 *                     . . . . . .
 *                       . . . . .  m
 *                         b b b .  m
 *                         b b b .  m
 *                         b b b .
 * 
* *
 *                           m m m
 * . . . . . . . . . . . . . . . .
 * . . . . . . . . . . . . . . . .
 *   . . . . . . . . . . . . . . .
 *     . . . . . . . . . . . . . .
 *       . . . . . . . . . . . . .
 *         . . . . . . . . . . . .
 *           . . . . . . . . . . .
 *             . . . . . . . . . .
 *               . . . . . . . . .
 *                 . . . . . . . .
 *                   . . . . . . .
 *                     . . . . . .
 *                       . . . . .
 *                         . . . .  m
 *                           b b b  m
 *                           b b b  m
 * 
* *
 *                             m m
 * . . . . . . . . . . . . . . . .
 * . . . . . . . . . . . . . . . .
 *   . . . . . . . . . . . . . . .
 *     . . . . . . . . . . . . . .
 *       . . . . . . . . . . . . .
 *         . . . . . . . . . . . .
 *           . . . . . . . . . . .
 *             . . . . . . . . . .
 *               . . . . . . . . .
 *                 . . . . . . . .
 *                   . . . . . . .
 *                     . . . . . .
 *                       . . . . .
 *                         . . . .
 *                           . . .  m
 *                             b b  m
 * 
* * And now we're back to upper-Hessenberg form! * *
*
* * After each sweep, we check whether we can "deflate" the problem; i.e., break the problem into two (or more, in * general) sub-problems that can be solved independently, which allows us to ignore updating large portions of the * matrix. See {@link QRDeflations} for details.
*
* * This proceeds until we've deflated to a series of small upper-Hessenberg matrices along the diagonal of {@code H}, * each of which we solve in the same way. This recursive procedure continues until we deflate totally into {@code 1x1} * and {@code 2x2} matrices along the main diagonal of {@code H}. The {@code 1x1}s are Eigenvalues themselves, and the * {@code 2x2}s are matrices with complex conjugate pairs of Eigenvalues of {@code H}, which we can reduce in closed * form.
*
* * Book/paper references:
* * * * LAPACK v3.12.0 references:
* * * * @author Ryan Moser, [email protected] * * @see SchurHelper * @see QRShifts * @see QRDeflations * @see SchurMode */ // CHECKSTYLE.ON: LineLength public class ComplexSchurDecomposition { /** A {@link Logger} for logging */ private static final Logger LOGGER = LogManager.getLogger(ComplexSchurDecomposition.class); /** A seed for {@link CountableDoubleIterable} if we need to apply a perturbation to an Eigenvector guess */ private static final long SEED = 867_5309; /** We use this constant so that it's easier to experiment with setting it on and off, as it's not something that * should be user-configurable. LAPACK operates in a way equivalent to this being enabled. */ static final boolean SET_EXACT_ZEROES = true; /** We multiply the dimension of the square input matrix by this to compute the max number of iterations to * consider. This is the technique used by each reference I've read, but there is not necessarily a convergence * guarantee, thus the value is configurable. */ private static final int MAX_ITERS_SCALE = 30; /** a 1x1 identity matrix */ private static final ComplexMatrix ID_1X1 = ComplexMatrix.identity(1); /** a 2x2 identity matrix */ private static final ComplexMatrix ID_2X2 = ComplexMatrix.identity(2); /** the {@link ComplexMatrix} for which we're computing a {@link ComplexSchurDecomposition} */ private final ComplexMatrix A; /** the {@link SchurMode} in which we computed a {@link ComplexSchurDecomposition} */ private final SchurMode mode; /** The unitary {@link ComplexMatrix} {@code Q} in {@code A = QUQ}-1{@code == QUQ}H */ private final ComplexMatrix Q; /** The upper-triangular, unitarily similar to the original {@code A} in * {@code A = QUQ}-1{@code == QUQ}H */ private final ComplexMatrix U; /** The vector of Eigenvalues for the original {@link ComplexMatrix} that's been decomposed. Ideally, these will be * be identical to the diagonal of {@link #getU()}, but practically they will be slightly different due to numerical * imprecision. */ private final Complex[] eigenvalues; /** Constructor * * @param A the {@link ComplexMatrix} for which we computed a {@link ComplexSchurDecomposition} * @param mode the {@link SchurMode} in which we computed a {@link ComplexSchurDecomposition} * @param Q the unitary {@link ComplexMatrix} {@code Q} from {@code A = QUQ}-1{@code == QUQ}H * @param U the upper-triangular {@link ComplexMatrix} {@code U} from * {@code A = QUQ}-1{@code == QUQ}H * @param eigenvalues the Eigenvalues of the original {@link ComplexMatrix} {@code A = QUQ}H */ ComplexSchurDecomposition(final ComplexMatrix A, final SchurMode mode, final ComplexMatrix Q, final ComplexMatrix U, final Complex... eigenvalues) { this.A = A; this.mode = mode; this.Q = Q; this.U = U; this.eigenvalues = eigenvalues; } /** @return the {@link ComplexMatrix matrix} for which we computed {@code this} {@link ComplexSchurDecomposition}; * i.e., {@code A = QUQ}-1{@code == QUQ}H */ public ComplexMatrix getA() { return this.A.copy(); } /** @return the {@link SchurMode} used to compute {@code this} {@link ComplexSchurDecomposition} */ public SchurMode getSchurMode() { return this.mode; } /** @return an {@link Optional} holding the unitary {@code Q} from the Schur decomposition * {@code A = QUQ}-1{@code == QUQ}H. If {@link SchurMode#EIGENVALUES} was used, this * {@link Optional} will be empty. */ public Optional getQ() { return Optional.ofNullable(this.Q).map(ComplexMatrix::copy); } /** @return an {@link Optional} holding the upper-triangular {@code U} from the Schur decomposition * {@code A = QUQ}-1{@code == QUQ}H. If {@link SchurMode#EIGENVALUES} was used, this * {@link Optional} will be empty. If {@link SchurMode#BLOCK_SCHUR_FORM} was used, this will be in block * Schur form. See {@link SchurMode#BLOCK_SCHUR_FORM} for details. * * @implNote {@code U} is actually computed after the fact as {@code U == Q}H{@code AQ}, so the diagonal * entries of {@code U} may not match {@link #getEigenvalues()} exactly */ public Optional getU() { return Optional.ofNullable(this.U).map(ComplexMatrix::copy); } /** @return a reference to the array of Eigenvalues of {@link #getA() A}. These are always computed. */ Complex[] getEigenvaluesRef() { return this.eigenvalues; } /** @return the array of Eigenvalues of {@link #getA() A}. These are always computed. */ public Complex[] getEigenvaluesArr() { return this.eigenvalues.clone(); } /** @return the Eigenvalues of {@link #getA() A}. These are always computed. */ public ComplexVector getEigenvalues() { return new ComplexVector(this.eigenvalues); } /** @return if {@link #Q} and {@link #U} are present, compute {@code QUQ}H, which should be equal (within * tolerance) to the original matrix that was decomposed. Otherwise, throw an exception. * * @apiNote this is only {@code package-private} because it's not useful outside of testing */ ComplexMatrix computeQUQH() { // below, ensure that Q and U are present. You can't have one without the other. final ComplexMatrix notOptionalQHere = this.getQ().orElseThrow(); return notOptionalQHere.times(this.U).timesConjugateTransposeOf(notOptionalQHere); } /** Compute the {@link ComplexSchurDecomposition} of the given {@link ComplexMatrix A}. If * {@link ComplexMatrix#isReal(double) A has exclusively real entries}, then we return the * {@link SchurMode#BLOCK_SCHUR_FORM real Schur form} of {@code A}. Else, we return the {@link SchurMode#SCHUR_FORM * completely upper-Triangular Schur form}. * * @param A the {@link ComplexMatrix} for which we want a {@link ComplexSchurDecomposition} * @return the {@link ComplexSchurDecomposition} of {@code A} */ public static ComplexSchurDecomposition of(final ComplexMatrix A) { final SchurMode schurMode = A.isReal(0.0) ? SchurMode.BLOCK_SCHUR_FORM : SchurMode.SCHUR_FORM; return of(A, schurMode, chooseMaxIters(A), null); } /** Compute the {@link ComplexSchurDecomposition} of the given {@link ComplexMatrix A} * * @param A the {@link ComplexMatrix} for which we want a {@link ComplexSchurDecomposition} * @param schurMode the solve mode to use. See {@link SchurMode} for details. * @return the {@link ComplexSchurDecomposition} of {@code A} */ public static ComplexSchurDecomposition of(final ComplexMatrix A, final SchurMode schurMode) { return of(A, schurMode, chooseMaxIters(A), null); } /** Compute the {@link ComplexSchurDecomposition} of the given {@link ComplexMatrix A}, with additional fine-tuning * available. * * @param A the {@link ComplexMatrix} for which we want a {@link ComplexSchurDecomposition} * @param schurMode the solve mode to use. See {@link SchurMode} for details. * @param maxIters the maximum iterations we will consider at the top level. Recursive calls use a value computed * as-needed ({@link #chooseMaxIters(AnyMatrix)}). * @param evaluationMonitor a {@link ComplexMatrix} {@link Consumer} that's updated with changes to the {@code U} * matrix. An evaluation monitor is a debugging/maintenance tool and should not be used by default, as it * will be very performance intensive. * @return the {@link ComplexSchurDecomposition} of {@code A}. */ public static ComplexSchurDecomposition of(final ComplexMatrix A, final SchurMode schurMode, final int maxIters, final Consumer evaluationMonitor) { final SchurHelper schur = new SchurHelper(A, schurMode, maxIters, evaluationMonitor, SET_EXACT_ZEROES, 0); boolean alreadyUpperHessenberg = true; for (int i = 2; alreadyUpperHessenberg && i < schur.dim; i++) { for (int j = 0; alreadyUpperHessenberg && j < i - 1; j++) { alreadyUpperHessenberg &= A.getEntry(i, j).equals(Complex.ZERO); } } return solve(schur, alreadyUpperHessenberg); } /** Choose a max iterations we should use when computing the {@link ComplexSchurDecomposition} of the given * {@link AnyMatrix}. * * @param A the matrix * @return the max default max iterations for a matrix of the given size */ public static int chooseMaxIters(final AnyMatrix A) { return MAX_ITERS_SCALE * A.getRowDimension(); } /** @param schur the {@link SchurHelper} instance that holds the data that's updated as we iterate; i.e., this is * updated in place! * @param alreadyUpperHessenberg the {@code true} whether {@link SchurHelper#currentU schur.currentU} is already * upper-Hessenberg, {@code false} otherwise. * @return the {@link ComplexSchurDecomposition} of {@link SchurHelper#currentU schur.currentU} * in-place! */ // CHECKSTYLE.OFF: MethodLength static ComplexSchurDecomposition solve(final SchurHelper schur, final boolean alreadyUpperHessenberg) { if (schur.dim == 1) { // 1x1 return new ComplexSchurDecomposition(schur.originalA, schur.schurMode, ID_1X1, schur.originalA, schur.originalA.getEntry(0, 0)); // no need to update the evaluation monitor } if (schur.dim == 2) { // 2x2 final ComplexSchurDecomposition answer2x2 = compute2x2ClosedForm(schur); if (schur.hasEvalMonitor) { schur.updateUBlock(answer2x2.U, 0, 0); } return answer2x2; } // at this point, the shape (non-zero structure) of schur.currentU could be anything // (3+)x(3+) if (!alreadyUpperHessenberg) { schur.doHessenbergReduction(); } // @formatter:off /** Once here, schur.currentU is necessarily upper-Hessenberg: * . . . . . . . . . . . . . . . . * . . . . . . . . . . . . . . . . * . . . . . . . . . . . . . . . * . . . . . . . . . . . . . . * . . . . . . . . . . . . . * . . . . . . . . . . . . * . . . . . . . . . . . * . . . . . . . . . . * . . . . . . . . . * . . . . . . . . * . . . . . . . * . . . . . . * . . . . . * . . . . * . . . * . . */ // @formatter:on for (int count = 0; count < schur.maxIters; count++) { final ComplexMatrix[] deflations = QRDeflations.computeDeflations(schur.currentU, schur.numShiftsPerBulge, schur.machEpsFrobA); if (deflations == null) { // no deflation opportunities detected schur.applyShiftBulge(); // apply bulge // @formatter:off /* . . . . . . . . . . . . . . . . * b b b b . . . . . . . . . . . . <--- bulge entries * b b b b . . . . . . . . . . . . " * b b b b . . . . . . . . . . . . " * b b b b . . . . . . . . . . . . " * . . . . . . . . . . . . * . . . . . . . . . . . * . . . . . . . . . . * . . . . . . . . . * . . . . . . . . * . . . . . . . * . . . . . . * . . . . . * . . . . * . . . * . . */ schur.chaseShiftBulge(); // Back to upper-Hessenberg /* . . . . . . . . . . . . . . . . * . . . . . . . . . . . . . . . . * . . . . . . . . . . . . . . . * . . . . . . . . . . . . . . * . . . . . . . . . . . . . * . . . . . . . . . . . . * . . . . . . . . . . . * . . . . . . . . . <-- if we're lucky, we'll find a deflation opportunity * . . . . . . . . . * . . . . . . . . * . . . . . . . * . . . . . . * . . . . . * . . . . * . . . * . . */ // @formatter:on } else { if (schur.hasEvalMonitor) { /* If using an evaluation monitor, we assume that the best possible performance is not required, so * we do some extra work. Once we enter this else-statement, currentU is only partially updated * (that's the whole point of the deflation), so we can no longer use it, thus it's o.k. to modify * it for reporting purposes. */ // @formatter:off /** stackedDecomposed has the form of two decoupled upper-Hessenberg matrices; e.g., * . . . . . * . . . . . * . . . . * . . . * . . * . . . . * . . . . * . . . * . . */ // @formatter:on final ComplexMatrix stackedDecomposed = ComplexMatrix.blockDiagonalConcatenate(deflations); schur.updateUBlock(stackedDecomposed, 0, 0); } final int numDeflations = deflations.length; // == 2 /* currently, this is always exactly 2, but everything here is easily generalized to K > 2 deflations * (create the Helper instances sequentially, solve in parallel, construct solution sequentially). In * fact, that's how I (RM) initially wrote it, solving the deflated problems in parallel. I found, * however, that it is almost never the case that more than one deflation opportunity appears at once * and, indeed, this a typical assumption in descriptions of the implicitly-shifted QR algorithms */ final Complex[] eigenvalues = new Complex[schur.dim]; int currDiagIndex = 0; for (int i = 0; i < numDeflations; i++) { final ComplexMatrix deflation = deflations[i]; final Consumer subEvalMont; if (schur.hasEvalMonitor) { /* these Consumers update A with the iterations of this algorithm applied to its deflations, and * it does this nested arbitrarily many times (or at least until there's a stack overflow, but * that's infeasible). */ final int fCurrDiagIndex = currDiagIndex; subEvalMont = subUpdate -> { schur.updateUBlock(subUpdate, fCurrDiagIndex, fCurrDiagIndex); }; } else { subEvalMont = null; } final SchurHelper deflatedSchur = new SchurHelper(deflation, schur.schurMode, chooseMaxIters(deflation), subEvalMont, SET_EXACT_ZEROES, schur.recursiveDepth + 1); // recurse: final ComplexSchurDecomposition subDecomp = solve(deflatedSchur, true); // modifies Q data! schur.possiblyPostMultiplyQ(subDecomp.Q, currDiagIndex); // right-update for Q // modifies Q data! for (int eigCounter = 0; eigCounter < subDecomp.eigenvalues.length; eigCounter++) { eigenvalues[currDiagIndex + eigCounter] = subDecomp.eigenvalues[eigCounter]; } currDiagIndex += deflation.getColumnDimension(); } if (schur.schurMode == SchurMode.EIGENVALUES) { return new ComplexSchurDecomposition(schur.originalA, schur.schurMode, null, null, eigenvalues); } // we only need to compute U when we're about to return from the first call in the stack final ComplexMatrix Q = schur.currentQ; final ComplexMatrix U = schur.recursiveDepth != 0 ? null : Q.conjugateTransposeMultiply(schur.originalA).times(Q); return new ComplexSchurDecomposition(schur.originalA, schur.schurMode, schur.currentQ, U, eigenvalues); } } throw NumericalMethodsException.formatted( "The QR algorithm failed to converge in %d iterations for the following matrix:%n%s", schur.maxIters, NumericalMethodsUtils.toJavaCode(schur.originalA)).get(); } // CHECKSTYLE.ON: MethodLength /** Compute and return a {@link ComplexSchurDecomposition} for the given {@code 2x2} {@link ComplexMatrix}
*
* * See {@link #compute2x2ClosedForm(ComplexMatrix, SchurMode, boolean)} * * @param schur the {@link SchurHelper} for which we want to compute a closed-form solution * @return the closed-form solution */ private static ComplexSchurDecomposition compute2x2ClosedForm(final SchurHelper schur) { return compute2x2ClosedForm(schur.currentU, schur.schurMode, schur.hasEvalMonitor); } /** Compute and return a {@link ComplexSchurDecomposition} for the given {@code 2x2} {@link ComplexMatrix}
*
* * We compute (construct, really) a closed-form expression using an Eigenvector of {@code M} and a vector orthogonal * to that Eigenvector. These two vectors allow us to construct a solution because - when concatenated as column * vectors of a matrix - they necessarily result in a unitary (orthogonal) triangularization of {@code M}; i.e., a * Schur decomposition.
*
* * See this PDF * for an example of this construction.

* * TODO Ryan Moser, 2024-08-07: see TODO in {@link QRShifts#computeEigenTrailingPrincipal2x2(ComplexMatrix)}
* * @param M the {@code 2x2} matrix for which we want the {@link ComplexSchurDecomposition} * @param schurMode the {@link SchurMode}, which determines how much work to do here * @param hasEvalMonitor {@code true} if we need to consider an evaluation monitor, {@code false} otherwise * @return the {@link ComplexSchurDecomposition} */ static ComplexSchurDecomposition compute2x2ClosedForm(final ComplexMatrix M, final SchurMode schurMode, final boolean hasEvalMonitor) { final Complex[] eigenValues2x2 = QRShifts.computeEigenTrailingPrincipal2x2(M); if (schurMode == SchurMode.EIGENVALUES && !hasEvalMonitor) { return new ComplexSchurDecomposition(M, schurMode, null, null, eigenValues2x2); } if (schurMode == SchurMode.BLOCK_SCHUR_FORM) { return new ComplexSchurDecomposition(M, schurMode, ID_2X2, M, eigenValues2x2); } final Complex l1 = eigenValues2x2[1]; // choose an Eigenvalue - this can probably be done intelligently final Complex c = M.getEntry(1, 0); final Complex[] eigenvectorArr; // first, construct the 2D Eigenvector associated with the given Eigenvalue if (c.isZero()) { final Complex b = M.getEntry(0, 1); if (b.isZero()) { // b == c == 0 eigenvectorArr = new Complex[] { Complex.ONE, Complex.ZERO }; } else { // c == 0 final Complex a = M.getEntry(0, 0); eigenvectorArr = new Complex[] { b, l1.subtract(a) }; } } else { // c != 0 final Complex d = M.getEntry(1, 1); eigenvectorArr = new Complex[] {l1.subtract(d), c}; } ComplexVector eigenvector = new ComplexVector(eigenvectorArr[0], eigenvectorArr[1]); final Complex hermitianNorm = Complex.valueOf(eigenvector.hermitianNorm()); eigenvector = eigenvector.dividedBy(hermitianNorm); /* eVecOrth1 is equal to the following, but is more efficient and avoids any potential precision loss due to the * double negation of {@code b}: eigenvectorArr[1].conjugate().negate(); * * Proof: -conj(a + b i) == -(a - b i) == -a + b i */ final Complex eVecOrth1 = // vector orthogonal to eigenvector Complex.valueOf(-eigenvectorArr[1].getRealPart(), // -a eigenvectorArr[1].getImaginaryPart()); // b i final Complex eVecOrth2 = eigenvectorArr[0].conjugate(); ComplexVector orth2Eigenv = new ComplexVector(eVecOrth1, eVecOrth2); orth2Eigenv = orth2Eigenv.dividedBy(hermitianNorm); final Complex[][] QArr = new Complex[][] { { eigenvector.getEntry(0), orth2Eigenv.getEntry(0) }, { eigenvector.getEntry(1), orth2Eigenv.getEntry(1) } }; // ^ an Eigenvector, ^ orthogonal to that Eigenvector /* by construction, Q is a unitary matrix (Q^-1 == Q^H) */ final ComplexMatrix Q = new ComplexMatrix(QArr); final ComplexMatrix U = Q.conjugateTransposeMultiply(M).times(Q); return new ComplexSchurDecomposition(M, schurMode, Q, U, eigenValues2x2); } /** Compute the Eigenvectors of {@code A} - the original {@link ComplexMatrix} for which {@code this} is a * {@link ComplexSchurDecomposition} - using a {@link LevenbergMarquardt least-squares optimization}.
*
* * If {@code this} was not created with {@link SchurMode#SCHUR_FORM}, we return an {@link Optional empty Optional} *
*
* * The {@link #getEigenvalues()} and the Eigenvectors computed here are indexed the same way; e.g., to get the * {@code i}th Eigenvalue and Eigenvector pair: * *
     * final ComplexVector eigenvalues = schur.getEigenvalues();
     * final ComplexMatrix eigenvectors = schur.computeEigenvectors().orElseThrow();
     * 
     * final Complex eigenvalue = eigenvalues.getEntry(i);
     * final ComplexVector eigenvector = new ComplexVector(eigenvectors.getColumn(i));
     * 
* * This is not a standard approach as far as I can tell, and it is very slow for large matrices, but it * works really rather well as a temporary implementation until I (RM) can come back and implement a more * standardized approach, and it's just as fast as {@link ComplexSchurDecomposition} for small, square matrices * (e.g., {@code n < 32}).
*
* * As with all Eigenvector algorithms, it is possible that we fail to compute all of them, because (e.g.) we find * duplicates, fail to properly converge, etc. To mitigate this as best we can, the initial guesses are the mutually * orthogonal column vectors of {@link #getQ()} ({@code Q} is unitary, which implies orthogonal column vectors). * * @param eigvecFailMode the {@link EigenvectorFailureMode} that dictates how we'll handle cases where we fail to * compute an Eigenvector * * @return an {@link Optional empty Optional} if {@code this} was not created with {@link SchurMode#SCHUR_FORM}. * Otherwise, return a {@link ComplexMatrix} whose columns are the Eigenvectors of {@code A}. * * @see #computeEigenvector(Complex, ComplexVector, double, EigenvectorFailureMode) */ public Optional computeEigenvectors(final EigenvectorFailureMode eigvecFailMode) { if (this.mode != SchurMode.SCHUR_FORM) { return Optional.empty(); } /* epsilon is a small number around the noise level of the system due to numerical imprecision. The 10x factor * is somewhat arbitrary, as it was chosen for working well empirically, but using the Frobenius norm x machEps * is standard and, in the QR algorithm, a small integer times this values maintains so-called backwards * stability. */ final double epsilon = 10 * this.A.computeFrobeniusNorm() * NumericalMethodsUtils.MACHINE_EPSILON; final int n = this.Q.getColumnDimension(); ComplexMatrix eigenvectorMatrix = ComplexMatrix.zero(n, n); for (int i = 0; i < n; i++) { final Complex eigenvalue = this.eigenvalues[i]; final ComplexVector initialGuessVec = new ComplexVector(this.Q.getColumn(i)); final ComplexVector eigenvector = computeEigenvectorLevenbergMarquardt(this.A, eigenvalue, initialGuessVec, epsilon, eigvecFailMode); eigenvectorMatrix.setColumnVector(i, eigenvector); } return Optional.of(eigenvectorMatrix); } /** Compute an Eigenvector which corresponds to the given Eigenvalue for {@link #getA()}. This comes with all of the * caveats listed in {@link #computeEigenvectors(EigenvectorFailureMode)}. * * @param eigenvalue the Eigenvalue for which we want an Eigenvector * @param initialGuessVec the initial guess * @param epsilon the epsilon for checking convergence and whether the solution is degenerate. We only compute it * externally and pass it in so that we only have to do it once. * @param eigvecFailMode the {@link EigenvectorFailureMode} that dictates how we'll handle cases where we fail to * compute an Eigenvector * * @return an {@link Optional empty Optional} if {@code this} was not created with {@link SchurMode#SCHUR_FORM}. *
*
* In all cases, if we successfully compute an Eigenvector, we return it. All * successful results will be normalized.
*
* * If we fail to compute an Eigenvector and {@code isRetry == false}, we return {@code null}. * {@link #computeEigenvectorLevenbergMarquardt(ComplexMatrix, Complex, ComplexVector, * double, EigenvectorFailureMode)} * relies on this functionality.
*
* * If we fail to compute an Eigenvector and {@code isRetry == true}, the result is determined by the * given {@link EigenvectorFailureMode}. */ public Optional computeEigenvector(final Complex eigenvalue, final ComplexVector initialGuessVec, final double epsilon, final EigenvectorFailureMode eigvecFailMode) { if (this.mode != SchurMode.SCHUR_FORM) { return Optional.empty(); } final ComplexVector eigenvector = computeEigenvectorLevenbergMarquardt(this.A, eigenvalue, initialGuessVec, epsilon, eigvecFailMode); return Optional.of(eigenvector); } /** Compute an Eigenvector for the given Eigenvalue * * @param A the {@link ComplexMatrix} for which we want Eigenvectors * @param eigenvalue the Eigenvalue for which we want an Eigenvector * @param initialGuessVec the initial guess * @param epsilon the epsilon for checking convergence and whether the solution is degenerate. We only compute it * externally and pass it in so that we only have to do it once. Must be greater than zero. * @param eigvecFailMode the {@link EigenvectorFailureMode} that dictates how we'll handle cases where we fail to * compute an Eigenvector * @return an Eigenvector of {@code A}. In all cases, if we successfully compute an Eigenvector, we return it. All * successful results will be normalized.
*
* * If we fail to compute an Eigenvector and {@code isRetry == false}, we return {@code null}. * {@link #computeEigenvectorLevenbergMarquardt(ComplexMatrix, Complex, ComplexVector, * double, EigenvectorFailureMode)} * relies on this functionality.
*
* * If we fail to compute an Eigenvector and {@code isRetry == true}, the result is determined by the * given {@link EigenvectorFailureMode}. */ public static ComplexVector computeEigenvectorLevenbergMarquardt(final ComplexMatrix A, final Complex eigenvalue, final ComplexVector initialGuessVec, final double epsilon, final EigenvectorFailureMode eigvecFailMode) { final ComplexVector firstTry = // possibly null because isRetry == false computeEigenvectorLevenbergMarquardt(A, eigenvalue, initialGuessVec, epsilon, eigvecFailMode, false); if (firstTry != null) { return firstTry; } final ComplexVector secondTry = // never null because isRetry == true, so we defer to the failure mode computeEigenvectorLevenbergMarquardt(A, eigenvalue, initialGuessVec, epsilon, eigvecFailMode, true); return secondTry; } /** Compute an Eigenvector for the given Eigenvalue. * * @param A the {@link ComplexMatrix} for which we want Eigenvectors * @param eigenvalue the Eigenvalue for which we want an Eigenvector * @param initialGuessVec the initial guess * @param epsilon the epsilon for checking convergence and whether the solution is degenerate. We only compute it * externally and pass it in so that we only have to do it once. Must be greater than zero. * @param eigvecFailMode the {@link EigenvectorFailureMode} that dictates how we'll handle cases where we fail to * compute an Eigenvector * @param isRetry {@code true} if this is a retry, {@code false} otherwise. On a retry, we apply a small * perturbation to the initial guess. * @return an Eigenvector of {@code A}. In all cases, if we successfully compute an Eigenvector, we return it. All * successful results will be normalized.
*
* * If we fail to compute an Eigenvector and {@code isRetry == false}, we return {@code null}. * {@link #computeEigenvectorLevenbergMarquardt(ComplexMatrix, Complex, ComplexVector, * double, EigenvectorFailureMode)} * relies on this functionality.
*
* * If we fail to compute an Eigenvector and {@code isRetry == true}, the result is determined by the * given {@link EigenvectorFailureMode}. */ private static ComplexVector computeEigenvectorLevenbergMarquardt(final ComplexMatrix A, final Complex eigenvalue, final ComplexVector initialGuessVec, final double epsilon, final EigenvectorFailureMode eigvecFailMode, final boolean isRetry) { Validation.requireGreaterThan(epsilon, 0.0, "epsilon"); if (eigenvalue.norm() < epsilon) { return ComplexVector.zero(A.getColumnDimension()); } final int n = A.getColumnDimension(); final boolean isReal = FastMath.abs(eigenvalue.getImaginary()) <= epsilon && A.isReal(epsilon); final int flattenedDim = isReal ? n : 2 * n; /* The function f takes as an input a vector of length n OR 2n (determined by isReal; see flatten and unflatten) * which is flattened into a vector with complex entries, v, we then compute Av - lv (the left-hand side of the * Eigenvalue problem) before re-flattening the vector and returning. */ final VectorField f = new VectorField(input -> { final ComplexVector v = unflatten(input, isReal); final ComplexVector Av = A.operate(v); final ComplexVector lv = v.times(eigenvalue); final ComplexVector delta = Av.minus(lv); return flatten(delta, isReal); }, flattenedDim, flattenedDim); final double[] initGuessFlat = flatten(initialGuessVec, isReal); if (isRetry) { /* the scaling factor cbrt(machEps) is a bit arbitrary, but it was chosen for a named constant and for * working well in problematic cases */ final double perturbNorm = initialGuessVec.hermitianNorm() * NumericalMethodsUtils.CUBE_ROOT_MACHINE_EPSILON; applyPerturbation(initGuessFlat, perturbNorm); } final double[] targetValues = new double[flattenedDim]; // all zeroes final double[] weights = new double[flattenedDim]; Arrays.fill(weights, 1.0); final LevenbergMarquardtConstants lmConstants = new LevenbergMarquardtConstants(flattenedDim, weights, epsilon, 20, // max total iterations 5, // max unimproved iterations LevenbergMarquardtParameter.getDefault(), LevenbergMarquardtConstants.DEFAULT_ALPHA_EASY, LevenbergMarquardtConstants.DEFAULT_H); /* Maybe TODO Ryan Moser 2024-08-28: closed form? The problem being solved should have some very nice * cancellations due to how linear it is. Also, compare to the so-called Inverse Iteration method. */ final MultivariateMatrixValuedFunction numericalJacobian = NumericalMethodsUtils.getJacobianFunction(f, Optional.empty(), flattenedDim, flattenedDim, Parallelism.PARALLEL); final LevenbergMarquardtProblem problem = new LevenbergMarquardtProblem(f, numericalJacobian, initGuessFlat, targetValues, lmConstants); final WeightedLeastSquaresSolution wlss = LevenbergMarquardt.solve(problem, true); final double[] flattenedSolnPt = wlss.getPoint(); final ComplexVector notNormalized = unflatten(flattenedSolnPt, isReal); final NVector solnVector = new NVector(flattenedSolnPt); if (solnVector.getNorm() < epsilon) { /* solnVector has norm (length) similar to the "noise" level of the system * and is, thus, of dubious use */ if (!isRetry) { // we haven't yet done a retry, so just return null for now, which flags the retry return null; } LOGGER.warn(() -> getWarningLogMessage(A, eigenvalue, initialGuessVec, epsilon)); return eigvecFailMode.handleFailure(notNormalized); } return notNormalized.dividedBy(Complex.valueOf(notNormalized.hermitianNorm())); } /** Apply a perturbation of size {@code perturbationNorm} to the given array. Note: the array is modified in place. * * @param initGuessFlat the flattened initial guess * @param perturbationNorm the norm of the perturbation to apply * @implNote this handles the {@code isReal} special-case without modification */ private static void applyPerturbation(final double[] initGuessFlat, final double perturbationNorm) { final double[] randomComponents = CountableDoubleIterable.monteCarlo(0.0, 1.0, SEED, initGuessFlat.length) .stream() .toArray(); final double[] perturb = new NVector(randomComponents).normalized().times(perturbationNorm).getDataRef(); for (int i = 0; i < initGuessFlat.length; i++) { initGuessFlat[i] = initGuessFlat[i] + perturb[i]; } } /** Given a {@link ComplexVector} of dimension {@code n}, "flatten" it into a {@code double[]}.

* * The array will have length {@code n == v.getDimension()} if {@code isReal == true} and will have length * {@code n == 2 * v.getDimension()} if {@code !isReal}.

* * If {@code !isReal}, every even-indexed element (starting at zero) is the real portion of a {@link ComplexVector} * entry, and every odd element is an imaginary one.
* * @param v the vector to flatten * @param isReal {@code true} to flatten the {@link ComplexVector} into an array of strictly the real components of * {@code v} with length {@code n == v.getDimension()}, {@code false} to flatten into an array with both the * real and imaginary components of {@code v} with length {@code n == 2 * v.getDimension()} * @return the flattened vector */ static double[] flatten(final ComplexVector v, final boolean isReal) { final int n = v.getDimension(); final int flatenedDim = isReal ? n : 2 * n; // see JavaDoc final double[] answer = new double[flatenedDim]; for (int i = 0; i < n; i++) { final Complex ithElt = v.getEntry(i); if (isReal) { answer[i] = ithElt.getReal(); } else { answer[2 * i] = ithElt.getReal(); answer[2 * i + 1] = ithElt.getImaginary(); } } return answer; } /** Reverse the process of {@link #flatten(ComplexVector, boolean)}; i.e., given a flattened {@code double[]} * (vector) of length {@code 2n} map each pair of elements (starting at zero) to a new {@link Complex} in the * returned answer. * * @param flattenedVector the flattened {@code double[]} of length {@code 2n} * @param isReal {@code true} if we're un-flattening into a real vector wrapped in a {@link ComplexVector}, * {@code false} if the vector is necessarily {@link Complex} * @return the un-flattened {@link ComplexVector} */ static ComplexVector unflatten(final double[] flattenedVector, final boolean isReal) { final int n = isReal ? flattenedVector.length : flattenedVector.length / 2; final ComplexVector v = ComplexVector.zero(n); for (int i = 0; i < n; i++) { if (isReal) { v.setEntry(i, Complex.valueOf(flattenedVector[i])); } else { final int index = 2 * i; v.setEntry(i, Complex.valueOf(flattenedVector[index], flattenedVector[index + 1])); } } return v; } /** Get the {@link Level#WARN} {@link Logger logging} message for when computing an Eigenvector fails * * @param A the {@link ComplexMatrix} for which we attempted to compute an Eigenvector * @param eigenvalue the Eigenvalue for which we attempted to compute a corresponding Eigenvector * @param initialGuessVec the initial guess at the Eigenvector * @param epsilon the convergence epsilon * @return the warning string */ static String getWarningLogMessage(final ComplexMatrix A, final Complex eigenvalue, final ComplexVector initialGuessVec, final double epsilon) { final String matrixStr = NumericalMethodsUtils.toJavaCode(A, "A"); final String vectorStr = NumericalMethodsUtils.toJavaCode(ComplexMatrix.fromColumns(initialGuessVec), "initialGuessVec"); final String fmt = """ Computed Eigenvector has Hermitian norm near the noise level of the system, proceed with caution. Consider submitting a bug report with Complex Schur input-matrix: %s Eigenvector guess: %s Eigenvalue: %s And epsilon: %e """; return fmt.formatted(matrixStr, vectorStr, eigenvalue, epsilon); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy