All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.bwaldvogel.liblinear.Tron Maven / Gradle / Ivy

The newest version!
package de.bwaldvogel.liblinear;

import static de.bwaldvogel.liblinear.Linear.info;

import java.util.concurrent.atomic.AtomicBoolean;


/**
 * Trust Region Newton Method optimization
 */
class Tron {

    private final Function fun_obj;
    private final double   eps;
    private final int      max_iter;
    private final double   eps_cg;

    Tron(Function fun_obj, double eps, int max_iter, double eps_cg) {
        this.fun_obj = fun_obj;
        this.eps = eps;
        this.max_iter = max_iter;
        this.eps_cg = eps_cg;
    }

    void tron(double[] w) {
        // Parameters for updating the iterates.
        double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75;

        // Parameters for updating the trust region size delta.
        double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4;

        int n = fun_obj.get_nr_variable();
        int i, cg_iter;
        double delta = 0, sMnorm, one = 1.0;
        double alpha, f, fnew, prered, actred, gs;
        int search = 1, iter = 1;
        double[] s = new double[n];
        double[] r = new double[n];
        double[] g = new double[n];

        double alpha_pcg = 0.01;
        double[] M = new double[n];

        // calculate gradient norm at w=0 for stopping condition.
        double[] w0 = new double[n];
        for (i = 0; i < n; i++)
            w0[i] = 0;
        fun_obj.fun(w0);
        fun_obj.grad(w0, g);
        double gnorm0 = euclideanNorm(g);

        f = fun_obj.fun(w);
        fun_obj.grad(w, g);
        double gnorm = euclideanNorm(g);

        if (gnorm <= eps * gnorm0)
            search = 0;

        fun_obj.get_diag_preconditioner(M);
        for (i = 0; i < n; i++)
            M[i] = (1 - alpha_pcg) + alpha_pcg * M[i];
        delta = Math.sqrt(uTMv(n, g, M, g));

        double[] w_new = new double[n];
        AtomicBoolean reach_boundary = new AtomicBoolean();
        boolean delta_adjusted = false;
        while (iter <= max_iter && search != 0) {
            cg_iter = trpcg(delta, g, M, s, r, reach_boundary);

            System.arraycopy(w, 0, w_new, 0, n);
            daxpy(one, s, w_new);

            gs = dot(g, s);
            prered = -0.5 * (gs - dot(s, r));
            fnew = fun_obj.fun(w_new);

            // Compute the actual reduction.
            actred = f - fnew;

            // On the first iteration, adjust the initial step bound.
            sMnorm = Math.sqrt(uTMv(n, s, M, s));
            if (iter == 1 && !delta_adjusted) {
                delta = Math.min(delta, sMnorm);
                delta_adjusted = true;
            }

            // Compute prediction alpha*sMnorm of the step.
            if (fnew - f - gs <= 0)
                alpha = sigma3;
            else
                alpha = Math.max(sigma1, -0.5 * (gs / (fnew - f - gs)));

            // Update the trust region bound according to the ratio of actual to
            // predicted reduction.
            if (actred < eta0 * prered)
                delta = Math.min(alpha * sMnorm, sigma2 * delta);
            else if (actred < eta1 * prered)
                delta = Math.max(sigma1 * delta, Math.min(alpha * sMnorm, sigma2 * delta));
            else if (actred < eta2 * prered)
                delta = Math.max(sigma1 * delta, Math.min(alpha * sMnorm, sigma3 * delta));
            else {
                if (reach_boundary.get()) {
                    delta = sigma3 * delta;
                } else {
                    delta = Math.max(delta, Math.min(alpha * sMnorm, sigma3 * delta));
                }
            }

            info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d%n", iter, actred, prered, delta, f, gnorm, cg_iter);

            if (actred > eta0 * prered) {
                iter++;
                System.arraycopy(w_new, 0, w, 0, n);
                f = fnew;
                fun_obj.grad(w, g);
                fun_obj.get_diag_preconditioner(M);
                for (i = 0; i < n; i++)
                    M[i] = (1 - alpha_pcg) + alpha_pcg * M[i];

                gnorm = euclideanNorm(g);
                if (gnorm <= eps * gnorm0)
                    break;
            }
            if (f < -1.0e+32) {
                info("WARNING: f < -1.0e+32%n");
                break;
            }
            if (prered <= 0) {
                info("WARNING: prered <= 0%n");
                break;
            }
            if (Math.abs(actred) <= 1.0e-12 * Math.abs(f) && Math.abs(prered) <= 1.0e-12 * Math.abs(f)) {
                info("WARNING: actred and prered too small%n");
                break;
            }
        }
    }

    private int trpcg(double delta, double[] g, double[] M, double[] s, double[] r, AtomicBoolean reach_boundary) {
        int n = fun_obj.get_nr_variable();
        double one = 1;
        double[] d = new double[n];
        double[] Hd = new double[n];
        double zTr, znewTrnew, alpha, beta, cgtol;
        double[] z = new double[n];

        reach_boundary.set(false);
        for (int i = 0; i < n; i++) {
            s[i] = 0;
            r[i] = -g[i];
            z[i] = r[i] / M[i];
            d[i] = z[i];
        }

        zTr = dot(z, r);
        cgtol = eps_cg * Math.sqrt(zTr);
        int cg_iter = 0;

        while (true) {
            if (Math.sqrt(zTr) <= cgtol)
                break;
            cg_iter++;
            fun_obj.Hv(d, Hd);

            alpha = zTr / dot(d, Hd);
            daxpy(alpha, d, s);

            double sMnorm = Math.sqrt(uTMv(n, s, M, s));
            if (sMnorm > delta) {
                info("cg reaches trust region boundary%n");
                reach_boundary.set(true);
                alpha = -alpha;
                daxpy(alpha, d, s);

                double sTMd = uTMv(n, s, M, d);
                double sTMs = uTMv(n, s, M, s);
                double dTMd = uTMv(n, d, M, d);
                double dsq = delta * delta;
                double rad = Math.sqrt(sTMd * sTMd + dTMd * (dsq - sTMs));
                if (sTMd >= 0)
                    alpha = (dsq - sTMs) / (sTMd + rad);
                else
                    alpha = (rad - sTMd) / dTMd;
                daxpy(alpha, d, s);
                alpha = -alpha;
                daxpy(alpha, Hd, r);
                break;
            }
            alpha = -alpha;
            daxpy(alpha, Hd, r);

            for (int i = 0; i < n; i++)
                z[i] = r[i] / M[i];
            znewTrnew = dot(z, r);
            beta = znewTrnew / zTr;
            scale(beta, d);
            daxpy(one, z, d);
            zTr = znewTrnew;
        }

        return (cg_iter);
    }

    /**
     * constant times a vector plus a vector
     *
     * 
     * vector2 += constant * vector1
     * 
* * @since 1.8 */ private static void daxpy(double constant, double[] vector1, double[] vector2) { if (constant == 0) return; assert vector1.length == vector2.length; for (int i = 0; i < vector1.length; i++) { vector2[i] += constant * vector1[i]; } } /** * returns the dot product of two vectors * * @since 1.8 */ private static double dot(double[] vector1, double[] vector2) { double product = 0; assert vector1.length == vector2.length; for (int i = 0; i < vector1.length; i++) { product += vector1[i] * vector2[i]; } return product; } /** * returns the euclidean norm of a vector * * @since 1.8 */ private static double euclideanNorm(double vector[]) { int n = vector.length; if (n < 1) { return 0; } if (n == 1) { return Math.abs(vector[0]); } // this algorithm is (often) more accurate than just summing up the squares and taking the square-root afterwards double scale = 0; // scaling factor that is factored out double sum = 1; // basic sum of squares from which scale has been factored out for (int i = 0; i < n; i++) { if (vector[i] != 0) { double abs = Math.abs(vector[i]); // try to get the best scaling factor if (scale < abs) { double t = scale / abs; sum = 1 + sum * (t * t); scale = abs; } else { double t = abs / scale; sum += t * t; } } } return scale * Math.sqrt(sum); } /** * scales a vector by a constant * * @since 1.8 */ private static void scale(double constant, double vector[]) { if (constant == 1.0) return; for (int i = 0; i < vector.length; i++) { vector[i] *= constant; } } private static double uTMv(int n, double[] u, double[] M, double[] v) { int m = n - 4; double res = 0; int i; for (i = 0; i < m; i += 5) res += u[i] * M[i] * v[i] + u[i + 1] * M[i + 1] * v[i + 1] + u[i + 2] * M[i + 2] * v[i + 2] + u[i + 3] * M[i + 3] * v[i + 3] + u[i + 4] * M[i + 4] * v[i + 4]; for (; i < n; i++) res += u[i] * M[i] * v[i]; return res; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy