All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.stats.linearmodels.SolveByOrthogonalizing Maven / Gradle / Ivy

Go to download

TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium.

The newest version!
package net.maizegenetics.stats.linearmodels;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.commons.math3.distribution.FDistribution;

import net.maizegenetics.dna.map.Position;
import net.maizegenetics.matrixalgebra.Matrix.DoubleMatrix;
import net.maizegenetics.matrixalgebra.Matrix.DoubleMatrixFactory;
import net.maizegenetics.matrixalgebra.decomposition.SingularValueDecomposition;

import org.apache.commons.math3.exception.OutOfRangeException;

public class SolveByOrthogonalizing {
    private List myBaseModel;
    private List myBasisVectors;
    private List myData;
    private List myOrthogonalizedData;
    private List UColumns = null;
    private SingularValueDecomposition baseSvd = null;
    private final static double tol = 1e-10;

    private SolveByOrthogonalizing() {

    }

    public static SolveByOrthogonalizing getInstanceFromModel(List baseModel, List dataList) {
        SolveByOrthogonalizing sbo = new SolveByOrthogonalizing();
        sbo.myBaseModel = baseModel;
        sbo.myData = dataList;
        DoubleMatrix[][] design = sbo.createDesignMatricesFromModel();
        sbo.computeBaseSvd(design);
        sbo.OrthogonalizeData();
        return sbo;
    }

    public static SolveByOrthogonalizing getInstanceFromVectors(List basisVectors, List dataList) {
        SolveByOrthogonalizing sbo = new SolveByOrthogonalizing();
        sbo.myBasisVectors = basisVectors;
        sbo.myData = dataList;
        sbo.computeBaseSvd(sbo.createDesignMatricesFromVectors());
        sbo.OrthogonalizeData();
        return sbo;
    }

    public SolveByOrthogonalizing.Marker solveForR(SolveByOrthogonalizing.Marker marker) {
        if (marker.vector2 == null)
            return solveForR(marker.position(), marker.vector1());
        return solveForR(marker.position(), marker.vector1(), marker.vector2);
    }

    public SolveByOrthogonalizing.Marker solveForR(Position pos, double[] values) {

        double[] val1 = center(values);
        double[] val2 = orthogonalizeByBase(val1);
        double[] val3 = centerAndScale(val2);
        double[] orthogonalValues = centerAndScale(orthogonalizeByBase(center(values)));

        
        if (orthogonalValues == null) {
            double[] rValues =
                    IntStream.range(0, myOrthogonalizedData.size()).mapToDouble(i -> Double.NaN).toArray();
            return new SolveByOrthogonalizing.Marker(pos, rValues, 0);
        }

        double[] rValues = new double[myOrthogonalizedData.size()];
        int count = 0;
        for (double[] data : myOrthogonalizedData) {
            double ip = innerProduct(data, orthogonalValues);
            rValues[count++] = ip * ip;
        }
        return new SolveByOrthogonalizing.Marker(pos, rValues, 1);
    }

    public SolveByOrthogonalizing.Marker solveForR(Position pos, double[] add, double[] dom) {
        if (dom == null)
            return solveForR(pos, add);
        double[] orthogonalAdd = orthogonalizeByBase(center(add));
        double[] orthogonalDom = orthogonalizeByBase(center(dom));

        //orthogonalize dom with respect to add
        double mult =
                innerProduct(orthogonalAdd, orthogonalDom) / innerProduct(orthogonalAdd, orthogonalAdd);
        int n = orthogonalDom.length;
        for (int i = 0; i < n; i++) {
            orthogonalDom[i] -= mult * orthogonalAdd[i];
        }

        //center and scale
        double[] v1 = centerAndScale(orthogonalAdd);
        double[] v2 = centerAndScale(orthogonalDom);

        if (v1 == null) {
            if (v2 == null) {
                double[] rValues =
                        IntStream.range(0, myOrthogonalizedData.size()).mapToDouble(i -> Double.NaN).toArray();
                return new SolveByOrthogonalizing.Marker(pos, rValues, 0);
            }
            double[] rValues =
                    myOrthogonalizedData.stream().mapToDouble(d -> innerProduct(d, v2)).map(d -> d
                            * d).toArray();
            return new SolveByOrthogonalizing.Marker(pos, rValues, 1);
        }
        if (v2 == null) {
            double[] rValues =
                    myOrthogonalizedData.stream().mapToDouble(d -> innerProduct(d, v1)).map(d -> d
                            * d).toArray();
            return new SolveByOrthogonalizing.Marker(pos, rValues, 1);
        }

        double[] rValues = myOrthogonalizedData.stream()
                .mapToDouble(d -> {
                    double r1 = innerProduct(v1, d);
                    double r2 = innerProduct(v2, d);
                    return r1 * r1 + r2 * r2;
                }).toArray();

        return new SolveByOrthogonalizing.Marker(pos, rValues, 2);
    }

    private DoubleMatrix[][] createDesignMatricesFromModel() {
        DoubleMatrix[][] designMatrices = new DoubleMatrix[1][];
        designMatrices[0] = myBaseModel.stream()
                .filter(a -> !a.getID().toString().toLowerCase().equals("mean"))
                .map(me -> me.getX())
                .toArray(DoubleMatrix[]::new);
        return designMatrices;
    }

    private DoubleMatrix[][] createDesignMatricesFromVectors() {
        DoubleMatrix[][] designMatrices = new DoubleMatrix[1][];
        designMatrices[0] = myBasisVectors.stream()
                .map(d -> DoubleMatrixFactory.DEFAULT.make(d.length, 1, d))
                .toArray(DoubleMatrix[]::new);
        return designMatrices;
    }

    private void computeBaseSvd(DoubleMatrix[][] designMatrices) {
        if (designMatrices[0].length == 0 || designMatrices[0][0] == null)
            return;

        DoubleMatrix X = DoubleMatrixFactory.DEFAULT.compose(designMatrices);

        //center the columns of X
        int nrows = X.numberOfRows();
        double dblnrows = nrows;
        int ncols = X.numberOfColumns();
        for (int c = 0; c < ncols; c++) {
            double mean = X.columnSum(c) / dblnrows;
            for (int r = 0; r < nrows; r++) {
                X.set(r, c, X.get(r, c) - mean);
            }
        }

        baseSvd = X.getSingularValueDecomposition();
        DoubleMatrix U = baseSvd.getU(false);
        UColumns = new ArrayList<>();
        int ncol = U.numberOfColumns();
        for (int i = 0; i < ncol; i++) {
            UColumns.add(U.column(i).to1DArray());
        }

    }

    private void OrthogonalizeData() {
        if (baseSvd == null) {
            myOrthogonalizedData = myData.stream()
                    .map(d -> centerAndScale(Arrays.copyOf(d, d.length)))
                    .collect(Collectors.toList());

        } else {
            myOrthogonalizedData = myData.stream()
                    .map(d -> center(Arrays.copyOf(d, d.length)))
                    .map(d -> orthogonalizeByBase(d))
                    .map(d -> centerAndScale(d))
                    .collect(Collectors.toList());
        }

    }

    private double[] orthogonalizeByBase(double[] vector) {
        if (baseSvd == null) {
            return Arrays.copyOf(vector, vector.length);
        }

        int nrows = vector.length;
        double[] result = Arrays.copyOf(vector, nrows);

        DoubleMatrix U = baseSvd.getU(false);
        int ncol = U.numberOfColumns();
        for (int i = 0; i < ncol; i++) {
            double[] u = U.column(i).to1DArray();
            double ip = innerProduct(vector, u);
            for (int j = 0; j < nrows; j++)
                result[j] -= ip * u[j];
        }

        return result;
    }

    /**
     * @return the df in the base model, including 1 df for the mean
     */
    public int baseDf() {
        int df = 1;  //the mean
        if (baseSvd != null) {
            double[] singularValues = baseSvd.getSingularValues();

            int n = singularValues.length;
            for (int i = 0; i < n; i++) {
                if (singularValues[i] > tol)
                    df++;
            }
        }
        return df;
    }

    public static double innerProduct(double[] x, double[] y) {
        int n = x.length;
        double sumprod = 0;
        for (int i = 0; i < n; i++) {
            sumprod += x[i] * y[i];
        }
        return sumprod;
    }

    public static double[] center(double[] values) {
        int n = values.length;
        double mean = Arrays.stream(values).sum() / n;
        for (int i = 0; i < n; i++)
            values[i] -= mean;
        return values;
    }

    public static double[] scale(double[] values) {
        int n = values.length;
        double divisor = Math.sqrt(innerProduct(values, values));
        for (int i = 0; i < n; i++)
            values[i] /= divisor;
        return values;
    }

    public static double[] centerAndScale(double[] values) {
        int n = values.length;
        double sum = 0;
        double sumsq = 0;
        for (int i = 0; i < n; i++) {
            double val = values[i];
            sum += val;
        }
        double mean = sum / n;

        for (int i = 0; i < n; i++) {
            values[i] = values[i] - mean;
            sumsq += values[i] * values[i];
        }

        double divisor = Math.sqrt(sumsq);
        if (divisor < tol)
            return null;
        for (int i = 0; i < n; i++)
            values[i] /= divisor;
        return values;
    }

    public static double calculateFfromR2(double r2, double markerDf, double errorDf) {
        return r2 / (1 - r2) * errorDf / markerDf;
    }

    public static double calculateP(double F, double markerDf, double errorDf) {
        if (!Double.isFinite(F))
            return Double.NaN;
        double p;
        try {
            p = LinearModelUtils.Ftest(F, markerDf, errorDf);
        } catch (Exception e) {
            p = Double.NaN;
        }
        return p;
    }

    public static double calculateR2Fromp(double alpha, double modelDf, double errorDf) {
        //returns the value of R^2 corresponding to the value of F, f for which P(F>f) = alpha

        FDistribution fdist = new FDistribution(modelDf, errorDf);
        try {
            double p = 1 - alpha;
            double F = fdist.inverseCumulativeProbability(p);
            double Fme = F * modelDf / errorDf;
            return Fme / (1 + Fme);
        } catch (OutOfRangeException e) {
            e.printStackTrace();
            return Double.NaN;
        }

    }

    public List getOrthogonalizedData() {
        return myOrthogonalizedData;
    }
    
    public List copyOrthogonalizedData() {
        return myOrthogonalizedData.stream().map(darray -> Arrays.copyOf(darray, darray.length)).collect(Collectors.toList());
    }
    
    public List getUColumns() {
        return UColumns;
    }
    
    public List copyUColumns() {
        return UColumns.stream().map(darray -> Arrays.copyOf(darray, darray.length)).collect(Collectors.toList());
    }
    
    public static class Marker {
        public final Position myPosition;
        public final double[] vector1;
        public final double[] vector2;
        public int df;

        public Marker(Position pos, double[] values, int df) {
            myPosition = pos;
            vector1 = values;
            vector2 = null;
            this.df = df;
        }

        public Marker(Position pos, double[] additive, double[] dominant, int df) {
            myPosition = pos;
            vector1 = additive;
            vector2 = dominant;
            this.df = df;
        }

        public Position position() {
            return myPosition;
        }

        public double[] vector1() {
            return vector1;
        }

        public double[] vector2() {
            return vector2;
        }

        public boolean hasTwoVectors() {
            return vector2 != null;
        }

        public int degreesOfFreedom() {
            if (vector2 == null)
                return 1;
            return 2;
        }
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy