All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.analysis.modelfitter.AdditiveResidualForwardRegression Maven / Gradle / Ivy

package net.maizegenetics.analysis.modelfitter;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import org.apache.commons.math3.distribution.FDistribution;

import net.maizegenetics.phenotype.GenotypePhenotype;
import net.maizegenetics.stats.linearmodels.CovariateModelEffect;
import net.maizegenetics.stats.linearmodels.FactorModelEffect;
import net.maizegenetics.stats.linearmodels.ModelEffect;
import net.maizegenetics.stats.linearmodels.SweepFastLinearModel;

public class AdditiveResidualForwardRegression extends AbstractForwardRegression {
    double[] residuals;
    
    public AdditiveResidualForwardRegression(GenotypePhenotype data) {
        super(data);
    }

    @Override
    public void fitModel() {
        int maxModelSize = myModel.size() + maxVariants;
        int step = 0;
        SweepFastLinearModel sflm = new SweepFastLinearModel(myModel, y);
        residuals = sflm.getResiduals().to1DArray();
        
        while (step < maxModelSize && forwardStepParallel(true, step)) {
           step++;
        }
    }
    
    @Override
    public void fitModelForSubsample(int[] subSample, int iteration) {
        //create myModel from myBaseModel for this subsample
        myModel = myBaseModel.stream().map(me -> me.getSubSample(subSample)).collect(Collectors.toList());
        double[] ySubSample = Arrays.stream(subSample).mapToDouble(i -> y[i]).toArray();
        SweepFastLinearModel sflm = new SweepFastLinearModel(myModel, ySubSample);
        residuals = sflm.getResiduals().to1DArray();
        
        int maxModelSize = myModel.size() + maxVariants;
        int step = 0;
        
        while (step < maxModelSize && forwardStepParallel(subSample, true, iteration, step)) {
            step++;
        }
        
    }

    private boolean forwardStepParallel(boolean doParallel, int step) {
        int nsamples = residuals.length;
        ModelEffect meanMe = new FactorModelEffect(new int[nsamples], false, "mean");
        List residualModel = Arrays.asList(meanMe);

        AdditiveSite bestSite = StreamSupport.stream(new ForwardStepAdditiveSpliterator(siteList, residualModel, residuals), doParallel)
                .max((a,b) -> a.compareTo(b)).get();
        
        ModelEffect siteEffect = new CovariateModelEffect(bestSite.getCovariate());
        residualModel.add(siteEffect);
        SweepFastLinearModel sflm = new SweepFastLinearModel(residualModel, residuals);
        double[] errorSSdf = sflm.getResidualSSdf();
        double[] siteSSdf = sflm.getIncrementalSSdf(myModel.size() - 1);
        double F,p;
        if (siteSSdf[1] < FDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY || errorSSdf[0] < FDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY) {
            F = Double.NaN;
            p = Double.NaN;
        } else {
            F = siteSSdf[0] / siteSSdf[1] / errorSSdf[0] * errorSSdf[1];
            p = 1 - (new FDistribution(siteSSdf[1], errorSSdf[1]).cumulativeProbability(F));
        }
        
        if (!Double.isNaN(p) && p <= enterLimit) {
            addVariant(bestSite, p, 0, step);
            residuals = sflm.getResiduals().to1DArray();
            return true;
        }
        else return false;
    }
    
    private boolean forwardStepParallel(int[] subset, boolean doParallel, int iteration, int step) {
        int nsamples = residuals.length;
        ModelEffect meanMe = new FactorModelEffect(new int[nsamples], false, "mean");
        List residualModel = Arrays.asList(meanMe);

        AdditiveSite bestSite = StreamSupport.stream(new ForwardStepSubsettingAdditiveSpliterator(siteList, residualModel, residuals, subset), doParallel)
                .max((a,b) -> a.compareTo(b)).get();

        ModelEffect siteEffect = new CovariateModelEffect(bestSite.getCovariate(subset));
        residualModel.add(siteEffect);
        SweepFastLinearModel sflm = new SweepFastLinearModel(residualModel, residuals);
        double[] errorSSdf = sflm.getResidualSSdf();
        double[] siteSSdf = sflm.getIncrementalSSdf(myModel.size() - 1);
        double F,p;
        if (siteSSdf[1] < FDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY || errorSSdf[0] < FDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY) {
            F = Double.NaN;
            p = Double.NaN;
        } else {
            F = siteSSdf[0] / siteSSdf[1] / errorSSdf[0] * errorSSdf[1];
            p = 1 - (new FDistribution(siteSSdf[1], errorSSdf[1]).cumulativeProbability(F));
        }
        
        if (!Double.isNaN(p) && p <= enterLimit) {
            //columns in myFittedVariants: "trait","SnpID","Chr","Pos", "p-value", "-log10p"
            addVariant(bestSite, p, iteration, step);
            residuals = sflm.getResiduals().to1DArray();
            return true;
        }
        else return false;
        
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy