All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.analysis.modelfitter.AdditiveModelForwardRegression Maven / Gradle / Ivy

package net.maizegenetics.analysis.modelfitter;

import java.util.Arrays;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ForkJoinPool;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import org.apache.commons.math3.distribution.FDistribution;
import org.apache.log4j.Logger;

import net.maizegenetics.phenotype.GenotypePhenotype;
import net.maizegenetics.phenotype.Phenotype;
import net.maizegenetics.stats.linearmodels.CovariateModelEffect;
import net.maizegenetics.stats.linearmodels.ModelEffect;
import net.maizegenetics.stats.linearmodels.SweepFastLinearModel;

public class AdditiveModelForwardRegression extends AbstractForwardRegression {
    private static Logger myLogger = Logger.getLogger(AdditiveModelForwardRegression.class);
    double highestSS;
    int bestSite;
    int maxThreads;
    ForkJoinPool myPool = null;
    
    public AdditiveModelForwardRegression(GenotypePhenotype data, int maxThreads) {
        super(data);
        this.maxThreads = maxThreads;
        myPool = new ForkJoinPool(maxThreads);
    }

    public AdditiveModelForwardRegression(String serialFilename, Phenotype pheno, int maxThreads) {
        super(serialFilename, pheno);
        this.maxThreads = maxThreads;
        myPool = new ForkJoinPool(maxThreads);
    }
    
    public AdditiveModelForwardRegression(GenotypePhenotype data) {
        super(data);
        maxThreads = -1;
    }

    public AdditiveModelForwardRegression(String serialFilename, Phenotype pheno) {
        super(serialFilename, pheno);
        maxThreads = -1;
    }


    @Override
    public void fitModel() {
        int maxModelSize = myModel.size() + maxVariants;
        int step = 0;
        if (maxThreads > 0) {
            while (forwardStepParallelUseMaxThreads(true, step++) && myModel.size() < maxModelSize)
                ;
        } else {
            while (forwardStepParallel(true, step++) && myModel.size() < maxModelSize)
                ;
        }
        
    }

    @Override
    public void fitModelForSubsample(int[] subSample, int iteration) {

        //create myModel from myBaseModel for this subsample
        myModel =
                myBaseModel.stream().map(me -> me.getSubSample(subSample)).collect(Collectors.toList());
        double[] original = y;
        y = Arrays.stream(subSample).mapToDouble(i -> original[i]).toArray();

        int maxModelSize = myModel.size() + maxVariants;
        int step = 0;
        if (maxThreads > 0) {
            while (forwardStepParallelUseMaxThreads(subSample, true, iteration, step++)
                    && myModel.size() < maxModelSize)
                ;
        } else {
            while (forwardStepParallel(subSample, true, iteration, step++)
                    && myModel.size() < maxModelSize)
                ;
        }

        y = original;
    }

    private boolean forwardStepParallel(boolean doParallel, int step) {

        AdditiveSite bestSite =
                StreamSupport.stream(new ForwardStepAdditiveSpliterator(siteList, myModel, y), doParallel)
                        .max((a, b) -> a.compareTo(b)).get();

        ModelEffect siteEffect = new CovariateModelEffect(bestSite.getCovariate());
        myModel.add(siteEffect);
        SweepFastLinearModel sflm = new SweepFastLinearModel(myModel, y);
        double[] errorSSdf = sflm.getResidualSSdf();
        double[] siteSSdf = sflm.getIncrementalSSdf(myModel.size() - 1);
        double F, p;
        if (siteSSdf[1] < FDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY
                || errorSSdf[0] < FDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY) {
            F = Double.NaN;
            p = Double.NaN;
        } else {
            F = siteSSdf[0] / siteSSdf[1] / errorSSdf[0] * errorSSdf[1];
            p = 1 - (new FDistribution(siteSSdf[1], errorSSdf[1]).cumulativeProbability(F));
        }

        if (!Double.isNaN(p) && p <= enterLimit) {
            addVariant(bestSite, p, 0, step);
            return true;
        }
        else
            return false;
    }

    private boolean forwardStepParallelUseMaxThreads(boolean doParallel, int step) {
        
        AdditiveSite bestSite;
        try {
            bestSite = myPool.submit(() -> StreamSupport.stream(new ForwardStepAdditiveSpliterator(siteList, myModel, y), doParallel)
                    .max((a, b) -> a.compareTo(b)).get()).get();
            ModelEffect siteEffect = new CovariateModelEffect(bestSite.getCovariate());
            myModel.add(siteEffect);
            SweepFastLinearModel sflm = new SweepFastLinearModel(myModel, y);
            double[] errorSSdf = sflm.getResidualSSdf();
            double[] siteSSdf = sflm.getIncrementalSSdf(myModel.size() - 1);
            double F, p;
            if (siteSSdf[1] < FDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY
                    || errorSSdf[0] < FDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY) {
                F = Double.NaN;
                p = Double.NaN;
            } else {
                F = siteSSdf[0] / siteSSdf[1] / errorSSdf[0] * errorSSdf[1];
                p = 1 - (new FDistribution(siteSSdf[1], errorSSdf[1]).cumulativeProbability(F));
            }

            if (!Double.isNaN(p) && p <= enterLimit) {
                addVariant(bestSite, p, 0, step);
                return true;
            }
            else
                return false;
            
        } catch (InterruptedException | ExecutionException e) {
            myLogger.error("Thread execution failed in forwardStepParallelUseMaxThreads");
            e.printStackTrace();
            return false;
        }
    }

    private boolean forwardStepParallel(int[] subset, boolean doParallel, int iteration, int step) {

        AdditiveSite bestSite =
                StreamSupport.stream(new ForwardStepSubsettingAdditiveSpliterator(siteList, myModel, y, subset), doParallel)
                        .max((a, b) -> a.compareTo(b)).get();

        ModelEffect siteEffect = new CovariateModelEffect(bestSite.getCovariate(subset));
        myModel.add(siteEffect);
        SweepFastLinearModel sflm = new SweepFastLinearModel(myModel, y);
        double[] errorSSdf = sflm.getResidualSSdf();
        double[] siteSSdf = sflm.getIncrementalSSdf(myModel.size() - 1);
        double F, p;
        if (siteSSdf[1] < FDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY
                || errorSSdf[0] < FDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY) {
            F = Double.NaN;
            p = Double.NaN;
        } else {
            F = siteSSdf[0] / siteSSdf[1] / errorSSdf[0] * errorSSdf[1];
            p = 1 - (new FDistribution(siteSSdf[1], errorSSdf[1]).cumulativeProbability(F));
        }

        if (!Double.isNaN(p) && p <= enterLimit) {
            //columns in myFittedVariants: "trait","SnpID","Chr","Pos", "p-value", "-log10p"
            addVariant(bestSite, p, iteration, step);
            return true;
        }
        else
            return false;

    }

    private boolean forwardStepParallelUseMaxThreads(int[] subset, boolean doParallel, int iteration, int step) {
        
        AdditiveSite bestSite;
        try {
            bestSite = myPool.submit(() -> StreamSupport.stream(new ForwardStepSubsettingAdditiveSpliterator(siteList, myModel, y, subset), doParallel)
                    .max((a, b) -> a.compareTo(b)).get()).get();
            ModelEffect siteEffect = new CovariateModelEffect(bestSite.getCovariate(subset));
            myModel.add(siteEffect);
            SweepFastLinearModel sflm = new SweepFastLinearModel(myModel, y);
            double[] errorSSdf = sflm.getResidualSSdf();
            double[] siteSSdf = sflm.getIncrementalSSdf(myModel.size() - 1);
            double F, p;
            if (siteSSdf[1] < FDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY
                    || errorSSdf[0] < FDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY) {
                F = Double.NaN;
                p = Double.NaN;
            } else {
                F = siteSSdf[0] / siteSSdf[1] / errorSSdf[0] * errorSSdf[1];
                p = 1 - (new FDistribution(siteSSdf[1], errorSSdf[1]).cumulativeProbability(F));
            }

            if (!Double.isNaN(p) && p <= enterLimit) {
                //columns in myFittedVariants: "trait","SnpID","Chr","Pos", "p-value", "-log10p"
                addVariant(bestSite, p, iteration, step);
                return true;
            }
            else
                return false;
            
        } catch (InterruptedException | ExecutionException e) {
            myLogger.error("Thread execution failed in forwardStepParallelUseMaxThreads");
            e.printStackTrace();
            return false;
        }

    }

    @Override
    protected void finalize() throws Throwable {
        if (myPool != null) myPool.shutdown();
        super.finalize();
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy