All Downloads are FREE. Search and download functionalities are using the official Maven repository.

umcg.genetica.methylation.CheckMatrixForValidBetaValues Maven / Gradle / Ivy

There is a newer version: 1.0.7
Show newest version
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package umcg.genetica.methylation;

import java.util.ArrayList;
import org.apache.commons.collections.primitives.ArrayDoubleList;
import umcg.genetica.math.matrix.DoubleMatrixDataset;

/**
 * Static helpers that check whether the values of a matrix lie in the closed
 * interval [0, 1].
 *
 * @author MarcJan
 */
public class CheckMatrixForValidBetaValues {

    /**
     * Tells whether a value lies in the closed interval [0, 1].
     *
     * NaN fails both comparisons and is therefore reported as outside the
     * interval. The test is deliberately written in this positive form; the
     * "obvious" rewrite {@code value < 0 || value > 1} would treat NaN
     * differently.
     *
     * @param value the value to test
     * @return {@code true} when {@code 0 <= value <= 1}
     */
    private static boolean isWithinUnitInterval(double value) {
        return value >= 0 && value <= 1;
    }

    /**
     * Overwrites every cell that is not within [0, 1] with {@link Double#NaN}.
     * The matrix of the dataset is modified in place; nothing is returned and
     * no rows or columns are removed.
     *
     * @param dataset dataset whose matrix is checked and updated
     * @param replaceCheckedValuesOutOfRange not used by this method
     */
    public static void checkBetaValues(umcg.genetica.math.matrix2.DoubleMatrixDataset dataset, boolean replaceCheckedValuesOutOfRange) {

        for (int col = 0; col < dataset.columns(); col++) {
            for (int row = 0; row < dataset.rows(); row++) {
                final double value = dataset.getMatrix().get(row, col);
                if (isWithinUnitInterval(value)) {
                    continue;
                }
                dataset.getMatrix().set(row, col, Double.NaN);
            }
        }
    }

    /**
     * Inspects every column of {@code dataset.rawData} for values outside
     * [0, 1]. Cells equal to -999 are always skipped.
     *
     * When {@code replaceCheckedValuesOutOfRange} is {@code true} the
     * offending cells are rewritten in place:
     * <ul>
     * <li>values from -0.01 up to (but excluding) 0 become 0;</li>
     * <li>values within 0.0000001 of -3.4 * 10^38, and the value 9, become
     * -999;</li>
     * <li>any other value (NaN included) is printed to standard output and
     * the JVM is terminated with exit status -1.</li>
     * </ul>
     * In this mode no column is ever reported, so the returned list is empty.
     *
     * When {@code replaceCheckedValuesOutOfRange} is {@code false} the data is
     * left untouched. Each column holding at least one offending value is
     * reported on standard output and its entry from
     * {@code dataset.colObjects} is added to the result.
     *
     * @param dataset dataset to check (and, optionally, to repair in place)
     * @param replaceCheckedValuesOutOfRange {@code true} to repair known
     * out-of-range values, {@code false} to only collect offending columns
     * @return the column objects of all columns that should be excluded;
     * empty when there are none
     */
    public static ArrayList checkMinAndMaxPerSample(DoubleMatrixDataset dataset, boolean replaceCheckedValuesOutOfRange) {

        ArrayList<Object> excludedColumns = new ArrayList<Object>();

        for (int col = 0; col < dataset.nrCols; col++) {
            // Out-of-range values found in this column (only filled in report mode).
            ArrayDoubleList offendingValues = new ArrayDoubleList();

            for (int row = 0; row < dataset.nrRows; row++) {
                final double value = dataset.rawData[row][col];

                if (isWithinUnitInterval(value) || value == -999) {
                    continue;
                }

                if (!replaceCheckedValuesOutOfRange) {
                    offendingValues.add(value);
                    continue;
                }

                // The value is already known to be outside [0, 1] here, so the
                // upper bound below can only be met by values below 0.
                if (value >= -0.01 && value <= 1) {
                    dataset.rawData[row][col] = 0;
                } else if (Math.abs(value - (-3.4d * Math.pow(10, 38))) < .0000001 || value == 9) {
                    // NOTE(review): at a magnitude of 1e38 a tolerance of 1e-7 is
                    // far below the spacing of doubles, so the first test is
                    // effectively an exact comparison - confirm this is intended.
                    dataset.rawData[row][col] = -999;
                } else {
                    System.out.println("This shouldn't be reached");
                    System.out.println("This value reached it though: " + value);
                    System.exit(-1);
                }
            }

            final int offendingCount = offendingValues.size();
            if (offendingCount == 0) {
                continue;
            }

            final Object columnName = dataset.colObjects.get(col);
            String report = "Excluding due to min and max values probe:\t" + col + "\t" + columnName + "\t" + offendingCount;
            // Only list the individual values when there are few enough to read.
            if (offendingCount <= 100) {
                report = report + "\t" + offendingValues.toString();
            }
            System.out.println(report);
            excludedColumns.add(columnName);
        }

        return excludedColumns;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy