All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.dragstr.DragstrParams Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.utils.dragstr;

import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.dragstr.DragstrHyperParameters;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.param.ParamUtils;

import java.util.Arrays;
import java.util.stream.IntStream;

/**
 * Holds the values of the DRAGstr model parameters for different combinations of repeat unit length (period) and
 * number of repeat units.
 */
public final class DragstrParams {

    /**
     * String returned by {@link #toString()} when this object has not been retrieved from nor persisted into a file.
     */
    private static final String NON_PERSISTENT_NAME = "";

    /**
     * String returned by {@link #toString()} for the default model parameters
     */
    private static final String DEFAULT_NAME = "";

     /**
     * Holds the path of the file this param where loaded from or persistent into last.
     */
    private String name;

    /**
     * Gap-Open-Penalty default Phred scores.
     * 

* Each row represent a different period (index 0 is period length == 1, index 1 is period length == 2) to the default maximum of {@value DragstrHyperParameters#DEFAULT_MAX_PERIOD}. *

*

* Then each element (column) in that row is the value for the ith repeat length in units (0-based) up to the default maximum of {@value DragstrHyperParameters#DEFAULT_MAX_REPEAT_LENGTH}. *

* *

* This values were copied from DRAGstr documentation provided by Illumina. * It is unknown to us how they came out with this values nor we know of any way to generate them * on the fly. *

*/ //@formatter:off -- this disables IntelliJ code reformatting if preferences are set; Preferences > Editor > Code Style > Formatter Control private static final double[][] DEFAULT_GOP = { /* GOP */ // Repeat Length // 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 // Period /* 1 */ {45.00, 45.00, 45.00, 45.00, 45.00, 45.00, 40.50, 33.50, 28.00, 24.00, 21.75, 21.75, 21.75, 21.75, 21.75, 21.75, 21.75, 21.75, 21.75, 21.75}, /* 2 */ {39.50, 39.50, 39.50, 39.50, 36.00, 30.00, 27.25, 25.00, 24.25, 24.75, 26.25, 26.25, 26.25, 26.25, 26.25, 26.25, 26.25, 26.25, 26.25, 26.75}, /* 3 */ {38.50, 41.00, 41.00, 41.00, 41.00, 37.50, 35.25, 34.75, 34.75, 33.25, 33.25, 33.25, 32.50, 30.75, 28.50, 29.00, 29.00, 29.00, 29.00, 29.00}, /* 4 */ {37.50, 39.00, 39.00, 37.75, 34.00, 34.00, 30.25, 30.25, 30.25, 30.25, 30.25, 30.25, 30.25, 30.25, 30.25, 31.75, 31.75, 31.75, 31.75, 31.75}, /* 5 */ {37.00, 40.00, 40.00, 40.00, 36.00, 35.00, 24.50, 24.50, 24.50, 24.50, 22.50, 22.50, 22.50, 23.50, 23.50, 23.50, 23.50, 23.50, 23.50, 23.50}, /* 6 */ {36.25, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00, 40.00}, /* 7 */ {36.00, 40.50, 40.50, 40.50, 20.75, 20.75, 20.75, 20.75, 20.75, 20.75, 20.75, 20.75, 20.75, 20.75, 20.75, 20.75, 20.75, 20.75, 20.75, 20.75}, /* 8 */ {36.25, 39.75, 32.75, 32.75, 32.75, 32.75, 32.75, 32.75, 32.75, 32.75, 32.75, 32.75, 32.75, 32.75, 32.75, 32.75, 32.75, 32.75, 32.75, 32.75}}; //@formatter:on /** * API default Phred scores. *

* Each row represent a different period (index 0 is period length == 1, index 1 is period length == 2) to the default maximum of {@value DragstrHyperParameters#DEFAULT_MAX_PERIOD}. *

*

* Then each element (column) in that row is the value for the ith repeat length in units (0-based) up to the default maximum of {@value DragstrHyperParameters#DEFAULT_MAX_REPEAT_LENGTH}. *

* *

* This values were copied from DRAGstr documentation provided by Illumina. * It is unknown to us how they came out with this values nor we know of any way to generate them * on the fly. *

*/ //@formatter:off -- this disables IntelliJ code reformatting if preferences are set; Preferences > Editor > Code Style > Formatter Control private static final double[][] DEFAULT_API = { /* API */ // Repeat Length // 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 // Period /* 1 */ {39.00, 39.00, 37.00, 35.00, 32.00, 26.00, 20.00, 16.00, 12.00, 10.00, 8.00, 7.00, 7.00, 6.00, 6.00, 5.00, 5.00, 4.00, 4.00, 4.00}, /* 2 */ {30.00, 30.00, 29.00, 22.00, 17.00, 14.00, 11.00, 8.00, 6.00, 5.00, 4.00, 4.00, 3.00, 3.00, 3.00, 3.00, 3.00, 3.00, 2.00, 2.00}, /* 3 */ {27.00, 27.00, 25.00, 18.00, 14.00, 12.00, 9.00, 7.00, 5.00, 4.00, 3.00, 3.00, 3.00, 3.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00}, /* 4 */ {27.00, 27.00, 18.00, 9.00, 9.00, 9.00, 9.00, 3.00, 3.00, 3.00, 3.00, 3.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00}, /* 5 */ {29.00, 29.00, 18.00, 8.00, 8.00, 8.00, 4.00, 3.00, 3.00, 3.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00}, /* 6 */ {25.00, 25.00, 10.00, 10.00, 10.00, 4.00, 3.00, 3.00, 3.00, 3.00, 3.00, 3.00, 3.00, 3.00, 3.00, 3.00, 3.00, 3.00, 3.00, 3.00}, /* 7 */ {21.00, 21.00, 11.00, 11.00, 5.00, 5.00, 5.00, 5.00, 5.00, 5.00, 5.00, 5.00, 5.00, 5.00, 5.00, 5.00, 5.00, 5.00, 5.00, 5.00}, /* 8 */ {18.00, 18.00, 10.00, 6.00, 4.00, 4.00, 4.00, 4.00, 4.00, 4.00, 4.00, 4.00, 4.00, 4.00, 4.00, 4.00, 4.00, 4.00, 4.00, 4.00}}; //@formatter:on /** * Gap-Continuation-Penalty (GCP) *

* Each row represent a different period (index 0 is period length == 1, index 1 is period length == 2) to the default maximum of {@value DragstrHyperParameters#DEFAULT_MAX_PERIOD}. *

*

* Then each element (column) in that row is the value for the ith repeat length in units (0-based) up to the default maximum of {@value DragstrHyperParameters#DEFAULT_MAX_REPEAT_LENGTH}. *

* *

* This value is not estimated and it rather has fixed values. They only depend on the period length. * With a very simple formula of 10.0 / period. Since DRAGEN tables are approximated just the second * decimal. We do that below using: {@code round(10 * 100 / period) / 100} * It is unknown to us how they came out with this values nor we know of any way to generate them * on the fly. *

*

* As a result the first row (period 1, index 0) is just an array of 10.0, the second row (period 2, index 1) is filled with 5.0, * the third 3.33, the fourth 2.25 and so forth. *

*/ private static final double[][] DEFAULT_GCP = IntStream.rangeClosed(1, DragstrHyperParameters.DEFAULT_MAX_PERIOD) .mapToDouble(period -> Math.round(1000.0 / period) / 100.0) // This way we end-up with two decimal precision. .mapToObj(value -> MathUtils.doubles(DragstrHyperParameters.DEFAULT_MAX_REPEAT_LENGTH, value)) .toArray(double[][]::new); /** * Default parameters when there is not enough data for estimation. */ public static final DragstrParams DEFAULT = new DragstrParams( DragstrHyperParameters.DEFAULT_MAX_PERIOD, DragstrHyperParameters.DEFAULT_MAX_REPEAT_LENGTH, DEFAULT_GOP, DEFAULT_GCP, DEFAULT_API, DEFAULT_NAME); private final int maxPeriod; private final int maxRepeats; private final double[][] gop; private final double[][] gcp; private final double[][] api; public static DragstrParams of(final int maxPeriod, final int maxRepeats, final double[][] gop, final double[][] gcp, final double[][] api) { return of(maxPeriod, maxRepeats, gop, gcp, api, NON_PERSISTENT_NAME); } public static DragstrParams of(final int maxPeriod, final int maxRepeats, final double[][] gop, final double[][] gcp, final double[][] api, final String name) { ParamUtils.isPositive(maxPeriod, "max period must be a positive"); ParamUtils.isPositive(maxRepeats, "max repeats must be a positive"); Utils.nonNull(gop, "gop cannot be null"); Utils.nonNull(gcp, "gcp cannot be null"); Utils.nonNull(api, "api cannot be null"); Utils.validate(gop.length == maxPeriod, "input gop length must match maxPeriod"); Utils.validate(gcp.length == maxPeriod, "input gcp length must match maxPeriod"); Utils.validate(api.length == maxPeriod, "input api length must match maxPeriod"); for (int i = 0; i < maxPeriod; i++) { final double[] gopRow = gop[i]; final double[] gcpRow = gcp[i]; final double[] apiRow = api[i]; for (int j = 0; j < maxRepeats; j++) { Utils.validate(gopRow[j] >= 0 && Double.isFinite(gopRow[j]), "bad gop value: " + gopRow[j]); Utils.validate(gcpRow[j] >= 0 && Double.isFinite(gcpRow[j]), "bad gcp value: " + gcpRow[j]); Utils.validate(apiRow[j] >= 0 && Double.isFinite(apiRow[j]), "bad api value: " + apiRow[j]); } } checkMatricesAreValid(maxPeriod, maxRepeats, gop, gcp, api); return new DragstrParams(maxPeriod, maxRepeats, gop.clone(), gcp.clone(), api.clone(), name); } private DragstrParams(final int maxPeriod, final int maxRepeats, final double[][] gop, final double[][] gcp, final double[][] api, final String name) { this.maxPeriod = maxPeriod; this.maxRepeats = maxRepeats; this.gop = gop; this.gcp = gcp; this.api = api; this.name = name; } private static void checkMatricesAreValid(final int maxPeriod, final int maxRepeats, final double[][] gopMatrix, final double[][] gcpMatrix, final double[][] apiMatrix) { checkMatrixIsValid(maxPeriod, maxRepeats, gopMatrix, DragstrParamUtils.GOP_TABLE_NAME); checkMatrixIsValid(maxPeriod, maxRepeats, gcpMatrix, DragstrParamUtils.GCP_TABLE_NAME); checkMatrixIsValid(maxPeriod, maxRepeats, apiMatrix, DragstrParamUtils.API_TABLE_NAME); } private static void checkMatrixIsValid(final int maxPeriod, final int maxRepeatLength, final double[][] matrix, final String name) { Utils.nonNull(matrix, "the " + name + " matrix provided cannot be null"); if (matrix.length != maxPeriod) { throw new UserException.BadInput("the " + name + " matrix provided has the wrong number of rows"); } else { for (final double[] row : matrix) { Utils.nonNull(row, "the " + name + " matrix contains null rows"); if (row.length != maxRepeatLength) { throw new UserException.BadInput("the " + name + " matrix contains rows with length that does not match the max repeat length"); } } } } private double lookup(final double[][] matrix, final int period, final int repeats) { ParamUtils.isPositive(period, "period"); ParamUtils.isPositive(repeats, "repeat length in units"); final int periodIndex = period < maxPeriod ? period - 1 : maxPeriod - 1; final int repeatIndex = repeats < maxRepeats ? repeats - 1 : maxRepeats - 1; return matrix[periodIndex][repeatIndex]; } /** * Returns the GOP for a specific period and repeat length in Phred scale. * @param period the query period/unit length. * @param repeats the query repeat length * @return 0 or greater. */ public double gop(final int period, final int repeats) { return lookup(gop, period, repeats); } /** * Returns the GCP for a specific period and repeat length in Phred scale. * @param period the query period/unit length. * @param repeats the query repeat length * @return 0 or greater. */ public double gcp(final int period, final int repeats) { return lookup(gcp, period, repeats); } /** * Returns the API for a specific period and repeat length in Phred scale. * @param period the query period/unit length. * @param repeats the query repeat length * @return 0 or greater. */ public double api(final int period, final int repeats) { return lookup(api, period, repeats); } /** * Return the maximum period this parameter collection has specified values for. * @return 1 or greater. */ public int maximumPeriod() { return maxPeriod; } /** * Return the maximum repeat length this parameter collection has specified values for. * @return 1 or greater. */ public int maximumRepeats() { return maxRepeats; } public int maximumLengthInBasePairs() { return maxPeriod * maxRepeats; } @Override public String toString() { return name; } void setName(final String name) { this.name = Utils.nonNull(name); } @Override public boolean equals(final Object other) { return other instanceof DragstrParams && equals((DragstrParams) other); } @Override public int hashCode() { int result = 1; for (double[] gg : gop) { for (double g : gg) { long bits = Double.doubleToLongBits(g); result = 31 * result + (int) (bits ^ (bits >>> 32)); } } for (double[] aa : api) { for (double a : aa) { long bits = Double.doubleToLongBits(a); result = 31 * result + (int) (bits ^ (bits >>> 32)); } } // gcp is usualy fixed so we should not bother. return result; } public boolean equals(final DragstrParams other) { if (other == null) { return false; } else if (other == this) { return false; } else { if (other.maximumRepeats() != maximumRepeats()) { return false; } else if (other.maximumPeriod() != maximumPeriod()) { return false; } else { for (int i = 1; i <= maximumPeriod(); i++) { for (int j = 1; j <= maximumRepeats(); j++) { if (Math.abs(gop(i, j) - other.gop(i, j)) > 0.001 || Math.abs(api(i, j) - other.api(i, j)) > 0.001 || Math.abs(gcp(i, j) - other.gcp(i, j)) > 0.001) { return false; } } } return true; } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy