All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.recalibration.covariates.PerReadCovariateMatrix Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.utils.recalibration.covariates;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.recalibration.EventType;

/**
 * The object that holds BQSR covarites for a read as a matrix (table) of shape ( read length ) x ( num covariates ).
 * The covariates are encoded as their integer keys.
 *
 * Even though it's not a matrix in the linear algebra sense, we call it a matrix rather than table to
 * differentiate it from the recal table in BQSR.
 */
public final class PerReadCovariateMatrix {
    private static final Logger logger = LogManager.getLogger(PerReadCovariateMatrix.class);

    /**
     * This is where we store the pre-read covariates, also indexed by (event type) and (read position).
     * Thus the array has shape { event type } x { read position (aka cycle) } x { covariate }.
     * For instance, { covariate } is by default 4-dimensional (read group, base quality, context, cycle).
     */
    private final int[][][] covariates;

    /**
     * The index of the current covariate, used by addCovariate
     */
    private int currentCovariateIndex = 0;

    /**
     * Use an LRU cache to keep cache of keys (int[][][]) arrays for each read length we've seen.
     * The cache allows us to avoid the expense of recreating these arrays for every read.  The LRU
     * keeps the total number of cached arrays to less than LRU_CACHE_SIZE.
     */
    public PerReadCovariateMatrix(final int readLength, final int numberOfCovariates, final CovariateKeyCache keysCache) {
        Utils.nonNull(keysCache);
        final int[][][] cachedKeys = keysCache.get(readLength);
        if ( cachedKeys == null ) {
            if ( logger.isDebugEnabled() ) logger.debug("Keys cache miss for length " + readLength + " cache size " + keysCache.size());
            covariates = new int[EventType.values().length][readLength][numberOfCovariates];
            keysCache.put(readLength, covariates);
        } else {
            covariates = cachedKeys;
        }
    }

    public void setCovariateIndex(final int index) {
        currentCovariateIndex = index;
    }

    /**
     * Update the keys for mismatch, insertion, and deletion for the current covariate at read offset
     *
     * NOTE: no checks are performed on the number of covariates, for performance reasons.  If the count increases
     * after the keysCache has been accessed, this method will throw an ArrayIndexOutOfBoundsException.  This currently
     * only occurs in the testing harness, and we don't anticipate that it will become a part of normal runs.
     *
     * @param mismatch the mismatch key value
     * @param insertion the insertion key value
     * @param deletion the deletion key value
     * @param readOffset the read offset, must be >= 0 and <= the read length used to create this ReadCovariates
     */
    public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
        covariates[EventType.BASE_SUBSTITUTION.ordinal()][readOffset][currentCovariateIndex] = mismatch;
        covariates[EventType.BASE_INSERTION.ordinal()][readOffset][currentCovariateIndex] = insertion;
        covariates[EventType.BASE_DELETION.ordinal()][readOffset][currentCovariateIndex] = deletion;
    }

    /**
     * Get the keys for all covariates at read position for error model
     *
     * @param readPosition
     * @param errorModel
     * @return
     */
    public int[] getCovariatesAtOffset(final int readPosition, final EventType errorModel) {
        return covariates[errorModel.ordinal()][readPosition];
    }

    public int[][] getMatrixForErrorModel(final EventType errorModel) {
        return covariates[errorModel.ordinal()];
    }

    // ----------------------------------------------------------------------
    //
    // routines for testing
    //
    // ----------------------------------------------------------------------

    protected int[][] getMismatchMatrix() { return getMatrixForErrorModel(EventType.BASE_SUBSTITUTION); }
    protected int[][] getInsertionMatrix() { return getMatrixForErrorModel(EventType.BASE_INSERTION); }
    protected int[][] getDeletionMatrix() { return getMatrixForErrorModel(EventType.BASE_DELETION); }

    protected int[] getMismatchCovariatesAtOffset(final int readPosition) {
        return getCovariatesAtOffset(readPosition, EventType.BASE_SUBSTITUTION);
    }

    protected int[] getInsertionCovariatesAtOffset(final int readPosition) {
        return getCovariatesAtOffset(readPosition, EventType.BASE_INSERTION);
    }

    protected int[] getDeletionCovariatesAtOffset(final int readPosition) {
        return getCovariatesAtOffset(readPosition, EventType.BASE_DELETION);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy