All downloads are free. Search and download functionality uses the official Maven repository.

org.broadinstitute.hellbender.utils.read.FlowBasedKeyCodec Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.utils.read;

import com.google.common.annotations.VisibleForTesting;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.utils.BaseUtils;

import java.util.ArrayList;

public class FlowBasedKeyCodec {

    /**
     * Converts a base-space sequence to flow space.
     *
     * The flow order is cycled through repeatedly. For each flow, the number of consecutive bases
     * (starting at the current read position) that equal the flow's base is emitted; a base equal to
     * {@code BaseUtils.Base.N.base} is treated as matching whatever flow is current. Flows that consume
     * no base emit 0. Encoding stops as soon as the last base has been consumed, so the key never ends
     * with a 0 flow.
     *
     * @param bases base space sequence
     * @param flowOrder flow order
     * @return Array of flow values (empty when {@code bases} is empty)
     * @throws GATKException if more than one full period of consecutive empty flows is seen, which
     *         happens when the sequence contains a base that is not part of the flow order
     */
    public static int[] baseArrayToKey(final byte[] bases, final String flowOrder) {

        final ArrayList<Integer> result = new ArrayList<>();
        // Decode the flow order exactly the way getFlowToBase does, so that key positions and the
        // flow-to-base mapping always agree (and do not depend on the platform charset).
        final byte[] flowOrderBytes = getFlowToBase(flowOrder, flowOrder.length());
        final int period = flowOrderBytes.length;
        int loc = 0;
        int flowNumber = 0;
        // Counts consecutive empty flows; reset whenever a flow consumes at least one base.
        int periodGuard = 0;
        while (loc < bases.length) {
            final byte flowBase = flowOrderBytes[flowNumber % period];
            if (!matchesFlow(bases[loc], flowBase)) {
                result.add(0);
                if (++periodGuard > period) {
                    throw new GATKException("baseArrayToKey periodGuard tripped, on " + new String(bases) + ", flowOrder: " + flowOrder
                    + " This probably indicates the presence of a base (value) in the sequence that is not included in the provided flow order");
                }
            } else {
                // Consume the whole run of bases that belong to this flow.
                int count = 0;
                while ((loc < bases.length) && matchesFlow(bases[loc], flowBase)) {
                    loc++;
                    count++;
                }
                result.add(count);
                periodGuard = 0;
            }
            flowNumber++;
        }

        final int[] ret = new int[result.size()];
        for (int i = 0; i < ret.length; i++) {
            ret[i] = result.get(i);
        }
        return ret;
    }

    /**
     * For every flow of the key output the index of the last base that was output prior to this flow.
     *
     * Element {@code i} equals the sum of {@code key[0..i-1]} minus one, so the first element is
     * always -1 (no base has been output before the first flow).
     *
     * @param key given key
     * @return array of the same length as {@code key}; empty when {@code key} is empty
     */
    public static int[] getKeyToBase(final int[] key) {
        final int[] result = new int[key.length];
        if (result.length == 0) {
            // An empty key (e.g. the key of an empty read) has no flows to describe.
            return result;
        }
        result[0] = -1;
        for (int i = 1; i < result.length; i++) {
            result[i] = result[i - 1] + key[i - 1];
        }
        return result;
    }

    /**
     * For every flow of the key output the nucleotide that is being read for this flow.
     *
     * The flow order is repeated cyclically until {@code expectedLength} entries are produced.
     *
     * @param flowOrder given flow order
     * @param expectedLength the length of the key (key is not provided)
     * @return array of bases, one per flow
     */
    public static byte[] getFlowToBase(final String flowOrder, final int expectedLength) {
        final byte[] result = new byte[expectedLength];
        for (int i = 0; i < result.length; i++) {
            result[i] = (byte) flowOrder.charAt(i % flowOrder.length());
        }
        return result;
    }

    /**
     * Prints the key as character-encoded string.
     *
     * Values 0-9 are written as the digits '0'-'9'; a value of 10 or more is written as the
     * character {@code 'A' + value - 10} (so 10 is 'A', 11 is 'B', and so on).
     *
     * @param ints (key array)
     * @return encoded string, one character per key element
     */
    public static String keyAsString(final int[] ints) {
        final StringBuilder sb = new StringBuilder();

        for (final int i : ints) {
            sb.append((char) ((i < 10) ? ('0' + i) : ('A' + i - 10)));
        }

        return sb.toString();
    }

    /**
     * Converts a numerical array into flow space by flattening per-base elements to fill empty flows based on minimum scores.
     * This is intended to make it easier to transform per-base read statistics that are computed on the read in base-space
     * into flow-space in a way that makes sense for parameters like indel-likelihoods.
     *
     * The rules are as follows:
     *  - For every homopolymer run, the minimum score of any base in the run is used for the whole flow.
     *  - For every 0 base flow, the score from the previous filled flow is copied into place.
     *  - For the beginning of the array (in the case of preceding 0-flows) the given default value is used.
     *  - Flows located after the flow that consumes the last base are not written and keep the value 0.
     *
     * Example:
     *  A read with the following bases, converted into flow space with flow order ACTG:
     *     TTTATGC -> 0030101101
     *  With an input array like this:
     *     byte[]{1,2,3,4,5,6,7}
     *  We should expect the following result array (which matches the key array in length)
     *     byte[]{defaultQual,defaultQual,1,1,4,4,5,6,6,7}
     *
     * @param bases base space sequence
     * @param keyLength size of the converted bases array in flow-space
     * @param baseSpacedArrayToConvert Array of base-space scores to be conformed to flow space
     * @param defaultQual default quality to use at the head of the array for non-covered flows
     * @param flowOrder flow order
     * @return Array of flow-space scores of length {@code keyLength}
     * @throws IllegalArgumentException if {@code bases} and {@code baseSpacedArrayToConvert} differ in length
     * @throws ArrayIndexOutOfBoundsException if {@code keyLength} is smaller than the number of flows
     *         needed to consume all bases
     */
    @VisibleForTesting
    public static byte[] baseArrayToKeySpace(final byte[] bases, final int keyLength, final byte[] baseSpacedArrayToConvert, final byte defaultQual, final String flowOrder) {

        if (bases.length != baseSpacedArrayToConvert.length) {
            throw new IllegalArgumentException("Read and qual arrays do not match");
        }

        final byte[] result = new byte[keyLength];
        // Same flow-order decoding as baseArrayToKey/getFlowToBase.
        final byte[] flowOrderBytes = getFlowToBase(flowOrder, flowOrder.length());
        final int period = flowOrderBytes.length;
        int loc = 0;
        int flowNumber = 0;
        // Score of the most recent non-empty flow; used to fill empty flows.
        byte lastQual = defaultQual;
        while (loc < bases.length) {
            final byte flowBase = flowOrderBytes[flowNumber % period];
            if (!matchesFlow(bases[loc], flowBase)) {
                result[flowNumber] = lastQual;
            } else {
                // Take the minimum score over the run of bases consumed by this flow.
                byte qual = Byte.MAX_VALUE;
                while ((loc < bases.length) && matchesFlow(bases[loc], flowBase)) {
                    qual = (byte) Math.min(baseSpacedArrayToConvert[loc], qual);
                    loc++;
                }
                result[flowNumber] = qual;
                lastQual = qual;
            }
            flowNumber++;
        }
        return result;
    }

    /** Returns true when the base is consumed by the given flow: it equals the flow base or is the N base. */
    private static boolean matchesFlow(final byte base, final byte flowBase) {
        return (base == flowBase) || (base == BaseUtils.Base.N.base);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy