All downloads are free. Search and download functionality uses the official Maven repository.

net.maizegenetics.dna.snp.NucleotideAlignmentConstants Maven / Gradle / Ivy

Go to download

TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium.

The newest version!
/*
 * NucleotideAlignmentConstants
 */
package net.maizegenetics.dna.snp;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

/**
 *
 * @author terry
 */
public final class NucleotideAlignmentConstants {

    // Haploid allele codes. Every code is small enough to occupy one 4-bit nibble.
    public static final byte A_ALLELE = 0x0;
    public static final byte C_ALLELE = 0x1;
    public static final byte G_ALLELE = 0x2;
    public static final byte T_ALLELE = 0x3;
    public static final byte INSERT_ALLELE = 0x4;
    public static final byte GAP_ALLELE = 0x5;
    public static final byte UNDEFINED_ALLELE = 0x6;

    // Homozygous diploid codes: the haploid code repeated in the high and the low nibble.
    public static final byte A_DIPLOID_ALLELE = (byte) ((A_ALLELE << 4) | A_ALLELE);
    public static final byte C_DIPLOID_ALLELE = (byte) ((C_ALLELE << 4) | C_ALLELE);
    public static final byte G_DIPLOID_ALLELE = (byte) ((G_ALLELE << 4) | G_ALLELE);
    public static final byte T_DIPLOID_ALLELE = (byte) ((T_ALLELE << 4) | T_ALLELE);
    public static final byte INSERT_DIPLOID_ALLELE = (byte) ((INSERT_ALLELE << 4) | INSERT_ALLELE);
    public static final byte GAP_DIPLOID_ALLELE = (byte) ((GAP_ALLELE << 4) | GAP_ALLELE);
    public static final byte UNDEFINED_DIPLOID_ALLELE = (byte) ((UNDEFINED_ALLELE << 4) | UNDEFINED_ALLELE);

    // Text forms of the insert, gap and undefined alleles.
    public static final String INSERT_ALLELE_STR = "+";
    public static final String GAP_ALLELE_STR = "-";
    public static final char GAP_ALLELE_CHAR = '-';
    public static final String UNDEFINED_ALLELE_STR = "X";

    /**
     * Allele labels in a single row of 16 entries, positioned by haploid
     * allele code: codes 0-5 are A, C, G, T, insert and gap, codes 6-13 are
     * the undefined label, and codes 14 and 15 are the rare and unknown
     * labels taken from {@code GenotypeTable}.
     */
    public static final String[][] NUCLEOTIDE_ALLELES = {
        {
            "A", "C", "G", "T", "+", "-",
            UNDEFINED_ALLELE_STR, UNDEFINED_ALLELE_STR, UNDEFINED_ALLELE_STR, UNDEFINED_ALLELE_STR,
            UNDEFINED_ALLELE_STR, UNDEFINED_ALLELE_STR, UNDEFINED_ALLELE_STR, UNDEFINED_ALLELE_STR,
            GenotypeTable.RARE_ALLELE_STR, GenotypeTable.UNKNOWN_ALLELE_STR
        }
    };

    /**
     * Count of nucleotide states, not counting the rare and unknown states.
     */
    public static final int NUMBER_NUCLEOTIDE_ALLELES = 6;
    /**
     * Genotype text to diploid byte. Keys are two-symbol genotypes ("AC"),
     * single symbols (treated as homozygous, "A" == "AA"), and the
     * single-character heterozygote codes R, Y, S, W, K, M and 0. In each
     * value the high nibble is the first allele and the low nibble the second.
     */
    private static final Map<String, Byte> NUCLEOTIDE_DIPLOID_HASH = new HashMap<>();

    static {
        // Position-aligned symbol/nibble table. N and X share code 0xF and Z
        // uses 0xE; those are the slots NUCLEOTIDE_ALLELES gives to the
        // unknown and rare labels.
        final String symbols = "ACGT+-NXZ";
        final int[] nibbles = {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xF, 0xF, 0xE};

        for (int first = 0; first < nibbles.length; first++) {
            final char firstSymbol = symbols.charAt(first);

            // A lone symbol is the homozygous genotype of that symbol.
            NUCLEOTIDE_DIPLOID_HASH.put(String.valueOf(firstSymbol),
                    (byte) ((nibbles[first] << 4) | nibbles[first]));

            // Every ordered pair of symbols.
            for (int second = 0; second < nibbles.length; second++) {
                final String pair = new String(new char[]{firstSymbol, symbols.charAt(second)});
                NUCLEOTIDE_DIPLOID_HASH.put(pair, (byte) ((nibbles[first] << 4) | nibbles[second]));
            }
        }

        // Single-character codes for heterozygous genotypes.
        NUCLEOTIDE_DIPLOID_HASH.put("R", (byte) 0x02); // AG
        NUCLEOTIDE_DIPLOID_HASH.put("Y", (byte) 0x13); // CT
        NUCLEOTIDE_DIPLOID_HASH.put("S", (byte) 0x21); // GC
        NUCLEOTIDE_DIPLOID_HASH.put("W", (byte) 0x03); // AT
        NUCLEOTIDE_DIPLOID_HASH.put("K", (byte) 0x23); // GT
        NUCLEOTIDE_DIPLOID_HASH.put("M", (byte) 0x01); // AC
        NUCLEOTIDE_DIPLOID_HASH.put("0", (byte) 0x54); // -+
    }
    /**
     * Array form of NUCLEOTIDE_DIPLOID_HASH, addressed through
     * getNucleotideDiploidArrayIndex. Slots with no entry hold
     * UNDEFINED_DIPLOID_ALLELE.
     */
    private static final byte[] NUCLEOTIDE_DIPLOID_ARRAY = new byte[256];

    static {
        Arrays.fill(NUCLEOTIDE_DIPLOID_ARRAY, UNDEFINED_DIPLOID_ALLELE);
        for (Map.Entry<String, Byte> entry : NUCLEOTIDE_DIPLOID_HASH.entrySet()) {
            int slot = getNucleotideDiploidArrayIndex(entry.getKey());
            NUCLEOTIDE_DIPLOID_ARRAY[slot] = entry.getValue();
        }
    }
    // Bit-mixing parameters used to fold a two-character genotype into 0..255.
    private static final int mask = 0x2F;
    private static final int mask2 = 0x81;
    private static final int shift = 2;

    /**
     * Computes the lookup-array index for a genotype string.
     * A one-character string maps to its character code. A two-character
     * string is folded into the range 0..255 by shifting the second
     * character, XOR-ing it with a constant and with the masked first
     * character, and keeping the low eight bits.
     *
     * @param str genotype text of length one or two
     *
     * @return array index for the genotype
     *
     * @throws IllegalStateException if the string length is neither 1 nor 2
     */
    private static int getNucleotideDiploidArrayIndex(String str) {
        switch (str.length()) {
            case 1:
                return str.charAt(0);
            case 2: {
                int secondPart = (str.charAt(1) << shift) ^ (byte) mask2;
                int firstPart = str.charAt(0) & (byte) mask;
                return (secondPart ^ firstPart) & 0xFF;
            }
            default:
                throw new IllegalStateException("NucleotideAlignmentConstants: getNucleotideDiploidArrayIndex: illegal str: " + str);
        }
    }
    public static final Map NUCLEOTIDE_IUPAC_HASH = new HashMap();

    static {
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x00, "A"); // AA
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x01, "M"); // AC
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x02, "R"); // AG
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x03, "W"); // AT
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x04, "0"); // A+
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x05, "0"); // A-
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x0E, "A"); // AZ
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x0F, "A"); // AN

        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x10, "M"); // CA
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x11, "C"); // CC
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x12, "S"); // CG
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x13, "Y"); // CT
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x14, "0"); // C+
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x15, "0"); // C-
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x1E, "C"); // CZ
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x1F, "C"); // CN

        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x20, "R"); // GA
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x21, "S"); // GC
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x22, "G"); // GG
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x23, "K"); // GT
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x24, "0"); // G+
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x25, "0"); // G-
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x2E, "G"); // GZ
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x2F, "G"); // GN

        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x30, "W"); // TA
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x31, "Y"); // TC
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x32, "K"); // TG
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x33, "T"); // TT
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x34, "0"); // T+
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x35, "0"); // T-
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x3E, "T"); // TZ
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x3F, "T"); // TN

        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x40, "0"); // +A
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x41, "0"); // +C
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x42, "0"); // +G
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x43, "0"); // +T
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x44, "+"); // ++
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x45, "0"); // +-
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x4E, "+"); // +Z
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x4F, "+"); // +N

        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x50, "0"); // -A
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x51, "0"); // -C
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x52, "0"); // -G
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x53, "0"); // -T
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x54, "0"); // -+
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x55, "-"); // --
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x5E, "-"); // -Z
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0x5F, "-"); // -N

        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xE0, "A"); // ZA
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xE1, "C"); // ZC
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xE2, "G"); // ZG
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xE3, "T"); // ZT
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xE4, "+"); // Z+
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xE5, "-"); // Z-
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xEE, "Z"); // ZZ
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xEF, "N"); // ZN

        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xF0, "A"); // NA
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xF1, "C"); // NC
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xF2, "G"); // NG
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xF3, "T"); // NT
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xF4, "+"); // N+
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xF5, "-"); // N-
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xFE, "N"); // NZ
        NUCLEOTIDE_IUPAC_HASH.put((byte) 0xFF, "N"); // NN

    }
    // Array forms of NUCLEOTIDE_IUPAC_HASH. The first two are indexed by the
    // unsigned diploid byte; the complement array is indexed by the text code
    // character. Unpopulated slots hold the undefined symbol.
    private static final String[] NUCLEOTIDE_IUPAC_ARRAY = new String[256];
    private static final char[] NUCLEOTIDE_IUPAC_CHARARRAY = new char[256];
    private static final char[] NUCLEOTIDE_IUPAC_CHARCOMPLEMENT_ARRAY = new char[256];

    static {
        final char undefinedChar = UNDEFINED_ALLELE_STR.charAt(0);
        Arrays.fill(NUCLEOTIDE_IUPAC_ARRAY, UNDEFINED_ALLELE_STR);
        Arrays.fill(NUCLEOTIDE_IUPAC_CHARARRAY, undefinedChar);
        Arrays.fill(NUCLEOTIDE_IUPAC_CHARCOMPLEMENT_ARRAY, undefinedChar);

        for (Map.Entry<Byte, String> entry : NUCLEOTIDE_IUPAC_HASH.entrySet()) {
            final byte genotype = entry.getKey();
            final String code = entry.getValue();
            final char codeChar = code.charAt(0);
            final int slot = genotype & 0xFF;

            NUCLEOTIDE_IUPAC_ARRAY[slot] = code;
            NUCLEOTIDE_IUPAC_CHARARRAY[slot] = codeChar;

            // Record the text code of the complemented genotype under this code's character.
            final byte complemented = getNucleotideDiploidComplement(genotype);
            NUCLEOTIDE_IUPAC_CHARCOMPLEMENT_ARRAY[codeChar] = NUCLEOTIDE_IUPAC_HASH.get(complemented).charAt(0);
        }
    }
    /**
     * Haploid allele text (upper- or lower-case base, "+", "-", "N"/"n") to
     * haploid allele code.
     */
    private static final Map<String, Byte> NUCLEOTIDE_ALLELE_HASH = new HashMap<>();

    /**
     * Array form of NUCLEOTIDE_ALLELE_HASH indexed by character code; slots
     * without an entry hold UNDEFINED_ALLELE.
     */
    private static final byte[] NUCLEOTIDE_ALLELE_ARRAY = new byte[256];

    static {
        NUCLEOTIDE_ALLELE_HASH.put("A", A_ALLELE); // A
        NUCLEOTIDE_ALLELE_HASH.put("C", C_ALLELE); // C
        NUCLEOTIDE_ALLELE_HASH.put("G", G_ALLELE); // G
        NUCLEOTIDE_ALLELE_HASH.put("T", T_ALLELE); // T
        NUCLEOTIDE_ALLELE_HASH.put("a", A_ALLELE); // A
        NUCLEOTIDE_ALLELE_HASH.put("c", C_ALLELE); // C
        NUCLEOTIDE_ALLELE_HASH.put("g", G_ALLELE); // G
        NUCLEOTIDE_ALLELE_HASH.put("t", T_ALLELE); // T
        NUCLEOTIDE_ALLELE_HASH.put("+", INSERT_ALLELE); // +
        NUCLEOTIDE_ALLELE_HASH.put("-", GAP_ALLELE); // -
        NUCLEOTIDE_ALLELE_HASH.put("N", GenotypeTable.UNKNOWN_ALLELE); // N
        NUCLEOTIDE_ALLELE_HASH.put("n", GenotypeTable.UNKNOWN_ALLELE); // N
        Arrays.fill(NUCLEOTIDE_ALLELE_ARRAY, UNDEFINED_ALLELE);
        // Every key is a single character, so its char code is the array slot.
        for (Map.Entry<String, Byte> en : NUCLEOTIDE_ALLELE_HASH.entrySet()) {
            NUCLEOTIDE_ALLELE_ARRAY[en.getKey().charAt(0)] = en.getValue();
        }
    }

    /**
     * Private constructor: this class only exposes static members and is not
     * meant to be instantiated.
     */
    private NucleotideAlignmentConstants() {
        // do not instantiate
    }

    /**
     * Returns diploid byte value for given nucleotide value. First four bits
     * contain first allele value. And second four bits contain second allele
     * value. A value that has no table entry yields
     * {@code UNDEFINED_DIPLOID_ALLELE}; this now includes single characters
     * whose code lies outside the lookup table, which previously caused an
     * {@code ArrayIndexOutOfBoundsException}.
     *
     * @param value diploid allele value, one or two characters long
     *
     * @return nucleotide diploid allele byte value
     *
     * @throws IllegalArgumentException if {@code value} is null
     * @throws IllegalStateException if {@code value} is not one or two
     * characters long
     */
    public static byte getNucleotideDiploidByte(String value) {
        if (value == null) {
            // Same exception and message the old NullPointerException handler produced.
            throw new IllegalArgumentException("NucleotideAlignmentConstants: getNucleotideDiploidByte: unknown allele value: " + value);
        }
        int index = getNucleotideDiploidArrayIndex(value);
        if (index >= NUCLEOTIDE_DIPLOID_ARRAY.length) {
            // A single character beyond the table cannot have an entry.
            return UNDEFINED_DIPLOID_ALLELE;
        }
        return NUCLEOTIDE_DIPLOID_ARRAY[index];
    }

    /**
     * Returns Optional diploid byte value for given nucleotide value. First
     * four bits contain first allele value. And second four bits contain
     * second allele value.
     * <p>
     * The result is empty when the value is not one or two characters long,
     * or is a single character whose code lies outside the lookup table.
     * Other values without a table entry are reported as
     * {@code UNDEFINED_DIPLOID_ALLELE} rather than as empty.
     *
     * @param value diploid allele value
     *
     * @return nucleotide diploid allele byte value, or empty as described above
     */
    public static Optional<Byte> parseNucleotideDiploidByte(String value) {
        int index;
        try {
            index = getNucleotideDiploidArrayIndex(value);
        } catch (IllegalStateException ignored) {
            // Wrong length: not a parseable genotype.
            return Optional.empty();
        }
        if (index >= NUCLEOTIDE_DIPLOID_ARRAY.length) {
            // Previously this escaped as an ArrayIndexOutOfBoundsException.
            return Optional.empty();
        }
        return Optional.of(NUCLEOTIDE_DIPLOID_ARRAY[index]);
    }

    /**
     * Reverse lookup from a haploid allele code to the allele text that maps
     * to it. Several texts can share one code (for example upper- and
     * lower-case bases); the first match in the map's iteration order is
     * returned. This is called from junit tests in
     * ReferenceGenomeSequenceTest.java
     *
     * @param value haploid allele byte value
     *
     * @return allele text for the code, or null when no text maps to it
     */
    public static Object getNucleotideAlleleValue(byte value) {
        try {
            for (Object item : NUCLEOTIDE_ALLELE_HASH.entrySet()) {
                Map.Entry<?, ?> pairing = (Map.Entry<?, ?>) item;
                if (pairing.getValue().equals(value)) {
                    return pairing.getKey();
                }
            }
        } catch (NullPointerException e) {
            throw new IllegalArgumentException("NucleotideAlignmentConstants: getNucleotideAlleleByte: unknown allele value: " + value);
        }
        return null;
    }

    /**
     * Returns haploid byte value for given nucleotide value. Only right-most
     * four bits used.
     *
     * @param value haploid allele value
     *
     * @return nucleotide haploid allele byte value
     *
     * @throws IllegalArgumentException if {@code value} has no entry in the
     * allele table (including null)
     */
    public static byte getNucleotideAlleleByte(String value) {
        // Explicit miss check instead of relying on a NullPointerException from unboxing.
        Byte code = NUCLEOTIDE_ALLELE_HASH.get(value);
        if (code == null) {
            throw new IllegalArgumentException("NucleotideAlignmentConstants: getNucleotideAlleleByte: unknown allele value: " + value);
        }
        return code;
    }

    /**
     * Returns haploid byte value for given nucleotide value. Only right-most
     * four bits used. Characters without a table entry yield
     * {@code UNDEFINED_ALLELE}; this now includes characters whose code lies
     * beyond the lookup table, which previously caused an
     * {@code ArrayIndexOutOfBoundsException}.
     *
     * @param value haploid allele value
     *
     * @return nucleotide haploid allele byte value
     */
    public static byte getNucleotideAlleleByte(char value) {
        if (value >= NUCLEOTIDE_ALLELE_ARRAY.length) {
            return UNDEFINED_ALLELE;
        }
        return NUCLEOTIDE_ALLELE_ARRAY[value];
    }

    /**
     * Returns diploid byte value for given nucleotide value. First four bits
     * contain first allele value. And second four bits contain second allele
     * value. Characters without a table entry yield
     * {@code UNDEFINED_DIPLOID_ALLELE}; this now includes characters whose
     * code lies beyond the lookup table, which previously caused an
     * {@code ArrayIndexOutOfBoundsException}.
     *
     * @param value diploid allele value
     *
     * @return nucleotide diploid allele byte value
     */
    public static byte getNucleotideDiploidByte(char value) {
        if (value >= NUCLEOTIDE_DIPLOID_ARRAY.length) {
            return UNDEFINED_DIPLOID_ALLELE;
        }
        return NUCLEOTIDE_DIPLOID_ARRAY[value];
    }

    /**
     * Looks up the IUPAC text for a diploid allele value, treating the byte
     * as an unsigned table index.
     *
     * @param value diploid allele value
     *
     * @return IUPAC String
     */
    public static String getNucleotideIUPAC(byte value) {
        int slot = Byte.toUnsignedInt(value);
        return NUCLEOTIDE_IUPAC_ARRAY[slot];
    }

    /**
     * Looks up the IUPAC character for a diploid allele value, treating the
     * byte as an unsigned table index.
     *
     * @param value diploid allele value
     *
     * @return IUPAC char
     */
    public static char getNucleotideIUPACChar(byte value) {
        int slot = Byte.toUnsignedInt(value);
        return NUCLEOTIDE_IUPAC_CHARARRAY[slot];
    }

    /**
     * Looks up the nucleotide text for a haploid allele value in the single
     * row of {@code NUCLEOTIDE_ALLELES}.
     *
     * @param value haploid value, used directly as the index into that row
     *
     * @return Nucleotide String
     */
    public static String getHaplotypeNucleotide(byte value) {
        String[] labels = NUCLEOTIDE_ALLELES[0];
        return labels[value];
    }

    /**
     * Complements a byte-encoded nucleotide: A and T swap, C and G swap, and
     * every other code is returned unchanged.
     *
     * @param nucleotide nucleotide byte value
     *
     * @return Nucleotide Complement
     */
    public static byte getNucleotideComplement(byte nucleotide) {
        switch (nucleotide) {
            case A_ALLELE:
                return T_ALLELE;
            case T_ALLELE:
                return A_ALLELE;
            case C_ALLELE:
                return G_ALLELE;
            case G_ALLELE:
                return C_ALLELE;
            default:
                return nucleotide;
        }
    }

    /**
     * Complements both alleles of a diploid byte: each nibble is complemented
     * independently and the two results are packed back in the same order.
     *
     * @param diploidAllele diploid allele value
     *
     * @return Nucleotide Complement
     */
    public static byte getNucleotideDiploidComplement(byte diploidAllele) {
        byte highNibble = getNucleotideComplement((byte) ((diploidAllele >>> 4) & 0xf));
        byte lowNibble = getNucleotideComplement((byte) (diploidAllele & 0xf));
        return (byte) ((highNibble << 4) | lowNibble);
    }

    /**
     * Returns the Nucleotide Complement of the given diploid IUPAC encoded
     * alleles. Characters outside the complement table yield the undefined
     * symbol.
     *
     * @param diploidAllele diploid IUPAC
     *
     * @return Nucleotide Complement in IUPAC
     */
    public static char getNucleotideDiploidIUPACComplement(char diploidAllele) {
        // Valid indices stop at length - 1; the former "> 256" test let 256
        // through and overran the table.
        if (diploidAllele >= NUCLEOTIDE_IUPAC_CHARCOMPLEMENT_ARRAY.length) {
            return UNDEFINED_ALLELE_STR.charAt(0);
        }
        return NUCLEOTIDE_IUPAC_CHARCOMPLEMENT_ARRAY[diploidAllele];
    }

    /**
     * Returns whether given allele encodings are for Nucleotide Data, i.e.
     * whether they consist of exactly one row whose entries equal the single
     * row of {@code NUCLEOTIDE_ALLELES} element for element.
     * <p>
     * Earlier versions compared only the first entry and reported true when
     * the row lengths differed; both cases are now evaluated fully.
     *
     * @param alleleStates allele encodings
     *
     * @return true if nucleotide encodings; false otherwise, including for
     * null input
     */
    public static boolean isNucleotideEncodings(String[][] alleleStates) {
        if (alleleStates == null || alleleStates.length != 1) {
            return false;
        }
        // Arrays.equals checks the length and then every element pair.
        return Arrays.equals(alleleStates[0], NucleotideAlignmentConstants.NUCLEOTIDE_ALLELES[0]);
    }

    /**
     * Encodes each character of a haploid allele string (e.g. ACGTA) as its
     * allele byte (e.g. {0,1,2,3,0}).
     *
     * @param haploString haploid allele string
     * @return encoded array of bytes, one per input character
     */
    public static byte[] convertHaplotypeStringToAlleleByteArray(String haploString) {
        final int length = haploString.length();
        byte[] encoded = new byte[length];
        int position = 0;
        while (position < length) {
            encoded[position] = getNucleotideAlleleByte(haploString.charAt(position));
            position++;
        }
        return encoded;
    }

    /**
     * Encodes each character of a genotype (diploid) string (e.g. ACGTA) as
     * its diploid byte (e.g. {0,17,34,51,0}).
     *
     * @param diploidString genotype string, one character per genotype
     * @return encoded array of bytes, one per input character
     */
    public static byte[] convertGenotypeStringToDiploidByteArray(String diploidString) {
        final int length = diploidString.length();
        byte[] encoded = new byte[length];
        int position = 0;
        while (position < length) {
            encoded[position] = getNucleotideDiploidByte(diploidString.charAt(position));
            position++;
        }
        return encoded;
    }

    /**
     * Builds the reverse complement of a haploid allele byte array: the input
     * is read back to front and each allele is complemented, so
     * {0,1,2,3,0} becomes {3,0,1,2,3}. The input array is not modified.
     *
     * @param alleles haploid allele bytes
     * @return newly allocated array holding the reverse complement
     */
    public static byte[] reverseComplementAlleleByteArray(byte[] alleles) {
        final int count = alleles.length;
        byte[] result = new byte[count];
        for (int target = 0, source = count - 1; target < count; target++, source--) {
            result[target] = getNucleotideComplement(alleles[source]);
        }
        return result;
    }

    /**
     * Renders a nucleotide byte array as text by appending, for each byte,
     * whatever {@code getNucleotideAlleleValue} returns for it.
     *
     * @param b the array of nucleotide bytes to convert
     * @return a string representing the nucleotide bytes
     */
    public static String nucleotideBytetoString(byte[] b) {
        StringBuilder text = new StringBuilder();
        for (byte allele : b) {
            text.append(getNucleotideAlleleValue(allele));
        }
        return text.toString();
    }

    /**
     * Return whether a simple homozygous DNA state (i.e. no indels or N):
     * the genotype must be homozygous and its allele code must be below
     * {@code INSERT_ALLELE}.
     * @param diploidAllele diploid
     * @return true if homozygous A, C, G or T
     */
    public static boolean isHomozygousACGT(Byte diploidAllele) {
        return GenotypeTableUtils.isHomozygous(diploidAllele)
                && (diploidAllele & 0xf) < INSERT_ALLELE;
    }

    /**
     * Return whether a simple DNA state (i.e. no indels or N): both allele
     * codes must be below {@code INSERT_ALLELE}. Heterozygous resolves to
     * true also.
     * @param diploidAllele diploid
     * @return true if both alleles are A, C, G or T
     */
    public static boolean isACGT(Byte diploidAllele) {
        int firstAllele = (diploidAllele >>> 4) & 0xf;
        int secondAllele = diploidAllele & 0xf;
        return firstAllele < INSERT_ALLELE && secondAllele < INSERT_ALLELE;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy