All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.funcotator.TranscriptSelectionMode Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.tools.funcotator;

import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation;
import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGTFFieldConstants;
import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGtfFeature;

import java.util.Comparator;
import java.util.Set;

/**
 * The manner to select a single transcript from a set of transcripts to report as the "best" or main transcript.
 * That is, the transcript that has the most information on it in the output.
 */
public enum TranscriptSelectionMode {

    /**
     * BEST_EFFECT
     *
     * Select a transcript to be reported with details with priority on effect according to the folowing list of selection criteria:
     *
     * Choose the transcript that is on the custom list specified by the user. If no list was specified, treat as if no transcripts were on the list (tie).
     * In case of tie, choose the transcript that yields the variant classification highest on the variant classification rank list (see below).
     * If still a tie, choose the transcript with highest level of curation. Note that this means lower number is better for level (see below).
     * If still a tie, choose the transcript with the best appris annotation (see below).
     * If still a tie, choose the transcript with the longest transcript sequence length.
     * If still a tie, choose the first transcript, alphabetically.
     *
     * Levels of Curation:
     *     1 (verified loci)
     *     2 manually annotated loci
     *     3 automatically annotated loci
     *
     * Variant Classification Scores (See {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification} as well):
     *      De_novo_Start_OutOfFrame    0
     *      Nonsense_Mutation           0
     *      Nonstop_Mutation            0
     *      Missense_Mutation           1
     *      De_novo_Start_InFrame       1
     *      In_Frame_Del                1
     *      In_Frame_Ins                1
     *      Frame_Shift_Del             2
     *      Frame_Shift_Ins             2
     *      Frame_Shift_Sub             2
     *      Start_Codon_SNP             3
     *      Start_Codon_Del             3
     *      Start_Codon_Ins             3
     *      Start_Codon_DNP             3
     *      Start_Codon_TNP             3
     *      Start_Codon_ONP             3
     *      Stop_Codon_SNP              3
     *      Stop_Codon_Del              3
     *      Stop_Codon_Ins              3
     *      Stop_Codon_DNP              3
     *      Stop_Codon_TNP              3
     *      Stop_Codon_ONP              3
     *      Splice_Site                 4
     *      Splice_Site_SNP             4
     *      Splice_Site_Del             4
     *      Splice_Site_Ins             4
     *      Splice_Site_DNP             4
     *      Splice_Site_TNP             4
     *      Splice_Site_ONP             4
     *      Splice_Site                 4
     *      miRNA                       4
     *      Silent                      5
     *      3UTR                        6
     *      5UTR                        6
     *      Intron                      7
     *      5Flank                      8
     *      3Flank                      8
     *      Non-coding_Transcript       9
     *      IGR	                        20
     *      TX-REF-MISMATCH             100
     *
     * APPRIS Ranks (http://appris.bioinfo.cnio.es/):
     *
     *      appris_principal
     *      appris_candidate_highest_score
     *      appris_candidate_longest_ccds
     *      appris_candidate_ccds
     *      appris_candidate_longest_seq
     *      appris_candidate_longest
     *      appris_candidate
     *      no appris tag present
     */
    BEST_EFFECT {
        public Comparator getComparator(final Set userRequestedTranscripts) {
            return new BestEffectGencodeFuncotationComparator(userRequestedTranscripts);
        }
    },

    /**
     * CANONICAL
     *
     * Select a transcript to be reported with details with priority on canonical order according to the folowing list of selection criteria:
     *
     * Choose the transcript that is on the custom list specified by the user. If no list was specified, treat as if all transcripts were on the list (tie).
     * In case of tie, choose the transcript with highest level of curation. Note that this means lower number is better for level (see below).
     * If still a tie, choose the transcript that yields the variant classification highest on the variant classification rank list (see below).
     * If still a tie, choose the transcript with the best appris annotation (see below).
     * If still a tie, choose the transcript with the longest transcript sequence length.
     * If still a tie, choose the first transcript, alphabetically.
     *
     * Levels of Curation:
     *     1 (verified loci)
     *     2 manually annotated loci
     *     3 automatically annotated loci
     *
     * Variant Classification Scores (See {@link GencodeFuncotation.VariantClassification} as well):
     *      De_novo_Start_OutOfFrame    0
     *      Nonsense_Mutation           0
     *      Nonstop_Mutation            0
     *      Missense_Mutation           1
     *      De_novo_Start_InFrame       1
     *      In_Frame_Del                1
     *      In_Frame_Ins                1
     *      Frame_Shift_Del             2
     *      Frame_Shift_Ins             2
     *      Frame_Shift_Sub             2
     *      Start_Codon_SNP             3
     *      Start_Codon_Del             3
     *      Start_Codon_Ins             3
     *      Start_Codon_DNP             3
     *      Start_Codon_TNP             3
     *      Start_Codon_ONP             3
     *      Stop_Codon_SNP              3
     *      Stop_Codon_Del              3
     *      Stop_Codon_Ins              3
     *      Stop_Codon_DNP              3
     *      Stop_Codon_TNP              3
     *      Stop_Codon_ONP              3
     *      Splice_Site                 4
     *      Splice_Site_SNP             4
     *      Splice_Site_Del             4
     *      Splice_Site_Ins             4
     *      Splice_Site_DNP             4
     *      Splice_Site_TNP             4
     *      Splice_Site_ONP             4
     *      Splice_Site                 4
     *      miRNA                       4
     *      Silent                      5
     *      3UTR                        6
     *      5UTR                        6
     *      Intron                      7
     *      5Flank                      8
     *      3Flank                      8
     *      Non-coding_Transcript       9
     *      IGR	                        20
     *      TX-REF-MISMATCH             100
     *
     * APPRIS Ranks (http://appris.bioinfo.cnio.es/):
     *
     *      appris_principal
     *      appris_candidate_highest_score
     *      appris_candidate_longest_ccds
     *      appris_candidate_ccds
     *      appris_candidate_longest_seq
     *      appris_candidate_longest
     *      appris_candidate
     *      no appris tag present
     */
    CANONICAL {
        public Comparator getComparator(final Set userRequestedTranscripts) {
            return new CanonicalGencodeFuncotationComparator(userRequestedTranscripts);
        }
    },

    /**
     * Same as CANONICAL, but indicates that no transcripts should be dropped.  Render all overlapping transcripts.
     */
    ALL {
        public Comparator getComparator(final Set userRequestedTranscripts) {
            return new CanonicalGencodeFuncotationComparator(userRequestedTranscripts);
        }
    };

    public abstract Comparator getComparator(final Set userRequestedTranscripts);

    private static class ComparatorByUserTranscript implements Comparator {

        private final Set userRequestedTranscripts;

        public ComparatorByUserTranscript( final Set userRequestedTranscripts ) {
            this.userRequestedTranscripts = userRequestedTranscripts;
        }

        @Override
        public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {
            // Choose the transcript that is on the custom list specified by the user:
            if ( FuncotatorUtils.isFuncotationInTranscriptList(a, userRequestedTranscripts) && (!FuncotatorUtils.isFuncotationInTranscriptList(b, userRequestedTranscripts)) ) {
                return -1;
            }
            else if ( (!FuncotatorUtils.isFuncotationInTranscriptList(a, userRequestedTranscripts)) && FuncotatorUtils.isFuncotationInTranscriptList(b, userRequestedTranscripts) ) {
                return 1;
            }
            else {
                return 0;
            }
        }
    }

    private static class ComparatorByIgrStatus implements Comparator {

        public ComparatorByIgrStatus(){}

        @Override
        public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {
            // Check to see if one is an IGR.  IGR's have only a subset of the information in them, so it's easier to
            // order them if they're IGRs:
            if ( (b.getVariantClassification().equals(GencodeFuncotation.VariantClassification.IGR)) &&
                    (!a.getVariantClassification().equals(GencodeFuncotation.VariantClassification.IGR)) ) {
                return -1;
            }
            else if ( (a.getVariantClassification().equals(GencodeFuncotation.VariantClassification.IGR)) &&
                    (!b.getVariantClassification().equals(GencodeFuncotation.VariantClassification.IGR)) ) {
                return 1;
            }
            else {
                return 0;
            }
        }
    }

    private static class ComparatorByVariantClassification implements Comparator {

        public ComparatorByVariantClassification(){}

        @Override
        public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {
            // Check highest variant classification:
            if ( a.getVariantClassification().getSeverity() < b.getVariantClassification().getSeverity() ) {
                return -1;
            }
            else if ( a.getVariantClassification().getSeverity() > b.getVariantClassification().getSeverity() ) {
                return 1;
            }
            else {
                return 0;
            }
        }
    }

    private static class ComparatorByProteinCodingStatus implements Comparator {

        public ComparatorByProteinCodingStatus(){}

        @Override
        public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {
            // Is it protein coding?
            final boolean isAProteinCoding = GencodeGTFFieldConstants.KnownGeneBiotype.PROTEIN_CODING.toString().equals(a.getGeneTranscriptType());
            final boolean isBProteinCoding = GencodeGTFFieldConstants.KnownGeneBiotype.PROTEIN_CODING.toString().equals(b.getGeneTranscriptType());
            if ( isAProteinCoding != isBProteinCoding ) {
                if ( isAProteinCoding ) {
                    return -1;
                }
                else {
                    return 1;
                }
            }
            else {
                return 0;
            }
        }
    }

    private static class ComparatorByLocusLevel implements Comparator {

        public ComparatorByLocusLevel(){}

        @Override
        public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {
            // Check locus/curation levels:
            if ( (a.getLocusLevel() != null) && (b.getLocusLevel() == null) ) {
                return -1;
            }
            else if ( (a.getLocusLevel() == null ) && (b.getLocusLevel() != null) ) {
                return 1;
            }
            else if ( (a.getLocusLevel() == null ) && (b.getLocusLevel() == null) ) {
                return 0;
            }
            else {
                return a.getLocusLevel().compareTo( b.getLocusLevel() );
            }
        }
    }

    private static class ComparatorByApprisRank implements Comparator {

        public ComparatorByApprisRank(){}

        @Override
        public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {
            // Check the appris annotation:
            if ( (a.getApprisRank() != null) && (b.getApprisRank() == null) ) {
                return -1;
            }
            else if ( (a.getApprisRank() == null ) && (b.getApprisRank() != null) ) {
                return 1;
            }
            else if ( (a.getApprisRank() != null) && (b.getApprisRank() != null) && (!a.getApprisRank().equals(b.getApprisRank())) ) {
                return a.getApprisRank().compareTo( b.getApprisRank() );
            }
            else {
                return 0;
            }
        }
    }

    private static class ComparatorByTranscriptSequenceLength implements Comparator {

        public ComparatorByTranscriptSequenceLength(){}

        @Override
        public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {
            // Check transcript sequence length:
            // Note that since we want longer transcripts to be sorted earlier in the list, we reverse b and a in the last check.
            if ( (a.getTranscriptLength() != null) && (b.getTranscriptLength() == null) ) {
                return -1;
            }
            else if ( (a.getTranscriptLength() == null ) && (b.getTranscriptLength() != null) ) {
                return 1;
            }
            else if ( (a.getTranscriptLength() != null) && (b.getTranscriptLength() != null) && (!a.getTranscriptLength().equals(b.getTranscriptLength())) ) {
                return b.getTranscriptLength().compareTo( a.getTranscriptLength() );
            }
            else {
                return 0;
            }
        }
    }

    private static class ComparatorByTranscriptName implements Comparator {

        public ComparatorByTranscriptName(){}

        @Override
        public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {
            // Compare ABC order by transcript name:
            if ( (a.getAnnotationTranscript() != null) && (b.getAnnotationTranscript() == null) ) {
                return -1;
            }
            else if ( (a.getAnnotationTranscript() == null ) && (b.getAnnotationTranscript() != null) ) {
                return 1;
            }
            // Need a default case in case all the comparison criteria are the same:
            else if ( (a.getAnnotationTranscript() == null ) && (b.getAnnotationTranscript() == null) ) {
                return -1;
            }
            else {
                return a.getAnnotationTranscript().compareTo(b.getAnnotationTranscript());
            }
        }
    }

    /**
     * Comparator class for Best Effect order.
     * Complex enough that a Lambda would be utter madness.
     */
    static class BestEffectGencodeFuncotationComparator implements Comparator {

        private final Comparator byUserTranscript;
        private final Comparator byIgrStatus;
        private final Comparator byVariantClassification;
        private final Comparator byProteinCodingStatus;
        private final Comparator byLocusLevel;
        private final Comparator byApprisRank;
        private final Comparator byTranscriptLength;
        private final Comparator byTranscriptName;

        private final Comparator chainedComparator;

        public BestEffectGencodeFuncotationComparator( final Set userRequestedTranscripts ) {
            byUserTranscript = new ComparatorByUserTranscript(userRequestedTranscripts);
            byIgrStatus = new ComparatorByIgrStatus();
            byVariantClassification = new ComparatorByVariantClassification();
            byProteinCodingStatus = new ComparatorByProteinCodingStatus();
            byLocusLevel = new ComparatorByLocusLevel();
            byApprisRank = new ComparatorByApprisRank();
            byTranscriptLength = new ComparatorByTranscriptSequenceLength();
            byTranscriptName = new ComparatorByTranscriptName();

            chainedComparator = byUserTranscript
                    .thenComparing(byIgrStatus)
                    .thenComparing(byVariantClassification)
                    .thenComparing(byProteinCodingStatus)
                    .thenComparing(byLocusLevel)
                    .thenComparing(byApprisRank)
                    .thenComparing(byTranscriptLength)
                    .thenComparing(byTranscriptName);
        }

        @Override
        public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {
            // 0   - Choose the transcript that is on the custom list specified by the user
            // 1a) - Check to see if one is an IGR.
            // 1b) - Check highest variant classification.
            // 2a) - Is it protein coding?
            // 2b) - Check locus/curation levels.
            // 3)  - Check the appris annotation.
            // 4)  - Check transcript sequence length.
            // 5)  - Default to ABC order by transcript name.
            return chainedComparator.compare(a, b);
        }
    }

    /**
     * Comparator class for Canonical order.
     * Complex enough that a Lambda would be utter madness.
     */
    static class CanonicalGencodeFuncotationComparator implements Comparator {

        private final Comparator byUserTranscript;
        private final Comparator byIgrStatus;
        private final Comparator byVariantClassification;
        private final Comparator byProteinCodingStatus;
        private final Comparator byLocusLevel;
        private final Comparator byApprisRank;
        private final Comparator byTranscriptLength;
        private final Comparator byTranscriptName;

        private final Comparator chainedComparator;

        public CanonicalGencodeFuncotationComparator(final Set userRequestedTranscripts ) {
            byUserTranscript = new ComparatorByUserTranscript(userRequestedTranscripts);
            byIgrStatus = new ComparatorByIgrStatus();
            byVariantClassification = new ComparatorByVariantClassification();
            byProteinCodingStatus = new ComparatorByProteinCodingStatus();
            byLocusLevel = new ComparatorByLocusLevel();
            byApprisRank = new ComparatorByApprisRank();
            byTranscriptLength = new ComparatorByTranscriptSequenceLength();
            byTranscriptName = new ComparatorByTranscriptName();

            chainedComparator = byUserTranscript
                    .thenComparing(byProteinCodingStatus)
                    .thenComparing(byLocusLevel)
                    .thenComparing(byApprisRank)
                    .thenComparing(byIgrStatus)
                    .thenComparing(byVariantClassification)
                    .thenComparing(byTranscriptLength)
                    .thenComparing(byTranscriptName);
        }

        @Override
        public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {

            // 0)  - Choose the transcript that is on the custom list specified by the user:
            // 1a) - Is it protein coding?
            // 1b) - Check locus/curation levels.
            // 2)  - Check the appris annotation:
            // 3a) - Check to see if one is an IGR.
            // 3b) - Check highest variant classification:
            // 4)  - Check transcript sequence length:
            // 5)  - Default to ABC order by transcript name:
            return chainedComparator.compare(a, b);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy