All downloads are free. The search and download features use the official Maven repository.

org.broadinstitute.hellbender.utils.variant.GATKVCFConstants Maven / Gradle / Ivy

There is a newer version: 4.6.0.0
Show newest version
package org.broadinstitute.hellbender.utils.variant;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.vcf.VCFConstants;

import java.util.Arrays;
import java.util.List;

/**
 * Constants (primarily FORMAT/INFO keys and filter names) that appear in VCF files used by the GATK.
 *
 * <p>VCF-standard constants live in {@code VCFConstants} in htsjdk. Keys that appear in header lines
 * should have matching entries in GATKVCFHeaderLines.
 *
 * <p>This class only holds static members and is not meant to be instantiated.
 */
public final class GATKVCFConstants {

    /** Not instantiable: this class is a holder for static constants and one static helper. */
    private GATKVCFConstants() {
    }

    // Contig header-line attribute keys
    public static final String CONTIG_ID_KEY =                      "ID";
    public static final String CONTIG_LENGTH_KEY =                  "length";
    public static final String ASSEMBLY_NAME_KEY =                  "assembly";

    //INFO keys
    public static final String ALLELE_SPECIFIC_PREFIX =             "AS_";
    public static final String AS_FILTER_STATUS_KEY =               "AS_FilterStatus";
    public static final String RAW_RMS_MAPPING_QUALITY_DEPRECATED = "RAW_MQ";  //NOTE: this is deprecated in favor of the new RAW_MQandDP below
    public static final String MAPPING_QUALITY_DEPTH_DEPRECATED =   "MQ_DP";  //NOTE: this is deprecated in favor of the new RAW_MQandDP below
    public static final String RAW_MAPPING_QUALITY_WITH_DEPTH_KEY = "RAW_MQandDP";
    public static final String AS_RMS_MAPPING_QUALITY_KEY =         "AS_MQ";
    public static final String AS_RAW_RMS_MAPPING_QUALITY_KEY =     "AS_RAW_MQ";
    public static final String AS_CULPRIT_KEY =                     "AS_culprit";
    public static final String AS_VQS_LOD_KEY =                     "AS_VQSLOD";
    public static final String ORIGINAL_AC_KEY =                    "AC_Orig"; //SelectVariants
    public static final String ORIGINAL_AF_KEY =                    "AF_Orig"; //SelectVariants
    public static final String ORIGINAL_AN_KEY =                    "AN_Orig"; //SelectVariants
    public static final String AC_ADJUSTED_KEY =                    "AC_adj"; //GnarlyGenotyper
    public static final String BASE_QUAL_RANK_SUM_KEY =             "BaseQRankSum";
    public static final String AS_BASE_QUAL_RANK_SUM_KEY =          "AS_BaseQRankSum";
    public static final String AS_RAW_BASE_QUAL_RANK_SUM_KEY =      "AS_RAW_BaseQRankSum";
    public static final String GENOTYPE_AND_VALIDATE_STATUS_KEY =   "callStatus";
    public static final String CLIPPING_RANK_SUM_KEY =              "ClippingRankSum";
    public static final String CULPRIT_KEY =                        "culprit";
    public static final String ORIGINAL_DP_KEY =                    "DP_Orig"; //SelectVariants
    public static final String DOWNSAMPLED_KEY =                    "DS";
    public static final String EVENT_COUNT_IN_HAPLOTYPE_KEY =       "ECNT"; //M2
    public static final String FISHER_STRAND_KEY =                  "FS";
    public static final String AS_FISHER_STRAND_KEY =               "AS_FS";
    public static final String AS_SB_TABLE_KEY =                    "AS_SB_TABLE";
    public static final String SB_TABLE_KEY =                       "SB_TABLE";
    public static final String GQ_MEAN_KEY =                        "GQ_MEAN";
    public static final String GQ_STDEV_KEY =                       "GQ_STDDEV";
    public static final String HAPLOTYPE_SCORE_KEY =                "HaplotypeScore";
    public static final String HI_CONF_DENOVO_KEY =                 "hiConfDeNovo";
    public static final String INTERVAL_GC_CONTENT_KEY =            "IGC";
    public static final String INBREEDING_COEFFICIENT_KEY =         "InbreedingCoeff";
    public static final String AS_INBREEDING_COEFFICIENT_KEY =      "AS_InbreedingCoeff";
    public static final String EXCESS_HET_KEY =                     "ExcessHet";
    public static final String RAW_GENOTYPE_COUNT_KEY =             "RAW_GT_COUNT";
    public static final String LIKELIHOOD_RANK_SUM_KEY =            "LikelihoodRankSum";
    public static final String LO_CONF_DENOVO_KEY =                 "loConfDeNovo";
    public static final String MLE_ALLELE_COUNT_KEY =               "MLEAC";
    public static final String MLE_ALLELE_FREQUENCY_KEY =           "MLEAF";
    public static final String MAP_QUAL_RANK_SUM_KEY =              "MQRankSum";
    public static final String RAW_MAP_QUAL_RANK_SUM_KEY =          "RAW_MQRankSum";
    public static final String AS_MAP_QUAL_RANK_SUM_KEY =           "AS_MQRankSum";
    public static final String AS_RAW_MAP_QUAL_RANK_SUM_KEY =       "AS_RAW_MQRankSum";
    public static final String NOCALL_CHROM_KEY =                   "NCC";
    public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY =   "NDA";
    public static final String NEGATIVE_LABEL_KEY =                 "NEGATIVE_TRAIN_SITE";
    public static final String GENOTYPE_PRIOR_KEY =                 "PG";
    public static final String POSITIVE_LABEL_KEY =                 "POSITIVE_TRAIN_SITE";
    public static final String QUAL_BY_DEPTH_KEY =                  "QD";
    public static final String AS_QUAL_BY_DEPTH_KEY =               "AS_QD";
    public static final String AS_QUAL_KEY =                        "AS_QUAL";
    public static final String RAW_QUAL_APPROX_KEY =                "QUALapprox";
    public static final String AS_RAW_QUAL_APPROX_KEY =             "AS_QUALapprox";
    public static final String VARIANT_DEPTH_KEY =                  "VarDP";
    public static final String AS_VARIANT_DEPTH_KEY =               "AS_VarDP";
    public static final String AS_ALT_ALLELE_DEPTH_KEY =            "AS_AltDP";
    public static final String READ_POS_RANK_SUM_KEY =              "ReadPosRankSum";
    public static final String AS_READ_POS_RANK_SUM_KEY =           "AS_ReadPosRankSum";
    public static final String AS_RAW_READ_POS_RANK_SUM_KEY =       "AS_RAW_ReadPosRankSum";
    public static final String REPEATS_PER_ALLELE_KEY =             "RPA";
    public static final String REPEAT_UNIT_KEY =                    "RU";
    public static final String SAMPLE_LIST_KEY =                    "Samples";
    public static final String STRAND_ODDS_RATIO_KEY =              "SOR";
    public static final String AS_STRAND_ODDS_RATIO_KEY =           "AS_SOR";
    public static final String STR_PRESENT_KEY =                    "STR";
    public static final String VQS_LOD_KEY =                        "VQSLOD";
    public static final String CNN_1D_KEY =                         "CNN_1D";
    public static final String CNN_2D_KEY =                         "CNN_2D";
    public static final String F1R2_KEY =                           "F1R2";
    public static final String F2R1_KEY =                           "F2R1";

    // Mutect2-specific INFO keys
    public static final String TUMOR_LOG_10_ODDS_KEY =              "TLOD";
    public static final String NORMAL_LOG_10_ODDS_KEY =             "NLOD";
    public static final String IN_PON_KEY =                         "PON";
    public static final String NORMAL_ARTIFACT_LOG_10_ODDS_KEY =    "NALOD";
    public static final String POPULATION_AF_KEY =                  "POPAF";
    public static final String GERMLINE_QUAL_KEY =                  "GERMQ";
    public static final String SEQUENCING_QUAL_KEY =                "SEQQ";
    public static final String POLYMERASE_SLIPPAGE_QUAL_KEY =       "STRQ";
    public static final String STRAND_QUAL_KEY =                    "STRANDQ";
    public static final String CONTAMINATION_QUAL_KEY =             "CONTQ";
    public static final String READ_ORIENTATION_QUAL_KEY =          "ROQ";
    public static final String ORIGINAL_CONTIG_MISMATCH_KEY =       "OCM";
    public static final String N_COUNT_KEY =                        "NCount";
    public static final String AS_UNIQUE_ALT_READ_SET_COUNT_KEY =   "AS_UNIQ_ALT_READ_COUNT";
    public static final String MEDIAN_BASE_QUALITY_KEY =            "MBQ";
    public static final String MEDIAN_MAPPING_QUALITY_KEY =         "MMQ";
    public static final String MEDIAN_FRAGMENT_LENGTH_KEY =         "MFRL";
    // NOTE: the identifier below misspells "POSITION"; the name is kept as-is so existing callers keep compiling.
    public static final String MEDIAN_READ_POSITON_KEY =            "MPOS";
    public static final String UNITIG_SIZES_KEY =                   "UNITIGS";
    public static final String ALIGNMENT_SCORE_DIFFERENCE_KEY =     "ALIGN_DIFF";
    public static final String JOINT_ALIGNMENT_COUNT_KEY =          "NALIGNS";

    // Methylation-specific INFO Keys
    public static final String UNCONVERTED_BASE_COVERAGE_KEY =      "UNCONVERTED_BASE_COV";
    public static final String CONVERTED_BASE_COVERAGE_KEY =        "CONVERTED_BASE_COV";
    public static final String METHYLATION_REFERENCE_CONTEXT_KEY =  "REFERENCE_CONTEXT";


    // FORMAT keys
    public static final String ALLELE_BALANCE_KEY =                 "AB";
    public static final String JOINT_LIKELIHOOD_TAG_NAME =          "JL"; //FamilyLikelihoodsUtils
    public static final String JOINT_POSTERIOR_TAG_NAME =           "JP"; //FamilyLikelihoodsUtils
    public static final String MIN_DP_FORMAT_KEY =                  "MIN_DP";
    public static final String MAPPING_QUALITY_ZERO_BY_SAMPLE_KEY = "MQ0";
    public static final String HAPLOTYPE_CALLER_PHASING_GT_KEY =    "PGT";
    public static final String HAPLOTYPE_CALLER_PHASING_ID_KEY =    "PID";
    public static final String PHRED_SCALED_POSTERIORS_KEY =        "PP"; //FamilyLikelihoodsUtils / PosteriorLikelihoodsUtils
    public static final String REFERENCE_GENOTYPE_QUALITY =         "RGQ";
    public static final String GENOTYPE_QUALITY_BY_ALLELE_BALANCE = "ABGQ"; //GnarlyGenotyper
    public static final String GENOTYPE_QUALITY_BY_ALT_CONFIDENCE = "ALTGQ"; //GnarlyGenotyper
    public static final String STRAND_COUNT_BY_SAMPLE_KEY =         "SAC";
    public static final String STRAND_BIAS_BY_SAMPLE_KEY =          "SB";
    public static final String TRANSMISSION_PROBABILITY_KEY =       "TP"; //PhaseByTransmission

    // M2-specific FORMAT keys
    public static final String ALLELE_FRACTION_KEY =                "AF";

    //FILTERS
    /* Note that many filters used throughout GATK (most notably in VariantRecalibration) are dynamic,
       their names (or descriptions) depend on some threshold.  Those filters are not included here
     */
    public static final String CLUSTERED_EVENTS_FILTER_NAME =                  "clustered_events"; //M2
    public static final String GERMLINE_RISK_FILTER_NAME =                     "germline"; //M2
    public static final String LOW_QUAL_FILTER_NAME =                          "LowQual";
    public static final String ALIGNMENT_ARTIFACT_FILTER_NAME =                "alignment";
    public static final String PON_FILTER_NAME =                               "panel_of_normals"; //M2
    public static final String POLYMERASE_SLIPPAGE =                           "slippage"; //M2
    public static final String TUMOR_EVIDENCE_FILTER_NAME =                    "weak_evidence"; //M2
    public static final String MULTIALLELIC_FILTER_NAME =                      "multiallelic"; //M2
    public static final String STRAND_ARTIFACT_FILTER_NAME =                   "strand_bias"; // M2
    public static final String DUPLICATED_EVIDENCE_FILTER_NAME =               "duplicate";
    public static final String ARTIFACT_IN_NORMAL_FILTER_NAME =                "normal_artifact";
    public static final String MEDIAN_BASE_QUALITY_FILTER_NAME =               "base_qual";
    public static final String MEDIAN_MAPPING_QUALITY_FILTER_NAME =            "map_qual";
    public static final String MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME = "fragment";
    public static final String READ_POSITION_FILTER_NAME =                     "position";
    public static final String CONTAMINATION_FILTER_NAME =                     "contamination";
    public static final String READ_ORIENTATION_ARTIFACT_FILTER_NAME =         "orientation";
    public static final String BAD_HAPLOTYPE_FILTER_NAME =                     "haplotype";
    public static final String STRICT_STRAND_BIAS_FILTER_NAME =                "strict_strand";
    public static final String N_RATIO_FILTER_NAME =                           "n_ratio";
    public static final String ALLELE_FRACTION_FILTER_NAME =                   "low_allele_frac";
    public static final String POSSIBLE_NUMT_FILTER_NAME =                     "possible_numt";
    public static final String LOW_HET_FILTER_NAME =                           "mt_many_low_hets";
    public static final String FAIL =                                          "FAIL";
    public static final String SITE_LEVEL_FILTERS =                            "SITE";


    /**
     * Filter names grouped for Mutect, starting with the VCF "passes filters" value.
     * The list is a fixed-size {@link Arrays#asList} view; treat it as read-only.
     */
    public static final List<String> MUTECT_FILTER_NAMES = Arrays.asList(VCFConstants.PASSES_FILTERS_v4, POLYMERASE_SLIPPAGE,
            PON_FILTER_NAME, CLUSTERED_EVENTS_FILTER_NAME, TUMOR_EVIDENCE_FILTER_NAME, GERMLINE_RISK_FILTER_NAME,
            MULTIALLELIC_FILTER_NAME, STRAND_ARTIFACT_FILTER_NAME, ARTIFACT_IN_NORMAL_FILTER_NAME,
            MEDIAN_BASE_QUALITY_FILTER_NAME, MEDIAN_MAPPING_QUALITY_FILTER_NAME,
            MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME,
            READ_POSITION_FILTER_NAME, CONTAMINATION_FILTER_NAME, DUPLICATED_EVIDENCE_FILTER_NAME,
            READ_ORIENTATION_ARTIFACT_FILTER_NAME, BAD_HAPLOTYPE_FILTER_NAME,
            STRICT_STRAND_BIAS_FILTER_NAME, N_RATIO_FILTER_NAME, ALLELE_FRACTION_FILTER_NAME, POSSIBLE_NUMT_FILTER_NAME, FAIL);

    /**
     * Allele-specific Mutect filter names; currently only {@link #AS_FILTER_STATUS_KEY}.
     * The list is a fixed-size {@link Arrays#asList} view; treat it as read-only.
     */
    public static final List<String> MUTECT_AS_FILTER_NAMES = Arrays.asList(AS_FILTER_STATUS_KEY);

    // Symbolic alleles
    public static final String SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG = "ALT";
    public static final String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF";
    public static final String SPANNING_DELETION_SYMBOLIC_ALLELE_NAME_DEPRECATED = "*:DEL";
    /** Deprecated symbolic form {@code <*:DEL>}: represents any possible spanning deletion allele at this site. */
    public static final Allele SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED =
            Allele.create("<" + SPANNING_DELETION_SYMBOLIC_ALLELE_NAME_DEPRECATED + ">", false);
    public static final String ALLELE_SPECIFIC_ANNOTATION_PREFIX = "AS";


    /**
     * Tests whether an allele denotes a spanning deletion, in either the current or the deprecated representation.
     *
     * @param allele the allele to test; must not be {@code null} (a null argument results in a NullPointerException)
     * @return {@code true} if {@code allele} equals {@code Allele.SPAN_DEL} or
     *         {@link #SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED}; {@code false} otherwise
     */
    public static boolean isSpanningDeletion(final Allele allele){
        return allele.equals(Allele.SPAN_DEL) || allele.equals(SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy