All Downloads are FREE. Search and download functionalities are using the official Maven repository.

htsjdk.variant.vcf.VCFStandardHeaderLines Maven / Gradle / Ivy

There is a newer version: 4.1.3
Show newest version
/*
* Copyright (c) 2012 The Broad Institute
* 
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
* 
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* 
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package htsjdk.variant.vcf;

import htsjdk.tribble.TribbleException;
import htsjdk.variant.utils.GeneralUtils;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;

/**
 * Manages header lines for standard VCF 
INFO
and
FORMAT
fields. * * Provides simple mechanisms for * 1) registering standard lines, * 2) looking them up, and * 3) adding them to headers. * * @author Mark DePristo * @since 6/12 */ public class VCFStandardHeaderLines { /** * Enabling this causes us to repair header lines even if only their descriptions differ. */ private final static boolean REPAIR_BAD_DESCRIPTIONS = false; private static Standards formatStandards = new Standards(); private static Standards infoStandards = new Standards(); /** * Walks over the VCF header and repairs the standard VCF header lines in it, returning a freshly * allocated {@link VCFHeader} with standard VCF header lines repaired as necessary. */ public static VCFHeader repairStandardHeaderLines(final VCFHeader oldHeader) { final Set newLines = new LinkedHashSet(oldHeader.getMetaDataInInputOrder().size()); for ( VCFHeaderLine line : oldHeader.getMetaDataInInputOrder() ) { if ( line instanceof VCFFormatHeaderLine ) { line = formatStandards.repair((VCFFormatHeaderLine) line); } else if ( line instanceof VCFInfoHeaderLine) { line = infoStandards.repair((VCFInfoHeaderLine) line); } newLines.add(line); } final VCFHeader repairedHeader = new VCFHeader(newLines, oldHeader.getGenotypeSamples()); final VCFHeaderVersion oldHeaderVersion = oldHeader.getVCFHeaderVersion(); if (oldHeaderVersion != null && oldHeaderVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) { // this needs to maintain version 4.3 (and not back-version to v4.2), so propagate // the old version only for v4.3 repairedHeader.setVCFHeaderVersion(oldHeaderVersion); } return repairedHeader; } /** * Adds header lines for each of the format fields in IDs to header, returning the set of * {@code IDs} without standard descriptions, unless {@code throwErrorForMissing} is true, in which * case this situation results in a {@link TribbleException} */ public static Set addStandardFormatLines(final Set headerLines, final boolean throwErrorForMissing, final Collection IDs) { return formatStandards.addToHeader(headerLines, IDs, throwErrorForMissing); } /** * @see #addStandardFormatLines(java.util.Set, boolean, java.util.Collection) */ public static Set addStandardFormatLines(final Set headerLines, final boolean throwErrorForMissing, final String ... IDs) { return addStandardFormatLines(headerLines, throwErrorForMissing, Arrays.asList(IDs)); } /** * Returns the standard format line for {@code ID}. * If none exists, return null or throw an exception, depending on {@code throwErrorForMissing}. */ public static VCFFormatHeaderLine getFormatLine(final String ID, final boolean throwErrorForMissing) { return formatStandards.get(ID, throwErrorForMissing); } /** * Returns the standard format line for {@code ID}. * If none exists, throw an {@link TribbleException} */ public static VCFFormatHeaderLine getFormatLine(final String ID) { return formatStandards.get(ID, true); } /** * Adds header lines for each of the info fields in {@code IDs} to header, returning the set of * IDs without standard descriptions, unless {@code throwErrorForMissing} is true, in which * case this situation results in a {@link TribbleException}. */ public static Set addStandardInfoLines(final Set headerLines, final boolean throwErrorForMissing, final Collection IDs) { return infoStandards.addToHeader(headerLines, IDs, throwErrorForMissing); } /** * @see #addStandardFormatLines(java.util.Set, boolean, java.util.Collection) */ public static Set addStandardInfoLines(final Set headerLines, final boolean throwErrorForMissing, final String ... IDs) { return addStandardInfoLines(headerLines, throwErrorForMissing, Arrays.asList(IDs)); } /** * Returns the standard info line for {@code ID}. * If none exists, return {@code null} or throw a {@link TribbleException}, depending on {@code throwErrorForMissing}. */ public static VCFInfoHeaderLine getInfoLine(final String ID, final boolean throwErrorForMissing) { return infoStandards.get(ID, throwErrorForMissing); } /** * Returns the standard info line for {@code ID}. * If none exists throw a {@link TribbleException}. */ public static VCFInfoHeaderLine getInfoLine(final String ID) { return getInfoLine(ID, true); } private static void registerStandard(final VCFInfoHeaderLine line) { infoStandards.add(line); } private static void registerStandard(final VCFFormatHeaderLine line) { formatStandards.add(line); } // // VCF header line constants // static { // FORMAT lines registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality")); registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)")); registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification")); registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype-level filter")); registerStandard(new VCFFormatHeaderLine(VCFConstants.PHASE_SET_KEY, 1, VCFHeaderLineType.Integer, "Phasing set (typically the position of the first variant in the set)")); registerStandard(new VCFFormatHeaderLine(VCFConstants.PHASE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Read-backed phasing quality")); // INFO lines registerStandard(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval")); registerStandard(new VCFInfoHeaderLine(VCFConstants.DBSNP_KEY, 0, VCFHeaderLineType.Flag, "dbSNP Membership")); registerStandard(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered")); registerStandard(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias")); registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed")); registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed")); registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes")); registerStandard(new VCFInfoHeaderLine(VCFConstants.MAPPING_QUALITY_ZERO_KEY, 1, VCFHeaderLineType.Integer, "Total Mapping Quality Zero Reads")); registerStandard(new VCFInfoHeaderLine(VCFConstants.RMS_MAPPING_QUALITY_KEY, 1, VCFHeaderLineType.Float, "RMS Mapping Quality")); registerStandard(new VCFInfoHeaderLine(VCFConstants.SOMATIC_KEY, 0, VCFHeaderLineType.Flag, "Somatic event")); } private static class Standards { private final Map standards = new HashMap(); public T repair(final T line) { final T standard = get(line.getID(), false); if ( standard != null ) { final boolean badCountType = line.getCountType() != standard.getCountType(); final boolean badCount = line.isFixedCount() && ! badCountType && line.getCount() != standard.getCount(); final boolean badType = line.getType() != standard.getType(); final boolean badDesc = ! line.getDescription().equals(standard.getDescription()); final boolean needsRepair = badCountType || badCount || badType || (REPAIR_BAD_DESCRIPTIONS && badDesc); if ( needsRepair ) { if ( GeneralUtils.DEBUG_MODE_ENABLED ) { System.err.println("Repairing standard header line for field " + line.getID() + " because" + (badCountType ? " -- count types disagree; header has " + line.getCountType() + " but standard is " + standard.getCountType() : "") + (badType ? " -- type disagree; header has " + line.getType() + " but standard is " + standard.getType() : "") + (badCount ? " -- counts disagree; header has " + line.getCount() + " but standard is " + standard.getCount() : "") + (badDesc ? " -- descriptions disagree; header has '" + line.getDescription() + "' but standard is '" + standard.getDescription() + "'": "")); } return standard; } else { return line; } } else { return line; } } public Set addToHeader(final Set headerLines, final Collection IDs, final boolean throwErrorForMissing) { final Set missing = new HashSet(); for ( final String ID : IDs ) { final T line = get(ID, throwErrorForMissing); if ( line == null ) missing.add(ID); else headerLines.add(line); } return missing; } public void add(final T line) { if ( standards.containsKey(line.getID()) ) { throw new TribbleException("Attempting to add multiple standard header lines for ID " + line.getID()); } standards.put(line.getID(), line); } public T get(final String ID, final boolean throwErrorForMissing) { final T x = standards.get(ID); if ( throwErrorForMissing && x == null ) { throw new TribbleException("Couldn't find a standard VCF header line for field " + ID); } return x; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy