Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
package org.broadinstitute.hellbender.testutils;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.*;
import org.apache.commons.collections.IteratorUtils;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.barclay.argparser.CommandLineArgumentParser;
import org.broadinstitute.barclay.argparser.CommandLineParser;
import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKAnnotationPluginDescriptor;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation;
import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils;
import org.broadinstitute.hellbender.tools.walkers.genotyper.AlleleSubsettingUtils;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod;
import org.broadinstitute.hellbender.utils.BaseUtils;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.testng.Assert;
// This should be:
//import org.apache.logging.log4j.LogManager;
//import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import static org.broadinstitute.hellbender.utils.variant.VariantContextGetters.attributeToList;
public final class VariantContextTestUtils {
public static final String SAMPLE_NAME = "XXYYZZ";
private VariantContextTestUtils() {}
/** Standard Logger. */
protected static final Logger logger = LogManager.getLogger(VariantContextTestUtils.class);
/**
* Reads an entire VCF into memory, returning both its VCFHeader and all VariantContext records in
* the vcf. Supports both local files and NIO-supported remote filesystems such as GCS.
*
* For unit/integration testing purposes only! Do not call this method from actual tools!
*
* @param vcfPath path or URI to a VCF, as a String
* @return A Pair with the VCFHeader as the first element, and a List of all VariantContexts from the VCF
* as the second element
*/
public static Pair> readEntireVCFIntoMemory(final String vcfPath) {
Utils.nonNull(vcfPath);
try ( final FeatureDataSource vcfReader = new FeatureDataSource<>(vcfPath) ) {
final Object header = vcfReader.getHeader();
if ( ! (header instanceof VCFHeader) ) {
throw new IllegalArgumentException(vcfPath + " does not have a valid VCF header");
}
final List vcfRecords = new ArrayList<>();
for ( final VariantContext vcfRecord : vcfReader ) {
vcfRecords.add(vcfRecord);
}
return Pair.of((VCFHeader)header, vcfRecords);
}
}
public static VCFHeader getVCFHeader(final String vcfPath) {
Utils.nonNull(vcfPath);
try ( final FeatureDataSource vcfReader = new FeatureDataSource<>(vcfPath) ) {
final Object header = vcfReader.getHeader();
if (!(header instanceof VCFHeader)) {
throw new IllegalArgumentException(vcfPath + " does not have a valid VCF header");
}
return (VCFHeader)header;
}
}
private static void assertAttributeEquals(final String key, final Object actual, final Object expected) {
final Object notationCorrectedActual = normalizeScientificNotation(actual);
final Object notationCorrectedExpected = normalizeScientificNotation(expected);
if (notationCorrectedExpected instanceof Double && notationCorrectedActual instanceof Double) {
// must be very tolerant because doubles are being rounded to 2 sig figs
BaseTest.assertEqualsDoubleSmart((Double) notationCorrectedActual, (Double) notationCorrectedExpected, 1e-2, "Attribute " + key);
} else if (actual instanceof Integer || expected instanceof Integer) {
Object actualNormalized = normalizeToInteger(actual);
Object expectedNormalized = normalizeToInteger(expected);
Assert.assertEquals(actualNormalized, expectedNormalized, "Attribute " + key);
} else {
Assert.assertEquals(notationCorrectedActual, notationCorrectedExpected, "Attribute " + key);
}
}
/**
* Attempt to convert a String containing a signed integer (no separators) to an integer. If the attribute is not
* a String, or does not contain an integer, the original object is returned.
*
* @param attribute
* @return An Integer representing the value in the attribute if it contains a parseable integer, otherwise the
* original attribute.
*/
@VisibleForTesting
static Object normalizeToInteger(final Object attribute) {
if (attribute instanceof String) {
try {
return Integer.parseInt((String) attribute);
} catch ( final NumberFormatException e) {
return attribute;
}
}
return attribute;
}
/**
* Normalizes the representation of Strings containing doubles.
* This is necessary to deal with the fact that variant context attributes are deserialized from vcf as Strings
* instead of their original type. Some versions of gatk3 output double attributes in scientific notation while gatk4
* doesn't do so.
*
* @param attribute an attribute to attempt to normalize
* @return if attribute is a String, try to parse it as a Double and return that value, else return the original attribute
*/
@VisibleForTesting
static Object normalizeScientificNotation(final Object attribute){
if (attribute instanceof String){
try {
if (((String) attribute).contains("|")) {
// If the attribute is an allele specific attribute separated by '|', then we want to remap
// each of its contained values (which could be comma separated lists) separately
String[] split = ((String) attribute).split("\\|",-1);
return Arrays.stream(split).map(
s -> {return Arrays.stream(s.split(",",-1))
.map(d -> {if (d.equals("")) return d;
else return Double.toString(Double.parseDouble(d));})
.collect(Collectors.joining(","));})
.collect(Collectors.joining("|"));
} else {
return Double.parseDouble((String) attribute);
}
} catch ( final NumberFormatException e) {
return attribute;
}
}
return attribute;
}
/**
* Method which reorders the AltAlleles of a VariantContext so that they are in alphabetical order.
*
* It also reorders all annotation fields and the PL,AD, and SAC fields of each sample genotype in the variant context
* to be consistent with the new ordering
*
* NOTE: this method relies on the correctness of AlleleSubsettingUtils.subsetAlleles() for reordering alleles
*
* @param vc Variant context to reorder
* @param header
* @return
*/
public static VariantContext sortAlleles(final VariantContext vc, final VCFHeader header){
final List originalAltAlleles = vc.getAlternateAlleles();
final List sortedAltAlleles = originalAltAlleles.stream().sorted().collect(Collectors.toList());
final List sortedAlleles = new ArrayList<>(vc.getNAlleles());
sortedAlleles.add(vc.getReference());
sortedAlleles.addAll(sortedAltAlleles);
final VariantContextBuilder result = new VariantContextBuilder(vc);
result.alleles(sortedAlleles);
GenotypesContext newGT = AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(),2,vc.getAlleles(),sortedAlleles, null,
GenotypeAssignmentMethod.SET_TO_NO_CALL);
// Asserting that the new genotypes were calculated properly in case AlleleSubsettingUtils behavior changes
if (newGT.getSampleNames().size() != vc.getGenotypes().size()) throw new IllegalStateException("Sorting this variant context resulted in a different number of genotype alleles, check that AlleleSubsettingUtils still supports reordering:" + vc.toString());
for (int i =0; i newAttributes = new HashMap<>(vc.getAttributes());
for (Map.Entry entry : newAttributes.entrySet()) {
VCFHeaderLineCount type = header.hasInfoLine(entry.getKey())?header.getInfoHeaderLine(entry.getKey()).getCountType():VCFHeaderLineCount.UNBOUNDED;
int ploidy = vc.getGenotypes().getMaxPloidy(2);
newAttributes.replace(entry.getKey(), updateAttribute(entry.getKey(), entry.getValue(), vc.getAlleles(), sortedAlleles, type, ploidy));
}
// The above will have built new genotype for PL,AD, and SAC fields excluding the GT and GQ field, thus we must re-add them for comparison.
for (int i = 0; i < newGT.size(); i++) {
final HashMap newGTAttributes = new HashMap<>(newGT.get(i).getExtendedAttributes());
for (Map.Entry entry : newGTAttributes.entrySet()) {
VCFHeaderLineCount type = header.hasInfoLine(entry.getKey())?header.getFormatHeaderLine(entry.getKey()).getCountType():VCFHeaderLineCount.UNBOUNDED;
int ploidy = vc.getGenotypes().getMaxPloidy(2);
newGTAttributes.replace(entry.getKey(), updateAttribute(entry.getKey(), entry.getValue(), vc.getAlleles(), sortedAlleles, type, ploidy));
}
Genotype replacementGenotype = new GenotypeBuilder(newGT.get(i))
.GQ(vc.getGenotype(i).getGQ())
.phased(vc.getGenotype(i).isPhased())
.alleles(vc.getGenotype(i).getAlleles())
.attributes(newGTAttributes).make();
newGT.replace(replacementGenotype);
}
result.attributes(newAttributes).genotypes(newGT);
return result.make();
}
@SuppressWarnings({"unchecked", "rawtypes"})
private static Object updateAttribute(final String key, final Object value,
final List originalAlleles, final List sortedAlleles,
final VCFHeaderLineCount count, int ploidy) {
if (key.startsWith("AS_")) {
return remapASValues(value instanceof List? String.join(",", ((List) value)) : (String) value, createAlleleIndexMap(originalAlleles, sortedAlleles));
}else {
switch (count) {
case INTEGER:
return value;
case UNBOUNDED:
//doesn't depend on allele ordering
return value;
case A:
return remapATypeValues(attributeToList(value), createAlleleIndexMap(originalAlleles, sortedAlleles));
case R:
return remapRTypeValues(attributeToList(value), createAlleleIndexMap(originalAlleles, sortedAlleles));
case G:
return remapGTypeValues(attributeToList(value), originalAlleles, ploidy, sortedAlleles);
default:
throw new GATKException("found unexpected vcf header count type: " + count);
}
}
}
static List createAlleleIndexMap(final List originalAlleles, final List sortedAlleles){
final List mapping = new ArrayList<>(originalAlleles.size());
for ( final Allele a: sortedAlleles){
final int newIndex = originalAlleles.indexOf(a);
mapping.add(newIndex);
}
return mapping;
}
static List