All Downloads are FREE. Search and download functionalities are using the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download or modify it as often as you want.
picard.arrays.ArraysCallingMetricAccumulator Maven / Gradle / Ivy
package picard.arrays;
import picard.arrays.illumina.InfiniumVcfFields;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.Iso8601Date;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import picard.pedigree.Sex;
import picard.util.DbSnpBitSetUtil;
import picard.vcf.CallingMetricAccumulator;
import picard.vcf.processor.VariantProcessor;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
import java.util.stream.Collectors;
class ArraysCallingMetricAccumulator implements VariantProcessor.Accumulator {
private static final Log LOG = Log.getInstance(ArraysCallingMetricAccumulator.class);
private static final ProgressLogger progress = new ProgressLogger(LOG, 10000);
private final DbSnpBitSetUtil.DbSnpBitSets dbsnp;
private final CollectArraysVariantCallingMetrics.ArraysVariantCallingSummaryMetrics summaryMetric
= new CollectArraysVariantCallingMetrics.ArraysVariantCallingSummaryMetrics();
private String sampleAlias;
private Integer analysisVersionNumber;
private String chipTypeName;
private String reportedGender;
private String fingerprintGender;
private String autocallGender;
private Double gtcCallRate;
private Iso8601Date autocallDate;
private Iso8601Date imagingDate;
private String autocallVersion;
private String extendedIlluminaManifestVersion;
private String zcallVersion;
private String zcallThresholdsFile;
private String clusterFileName;
private Integer p95Green;
private Integer p95Red;
private String scannerName;
private String pipelineVersion;
/**
* A map of sample names to metrics. If .get() for a not-yet-existing sample name, a metric is generated, inserted into the map,
* then returned.
*/
private final CollectionUtil.DefaultingMap sampleMetricsMap =
new CollectionUtil.DefaultingMap<>(
sampleName -> {
final CollectArraysVariantCallingMetrics.ArraysVariantCallingDetailMetrics detail = new CollectArraysVariantCallingMetrics.ArraysVariantCallingDetailMetrics();
detail.CHIP_WELL_BARCODE = sampleName;
detail.SAMPLE_ALIAS = sampleAlias;
detail.ANALYSIS_VERSION = analysisVersionNumber;
detail.CHIP_TYPE = chipTypeName;
detail.REPORTED_GENDER = reportedGender;
detail.FP_GENDER = fingerprintGender;
detail.AUTOCALL_GENDER = autocallGender;
detail.AUTOCALL_VERSION = autocallVersion;
detail.GTC_CALL_RATE = gtcCallRate;
detail.AUTOCALL_DATE = new Iso8601Date(autocallDate);
detail.IMAGING_DATE = new Iso8601Date(imagingDate);
detail.EXTENDED_MANIFEST_VERSION = extendedIlluminaManifestVersion;
detail.ZCALL_VERSION = zcallVersion;
detail.zcallThresholdsFile = zcallThresholdsFile;
detail.CLUSTER_FILE_NAME = clusterFileName;
detail.P95_GREEN = p95Green;
detail.P95_RED = p95Red;
detail.SCANNER_NAME = scannerName;
detail.PIPELINE_VERSION = pipelineVersion;
return detail;
}, true);
ArraysCallingMetricAccumulator(DbSnpBitSetUtil.DbSnpBitSets dbsnp) {
this.dbsnp = dbsnp;
}
public void setup(final VCFHeader vcfHeader) {
this.sampleAlias = InfiniumVcfFields.getValueFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.SAMPLE_ALIAS);
this.pipelineVersion = InfiniumVcfFields.getOptionalValueFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.PIPELINE_VERSION);
this.analysisVersionNumber = InfiniumVcfFields.getOptionalIntegerFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.ANALYSIS_VERSION_NUMBER);
this.chipTypeName = InfiniumVcfFields.getValueFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.ARRAY_TYPE);
this.reportedGender = getOptionalGenderStringFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.EXPECTED_GENDER);
this.fingerprintGender = getOptionalGenderStringFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.FINGERPRINT_GENDER);
this.gtcCallRate = InfiniumVcfFields.getOptionalDoubleFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.GTC_CALL_RATE);
this.autocallGender = Sex.fromString(InfiniumVcfFields.getValueFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.AUTOCALL_GENDER)).toSymbol();
this.autocallVersion = InfiniumVcfFields.getValueFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.AUTOCALL_VERSION);
final SimpleDateFormat autocallDateFormat = new SimpleDateFormat("MM/dd/yyyy HH:mm"); // of the form '09/21/2016 20:40'
this.autocallDate = InfiniumVcfFields.getDateFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.AUTOCALL_DATE, autocallDateFormat);
final SimpleDateFormat imagingDateFormat = new SimpleDateFormat("MM/dd/yyyy hh:mm:ss a"); // of the form '8/15/2015 7:28:52 AM'
imagingDateFormat.setTimeZone(TimeZone.getTimeZone("America/New_York"));
this.imagingDate = InfiniumVcfFields.getDateFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.IMAGING_DATE, imagingDateFormat);
this.extendedIlluminaManifestVersion = InfiniumVcfFields.getValueFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.EXTENDED_ILLUMINA_MANIFEST_VERSION);
this.zcallVersion = InfiniumVcfFields.getOptionalValueFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.ZCALL_VERSION);
this.zcallThresholdsFile = InfiniumVcfFields.getOptionalValueFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.ZCALL_THRESHOLDS);
this.clusterFileName = InfiniumVcfFields.getValueFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.CLUSTER_FILE);
this.p95Green = InfiniumVcfFields.getIntegerFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.P_95_GREEN);
this.p95Red = InfiniumVcfFields.getIntegerFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.P_95_RED);
this.scannerName = InfiniumVcfFields.getValueFromVcfOtherHeaderLine(vcfHeader, InfiniumVcfFields.SCANNER_NAME);
vcfHeader.getGenotypeSamples().forEach(sampleName -> sampleMetricsMap.get(sampleName));
}
private String getOptionalGenderStringFromVcfOtherHeaderLine(final VCFHeader vcfHeader, final String fieldName) {
String genderString = InfiniumVcfFields.getOptionalValueFromVcfOtherHeaderLine(vcfHeader, fieldName);
if (genderString != null) {
return Sex.fromString(genderString).toSymbol();
}
return Sex.NotReported.toSymbol();
}
@Override
public void accumulate(VariantContext vc) {
progress.record(vc.getContig(), vc.getStart());
final String singletonSample = CallingMetricAccumulator.getSingletonSample(vc);
vc.getSampleNames().forEach(sampleName ->
updateDetailMetric(sampleMetricsMap.get(sampleName), vc.getGenotype(sampleName), vc,
sampleName.equals(singletonSample)));
}
private void updateDetailMetric(final CollectArraysVariantCallingMetrics.ArraysVariantCallingDetailMetrics metric,
final Genotype genotype,
final VariantContext vc,
final boolean hasSingletonSample) {
metric.NUM_ASSAYS++;
if (!vc.isFiltered() || vc.getCommonInfo().getFilters().contains(InfiniumVcfFields.DUPE)) {
metric.NUM_NON_FILTERED_ASSAYS++;
if (genotype.isCalled()) {
metric.NUM_CALLS++;
String gtA = (String) genotype.getExtendedAttribute(InfiniumVcfFields.GTA, genotype.getGenotypeString());
if (!gtA.equals(VCFConstants.EMPTY_GENOTYPE)) {
metric.NUM_AUTOCALL_CALLS++;
}
} else {
metric.NUM_NO_CALLS++;
}
if (vc.isSNP()) {
// Biallelic SNPs
final boolean isInDbSnp = dbsnp.snps.isDbSnpSite(vc.getContig(), vc.getStart());
metric.NUM_SNPS++;
if (isInDbSnp) {
metric.NUM_IN_DB_SNP++;
}
} else if (vc.isIndel()) {
metric.NUM_INDELS++;
}
if (hasSingletonSample) {
metric.NUM_SINGLETONS++;
}
if (genotype.isHet()) {
metric.numHets++;
} else if (genotype.isHomVar()) {
metric.numHomVar++;
}
} else {
metric.NUM_FILTERED_ASSAYS++;
if (vc.getCommonInfo().getFilters().contains(InfiniumVcfFields.ZEROED_OUT_ASSAY)) {
// A "zeroed-out SNP". Marked as unusable/uncallable
metric.NUM_ZEROED_OUT_ASSAYS++;
}
}
}
@Override
public Result result() {
return new Result(summaryMetric, sampleMetricsMap.values());
}
public static class Result {
final CollectArraysVariantCallingMetrics.ArraysVariantCallingSummaryMetrics summary;
final Collection details;
Result(final CollectArraysVariantCallingMetrics.ArraysVariantCallingSummaryMetrics summary, final Collection details) {
this.summary = summary;
this.details = details;
}
/**
* Combines results generated across 1 or more threads
*/
public static Result merge(final Collection results) {
final Collection details = new ArrayList<>();
results.forEach(result -> {
details.addAll(result.details);
});
final Map> sampleDetailsMap =
details.stream().collect(Collectors.groupingBy(vcDetailMetrics -> vcDetailMetrics.CHIP_WELL_BARCODE));
final Collection collapsedDetails = new ArrayList<>();
final CollectArraysVariantCallingMetrics.ArraysVariantCallingSummaryMetrics collapsedSummary = new CollectArraysVariantCallingMetrics.ArraysVariantCallingSummaryMetrics();
sampleDetailsMap.values().forEach(sampleDetails -> {
final CollectArraysVariantCallingMetrics.ArraysVariantCallingDetailMetrics collapsed = new CollectArraysVariantCallingMetrics.ArraysVariantCallingDetailMetrics();
CollectArraysVariantCallingMetrics.ArraysVariantCallingDetailMetrics.foldInto(collapsed, sampleDetails);
CollectArraysVariantCallingMetrics.ArraysVariantCallingSummaryMetrics.foldInto(collapsedSummary, sampleDetails);
collapsedDetails.add(collapsed);
collapsed.calculateDerivedFields();
});
collapsedSummary.calculateDerivedFields();
return new Result(collapsedSummary, collapsedDetails);
}
}
}