org.opencb.biodata.tools.variant.converters.VariantContextConverter Maven / Gradle / Ivy
/*
*
*
*/
package org.opencb.biodata.tools.variant.converters;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFConstants;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.opencb.biodata.models.variant.StudyEntry;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.tools.commons.Converter;
import org.opencb.commons.datastore.core.ObjectMap;
import java.text.DecimalFormat;
import java.util.*;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
/**
* Created by jtarraga on 07/02/17.
*/
public abstract class VariantContextConverter implements Converter {
public static final DecimalFormat DECIMAL_FORMAT_7 = new DecimalFormat("#.#######");
public static final DecimalFormat DECIMAL_FORMAT_3 = new DecimalFormat("#.###");
public static final String FIELD_SEPARATOR = "|";
public static final String INFO_SEPARATOR = "&";
public static final String NO_CALL_ALLELE = String.valueOf(VCFConstants.NO_CALL_ALLELE);
protected final String studyIdString;
protected final List sampleNames;
protected final List sampleFormats;
protected final Map samplePositions;
protected final List annotations;
protected Map studyNameMap;
public VariantContextConverter(String study, List sampleNames, List sampleFormats, List annotations) {
this.studyIdString = study;
this.sampleNames = sampleNames;
// If samples format is NULL, all given formats will be shown
this.sampleFormats = sampleFormats;
this.annotations = annotations;
this.studyNameMap = null;
if (this.sampleNames != null) {
samplePositions = new HashMap<>(sampleNames.size());
for (int i = 0; i < sampleNames.size(); i++) {
samplePositions.put(sampleNames.get(i), i);
}
} else {
samplePositions = Collections.emptyMap();
}
}
protected abstract List buildAlleles(T variant, Pair adjustedRange, Map referenceAlleles);
protected static String buildAllele(String chromosome, Integer start, Integer end, String allele, Pair adjustedRange, Map referenceAlleles) {
if (start.equals(adjustedRange.getLeft()) && end.equals(adjustedRange.getRight())) {
return allele; // same start / end
}
if (StringUtils.startsWith(allele, "*") || StringUtils.startsWith(allele, "<")) {
return allele; // no need for overlapping deletions and symbolic alleles
}
if (StringUtils.startsWithAny(allele, "]", "[")) {
if (allele.endsWith(".")) {
allele = allele.substring(0, allele.length() - 1);
}
return allele + getReferenceBase(chromosome, adjustedRange.getLeft(), adjustedRange.getLeft() + 1, referenceAlleles);
}
if (StringUtils.endsWithAny(allele, "]", "[")) {
if (allele.startsWith(".")) {
allele = allele.substring(1);
}
return getReferenceBase(chromosome, adjustedRange.getLeft(), start, referenceAlleles) + allele;
}
return getReferenceBase(chromosome, adjustedRange.getLeft(), start, referenceAlleles)
+ allele
+ getReferenceBase(chromosome, end + 1, adjustedRange.getRight() + 1, referenceAlleles);
}
protected static MutablePair adjustedVariantStart(
final int start, final int end, String reference, String alternate,
Map referenceAlleles, MutablePair adjustedPosition) {
int adjustedStart;
int adjustedEnd;
if (adjustedPosition == null) {
adjustedStart = start;
adjustedEnd = end;
} else {
adjustedStart = Math.min(start, adjustedPosition.getLeft());
adjustedEnd = Math.max(end, adjustedPosition.getRight());
}
// Add a context base if reference or alternate are empty,
if (StringUtils.isBlank(reference) || StringUtils.isBlank(alternate)) {
// Add the context base at the beginning or at the end, depending if it's contained in the referenceAlleles map.
// If none is present, add from the start
// Do not add a context base if the adjusted position already includes that position.
if (start - 1 < adjustedStart && end + 1 > adjustedEnd) {
if (referenceAlleles.containsKey(start - 1) || !referenceAlleles.containsKey(end + 1)) {
adjustedStart = start - 1;
} else {
adjustedEnd = end + 1;
}
}
}
adjustedEnd = Math.max(adjustedEnd, adjustedStart);
if (adjustedPosition == null) {
adjustedPosition = new MutablePair<>(adjustedStart, adjustedEnd);
} else {
adjustedPosition.setLeft(adjustedStart);
adjustedPosition.setRight(adjustedEnd);
}
return adjustedPosition;
}
/**
* Get bases from reference sequence.
*
* @param chromosome Chromosome
* @param from Start ( inclusive) position
* @param to End (exclusive) position
* @param referenceAlleles Reference alleles
* @return String Reference sequence of length to - from
*/
protected static String getReferenceBase(String chromosome, int from, int to, Map referenceAlleles) {
int length = to - from;
if (length < 0) {
throw new IllegalStateException(
"Sequence length is negative: chromosome " + chromosome + " from " + from + " to " + to);
}
StringBuilder sb = new StringBuilder(length);
for (int i = from; i < to; i++) {
sb.append(referenceAlleles.getOrDefault(i, 'N'));
}
return sb.toString();
// return StringUtils.repeat('N', length); // current return default base TODO load reference sequence
}
protected void init(T variant) {
if (this.studyNameMap == null) {
studyNameMap = new HashMap<>();
getStudiesId(variant).forEachRemaining(studyId -> {
this.studyNameMap.put(studyId, studyId);
if (studyId.contains("@")) {
this.studyNameMap.put(studyId.split("@")[1], studyId);
}
if (studyId.contains(":")) {
this.studyNameMap.put(studyId.split(":")[1], studyId);
}
});
}
}
protected String getIdForVcf(String id, List names) {
if (StringUtils.isNotEmpty(id) && !id.contains(":")) {
StringBuilder ids = new StringBuilder(id);
if (names != null) {
for (String name : names) {
ids.append(VCFConstants.ID_FIELD_SEPARATOR).append(name);
}
}
return ids.toString();
} else {
return VCFConstants.EMPTY_ID_FIELD;
}
}
protected Set getNoCallAlleleIdx(List alleleList) {
Set nocallAlleles = new HashSet<>();
for (int i = 0; i < alleleList.size(); i++) {
String anObject = alleleList.get(i);
if (NO_CALL_ALLELE.equals(anObject)) {
nocallAlleles.add(i);
}
}
return nocallAlleles;
}
protected static Map buildReferenceAllelesMap(Iterator callsIterator) {
Map referenceAlleles = new HashMap<>();
callsIterator.forEachRemaining(call -> {
if (call != null) {
Variant originalVariant = new Variant(call.split(",")[0]);
String originalReference = originalVariant.getReference();
Integer originalPosition = originalVariant.getStart();
for (int i = 0; i < originalReference.length(); i++) {
referenceAlleles.put(originalPosition + i, originalReference.charAt(i));
}
}
});
return referenceAlleles;
}
protected Set getFilters(List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy