All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opencb.biodata.models.variant.Variant Maven / Gradle / Ivy

The newest version!
/*
 * 
 *
 */

package org.opencb.biodata.models.variant;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import htsjdk.variant.variantcontext.Allele;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.models.variant.avro.*;

import javax.annotation.Nullable;
import java.io.Serializable;
import java.util.*;

/**
 * @author Jacobo Coll
 * @author Cristina Yenyxe Gonzalez Garcia <[email protected]>
 */
@JsonIgnoreProperties({"impl", "ids", "sourceEntries", "studiesMap", "lengthReference", "lengthAlternate"})
public class Variant implements Serializable, Comparable {

    /**
     * Variant types returned by {@link #subTypes(VariantType)} for {@link VariantType#SV}.
     * NOTE: {@link EnumSet} is mutable; treat this constant as read-only.
     */
    public static final EnumSet<VariantType> SV_SUBTYPES = EnumSet.of(VariantType.INSERTION, VariantType.DELETION,
            VariantType.TRANSLOCATION, VariantType.INVERSION,
            VariantType.CNV, VariantType.COPY_NUMBER, VariantType.COPY_NUMBER_GAIN, VariantType.COPY_NUMBER_LOSS,
            VariantType.DUPLICATION, VariantType.TANDEM_DUPLICATION,
            VariantType.BREAKEND);
    /**
     * Variant types returned by {@link #subTypes(VariantType)} for {@link VariantType#COPY_NUMBER}
     * and {@link VariantType#CNV}. NOTE: mutable; treat this constant as read-only.
     */
    public static final EnumSet<VariantType> COPY_NUMBER_SUBTYPES = EnumSet.of(VariantType.COPY_NUMBER_GAIN, VariantType.COPY_NUMBER_LOSS);
    /** Backing Avro record; the accessors of this class delegate to it. */
    private final VariantAvro impl;
    /** Lazily built index of the study wrappers keyed by study id; {@code null} until it is first needed. */
    private volatile Map<String, StudyEntry> studyEntries = null;

    /** NOTE(review): presumably the minimum length for a variant to be considered structural - confirm against VariantBuilder. */
    public static final int SV_THRESHOLD = 50;
    /** Length value that {@link #toString()} treats as "length not known". */
    public static final int UNKNOWN_LENGTH = 0;

    /**
     * Creates an empty variant backed by a newly allocated {@link VariantAvro}.
     * The record is initialised with placeholder arguments (nulls, empty strings, {@code -1},
     * {@code "+"}, {@code 0} and empty linked lists); see the {@link VariantAvro} constructor
     * for the meaning of each position.
     */
    public Variant() {
        impl = new VariantAvro(null, new LinkedList<>(), "", -1, -1, "", "", "+", null, 0, null, new LinkedList<>(), null);
    }

    /**
     * Wraps an existing Avro record. The record is used as-is (it is not copied), so later
     * changes made through this object are visible on {@code avro}.
     *
     * @param avro record to wrap
     * @throws NullPointerException if {@code avro} is {@code null}
     */
    public Variant(VariantAvro avro) {
        impl = Objects.requireNonNull(avro);
    }

    /**
     * Creates a variant parsing a string.
     *
     * Format : (chr):(start)[-(end)][:(ref)]:(alt)
     *
     * The instance is first created empty and then handed to
     * {@code new VariantBuilder(variantString).build(this)}.
     *
     * @see VariantBuilder
     * @param variantString Variant string
     * @throws IllegalArgumentException if the variant does not match with the pattern
     */
    public Variant(String variantString) {
        this();
        new VariantBuilder(variantString).build(this);
    }

    /**
     * Creates a variant on the {@code "+"} strand without an explicit end position.
     *
     * @param chromosome chromosome name
     * @param position   start position
     * @param reference  reference allele
     * @param alternate  alternate allele
     */
    public Variant(String chromosome, int position, String reference, String alternate) {
        // Use null end, so the builder will infer the end position.
        this(chromosome, position, null, reference, alternate, "+");
    }

    /**
     * Creates a variant on the {@code "+"} strand with explicit start and end positions.
     *
     * @param chromosome chromosome name
     * @param start      start position
     * @param end        end position
     * @param reference  reference allele
     * @param alternate  alternate allele
     */
    public Variant(String chromosome, int start, int end, String reference, String alternate) {
        this(chromosome, start, end, reference, alternate, "+");
    }

    /**
     * Creates a variant with explicit coordinates and strand.
     * The Avro record is produced by {@link VariantBuilder#buildAvroVariant}; afterwards the strand
     * is set and the studies are reset to an empty list.
     *
     * @param chromosome chromosome name
     * @param start      start position
     * @param end        end position
     * @param reference  reference allele
     * @param alternate  alternate allele
     * @param strand     strand value stored on the record
     */
    public Variant(String chromosome, int start, int end, String reference, String alternate, String strand) {
        this(VariantBuilder.buildAvroVariant(chromosome, start, end, reference, alternate));
        setStrand(strand);
        setStudies(null);
    }

    /**
     * Internal variant of the full constructor that accepts a missing end position.
     *
     * @param chromosome chromosome name
     * @param start      start position
     * @param end        end position, or {@code null} to let the builder infer it
     * @param reference  reference allele
     * @param alternate  alternate allele
     * @param strand     strand value stored on the record
     */
    private Variant(String chromosome, int start, @Nullable Integer end, String reference, String alternate, String strand) {
        // Nullable end. The builder will infer the end (and length) if null.
        this(VariantBuilder.buildAvroVariant(chromosome, start, end, reference, alternate));
        setStrand(strand);
        setStudies(null);
    }

    /**
     * Creates an empty {@link VariantBuilder}.
     *
     * @return a new builder
     */
    public static VariantBuilder newBuilder() {
        return new VariantBuilder();
    }

    /**
     * Creates a {@link VariantBuilder} from a variant string.
     *
     * @param str variant string handed to the {@link VariantBuilder} constructor
     * @return a new builder
     */
    public static VariantBuilder newBuilder(String str) {
        return new VariantBuilder(str);
    }

    /**
     * Creates a {@link VariantBuilder} from individual variant fields.
     *
     * @param chromosome chromosome name
     * @param start      start position
     * @param end        end position
     * @param reference  reference allele
     * @param alternate  alternate allele
     * @return a new builder
     */
    public static VariantBuilder newBuilder(String chromosome, Integer start, Integer end, String reference, String alternate) {
        return new VariantBuilder(chromosome, start, end, reference, alternate);
    }

    /**
     * Parses a single variant string through a {@link VariantBuilder}.
     *
     * @param variantString variant string
     * @return the variant built by {@code new VariantBuilder(variantString).build()}
     */
    public static Variant parseVariant(String variantString) {
        return new VariantBuilder(variantString).build();
    }

    /**
     * Parses a comma-separated list of variant strings.
     *
     * @param variantsString variant strings joined by {@code ','}
     * @return one variant per item, in input order; {@code null} when the input is {@code null} or empty
     */
    public static List<Variant> parseVariants(String variantsString) {
        if (variantsString == null || variantsString.isEmpty()) {
            // Kept as null (not an empty list) so existing callers that test for null keep working.
            return null;
        }
        String[] variantItems = variantsString.split(",");
        List<Variant> variants = new ArrayList<>(variantItems.length);
        for (String variantString : variantItems) {
            variants.add(parseVariant(variantString));
        }
        return variants;
    }

    /**
     * Delegates to {@link VariantBuilder#inferLength}.
     *
     * @deprecated call {@code VariantBuilder.inferLength(reference, alternate, start, end, type)} directly
     */
    @Deprecated
    public static int inferLength(String reference, String alternate, int start, int end, VariantType type) {
        return VariantBuilder.inferLength(reference, alternate, start, end, type);
    }

    /**
     * Delegates to {@link VariantBuilder#inferType}.
     *
     * @deprecated call {@code VariantBuilder.inferType(reference, alternate)} directly
     */
    @Deprecated
    public static VariantType inferType(String reference, String alternate) {
        return VariantBuilder.inferType(reference, alternate);
    }

    /**
     * Delegates to {@link VariantBuilder#getMateBreakend}.
     *
     * @deprecated call {@code VariantBuilder.getMateBreakend(variant)} directly
     */
    @Deprecated
    public static Variant getMateBreakend(Variant variant) {
        return VariantBuilder.getMateBreakend(variant);
    }

    /**
     * Delegates to {@link VariantBuilder#getCopyNumberSubtype}.
     *
     * @deprecated call {@code VariantBuilder.getCopyNumberSubtype(copyNumber)} directly
     */
    @Deprecated
    public static VariantType getCNVSubtype(Integer copyNumber) {
        return VariantBuilder.getCopyNumberSubtype(copyNumber);
    }

    /**
     * Recomputes the derived fields: first the type, then the length.
     * The order matters because {@link #resetLength()} reads the current type.
     */
    public void reset() {
        resetType();
        resetLength();
    }

    /**
     * Re-infers the variant type from the current reference and alternate alleles.
     * When the type changes to {@link VariantType#INDEL} from something else and the attached
     * {@link StructuralVariation} carries no information (it equals a newly created one),
     * the structural variation is removed.
     */
    public void resetType() {
        VariantType prevType = getType();
        setType(VariantBuilder.inferType(getReference(), getAlternate()));
        // Enum constants are compared by identity, which also copes with a previous type of null
        // (e.g. a variant created with the no-arg constructor).
        boolean becameIndel = getType() == VariantType.INDEL && prevType != VariantType.INDEL;
        if (getSv() != null && becameIndel) {
            // The variant was SV. Remove StructuralVariant information if any, and if empty
            if (getSv().equals(new StructuralVariation())) {
                setSv(null);
            }
        }
    }

    /**
     * Recomputes the length with {@link VariantBuilder#inferLength} from the current alleles,
     * coordinates and type, and stores it.
     */
    public void resetLength() {
        setLength(VariantBuilder.inferLength(getReference(), getAlternate(), getStart(), getEnd(), getType()));
    }

    /**
     * Tells whether the current type is a structural variant type, as decided by
     * {@link VariantBuilder#isSV}.
     *
     * @return the result of {@code VariantBuilder.isSV(getType())}
     */
    public boolean isSV() {
        return VariantBuilder.isSV(getType());
    }

    /**
     * Tells whether the alternate allele is symbolic.
     * An alternate of length 0 or 1 is never symbolic. Longer alternates are symbolic when the
     * variant is a {@link VariantType#BREAKEND} or when the allele is wrapped in angle brackets.
     *
     * @return {@code true} if the alternate is considered symbolic
     */
    public boolean isSymbolic() {
        final String alt = getAlternate();
        if (alt.length() > 1) {
            if (getType().equals(VariantType.BREAKEND)) {
                return true;
            }
            final boolean opensWithBracket = alt.charAt(0) == '<';
            return opensWithBracket && alt.charAt(alt.length() - 1) == '>';
        }
        return false;
    }

    /**
     * Returns the backing Avro record itself (not a copy).
     *
     * @return the wrapped {@link VariantAvro}
     */
    public VariantAvro getImpl() {
        return impl;
    }

    /**
     * Sets the chromosome after passing it through {@link Region#normalizeChromosome}.
     *
     * @param chromosome chromosome name; must be neither {@code null} nor empty
     * @throws IllegalArgumentException if {@code chromosome} is {@code null} or empty
     */
    public final void setChromosome(String chromosome) {
        if (StringUtils.isEmpty(chromosome)) {
            throw new IllegalArgumentException("Chromosome must not be empty");
        }
        impl.setChromosome(Region.normalizeChromosome(chromosome));
    }

    /**
     * Sets the start position.
     * Only negative values are rejected: despite the wording of the error message, {@code 0} is accepted.
     *
     * @param start start position; a {@code null} value fails with a {@link NullPointerException} when unboxed
     * @throws IllegalArgumentException if {@code start} is negative
     */
    public final void setStart(Integer start) {
        if (start < 0) {
            throw new IllegalArgumentException("Start must be positive");
        }
        impl.setStart(start);
    }

    /**
     * Sets the end position.
     * Only negative values are rejected: despite the wording of the error message, {@code 0} is accepted.
     *
     * @param end end position; a {@code null} value fails with a {@link NullPointerException} when unboxed
     * @throws IllegalArgumentException if {@code end} is negative
     */
    public final void setEnd(Integer end) {
        if (end < 0) {
            throw new IllegalArgumentException("End must be positive");
        }
        impl.setEnd(end);
    }

    /**
     * Sets the reference allele on the backing record.
     * The length is not recomputed here (the {@code resetLength()} call below is disabled);
     * call {@link #resetLength()} or {@link #reset()} explicitly when needed.
     *
     * @param reference reference allele
     */
    public void setReference(String reference) {
        impl.setReference(reference);
//        resetLength();
    }

    /**
     * Sets the alternate allele on the backing record.
     * The length is not recomputed here (the {@code resetLength()} call below is disabled);
     * call {@link #resetLength()} or {@link #reset()} explicitly when needed.
     *
     * @param alternate alternate allele
     */
    public void setAlternate(String alternate) {
        impl.setAlternate(alternate);
//        resetLength();
    }

    /**
     * @return the id stored on the backing record
     */
    public String getId() {
        return impl.getId();
    }

    /**
     * Sets the id on the backing record.
     *
     * @param id new id
     * @return this variant, to allow call chaining
     */
    public Variant setId(String id) {
        impl.setId(id);
        return this;
    }

    /**
     * @return the chromosome stored on the backing record
     */
    public String getChromosome() {
        return impl.getChromosome();
    }

    /**
     * @return the start position stored on the backing record
     */
    public Integer getStart() {
        return impl.getStart();
    }

    /**
     * @return the end position stored on the backing record
     */
    public Integer getEnd() {
        return impl.getEnd();
    }

    /**
     * @return the reference allele stored on the backing record
     */
    public String getReference() {
        return impl.getReference();
    }

    /**
     * @return the alternate allele stored on the backing record
     */
    public String getAlternate() {
        return impl.getAlternate();
    }

    /**
     * @return the strand stored on the backing record
     */
    public String getStrand() {
        return impl.getStrand();
    }

    /**
     * Sets the strand on the backing record.
     *
     * @param strand new strand value
     * @return this variant, to allow call chaining
     */
    public Variant setStrand(String strand) {
        impl.setStrand(strand);
        return this;
    }

    /**
     * @return the structural variation information stored on the backing record; may be {@code null}
     */
    public StructuralVariation getSv() {
        return impl.getSv();
    }

    /**
     * Sets the structural variation information on the backing record.
     *
     * @param sv new structural variation information
     * @return this variant, to allow call chaining
     */
    public Variant setSv(StructuralVariation sv) {
        impl.setSv(sv);
        return this;
    }

    /**
     * Returns the id followed by the names as one list.
     * <ul>
     *   <li>id empty or {@code null}: the names list of the backing record is returned as-is
     *       (possibly {@code null});</li>
     *   <li>id set and names {@code null}: an immutable single-element list;</li>
     *   <li>otherwise: a new list holding the id first and then every name.</li>
     * </ul>
     *
     * @return ids of the variant, see above for mutability and nullability
     * @deprecated use {@link #getId()} and {@link #getNames()}
     */
    @Deprecated
    public List<String> getIds() {
        String id = impl.getId();
        List<String> names = impl.getNames();
        if (StringUtils.isEmpty(id)) {
            return names;
        }
        if (names == null) {
            return Collections.singletonList(id);
        }
        List<String> ids = new ArrayList<>(1 + names.size());
        ids.add(id);
        ids.addAll(names);
        return ids;
    }

    /**
     * Splits a list of ids into the id (first element) and the names (remaining elements).
     * A {@code null} or empty list clears the id and sets the names to an empty list.
     *
     * @param ids ids of the variant; the first one becomes the id
     * @deprecated use {@link #setId(String)} and {@link #setNames(List)}
     */
    @Deprecated
    public void setIds(List<String> ids) {
        if (ids == null || ids.isEmpty()) {
            impl.setId(null);
            impl.setNames(Collections.emptyList());
        } else {
            impl.setId(ids.get(0));
            // Copy the tail: subList() only returns a view backed by the caller's list, which
            // breaks as soon as that list is structurally modified.
            impl.setNames(new ArrayList<>(ids.subList(1, ids.size())));
        }
    }

    /**
     * @return the names list stored on the backing record (not a copy)
     */
    public List<String> getNames() {
        return impl.getNames();
    }

    /**
     * Sets the names list on the backing record. The list is stored as given (it is not copied).
     *
     * @param names new names
     * @return this variant, to allow call chaining
     */
    public Variant setNames(List<String> names) {
        impl.setNames(names);
        return this;
    }

    /**
     * @return the length stored on the backing record
     */
    public Integer getLength() {
        return impl.getLength();
    }

    /**
     * Computes the length of the reference side of the variant.
     *
     * @return the result of {@link VariantBuilder#getLengthReference} for the current reference, type and length
     */
    public Integer getLengthReference() {
        return VariantBuilder.getLengthReference(getReference(), getType(), getLength());
    }

    /**
     * Computes the length of the alternate side of the variant.
     *
     * @return the result of {@link VariantBuilder#getLengthAlternate} for the current alternate, type and length
     */
    public Integer getLengthAlternate() {
        return VariantBuilder.getLengthAlternate(getAlternate(), getType(), getLength());
    }

    /**
     * Sets the length on the backing record.
     *
     * @param value new length
     * @return this variant, to allow call chaining
     */
    public Variant setLength(Integer value) {
        impl.setLength(value);
        return this;
    }

    /**
     * @return the variant type stored on the backing record
     */
    public VariantType getType() {
        return impl.getType();
    }

    /**
     * Sets the variant type on the backing record.
     *
     * @param value new type
     * @return this variant, to allow call chaining
     */
    public Variant setType(VariantType value) {
        impl.setType(value);
        return this;
    }

    /**
     * @return the annotation stored on the backing record
     */
    public VariantAnnotation getAnnotation() {
        return impl.getAnnotation();
    }

    /**
     * Sets the annotation on the backing record.
     *
     * @param value new annotation
     */
    public void setAnnotation(VariantAnnotation value) {
        impl.setAnnotation(value);
    }

    /**
     * Returns a snapshot of the study wrappers.
     *
     * @return an unmodifiable copy of the values of {@link #getStudiesMap()}, or {@code null}
     *         when the backing record has no studies list
     */
    public List<StudyEntry> getStudies() {
        // Look the map up once: every getStudiesMap() call re-checks the lazy index.
        Map<String, StudyEntry> studiesMap = getStudiesMap();
        if (studiesMap == null) {
            return null;
        }
        return Collections.unmodifiableList(new ArrayList<>(studiesMap.values()));
    }

    /**
     * Replaces every study of the variant.
     * With {@code null} the index of wrappers is dropped and the backing record receives an empty
     * list. Otherwise the backing record receives the Avro object of each study, in order, and the
     * wrappers are indexed by study id.
     *
     * @param studies new studies, or {@code null} to clear them
     */
    public void setStudies(List<StudyEntry> studies) {
        if (studies == null) {
            studyEntries = null;
            impl.setStudies(new ArrayList<>());
            return;
        }
        // Build both structures locally and publish them once they are complete.
        Map<String, StudyEntry> entriesById = new HashMap<>(studies.size());
        List<org.opencb.biodata.models.variant.avro.StudyEntry> avroStudies = new ArrayList<>(studies.size());
        for (StudyEntry study : studies) {
            avroStudies.add(study.getImpl());
            entriesById.put(study.getStudyId(), study);
        }
        studyEntries = entriesById;
        impl.setStudies(avroStudies);
    }

    /**
     * Returns the study wrappers indexed by study id.
     * The index is created lazily. Whenever its size differs from the studies list of the backing
     * record, a wrapper is added for every Avro study whose id is not indexed yet.
     *
     * @return an unmodifiable view of the index, or {@code null} when the backing record has no
     *         studies list
     */
    public Map<String, StudyEntry> getStudiesMap() {
        if (impl.getStudies() == null) {
            return null;
        }
        if (studyEntries == null) {
            studyEntries = new HashMap<>(impl.getStudies().size());
        }
        if (studyEntries.size() != impl.getStudies().size()) {
            for (org.opencb.biodata.models.variant.avro.StudyEntry studyEntry : impl.getStudies()) {
                studyEntries.putIfAbsent(studyEntry.getStudyId(), new StudyEntry(studyEntry));
            }
        }
        return Collections.unmodifiableMap(studyEntries);
    }

    /**
     * Looks up one study by its id.
     *
     * @param studyId id of the study
     * @return the matching study, or {@code null} when there is none or the backing record has
     *         no studies list
     */
    public StudyEntry getStudy(String studyId) {
        // getStudiesMap() returns null exactly when the backing record has no studies list.
        final Map<String, StudyEntry> studiesById = getStudiesMap();
        return studiesById == null ? null : studiesById.get(studyId);
    }

    /**
     * Adds a study, replacing any study that has the same id.
     *
     * @param studyEntry study to add
     */
    public void addStudyEntry(StudyEntry studyEntry) {
        if (impl.getStudies() == null) {
            impl.setStudies(new ArrayList<>());
        }
        // Bring the lazy index in line with the backing record before looking for a previous
        // entry. Otherwise a study that is only present in the Avro list (index never built) is
        // not detected and the list ends up holding two entries for the same study id.
        getStudiesMap();
        StudyEntry prevStudy = this.studyEntries.put(studyEntry.getStudyId(), studyEntry);
        if (prevStudy != null) {
            impl.getStudies().remove(prevStudy.getImpl());
        }
        impl.getStudies().add(studyEntry.getImpl());
    }

    /**
     * Returns the sample names of a study. The {@code fileId} argument is ignored.
     *
     * @param studyId id of the study
     * @param fileId  unused
     * @return the result of {@link #getSampleNames(String)}
     * @deprecated use {@link #getSampleNames(String)}
     */
    @Deprecated
    public Iterable getSampleNames(String studyId, String fileId) {
        return getSampleNames(studyId);
    }

    /**
     * Returns the ordered sample names of a study.
     *
     * @param studyId id of the study
     * @return {@code getOrderedSamplesName()} of the study, or {@code null} when the study is not found
     */
    public List getSampleNames(String studyId) {
        final StudyEntry study = getStudy(studyId);
        return study != null ? study.getOrderedSamplesName() : null;
    }

    /**
     * Removes the shared leading base of the reference and alternate alleles.
     * Applies only to {@link VariantType#INDEL}, {@link VariantType#SV} or variants longer than 1,
     * and only when both alleles start with the same character. The start moves one position
     * forward, the end moves one position back when the trimmed reference is shorter than the
     * trimmed alternate, alleles that become empty are written as {@code "-"}, and the length
     * is recomputed.
     * Variants with an empty reference or alternate have no leading base to remove and are left
     * untouched.
     */
    public void transformToEnsemblFormat() {
        if (getType() != VariantType.INDEL && getType() != VariantType.SV && getLength() <= 1) {
            return;
        }
        String reference = getReference();
        String alternate = getAlternate();
        // Guard charAt(0): empty alleles are a valid state of this class (see toString()).
        if (reference.isEmpty() || alternate.isEmpty()) {
            return;
        }
        if (reference.charAt(0) != alternate.charAt(0)) {
            return;
        }

        setReference(reference.substring(1));
        setAlternate(alternate.substring(1));
        setStart(getStart() + 1);
        if (getReference().length() < getAlternate().length()) {
            setEnd(getEnd() - 1);
        }

        if (getReference().equals("")) {
            setReference("-");
        }
        if (getAlternate().equals("")) {
            setAlternate("-");
        }

        resetLength();
    }

    /**
     * Renders the variant as {@code chromosome:start:reference:alternate}, writing empty
     * alleles as {@code "-"}. The end position is never included.
     *
     * @return the short string form of the variant
     */
    public String toStringSimple() {
        final StringBuilder text = new StringBuilder();
        text.append(getChromosome()).append(':').append(getStart()).append(':');
        if (getReference().isEmpty()) {
            text.append('-');
        } else {
            text.append(getReference());
        }
        text.append(':');
        if (getAlternate().isEmpty()) {
            text.append('-');
        } else {
            text.append(getAlternate());
        }
        return text.toString();
    }

    /**
     * Renders the variant as {@code chr:start[-end]:[ref:]alt}.
     * <ul>
     *   <li>Start and end are written as {@code left<position<right} when the structural
     *       variation carries a confidence interval for them.</li>
     *   <li>The end is only written when the condition marked "Optional end" below holds.</li>
     *   <li>Empty alleles are written as {@code "-"}, or {@code "."} for an empty alternate of a
     *       {@link VariantType#NO_VARIATION} variant.</li>
     *   <li>Insertions with left and/or right inserted sequences are written as
     *       {@code left...right}.</li>
     *   <li>Tandem duplications are written with the symbolic allele {@code <DUP:TANDEM>}.</li>
     * </ul>
     *
     * @return the string form of the variant
     */
    @Override
    public String toString() {
        int start = getStart();
        int end = getEnd();
        StringBuilder sb = new StringBuilder().append(getChromosome()).append(":");
        StructuralVariation sv = getSv();

        // Start
        if (sv != null && (sv.getCiStartLeft() != null || sv.getCiStartRight() != null)) {
            sb.append(sv.getCiStartLeft() == null ? start : sv.getCiStartLeft())
                    .append('<').append(start).append('<')
                    .append(sv.getCiStartRight() == null ? start : sv.getCiStartRight());
        } else {
            sb.append(start);
        }

        // Optional end
        if (start != end && getLengthReference() != getReference().length() && getLength() != UNKNOWN_LENGTH
                || isSymbolic() && end >= start && getType() != VariantType.NO_VARIATION) {
            sb.append("-");
            if (sv != null && (sv.getCiEndLeft() != null || sv.getCiEndRight() != null)) {
                sb.append(sv.getCiEndLeft() == null ? end : sv.getCiEndLeft())
                        .append('<').append(end).append('<')
                        .append(sv.getCiEndRight() == null ? end : sv.getCiEndRight());
            } else {
                sb.append(end);
            }
        }

        sb.append(":");
        if (this.getReference() != null) {
            sb.append(getReference().isEmpty() ? "-" : getReference()).append(":");
        }
        if (getAlternate().isEmpty()) {
            if (VariantType.NO_VARIATION.equals(getType())) {
                sb.append(".");
            } else {
                sb.append("-");
            }
        } else if (VariantType.INSERTION.equals(getType()) && getSv() != null
                    && (getSv().getLeftSvInsSeq() != null || getSv().getRightSvInsSeq() != null)) {
            if (getSv().getLeftSvInsSeq() != null) {
                sb.append(getSv().getLeftSvInsSeq());
            }
            sb.append("...");
            if (getSv().getRightSvInsSeq() != null) {
                sb.append(getSv().getRightSvInsSeq());
            }
        } else if (getType() == VariantType.TANDEM_DUPLICATION) {
            // Appending an empty string here left the rendered variant without an alternate.
            sb.append("<DUP:TANDEM>");
        } else {
            sb.append(getAlternate());
        }
        return sb.toString();
    }

    /**
     * Returns the string form of the backing Avro record ({@code impl.toString()}).
     * NOTE(review): assumed to be JSON, as the method name implies - confirm against VariantAvro.
     *
     * @return string form of the backing record
     */
    public String toJson() {
        return impl.toString();
    }

    /**
     * Compares only the genomic definition of two variants: start, end, chromosome, reference,
     * alternate, type and structural variation. Ids, names, studies and annotation are ignored.
     *
     * @param o object to compare with
     * @return {@code true} if {@code o} is a {@link Variant} with the same genomic definition
     */
    public boolean sameGenomicVariant(Object o) {
        if (o == this) {
            return true;
        }
        if (!(o instanceof Variant)) {
            return false;
        }
        final Variant that = (Variant) o;
        return Objects.equals(getStart(), that.getStart())
                && Objects.equals(getEnd(), that.getEnd())
                && Objects.equals(getChromosome(), that.getChromosome())
                && Objects.equals(getReference(), that.getReference())
                && Objects.equals(getAlternate(), that.getAlternate())
                && getType() == that.getType()
                && Objects.equals(getSv(), that.getSv());
    }

    /**
     * Two variants are equal when their backing Avro records are equal.
     *
     * @param o object to compare with
     * @return {@code true} if {@code o} is a {@link Variant} wrapping an equal record
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o instanceof Variant) {
            return Objects.equals(impl, ((Variant) o).impl);
        }
        return false;
    }

    /**
     * @return the hash code of the backing Avro record, or {@code 0} if there is none
     */
    @Override
    public int hashCode() {
        return Objects.hashCode(impl);
    }

    /**
     * Tells whether this variant overlaps with another one.
     *
     * @param other     variant to check against; its start and end are unboxed, so they must not be {@code null}
     * @param inclusive whether touching boundaries count as an overlap
     * @return the result of {@link #overlapWith(String, int, int, boolean)} for the region of {@code other}
     */
    public boolean overlapWith(Variant other, boolean inclusive) {
        return overlapWith(other.getChromosome(), other.getStart(), other.getEnd(), inclusive);
    }

    /**
     * Tells whether this variant overlaps with a region.
     * A start greater than its end (an insertion) is lowered to the end before comparing, both
     * for this variant and for the given region.
     *
     * @param chromosome chromosome of the region
     * @param start      start of the region
     * @param end        end of the region
     * @param inclusive  {@code true} to compare with {@code <=}/{@code >=}, {@code false} for strict comparison
     * @return {@code true} if both are on the same chromosome and their ranges overlap
     */
    public boolean overlapWith(String chromosome, int start, int end, boolean inclusive) {
        if (!StringUtils.equals(this.getChromosome(), chromosome)) {
            return false; // Different Chromosome
        }
        final int ownStart = this.getStart();
        final int ownEnd = this.getEnd();
        // Insertions have start > end: use the end as the effective start.
        final int effectiveOwnStart = Math.min(ownStart, ownEnd);
        final int effectiveStart = Math.min(start, end);

        return inclusive
                ? effectiveOwnStart <= end && ownEnd >= effectiveStart
                : effectiveOwnStart < end && ownEnd > effectiveStart;
    }

    /**
     * Check if both variants start at the same position of the same chromosome.
     *
     * @param other Variant to check against
     * @return True if chromosome and start are the same
     */
    public boolean onSameStartPosition (Variant other){
        if (!StringUtils.equals(this.getChromosome(), other.getChromosome())) {
            return false;
        }
        return this.getStart().equals(other.getStart());
    }

    /**
     * Check if both variants cover the same region (chromosome, start, end).
     *
     * @param other Variant to check against
     * @return True if chromosome, start and end are the same
     */
    public boolean onSameRegion (Variant other){
        if (!onSameStartPosition(other)) {
            return false;
        }
        return this.getEnd().equals(other.getEnd());
    }

    /**
     * Return all VariantTypes subtypes given a VariantType.
     * {@link VariantType} represents a hierarchical structure where SNV includes SNP, MNV includes MNP,
     * SV includes every type in {@link #SV_SUBTYPES}, and COPY_NUMBER / CNV include every type in
     * {@link #COPY_NUMBER_SUBTYPES}. Any other type has no subtypes.
     *
     * @param variantType   Variant Type
     * @return  Unmodifiable set of subtypes; empty when the type has none
     */
    public static Set<VariantType> subTypes(VariantType variantType) {
        switch (variantType) {
            case SNV:
                return Collections.singleton(VariantType.SNP);
            case MNV:
                return Collections.singleton(VariantType.MNP);
            case SV:
                // Wrapped so that callers cannot alter the shared constant.
                return Collections.unmodifiableSet(SV_SUBTYPES);
            case COPY_NUMBER:
            case CNV:
                return Collections.unmodifiableSet(COPY_NUMBER_SUBTYPES);
            default:
                return Collections.emptySet();
        }
    }

    /**
     * Orders variants by their backing Avro records. Variants that are {@link #equals(Object) equal}
     * always compare as {@code 0}.
     *
     * @param o variant to compare with
     * @return {@code 0} for equal variants, otherwise the result of comparing the backing records
     */
    @Override
    public int compareTo(Variant o) {
        return this.equals(o) ? 0 : this.getImpl().compareTo(o.getImpl());
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy