All downloads are free. Search and download functionality uses the official Maven repository.

org.opencb.biodata.tools.clinical.ClinicalVariantCreator Maven / Gradle / Ivy

There is a newer version: 3.3.0
Show newest version
/*
 * 
 *
 */

package org.opencb.biodata.tools.clinical;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.clinical.ClinicalAcmg;
import org.opencb.biodata.models.clinical.ClinicalDiscussion;
import org.opencb.biodata.models.clinical.ClinicalProperty;
import org.opencb.biodata.models.clinical.ClinicalProperty.ModeOfInheritance;
import org.opencb.biodata.models.clinical.Disorder;
import org.opencb.biodata.models.clinical.interpretation.*;
import org.opencb.biodata.models.clinical.interpretation.exceptions.InterpretationAnalysisException;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.ConsequenceType;
import org.opencb.biodata.models.variant.avro.GeneCancerAssociation;
import org.opencb.biodata.models.variant.avro.SequenceOntologyTerm;
import org.opencb.commons.utils.PrintUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.stream.Collectors;

import static org.opencb.biodata.models.clinical.ClinicalProperty.Penetrance;
import static org.opencb.biodata.models.clinical.ClinicalProperty.RoleInCancer;
import static org.opencb.biodata.models.clinical.interpretation.VariantClassification.calculateAcmgClassification;
import static org.opencb.biodata.models.clinical.interpretation.VariantClassification.computeClinicalSignificance;
import static org.opencb.biodata.tools.pedigree.ModeOfInheritance.extendedLof;
import static org.opencb.biodata.tools.pedigree.ModeOfInheritance.proteinCoding;

public abstract class ClinicalVariantCreator {

    protected Set biotypeSet;
    protected Set soNameSet;

    protected Map> geneToPanelMap;
    protected Map> variantToPanelMap;

    // logger
    protected Logger logger = LoggerFactory.getLogger(this.getClass().toString());

    protected List diseasePanels;
    protected Disorder disorder;
    protected List modeOfInheritances;
    protected Penetrance penetrance;

    @Deprecated
    protected Map> rolesInCancerMap;

    protected String assembly;

    /**
     * Creates a creator that uses the default {@code proteinCoding} biotypes and {@code extendedLof} SO terms.
     *
     * @param diseasePanels      disease panels to work with
     * @param disorder           disorder to work with
     * @param modeOfInheritances modes of inheritance attached to created evidences
     * @param penetrance         penetrance attached to created evidences
     * @param roleInCancer       ignored: it is not forwarded to the delegated constructor.
     *                           NOTE(review): type arguments assumed to be {@code <String, RoleInCancer>}
     * @param assembly           assembly name
     * @deprecated the {@code roleInCancer} argument is ignored; use the constructor that does not take it.
     */
    @Deprecated
    public ClinicalVariantCreator(List<DiseasePanel> diseasePanels, Disorder disorder, List<ModeOfInheritance> modeOfInheritances,
                                  Penetrance penetrance, Map<String, RoleInCancer> roleInCancer, String assembly) {
        this(diseasePanels, disorder, modeOfInheritances, penetrance, assembly, new ArrayList<>(proteinCoding),
                new ArrayList<>(extendedLof));
    }

    /**
     * Creates a creator with explicit biotype and SO-term lists.
     *
     * @param diseasePanels      disease panels to work with
     * @param disorder           disorder to work with
     * @param modeOfInheritances modes of inheritance attached to created evidences
     * @param roleInCancer       ignored: it is not forwarded to the delegated constructor.
     *                           NOTE(review): type arguments assumed to be {@code <String, RoleInCancer>}
     * @param penetrance         penetrance attached to created evidences
     * @param assembly           assembly name
     * @param biotypes           biotypes to keep; may be {@code null} or empty
     * @param soNames            SO term names to keep; may be {@code null} or empty
     * @deprecated the {@code roleInCancer} argument is ignored; use the constructor that does not take it.
     */
    @Deprecated
    public ClinicalVariantCreator(List<DiseasePanel> diseasePanels, Disorder disorder, List<ModeOfInheritance> modeOfInheritances,
                                  Map<String, RoleInCancer> roleInCancer, Penetrance penetrance, String assembly,
                                  List<String> biotypes, List<String> soNames) {
        this(diseasePanels, disorder, modeOfInheritances, penetrance, assembly, biotypes, soNames);
    }

    /**
     * Creates a creator that uses the default {@code proteinCoding} biotypes and {@code extendedLof} SO terms.
     *
     * @param diseasePanels      disease panels to work with
     * @param disorder           disorder to work with
     * @param modeOfInheritances modes of inheritance attached to created evidences
     * @param penetrance         penetrance attached to created evidences
     * @param assembly           assembly name
     */
    public ClinicalVariantCreator(List<DiseasePanel> diseasePanels, Disorder disorder, List<ModeOfInheritance> modeOfInheritances,
                                  Penetrance penetrance, String assembly) {
        // Copies of the shared default collections are passed on, never the defaults themselves.
        this(diseasePanels, disorder, modeOfInheritances, penetrance, assembly, new ArrayList<>(proteinCoding),
                new ArrayList<>(extendedLof));
    }

    /**
     * Main constructor: stores the arguments and copies the biotype and SO-name lists into sets.
     * {@link #geneToPanelMap} and {@link #variantToPanelMap} are left {@code null}.
     *
     * @param diseasePanels      disease panels to work with
     * @param disorder           disorder to work with
     * @param modeOfInheritances modes of inheritance attached to created evidences
     * @param penetrance         penetrance attached to created evidences
     * @param assembly           assembly name
     * @param biotypes           biotypes to keep; {@code null} or empty yields an empty set
     * @param soNames            SO term names to keep; {@code null} or empty yields an empty set
     */
    public ClinicalVariantCreator(List<DiseasePanel> diseasePanels, Disorder disorder, List<ModeOfInheritance> modeOfInheritances,
                                  Penetrance penetrance, String assembly, List<String> biotypes, List<String> soNames) {
        this.diseasePanels = diseasePanels;
        this.disorder = disorder;
        this.modeOfInheritances = modeOfInheritances;
        this.penetrance = penetrance;
        this.assembly = assembly;
        // NOTE(review): this call runs on every construction and its effect cannot be seen from this file;
        // confirm it is intentional and not a leftover.
        PrintUtils.printSpace();

        // The sets are always non-null, even when no filter values were supplied.
        this.biotypeSet = new HashSet<>();
        if (CollectionUtils.isNotEmpty(biotypes)) {
            this.biotypeSet.addAll(biotypes);
        }
        this.soNameSet = new HashSet<>();
        if (CollectionUtils.isNotEmpty(soNames)) {
            this.soNameSet.addAll(soNames);
        }

        this.geneToPanelMap = null;
        this.variantToPanelMap = null;
    }

    /**
     * Builds the clinical variants for the given variants; the strategy is defined by each subclass.
     *
     * @param variants variants to process
     * @return the clinical variants built from {@code variants}
     * @throws InterpretationAnalysisException declared for implementations to signal a failure
     */
    public abstract List<ClinicalVariant> create(List<Variant> variants) throws InterpretationAnalysisException;

    /**
     * Convenience overload for a single mode of inheritance: wraps {@code moi} in a one-element list and
     * delegates to {@link #create(List, List)}.
     *
     * @param variants variants to process
     * @param moi      mode of inheritance to use
     * @return the clinical variants built from {@code variants}
     * @throws InterpretationAnalysisException propagated from {@link #create(List)}
     */
    public List<ClinicalVariant> create(List<Variant> variants, ModeOfInheritance moi) throws InterpretationAnalysisException {
        return create(variants, Collections.singletonList(moi));
    }

    /**
     * Replaces this creator's modes of inheritance with {@code mois} and then delegates to {@link #create(List)}.
     * The replacement is stored in the {@link #modeOfInheritances} field, so it stays in effect after this call.
     *
     * @param variants variants to process
     * @param mois     modes of inheritance to use from now on
     * @return the clinical variants built from {@code variants}
     * @throws InterpretationAnalysisException propagated from {@link #create(List)}
     */
    public List<ClinicalVariant> create(List<Variant> variants, List<ModeOfInheritance> mois) throws InterpretationAnalysisException {
        this.modeOfInheritances = mois;
        return create(variants);
    }

    /**
     * Builds clinical variants for secondary findings.
     * <p>
     * NOTE(review): the per-variant evidence list is created empty and nothing fills it, so the guarded branch
     * never runs and this method currently always returns an empty list. The evidence computation appears to be
     * missing; confirm the intended behaviour.
     *
     * @param variants variants to inspect; must not be {@code null}
     * @return a new mutable list holding one clinical variant per input variant that has evidences
     */
    public List<ClinicalVariant> createSecondaryFindings(List<Variant> variants) {
        List<ClinicalVariant> clinicalVariants = new ArrayList<>();
        for (Variant variant : variants) {
            List<ClinicalVariantEvidence> clinicalVariantEvidences = new ArrayList<>();

            // Only variants with at least one evidence are turned into clinical variants
            if (CollectionUtils.isNotEmpty(clinicalVariantEvidences)) {
                ClinicalVariant clinicalVariant = new ClinicalVariant(variant.getImpl(), Collections.emptyList(), Collections.emptyList(),
                        Collections.emptyMap(), new ClinicalDiscussion(), null, ClinicalVariant.Status.NOT_REVIEWED,
                        Collections.emptyList(), Collections.emptyMap());
                clinicalVariant.setEvidences(clinicalVariantEvidences);

                // Add variant to the list
                clinicalVariants.add(clinicalVariant);
            }
        }
        return clinicalVariants;
    }

    protected Map> getVariantToPanelMap(List diseasePanels) {
        Map> idToPanelMap = new HashMap<>();
        if (CollectionUtils.isNotEmpty(diseasePanels)) {
            for (DiseasePanel panel : diseasePanels) {
                // Put gene IDs
                if (CollectionUtils.isNotEmpty(panel.getGenes())) {
                    for (DiseasePanel.VariantPanel variantPanel : panel.getVariants()) {
                        if (variantPanel.getId() != null) {
                            if (!idToPanelMap.containsKey(variantPanel.getId())) {
                                idToPanelMap.put(variantPanel.getId(), new HashSet<>());
                            }
                            idToPanelMap.get(variantPanel.getId()).add(panel);
                        }
                    }
                }
            }
        }
        return idToPanelMap;
    }

    protected Map> getGeneToPanelMap(List diseasePanels) {
        Map> idToPanelMap = new HashMap<>();
        if (CollectionUtils.isNotEmpty(diseasePanels)) {
            for (DiseasePanel panel : diseasePanels) {
                // Put gene IDs
                if (CollectionUtils.isNotEmpty(panel.getGenes())) {
                    for (DiseasePanel.GenePanel genePanel : panel.getGenes()) {
                        if (genePanel.getId() != null) {
                            if (!idToPanelMap.containsKey(genePanel.getId())) {
                                idToPanelMap.put(genePanel.getId(), new HashSet<>());
                            }
                            idToPanelMap.get(genePanel.getId()).add(panel);
                        }
                    }
                }
            }
        }
        return idToPanelMap;
    }

    protected Map> getGeneToPanelMoiMap(List diseasePanels) {
        // Map>
        Map> idToPanelMoiMap = new HashMap<>();
        if (CollectionUtils.isNotEmpty(diseasePanels)) {
            for (DiseasePanel panel : diseasePanels) {
                // Put gene IDs
                if (CollectionUtils.isNotEmpty(panel.getGenes())) {
                    for (DiseasePanel.GenePanel panelGene : panel.getGenes()) {
                        if (StringUtils.isNotEmpty(panelGene.getId()) && panelGene.getModeOfInheritance() != null) {
                            if (!idToPanelMoiMap.containsKey(panelGene.getId())) {
                                idToPanelMoiMap.put(panelGene.getId(), new HashMap());
                            }
                            idToPanelMoiMap.get(panelGene.getId()).put(panel.getId(), panelGene.getModeOfInheritance());
                        }
                    }
                }
            }
        }
        return idToPanelMoiMap;
    }

    /**
     * Single mode-of-inheritance variant of the evidence factory: {@code moi} is wrapped in a one-element
     * immutable list and the list-based overload does the actual work.
     *
     * @param genomicFeature genomic feature to attach; forwarded unchanged
     * @param panelId        panel ID to attach; forwarded unchanged
     * @param moi            the only mode of inheritance for the evidence
     * @param penetrance     penetrance to attach; forwarded unchanged
     * @param tier           tier; forwarded unchanged
     * @param variant        variant the evidence is created for
     * @return the evidence produced by the list-based overload
     */
    protected ClinicalVariantEvidence createClinicalVariantEvidence(GenomicFeature genomicFeature, String panelId,
                                                                    ModeOfInheritance moi, Penetrance penetrance,
                                                                    String tier, Variant variant) {
        List<ModeOfInheritance> singleMoi = Collections.singletonList(moi);
        return createClinicalVariantEvidence(genomicFeature, panelId, singleMoi, penetrance, tier, variant);
    }

    /**
     * Creates a clinical variant evidence for {@code variant}.
     * <p>
     * Genomic feature, panel ID, modes of inheritance and penetrance are only set when non-null. The
     * classification always gets the ACMG list from {@code calculateAcmgClassification(variant, mois)}. Its
     * clinical significance is PATHOGENIC when {@link #variantToPanelMap} holds a non-empty entry for the variant
     * ID, otherwise the result of {@code computeClinicalSignificance} on that ACMG list. Roles in cancer are
     * collected from the variant annotation's gene-cancer associations; unknown values are logged and skipped.
     *
     * @param genomicFeature genomic feature to attach; may be {@code null}
     * @param panelId        panel ID to attach; may be {@code null}
     * @param mois           modes of inheritance to attach and to use for the ACMG calculation; may be {@code null}
     * @param penetrance     penetrance to attach; may be {@code null}
     * @param tier           currently unused by this method
     * @param variant        variant the evidence is created for; must not be {@code null}
     * @return the new evidence; never {@code null}
     */
    protected ClinicalVariantEvidence createClinicalVariantEvidence(GenomicFeature genomicFeature, String panelId,
                                                                    List<ModeOfInheritance> mois,
                                                                    Penetrance penetrance, String tier, Variant variant) {
        ClinicalVariantEvidence clinicalVariantEvidence = new ClinicalVariantEvidence();

        // Genomic feature
        if (genomicFeature != null) {
            clinicalVariantEvidence.setGenomicFeature(genomicFeature);
        }

        // Panel ID
        if (panelId != null) {
            clinicalVariantEvidence.setPanelId(panelId);
        }

        // Mode of inheritance
        if (mois != null) {
            clinicalVariantEvidence.setModeOfInheritances(mois);
        }

        // Penetrance
        if (penetrance != null) {
            clinicalVariantEvidence.setPenetrance(penetrance);
        }

        // Variant classification:
        clinicalVariantEvidence.setClassification(new VariantClassification());

        // Variant classification: ACMG
        List<ClinicalAcmg> acmgs = calculateAcmgClassification(variant, mois);
        clinicalVariantEvidence.getClassification().setAcmg(acmgs);

        // Variant classification: clinical significance
        if (MapUtils.isNotEmpty(variantToPanelMap) && variantToPanelMap.containsKey(variant.getId())
                && CollectionUtils.isNotEmpty(variantToPanelMap.get(variant.getId()))) {
            clinicalVariantEvidence.getClassification().setClinicalSignificance(ClinicalProperty.ClinicalSignificance.PATHOGENIC);
        } else {
            clinicalVariantEvidence.getClassification().setClinicalSignificance(computeClinicalSignificance(acmgs));
        }

        // Role in cancer
        if (variant.getAnnotation() != null && CollectionUtils.isNotEmpty(variant.getAnnotation().getGeneCancerAssociations())) {
            Set<RoleInCancer> roles = new HashSet<>();
            for (GeneCancerAssociation geneCancerAssociation : variant.getAnnotation().getGeneCancerAssociations()) {
                if (CollectionUtils.isEmpty(geneCancerAssociation.getRoleInCancer())) {
                    continue;
                }
                for (String value : geneCancerAssociation.getRoleInCancer()) {
                    if (value == null) {
                        // A null entry used to throw from inside the catch block; treat it as an unknown value
                        logger.info("Unknown role in cancer value: {}. It will be ignored.", value);
                        continue;
                    }
                    // Locale.ROOT keeps the enum lookup independent of the JVM default locale
                    String normalizedValue = value.toUpperCase(Locale.ROOT);
                    try {
                        roles.add(RoleInCancer.valueOf(normalizedValue));
                    } catch (IllegalArgumentException e) {
                        logger.info("Unknown role in cancer value: {}. It will be ignored.", normalizedValue);
                    }
                }
            }
            if (CollectionUtils.isNotEmpty(roles)) {
                List<RoleInCancer> rolesInCancer = new ArrayList<>(roles);
                clinicalVariantEvidence.setRolesInCancer(rolesInCancer);
            }
        }

        return clinicalVariantEvidence;
    }


    /**
     * Returns the Sequence Ontology terms of a consequence type, optionally restricted to a set of names.
     *
     * @param ct             consequence type to read; {@code null} yields an empty list
     * @param includeSoTerms SO term names to keep; when {@code null} or empty every term is kept
     * @return a new mutable list with the selected terms, in their original order; never {@code null}
     */
    protected List<SequenceOntologyTerm> getSOTerms(ConsequenceType ct, Set<String> includeSoTerms) {
        List<SequenceOntologyTerm> soTerms = new ArrayList<>();
        if (ct == null || CollectionUtils.isEmpty(ct.getSequenceOntologyTerms())) {
            return soTerms;
        }
        // An empty filter means "no filter", not "reject everything"
        boolean keepAll = CollectionUtils.isEmpty(includeSoTerms);
        for (SequenceOntologyTerm soTerm : ct.getSequenceOntologyTerms()) {
            if (keepAll || includeSoTerms.contains(soTerm.getName())) {
                soTerms.add(soTerm);
            }
        }
        return soTerms;
    }


    /**
     * Tells whether any SO term of the consequence type, after the {@code includeSoTerms} restriction applied by
     * {@link #getSOTerms}, has a non-empty name contained in {@code soNameSet}.
     *
     * @param ct             consequence type to inspect
     * @param soNameSet      SO term names to look for; this parameter shadows the field of the same name.
     *                       {@code null} or empty yields {@code false}
     * @param includeSoTerms restriction forwarded to {@link #getSOTerms}
     * @return {@code true} when at least one selected term name is in {@code soNameSet}
     */
    protected boolean containSOName(ConsequenceType ct, Set<String> soNameSet, Set<String> includeSoTerms) {
        if (CollectionUtils.isEmpty(soNameSet)) {
            return false;
        }
        List<SequenceOntologyTerm> sots = getSOTerms(ct, includeSoTerms);
        if (CollectionUtils.isEmpty(sots)) {
            return false;
        }
        for (SequenceOntologyTerm sot : sots) {
            if (StringUtils.isNotEmpty(sot.getName()) && soNameSet.contains(sot.getName())) {
                return true;
            }
        }
        return false;
    }


    /**
     * Creates the evidences for a variant and consequence type, restricting SO terms to the default
     * {@code extendedLof} set.
     *
     * @param tier     tier forwarded to the evidence factory
     * @param panelIds panel IDs to create evidences for; may be {@code null} or empty
     * @param ct       consequence type; may be {@code null}
     * @param variant  variant the evidences are created for
     * @return the evidences created by the five-argument overload
     */
    protected List<ClinicalVariantEvidence> createClinicalVariantEvidences(String tier, List<String> panelIds, ConsequenceType ct,
                                                                           Variant variant) {
        return createClinicalVariantEvidences(tier, panelIds, ct, variant, extendedLof);
    }

    /**
     * Creates the evidences for a variant and consequence type.
     * <p>
     * When {@code panelIds} is non-empty, one evidence is created per panel ID. Otherwise a single evidence
     * without panel is created, but only if the consequence type has at least one SO term left after the
     * {@code includeSoTerms} restriction. Every evidence uses this creator's {@link #modeOfInheritances} and
     * {@link #penetrance}.
     *
     * @param tier           tier forwarded to the evidence factory
     * @param panelIds       panel IDs to create evidences for; may be {@code null} or empty
     * @param ct             consequence type used to build the genomic feature; when {@code null} no genomic
     *                       feature is attached
     * @param variant        variant the evidences are created for
     * @param includeSoTerms SO term names to keep; {@code null} or empty keeps every term
     * @return a new mutable list with the created evidences; possibly empty, never {@code null}
     */
    protected List<ClinicalVariantEvidence> createClinicalVariantEvidences(String tier, List<String> panelIds, ConsequenceType ct,
                                                                           Variant variant, Set<String> includeSoTerms) {
        List<ClinicalVariantEvidence> clinicalVariantEvidences = new ArrayList<>();

        // Sanity check
        List<SequenceOntologyTerm> soTerms = null;
        GenomicFeature genomicFeature = null;
        if (ct != null) {
            soTerms = getSOTerms(ct, includeSoTerms);

            genomicFeature = new GenomicFeature(ct.getEnsemblGeneId(), "GENE", ct.getEnsemblTranscriptId(), ct.getGeneName(), soTerms,
                    null);
        }

        if (CollectionUtils.isNotEmpty(panelIds)) {
            for (String panelId : panelIds) {
                ClinicalVariantEvidence clinicalVariantEvidence = createClinicalVariantEvidence(genomicFeature, panelId,
                        modeOfInheritances, penetrance, tier, variant);
                // The factory is overridable, so a null result is tolerated
                if (clinicalVariantEvidence != null) {
                    clinicalVariantEvidences.add(clinicalVariantEvidence);
                }
            }
        } else if (CollectionUtils.isNotEmpty(soTerms)) {
            // We report events without panels
            ClinicalVariantEvidence clinicalVariantEvidence = createClinicalVariantEvidence(genomicFeature, null,
                    modeOfInheritances, penetrance, tier, variant);
            if (clinicalVariantEvidence != null) {
                clinicalVariantEvidences.add(clinicalVariantEvidence);
            }
        }
        return clinicalVariantEvidences;
    }

    public List groupCHVariants(Map> clinicalVariantMap) {
        List clinicalVariants = new ArrayList<>();

        for (Map.Entry> entry : clinicalVariantMap.entrySet()) {
            Set variantIds = entry.getValue().stream().map(Variant::toStringSimple).collect(Collectors.toSet());
            for (ClinicalVariant clinicalVariant : entry.getValue()) {
                Set tmpVariantIds = new HashSet<>(variantIds);
                tmpVariantIds.remove(clinicalVariant.toStringSimple());

                for (ClinicalVariantEvidence clinicalVariantEvidence : clinicalVariant.getEvidences()) {
                    clinicalVariantEvidence.setCompoundHeterozygousVariantIds(new ArrayList<>(tmpVariantIds));
                }

                clinicalVariants.add(clinicalVariant);
            }
        }

        return clinicalVariants;
    }

    /**
     * Merges clinical variants that share the same ID. The first occurrence of each ID is kept and the
     * evidences of later occurrences are appended to it, so the kept input objects are modified in place.
     * <p>
     * The result follows the order in which each ID first appears in the input. A {@code null} evidence list
     * on either side is tolerated.
     *
     * @param clinicalVariants clinical variants to merge; must not be {@code null}
     * @return a new mutable list with one clinical variant per distinct ID
     */
    public List<ClinicalVariant> mergeClinicalVariants(List<ClinicalVariant> clinicalVariants) {
        // LinkedHashMap keeps first-seen order, so the output is deterministic
        Map<String, ClinicalVariant> clinicalVariantMap = new LinkedHashMap<>();
        for (ClinicalVariant clinicalVariant : clinicalVariants) {
            ClinicalVariant existing = clinicalVariantMap.putIfAbsent(clinicalVariant.getId(), clinicalVariant);
            if (existing == null || CollectionUtils.isEmpty(clinicalVariant.getEvidences())) {
                // First occurrence of this ID, or nothing to append
                continue;
            }
            if (existing.getEvidences() == null) {
                existing.setEvidences(new ArrayList<>(clinicalVariant.getEvidences()));
            } else {
                existing.getEvidences().addAll(clinicalVariant.getEvidences());
            }
        }

        return new ArrayList<>(clinicalVariantMap.values());
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy