All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opencb.biodata.tools.clinical.TieringClinicalVariantCreator Maven / Gradle / Ivy

There is a newer version: 3.3.0
Show newest version
/*
 * 
 *
 */

package org.opencb.biodata.tools.clinical;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.clinical.ClinicalDiscussion;
import org.opencb.biodata.models.clinical.ClinicalProperty.ModeOfInheritance;
import org.opencb.biodata.models.clinical.ClinicalProperty.Penetrance;
import org.opencb.biodata.models.clinical.Disorder;
import org.opencb.biodata.models.clinical.interpretation.ClinicalVariant;
import org.opencb.biodata.models.clinical.interpretation.ClinicalVariantEvidence;
import org.opencb.biodata.models.clinical.interpretation.DiseasePanel;
import org.opencb.biodata.models.clinical.interpretation.GenomicFeature;
import org.opencb.biodata.models.clinical.interpretation.exceptions.InterpretationAnalysisException;
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.ConsequenceType;
import org.opencb.biodata.models.variant.avro.SequenceOntologyTerm;

import java.util.*;
import java.util.stream.Collectors;

import static org.opencb.biodata.models.clinical.ClinicalProperty.ModeOfInheritance.UNKNOWN;
import static org.opencb.biodata.models.clinical.ClinicalProperty.RoleInCancer;
import static org.opencb.biodata.models.clinical.interpretation.VariantClassification.*;
import static org.opencb.biodata.tools.pedigree.ModeOfInheritance.extendedLof;
import static org.opencb.biodata.tools.pedigree.ModeOfInheritance.proteinCoding;

public class TieringClinicalVariantCreator extends ClinicalVariantCreator {

    public static final Set TIER_1_CONSEQUENCE_TYPES_SET = new HashSet<>(Arrays.asList("SO:0001893", "transcript_ablation",
            "SO:0001574", "splice_acceptor_variant", "SO:0001575", "splice_donor_variant", "SO:0001587", "stop_gained",
            "SO:0001589", "frameshift_variant", "SO:0001578", "stop_lost", "SO:0001582", "initiator_codon_variant"));

    private static final Set TIER_2_CONSEQUENCE_TYPES_SET = new HashSet<>(Arrays.asList("SO:0001889", "transcript_amplification",
            "SO:0001821", "inframe_insertion", "SO:0001822", "inframe_deletion", "SO:0001583", "missense_variant",
            "SO:0001630", "splice_region_variant", "SO:0001626", "incomplete_terminal_codon_variant"));

    @Deprecated
    public TieringClinicalVariantCreator(List diseasePanels, Map roleInCancer, Disorder disorder,
                                         ModeOfInheritance modeOfInheritance, Penetrance penetrance, String assembly) {
        super(diseasePanels, disorder, Collections.singletonList(modeOfInheritance), penetrance, roleInCancer, assembly);
    }

    public TieringClinicalVariantCreator(List diseasePanels, Disorder disorder, ModeOfInheritance modeOfInheritance,
                                         Penetrance penetrance, String assembly) {
        super(diseasePanels, disorder, Collections.singletonList(modeOfInheritance), penetrance, assembly);
    }

    @Override
    public List create(List variants) throws InterpretationAnalysisException {
        Map> moiMap = new HashMap<>();
        for (Variant variant : variants) {
            moiMap.put(variant.getId(), modeOfInheritances != null ? modeOfInheritances : Collections.emptyList());
        }
        return create(variants, moiMap);
    }

    public List create(List variants, Map> variantMoIMap)
            throws InterpretationAnalysisException {
        // Sanity check
        if (variants == null || variants.isEmpty()) {
            return Collections.emptyList();
        }

        // Panels are mandatory in Tiering analysis
        if (CollectionUtils.isEmpty(diseasePanels)) {
            throw new InterpretationAnalysisException("Missing gene panels for Tiering analysis");
        }
        Map> geneToPanelMap = getGeneToPanelMap(diseasePanels);

        if (MapUtils.isEmpty(geneToPanelMap)) {
            throw new InterpretationAnalysisException("Tiering analysis: no genes found in gene panels: "
                    + StringUtils.join(diseasePanels.stream().map(DiseasePanel::getId).collect(Collectors.toList()), ","));
        }

        //   Gene        Panel       Moi
        Map> geneToPanelMoiMap = getGeneToPanelMoiMap(diseasePanels);

        // Create the list of clinical variants, with a evidence event for each 1) transcript, 2) panel and 3) consequence type (SO name)
        // Tiers classification:
        //     - Tier 1: gene panel + mode of inheritance + TIER_1_CONSEQUENCE_TYPES
        //     - Tier 2: gene panel + mode of inheritance + TIER_2_CONSEQUENCE_TYPES
        //     - Tier 3: gene panel + mode of inheritance + other consequence types
        //               gene panel + mode of inheritance
        //               not in panel
        List clinicalVariants = new ArrayList<>();
        for (Variant variant : variants) {

            List clinicalVariantEvidences = new ArrayList<>();
            List modeOfInheritances = variantMoIMap.get(variant.getId());

            if (variant.getAnnotation() != null && CollectionUtils.isNotEmpty(variant.getAnnotation().getConsequenceTypes())) {

                // 1) create the clinical variant evidence for each transcript
                for (ConsequenceType ct : variant.getAnnotation().getConsequenceTypes()) {

                    // Only protein coding
                    if (StringUtils.isEmpty(ct.getBiotype()) || !proteinCoding.contains(ct.getBiotype())) {
                        logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId() + ", discarded, biotype: "
                                + ct.getBiotype());
                        continue;
                    }

                    GenomicFeature genomicFeature = new GenomicFeature(ct.getEnsemblGeneId(), "GENE", ct.getEnsemblTranscriptId(),
                            ct.getGeneName(), ct.getSequenceOntologyTerms(), null);

                    if (geneToPanelMap.containsKey(ct.getEnsemblGeneId())) {
                        logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId() + ", gene in panel");

                        // 2) create the clinical variant evidence for each panel
                        Set genePanels = geneToPanelMap.get(ct.getEnsemblGeneId());
                        for (DiseasePanel genePanel : genePanels) {
                            // In addition to the panel, the mode of inheritance must match too!
                            if (geneToPanelMoiMap.containsKey(ct.getEnsemblGeneId())) {
                                for (ModeOfInheritance moi : modeOfInheritances) {
                                    if (moi == ModeOfInheritance.UNKNOWN) {
                                        processPanelRegion(genePanel, ct, variant, clinicalVariantEvidences);
                                    } else if (geneToPanelMoiMap.get(ct.getEnsemblGeneId()).get(genePanel.getId()) == moi) {
                                        logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId() + ", moi match");

                                        if (CollectionUtils.isNotEmpty(ct.getSequenceOntologyTerms())) {

                                            // 3) create the clinical variant evidence for consequence type (SO term)
                                            for (SequenceOntologyTerm soTerm : ct.getSequenceOntologyTerms()) {

                                                // Only LOF extended SO terms are reported
                                                if ((soTerm.getName() != null && !extendedLof.contains(soTerm.getName()))
                                                        || (soTerm.getAccession() != null
                                                        && !extendedLof.contains(soTerm.getAccession()))) {
                                                    logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId()
                                                            + ", discarded, LOF: " + soTerm.getName());
                                                    continue;
                                                }

                                                if (StringUtils.isNotEmpty(soTerm.getAccession())) {
                                                    if (TIER_1_CONSEQUENCE_TYPES_SET.contains(soTerm.getAccession())) {
                                                        // Tier 1
                                                        logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId()
                                                                + ", reported, TIER 1, " + soTerm.getName());
                                                        clinicalVariantEvidences.add(createClinicalVariantEvidence(genomicFeature,
                                                                genePanel.getId(), moi, penetrance, TIER_1, variant));
                                                    } else if (TIER_2_CONSEQUENCE_TYPES_SET.contains(soTerm.getAccession())) {
                                                        // Tier 2
                                                        logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId()
                                                                + ", reported, TIER 2, " + soTerm.getName());
                                                        clinicalVariantEvidences.add(createClinicalVariantEvidence(genomicFeature,
                                                                genePanel.getId(), moi, penetrance, TIER_2, variant));
                                                    } else {
                                                        // Tier 3
                                                        logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId()
                                                                + ", reported, TIER 3, " + soTerm.getName());
                                                        clinicalVariantEvidences.add(createClinicalVariantEvidence(genomicFeature,
                                                                genePanel.getId(), moi, penetrance, TIER_3, variant));
                                                    }
                                                } else {
                                                    // Tier 3
                                                    logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId()
                                                            + ", reported, TIER 3, empty SO");
                                                    clinicalVariantEvidences.add(createClinicalVariantEvidence(genomicFeature,
                                                            genePanel.getId(), moi, penetrance, TIER_3, variant));
                                                }
                                            }
                                        } else {
                                            // Tier 3
                                            logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId() + ", reported, "
                                                    + "TIER 3, empty SO list");
                                            clinicalVariantEvidences.add(createClinicalVariantEvidence(genomicFeature, genePanel.getId(),
                                                    moi, penetrance, TIER_3, variant));
                                        }
                                    } else {
                                        if (geneToPanelMoiMap.get(ct.getEnsemblGeneId()).get(genePanel.getId()) == UNKNOWN) {
                                            // Tier 3
                                            logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId() + ", reported,"
                                                    + " TIER 3, UNKNOWN moi");
                                            if (CollectionUtils.isNotEmpty(ct.getSequenceOntologyTerms())) {
                                                for (SequenceOntologyTerm soTerm : ct.getSequenceOntologyTerms()) {
                                                    // Only LOF extended SO terms are reported
                                                    if ((soTerm.getName() != null && !extendedLof.contains(soTerm.getName()))
                                                            || (soTerm.getAccession() != null
                                                            && !extendedLof.contains(soTerm.getAccession()))) {
                                                        logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId()
                                                                + ", discarded, LOF: " + soTerm.getName());
                                                        continue;
                                                    }
                                                    logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId()
                                                            + ", reported, TIER 3");
                                                    clinicalVariantEvidences.add(createClinicalVariantEvidence(genomicFeature,
                                                            genePanel.getId(), moi, penetrance, TIER_3, variant));
                                                }
                                            } else {
                                                logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId() + ", discarded,"
                                                        + " moi mismatch " + moi.name() + " vs panel gene moi "
                                                        + geneToPanelMoiMap.get(ct.getEnsemblGeneId()).get(genePanel.getId()).name());
                                            }
                                        }
                                    }
                                }
                            } else {
                                logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId() + ", moi missing (UNTIERED)");
                                for (ModeOfInheritance moi : modeOfInheritances) {
                                    if (CollectionUtils.isNotEmpty(ct.getSequenceOntologyTerms())) {
                                        for (SequenceOntologyTerm soTerm : ct.getSequenceOntologyTerms()) {
                                            // Only LOF extended SO terms are reported
                                            if ((soTerm.getName() != null && !extendedLof.contains(soTerm.getName()))
                                                    || (soTerm.getAccession() != null && !extendedLof.contains(soTerm.getAccession()))) {
                                                logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId()
                                                        + ", discarded, LOF: " + soTerm.getName());
                                                continue;
                                            }
                                            logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId()
                                                    + ", reported, UNTIERED, LOF: " + soTerm.getName());
                                            clinicalVariantEvidences.add(createClinicalVariantEvidence(genomicFeature,
                                                    genePanel.getId(), moi, penetrance, "", variant));
                                        }
                                    } else {
                                        logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId()
                                                + ", reported, UNTIERED, missing LOF");
                                        clinicalVariantEvidences.add(createClinicalVariantEvidence(genomicFeature,
                                                genePanel.getId(), moi, penetrance, "", variant));
                                    }
                                }
                            }
                        }
                    } else {
                        // Tier 3
                        logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId() + ", not in panel");
                        for (ModeOfInheritance moi : modeOfInheritances) {
                            if (CollectionUtils.isNotEmpty(ct.getSequenceOntologyTerms())) {
                                for (SequenceOntologyTerm soTerm : ct.getSequenceOntologyTerms()) {
                                    // Only LOF extended SO terms are reported
                                    if ((soTerm.getName() != null && !extendedLof.contains(soTerm.getName()))
                                            || (soTerm.getAccession() != null && !extendedLof.contains(soTerm.getAccession()))) {
                                        logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId() + ", discarded, LOF: "
                                                + soTerm.getName());
                                        continue;
                                    }

                                    logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId() + ", reported, TIER 3, LOF: "
                                            + soTerm.getName());
                                    clinicalVariantEvidences.add(createClinicalVariantEvidence(genomicFeature, null, moi, penetrance,
                                            TIER_3, variant));
                                }
                            } else {
                                logger.debug(variant.toStringSimple() + ": " + ct.getEnsemblTranscriptId()
                                        + ", reported, TIER 3, missing LOF");
                                clinicalVariantEvidences.add(createClinicalVariantEvidence(genomicFeature, null, moi, penetrance,
                                        TIER_3, variant));
                            }
                        }
                    }
                }
            }

            // If we have clinical variant evidence, then we have to create the clinical variant
            if (CollectionUtils.isNotEmpty(clinicalVariantEvidences)) {
                logger.debug(variant.toStringSimple() + ": reported, num. evidences: " + clinicalVariantEvidences.size());
                ClinicalVariant clinicalVariant = new ClinicalVariant(variant.getImpl(), Collections.emptyList(), Collections.emptyList(),
                        Collections.emptyMap(), new ClinicalDiscussion(), null, ClinicalVariant.Status.NOT_REVIEWED,
                        Collections.emptyList(), Collections.emptyMap());
                clinicalVariant.setEvidences(clinicalVariantEvidences);

                // Add variant to the list
                clinicalVariants.add(clinicalVariant);
            }
        }

        return clinicalVariants;
    }

    private void processPanelRegion(DiseasePanel genePanel, ConsequenceType ct, Variant variant,
                                    List clinicalVariantEvidences) {
        if (genePanel != null && CollectionUtils.isNotEmpty(genePanel.getRegions())) {
            for (DiseasePanel.RegionPanel panelRegion : genePanel.getRegions()) {
                if (CollectionUtils.isNotEmpty(genePanel.getRegions())) {
                    for (DiseasePanel.Coordinate coordinate : panelRegion.getCoordinates()) {
                        if (assembly.equals(coordinate.getAssembly())) {
                            if (StringUtils.isNotEmpty(coordinate.getLocation())) {
                                Region region = Region.parseRegion(coordinate.getLocation());
                                GenomicFeature genomicFeature = new GenomicFeature(region.toString(), "REGION",
                                        ct.getEnsemblTranscriptId(), ct.getGeneName(), Collections.emptyList(), panelRegion.getXrefs());

                                int overlapPercentage = getOverlapPercentage(region, variant);
                                if (overlapPercentage >= panelRegion.getRequiredOverlapPercentage()) {
                                    for (SequenceOntologyTerm soTerm : ct.getSequenceOntologyTerms()) {
                                        clinicalVariantEvidences.add(createClinicalVariantEvidence(genomicFeature, genePanel.getId(),
                                                ModeOfInheritance.UNKNOWN, penetrance, TIER_1, variant));
                                    }
                                } else {
                                    for (SequenceOntologyTerm soTerm : ct.getSequenceOntologyTerms()) {
                                        clinicalVariantEvidences.add(createClinicalVariantEvidence(genomicFeature, genePanel.getId(),
                                                ModeOfInheritance.UNKNOWN, penetrance, TIER_2, variant));
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    private int getOverlapPercentage(Region region, Variant variant) {
        int start = Math.max(region.getStart(), variant.getStart());
        int end = Math.min(region.getEnd(), variant.getEnd());
        return 100 * (end - start + 1) / region.size();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy