All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.monarchinitiative.phenol.annotations.assoc.DiseaseToGeneAssociationLoader Maven / Gradle / Ivy

There is a newer version: 2.1.2
Show newest version
package org.monarchinitiative.phenol.annotations.assoc;


import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier;
import org.monarchinitiative.phenol.annotations.formats.GeneIdentifiers;
import org.monarchinitiative.phenol.annotations.formats.hpo.AssociationType;
import org.monarchinitiative.phenol.annotations.formats.hpo.DiseaseToGeneAssociation;
import org.monarchinitiative.phenol.annotations.formats.hpo.DiseaseToGeneAssociations;
import org.monarchinitiative.phenol.annotations.formats.hpo.GeneToAssociation;
import org.monarchinitiative.phenol.ontology.data.TermId;

import java.io.IOException;
import java.nio.file.Path;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * This class parses files containing gene to disease associations:
 * 
    *
  1. Homo_sapiens.gene_info.gz (has links between NCBIGene ids and gene symbols)
  2. *
  3. mim2gene_medgen (has links between OMIM disease ids and genes/NCBIGene ids
  4. *
  5. Optionally, Orphanet's gene file, en_product6.xml (has links between Orphanet diseases and genes)
  6. *
*/ public class DiseaseToGeneAssociationLoader { private DiseaseToGeneAssociationLoader() { } /** * @param mim2geneMedgen path to OMIM disease to gene associations file * @param geneIdentifiers gene identifiers container * @return a container with mappings between disease ID to {@link GeneToAssociation}. *

* For instance, MICROVASCULAR COMPLICATIONS OF DIABETES, SUSCEPTIBILITY TO, 1; is associated to the gene VEGF as POLYGENIC, * and MARFAN SYNDROME is associated to the gene FBN1 as MENDELIAN. */ public static DiseaseToGeneAssociations loadDiseaseToGeneAssociations(Path mim2geneMedgen, GeneIdentifiers geneIdentifiers) throws IOException { return loadDiseaseToGeneAssociations(mim2geneMedgen, null, geneIdentifiers); } /** * @param mim2geneMedgen path to OMIM disease to gene associations file * @param orphanet2Gene path to Ophanet's gene file (ignored if null) * @param geneIdentifiers gene identifiers container * @return a container with mappings between disease ID to {@link GeneToAssociation}. *

* For instance, MICROVASCULAR COMPLICATIONS OF DIABETES, SUSCEPTIBILITY TO, 1; is associated to the gene VEGF as POLYGENIC, * and MARFAN SYNDROME is associated to the gene FBN1 as MENDELIAN. */ public static DiseaseToGeneAssociations loadDiseaseToGeneAssociations(Path mim2geneMedgen, Path orphanet2Gene, GeneIdentifiers geneIdentifiers) throws IOException { DiseaseToGeneAssociations omimDiseaseToGeneAssociations = Mim2GeneMedgenLoader.loadDiseaseToGeneAssociations(mim2geneMedgen, geneIdentifiers); DiseaseToGeneAssociations orphaDiseaseToGeneAssociations; if (orphanet2Gene == null) { return omimDiseaseToGeneAssociations; } else { Map> diseaseMimToGeneIds = prepareDiseaseMimToGeneIds(omimDiseaseToGeneAssociations); Map> orphaDiseaseToGeneIds = OrphaGeneToDiseaseParser.parseOrphaGeneXml(orphanet2Gene, diseaseMimToGeneIds, geneIdentifiers); orphaDiseaseToGeneAssociations = postprocessOrphaMappings(orphaDiseaseToGeneIds); List associations = Stream.concat(omimDiseaseToGeneAssociations.diseaseToGeneAssociations(), orphaDiseaseToGeneAssociations.diseaseToGeneAssociations()) .collect(Collectors.toUnmodifiableList()); return DiseaseToGeneAssociations.of(associations); } } private static Map> prepareDiseaseMimToGeneIds(DiseaseToGeneAssociations mimDiseaseToGeneAssociations) { Map> diseaseMimToGeneIds = new HashMap<>(); for (DiseaseToGeneAssociation association : mimDiseaseToGeneAssociations) { String diseaseMim = association.diseaseId().getId(); List geneIds = association.associations().stream() .map(ga -> ga.geneIdentifier().id()) .collect(Collectors.toUnmodifiableList()); diseaseMimToGeneIds.put(diseaseMim, geneIds); } return diseaseMimToGeneIds; } private static DiseaseToGeneAssociations postprocessOrphaMappings(Map> orphaDiseaseToGeneIds) { List orphaAssociations = new ArrayList<>(orphaDiseaseToGeneIds.size()); for (Map.Entry> entry : orphaDiseaseToGeneIds.entrySet()) { TermId orphaDiseaseId = entry.getKey(); List geneAssociations = entry.getValue().stream() .map(gi -> GeneToAssociation.of(gi, AssociationType.UNKNOWN)) .collect(Collectors.toUnmodifiableList()); orphaAssociations.add(DiseaseToGeneAssociation.of(orphaDiseaseId, geneAssociations)); } return DiseaseToGeneAssociations.of(Collections.unmodifiableList(orphaAssociations)); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy