All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.monarchinitiative.phenol.io.obo.mpo.MpGeneParser Maven / Gradle / Ivy

There is a newer version: 2.1.1
Show newest version
package org.monarchinitiative.phenol.io.obo.mpo;

import com.google.common.collect.ImmutableMap;
import org.monarchinitiative.phenol.base.PhenolException;
import org.monarchinitiative.phenol.formats.mpo.MpGene;
import org.monarchinitiative.phenol.formats.mpo.MpGeneModel;
import org.monarchinitiative.phenol.formats.mpo.MpSimpleModel;
import org.monarchinitiative.phenol.io.OntologyLoader;
import org.monarchinitiative.phenol.ontology.data.Ontology;
import org.monarchinitiative.phenol.ontology.data.TermId;


import java.io.*;
import java.util.*;

import static org.monarchinitiative.phenol.formats.mpo.MpGene.createMpGene;


/**
 * Parser for two report files from MGI (Mouse Genome Informatics):
 * <ul>
 *   <li>{@code MRK_List2.rpt} - the list of mouse genetic markers (sorted alphabetically by marker
 *       symbol, tab-delimited). The List2 version excludes withdrawn symbols.</li>
 *   <li>{@code MGI_GenePheno.rpt} - the gene/phenotype annotation file, which is handed to
 *       {@link MpAnnotationParser}.</li>
 * </ul>
 */
public class MpGeneParser {
  /** Zero-based column of the MGI accession ID in MRK_List2.rpt (first field). */
  private static final int MGI_ID_COLUMN = 0;
  /** Zero-based column of the marker symbol in MRK_List2.rpt (seventh field). */
  private static final int MARKER_SYMBOL_COLUMN = 6;
  /** Zero-based column of the marker type in MRK_List2.rpt (tenth field). */
  private static final int MARKER_TYPE_COLUMN = 9;
  /** Minimum number of tab-separated fields a marker line must have to be parsed. */
  private static final int MIN_MARKER_FIELDS = MARKER_TYPE_COLUMN + 1;

  /** Path to the MRK_List2.rpt file from MGI. */
  private final String mgiMarkerPath;
  /** Path to the MGI_GenePheno.rpt file from MGI. */
  private final String mgiGenePhenoPath;
  /** The MPO ontology object. */
  private final Ontology ontology;

  /**
   * Creates a parser, loading the ontology from a file via {@link OntologyLoader}.
   *
   * @param markerPath path to the MRK_List2.rpt file
   * @param mgiGenePhenoPath path to the MGI_GenePheno.rpt file
   * @param ontologypath path to the ontology file to load
   * @throws PhenolException propagated from {@code OntologyLoader.loadOntology}
   */
  public MpGeneParser(String markerPath, String mgiGenePhenoPath, String ontologypath) throws PhenolException {
    this(markerPath, mgiGenePhenoPath, OntologyLoader.loadOntology(new File(ontologypath)));
  }

  /**
   * Creates a parser that uses an already loaded ontology.
   *
   * @param markerPath path to the MRK_List2.rpt file
   * @param mgiGenePhenoPath path to the MGI_GenePheno.rpt file
   * @param mpo the ontology passed on to every {@link MpGeneModel} that is built
   */
  public MpGeneParser(String markerPath, String mgiGenePhenoPath, Ontology mpo) {
    this.mgiMarkerPath = markerPath;
    this.mgiGenePhenoPath = mgiGenePhenoPath;
    this.ontology = mpo;
  }

  /**
   * Reads the file of genetic markers. For each genetic marker, extracts the full MGI Accession ID,
   * the Marker Symbol, and the Marker Type. The first line of the file is treated as a header and
   * skipped; blank lines are ignored.
   *
   * @return immutable map from MGI accession ID to the gene created for that marker
   * @throws IOException if the file cannot be read, or if a non-blank data line has fewer than
   *     ten tab-separated fields
   * @throws PhenolException propagated from the calls that build the ID or the gene for a line
   */
  public Map<TermId, MpGene> parseMarkers() throws IOException, PhenolException {
    ImmutableMap.Builder<TermId, MpGene> bld = ImmutableMap.builder();
    // try-with-resources guarantees the reader is closed even when a line fails to parse
    try (BufferedReader br = new BufferedReader(new FileReader(mgiMarkerPath))) {
      // skip over first line of file, which is a header line
      br.readLine();
      int lineNumber = 1;
      String line;
      while ((line = br.readLine()) != null) {
        lineNumber++;
        if (line.trim().isEmpty()) {
          continue; // e.g. a trailing empty line at the end of the file
        }
        String[] fields = line.split("\t");
        if (fields.length < MIN_MARKER_FIELDS) {
          // report the offending line instead of failing with an ArrayIndexOutOfBoundsException
          throw new IOException(String.format(
            "Malformed line %d in %s: expected at least %d tab-separated fields but found %d",
            lineNumber, mgiMarkerPath, MIN_MARKER_FIELDS, fields.length));
        }
        TermId mgiId = TermId.of(fields[MGI_ID_COLUMN]);
        bld.put(mgiId, createMpGene(mgiId, fields[MARKER_SYMBOL_COLUMN], fields[MARKER_TYPE_COLUMN]));
      }
    }
    return bld.build();
  }

  /**
   * Groups the simple models obtained from {@link MpAnnotationParser} by their marker (gene) ID and
   * builds one {@link MpGeneModel} per gene.
   *
   * <p>NOTE(review): a {@link PhenolException} raised while parsing is only printed to standard
   * error; the method then returns whatever has been added to the builder so far (possibly an
   * empty map). Callers cannot distinguish this from a file without annotations - consider
   * propagating the exception in a future, interface-breaking revision.
   *
   * @return immutable map from gene ID to the gene model built from all simple models of that gene
   */
  public Map<TermId, MpGeneModel> parseMpGeneModels() {
    Map<TermId, List<MpSimpleModel>> gene2simpleMap = new HashMap<>();
    ImmutableMap.Builder<TermId, MpGeneModel> builder = new ImmutableMap.Builder<>();
    try {
      MpAnnotationParser annotParser = new MpAnnotationParser(this.mgiGenePhenoPath);
      Map<TermId, MpSimpleModel> simpleModelMap = annotParser.getGenotypeAccessionToMpModelMap();
      for (MpSimpleModel simplemod : simpleModelMap.values()) {
        gene2simpleMap
          .computeIfAbsent(simplemod.getMarkerId(), id -> new ArrayList<>())
          .add(simplemod);
      }
      // when we get here, gene2simpleMap has key - a gene ID, value - collection of
      // all simple models whose marker ID is that gene
      for (Map.Entry<TermId, List<MpSimpleModel>> entry : gene2simpleMap.entrySet()) {
        TermId geneId = entry.getKey();
        MpGeneModel genemod = new MpGeneModel(geneId, ontology, true, entry.getValue());
        builder.put(geneId, genemod);
      }
    } catch (PhenolException e) {
      e.printStackTrace();
    }

    return builder.build();
  }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy