/*
 * org.monarchinitiative.phenol.io.obographs.OboGraphDocumentAdaptor
 * Artifact: phenol-io (Maven / Gradle / Ivy).
 * phenol-io contains the generic I/O functionality for ontologies.
 */
package org.monarchinitiative.phenol.io.obographs;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSortedMap;
import org.geneontology.obographs.model.*;
import org.geneontology.obographs.model.meta.BasicPropertyValue;
import org.monarchinitiative.phenol.base.PhenolRuntimeException;
import org.monarchinitiative.phenol.io.utils.CurieUtilBuilder;
import org.monarchinitiative.phenol.ontology.data.*;
import org.prefixcommons.CurieUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import static java.util.stream.Collectors.toMap;
/**
* Adaptor class for converting {@link org.geneontology.obographs.model.GraphDocument} instances to
* {@link org.monarchinitiative.phenol.ontology.data.Ontology} instances.
*
* @author Jules Jacobsen
*/
public class OboGraphDocumentAdaptor {

  private static final Logger LOGGER = LoggerFactory.getLogger(OboGraphDocumentAdaptor.class);

  /** Ontology metadata ("data-version" and, when present in the graph, "date"). */
  private final Map<String, String> metaInfo;
  /** Terms converted from the CLASS nodes of the graph. */
  private final List<Term> terms;
  /** Relationships converted from the edges of the graph. */
  private final List<Relationship> relationships;

  private OboGraphDocumentAdaptor(Builder builder) {
    this.metaInfo = builder.metaInfo;
    this.terms = builder.terms;
    this.relationships = builder.relationships;
  }

  /**
   * @return the metadata extracted from the graph; always contains the key {@code "data-version"}
   *     (empty string when the graph declares no version) unless the graph had no metadata at all,
   *     in which case the map is empty
   */
  public Map<String, String> getMetaInfo() {
    return metaInfo;
  }

  /**
   * @return the terms converted from the graph's CLASS nodes
   */
  public List<Term> getTerms() {
    return terms;
  }

  /**
   * @return the relationships converted from the graph's edges
   */
  public List<Relationship> getRelationships() {
    return relationships;
  }

  /**
   * Assembles an {@link Ontology} from the converted metadata, terms and relationships.
   *
   * @return a new ontology built via {@link ImmutableOntology#builder()}
   */
  public Ontology buildOntology() {
    return ImmutableOntology.builder()
      .metaInfo(metaInfo)
      .terms(terms)
      .relationships(relationships)
      .build();
  }

  /**
   * @return a fresh {@link Builder} using the default {@link CurieUtil} and no prefix filtering
   */
  public static Builder builder() {
    return new Builder();
  }

  /**
   * Configures and performs the conversion of a {@link GraphDocument}.
   *
   * <p>The builder stores the conversion results in its own fields, so a single instance should
   * not be shared between concurrent {@link #build(GraphDocument)} calls.
   */
  public static class Builder {

    // Factory object that adds OBO-typical data to each term.
    private final OboGraphTermFactory factory = new OboGraphTermFactory();

    private CurieUtil curieUtil = CurieUtilBuilder.defaultCurieUtil();
    // An empty set means "do not filter": terms of every prefix are kept.
    private Set<String> wantedTermIdPrefixes = Collections.emptySet();

    private Map<String, String> metaInfo;
    private List<Term> terms;
    private List<Relationship> relationships;

    /**
     * Sets the {@link CurieUtil} used to contract node/edge identifiers into CURIEs.
     *
     * @param curieUtil the curie utility, must not be {@code null}
     * @return this builder
     * @throws NullPointerException if {@code curieUtil} is {@code null}
     */
    public Builder curieUtil(CurieUtil curieUtil) {
      this.curieUtil = Objects.requireNonNull(curieUtil, "curieUtil must not be null");
      return this;
    }

    /**
     * Restricts the conversion to terms whose id prefix is contained in the given set.
     *
     * @param wantedTermIdPrefixes prefixes to keep; an empty set disables filtering
     * @return this builder
     * @throws NullPointerException if {@code wantedTermIdPrefixes} is {@code null}
     */
    public Builder wantedTermIdPrefixes(Set<String> wantedTermIdPrefixes) {
      this.wantedTermIdPrefixes =
        Objects.requireNonNull(wantedTermIdPrefixes, "wantedTermIdPrefixes must not be null");
      return this;
    }

    /**
     * Converts the first graph of the given document into metadata, terms and relationships.
     *
     * @param graphDocument the document to convert, must not be {@code null}
     * @return an adaptor holding the converted data
     * @throws PhenolRuntimeException if a wanted prefix has no mapping in the {@link CurieUtil},
     *     if the document contains no graph, or if the graph has no nodes or no edges
     * @throws NullPointerException if {@code graphDocument} is {@code null}
     */
    public OboGraphDocumentAdaptor build(GraphDocument graphDocument) {
      // check the curieUtil contains a mapping for the requested prefixes otherwise
      // they will not be included in the output and users will not get the graph they asked for
      List<String> unMappedIdPrefixes = getWantedButUnmappedIdPrefixes();
      if (!unMappedIdPrefixes.isEmpty()) {
        String message = String.format("Unable to filter terms for prefix(s) %s as these not mapped. Add the mapping to CurieUtil.", unMappedIdPrefixes);
        throw new PhenolRuntimeException(message);
      }

      Graph oboGraph = getFirstGraph(graphDocument);

      LOGGER.debug("Converting graph document...");
      LOGGER.debug("Converting metadata...");
      // Metadata about the ontology
      this.metaInfo = convertMetaData(oboGraph.getMeta());
      LOGGER.debug("Converting nodes to terms...");
      this.terms = convertNodesToTerms(oboGraph.getNodes());
      LOGGER.debug("Converting edges to relationships...");
      // Mapping edges in obographs to termIds in phenol
      this.relationships = convertEdgesToRelationships(oboGraph.getEdges(), oboGraph.getNodes());

      return new OboGraphDocumentAdaptor(this);
    }

    /**
     * Returns the first graph of the document.
     *
     * @throws PhenolRuntimeException if the document has no graphs
     */
    private Graph getFirstGraph(GraphDocument graphDocument) {
      Objects.requireNonNull(graphDocument, "graphDocument must not be null");
      List<Graph> graphs = graphDocument.getGraphs();
      if (graphs == null || graphs.isEmpty()) {
        throw new PhenolRuntimeException("GraphDocument is empty");
      }
      // We assume there is only one graph instance in the graph document instance.
      return graphs.get(0);
    }

    /**
     * Collects the wanted prefixes that are absent from the curie map of the {@link CurieUtil}.
     *
     * @return the unmapped prefixes; empty when no filtering was requested or all are mapped
     */
    private List<String> getWantedButUnmappedIdPrefixes() {
      List<String> unmappedIdPrefixes = new ArrayList<>();
      Map<String, String> curieMap = curieUtil.getCurieMap();
      for (String prefix : wantedTermIdPrefixes) {
        if (!curieMap.containsKey(prefix)) {
          unmappedIdPrefixes.add(prefix);
        }
      }
      return unmappedIdPrefixes;
    }

    /**
     * Extracts "data-version" and (if annotated) "date" from the graph metadata.
     *
     * @param meta graph metadata, may be {@code null}
     * @return an immutable, key-sorted map; empty when {@code meta} is {@code null}
     */
    private Map<String, String> convertMetaData(Meta meta) {
      if (meta == null) {
        return ImmutableSortedMap.of();
      }
      // Collected in a mutable sorted map first: putting the same key twice into an
      // ImmutableSortedMap.Builder makes build() throw, which would abort the whole conversion
      // for an ontology carrying more than one "date" annotation.
      Map<String, String> metaMap = new TreeMap<>();
      String version = meta.getVersion() != null ? meta.getVersion() : "";
      metaMap.put("data-version", version);
      if (meta.getBasicPropertyValues() != null) {
        for (BasicPropertyValue basicPropertyValue : meta.getBasicPropertyValues()) {
          String pred = basicPropertyValue.getPred();
          String val = basicPropertyValue.getVal();
          // Skip incomplete property values instead of failing with a NullPointerException.
          if (pred == null || val == null || !pred.equalsIgnoreCase("date")) {
            continue;
          }
          String date = val.trim();
          // The first date encountered wins; later ones are reported and ignored.
          if (metaMap.putIfAbsent("date", date) != null) {
            LOGGER.warn("Ignoring additional date value in ontology metadata: {}", date);
          }
        }
      }
      return ImmutableSortedMap.copyOf(metaMap);
    }

    /**
     * Converts every CLASS node whose id resolves to a wanted {@link TermId} into a {@link Term}.
     *
     * @param nodes the nodes of the graph
     * @return an immutable list of terms, in node order
     * @throws PhenolRuntimeException if {@code nodes} is {@code null}
     */
    private List<Term> convertNodesToTerms(List<Node> nodes) {
      if (nodes == null) {
        LOGGER.warn("No nodes found in loaded ontology.");
        throw new PhenolRuntimeException("PhenolException: No nodes found in loaded ontology.");
      }
      ImmutableList.Builder<Term> termsList = new ImmutableList.Builder<>();
      // Mapping nodes in obographs to termIds in phenol
      for (Node node : nodes) {
        // only take classes, otherwise we may get some OIO and IAO entities
        // (a null type simply fails the identity comparison, no explicit null check required)
        if (node.getType() == Node.RDFTYPES.CLASS) {
          TermId termId = getTermIdOrNull(node.getId());
          if (termId != null) {
            termsList.add(factory.constructTerm(node, termId));
          }
        }
      }
      return termsList.build();
    }

    /**
     * Converts every edge whose subject and object both resolve to wanted {@link TermId}s into a
     * {@link Relationship}. Relationship ids are assigned sequentially, starting at 1, to the
     * edges that are kept.
     *
     * @param edges the edges of the graph
     * @param nodes the nodes of the graph; PROPERTY nodes supply the labels of the predicates
     * @return an immutable list of relationships, in edge order
     * @throws PhenolRuntimeException if {@code edges} is {@code null}
     */
    private List<Relationship> convertEdgesToRelationships(List<Edge> edges, List<Node> nodes) {
      // Validate first so that a missing edge list is reported before any other work is done.
      if (edges == null) {
        LOGGER.warn("No edges found in loaded ontology.");
        throw new PhenolRuntimeException("No edges found in loaded ontology.");
      }
      Map<String, String> propertyIdLabels = nodes == null
        ? Collections.<String, String>emptyMap()
        : nodes.stream()
          .filter(node -> node.getType() == Node.RDFTYPES.PROPERTY)
          .filter(node -> node.getId() != null && node.getLabel() != null)
          // keep the first label if a property id occurs more than once; the two-argument
          // toMap would throw IllegalStateException on such a duplicate
          .collect(toMap(Node::getId, Node::getLabel, (first, second) -> first));

      ImmutableList.Builder<Relationship> relationshipsList = new ImmutableList.Builder<>();
      int edgeId = 1;
      for (Edge edge : edges) {
        TermId subjectTermId = getTermIdOrNull(edge.getSub());
        TermId objectTermId = getTermIdOrNull(edge.getObj());
        if (subjectTermId != null && objectTermId != null) {
          String pred = edge.getPred();
          RelationshipType relType = RelationshipType.of(pred, propertyIdLabels.getOrDefault(pred, "unknown"));
          relationshipsList.add(new Relationship(subjectTermId, objectTermId, edgeId++, relType));
        }
      }
      return relationshipsList.build();
    }

    /**
     * Resolves an obographs identifier to a {@link TermId}.
     *
     * @param id the node or edge identifier, may be {@code null}
     * @return the term id, or {@code null} when the id is {@code null}, cannot be contracted to a
     *     CURIE, or has a prefix that is not among the wanted prefixes
     */
    private TermId getTermIdOrNull(String id) {
      if (id == null) {
        // Treated like any other unresolvable id rather than handed to CurieUtil.
        LOGGER.warn("No matching curie found for id: {}", id);
        return null;
      }
      Optional<String> curie = curieUtil.getCurie(id);
      if (!curie.isPresent()) {
        LOGGER.warn("No matching curie found for id: {}", id);
        return null;
      }
      TermId termId = TermId.of(curie.get());
      // Note that GO has some Terms/Relations with RO and BFO that we want to skip
      if (wantedTermIdPrefixes.isEmpty() || wantedTermIdPrefixes.contains(termId.getPrefix())) {
        return termId;
      }
      return null;
    }
  }
}