// Source listing of org.obolibrary.obo2owl.OWLAPIObo2Owl (newest published version).
package org.obolibrary.obo2owl;
import static org.obolibrary.obo2owl.Obo2OWLConstants.DEFAULT_IRI_PREFIX;
import static org.obolibrary.obo2owl.Obo2OWLConstants.OIOVOCAB_IRI_PREFIX;
import static org.semanticweb.owlapi.util.OWLAPIPreconditions.checkNotNull;
import static org.semanticweb.owlapi.util.OWLAPIPreconditions.emptyOptional;
import static org.semanticweb.owlapi.util.OWLAPIPreconditions.optional;
import static org.semanticweb.owlapi.util.OWLAPIPreconditions.verifyNotNull;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import javax.annotation.Nullable;
import org.obolibrary.obo2owl.Obo2OWLConstants.Obo2OWLVocabulary;
import org.obolibrary.oboformat.model.Clause;
import org.obolibrary.oboformat.model.Frame;
import org.obolibrary.oboformat.model.OBODoc;
import org.obolibrary.oboformat.model.QualifierValue;
import org.obolibrary.oboformat.model.Xref;
import org.obolibrary.oboformat.parser.OBOFormatConstants;
import org.obolibrary.oboformat.parser.OBOFormatConstants.OboFormatTag;
import org.obolibrary.oboformat.parser.OBOFormatException;
import org.obolibrary.oboformat.parser.OBOFormatParser;
import org.obolibrary.oboformat.parser.OBOFormatParserException;
import org.semanticweb.owlapi.formats.RDFXMLDocumentFormat;
import org.semanticweb.owlapi.io.OWLParserException;
import org.semanticweb.owlapi.model.AddImport;
import org.semanticweb.owlapi.model.AddOntologyAnnotation;
import org.semanticweb.owlapi.model.AxiomType;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLAnnotation;
import org.semanticweb.owlapi.model.OWLAnnotationProperty;
import org.semanticweb.owlapi.model.OWLAnnotationSubject;
import org.semanticweb.owlapi.model.OWLAnnotationValue;
import org.semanticweb.owlapi.model.OWLAxiom;
import org.semanticweb.owlapi.model.OWLClass;
import org.semanticweb.owlapi.model.OWLClassExpression;
import org.semanticweb.owlapi.model.OWLDataFactory;
import org.semanticweb.owlapi.model.OWLDocumentFormat;
import org.semanticweb.owlapi.model.OWLEntity;
import org.semanticweb.owlapi.model.OWLImportsDeclaration;
import org.semanticweb.owlapi.model.OWLIndividual;
import org.semanticweb.owlapi.model.OWLNamedObject;
import org.semanticweb.owlapi.model.OWLObjectComplementOf;
import org.semanticweb.owlapi.model.OWLObjectProperty;
import org.semanticweb.owlapi.model.OWLObjectPropertyExpression;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyChange;
import org.semanticweb.owlapi.model.OWLOntologyCreationException;
import org.semanticweb.owlapi.model.OWLOntologyID;
import org.semanticweb.owlapi.model.OWLOntologyLoaderConfiguration;
import org.semanticweb.owlapi.model.OWLOntologyManager;
import org.semanticweb.owlapi.model.OWLOntologyStorageException;
import org.semanticweb.owlapi.model.OWLProperty;
import org.semanticweb.owlapi.model.OWLRuntimeException;
import org.semanticweb.owlapi.model.SetOntologyID;
import org.semanticweb.owlapi.model.parameters.ConfigurationOptions;
import org.semanticweb.owlapi.util.CollectionFactory;
import org.semanticweb.owlapi.vocab.Namespaces;
import org.semanticweb.owlapi.vocab.OWL2Datatype;
import org.semanticweb.owlapi.vocab.OWLRDFVocabulary;
import org.semanticweb.owlapi.vocab.OWLXMLVocabulary;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Sets;
/**
* Bridge that converts an OBO document ({@link OBODoc}) into an OWL ontology using the OWL API.
*/
public class OWLAPIObo2Owl {
private class IDCache extends LinkedHashMap {
private final int cacheSize;
IDCache(int cacheSize) {
this.cacheSize = cacheSize;
}
@Override
protected boolean removeEldestEntry(@Nullable Map.Entry eldest) {
return size() > cacheSize;
}
}
/** The string literal {@code "true"}. */
private static final String TRUE = "true";
/**
* IRI string for the {@code IAO_isReversiblePropertyChain} property, formed by appending that
* local name to {@code DEFAULT_IRI_PREFIX}.
*/
public static final String IRI_PROP_ISREVERSIBLEPROPERTYCHAIN =
DEFAULT_IRI_PREFIX + "IAO_isReversiblePropertyChain";
/**
* Tag-to-IRI lookup built once by {@link #initAnnotationPropertyMap()} and consulted by
* {@link #trTagToIRI(String)}.
*/
protected static final Map ANNOTATIONPROPERTYMAP = initAnnotationPropertyMap();
/** Logger for this class. */
private static final Logger LOG = LoggerFactory.getLogger(OWLAPIObo2Owl.class);
// NOTE(review): presumably the qualifier names that are not translated into annotations -
// the code that reads this set is not visible here, confirm against its usage.
private static final Set SKIPPED_QUALIFIERS = Sets.newHashSet("gci_relation",
"gci_filler", "cardinality", "minCardinality", "maxCardinality", "all_some", "all_only");
// The four collections below are allocated in the constructor and emptied by init(...).
protected final Map idSpaceMap;
protected final Set apToDeclare;
protected final Map clsToDeclare;
protected final Map typedefToAnnotationProperty;
/**
* Default id space; tr(OWLOntology) sets it to the value of the header's ontology clause, or to
* "TEMP" when the header has no such clause.
*/
protected String defaultIDSpace = "";
/** Ontology manager in use; assigned by init(...) and by setManager(...). */
protected OWLOntologyManager manager;
/** Ontology being populated; assigned through setOwlOntology(...). */
protected OWLOntology owlOntology;
/** Data factory obtained from the manager in init(...). */
protected OWLDataFactory fac;
/** OBO document being converted. */
protected OBODoc obodoc;
/**
* Cache for the id to IRI conversion. This cannot be replaced with a Caffeine cache - the
* loading of keys is recursive, and a bug in ConcurrentHashMap implementation causes livelocks
* for this particular situation.
*/
private Map idToIRICache;
/**
* Creates a converter bound to the given ontology manager. The internal lookup collections are
* allocated first; {@link #init(OWLOntologyManager)} then sets up the manager, the data factory
* and the id cache.
*
* @param manager the manager
*/
public OWLAPIObo2Owl(OWLOntologyManager manager) {
    this.typedefToAnnotationProperty = new HashMap<>();
    this.clsToDeclare = new HashMap<>();
    this.apToDeclare = new HashSet<>();
    this.idSpaceMap = new HashMap<>();
    init(manager);
}
/**
* Static convenience method that creates a converter for {@code manager}, parses the OBO document
* found at the URL {@code iri}, converts it to an OWL ontology and saves that ontology as RDF/XML
* to the IRI created from {@code outFile}.
*
* @param iri the URL of the OBO document
* @param outFile string from which the output IRI is created
* @param manager manager to use
* @throws IOException Signals that an I/O exception has occurred.
* @throws OWLOntologyCreationException ontology creation exception
* @throws OWLOntologyStorageException ontology storage exception
* @throws OBOFormatParserException OBO format parser exception
*/
public static void convertURL(String iri, String outFile, OWLOntologyManager manager)
    throws IOException, OWLOntologyCreationException, OWLOntologyStorageException {
    OWLAPIObo2Owl converter = new OWLAPIObo2Owl(manager);
    OBODoc parsed = new OBOFormatParser().parse(new URL(iri));
    OWLOntology converted = converter.convert(parsed);
    IRI target = IRI.create(outFile);
    OWLDocumentFormat rdfXml = new RDFXMLDocumentFormat();
    LOG.info("saving to {} fmt={}", target, rdfXml);
    manager.saveOntology(converted, rdfXml, target);
}
/**
* Variant of {@link #convertURL(String, String, OWLOntologyManager)} that adds a default ontology
* header to the parsed document before converting it.
*
* @param iri the URL of the OBO document
* @param outFile string from which the output IRI is created
* @param defaultOnt -- e.g. "go". If the obo file contains no "ontology:" header tag, this is
*        added
* @param manager the manager to be used
* @throws IOException Signals that an I/O exception has occurred.
* @throws OWLOntologyCreationException ontology creation exception
* @throws OWLOntologyStorageException ontology storage exception
* @throws OBOFormatParserException OBO format parser exception
*/
public static void convertURL(String iri, String outFile, String defaultOnt,
    OWLOntologyManager manager)
    throws IOException, OWLOntologyCreationException, OWLOntologyStorageException {
    OWLAPIObo2Owl converter = new OWLAPIObo2Owl(manager);
    OBODoc parsed = new OBOFormatParser().parse(new URL(iri));
    parsed.addDefaultOntologyHeader(defaultOnt);
    OWLOntology converted = converter.convert(parsed);
    IRI target = IRI.create(outFile);
    OWLDocumentFormat rdfXml = new RDFXMLDocumentFormat();
    LOG.info("saving to {} fmt={}", target, rdfXml);
    manager.saveOntology(converted, rdfXml, target);
}
/**
* Builds the tag-to-IRI table (Table 5.8 Translation of Annotation Vocabulary). Three OBO tags
* are mapped to OWL/RDFS vocabulary IRIs first; afterwards every {@link Obo2OWLVocabulary}
* constant contributes its mapped tag and IRI.
*
* @return property map
*/
protected static Map initAnnotationPropertyMap() {
    Map tagToIri = new HashMap<>();
    tagToIri.put(OboFormatTag.TAG_IS_OBSELETE.getTag(), OWLRDFVocabulary.OWL_DEPRECATED.getIRI());
    tagToIri.put(OboFormatTag.TAG_NAME.getTag(), OWLRDFVocabulary.RDFS_LABEL.getIRI());
    tagToIri.put(OboFormatTag.TAG_COMMENT.getTag(), OWLRDFVocabulary.RDFS_COMMENT.getIRI());
    Obo2OWLVocabulary[] vocabulary = Obo2OWLVocabulary.values();
    for (int i = 0; i < vocabulary.length; i++) {
        tagToIri.put(vocabulary[i].getMappedTag(), vocabulary[i].getIRI());
    }
    return tagToIri;
}
/**
* Turns a path into a URI string. Values that already start with {@code http://},
* {@code https://} or {@code file:} are returned untouched; anything else is treated as a file
* system path and converted through {@link File#toURI()}.
*
* @param path the path
* @return the URI string
*/
protected static String getURI(String path) {
    boolean alreadyUri =
        path.startsWith("http://") || path.startsWith("https://") || path.startsWith("file:");
    return alreadyUri ? path : new File(path).toURI().toString();
}
/**
* Placeholder for translating a relation {@code union_of}. No axiom is produced: the method logs
* an error naming the id and the ignored clauses and returns {@code null}.
*
* @param id the id
* @param p the property (unused)
* @param clauses the clauses
* @return always {@code null}
*/
@SuppressWarnings("unused")
@Nullable
protected static OWLAxiom trRelationUnionOf(String id, OWLProperty p,
    Collection clauses) {
    // TODO not expressible in OWL - use APs. SWRL?
    final String message =
        "The relation union_of for {} is currently non-translatable to OWL. Ignoring clauses: {}";
    LOG.error(message, id, clauses);
    return null;
}
/**
* Placeholder for translating a relation {@code intersection_of}. No axiom is produced: the
* method logs an error naming the id and the ignored clauses and returns {@code null}.
*
* @param id the id
* @param p the property (unused)
* @param clauses the clauses
* @return always {@code null}
*/
@SuppressWarnings("unused")
@Nullable
protected static OWLAxiom trRelationIntersectionOf(String id, OWLProperty p,
    Collection clauses) {
    // TODO not expressible in OWL - use APs. SWRL?
    final String message =
        "The relation intersection_of for {} is currently non-translatable to OWL. Ignoring clauses: {}";
    LOG.error(message, id, clauses);
    return null;
}
/**
* Looks up a qualifier by name and returns its value as a string.
*
* @param q the qualifier name to look for
* @param quals the qualifier values to search; the element type is declared so that the loop
*        below is well-typed (iterating a raw collection only yields {@code Object})
* @return the value of the first entry whose qualifier equals {@code q}, or the empty string when
*         no entry matches
*/
protected static String getQVString(String q, Collection<QualifierValue> quals) {
    for (QualifierValue qv : quals) {
        if (qv.getQualifier().equals(q)) {
            return qv.getValue();
        }
    }
    return "";
}
/**
* Looks up a qualifier by name and interprets its value as a boolean.
*
* @param q the qualifier name to look for
* @param quals the qualifier values to search; the element type is declared so that the loop
*        below is well-typed (iterating a raw collection only yields {@code Object})
* @return the result of {@link Boolean#parseBoolean(String)} applied to the value of the first
*         entry whose qualifier equals {@code q}, or {@code false} when no entry matches
*/
protected static boolean getQVBoolean(String q, Collection<QualifierValue> quals) {
    for (QualifierValue qv : quals) {
        if (qv.getQualifier().equals(q)) {
            // getValue() already yields a String, so no intermediate Object or cast is needed
            return Boolean.parseBoolean(qv.getValue());
        }
    }
    return false;
}
/**
* Looks up a qualifier by name and interprets its value as an integer.
*
* @param q the qualifier name to look for
* @param quals the qualifier values to search; the element type is declared so that the loop
*        below is well-typed (iterating a raw collection only yields {@code Object})
* @return the result of {@link Integer#valueOf(String)} applied to the value of the first entry
*         whose qualifier equals {@code q}, or {@code null} when no entry matches
* @throws NumberFormatException if the matching value is not a parsable integer
*/
@Nullable
protected static Integer getQVInt(String q, Collection<QualifierValue> quals) {
    for (QualifierValue qv : quals) {
        if (qv.getQualifier().equals(q)) {
            // getValue() already yields a String, so no intermediate Object or cast is needed
            return Integer.valueOf(qv.getValue());
        }
    }
    return null;
}
/**
* Extracts the part of an identifier that precedes its first colon.
*
* @param x the identifier
* @return the text before the first {@code ':'}, or {@code x} itself when it contains no colon
*/
protected static String getIdPrefix(String x) {
    int colon = x.indexOf(':');
    return colon < 0 ? x : x.substring(0, colon);
}
/**
* Translates a tag into an IRI. The annotation property map is consulted first; a tag without an
* entry there is turned into an IRI from the OIO vocabulary prefix and the tag itself.
*
* @param tag the tag
* @return the iri
*/
public static IRI trTagToIRI(String tag) {
    IRI mapped = ANNOTATIONPROPERTYMAP.get(tag);
    if (mapped != null) {
        return mapped;
    }
    return IRI.create(Obo2OWLConstants.OIOVOCAB_IRI_PREFIX, tag);
}
/**
* (Re)initialises the converter state: stores the manager, takes the data factory from it,
* creates a new id cache sized by the {@code CACHE_SIZE} configuration option and empties the
* internal lookup collections.
*
* @param m the manager to use
*/
protected void init(OWLOntologyManager m) {
    // use the given manager and its factory
    this.manager = m;
    this.fac = this.manager.getOWLDataFactory();
    Integer configuredSize =
        ConfigurationOptions.CACHE_SIZE.getValue(Integer.class, Collections.emptyMap());
    this.idToIRICache = new IDCache(configuredSize.intValue());
    // clear all internal maps.
    this.idSpaceMap.clear();
    this.apToDeclare.clear();
    this.clsToDeclare.clear();
    this.typedefToAnnotationProperty.clear();
}
/**
* Returns the ontology manager currently used by this converter.
*
* @return the manager
*/
public OWLOntologyManager getManager() {
    return this.manager;
}
/**
* Replaces the ontology manager. Only the manager field is assigned here; the data factory is
* read from the manager in {@link #init(OWLOntologyManager)}.
*
* @param manager the new manager
*/
public void setManager(OWLOntologyManager manager) {
    this.manager = manager;
}
/**
* Returns the OBO document held by this converter.
*
* @return the obodoc
*/
public OBODoc getObodoc() {
    return this.obodoc;
}
/**
* Stores the OBO document to be converted.
*
* @param obodoc the new obodoc
*/
public void setObodoc(OBODoc obodoc) {
    this.obodoc = obodoc;
}
/**
* Returns the ontology being populated, passed through {@code verifyNotNull}.
*
* @return the owlOntology
*/
protected OWLOntology getOwlOntology() {
    return verifyNotNull(this.owlOntology);
}
/**
* Stores the ontology that the translation writes into.
*
* @param owlOntology the owlOntology to set
*/
protected void setOwlOntology(OWLOntology owlOntology) {
    this.owlOntology = owlOntology;
}
/**
* Parses {@code oboFile} with a new {@link OBOFormatParser} and converts the resulting document
* with {@link #convert(OBODoc)}. I/O and OBO syntax failures are rethrown as
* {@link OWLOntologyCreationException} with the original exception as cause.
*
* @param oboFile the obo file
* @return ontology
* @throws OWLOntologyCreationException the OWL ontology creation exception
*/
public OWLOntology convert(String oboFile) throws OWLOntologyCreationException {
    try {
        OBODoc parsed = new OBOFormatParser().parse(oboFile);
        return convert(parsed);
    } catch (IOException ioFailure) {
        String message = "Error Occured while parsing OBO '" + oboFile + '\'';
        throw new OWLOntologyCreationException(message, ioFailure);
    } catch (OBOFormatParserException syntaxFailure) {
        String message = "Syntax error occured while parsing OBO '" + oboFile + '\'';
        throw new OWLOntologyCreationException(message, syntaxFailure);
    }
}
/**
* Converts an OBO document into a newly created ontology. The converter state is reset with the
* current manager before the translation starts.
*
* @param doc the obodoc
* @return ontology
* @throws OWLOntologyCreationException the OWL ontology creation exception
*/
public OWLOntology convert(OBODoc doc) throws OWLOntologyCreationException {
    this.obodoc = doc;
    init(this.manager);
    OWLOntology created = this.manager.createOntology();
    return tr(created);
}
/**
* Converts an OBO document into the supplied ontology. The converter state is reset with the
* manager that owns {@code in} before the translation starts.
*
* @param doc the obodoc
* @param in the ontology to populate
* @return the OWL ontology
*/
public OWLOntology convert(OBODoc doc, OWLOntology in) {
    this.obodoc = doc;
    OWLOntologyManager owner = in.getOWLOntologyManager();
    init(owner);
    return tr(in);
}
/**
* Translates the current OBO document into the given ontology. The steps run in this order: the
* ontology ID is derived from the header frame, the header frame is translated, typedef frames
* are processed in two passes, term frames are translated, the header's import clauses are turned
* into imports declarations, and finally {@link #postProcess(OWLOntology)} is applied.
*
* @param in the ontology to populate
* @return the same ontology instance, {@code in}
*/
protected OWLOntology tr(OWLOntology in) {
setOwlOntology(in);
Frame hf = verifyNotNull(obodoc.getHeaderFrame());
Clause ontClause = hf.getClause(OboFormatTag.TAG_ONTOLOGY);
if (ontClause != null) {
// the ontology clause value doubles as the default id space
String ontOboId = (String) ontClause.getValue();
defaultIDSpace = ontOboId;
IRI ontIRI;
// a value containing ':' is used as the IRI as-is; otherwise it is expanded to
// DEFAULT_IRI_PREFIX + id + ".owl"
if (ontOboId.contains(":")) {
ontIRI = IRI.create(ontOboId);
} else {
ontIRI = IRI.create(DEFAULT_IRI_PREFIX + ontOboId + ".owl");
}
Clause dvclause = hf.getClause(OboFormatTag.TAG_DATA_VERSION);
if (dvclause != null) {
// data-version present: version IRI is DEFAULT_IRI_PREFIX + id/version/id.owl
String dv = dvclause.getValue().toString();
IRI vIRI =
IRI.create(DEFAULT_IRI_PREFIX + ontOboId + '/' + dv + '/' + ontOboId + ".owl");
OWLOntologyID oid = new OWLOntologyID(optional(ontIRI), optional(vIRI));
// if the ontology being read has a different id from the one
// that was passed in, update it
// when parsing, the original ontology is likely an anonymous,
// empty one
if (!oid.equals(in.getOntologyID())) {
manager.applyChange(new SetOntologyID(in, oid));
}
} else {
// if the ontology being read has a different id from the one
// that was passed in, update it
// when parsing, the original ontology is likely an anonymous,
// empty one
if (!ontIRI.equals(in.getOntologyID().getOntologyIRI().orElse(null))) {
manager.applyChange(new SetOntologyID(in,
new OWLOntologyID(optional(ontIRI), emptyOptional())));
}
}
} else {
// no ontology clause in the header: fall back to the id space "TEMP"
defaultIDSpace = "TEMP";
manager.applyChange(new SetOntologyID(in, new OWLOntologyID(
optional(IRI.create(DEFAULT_IRI_PREFIX, defaultIDSpace)), emptyOptional())));
// TODO - warn
}
trHeaderFrame(hf);
// typedef frames are visited twice: first by trTypedefToAnnotationProperty, then by
// trTypedefFrame
obodoc.getTypedefFrames().forEach(this::trTypedefToAnnotationProperty);
obodoc.getTypedefFrames().forEach(this::trTypedefFrame);
obodoc.getTermFrames().forEach(this::trTermFrame);
// TODO - individuals
// each import clause becomes an imports declaration: a load request is made on the manager
// and the declaration is added to the ontology
for (Clause cl : hf.getClauses(OboFormatTag.TAG_IMPORT)) {
String path = getURI(cl.getValue().toString());
IRI importIRI = IRI.create(path);
OWLImportsDeclaration owlImportsDeclaration = fac.getOWLImportsDeclaration(importIRI);
manager.makeLoadImportRequest(owlImportsDeclaration,
new OWLOntologyLoaderConfiguration());
AddImport ai = new AddImport(in, owlImportsDeclaration);
manager.applyChange(ai);
}
postProcess(in);
return in;
}
/**
* perform any necessary post-processing. currently this only includes the experimental
* logical-definitions-view-property
* <p>
* If the ontology carries an annotation for the logical-definition-view-relation property with a
* literal value, that literal is converted to an IRI and used as an object property. Each
* equivalent-classes axiom containing exactly one named class is then replaced by a new axiom in
* which every anonymous class expression is wrapped in an existential restriction on that
* property. Without such an annotation the ontology is left unchanged.
*
* @param ontology the ontology
*/
protected void postProcess(OWLOntology ontology) {
OWLAnnotationProperty p =
fac.getOWLAnnotationProperty(Obo2OWLVocabulary.IRI_OIO_LogicalDefinitionViewRelation);
// take the lexical form of any ontology annotation on p whose value is a literal
Optional findAny = ontology.annotations().filter(a -> a.getProperty().equals(p))
.map(a -> a.getValue().asLiteral()).filter(Optional::isPresent)
.map(x -> x.get().getLiteral()).findAny();
if (!findAny.isPresent()) {
return;
}
IRI pIRI = oboIdToIRI(findAny.get());
OWLObjectProperty vp = fac.getOWLObjectProperty(pIRI);
// axioms are collected first and applied after the traversal has finished
Set rmAxioms = new HashSet<>();
Set newAxioms = new HashSet<>();
ontology.axioms(AxiomType.EQUIVALENT_CLASSES).forEach(eca -> {
AtomicInteger numNamed = new AtomicInteger();
Set xs = new HashSet<>();
eca.classExpressions().forEach(x -> {
if (x instanceof OWLClass) {
// named classes are kept as they are and counted
xs.add(x);
numNamed.incrementAndGet();
} else {
// anonymous class expressions are 'prefixed' with view
// property
xs.add(fac.getOWLObjectSomeValuesFrom(vp, x));
}
});
// only axioms with exactly one named class are rewritten
if (numNamed.get() == 1) {
rmAxioms.add(eca);
newAxioms.add(fac.getOWLEquivalentClassesAxiom(xs));
}
});
ontology.remove(rmAxioms);
ontology.add(newAxioms);
}
/**
* Translates the header frame tag by tag. Some tags are intentionally skipped, some get
* dedicated handling (subsetdef, synonymtypedef, date, property_value, remark, owl-axioms), and
* every remaining tag is turned into ontology annotations, one per clause.
*
* @param headerFrame the header frame
*/
public void trHeaderFrame(Frame headerFrame) {
for (String t : headerFrame.getTags()) {
// identity comparisons below; a tag value matching none of them reaches the final else
OboFormatTag tag = OBOFormatConstants.getTag(t);
if (tag == OboFormatTag.TAG_ONTOLOGY) {
// already processed
} else if (tag == OboFormatTag.TAG_IMPORT) {
// TODO
// nothing is done here; tr(OWLOntology) reads the import clauses itself
} else if (tag == OboFormatTag.TAG_SUBSETDEF) {
// each subset becomes a sub annotation property of the subsetdef property, with the
// clause's second value attached through the comment-tag annotation property
OWLAnnotationProperty parentAnnotProp = trTagToAnnotationProp(t);
for (Clause clause : headerFrame.getClauses(t)) {
OWLAnnotationProperty childAnnotProp =
trAnnotationProp(clause.getValue(String.class));
Set annotations = trAnnotations(clause);
add(fac.getOWLSubAnnotationPropertyOfAxiom(childAnnotProp, parentAnnotProp,
annotations));
OWLAnnotationProperty ap =
trTagToAnnotationProp(OboFormatTag.TAG_COMMENT.getTag());
add(fac.getOWLAnnotationAssertionAxiom(ap, childAnnotProp.getIRI(),
trLiteral(clause.getValue2())));
}
} else if (tag == OboFormatTag.TAG_SYNONYMTYPEDEF) {
// each synonym type becomes a sub annotation property of the synonymtypedef property;
// values[0] names it, values[1] is attached through the name-tag annotation property,
// and a non-empty values[2] is attached through the scope-tag annotation property
OWLAnnotationProperty parentAnnotProp = trTagToAnnotationProp(t);
for (Clause clause : headerFrame.getClauses(t)) {
Object[] values = clause.getValues().toArray();
OWLAnnotationProperty childAnnotProp = trAnnotationProp(values[0].toString());
IRI childIRI = childAnnotProp.getIRI();
Set annotations = trAnnotations(clause);
add(fac.getOWLSubAnnotationPropertyOfAxiom(childAnnotProp, parentAnnotProp,
annotations));
OWLAnnotationProperty ap =
trTagToAnnotationProp(OboFormatTag.TAG_NAME.getTag());
// NOTE(review): values[1] is read without a length check - assumes every
// synonymtypedef clause carries at least two values; confirm against the parser
add(fac.getOWLAnnotationAssertionAxiom(ap, childIRI, trLiteral(values[1])));
if (values.length > 2 && !values[2].toString().isEmpty()) {
ap = trTagToAnnotationProp(OboFormatTag.TAG_SCOPE.getTag());
add(fac.getOWLAnnotationAssertionAxiom(ap, childIRI,
trTagToAnnotationProp(values[2].toString()).getIRI()));
}
}
} else if (tag == OboFormatTag.TAG_DATE) {
handleDate(t, headerFrame.getClause(tag));
} else if (tag == OboFormatTag.TAG_PROPERTY_VALUE) {
addPropertyValueHeaders(headerFrame.getClauses(OboFormatTag.TAG_PROPERTY_VALUE));
} else if (tag == OboFormatTag.TAG_DATA_VERSION) {
// TODO Add versionIRI
// NOTE(review): tr(OWLOntology) already builds a version IRI from the data-version
// clause - confirm whether this TODO is still open
} else if (tag == OboFormatTag.TAG_REMARK) {
// translate remark as rdfs:comment
headerFrame.getClauses(t).forEach(c -> addOntologyAnnotation(fac.getRDFSComment(),
trLiteral(c.getValue()), trAnnotations(c)));
} else if (tag == OboFormatTag.TAG_IDSPACE) {
// do not translate, as they are just directives
} else if (tag == OboFormatTag.TAG_OWL_AXIOMS) {
// in theory, there should only be one tag
// but we can silently collapse multiple tags
headerFrame.getTagValues(tag, String.class)
.forEach(s -> OwlStringTools.translate(s, getOwlOntology()));
} else {
// any other tag: one ontology annotation per clause, using the tag's annotation property
headerFrame.getClauses(t)
.forEach(c -> addOntologyAnnotation(trTagToAnnotationProp(t),
trLiteral(c.getValue()), trAnnotations(c)));
}
}
}
/**
* Translates a date clause into an ontology annotation. A {@link Date} value is rendered with the
* header date format, a {@code String} value is used as it is; any other value type is logged as
* an error and nothing is added. A {@code null} clause is ignored.
*
* @param t the tag whose annotation property is used
* @param clause the date clause, possibly {@code null}
*/
protected void handleDate(String t, @Nullable Clause clause) {
    if (clause == null) {
        return;
    }
    Object rawValue = clause.getValue();
    String rendered;
    if (rawValue instanceof Date) {
        rendered = OBOFormatConstants.headerDateFormat().format((Date) rawValue);
    } else if (rawValue instanceof String) {
        rendered = (String) rawValue;
    } else {
        rendered = null;
    }
    if (rendered == null) {
        // TODO: Throw Exceptions
        OBOFormatException failure =
            new OBOFormatException("Cannot translate clause «" + clause + '»');
        LOG.error("Cannot translate: {}", clause, failure);
        return;
    }
    addOntologyAnnotation(trTagToAnnotationProp(t), trLiteral(rendered),
        trAnnotations(clause));
}
/**
* Adds the property value headers.
*
* @param clauses the clauses
*/
protected void addPropertyValueHeaders(Collection clauses) {
for (Clause clause : clauses) {
Set annotations = trAnnotations(clause);
Collection