
// it.uniroma2.art.owlart.utilities.transform.SKOS2OWLConverter Maven / Gradle / Ivy
package it.uniroma2.art.owlart.utilities.transform;
import it.uniroma2.art.owlart.exceptions.ModelAccessException;
import it.uniroma2.art.owlart.exceptions.ModelUpdateException;
import it.uniroma2.art.owlart.model.ARTLiteral;
import it.uniroma2.art.owlart.model.ARTURIResource;
import it.uniroma2.art.owlart.model.NodeFilters;
import it.uniroma2.art.owlart.model.impl.ARTNodeFactoryImpl;
import it.uniroma2.art.owlart.models.OWLModel;
import it.uniroma2.art.owlart.models.SKOSModel;
import it.uniroma2.art.owlart.navigation.ARTLiteralIterator;
import it.uniroma2.art.owlart.navigation.ARTURIResourceIterator;
import it.uniroma2.art.owlart.vocabulary.OWL;
import it.uniroma2.art.owlart.vocabulary.RDFS;
import it.uniroma2.art.owlart.vocabulary.XmlSchema;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Simple utility class for transforming a SKOS thesaurus into an OWL ontology using the rules adopted by the
* OAEI 2012 Library Track (http://web.informatik.uni-mannheim.de/oaei-library/2012/).
*
*
* - skos:concept ➔ owl:Class
* - skos:prefLabel, skos:altLabel ➔ rdfs:label
* - skos:scopeNote, skos:notation ➔ rdfs:comment
* - skos:narrower ➔ rdfs:superClassOf
* - skos:broader ➔ rdfs:subClassOf
* - skos:related ➔ rdfs:seeAlso
*
*
* @author Manuel Fiorelli
*
*/
public class SKOS2OWLConverter {
private static final Logger logger = LoggerFactory.getLogger(SKOS2OWLConverter.class);
private boolean considerPrefLabel = true;
private boolean considerAltLabel = true;
private boolean considerDefinition = true;
private boolean considerScopeNote = true;
private boolean considerNotation = true;
private boolean considerRelated = true;
private boolean considerBroader = true;
private Pattern splitLabelPattern = Pattern.compile("^(\\w(?:\\.\\d+)+)(.+)");
private boolean emitNotationFromLabel = true;
private Pattern labelSanitization = Pattern.compile("^(?:'|\")?(.*)(?:'|\")?$");
/**
* Abstract class modeling a URI transformer. Implement a concrete subclass for providing new URI
* transformation.
*
*/
public static abstract class URITransformer {
private ARTNodeFactoryImpl nodeFact = new ARTNodeFactoryImpl();
public abstract String transform(String source);
public ARTURIResource transform(ARTURIResource source) {
String newURI = transform(source.getURI());
return nodeFact.createURIResource(newURI);
}
}
/**
* A transformer which leave the URI untouched.
*/
public static final URITransformer IDENTITY_URI_TRANSFORMER = new URITransformer() {
public String transform(String source) {
return source;
}
};
private URITransformer uriTransformer = IDENTITY_URI_TRANSFORMER;
/**
* Sets the {@link URITransformer} for this converter.
*
* @param uriTransformer
* a {@code URITransformer}
*/
public void setURITransformer(URITransformer uriTransformer) {
this.uriTransformer = uriTransformer;
}
public boolean isConsiderPrefLabel() {
return considerPrefLabel;
}
public void setConsiderPrefLabel(boolean considerPrefLabel) {
this.considerPrefLabel = considerPrefLabel;
}
public boolean isConsiderAltLabel() {
return considerAltLabel;
}
public void setConsiderAltLabel(boolean considerAltLabel) {
this.considerAltLabel = considerAltLabel;
}
public boolean isConsiderScopeNote() {
return considerScopeNote;
}
public void setConsiderScopeNote(boolean considerScopeNote) {
this.considerScopeNote = considerScopeNote;
}
public boolean isConsiderNotation() {
return considerNotation;
}
public void setConsiderNotation(boolean considerNotation) {
this.considerNotation = considerNotation;
}
public boolean isConsiderRelated() {
return considerRelated;
}
public void setConsiderRelated(boolean considerRelated) {
this.considerRelated = considerRelated;
}
public boolean isConsiderBroader() {
return considerBroader;
}
public void setConsiderBroader(boolean considerBroader) {
this.considerBroader = considerBroader;
}
public Pattern getSplitLabelPattern() {
return splitLabelPattern;
}
public void setSplitLabelPattern(Pattern splitLabelPattern) {
this.splitLabelPattern = splitLabelPattern;
}
public boolean isEmitNotationFromLabel() {
return emitNotationFromLabel;
}
public void setEmitNotationFromLabel(boolean emitNotationFromLabel) {
this.emitNotationFromLabel = emitNotationFromLabel;
}
public Pattern getLabelSanitization() {
return labelSanitization;
}
public void setLabelSanitization(Pattern labelSanitization) {
this.labelSanitization = labelSanitization;
}
/**
* Converts a SKOS thesaurus into an OWL ontology. In {@code sourceModel} the inference should be turned
* on, while in {@code targetModel} inference MUST be turned off. Inferences on the target model are
* useless, and may cause a relevant slow down.
*
* @param sourceModel
* the source model containing the input thesaurus
* @param targetModel
* the target model which will held the produced ontology
* @throws ModelAccessException
* @throws ModelUpdateException
*/
public void doConversion(SKOSModel sourceModel, OWLModel targetModel) throws ModelAccessException,
ModelUpdateException {
// Adds an owl:Ontology resource
ARTURIResourceIterator schemeIt = sourceModel.listAllSchemes(NodeFilters.MAINGRAPH);
try {
while (schemeIt.streamOpen()) {
ARTURIResource aScheme = schemeIt.getNext();
targetModel.addInstance(uriTransformer.transform(aScheme).getURI(), OWL.Res.ONTOLOGY,
NodeFilters.MAINGRAPH);
}
} finally {
schemeIt.close();
}
// Adds the desired annotation property into the target model
targetModel.addAnnotationProperty(RDFS.LABEL, null, NodeFilters.MAINGRAPH);
targetModel.addAnnotationProperty(RDFS.COMMENT, null, NodeFilters.MAINGRAPH);
targetModel.addAnnotationProperty(RDFS.SEEALSO, null, NodeFilters.MAINGRAPH);
// Adds the rdfs:isDefinedBy property
targetModel.addProperty(RDFS.ISDEFINEDBY, null, NodeFilters.MAINGRAPH);
// Transforms each concept
int count = 0;
ARTURIResourceIterator it = sourceModel.listConcepts(true);
try {
while (it.streamOpen()) {
ARTURIResource concept = it.getNext();
transformConcept(sourceModel, targetModel, concept);
count++;
if (count % 10 == 0) {
System.out.println("processed = " + count);
logger.info("Processed Concepts = {}", count);
}
}
} finally {
it.close();
}
logger.info("Total procedded concepts = {}", count);
}
private void transformConcept(SKOSModel skosModel, OWLModel owlModel, ARTURIResource concept)
throws ModelUpdateException, ModelAccessException {
ARTURIResource classURI = transformURI(concept);
// Adds a class for a concept
owlModel.addClass(classURI.getURI(), NodeFilters.MAINGRAPH);
ARTURIResourceIterator it = skosModel.listAllSchemesForConcept(concept, NodeFilters.MAINGRAPH);
try {
while (it.streamOpen()) {
ARTURIResource scheme = it.getNext();
owlModel.addTriple(classURI, RDFS.Res.ISDEFINEDBY, transformURI(scheme),
NodeFilters.MAINGRAPH);
}
} finally {
it.close();
}
// Transforms skos:{pref,alt}Label(s)
if (considerPrefLabel)
transformPrefLabels(skosModel, owlModel, concept);
if (considerAltLabel)
transformAltLabels(skosModel, owlModel, concept);
// Transforms skos:scopeNote(s) and skos:notation(s)
if (considerScopeNote)
transformScopeNotes(skosModel, owlModel, concept);
if (considerNotation)
transformNotations(skosModel, owlModel, concept);
if (considerDefinition)
transformDefinitions(skosModel, owlModel, concept);
// Transforms skos:broader and skos:related
if (considerBroader)
transformBroaders(skosModel, owlModel, concept);
if (considerRelated)
transformRelated(skosModel, owlModel, concept);
}
private void transformDefinitions(SKOSModel skosModel, OWLModel owlModel, ARTURIResource concept)
throws ModelAccessException, ModelUpdateException {
ARTURIResource currentClass = transformURI(concept);
ARTLiteralIterator it = skosModel.listDefinitions(concept, true);
try {
while (it.streamOpen()) {
ARTLiteral label = it.getNext();
owlModel.addComment(currentClass, label.getLabel(), label.getLanguage(),
NodeFilters.MAINGRAPH);
}
} finally {
it.close();
}
}
private void transformPrefLabels(SKOSModel skosModel, OWLModel owlModel, ARTURIResource concept)
throws ModelUpdateException, ModelAccessException {
ARTURIResource currentClass = transformURI(concept);
ARTLiteralIterator it = skosModel.listPrefLabels(concept, true);
try {
while (it.streamOpen()) {
ARTLiteral label = it.getNext();
String lexicalForm = label.getLabel();
String lang = label.getLanguage();
Matcher m = splitLabelPattern.matcher(lexicalForm);
if (m.find()) {
String notation = m.group(1);
lexicalForm = m.group(2).trim();
if (emitNotationFromLabel) {
owlModel.addTriple(currentClass, RDFS.Res.COMMENT,
owlModel.createLiteral(notation, XmlSchema.STRING), NodeFilters.MAINGRAPH);
}
}
Matcher m2 = labelSanitization.matcher(lexicalForm);
if (m2.find()) {
lexicalForm = m2.group(1).trim();
}
owlModel.addLabel(currentClass, lexicalForm, lang, NodeFilters.MAINGRAPH);
}
} finally {
it.close();
}
}
private void transformAltLabels(SKOSModel skosModel, OWLModel owlModel, ARTURIResource concept)
throws ModelUpdateException, ModelAccessException {
ARTURIResource currentClass = transformURI(concept);
ARTLiteralIterator it = skosModel.listAltLabels(concept, true);
try {
while (it.streamOpen()) {
ARTLiteral label = it.getNext();
if (label != null)
owlModel.addLabel(currentClass, label.getLabel(), label.getLanguage(),
NodeFilters.MAINGRAPH);
}
} finally {
it.close();
}
}
private void transformBroaders(SKOSModel skosModel, OWLModel owlModel, ARTURIResource concept)
throws ModelAccessException, ModelUpdateException {
ARTURIResource currentClass = transformURI(concept);
ARTURIResourceIterator it = skosModel.listBroaderConcepts(concept, false, true);
try {
while (it.streamOpen()) {
ARTURIResource broaderConcept = it.getNext();
ARTURIResource superClassURI = transformURI(broaderConcept);
owlModel.addSuperClass(currentClass, superClassURI, NodeFilters.MAINGRAPH);
}
} finally {
it.close();
}
}
private void transformRelated(SKOSModel skosModel, OWLModel owlModel, ARTURIResource concept)
throws ModelAccessException, ModelUpdateException {
ARTURIResource currentClass = transformURI(concept);
ARTURIResourceIterator it = skosModel.listRelatedConcepts(concept, true);
try {
while (it.streamOpen()) {
ARTURIResource broaderConcept = it.getNext();
ARTURIResource superClassURI = transformURI(broaderConcept);
owlModel.addTriple(currentClass, RDFS.Res.SEEALSO, superClassURI, NodeFilters.MAINGRAPH);
}
} finally {
it.close();
}
}
private void transformScopeNotes(SKOSModel skosModel, OWLModel owlModel, ARTURIResource concept)
throws ModelUpdateException, ModelAccessException {
ARTURIResource currentClass = transformURI(concept);
ARTLiteralIterator it = skosModel.listScopeNotes(concept, true);
try {
while (it.streamOpen()) {
ARTLiteral label = it.getNext();
owlModel.addComment(currentClass, label.getLabel(), label.getLanguage(),
NodeFilters.MAINGRAPH);
}
} finally {
it.close();
}
}
private void transformNotations(SKOSModel skosModel, OWLModel owlModel, ARTURIResource concept)
throws ModelAccessException, ModelUpdateException {
ARTURIResource currentClass = transformURI(concept);
ARTLiteralIterator it = skosModel.listNotations(concept);
try {
while (it.streamOpen()) {
ARTLiteral label = it.getNext();
owlModel.addComment(currentClass, label.getLabel(), label.getLanguage(),
NodeFilters.MAINGRAPH);
}
} finally {
it.close();
}
}
private ARTURIResource transformURI(ARTURIResource concept) {
return uriTransformer.transform(concept);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy