be.ugent.rml.conformer.RMLConverterNew Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of rmlmapper Show documentation
Show all versions of rmlmapper Show documentation
The RMLMapper executes RML rules to generate high quality Linked Data from multiple originally (semi-)structured data sources.
The newest version!
package be.ugent.rml.conformer;
import be.ugent.idlab.knows.dataio.access.DatabaseType;
import be.ugent.rml.records.ReferenceFormulation;
import be.ugent.rml.store.Quad;
import be.ugent.rml.store.QuadStore;
import be.ugent.rml.term.BlankNode;
import be.ugent.rml.term.Literal;
import be.ugent.rml.term.NamedNode;
import be.ugent.rml.term.Term;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import static be.ugent.rml.NAMESPACES.*;
/**
 * Rewrites the mapping rules held in a {@link QuadStore} from the old RML, RMLT, QL and R2RML
 * vocabularies to the RML2 vocabulary.
 *
 * <p>The conversion is done in place on the store passed to the constructor. Terms are handled
 * through four lookup tables: predicates and objects that need extra processing when they are
 * replaced, and predicates and objects that are renamed one-to-one.
 */
public class RMLConverterNew implements Converter {

    private static final Logger logger = LoggerFactory.getLogger(RMLConverterNew.class);

    /** Object IRIs that are replaced by another IRI, optionally running extra processing per quad. */
    private static final Map<String, ReplacementEntry> OBJECT_REPLACEMENTS = buildObjectReplacements();

    /** Predicate IRIs that are replaced by another IRI, optionally running extra processing per quad. */
    private static final Map<String, ReplacementEntry> PREDICATE_REPLACEMENTS = buildPredicateReplacements();

    /** Object IRIs that are renamed one-to-one. */
    private static final Map<String, String> OBJECT_RENAMES = buildObjectRenames();

    /*
    TODO: figure out not included entries:
    - rr:inverseExpression: what to do with the deprecated tag?
    */
    /** Predicate IRIs that are renamed one-to-one. */
    private static final Map<String, String> PREDICATE_RENAMES = buildPredicateRenames();

    /** Predicates whose quads are dropped at the end of the conversion. */
    private static final Set<String> OBSOLETES = buildObsoletes();

    private final QuadStore store;

    /**
     * @param store store holding the mapping rules; it is modified in place by {@link #convert(Map)}
     */
    public RMLConverterNew(QuadStore store) {
        this.store = store;
    }

    // The tables below are plain HashMaps filled in a fixed order (instead of double-brace
    // initialisation), so no anonymous subclass is created per converter instance.

    private static Map<String, ReplacementEntry> buildObjectReplacements() {
        Map<String, ReplacementEntry> table = new HashMap<>();
        /* Old RML */
        table.put(RML + "BaseSource", new ReplacementEntry(RML2 + "LogicalSource", null));
        /* R2RML */
        table.put(RR + "BaseTableOrView", new ReplacementEntry(RML2 + "LogicalSource", null));
        table.put(RR + "Literal", new ReplacementEntry(RML2 + "Literal", null));
        table.put(RR + "R2RMLView", new ReplacementEntry(RML2 + "LogicalSource", null));
        table.put(RR + "SQL2008", new ReplacementEntry(RML2 + "SQL2008", null));
        return Collections.unmodifiableMap(table);
    }

    private static Map<String, ReplacementEntry> buildPredicateReplacements() {
        Map<String, ReplacementEntry> table = new HashMap<>();
        table.put(RML + "source", new ReplacementEntry(RML2 + "source", RMLConverterNew::processSources));
        table.put(RML + "query", new ReplacementEntry(RML2 + "query", RMLConverterNew::processQueries));
        table.put(RR + "column", new ReplacementEntry(RML2 + "reference", null));
        table.put(RR + "SQL2008", new ReplacementEntry(RML2 + "SQL2008", null));
        table.put(RR + "logicalTable", new ReplacementEntry(RML2 + "logicalSource", RMLConverterNew::processLogicalTable));
        table.put(RR + "sqlVersion", new ReplacementEntry(RML2 + "referenceFormulation", null));
        table.put(RR + "tableName", new ReplacementEntry(RML2 + "source", RMLConverterNew::processTableName));
        table.put(RR + "Literal", new ReplacementEntry(RML2 + "Literal", RMLConverterNew::processTableName));
        return Collections.unmodifiableMap(table);
    }

    private static Map<String, String> buildObjectRenames() {
        Map<String, String> table = new HashMap<>();
        table.put(QL + "CSV", ReferenceFormulation.CSV);
        table.put(QL + "JSONPath", ReferenceFormulation.JSONPath);
        table.put(QL + "XPath", ReferenceFormulation.XPath);
        table.put(RML + "LogicalSource", RML2 + "LogicalSource");
        table.put(RML + "TriplesMap", RML2 + "TriplesMap");
        table.put(RR + "BlankNode", RML2 + "BlankNode");
        table.put(RR + "IRI", RML2 + "IRI");
        table.put(RR + "Join", RML2 + "Join");
        table.put(RR + "PredicateMap", RML2 + "PredicateMap");
        table.put(RR + "PredicateObjectMap", RML2 + "PredicateObjectMap");
        table.put(RR + "RefObjectMap", RML2 + "RefObjectMap");
        table.put(RR + "SubjectMap", RML2 + "SubjectMap");
        table.put(RR + "ObjectMap", RML2 + "ObjectMap");
        table.put(RR + "TermMap", RML2 + "TermMap");
        table.put(RR + "TriplesMap", RML2 + "TriplesMap");
        table.put(RR + "GraphMap", RML2 + "GraphMap");
        table.put(RR + "defaultGraph", RML2 + "defaultGraph");
        table.put(RML + "LanguageMap", RML2 + "LanguageMap");
        table.put(RMLT + "LogicalTarget", RML2 + "LogicalTarget");
        table.put(RMLT + "Target", RML2 + "Target");
        return Collections.unmodifiableMap(table);
    }

    private static Map<String, String> buildPredicateRenames() {
        Map<String, String> table = new HashMap<>();
        /* Old RML */
        table.put(RML + "iterator", RML2 + "iterator");
        table.put(RML + "logicalSource", RML2 + "logicalSource");
        table.put(RML + "logicalTarget", RML2 + "logicalTarget");
        table.put(RML + "reference", RML2 + "reference");
        table.put(RML + "referenceFormulation", RML2 + "referenceFormulation");
        table.put(RML + "languageMap", RML2 + "languageMap");
        table.put(RML + "parentTermMap", RML2 + "parentTermMap");
        /* Old RMLT */
        table.put(RMLT + "target", RML2 + "target");
        table.put(RMLT + "serialization", RML2 + "serialization");
        table.put(RMLT + "compression", RML2 + "compression");
        /* R2RML */
        table.put(RR + "joinCondition", RML2 + "joinCondition");
        table.put(RR + "parent", RML2 + "parent");
        table.put(RR + "child", RML2 + "child");
        table.put(RR + "parentTriplesMap", RML2 + "parentTriplesMap");
        table.put(RR + "column", RML2 + "reference");
        table.put(RR + "class", RML2 + "class");
        table.put(RR + "constant", RML2 + "constant");
        table.put(RR + "datatype", RML2 + "datatype");
        table.put(RR + "graph", RML2 + "graph");
        table.put(RR + "graphMap", RML2 + "graphMap");
        table.put(RR + "language", RML2 + "language");
        table.put(RR + "object", RML2 + "object");
        table.put(RR + "objectMap", RML2 + "objectMap");
        table.put(RR + "predicate", RML2 + "predicate");
        table.put(RR + "predicateMap", RML2 + "predicateMap");
        table.put(RR + "predicateObjectMap", RML2 + "predicateObjectMap");
        table.put(RR + "subject", RML2 + "subject");
        table.put(RR + "subjectMap", RML2 + "subjectMap");
        table.put(RR + "termType", RML2 + "termType");
        table.put(RR + "template", RML2 + "template");
        table.put(RR + "logicalTable", RML2 + "logicalSource");
        return Collections.unmodifiableMap(table);
    }

    private static Set<String> buildObsoletes() {
        Set<String> terms = new HashSet<>();
        terms.add(RR + "sqlVersion");
        return Collections.unmodifiableSet(terms);
    }

    /**
     * Marks the subject of a table-name quad as using the {@code SQL2008Table} reference formulation.
     *
     * @param tableName quad whose subject receives the reference formulation
     * @param store     store to update
     */
    private static void processTableName(Quad tableName, QuadStore store) {
        NamedNode referenceFormulation = new NamedNode(RML2 + "referenceFormulation");
        // Drop stale formulations BEFORE adding the new one; the reverse order would delete
        // the quad that was just added.
        store.removeQuads(tableName.getSubject(), referenceFormulation, null);
        store.addQuad(tableName.getSubject(), referenceFormulation, new NamedNode(RML2 + "SQL2008Table"));
    }

    /**
     * Replace a logical table quad with a proper logical source
     *
     * @param logicalTableQuad quad linking a triples map (subject) to its logical table (object)
     * @param quadStore        store to update
     * @throws IllegalArgumentException if the logical table carries no {@code rr:tableName}
     */
    private static void processLogicalTable(Quad logicalTableQuad, QuadStore quadStore) {
        Term triplesMap = logicalTableQuad.getSubject();
        Term logicalTable = logicalTableQuad.getObject();

        // Look the table name up before touching the store, so a malformed mapping fails
        // with a clear message and without leaving a half-built logical source behind.
        List<Quad> tableNames = quadStore.getQuads(logicalTable, new NamedNode(RR + "tableName"), null);
        if (tableNames.isEmpty()) {
            throw new IllegalArgumentException(
                    "Logical table " + logicalTable.getValue() + " does not contain a tableName");
        }
        Term tableName = tableNames.get(0).getObject();

        BlankNode blank = new BlankNode();
        quadStore.addQuad(triplesMap, new NamedNode(RML2 + "logicalSource"), blank);
        // the reference formulation belongs on the logical source, not on the triples map
        quadStore.removeQuads(triplesMap, new NamedNode(RML2 + "referenceFormulation"), new NamedNode(ReferenceFormulation.RDBTable));
        // now have blank contain all required fields
        // translate the table name as rml:source
        quadStore.addQuad(blank, new NamedNode(RML2 + "source"), tableName);
        quadStore.addQuad(blank, new NamedNode(RML2 + "referenceFormulation"), new NamedNode(ReferenceFormulation.RDBTable));
        quadStore.removeQuads(logicalTableQuad);
    }

    /**
     * Converts the mapping in the store to the RML2 vocabulary.
     *
     * @param mappingOptions database options; when non-null, every entry is attached as a D2RQ
     *                       property to a database node generated for each {@code rr:logicalTable},
     *                       and table-name / query based logical sources are rewritten first.
     *                       May be {@code null}, in which case those steps are skipped.
     * @throws IllegalArgumentException if a logical table has neither a table name nor a SQL query
     * @throws Exception                if the underlying store reports a failure
     */
    @Override
    public void convert(Map<String, String> mappingOptions) throws Exception {
        // inject DB sources for every triple map that has a logicalTable as source
        if (mappingOptions != null) {
            convertLogicalTables(mappingOptions);
            convertTableNameSources();
            convertQuerySources();
        }

        applyPredicateReplacements();
        applyObjectReplacements();
        applySimpleRenames();
        replaceNamespaces();
        dropObsolete();
    }

    /** Turns every rr:logicalTable into a logical source backed by a generated D2RQ database node. */
    private void convertLogicalTables(Map<String, String> mappingOptions) throws Exception {
        List<Quad> logicalTableMaps = this.store.getQuads(null, new NamedNode(RR + "logicalTable"), null);
        for (Quad triplesMapQuad : logicalTableMaps) {
            Term triplesMap = triplesMapQuad.getSubject();

            // insert the database for the logical source
            Term database = new NamedNode(triplesMap.getValue() + "_database");
            this.store.addQuad(database, new NamedNode(RDF + "type"), new NamedNode(D2RQ + "Database"));
            for (Map.Entry<String, String> option : mappingOptions.entrySet()) {
                this.store.addQuad(database, new NamedNode(D2RQ + option.getKey()), new Literal(option.getValue()));
                if ("jdbcDSN".equals(option.getKey())) {
                    // the driver is derived from the DSN rather than supplied by the caller
                    DatabaseType type = DatabaseType.getDBtype(option.getValue());
                    this.store.addQuad(database, new NamedNode(D2RQ + "jdbcDriver"), new Literal(type.getDriver()));
                }
            }

            Term logicalSource = new NamedNode(triplesMap.getValue() + "_logicalSource");
            this.store.addQuad(logicalSource, new NamedNode(RML2 + "source"), database);

            // translate rr:logicalTable to rml:source
            Term logicalTable = triplesMapQuad.getObject();
            List<Quad> tableNames = this.store.getQuads(logicalTable, new NamedNode(RR + "tableName"), null);
            if (tableNames.isEmpty()) {
                // no tableNames present, SQL query must be present
                List<Quad> queries = this.store.getQuads(logicalTable, new NamedNode(RR + "sqlQuery"), null);
                if (queries.isEmpty()) {
                    throw new IllegalArgumentException("Logical table contains neither a tableName, nor a SQL query");
                }
                this.store.addQuad(logicalSource, new NamedNode(RML2 + "referenceFormulation"), new NamedNode(ReferenceFormulation.RDBQuery));
                this.store.addQuad(logicalSource, new NamedNode(RML2 + "iterator"), new Literal(queries.get(0).getObject().getValue()));
                this.store.removeQuads(logicalTable, new NamedNode(RR + "sqlQuery"), null);
            } else {
                this.store.addQuad(logicalSource, new NamedNode(RML2 + "referenceFormulation"), new NamedNode(ReferenceFormulation.RDBTable));
                this.store.addQuad(logicalSource, new NamedNode(RML2 + "iterator"), new Literal(tableNames.get(0).getObject().getValue()));
                this.store.removeQuads(logicalTable, new NamedNode(RR + "tableName"), null);
            }

            // connect the logical source to the map
            this.store.addQuad(triplesMap, new NamedNode(RML2 + "logicalSource"), logicalSource);
            // clean up the store: remove logicalTable
            this.store.removeQuads(triplesMap, new NamedNode(RR + "logicalTable"), null);
        }
    }

    /** Converts all logical sources that still carry an rr:tableName into proper sources. */
    private void convertTableNameSources() throws Exception {
        List<Quad> tableNameLogicalSources = this.store.getQuads(null, new NamedNode(RR + "tableName"), null);
        for (Quad ls : tableNameLogicalSources) {
            Term logicalSource = ls.getSubject();
            // set the correct reference formulation
            this.store.addQuad(logicalSource, new NamedNode(RML2 + "referenceFormulation"), new NamedNode(ReferenceFormulation.RDBTable));
            // put table name in rml:iterator
            String tableName = ls.getObject().getValue();
            this.store.addQuad(logicalSource, new NamedNode(RML2 + "iterator"), new Literal(tableName));
            // drop obsolete fields
            this.store.removeQuads(logicalSource, new NamedNode(RR + "sqlVersion"), null);
            this.store.removeQuads(ls);
        }
    }

    /** Converts all logical sources that carry an rml:query into proper sources. */
    private void convertQuerySources() throws Exception {
        List<Quad> queryLogicalSources = this.store.getQuads(null, new NamedNode(RML + "query"), null);
        for (Quad ls : queryLogicalSources) {
            Term logicalSource = ls.getSubject();

            Term source = null;
            if (this.store.contains(logicalSource, new NamedNode(RML + "source"), null)) {
                source = this.store.getQuad(logicalSource, new NamedNode(RML + "source"), null).getObject();
            }

            // drop any obsolete reference formulations
            this.store.removeQuads(logicalSource, new NamedNode(RML + "referenceFormulation"), null);

            // set the proper reference formulation: the source's result format when it has one,
            // the SQL query formulation otherwise
            if (source != null && this.store.contains(source, new NamedNode(SD + "resultFormat"), null)) {
                Term resultsFormat = this.store.getQuad(source, new NamedNode(SD + "resultFormat"), null).getObject();
                this.store.addQuad(logicalSource, new NamedNode(RML2 + "referenceFormulation"), resultsFormat);
            } else {
                this.store.addQuad(logicalSource, new NamedNode(RML2 + "referenceFormulation"), new NamedNode(ReferenceFormulation.RDBQuery));
            }

            // set the query into the iterator
            this.store.addQuad(logicalSource, new NamedNode(RML2 + "iterator"), ls.getObject());

            // drop the obsolete quads
            this.store.removeQuads(logicalSource, new NamedNode(RR + "sqlVersion"), null);
            this.store.removeQuads(ls);
        }
    }

    /** Runs the replacement function of every predicate in use, or renames it when none is defined. */
    private void applyPredicateReplacements() throws Exception {
        for (Map.Entry<String, ReplacementEntry> e : PREDICATE_REPLACEMENTS.entrySet()) {
            String oldTerm = e.getKey();
            ReplacementEntry replacement = e.getValue();

            List<Quad> quads = this.store.getQuads(null, new NamedNode(oldTerm), null);
            if (quads.isEmpty()) {
                continue;
            }

            if (replacement.function() != null) {
                for (Quad q : quads) {
                    replacement.function().call(q, this.store);
                }
            } else {
                // apply the replace and warn; the rename covers every quad at once, so it is
                // done once per term rather than once per quad
                this.store.renameAllPredicates(new NamedNode(oldTerm), new NamedNode(replacement.replacementTerm()));
                logger.warn("Predicate replacement function for term {} is not yet defined!", oldTerm);
            }
        }
    }

    /** Re-adds every quad that carries a replaced object with its new object and removes the old quad. */
    private void applyObjectReplacements() throws Exception {
        for (Map.Entry<String, ReplacementEntry> e : OBJECT_REPLACEMENTS.entrySet()) {
            String oldTerm = e.getKey();
            ReplacementEntry replacement = e.getValue();

            // find all quads that carry this object
            List<Quad> quads = this.store.getQuads(null, null, new NamedNode(oldTerm));
            for (Quad q : quads) {
                this.store.addQuad(q.getSubject(), q.getPredicate(), new NamedNode(replacement.replacementTerm()));
                if (replacement.function() != null) {
                    replacement.function().call(q, this.store);
                }
            }
            if (replacement.function() == null && !quads.isEmpty()) {
                logger.warn("Object replacement function for term {} is not yet defined!", oldTerm);
            }
            this.store.removeQuads(quads);
        }
    }

    /** Applies the one-to-one renames; runs after the replacements above. */
    private void applySimpleRenames() throws Exception {
        for (Map.Entry<String, String> e : PREDICATE_RENAMES.entrySet()) {
            this.store.renameAllPredicates(new NamedNode(e.getKey()), new NamedNode(e.getValue()));
        }
        for (Map.Entry<String, String> e : OBJECT_RENAMES.entrySet()) {
            this.store.renameAllObjects(new NamedNode(e.getKey()), new NamedNode(e.getValue()));
        }
    }

    /** Swaps the old rml/rr/ql prefixes for the single new rml prefix. */
    private void replaceNamespaces() throws Exception {
        this.store.removeNameSpace("rml");
        this.store.removeNameSpace("rr");
        this.store.removeNameSpace("ql");
        this.store.addNameSpace("rml", "http://w3id.org/rml/");
    }

    /**
     * Wraps a literal source into a DCAT distribution node and renames the old source predicate.
     *
     * @param source quad carrying the old source predicate
     * @param store  store to update
     */
    private static void processSources(Quad source, QuadStore store) {
        if (source.getObject().isLiteral()) {
            String path = source.getObject().getValue();
            BlankNode node = new BlankNode();
            store.addQuad(node, new NamedNode(RDF + "type"), new NamedNode(DCAT + "Distribution"));
            store.addQuad(node, new NamedNode(RDF + "type"), new NamedNode(RML2 + "Source"));
            store.addQuad(node, new NamedNode(DCAT + "downloadURL"), new Literal(path)); // TODO: file:// prefix
            store.addQuad(source.getSubject(), new NamedNode(RML2 + "source"), node);
            store.removeQuads(source.getSubject(), source.getPredicate(), source.getObject());
        }
        store.renameAllPredicates(new NamedNode(RML + "source"), new NamedNode(RML2 + "source"));
    }

    /**
     * Sets the reference formulation of a query based logical source and moves the query into
     * the iterator predicate.
     *
     * @param query quad carrying the old query predicate
     * @param store store to update
     * @throws Exception if the underlying store reports a failure
     */
    private static void processQueries(Quad query, QuadStore store) throws Exception {
        Term logicalSource = query.getSubject();
        NamedNode referenceFormulation = new NamedNode(RML2 + "referenceFormulation");
        NamedNode resultFormat = new NamedNode(SD + "resultFormat");

        Term source = findSource(logicalSource, store);
        if (source != null && store.contains(source, resultFormat, null)) {
            Term supportedLanguage = store.getQuad(source, resultFormat, null).getObject();
            store.addQuad(logicalSource, referenceFormulation, supportedLanguage);
        } else {
            store.addQuad(logicalSource, referenceFormulation, new NamedNode(RML2 + "SQL2008Query"));
        }

        store.removeQuads(logicalSource, new NamedNode(RML + "referenceFormulation"), null);
        store.removeQuads(logicalSource, new NamedNode(RML + "iterator"), null);
        store.renameAllPredicates(new NamedNode(RML + "query"), new NamedNode(RML2 + "iterator"));
    }

    /**
     * Finds the source of a logical source under the new predicate or, when the source
     * replacement has not run yet, under the old one.
     *
     * @return the source term, or {@code null} when the logical source has none
     */
    private static Term findSource(Term logicalSource, QuadStore store) throws Exception {
        // The replacement table is a HashMap, so the order in which the source and query
        // replacements run is not something this method can rely on.
        NamedNode[] candidates = {new NamedNode(RML2 + "source"), new NamedNode(RML + "source")};
        for (NamedNode predicate : candidates) {
            if (store.contains(logicalSource, predicate, null)) {
                return store.getQuad(logicalSource, predicate, null).getObject();
            }
        }
        return null;
    }

    /** Removes every quad whose predicate is listed as obsolete. */
    private void dropObsolete() {
        for (String obsolete : OBSOLETES) {
            this.store.removeQuads(null, new NamedNode(obsolete), null);
        }
    }

    /**
     * Serves as a function to run when converting terms that are replaced by another term
     */
    @FunctionalInterface
    private interface ReplaceFunction {
        void call(Quad quad, QuadStore store) throws Exception;
    }

    /**
     * Private record to contain the term to replace and a function to further execute on the QuadStore
     *
     * @param replacementTerm term to be put instead of the previous one
     * @param function        function to run at replacement time; {@code null} when none is defined
     */
    private record ReplacementEntry(String replacementTerm, ReplaceFunction function) {
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy