All Downloads are FREE. Search and download functionalities are using the official Maven repository.

be.ugent.rml.MappingOptimizer Maven / Gradle / Ivy

Go to download

The RMLMapper executes RML rules to generate high quality Linked Data from multiple originally (semi-)structured data sources.

The newest version!
package be.ugent.rml;

import be.ugent.rml.extractor.Extractor;
import be.ugent.rml.extractor.ReferenceExtractor;
import be.ugent.rml.store.Quad;
import be.ugent.rml.store.QuadStore;
import be.ugent.rml.term.NamedNode;
import be.ugent.rml.term.Term;

import java.util.*;
import static be.ugent.rml.Utils.getObjectsFromQuads;

public class MappingOptimizer {

    private final QuadStore rmlStore;

    public MappingOptimizer(QuadStore rmlStore) {
        this.rmlStore = rmlStore;
    }

    public QuadStore optimizeMapping() throws Exception {
        renameSameLogicalSource();
        eliminateSelfJoins();
        return rmlStore;
    }

    private void renameSameLogicalSource() {
        List logicalSources = Utils.getObjectsFromQuads(rmlStore.getQuads(null,new NamedNode(NAMESPACES.RML2 + "logicalSource"),null));
        Map, Term> logicalSourcesDict = new HashMap<>();
        for (Term logicalSource : logicalSources){
            // two logical Sources are considered to be identical when they have the same objects at the leaves of their subgraph
            Set allObjects = new HashSet<>();
            List objects = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, null, null));
            while (!objects.isEmpty()) {
                Term object = objects.remove(objects.size()-1);
                if (object.isBNode() || object.isIRI()) {
                    List newObjects = Utils.getObjectsFromQuads(rmlStore.getQuads(object, null, null));
                    if (!newObjects.isEmpty()) {
                        objects.addAll(newObjects);
                    } else {
                        //object is final, not subject of new quads
                        allObjects.add(object);
                    }
                } else {
                    //object is final, not subject of new quads
                    allObjects.add(object);
                }
            }
            Set finalObjectSet = Collections.unmodifiableSet(allObjects);
            if (!logicalSourcesDict.keySet().contains(finalObjectSet)) {
                logicalSourcesDict.put(finalObjectSet, logicalSource);
            } else {
                List triplesMaps = Utils.getSubjectsFromQuads(this.rmlStore.getQuads(null, new NamedNode(NAMESPACES.RML2 + "logicalSource"), logicalSource));
                for (Term triplesMap : triplesMaps) {
                    rmlStore.removeQuads(triplesMap, new NamedNode(NAMESPACES.RML2 + "logicalSource"), logicalSource);
                    rmlStore.addQuad(triplesMap, new NamedNode(NAMESPACES.RML2 + "logicalSource"), logicalSourcesDict.get(finalObjectSet));
                }
            }
        }
    }

    private void eliminateSelfJoins() {
        List refObjectMapsQuads = rmlStore.getQuads(null, new NamedNode(NAMESPACES.RML2 + "parentTriplesMap"), null);
        for (Quad refObjectMapQuad : refObjectMapsQuads) {
            Term parentTriplesMap = refObjectMapQuad.getObject();
            Term childObjectMap = refObjectMapQuad.getSubject();
            Term parentLogicalSource = Utils.getObjectsFromQuads(rmlStore.getQuads(parentTriplesMap, new NamedNode(NAMESPACES.RML2 + "logicalSource"), null)).get(0);
            Term childPredicateObjectMap = Utils.getSubjectsFromQuads(rmlStore.getQuads(null, new NamedNode(NAMESPACES.RML2 + "objectMap"), childObjectMap)).get(0);
            Term childTriplesMap = Utils.getSubjectsFromQuads(rmlStore.getQuads(null, new NamedNode(NAMESPACES.RML2 + "predicateObjectMap"), childPredicateObjectMap)).get(0);
            Term childLogicalSource = Utils.getObjectsFromQuads(rmlStore.getQuads(childTriplesMap, new NamedNode(NAMESPACES.RML2 + "logicalSource"), null)).get(0);

            // check if the logical sources are the same
            if (childLogicalSource.equals(parentLogicalSource)) {

                List joinConditions = Utils.getObjectsFromQuads(rmlStore.getQuads(childObjectMap, new NamedNode(NAMESPACES.RML2 + "joinCondition"), null));

                List parentSubjectMaps = Utils.getObjectsFromQuads(rmlStore.getQuads(parentTriplesMap, new NamedNode(NAMESPACES.RML2 + "subjectMap"), null));
                 Term parentSubjectMap = null;
                if (!parentSubjectMaps.isEmpty()) {
                    parentSubjectMap = parentSubjectMaps.get(0);
                }

                boolean safeSelfJoinElimination = true;

                // if no join condition, we can safely eliminate the self-join
                // else we need more checks
                if (parentSubjectMap != null && !joinConditions.isEmpty()) {
                    // we can eliminate a self-join when all join conditions have equal references and all references for the parent subject or all reference for the related child triple come back in the join conditions
                    // 1. check if all join references are equal
                    List joinReferences = new ArrayList<>();
                    for (Term joinCondition : joinConditions) {
                        String parent = getObjectsFromQuads(rmlStore.getQuads(joinCondition, new NamedNode(NAMESPACES.RML2 + "parent"), null)).get(0).getValue();
                        String child = getObjectsFromQuads(rmlStore.getQuads(joinCondition, new NamedNode(NAMESPACES.RML2 + "child"), null)).get(0).getValue();
                        if (child.equals(parent)) {
                            joinReferences.add(child);
                        } else {
                            safeSelfJoinElimination = false;
                        }
                    }
                    if (safeSelfJoinElimination) {
                        // 2. check if all references for the parent subject come back in the join conditions
                        boolean safeTerms = hasSafeReferences(parentSubjectMap, joinReferences);
                        if(!safeTerms) {
                            // if not all references for the parent subject come back in the join conditions,
                            // 3. check if all references for the related child terms come back in the join conditions
                            // 3.1 check child subject
                            List childSubjectMaps = Utils.getObjectsFromQuads(rmlStore.getQuads(parentTriplesMap, new NamedNode(NAMESPACES.RML2 + "subjectMap"), null));
                            if(!childSubjectMaps.isEmpty()) {
                                safeTerms = hasSafeReferences(childSubjectMaps.get(0), joinReferences);
                            } else {
                                safeTerms = true;
                            }
                            //3.2 check child predicate (only make sense if the child subject was safe, otherwise we cannot eliminate the-self join)
                            if (safeTerms) {
                                List childPredicateMaps = Utils.getObjectsFromQuads(rmlStore.getQuads(childPredicateObjectMap, new NamedNode(NAMESPACES.RML2 + "predicateMap"), null));
                                if(!childPredicateMaps.isEmpty()) {
                                    safeTerms = hasSafeReferences(childPredicateMaps.get(0), joinReferences);
                                }
                            }
                        }
                        // 4. if parent subject or all child terms are safe, the self join can be eliminated, else not
                        if (!safeTerms) {
                            safeSelfJoinElimination = false;
                        }
                    }
                }
                if (safeSelfJoinElimination) {
                    // now we rewrite the mapping file to eliminate the self-join
                    boolean termTypeAdded = false;
                    List parentSubjectMapQuads = rmlStore.getQuads(parentSubjectMap, null, null);
                    for (Quad parentSubjectMapQuad : parentSubjectMapQuads) {
                        Term predicate = parentSubjectMapQuad.getPredicate();
                        if (predicate.equals(new NamedNode(NAMESPACES.FNML + "functionValue"))
                                || predicate.equals(new NamedNode(NAMESPACES.RML2 + "termType"))
                                || predicate.equals(new NamedNode(NAMESPACES.RML2 + "reference"))
                                || predicate.equals(new NamedNode(NAMESPACES.RML2 + "template"))
                                || predicate.equals(new NamedNode(NAMESPACES.RML2 + "constant"))) {
                            rmlStore.addQuad(childObjectMap, predicate, parentSubjectMapQuad.getObject());
                        }
                        if (predicate.equals(new NamedNode(NAMESPACES.RML2 + "termType"))) {
                            termTypeAdded = true;
                        }
                    }
                    rmlStore.removeQuads(childObjectMap, new NamedNode(NAMESPACES.RML2 + "parentTriplesMap"), parentTriplesMap);
                    if (!termTypeAdded) {
                        rmlStore.addQuad(childObjectMap, new NamedNode(NAMESPACES.RML2 + "termType"), new NamedNode(NAMESPACES.RML2 + "IRI"));
                    }
                }
            }
        }
    }

    private Set getAllLinkedReferences(Term term){
        Set references = new HashSet<>();
        List linkedSubjects = new ArrayList<>();
        linkedSubjects.add(term);
        while(!linkedSubjects.isEmpty()) {
            Term subject = linkedSubjects.get(0);
            List linkedQuads = rmlStore.getQuads(subject, null, null);
            for (Quad linkedQuad : linkedQuads) {
                Term predicate = linkedQuad.getPredicate();
                if (predicate.equals(new NamedNode(NAMESPACES.RML2 + "reference"))) {
                    references.add(linkedQuad.getObject().getValue());
                } else if (predicate.equals(new NamedNode(NAMESPACES.RML2 + "template"))) {
                    String template = linkedQuad.getObject().getValue();
                    List extractors = Utils.parseTemplate(template, false);
                    for (Extractor extractor : extractors) {
                        if (extractor instanceof ReferenceExtractor) {
                            references.add(((ReferenceExtractor) extractor).getReference());
                        }
                    }
                } else {
                    Term object = linkedQuad.getObject();
                    if (object.isBNode() || object.isIRI()) {
                        linkedSubjects.add(object);
                    }
                }
            }
            linkedSubjects.remove(0);
        }
        return references;
    }

    private boolean hasSafeReferences(Term term, List joinReferences){
        boolean isSafe = true;
        Set termReferences = getAllLinkedReferences(term);
        for (String parentReference : termReferences){
            if (!joinReferences.contains(parentReference)){
                isSafe = false;
            }
        }
        return isSafe;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy