All downloads are free. Search and download functionality uses the official Maven repository.

eu.fbk.rdfpro.rules.Ruleset Maven / Gradle / Ivy

Go to download

Parser and writer for RDF in Turtle Quads (TQL) format. TQL is N-Quads with the more permissive (and efficient!) Turtle encoding. TQL is used in DBpedia exports and is supported in input by the Virtuoso triple store.

There is a newer version: 0.4.1
Show newest version
package eu.fbk.rdfpro.rules;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;

import javax.annotation.Nullable;

import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.query.BindingSet;
import org.openrdf.query.algebra.Compare;
import org.openrdf.query.algebra.Compare.CompareOp;
import org.openrdf.query.algebra.Extension;
import org.openrdf.query.algebra.ExtensionElem;
import org.openrdf.query.algebra.Filter;
import org.openrdf.query.algebra.FunctionCall;
import org.openrdf.query.algebra.Join;
import org.openrdf.query.algebra.StatementPattern;
import org.openrdf.query.algebra.TupleExpr;
import org.openrdf.query.algebra.ValueExpr;
import org.openrdf.query.algebra.Var;
import org.openrdf.query.algebra.helpers.QueryModelVisitorBase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.fbk.rdfpro.rules.util.Algebra;
import eu.fbk.rdfpro.util.Namespaces;
import eu.fbk.rdfpro.util.Statements;

public final class Ruleset {

    private static final Logger LOGGER = LoggerFactory.getLogger(Ruleset.class);

    private final Set rules;

    private final Set staticTerms;

    @Nullable
    private transient Map ruleIndex;

    private transient int hash;

    @Nullable
    private transient Map staticHeads;

    @Nullable
    private transient Map dynamicHeads;

    @Nullable
    private transient Map staticBodies;

    @Nullable
    private transient Map dynamicBodies;

    @Nullable
    private transient Ruleset staticRuleset;

    @Nullable
    private transient Ruleset preprocessingRuleset;

    public Ruleset(final Iterable rules, @Nullable final Iterable staticTerms) {
        this.rules = newUnmodifiableSet(rules, false);
        this.staticTerms = newUnmodifiableSet(staticTerms, true);
        this.ruleIndex = null;
        this.hash = 0;
        this.staticBodies = null;
        this.dynamicBodies = null;
        this.staticRuleset = null;
        this.preprocessingRuleset = null;
    }

    private void split() {
        final Map staticHeads = new HashMap<>();
        final Map dynamicHeads = new HashMap<>();
        final Map staticBodies = new HashMap<>();
        final Map dynamicBodies = new HashMap<>();
        for (final Rule rule : this.rules) {
            try {
                final TupleExpr head = Algebra.explodeFilters(rule.getHead());
                final TupleExpr body = Algebra.explodeFilters(rule.getBody());
                final TupleExpr[] headExprs = Algebra.splitTupleExpr(head, this.staticTerms, -1);
                final TupleExpr[] bodyExprs = Algebra.splitTupleExpr(body, this.staticTerms, 1);
                if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("Splitting of rule {}:"
                            + "\n  head original: {}\n  head static:   {}\n  head dynamic:  {}"
                            + "\n  body original: {}\n  body static:   {}\n  body dynamic:  {}",
                            rule.getID(), Algebra.format(rule.getHead()),
                            Algebra.format(headExprs[0]), Algebra.format(headExprs[1]),
                            Algebra.format(rule.getBody()), Algebra.format(bodyExprs[0]),
                            Algebra.format(bodyExprs[1]));
                }
                staticHeads.put(rule.getID(), headExprs[0]);
                dynamicHeads.put(rule.getID(), headExprs[1]);
                staticBodies.put(rule.getID(), bodyExprs[0]);
                dynamicBodies.put(rule.getID(), bodyExprs[1]);
            } catch (final Throwable ex) {
                throw new IllegalArgumentException("Cannot split rule " + rule.getID(), ex);
            }
        }
        this.staticHeads = staticHeads;
        this.dynamicHeads = dynamicHeads;
        this.staticBodies = staticBodies;
        this.dynamicBodies = dynamicBodies;
    }

    public Set getRules() {
        return this.rules;
    }

    @Nullable
    public Rule getRule(final URI ruleID) {
        if (this.ruleIndex == null) {
            final Map index = new HashMap<>();
            for (final Rule rule : this.rules) {
                index.put(rule.getID(), rule);
            }
            this.ruleIndex = index;
        }
        return this.ruleIndex.get(ruleID);
    }

    @Nullable
    public TupleExpr getStaticHead(final Resource ruleID) {
        if (this.staticHeads == null) {
            split();
        }
        return this.staticHeads.get(ruleID);
    }

    @Nullable
    public TupleExpr getStaticBody(final Resource ruleID) {
        if (this.staticBodies == null) {
            split();
        }
        return this.staticBodies.get(ruleID);
    }

    public Set getStaticTerms() {
        return this.staticTerms;
    }

    @Nullable
    public TupleExpr getDynamicHead(final Resource ruleID) {
        if (this.dynamicHeads == null) {
            split();
        }
        return this.dynamicHeads.get(ruleID);
    }

    @Nullable
    public TupleExpr getDynamicBody(final Resource ruleID) {
        if (this.dynamicBodies == null) {
            split();
        }
        return this.dynamicBodies.get(ruleID);
    }

    public Ruleset getDynamicRuleset(
            final Map> staticBindings) {

        final ValueFactory vf = Statements.VALUE_FACTORY;
        final List rules = new ArrayList<>();
        for (final Rule rule : this.rules) {
            final TupleExpr dynamicHead = getDynamicHead(rule.getID());
            final TupleExpr staticBody = getStaticBody(rule.getID());
            final TupleExpr dynamicBody = getDynamicBody(rule.getID());
            if (dynamicHead != null) {
                if (staticBody == null) {
                    final URI id = vf.createURI(rule.getID() + "_" + rules.size());
                    rules.add(new Rule(id, dynamicHead, dynamicBody));
                } else {
                    final Iterable list = staticBindings.get(rule.getID());
                    if (list != null) {
                        for (final BindingSet bindings : list) {
                            final TupleExpr rewrittenHead = Algebra.rewrite(dynamicHead, bindings);
                            final TupleExpr rewrittenBody = Algebra.rewrite(dynamicBody, bindings);
                            if (!Objects.equals(rewrittenHead, rewrittenBody)) {
                                final URI id = vf.createURI(rule.getID() + "_" + rules.size());
                                rules.add(new Rule(id, rewrittenHead, rewrittenBody));
                            }
                        }
                    }
                }
            }
        }
        return new Ruleset(rules, this.staticTerms);
    }

    public Ruleset getPreprocessingRuleset() {
        if (this.preprocessingRuleset == null) {
            final List preprocessingRules = new ArrayList<>();
            for (final Rule rule : this.rules) {
                final TupleExpr dynamicHead = getDynamicHead(rule.getID());
                final TupleExpr staticBody = getStaticBody(rule.getID());
                if (dynamicHead != null && staticBody != null) {
                    preprocessingRules.add(new Rule(rule.getID(), null, staticBody));
                }
            }
            this.preprocessingRuleset = new Ruleset(preprocessingRules, this.staticTerms);
        }
        return this.preprocessingRuleset;
    }

    public Ruleset transform(@Nullable final BindingSet bindings) {
        if (bindings == null || bindings.size() == 0) {
            return this;
        }
        final List transformedRules = new ArrayList<>();
        for (final Rule rule : this.rules) {
            final TupleExpr head = Algebra.rewrite(rule.getHead(), bindings);
            final TupleExpr body = Algebra.rewrite(rule.getBody(), bindings);
            transformedRules.add(new Rule(rule.getID(), head, body));
        }
        return new Ruleset(transformedRules, this.staticTerms);
    }

    public Ruleset transformMergeHeads() {

        final Map> clusters = new HashMap<>();
        for (final Rule rule : this.rules) {
            List cluster = clusters.get(rule.getBody());
            if (cluster == null) {
                cluster = new ArrayList<>();
                clusters.put(rule.getBody(), cluster);
            }
            cluster.add(rule);
        }

        final ValueFactory vf = Statements.VALUE_FACTORY;
        final List mergedRules = new ArrayList<>();
        for (final List cluster : clusters.values()) {
            final Rule first = cluster.get(0);
            final String namespace = first.getID().getNamespace();
            final Set names = new TreeSet<>();
            final TupleExpr body = first.getBody();
            TupleExpr head = null;
            for (int i = 0; i < cluster.size(); ++i) {
                final Rule rule = cluster.get(i);
                final String s = rule.getID().getLocalName();
                final int index = s.indexOf("__");
                names.add(index < 0 ? s : s.substring(0, index));
                head = head == null ? rule.getHead() : new Join(head, rule.getHead());
            }
            final URI id = vf.createURI(namespace,
                    String.join("_", names) + "__" + mergedRules.size());
            mergedRules.add(new Rule(id, head, body));
        }

        return new Ruleset(mergedRules, this.staticTerms);
    }

    public Ruleset transformGlobalGM(@Nullable final Resource globalGraph) {
        final Var graphVar = globalGraph == null ? null : new Var("_const-" + UUID.randomUUID(),
                globalGraph);
        final List transformedRules = new ArrayList<>();
        for (final Rule rule : this.rules) {
            final TupleExpr head = Algebra.rewriteGraph(rule.getHead(), graphVar);
            final TupleExpr body = Algebra.rewriteGraph(rule.getBody(), null);
            transformedRules.add(new Rule(rule.getID(), head, body));
        }
        return new Ruleset(transformedRules, this.staticTerms);
    }

    public Ruleset transformSeparateGM() {

        // Extract all the vars used in the rules
        final Set vars = new HashSet();
        for (final Rule rule : this.rules) {
            vars.addAll(Algebra.extractVariables(rule.getHead()));
            vars.addAll(Algebra.extractVariables(rule.getBody()));
        }

        // Select a fresh graph var that does not appear in the rules
        String graphVarName = "g";
        int index = 0;
        while (vars.contains(graphVarName)) {
            graphVarName = "g" + index++;
        }
        final Var graphVar = new Var(graphVarName);

        // Rewrite rules
        final List transformedRules = new ArrayList<>();
        for (final Rule rule : this.rules) {
            final TupleExpr head = Algebra.rewriteGraph(rule.getHead(), graphVar);
            final TupleExpr body = Algebra.rewriteGraph(rule.getBody(), graphVar);
            transformedRules.add(new Rule(rule.getID(), head, body));
        }
        return new Ruleset(transformedRules, this.staticTerms);
    }

    public Ruleset transformStarGM(final Resource globalGraph) {

        // Extract all the vars used in the rules
        final Set vars = new HashSet();
        for (final Rule rule : this.rules) {
            vars.addAll(Algebra.extractVariables(rule.getHead()));
            vars.addAll(Algebra.extractVariables(rule.getBody()));
        }

        // Select a variable prefix never used in the rules
        String candidatePrefix = "g";
        outer: while (true) {
            for (final String var : vars) {
                if (var.startsWith(candidatePrefix)) {
                    candidatePrefix = "_" + candidatePrefix;
                    continue outer;
                }
            }
            break;
        }
        final String prefix = candidatePrefix;

        // Rewrite rules
        final List transformedRules = new ArrayList<>();
        for (final Rule rule : this.rules) {
            TupleExpr head = rule.getHead();
            TupleExpr body = rule.getBody();
            if (body == null) {
                head = Algebra.rewriteGraph(head, new Var("_const-" + UUID.randomUUID(),
                        globalGraph));
            } else {
                final AtomicInteger counter = new AtomicInteger(0);
                final List filterGraphVars = new ArrayList<>();
                final List bindGraphVars = new ArrayList<>();
                filterGraphVars.add(new Var("_const-" + UUID.randomUUID(), globalGraph));
                bindGraphVars.add(new Var("_const-" + UUID.randomUUID(), globalGraph));
                head = Algebra.rewriteGraph(head, new Var(prefix));
                body = body.clone();
                body.visit(new QueryModelVisitorBase() {

                    @Override
                    public void meet(final StatementPattern pattern) throws RuntimeException {
                        final Var graphVar = new Var(prefix + counter.getAndIncrement());
                        pattern.setContextVar(graphVar);
                        filterGraphVars.add(graphVar.clone());
                        bindGraphVars.add(graphVar.clone());
                    }

                });
                body = new Filter(body, new Compare(new FunctionCall(
                        RR.STAR_SELECT_GRAPH.stringValue(), filterGraphVars), new Var("_const-"
                        + UUID.randomUUID(), RDF.NIL), CompareOp.NE));
                body = new Extension(body, new ExtensionElem(new FunctionCall(
                        RR.STAR_SELECT_GRAPH.stringValue(), bindGraphVars), prefix));
            }
            transformedRules.add(new Rule(rule.getID(), head, body));
        }
        return new Ruleset(transformedRules, this.staticTerms);
    }

    @Override
    public boolean equals(final Object object) {
        if (object == this) {
            return true;
        }
        if (!(object instanceof Ruleset)) {
            return false;
        }
        final Ruleset other = (Ruleset) object;
        return this.rules.equals(other.rules) && this.staticTerms.equals(other.staticTerms);
    }

    @Override
    public int hashCode() {
        if (this.hash == 0) {
            this.hash = Objects.hash(this.rules, this.staticTerms);
        }
        return this.hash;
    }

    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        builder.append("STATIC TERMS (").append(this.staticTerms.size()).append("):");
        for (final URI staticTerm : this.staticTerms) {
            builder.append("\n").append(Statements.formatValue(staticTerm, Namespaces.DEFAULT));
        }
        builder.append("\n\nRULES (").append(this.rules.size()).append("):");
        for (final Rule rule : this.rules) {
            builder.append("\n").append(rule);
        }
        return builder.toString();
    }

    public > T toRDF(final T output) {

        // Emit static terms
        final ValueFactory vf = Statements.VALUE_FACTORY;
        for (final URI staticTerm : this.staticTerms) {
            vf.createStatement(staticTerm, RDF.TYPE, RR.STATIC_TERM);
        }

        // Emit rules
        for (final Rule rule : this.rules) {
            rule.toRDF(output);
        }
        return output;
    }

    public static Ruleset fromRDF(final Iterable model) {

        // Parse static terms
        final List staticTerms = new ArrayList<>();
        for (final Statement stmt : model) {
            if (stmt.getSubject() instanceof URI && RDF.TYPE.equals(stmt.getPredicate())
                    && RR.STATIC_TERM.equals(stmt.getObject())) {
                staticTerms.add((URI) stmt.getSubject());
            }
        }

        // Parse rules
        final List rules = Rule.fromRDF(model);

        // Build resulting ruleset
        return new Ruleset(rules, staticTerms);
    }

    public static Ruleset merge(final Ruleset... rulesets) {
        if (rulesets.length == 0) {
            return new Ruleset(Collections.emptyList(), Collections.emptyList());
        } else if (rulesets.length == 1) {
            return rulesets[0];
        } else {
            final List staticTerms = new ArrayList<>();
            final List rules = new ArrayList<>();
            for (final Ruleset ruleset : rulesets) {
                staticTerms.addAll(ruleset.getStaticTerms());
                rules.addAll(ruleset.getRules());
            }
            return new Ruleset(rules, staticTerms);
        }
    }

    private static  Set newUnmodifiableSet(final Iterable elements,
            final boolean deduplicate) {
        final Set set = new LinkedHashSet();
        for (final T element : elements) {
            if (!deduplicate && set.contains(element)) {
                throw new IllegalArgumentException("Duplicate element: " + element);
            }
            set.add(element);
        }
        return Collections.unmodifiableSet(set);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy