// Source listing of org.aksw.jenax.path.relgen.RelationGeneratorBase
// (artifact: jenax-arq-datapaths).
package org.aksw.jenax.path.relgen;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.aksw.jenax.arq.util.syntax.ElementUtils;
import org.aksw.jenax.path.core.PathOpsPE;
import org.aksw.jenax.path.core.PathPE;
import org.aksw.jenax.sparql.fragment.api.Fragment;
import org.aksw.jenax.sparql.fragment.api.Fragment1;
import org.aksw.jenax.sparql.fragment.impl.Concept;
import org.aksw.jenax.sparql.fragment.impl.UnaryXExpr;
import org.apache.jena.graph.Node;
import org.apache.jena.sparql.core.Var;
import org.apache.jena.sparql.expr.Expr;
import org.apache.jena.sparql.syntax.Element;

import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;

/**
 * Base class for generators that map a path of {@link UnaryXExpr} segments onto a
 * chain of joined relations ({@link Fragment}s).
 *
 * <p>Every processed segment consumes one column (variable) of the current relation
 * and, unless the segment is always true, contributes a filter condition on that
 * column. Once all columns of the current relation are consumed, the relation
 * (with its conditions applied) is moved to the list of past relations and the
 * next one is requested from {@link #nextInstance()}. The first variable of the
 * new relation is renamed to the last variable of the previous relation, which
 * joins the two; all other variables are prefixed with the current context hash.
 *
 * <p>Instances are stateful and the class performs no synchronization.
 */
public abstract class RelationGeneratorBase
//    implements Trav1Provider
{

    /** Relations that have been traversed by the path -
     *  does not include the current relation */
    protected List<Fragment> pastRelations = new ArrayList<>();

    /** The current relation; {@code null} until {@link #ensureInit()} has run after a reset */
    protected Fragment relation;

    // A hash updated upon requesting a new relation; hash is based on the seen path segments
    // Note the hash is really per relation and not per column.
    // The column names are appended to the hash
    protected HashCode contextHash;

    /** Cached String version of the hash */
    protected String contextHashStr;

    /** Conditions imposed on the current relation based on the seen path segments */
    protected List<Expr> conditions = new ArrayList<>();


    /** The absolute path at which the relation was requested
     * Once it covers all columns of the relation it is used to compute the
     * next context hash
     */
    protected PathPE relationStartAbsPath;


    /**
     * The relative path of segments seen for the current relation
     * Connects to relationStartAbsPath
     */
    protected PathPE relPath;

    /** Index of the column of the current relation that the next segment will consume. */
    int columnIdx = 0;


    /**
     * Yield the next relation to traverse.
     *
     * <p>Called by {@link #ensureInit()} after the context hash was updated and after the
     * previous relation (if any) was appended to {@link #pastRelations}. The returned
     * relation must expose at least two variables.
     *
     * @return the next relation; its variables are renamed by the caller
     */
    protected abstract Fragment nextInstance();


    /**
     * Creates a generator in its reset state.
     *
     * <p>Note: this invokes the overridable {@link #reset()} (and through it
     * {@link #updateHash()} / {@code computeNextHash}); overrides therefore run before
     * the subclass' own field initializers and constructor body.
     */
    public RelationGeneratorBase() {
        reset();
    }

    /**
     * Process all segments of the given path in order.
     *
     * <p>An absolute path discards all accumulated state first; a relative path continues
     * from the current state.
     *
     * @param path the path whose segments to process
     * @return the filtered relation returned for the last segment, or the current
     *         relation (without conditions applied) if the path has no segments
     */
    public Fragment process(PathPE path) {
        if (path.isAbsolute()) {
            reset();
        }

        ensureInit();

        Fragment result = relation;
        for (UnaryXExpr segment : path.getSegments()) {
            result = process(segment);
        }
        return result;
    }

    /**
     * Discard all traversal state and start over from a fresh absolute path.
     * The context hash is re-seeded from the (empty) relative path.
     */
    protected void reset() {
        setHashCode(null);
        pastRelations.clear();
        // Conditions refer to variables of the relation being dropped - do not let them linger
        conditions.clear();
        relation = null;
        columnIdx = 0;
        relationStartAbsPath = PathOpsPE.get().newAbsolutePath();
        relPath = PathOpsPE.newRelativePath();
        updateHash();
    }

    /**
     * Make sure there is a current relation with at least one unconsumed column.
     *
     * <p>If there is no current relation, or all of its columns have been consumed, this
     * <ol>
     *   <li>updates the context hash,</li>
     *   <li>archives the current relation (with its conditions applied) in
     *       {@link #pastRelations},</li>
     *   <li>requests the next relation from {@link #nextInstance()},</li>
     *   <li>advances the start path by the relative path seen so far,</li>
     *   <li>renames the variables of the new relation (see below), and</li>
     *   <li>positions {@link #columnIdx} on the first column that is not a join column.</li>
     * </ol>
     * Otherwise it does nothing.
     *
     * @throws IllegalStateException if the next relation has fewer than two variables
     */
    public void ensureInit() {
        boolean hasOpenColumn = relation != null && columnIdx < relation.getVars().size();
        if (hasOpenColumn) {
            return;
        }

        updateHash();

        Var pastLastVar = null;
        if (relation != null) {
            Fragment pastItem = relation.filter(conditions);
            List<Var> pastVars = pastItem.getVars();
            pastLastVar = pastVars.get(pastVars.size() - 1);
            pastRelations.add(pastItem);
        }

        relation = nextInstance();

        relationStartAbsPath = relationStartAbsPath.resolve(relPath);
        relPath = PathOpsPE.newRelativePath();

        List<Var> vars = relation.getVars();
        if (vars.size() <= 1) {
            throw new IllegalStateException("Relations must have at least 2 variables");
        }

        conditions.clear();

        // Rename variables w.r.t the hashes:
        // The first var becomes the last var of the prior relation (in order to join with it);
        // without a prior relation it is prefixed with the current hash like all other vars.
        Var firstVar = vars.get(0);
        Var joinVar = pastLastVar != null
                ? pastLastVar
                : Var.alloc(contextHashStr + "_" + firstVar.getName());

        Map<Node, Node> remap = relation.getVarsMentioned().stream()
                .collect(Collectors.toMap(
                        v -> v,
                        node -> {
                            Node renamed;
                            if (!node.isVariable()) {
                                renamed = node;
                            } else if (node.equals(firstVar)) {
                                renamed = joinVar;
                            } else {
                                renamed = Var.alloc(contextHashStr + "_" + node.getName());
                            }
                            return renamed;
                        }));

        relation = relation.applyNodeTransform(v -> remap.getOrDefault(v, v));

        // If we joined the last column of the previous relation with the first
        // column of the next one, then jump over that first column
        columnIdx = pastRelations.isEmpty() ? 0 : 1;
    }


    /**
     * Process a single path segment: consume the next column of the current relation and,
     * unless the segment is always true, constrain that column by the segment's expression.
     *
     * @param segment the segment to process
     * @return the relation the segment was applied to, filtered by all conditions collected
     *         for it so far (including the one from this segment)
     */
    public Fragment process(UnaryXExpr segment) {
        ensureInit();

        relPath = relPath.resolve(segment);

        Var v = relation.getVars().get(columnIdx);
        ++columnIdx;

        if (!segment.isAlwaysTrue()) {
            // Replace every variable node of the expression (it is expected to mention
            // only one) with the column variable of the relation instance
            Expr expr = segment.getExpr().applyNodeTransform(x -> x.isVariable() ? v : x);
            conditions.add(expr);
        }

        // Capture the result before ensureInit(): if this segment consumed the last column,
        // ensureInit() switches to the next relation and clears the conditions.
        Fragment r = relation.filter(conditions);

        ensureInit();

        return r;
    }


    /**
     * Return the variable of the column the next segment would consume.
     *
     * <p>Requires an initialized current relation (e.g. after a call to a {@code process}
     * method or {@link #ensureInit()}); directly after construction or a reset the
     * relation is {@code null} and this method fails with a {@link NullPointerException}.
     *
     * @return the variable at the current column index
     */
    public Var getCurrentVar() {
        return relation.getVars().get(columnIdx);
    }

    /**
     * Return the already traversed relations.
     *
     * @return the live internal list (not a copy); it is cleared on reset and appended to
     *         whenever the generator advances to a new relation
     */
    public List<Fragment> getPastRelations() {
        return pastRelations;
    }


    /**
     * Build a concept from the assembled element and the current variable.
     * The same precondition as for {@link #getCurrentVar()} applies.
     *
     * @return a new concept over {@link #assemble()} and {@link #getCurrentVar()}
     */
    public Fragment1 getCurrentConcept() {
        return new Concept(assemble(), getCurrentVar());
    }

    /**
     * Assemble the complete element: the elements of all past relations followed by the
     * element of the current relation (if any) with the current conditions applied.
     *
     * @return the assembled element, grouped if needed
     */
    public Element assemble() {
        List<Element> elts = pastRelations.stream()
                .flatMap(r -> r.getElements().stream())
                .collect(Collectors.toList());

        if (relation != null) {
            elts.add(relation.filter(conditions).getElement());
        }

        return ElementUtils.groupIfNeeded(elts);
    }


    /** Advance the context hash by the relative path seen for the current relation. */
    protected void updateHash() {
        HashCode nextHashCode = computeNextHash(contextHash, relationStartAbsPath, relPath);
        setHashCode(nextHashCode);
    }

    /**
     * Set the context hash and refresh its cached string form.
     *
     * @param hashCode the new hash; {@code null} clears both the hash and the cached string
     */
    protected void setHashCode(HashCode hashCode) {
        contextHash = hashCode;
        contextHashStr = hashCode == null ? null : encodeHashCode(hashCode);
    }

    /**
     * Encode a hash code as the string used to prefix variable names.
     *
     * @param hashCode the hash to encode; must not be {@code null}
     * @return the result of {@link HashCode#toString()}
     */
    protected String encodeHashCode(HashCode hashCode) {
        return hashCode.toString(); // BaseEncoding.base64Url().encode(contextHash.asBytes());
    }

    /**
     * Compute the follow-up context hash from the current hash and the relative path.
     *
     * @param currentHash the hash so far, or {@code null} if there is none yet
     * @param relationStartAbsPath not used by this implementation; available to overrides
     * @param relPath the relative path whose string form contributes to the hash
     * @return the hash of {@code relPath} alone if {@code currentHash} is {@code null},
     *         otherwise the ordered combination of {@code currentHash} and that hash
     */
    protected HashCode computeNextHash(HashCode currentHash, PathPE relationStartAbsPath, PathPE relPath) {
        // NOTE(review): Guava 31+ deprecates murmur3_32() in favor of murmur3_32_fixed()
        // (differs for strings with non-BMP characters). Switching would change generated
        // variable names for such inputs, so it is left as is here.
        HashCode contrib = Hashing.murmur3_32().hashString(relPath.toString(), StandardCharsets.UTF_8);

        return currentHash == null
                ? contrib
                : Hashing.combineOrdered(Arrays.asList(currentHash, contrib));
    }

}