All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.aksw.jenax.sparql.fragment.impl.FragmentJoiner Maven / Gradle / Ivy

There is a newer version: 5.0.0-1
Show newest version
package org.aksw.jenax.sparql.fragment.impl;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

import org.aksw.commons.collections.generator.Generator;
import org.aksw.jenax.arq.util.syntax.ElementUtils;
import org.aksw.jenax.arq.util.var.VarGeneratorBlacklist;
import org.aksw.jenax.arq.util.var.VarUtils;
import org.aksw.jenax.sparql.fragment.api.Fragment;
import org.aksw.jenax.sparql.fragment.api.Fragment1;
import org.apache.jena.atlas.lib.tuple.Tuple;
import org.apache.jena.sparql.algebra.Algebra;
import org.apache.jena.sparql.algebra.Op;
import org.apache.jena.sparql.algebra.OpVars;
import org.apache.jena.sparql.core.Var;
import org.apache.jena.sparql.syntax.Element;
import org.apache.jena.sparql.syntax.ElementOptional;

import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;

/**
 * Fluent helper that joins two {@link Fragment}s on lists of join variables,
 * renaming variables so that the two sides do not clash accidentally.
 *
 * <p>Terminology used throughout this class:
 * <ul>
 *   <li><b>attr</b> / <b>src</b> / <b>lhs</b>: the fragment passed to the constructor.</li>
 *   <li><b>filter</b> / <b>tgt</b> / <b>rhs</b>: the fragment passed to {@link #with(Fragment, Var...)}
 *       or {@link #yieldRenamedFilter(Fragment)}.</li>
 * </ul>
 *
 * <p>Instances are mutable: {@code with} and {@code yieldRenamedFilter} store the
 * filter fragment on the instance before computing the result.
 */
public class FragmentJoiner {
    /** Left-hand side of the join. */
    protected Fragment attrRelation;

    /** Join variables of {@link #attrRelation}; the i-th entry pairs with the i-th entry of {@link #filterJoinVars}. */
    protected List<Var> attrJoinVars;

    /** Right-hand side of the join; set by {@code with(...)} / {@code yieldRenamedFilter(...)}. */
    protected Fragment filterRelation;

    /** Join variables of {@link #filterRelation}. */
    protected List<Var> filterJoinVars;

    /** If true, the filter's elements are placed before the attr's elements in the joined element. */
    protected boolean filterRelationFirst;

    // Variables of the filter relation to be appended to the variables of the resulting relation
    // Note, that the variables may be renamed
    //protected List<Var> filterExtensionVars;

    /**
     * Optional explicit projection of the join result.
     * {@link #projectSrcVars(Var...)} and {@link #projectTgtVars(Var...)} add entries
     * to this insertion-ordered map:
     * <ul>
     *   <li>value {@code true}: the variable is taken from the lhs (attr / src) of the join</li>
     *   <li>value {@code false}: the variable is taken from the rhs (filter / tgt) of the join</li>
     * </ul>
     * {@code null} means that no projection was requested, in which case the
     * variables of {@link #attrRelation} are projected.
     */
    protected Map<Var, Boolean> varToOrigin = null;

    public FragmentJoiner(Fragment attrRelation, List<Var> attrJoinVars) {
        this(attrRelation, attrJoinVars, false);
    }

    /**
     * @param attrRelation the lhs fragment
     * @param attrJoinVars the lhs join variables; the list is stored as-is (not copied),
     *        so it must be modifiable if {@link #addAttrJoinVar(Var)} is going to be used
     * @param filterRelationFirst whether the filter's elements precede the attr's elements in the result
     */
    public FragmentJoiner(Fragment attrRelation, List<Var> attrJoinVars, boolean filterRelationFirst) {
        this.attrRelation = attrRelation;
        this.attrJoinVars = attrJoinVars;
        this.filterRelationFirst = filterRelationFirst;
    }

    /** Create a joiner for {@code r} that joins on the given variables. */
    public static FragmentJoiner from(Fragment r, Var... vars) {
        return from(r, Arrays.asList(vars));
    }

    /** Create a joiner for {@code r} that joins on a modifiable copy of {@code vars}. */
    public static FragmentJoiner from(Fragment r, List<Var> vars) {
        return new FragmentJoiner(r, new ArrayList<>(vars));
    }

    /** Append a further lhs join variable. */
    public FragmentJoiner addAttrJoinVar(Var var) {
        attrJoinVars.add(var);
        return this;
    }

    /** Configure whether the filter's elements are emitted before the attr's elements. */
    public FragmentJoiner filterRelationFirst(boolean onOrOff) {
        this.filterRelationFirst = onOrOff;
        return this;
    }

    /**
     * Project the given variables from the lhs (attr / src) side of the join.
     *
     * @throws IllegalArgumentException if a variable has already been projected
     */
    public FragmentJoiner projectSrcVars(Var... vars) {
        return project(true, vars);
    }

    /**
     * Project the given variables from the rhs (filter / tgt) side of the join.
     *
     * @throws IllegalArgumentException if a variable has already been projected
     */
    public FragmentJoiner projectTgtVars(Var... vars) {
        return project(false, vars);
    }

    /** Register {@code vars} in {@link #varToOrigin} with the given origin flag, rejecting duplicates. */
    private FragmentJoiner project(boolean origin, Var... vars) {
        if (varToOrigin == null) {
            varToOrigin = new LinkedHashMap<>();
        }
        for (Var v : vars) {
            // putIfAbsent keeps the earlier registration intact when we reject the duplicate
            Boolean prior = varToOrigin.putIfAbsent(v, origin);
            if (prior != null) {
                throw new IllegalArgumentException("Variable " + v + " was already projected; prior value: " + prior + " - current value: " + origin);
            }
        }
        return this;
    }

    /**
     * Join with null is a no-op - i.e. it yields the original relation
     *
     * @param c the filter (rhs) fragment, or null
     * @param joinVars If empty, all vars of c will be used for the join
     * @return the joined fragment, or the attr fragment itself if {@code c} is null
     */
    public Fragment with(Fragment c, Var... joinVars) {
        if (c == null) {
            return attrRelation;
        }
        filterRelation = c;
        filterJoinVars = joinVars.length == 0 ? c.getVars() : Arrays.asList(joinVars);
        return get();
    }

    /** Join with a single-variable fragment on that fragment's variable. */
    public Fragment with(Fragment1 ur) {
        return with(ur, ur.getVar());
    }

    // This API for this method is somewhat hacky as it conflates joining with renaming; it should be revised.
    // Maybe introduce some generic operation class?
    // relation.opOn(vars).joinWith(otherRelation)
    // relation.opOn(vars).yieldRenamedFilter(filterRelation)
    /**
     * Set {@code c} as the filter fragment, using all of its variables as join variables,
     * and return the renamed filter (see {@link #yieldRenamedFilterCore()}).
     */
    public Fragment yieldRenamedFilter(Fragment c) {
        filterRelation = c;
        filterJoinVars = c.getVars();
        return yieldRenamedFilterCore();
    }

    /**
     * Only yield the renamed filter portion of a 'join':
     *
     * newFilter = attrRelation.joinOn(vars).yieldRenamedFilter(filter);
     *
     * @return a fragment built from the filter's element with variables renamed according to
     *         the join var map, and the filter's variables mapped through that same map
     */
    public Fragment yieldRenamedFilterCore() {
        Set<Var> attrVarsMentioned = attrRelation.getVarsMentioned();
        Set<Var> filterVarsMentioned = filterRelation.getVarsMentioned();

        Map<Var, Var> varMap = VarUtils.createJoinVarMap(
                attrVarsMentioned, filterVarsMentioned, attrJoinVars, filterJoinVars, null);

        Element filterElement = filterRelation.getElement();
        Element newFilterElement = ElementUtils.createRenamedElement(filterElement, varMap);

        // Variables without an entry in the map keep their name
        List<Var> newFilterVars = filterRelation.getVars().stream()
            .map(v -> varMap.getOrDefault(v, v))
            .collect(Collectors.toList());

        return new FragmentImpl(newFilterElement, newFilterVars);
    }

    /**
     * Perform variable renaming according the configuration and yield a resulting element.
     * By default, all variables of lhs are considered fixed, whereas all variables of rhs
     * are subject to renaming.
     *
     * TODO This method could use some clean up.
     *
     * If we have { ?s ?p ?o }(?s) join { ?p ?s ?o }(?p)
     * we not only have to map rhs.?p->?s, but also add rhs.?s -> freshVar
     *
     * So after having set up rhs join var map,
     * for each rhs.var that maps to a var which exists in rhs.mentionedVars, remap it to a fresh variable
     *
     * If we have { ?s ?p ?o }(?s, ?p) join { ?p ?s ?o }(?p, ?s)
     * { ?s ?p ?o } { ?s ?p ?x }
     *
     * @return the joined fragment; its variables are the explicitly projected ones if a
     *         projection was configured, otherwise the variables of the attr fragment
     * @throws NullPointerException if no filter fragment has been set
     */
    public Fragment get() {
        Objects.requireNonNull(filterRelation, "No filter relation set - supply one via with(...)");

        List<Var> attrProjVars = varToOrigin == null
                ? attrRelation.getVars()
                : new ArrayList<>(varToOrigin.keySet());

        Set<Var> attrVarsMentioned = attrRelation.getVarsMentioned();
        Set<Var> filterVarsMentioned = filterRelation.getVarsMentioned();

        // Convention: if no projection was specified, all variables of lhs
        // are fixed (so none is undistinguished),
        // and all vars of rhs are non-distinguished
        Set<Var> fixedVarsLhs = attrVarsMentioned;
        Set<Var> fixedVarsRhs = Collections.emptySet();
        if (varToOrigin != null) {
            fixedVarsLhs = varToOrigin.entrySet().stream()
                    .filter(Entry::getValue)
                    .map(Entry::getKey)
                    .collect(Collectors.toSet());
            fixedVarsRhs = varToOrigin.entrySet().stream()
                    .filter(e -> !e.getValue())
                    .map(Entry::getKey)
                    .collect(Collectors.toSet());
        }

        // Variables mentioned on both sides; copied into a mutable set because
        // resolveConflicts consumes it
        Set<Var> conflictVars = new HashSet<>(Sets.intersection(attrVarsMentioned, filterVarsMentioned));

        Map<Var, Var> lhsMap = new HashMap<>();
        Map<Var, Var> rhsMap = new HashMap<>();

        // Map each rhs join var to its lhs counterpart
        for (int i = 0; i < attrJoinVars.size(); ++i) {
            Var sourceJoinVar = attrJoinVars.get(i);
            Var targetJoinVar = filterJoinVars.get(i);
            rhsMap.put(targetJoinVar, sourceJoinVar);
        }

        // Fresh variables must not collide with anything mentioned on either side
        Generator<Var> gen = VarGeneratorBlacklist.create(Sets.union(attrVarsMentioned, filterVarsMentioned));

        // Remap rhs
        resolveConflicts(filterVarsMentioned, fixedVarsRhs, conflictVars, lhsMap, rhsMap, gen);
        // NOTE(review): the call above loops until conflictVars is empty, so this second
        // call receives an empty set and lhsMap stays empty. If variables fixed on the rhs
        // were meant to be renamed on the lhs here, that currently does not happen - confirm intent.
        resolveConflicts(attrVarsMentioned, fixedVarsLhs, conflictVars, rhsMap, lhsMap, gen);

        // [?a ?b] join [?x ?y] on [?b=?x] projSrc(?a) projTgt(?y)-> [?a ?y]
        // Note: It is invalid for the the same variable to be projected from lhs and rhs
        //       (even if it is used in a join (lhs.?x = rhs.?x), it should only be projected once)

        Element filterElement = filterRelation.getElement();
        Element newFilterElement = ElementUtils.createRenamedElement(filterElement, rhsMap);

        Element attrElement = attrRelation.getElement();
        Element newAttrElement = ElementUtils.createRenamedElement(attrElement, lhsMap);

        // TODO Maybe add a flag whether omission of joins should actually be applied
        // If the filter is a subject concept and its variable appears
        // in the subject position of the attr element,
        // we can omit the filter
        boolean allowOmitJoin = true;

        boolean canOmitJoin = false;
        if (allowOmitJoin) {
            if (filterRelation.getElements().isEmpty()) {
                // TODO We may want to apply normalization - e.g. detect a group with an empty bgb
                canOmitJoin = true;
            } else if (filterRelation.getVars().size() == 1) {
                Fragment1 fr = filterRelation.toFragment1();
                Var rawFilterVar = fr.getVar();
                if (fr.isSubjectConcept()) {

                    boolean requiresJoin = false;
                    // If we are prepending an attr element that starts with
                    // OPTIONAL, we cannot omit the join

                    // TODO This rule is quite simple yet effective - we should
                    // make this more flexible though
                    if (filterRelationFirst) {
                        List<Element> elts = attrRelation.getElements();
                        if (!elts.isEmpty()) {
                            requiresJoin = elts.get(0) instanceof ElementOptional;
                        }
                    }

                    if (!requiresJoin) {
                        // We can omit with a subject concept if there is a join on the subject position
                        Var effectiveFilterVar = rhsMap.get(rawFilterVar);
                        Op attrOp = Algebra.compile(newAttrElement);
                        // Index 1 is taken to be the subject position of the per-position tuple
                        Tuple<Set<Var>> tuple = OpVars.mentionedVarsByPosition(attrOp);
                        canOmitJoin = tuple.get(1).contains(effectiveFilterVar);
                    }
                }
            }
        }

        List<Element> fes = ElementUtils.toElementList(newFilterElement);
        List<Element> aes = ElementUtils.toElementList(newAttrElement);

        Element newElement = canOmitJoin ?
                newAttrElement : filterRelationFirst
                    ? ElementUtils.groupIfNeeded(Iterables.concat(fes, aes))
                    : ElementUtils.groupIfNeeded(Iterables.concat(aes, fes));

        return new FragmentImpl(newElement, attrProjVars);
    }

    /**
     * Remove and return the first item in iteration order.
     *
     * @throws java.util.NoSuchElementException if {@code items} is empty
     * @throws UnsupportedOperationException if the iterator does not support removal
     */
    public static <T> T pop(Iterable<T> items) {
        Iterator<T> it = items.iterator();
        T result = it.next();
        it.remove();
        return result;
    }

    /**
     * Consume {@code conflictVars} and extend {@code rhsMap} with renamings to fresh
     * variables so that non-fixed rhs variables no longer clash.
     *
     * <p>For each conflict variable that is not in {@code rhsFixedVars}:
     * <ul>
     *   <li>if it already has a mapping (it is a join var) and its mapping target also occurs
     *       in the rhs as a variable without a mapping, that target is renamed to a fresh
     *       variable on the rhs and removed from {@code conflictVars};</li>
     *   <li>if it has no mapping, it is itself renamed to a fresh variable.</li>
     * </ul>
     * Conflict variables contained in {@code rhsFixedVars} are removed from the set without
     * any renaming being recorded.
     *
     * @param rhsVarsMentioned all variables mentioned by the side being renamed
     * @param rhsFixedVars variables of that side which must keep their name
     * @param conflictVars variables to process; this set is emptied by the call
     * @param lhsMap renaming of the opposite side; currently not accessed by this method
     * @param rhsMap renaming of the side being processed; updated in place
     * @param gen source of fresh variables
     */
    public void resolveConflicts(Set<Var> rhsVarsMentioned, Set<Var> rhsFixedVars, Set<Var> conflictVars,
            Map<Var, Var> lhsMap, Map<Var, Var> rhsMap, Generator<Var> gen) {
        while (!conflictVars.isEmpty()) {
            Var rhsJoinVar = pop(conflictVars);

            // If the variable is fixed in rhs, its occurrence in lhs has to be renamed
            // NOTE(review): the variable is dropped here rather than kept for a later lhs pass - confirm.
            if (rhsFixedVars.contains(rhsJoinVar)) {
                continue;
            }

            // note: A variable can only be fixed in both lhs and rhs if it used on both sides of a join

            // Both sets are live views, so they reflect entries added to rhsMap below
            Set<Var> rhsJoinVars = rhsMap.keySet();
            Set<Var> rhsNonJoinVars = Sets.difference(rhsVarsMentioned, rhsJoinVars);

            // If the variable is part of the join, try the join var first
            Var targetLhsVar = rhsMap.get(rhsJoinVar);

            if (targetLhsVar != null) {
                // If rhs.joinVar joins with another variable targetLhsVar X,
                // where X happens to be in rhs.nonJoinVars, rename X in rhs
                if (rhsNonJoinVars.contains(targetLhsVar)) {
                    Var rhsFreshVar = gen.next();
                    conflictVars.remove(targetLhsVar);
                    rhsMap.put(targetLhsVar, rhsFreshVar);
                }
            } else {
                // Here is the case where a variable X of rhs simply overlaps with one of lhs
                Var rhsFreshVar = gen.next();
                rhsMap.put(rhsJoinVar, rhsFreshVar);
            }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy