All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.bop.joinGraph.fast.DefaultEvaluationPlan2 Maven / Gradle / Ivy

/*

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/
/*
 * Created on Jun 26, 2008
 */

package com.bigdata.bop.joinGraph.fast;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.log4j.Logger;

import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.IVariableOrConstant;
import com.bigdata.bop.Var;
import com.bigdata.bop.joinGraph.IEvaluationPlan;
import com.bigdata.bop.joinGraph.IRangeCountFactory;
import com.bigdata.journal.ITx;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.IAccessPathExpander;
import com.bigdata.relation.rule.IStarJoin;
import com.bigdata.relation.rule.eval.IJoinNexus;

/**
 * The evaluation order is determined by analysis of the propagation of
 * bindings. The most selective predicate is chosen first (having the fewest
 * unbound variables with ties broken by a range count on the data) and "fake"
 * bindings are propagated to the other predicates in the tail. This process is
 * repeated until all variables are bound and an evaluation order has been
 * determined.
 * 
 * @author Bryan Thompson
 * @version $Id$
 */
public class DefaultEvaluationPlan2 implements IEvaluationPlan {

    protected static final transient Logger log = Logger.getLogger(DefaultEvaluationPlan2.class);
    
    protected static final transient boolean DEBUG = log.isDebugEnabled();
    
    protected static final transient boolean INFO = log.isInfoEnabled();
    
    /**
     * @todo not serializable but used by {@link #rangeCount(int)}, which is a
     *       problem.
     */
    private final IRangeCountFactory rangeCountFactory;

    private final IRule rule;

    private final int tailCount;
    
    private static final transient long BOTH_OPTIONAL = Long.MAX_VALUE-1;
    
    private static final transient long ONE_OPTIONAL = Long.MAX_VALUE-2;
    
    private static final transient long NO_SHARED_VARS = Long.MAX_VALUE-3;
    
    /**
     * The computed evaluation order. The index into the array is the
     * evaluation position and the element stored at that index is the index
     * of the tail predicate that will be evaluated at that position. So
     * [2,0,1] says that the predicates will be evaluated in the order
     * tail[2], then tail[0], then tail[1].
     */
    private int[/* order */] order;

    /**
     * Return the evaluation order that was computed for the rule.
     * 
     * @return The array of tail predicate indices in evaluation order. The
     *         internal array is returned, not a copy.
     * 
     * @throws IllegalStateException
     *             if the order has not been computed yet. This will happen
     *             if you try to use toString() during the ctor before the
     *             order has been computed.
     */
    public int[] getOrder() {

        final int[] computed = order;

        if (computed != null) {

            return computed;

        }

        throw new IllegalStateException();

    }

    /**
     * Cache of the computed range counts for the predicates in the tail. The
     * elements of this array are initialized to -1L, which indicates that the
     * range count has NOT been computed. Range counts are computed on demand
     * and MAY be zero. Only an approximate range count is obtained. Such
     * approximate range counts are an upper bound on the #of elements that are
     * spanned by the access pattern. Therefore if the range count reports ZERO
     * (0L) it is a real zero and the access pattern does not match anything in
     * the data. The only other caveat is that the range counts are valid as of
     * the commit point on which the access pattern is reading. If you obtain
     * them for {@link ITx#READ_COMMITTED} or {@link ITx#UNISOLATED} views then
     * they could be invalidated by concurrent writers.
     */
    private long[/*tailIndex*/] rangeCount;
    
    /**
     * Keeps track of which tails have been used already and which still need
     * to be evaluated.
     */
    private transient boolean[/*tailIndex*/] used;
    
    /**
     * true iff the rule was proven to have no solutions.
     * 
     * @todo this is not being computed.
     */
    private boolean empty = false;
    
    /**
     * Report the value of the <code>empty</code> flag.
     * 
     * @return <code>true</code> iff the rule was proven to have no
     *         solutions.
     */
    public boolean isEmpty() {
        return this.empty;
    }
    
    /**
     * Computes an evaluation plan for the rule, using the range count factory
     * reported by the join nexus.
     * 
     * @param joinNexus
     *            The join nexus from which the range count factory is
     *            obtained.
     * @param rule
     *            The rule.
     */
    public DefaultEvaluationPlan2(final IJoinNexus joinNexus, final IRule rule) {
        // delegate to the primary constructor.
        this(joinNexus.getRangeCountFactory(), rule);
    }
    
    /**
     * Computes an evaluation plan for the rule. The evaluation order is
     * computed eagerly, before the constructor returns.
     * 
     * @param rangeCountFactory
     *            The range count factory.
     * @param rule
     *            The rule.
     * 
     * @throws IllegalArgumentException
     *             if either argument is <code>null</code>.
     */
    public DefaultEvaluationPlan2(final IRangeCountFactory rangeCountFactory,
            final IRule rule) {

        // arguments are validated in declaration order.
        if (rangeCountFactory == null) {
            throw new IllegalArgumentException();
        }
        if (rule == null) {
            throw new IllegalArgumentException();
        }

        this.rule = rule;
        this.rangeCountFactory = rangeCountFactory;
        this.tailCount = rule.getTailCount();

        if (DEBUG) {
            log.debug("rule=" + rule);
        }

        calc(rule);

        if (DEBUG) {
            // dump the computed order, one tail index per log line.
            for (final int tailIndex : order) {
                log.debug(tailIndex);
            }
        }

    }
    
    /**
     * Compute the evaluation order.
     */
    private void calc(final IRule rule) {

        if (order != null)
            return;

        order = new int[tailCount];
        rangeCount = new long[tailCount];
        used = new boolean[tailCount];
        
        // clear arrays.
        for (int i = 0; i < tailCount; i++) {
            order[i] = -1; // -1 is used to detect logic errors.
            rangeCount[i] = -1L;  // -1L indicates no range count yet.
            used[i] = false;  // not yet evaluated
        }

        if (tailCount == 1) {
            order[0] = 0;
            return;
        }
        
        /*
        if (tailCount == 2) {
            order[0] = cardinality(0) <= cardinality(1) ? 0 : 1;
            order[1] = cardinality(0) <= cardinality(1) ? 1 : 0;
            return;
        }
        */
        
        final Set> runFirstVars = new HashSet>();
        
        int startIndex = 0;
        for (int i = 0; i < tailCount; i++) {
            final IPredicate pred = rule.getTail(i);
        	final IAccessPathExpander expander = pred.getAccessPathExpander();
            if (expander != null && expander.runFirst()) {
                if (DEBUG) log.debug("found a run first, tail " + i);
                final Iterator> it = BOpUtility.getArgumentVariables(pred);
                while (it.hasNext()) {
                	runFirstVars.add(it.next());
                }
                order[startIndex++] = i;
                used[i] = true;
            }
        }
        
        // if there are no more tails left after the expanders, we're done
        if (startIndex == tailCount) {
        	return;
        }
        
        // if there is only one tail left after the expanders
        if (startIndex == tailCount-1) {
            if (DEBUG) log.debug("one tail left");
            for (int i = 0; i < tailCount; i++) {
                // only check unused tails
                if (used[i]) {
                    continue;
                }
                order[tailCount-1] = i;
                used[i] = true;
                return;
            }            
        }
        
        int preferredFirstTail = -1;
        // give preferential treatment to a tail that shares variables with the
        // runFirst expanders
        for (int i = 0; i < tailCount; i++) {
            // only check unused tails
            if (used[i]) {
                continue;
            }
            final IPredicate pred = rule.getTail(i);
            final Iterator> it = BOpUtility.getArgumentVariables(pred);
            while (it.hasNext()) {
            	if (runFirstVars.contains(it.next())) {
            		preferredFirstTail = i;
            	}
            }
            if (preferredFirstTail != -1)
            	break;
        }            
        
        // if there are only two tails left after the expanders
        if (startIndex == tailCount-2) {
            if (DEBUG) log.debug("two tails left");
            int t1 = -1;
            int t2 = -1;
            for (int i = 0; i < tailCount; i++) {
                // only check unused tails
                if (used[i]) {
                    continue;
                }
                // find the two unused tail indexes
                if (t1 == -1) {
                    t1 = i;
                } else {
                    t2 = i;
                    break;
                }
            }
            if (DEBUG) log.debug(t1 + ", " + t2);
            if (preferredFirstTail != -1) {
            	order[tailCount-2] = preferredFirstTail;
            	order[tailCount-1] = preferredFirstTail == t1 ? t2 : t1;
            } else {
	            order[tailCount-2] = cardinality(t1) <= cardinality(t2) ? t1 : t2;
	            order[tailCount-1] = cardinality(t1) <= cardinality(t2) ? t2 : t1;
            }
            return;
        }
        
        /*
         * There will be (tails-1) joins, we just need to figure out what
         * they should be.
         */
        Join join = preferredFirstTail == -1 ? getFirstJoin() : getFirstJoin(preferredFirstTail);
        int t1 = ((Tail) join.getD1()).getTail();
        int t2 = ((Tail) join.getD2()).getTail();
        if (preferredFirstTail == -1) {
	        order[startIndex] = cardinality(t1) <= cardinality(t2) ? t1 : t2;
	        order[startIndex+1] = cardinality(t1) <= cardinality(t2) ? t2 : t1;
        } else {
        	order[startIndex] = t1;
        	order[startIndex+1] = t2;
        }
        used[order[startIndex]] = true;
        used[order[startIndex+1]] = true;
        for (int i = startIndex+2; i < tailCount; i++) {
            join = getNextJoin(join);
            order[i] = ((Tail) join.getD2()).getTail();
            used[order[i]] = true;
        }
        
    }
    
    /**
     * Start by looking at every possible initial join. Take every unused tail
     * and match it with every other unused tail to find the pair with the
     * lowest join cardinality as reported by
     * <code>computeJoinCardinality</code>. Ties on the join cardinality are
     * broken first by the smaller of the two tail cardinalities and then by
     * the larger of the two tail cardinalities. When all three values are
     * equal the pair that was examined first is retained.
     * 
     * @return The join of the selected pair of tails.
     */
    private Join getFirstJoin() {
        if (DEBUG) {
            log.debug("evaluating first join");
        }
        long minJoinCardinality = Long.MAX_VALUE;
        long minTailCardinality = Long.MAX_VALUE;
        long minOtherTailCardinality = Long.MAX_VALUE;
        Tail minT1 = null;
        Tail minT2 = null;
        for (int i = 0; i < tailCount; i++) {
            // only check unused tails
            if (used[i]) {
                continue;
            }
            final Tail t1 = new Tail(i, rangeCount(i), getVars(i));
            final long t1Cardinality = cardinality(i);
            for (int j = 0; j < tailCount; j++) {
                // check only non-same and unused tails
                if (i == j || used[j]) {
                    continue;
                }
                final Tail t2 = new Tail(j, rangeCount(j), getVars(j));
                final long t2Cardinality = cardinality(j);
                final long joinCardinality = computeJoinCardinality(t1, t2);
                final long tailCardinality = Math.min(t1Cardinality, t2Cardinality);
                final long otherTailCardinality = Math.max(t1Cardinality, t2Cardinality);
                if (DEBUG) log.debug("evaluating " + i + " X " + j + ": cardinality= " + joinCardinality);
                // lexicographic comparison on (join, min tail, max tail).
                final boolean newMin = joinCardinality < minJoinCardinality
                        || (joinCardinality == minJoinCardinality
                                && (tailCardinality < minTailCardinality
                                        || (tailCardinality == minTailCardinality
                                                && otherTailCardinality < minOtherTailCardinality)));
                if (newMin) {
                    if (DEBUG) log.debug("found a new min: " + joinCardinality);
                    minJoinCardinality = joinCardinality;
                    minTailCardinality = tailCardinality;
                    minOtherTailCardinality = otherTailCardinality;
                    minT1 = t1;
                    minT2 = t2;
                }
            }
        }
        // the join variables is the union of the join dimensions' variables
        final Set<String> vars = new HashSet<String>();
        vars.addAll(minT1.getVars());
        vars.addAll(minT2.getVars());
        return new Join(minT1, minT2, minJoinCardinality, vars);
    }
    
    /**
     * Variant of {@link #getFirstJoin()} in which the first tail of the join
     * is fixed. Every other unused tail is considered as the second tail and
     * the one giving the lowest join cardinality is chosen, with ties broken
     * by the lower cardinality of that second tail. When both values are
     * equal the tail that was examined first is retained.
     * 
     * @param preferredFirstTail
     *            The index of the tail that must be the first dimension of
     *            the join.
     * 
     * @return The join of the preferred tail with the selected second tail.
     */
    private Join getFirstJoin(final int preferredFirstTail) {
        if (DEBUG) {
            log.debug("evaluating first join");
        }

        long minJoinCardinality = Long.MAX_VALUE;
        long minOtherTailCardinality = Long.MAX_VALUE;
        Tail minT2 = null;
        final int i = preferredFirstTail;
        final Tail t1 = new Tail(i, rangeCount(i), getVars(i));
        for (int j = 0; j < tailCount; j++) {
            // check only non-same and unused tails
            if (i == j || used[j]) {
                continue;
            }
            final Tail t2 = new Tail(j, rangeCount(j), getVars(j));
            final long t2Cardinality = cardinality(j);
            final long joinCardinality = computeJoinCardinality(t1, t2);
            if (DEBUG) log.debug("evaluating " + i + " X " + j + ": cardinality= " + joinCardinality);
            // lexicographic comparison on (join, other tail).
            final boolean newMin = joinCardinality < minJoinCardinality
                    || (joinCardinality == minJoinCardinality
                            && t2Cardinality < minOtherTailCardinality);
            if (newMin) {
                if (DEBUG) log.debug("found a new min: " + joinCardinality);
                minJoinCardinality = joinCardinality;
                minOtherTailCardinality = t2Cardinality;
                minT2 = t2;
            }
        }

        // the join variables is the union of the join dimensions' variables
        final Set<String> vars = new HashSet<String>();
        vars.addAll(t1.getVars());
        vars.addAll(minT2.getVars());
        return new Join(t1, minT2, minJoinCardinality, vars);
    }
    
    /**
     * Similar to {@link #getFirstJoin()}, but we have one join dimension
     * already calculated. Each unused tail is joined against that dimension
     * and the tail giving the lowest join cardinality is chosen, with ties
     * broken by the lower tail cardinality. If the best join cardinality is
     * the "no shared variables" marker, the choice is refined by taking the
     * unused tail with the lowest cardinality, and that cardinality becomes
     * the cardinality of the returned join.
     * 
     * @param d1 
     *          the first join dimension
     * @return 
     *          the new join with the lowest cardinality from the remaining tails
     */
    private Join getNextJoin(IJoinDimension d1) {
        if (DEBUG) {
            log.debug("evaluating next join");
        }
        long minJoinCardinality = Long.MAX_VALUE;
        long minTailCardinality = Long.MAX_VALUE;
        Tail minTail = null;
        for (int i = 0; i < tailCount; i++) {
            // only check unused tails
            if (used[i]) {
                continue;
            }
            final Tail tail = new Tail(i, rangeCount(i), getVars(i));
            final long tailCardinality = cardinality(i);
            final long joinCardinality = computeJoinCardinality(d1, tail);
            if (DEBUG) log.debug("evaluating " + d1.toJoinString() + " X " + i + ": cardinality= " + joinCardinality);
            // lexicographic comparison on (join, tail).
            final boolean newMin = joinCardinality < minJoinCardinality
                    || (joinCardinality == minJoinCardinality
                            && tailCardinality < minTailCardinality);
            if (newMin) {
                if (DEBUG) log.debug("found a new min: " + joinCardinality);
                minJoinCardinality = joinCardinality;
                minTailCardinality = tailCardinality;
                minTail = tail;
            }
        }
        // if we are at the "no shared variables" tails, order by range count
        if (minJoinCardinality == NO_SHARED_VARS) {
            minJoinCardinality = Long.MAX_VALUE;
            for (int i = 0; i < tailCount; i++) {
                // only check unused tails
                if (used[i]) {
                    continue;
                }
                final Tail tail = new Tail(i, rangeCount(i), getVars(i));
                final long tailCardinality = cardinality(i);
                if (tailCardinality < minJoinCardinality) {
                    if (DEBUG) log.debug("found a new min: " + tailCardinality);
                    minJoinCardinality = tailCardinality;
                    minTail = tail;
                }
            }
        }
        // the join variables is the union of the join dimensions' variables
        final Set<String> vars = new HashSet<String>();
        vars.addAll(d1.getVars());
        vars.addAll(minTail.getVars());
        return new Join(d1, minTail, minJoinCardinality, vars);
    }
    
    /**
     * Return the range count for the predicate, ignoring any bindings. The
     * value is obtained from the range count factory the first time it is
     * requested for a tail and served from the cache on later requests. The
     * "non-exact" range count is used, so the value is an upper bound.
     * However, an upper bound of ZERO (0) really is ZERO (0).
     * <p>
     * For a tail whose access path expander reports <code>runFirst()</code>
     * no range count is requested and <code>-1L</code> is returned as a
     * marker that the range count is not available.
     * 
     * @param tailIndex
     *            The index of the predicate in the tail of the rule.
     * 
     * @return The range count for that tail predicate.
     */
    public long rangeCount(final int tailIndex) {

        final long cached = rangeCount[tailIndex];

        if (cached != -1L) {

            // already computed.
            return cached;

        }

        final IPredicate predicate = rule.getTail(tailIndex);

        final IAccessPathExpander expander = predicate.getAccessPathExpander();

        final boolean runFirst = expander != null && expander.runFirst();

        if (runFirst) {

            /*
             * Note: runFirst() essentially indicates that the cardinality of
             * the predicate in the data is to be ignored, so the cache entry
             * is left untouched and the marker value is returned.
             */

            return -1L;

        }

        final long computed = rangeCountFactory.rangeCount(rule
                .getTail(tailIndex));

        this.rangeCount[tailIndex] = computed;

        return computed;

    }

    /**
     * Return the cardinality of a particular tail. This is
     * {@link Long#MAX_VALUE} when the tail is optional or is an
     * {@link IStarJoin}, and the range count of the tail otherwise.
     * 
     * @param tailIndex
     *            The index of the predicate in the tail of the rule.
     * 
     * @return The cardinality of that tail predicate.
     */
    public long cardinality(final int tailIndex) {
        final IPredicate pred = rule.getTail(tailIndex);
        final boolean unbounded = pred.isOptional() || pred instanceof IStarJoin;
        return unbounded ? Long.MAX_VALUE : rangeCount(tailIndex);
    }
    
    /**
     * Render the computed evaluation order, e.g. <code>[2, 0, 1]</code>.
     * 
     * @throws IllegalStateException
     *             if the evaluation order has not been computed yet.
     */
    public String toString() {
        final int[] evaluationOrder = getOrder();
        return Arrays.toString(evaluationOrder);
    }
    
    /**
     * This is the secret sauce.  There are three possibilities for computing
     * the join cardinality, which we are defining as the upper-bound for
     * solutions for a particular join.  First, if there are no shared variables
     * then the cardinality will just be the simple sum of the cardinality of
     * each join dimension.  If there are shared variables but no unshared
     * variables, then the cardinality will be the minimum cardinality from
     * the join dimensions.  If there are shared variables but also some
     * unshared variables, then the join cardinality will be the maximum
     * cardinality from each join dimension.
     * 

* Any join involving an optional will have infinite cardinality, so that * optionals get placed at the end. * * @param d1 * the first join dimension * @param d2 * the second join dimension * @return * the join cardinality */ protected long computeJoinCardinality(IJoinDimension d1, IJoinDimension d2) { // two optionals is worse than one if (d1.isOptional() && d2.isOptional()) { return BOTH_OPTIONAL; } if (d1.isOptional() || d2.isOptional()) { return ONE_OPTIONAL; } final boolean sharedVars = hasSharedVars(d1, d2); final boolean unsharedVars = hasUnsharedVars(d1, d2); final long joinCardinality; if (sharedVars == false) { // no shared vars - take the sum // joinCardinality = d1.getCardinality() + d2.getCardinality(); // different approach - give preference to shared variables joinCardinality = NO_SHARED_VARS; } else { if (unsharedVars == false) { // shared vars and no unshared vars - take the min joinCardinality = Math.min(d1.getCardinality(), d2.getCardinality()); } else { // shared vars and unshared vars - take the max /* * This modification to the join planner results in * significantly faster queries for the bsbm benchmark (3x - 5x * overall). It takes a more optimistic perspective on the * intersection of two statement patterns, predicting that this * will constraint, rather than increase, the multiplicity of * the solutions. However, this COULD lead to pathological cases * where the resulting join plan is WORSE than it would have * been otherwise. For example, this change produces a 3x to 5x * improvement in the BSBM benchmark results. However, it has a * negative effect on LUBM Q2. * * Update: Ok so just to go into a little detail - yesterday's * change means we choose the join ordering based on an * optimistic view of the cardinality of any particular join. If * you have two triple patterns that share variables but that * also have unshared variables, then technically the maximum * cardinality of the join is the maximum range count of the two * tails. 
But often the true cardinality of the join is closer * to the minimum range count than the maximum. So yesterday we * started assigning an expected cardinality for the join of the * minimum range count rather than the maximum. What this means * is that a lot of the time when those joins move toward the * front of the line the query will do a lot better, but * occasionally (LUBM 2), the query will do much much worse * (when the true cardinality is closer to the max range count). * * Today we put in an extra tie-breaker condition. We already * had one tie-breaker - if two joins have the same expected * cardinality we chose the one with the lower minimum range * count. But the new tie-breaker is that if two joins have the * same expected cardinality and minimum range count, we now * chose the one that has the minimum range count on the other * tail (the minimum maximum if that makes sense). */ joinCardinality = Math.min(d1.getCardinality(), d2.getCardinality()); // Math.max(d1.getCardinality(), d2.getCardinality()); } } return joinCardinality; } /** * Get the named variables for a given tail. Is there a better way to do * this? * * @param tail * the tail * @return * the named variables */ protected Set getVars(int tail) { final Set vars = new HashSet(); IPredicate pred = rule.getTail(tail); for (int i = 0; i < pred.arity(); i++) { IVariableOrConstant term = pred.get(i); if (term.isVar()) { vars.add(term.getName()); } } return vars; } /** * Look for shared variables. * * @param d1 * the first join dimension * @param d2 * the second join dimension * @return * true if there are shared variables, false otherwise */ protected boolean hasSharedVars(IJoinDimension d1, IJoinDimension d2) { for(String var : d1.getVars()) { if (d2.getVars().contains(var)) { return true; } } return false; } /** * Look for unshared variables. 
* * @param d1 * the first join dimension * @param d2 * the second join dimension * @return * true if there are unshared variables, false otherwise */ protected boolean hasUnsharedVars(IJoinDimension d1, IJoinDimension d2) { for(String var : d1.getVars()) { if (d2.getVars().contains(var) == false) { return true; } } for(String var : d2.getVars()) { if (d1.getVars().contains(var) == false) { return true; } } return false; } /** * A join dimension can be either a tail, or a previous join. Either way * we need to know its cardinality, its variables, and its tails. */ private interface IJoinDimension { long getCardinality(); Set getVars(); String toJoinString(); boolean isOptional(); } /** * A join implementation of a join dimension. The join can consist of two * tails, or one tail and another join. Theoretically it could be two * joins as well, which might be a future optimization worth thinking about. */ private static class Join implements IJoinDimension { private final IJoinDimension d1, d2; private final long cardinality; private final Set vars; public Join(IJoinDimension d1, IJoinDimension d2, long cardinality, Set vars) { this.d1 = d1; this.d2 = d2; this.cardinality = cardinality; this.vars = vars; } public IJoinDimension getD1() { return d1; } public IJoinDimension getD2() { return d2; } public Set getVars() { return vars; } public long getCardinality() { return cardinality; } public boolean isOptional() { return false; } public String toJoinString() { return d1.toJoinString() + " X " + d2.toJoinString(); } } /** * A tail implementation of a join dimension. 
*/ private class Tail implements IJoinDimension { private final int tail; private final long cardinality; private final Set vars; public Tail(int tail, long cardinality, Set vars) { this.tail = tail; this.cardinality = cardinality; this.vars = vars; } public int getTail() { return tail; } public long getCardinality() { return cardinality; } public Set getVars() { return vars; } public boolean isOptional() { return rule.getTail(tail).isOptional(); } public String toJoinString() { return String.valueOf(tail); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy