All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.eclipse.rdf4j.sail.federation.optimizers.QueryMultiJoinOptimizer Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *******************************************************************************/
package org.eclipse.rdf4j.sail.federation.optimizers;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.Dataset;
import org.eclipse.rdf4j.query.algebra.Join;
import org.eclipse.rdf4j.query.algebra.LeftJoin;
import org.eclipse.rdf4j.query.algebra.QueryModelNode;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer;
import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor;
import org.eclipse.rdf4j.query.algebra.helpers.StatementPatternCollector;
import org.eclipse.rdf4j.sail.federation.algebra.NaryJoin;

/**
 * A query optimizer that re-orders nested Joins.
 *
 * @author Arjohn Kampman
 * @author James Leigh
 */
public class QueryMultiJoinOptimizer implements QueryOptimizer {

	protected final EvaluationStatistics statistics;

	public QueryMultiJoinOptimizer() {
		this(new EvaluationStatistics());
	}

	public QueryMultiJoinOptimizer(EvaluationStatistics statistics) {
		this.statistics = statistics;
	}

	/**
	 * Applies generally applicable optimizations: path expressions are sorted from more to less specific.
	 *
	 * @throws StoreException
	 */
	@Override
	public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) {
		tupleExpr.visit(new JoinVisitor());
	}

	protected class JoinVisitor extends AbstractQueryModelVisitor {

		private Set boundVars = new HashSet<>();

		@Override
		public void meet(LeftJoin leftJoin) {
			leftJoin.getLeftArg().visit(this);

			Set origBoundVars = boundVars;
			try {
				boundVars = new HashSet<>(boundVars);
				boundVars.addAll(leftJoin.getLeftArg().getBindingNames());

				leftJoin.getRightArg().visit(this);
			} finally {
				boundVars = origBoundVars;
			}
		}

		@Override
		public void meetOther(QueryModelNode node) throws RuntimeException {
			if (node instanceof NaryJoin) {
				meetJoin((NaryJoin) node);
			} else {
				super.meetOther(node);
			}
		}

		@Override
		public void meet(Join node) throws RuntimeException {
			meetJoin(node);
		}

		public void meetJoin(TupleExpr node) {
			Set origBoundVars = boundVars;
			try {
				boundVars = new HashSet<>(boundVars);

				// Recursively get the join arguments
				List joinArgs = getJoinArgs(node, new ArrayList<>());

				// Build maps of cardinalities and vars per tuple expression
				Map cardinalityMap = new HashMap<>();
				Map> varsMap = new HashMap<>();

				for (TupleExpr tupleExpr : joinArgs) {
					cardinalityMap.put(tupleExpr, statistics.getCardinality(tupleExpr));
					varsMap.put(tupleExpr, getStatementPatternVars(tupleExpr));
				}

				// Build map of var frequences
				Map varFreqMap = new HashMap<>();
				for (List varList : varsMap.values()) {
					getVarFreqMap(varList, varFreqMap);
				}

				// Reorder the (recursive) join arguments to a more optimal sequence
				List orderedJoinArgs = new ArrayList<>(joinArgs.size());
				while (!joinArgs.isEmpty()) {
					TupleExpr tupleExpr = selectNextTupleExpr(joinArgs, cardinalityMap, varsMap, varFreqMap, boundVars);

					joinArgs.remove(tupleExpr);
					orderedJoinArgs.add(tupleExpr);

					// Recursively optimize join arguments
					tupleExpr.visit(this);

					boundVars.addAll(tupleExpr.getBindingNames());
				}

				// Build new join hierarchy
				TupleExpr replacement = new NaryJoin(orderedJoinArgs);

				// Replace old join hierarchy
				node.replaceWith(replacement);
			} finally {
				boundVars = origBoundVars;
			}
		}

		protected > L getJoinArgs(TupleExpr tupleExpr, L joinArgs) {
			if (tupleExpr instanceof NaryJoin) {
				NaryJoin join = (NaryJoin) tupleExpr;
				for (TupleExpr arg : join.getArgs()) {
					getJoinArgs(arg, joinArgs);
				}
			} else if (tupleExpr instanceof Join) {
				Join join = (Join) tupleExpr;
				getJoinArgs(join.getLeftArg(), joinArgs);
				getJoinArgs(join.getRightArg(), joinArgs);
			} else {
				joinArgs.add(tupleExpr);
			}

			return joinArgs;
		}

		protected List getStatementPatternVars(TupleExpr tupleExpr) {
			List stPatterns = StatementPatternCollector.process(tupleExpr);
			List varList = new ArrayList<>(stPatterns.size() * 4);
			for (StatementPattern sp : stPatterns) {
				sp.getVars(varList);
			}
			return varList;
		}

		protected > M getVarFreqMap(List varList, M varFreqMap) {
			for (Var var : varList) {
				Integer freq = varFreqMap.get(var);
				freq = (freq == null) ? 1 : freq + 1;
				varFreqMap.put(var, freq);
			}
			return varFreqMap;
		}

		/**
		 * Selects from a list of tuple expressions the next tuple expression that should be evaluated. This method
		 * selects the tuple expression with highest number of bound variables, preferring variables that have been
		 * bound in other tuple expressions over variables with a fixed value.
		 */
		protected TupleExpr selectNextTupleExpr(List expressions, Map cardinalityMap,
				Map> varsMap, Map varFreqMap, Set boundVars) {
			double lowestCost = Double.MAX_VALUE;
			TupleExpr result = null;

			for (TupleExpr tupleExpr : expressions) {
				// Calculate a score for this tuple expression
				double cost = getTupleExprCost(tupleExpr, cardinalityMap, varsMap, varFreqMap, boundVars);

				if (cost < lowestCost) {
					// More specific path expression found
					lowestCost = cost;
					result = tupleExpr;
				}
			}

			return result;
		}

		protected double getTupleExprCost(TupleExpr tupleExpr, Map cardinalityMap,
				Map> varsMap, Map varFreqMap, Set boundVars) {
			double cost = cardinalityMap.get(tupleExpr);

			List vars = varsMap.get(tupleExpr);

			// Compensate for variables that are bound earlier in the evaluation
			List unboundVars = getUnboundVars(vars);
			List constantVars = getConstantVars(vars);
			int nonConstantCount = vars.size() - constantVars.size();
			if (nonConstantCount > 0) {
				double exp = (double) unboundVars.size() / nonConstantCount;
				cost = Math.pow(cost, exp);
			}

			if (unboundVars.isEmpty()) {
				// Prefer patterns with more bound vars
				if (nonConstantCount > 0) {
					cost /= nonConstantCount;
				}
			} else {
				// Prefer patterns that bind variables from other tuple expressions
				int foreignVarFreq = getForeignVarFreq(unboundVars, varFreqMap);
				if (foreignVarFreq > 0) {
					cost /= foreignVarFreq;
				}
			}

			// Prefer patterns that bind more variables
			// List distinctUnboundVars = getUnboundVars(new
			// HashSet(vars));
			// if (distinctUnboundVars.size() >= 2) {
			// cardinality /= distinctUnboundVars.size();
			// }

			return cost;
		}

		protected List getConstantVars(Iterable vars) {
			List constantVars = new ArrayList<>();

			for (Var var : vars) {
				if (var.hasValue()) {
					constantVars.add(var);
				}
			}

			return constantVars;
		}

		protected List getUnboundVars(Iterable vars) {
			List unboundVars = new ArrayList<>();

			for (Var var : vars) {
				if (!var.hasValue() && !this.boundVars.contains(var.getName())) {
					unboundVars.add(var);
				}
			}

			return unboundVars;
		}

		protected int getForeignVarFreq(List ownUnboundVars, Map varFreqMap) {
			int result = 0;

			Map ownFreqMap = getVarFreqMap(ownUnboundVars, new HashMap<>());

			for (Map.Entry entry : ownFreqMap.entrySet()) {
				Var var = entry.getKey();
				int ownFreq = entry.getValue();
				result += varFreqMap.get(var) - ownFreq;
			}

			return result;
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy