// org.eclipse.rdf4j.sail.federation.optimizers.QueryMultiJoinOptimizer Maven / Gradle / Ivy
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*******************************************************************************/
package org.eclipse.rdf4j.sail.federation.optimizers;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.Dataset;
import org.eclipse.rdf4j.query.algebra.Join;
import org.eclipse.rdf4j.query.algebra.LeftJoin;
import org.eclipse.rdf4j.query.algebra.QueryModelNode;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer;
import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor;
import org.eclipse.rdf4j.query.algebra.helpers.StatementPatternCollector;
import org.eclipse.rdf4j.sail.federation.algebra.NaryJoin;
/**
* A query optimizer that re-orders nested Joins.
*
* @author Arjohn Kampman
* @author James Leigh
*/
public class QueryMultiJoinOptimizer implements QueryOptimizer {
protected final EvaluationStatistics statistics;
public QueryMultiJoinOptimizer() {
this(new EvaluationStatistics());
}
public QueryMultiJoinOptimizer(EvaluationStatistics statistics) {
this.statistics = statistics;
}
/**
* Applies generally applicable optimizations: path expressions are sorted from more to less specific.
*
* @throws StoreException
*/
@Override
public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) {
tupleExpr.visit(new JoinVisitor());
}
protected class JoinVisitor extends AbstractQueryModelVisitor {
private Set boundVars = new HashSet<>();
@Override
public void meet(LeftJoin leftJoin) {
leftJoin.getLeftArg().visit(this);
Set origBoundVars = boundVars;
try {
boundVars = new HashSet<>(boundVars);
boundVars.addAll(leftJoin.getLeftArg().getBindingNames());
leftJoin.getRightArg().visit(this);
} finally {
boundVars = origBoundVars;
}
}
@Override
public void meetOther(QueryModelNode node) throws RuntimeException {
if (node instanceof NaryJoin) {
meetJoin((NaryJoin) node);
} else {
super.meetOther(node);
}
}
@Override
public void meet(Join node) throws RuntimeException {
meetJoin(node);
}
public void meetJoin(TupleExpr node) {
Set origBoundVars = boundVars;
try {
boundVars = new HashSet<>(boundVars);
// Recursively get the join arguments
List joinArgs = getJoinArgs(node, new ArrayList<>());
// Build maps of cardinalities and vars per tuple expression
Map cardinalityMap = new HashMap<>();
Map> varsMap = new HashMap<>();
for (TupleExpr tupleExpr : joinArgs) {
cardinalityMap.put(tupleExpr, statistics.getCardinality(tupleExpr));
varsMap.put(tupleExpr, getStatementPatternVars(tupleExpr));
}
// Build map of var frequences
Map varFreqMap = new HashMap<>();
for (List varList : varsMap.values()) {
getVarFreqMap(varList, varFreqMap);
}
// Reorder the (recursive) join arguments to a more optimal sequence
List orderedJoinArgs = new ArrayList<>(joinArgs.size());
while (!joinArgs.isEmpty()) {
TupleExpr tupleExpr = selectNextTupleExpr(joinArgs, cardinalityMap, varsMap, varFreqMap, boundVars);
joinArgs.remove(tupleExpr);
orderedJoinArgs.add(tupleExpr);
// Recursively optimize join arguments
tupleExpr.visit(this);
boundVars.addAll(tupleExpr.getBindingNames());
}
// Build new join hierarchy
TupleExpr replacement = new NaryJoin(orderedJoinArgs);
// Replace old join hierarchy
node.replaceWith(replacement);
} finally {
boundVars = origBoundVars;
}
}
protected > L getJoinArgs(TupleExpr tupleExpr, L joinArgs) {
if (tupleExpr instanceof NaryJoin) {
NaryJoin join = (NaryJoin) tupleExpr;
for (TupleExpr arg : join.getArgs()) {
getJoinArgs(arg, joinArgs);
}
} else if (tupleExpr instanceof Join) {
Join join = (Join) tupleExpr;
getJoinArgs(join.getLeftArg(), joinArgs);
getJoinArgs(join.getRightArg(), joinArgs);
} else {
joinArgs.add(tupleExpr);
}
return joinArgs;
}
protected List getStatementPatternVars(TupleExpr tupleExpr) {
List stPatterns = StatementPatternCollector.process(tupleExpr);
List varList = new ArrayList<>(stPatterns.size() * 4);
for (StatementPattern sp : stPatterns) {
sp.getVars(varList);
}
return varList;
}
protected > M getVarFreqMap(List varList, M varFreqMap) {
for (Var var : varList) {
Integer freq = varFreqMap.get(var);
freq = (freq == null) ? 1 : freq + 1;
varFreqMap.put(var, freq);
}
return varFreqMap;
}
/**
* Selects from a list of tuple expressions the next tuple expression that should be evaluated. This method
* selects the tuple expression with highest number of bound variables, preferring variables that have been
* bound in other tuple expressions over variables with a fixed value.
*/
protected TupleExpr selectNextTupleExpr(List expressions, Map cardinalityMap,
Map> varsMap, Map varFreqMap, Set boundVars) {
double lowestCost = Double.MAX_VALUE;
TupleExpr result = null;
for (TupleExpr tupleExpr : expressions) {
// Calculate a score for this tuple expression
double cost = getTupleExprCost(tupleExpr, cardinalityMap, varsMap, varFreqMap, boundVars);
if (cost < lowestCost) {
// More specific path expression found
lowestCost = cost;
result = tupleExpr;
}
}
return result;
}
protected double getTupleExprCost(TupleExpr tupleExpr, Map cardinalityMap,
Map> varsMap, Map varFreqMap, Set boundVars) {
double cost = cardinalityMap.get(tupleExpr);
List vars = varsMap.get(tupleExpr);
// Compensate for variables that are bound earlier in the evaluation
List unboundVars = getUnboundVars(vars);
List constantVars = getConstantVars(vars);
int nonConstantCount = vars.size() - constantVars.size();
if (nonConstantCount > 0) {
double exp = (double) unboundVars.size() / nonConstantCount;
cost = Math.pow(cost, exp);
}
if (unboundVars.isEmpty()) {
// Prefer patterns with more bound vars
if (nonConstantCount > 0) {
cost /= nonConstantCount;
}
} else {
// Prefer patterns that bind variables from other tuple expressions
int foreignVarFreq = getForeignVarFreq(unboundVars, varFreqMap);
if (foreignVarFreq > 0) {
cost /= foreignVarFreq;
}
}
// Prefer patterns that bind more variables
// List distinctUnboundVars = getUnboundVars(new
// HashSet(vars));
// if (distinctUnboundVars.size() >= 2) {
// cardinality /= distinctUnboundVars.size();
// }
return cost;
}
protected List getConstantVars(Iterable vars) {
List constantVars = new ArrayList<>();
for (Var var : vars) {
if (var.hasValue()) {
constantVars.add(var);
}
}
return constantVars;
}
protected List getUnboundVars(Iterable vars) {
List unboundVars = new ArrayList<>();
for (Var var : vars) {
if (!var.hasValue() && !this.boundVars.contains(var.getName())) {
unboundVars.add(var);
}
}
return unboundVars;
}
protected int getForeignVarFreq(List ownUnboundVars, Map varFreqMap) {
int result = 0;
Map ownFreqMap = getVarFreqMap(ownUnboundVars, new HashMap<>());
for (Map.Entry entry : ownFreqMap.entrySet()) {
Var var = entry.getKey();
int ownFreq = entry.getValue();
result += varFreqMap.get(var) - ownFreq;
}
return result;
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy