com.hazelcast.org.apache.calcite.rel.rules.MultiJoinOptimizeBushyRule Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.org.apache.calcite.rel.rules;
import com.hazelcast.org.apache.calcite.config.CalciteSystemProperty;
import com.hazelcast.org.apache.calcite.plan.RelOptRule;
import com.hazelcast.org.apache.calcite.plan.RelOptRuleCall;
import com.hazelcast.org.apache.calcite.rel.RelNode;
import com.hazelcast.org.apache.calcite.rel.core.JoinRelType;
import com.hazelcast.org.apache.calcite.rel.core.RelFactories;
import com.hazelcast.org.apache.calcite.rel.metadata.RelMdUtil;
import com.hazelcast.org.apache.calcite.rel.metadata.RelMetadataQuery;
import com.hazelcast.org.apache.calcite.rex.RexBuilder;
import com.hazelcast.org.apache.calcite.rex.RexNode;
import com.hazelcast.org.apache.calcite.rex.RexPermuteInputsShuttle;
import com.hazelcast.org.apache.calcite.rex.RexUtil;
import com.hazelcast.org.apache.calcite.rex.RexVisitor;
import com.hazelcast.org.apache.calcite.tools.RelBuilder;
import com.hazelcast.org.apache.calcite.tools.RelBuilderFactory;
import com.hazelcast.org.apache.calcite.util.ImmutableBitSet;
import com.hazelcast.org.apache.calcite.util.Pair;
import com.hazelcast.org.apache.calcite.util.Util;
import com.hazelcast.org.apache.calcite.util.mapping.Mappings;
import com.hazelcast.com.google.common.collect.ImmutableList;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import static com.hazelcast.org.apache.calcite.rel.rules.LoptMultiJoin.Edge;
import static com.hazelcast.org.apache.calcite.util.mapping.Mappings.TargetMapping;
/**
* Planner rule that finds an approximately optimal ordering for join operators
* using a heuristic algorithm.
*
* It is triggered by the pattern
* {@link com.hazelcast.org.apache.calcite.rel.logical.LogicalProject} ({@link MultiJoin}).
*
*
It is similar to
* {@link com.hazelcast.org.apache.calcite.rel.rules.LoptOptimizeJoinRule}.
* {@code LoptOptimizeJoinRule} is only capable of producing left-deep joins;
* this rule is capable of producing bushy joins.
*
*
TODO:
*
* - Join conditions that touch 1 factor.
*
- Join conditions that touch 3 factors.
*
- More than 1 join conditions that touch the same pair of factors,
* e.g. {@code t0.c1 = t1.c1 and t1.c2 = t0.c3}
*
*/
public class MultiJoinOptimizeBushyRule extends RelOptRule implements TransformationRule {
public static final MultiJoinOptimizeBushyRule INSTANCE =
new MultiJoinOptimizeBushyRule(RelFactories.LOGICAL_BUILDER);
private final PrintWriter pw = CalciteSystemProperty.DEBUG.value()
? Util.printWriter(System.out)
: null;
/** Creates an MultiJoinOptimizeBushyRule. */
public MultiJoinOptimizeBushyRule(RelBuilderFactory relBuilderFactory) {
super(operand(MultiJoin.class, any()), relBuilderFactory, null);
}
@Deprecated // to be removed before 2.0
public MultiJoinOptimizeBushyRule(RelFactories.JoinFactory joinFactory,
RelFactories.ProjectFactory projectFactory) {
this(RelBuilder.proto(joinFactory, projectFactory));
}
@Override public void onMatch(RelOptRuleCall call) {
final MultiJoin multiJoinRel = call.rel(0);
final RexBuilder rexBuilder = multiJoinRel.getCluster().getRexBuilder();
final RelBuilder relBuilder = call.builder();
final RelMetadataQuery mq = call.getMetadataQuery();
final LoptMultiJoin multiJoin = new LoptMultiJoin(multiJoinRel);
final List vertexes = new ArrayList<>();
int x = 0;
for (int i = 0; i < multiJoin.getNumJoinFactors(); i++) {
final RelNode rel = multiJoin.getJoinFactor(i);
double cost = mq.getRowCount(rel);
vertexes.add(new LeafVertex(i, rel, cost, x));
x += rel.getRowType().getFieldCount();
}
assert x == multiJoin.getNumTotalFields();
final List unusedEdges = new ArrayList<>();
for (RexNode node : multiJoin.getJoinFilters()) {
unusedEdges.add(multiJoin.createEdge(node));
}
// Comparator that chooses the best edge. A "good edge" is one that has
// a large difference in the number of rows on LHS and RHS.
final Comparator edgeComparator =
new Comparator() {
public int compare(LoptMultiJoin.Edge e0, LoptMultiJoin.Edge e1) {
return Double.compare(rowCountDiff(e0), rowCountDiff(e1));
}
private double rowCountDiff(LoptMultiJoin.Edge edge) {
assert edge.factors.cardinality() == 2 : edge.factors;
final int factor0 = edge.factors.nextSetBit(0);
final int factor1 = edge.factors.nextSetBit(factor0 + 1);
return Math.abs(vertexes.get(factor0).cost
- vertexes.get(factor1).cost);
}
};
final List usedEdges = new ArrayList<>();
for (;;) {
final int edgeOrdinal = chooseBestEdge(unusedEdges, edgeComparator);
if (pw != null) {
trace(vertexes, unusedEdges, usedEdges, edgeOrdinal, pw);
}
final int[] factors;
if (edgeOrdinal == -1) {
// No more edges. Are there any un-joined vertexes?
final Vertex lastVertex = Util.last(vertexes);
final int z = lastVertex.factors.previousClearBit(lastVertex.id - 1);
if (z < 0) {
break;
}
factors = new int[] {z, lastVertex.id};
} else {
final LoptMultiJoin.Edge bestEdge = unusedEdges.get(edgeOrdinal);
// For now, assume that the edge is between precisely two factors.
// 1-factor conditions have probably been pushed down,
// and 3-or-more-factor conditions are advanced. (TODO:)
// Therefore, for now, the factors that are merged are exactly the
// factors on this edge.
assert bestEdge.factors.cardinality() == 2;
factors = bestEdge.factors.toArray();
}
// Determine which factor is to be on the LHS of the join.
final int majorFactor;
final int minorFactor;
if (vertexes.get(factors[0]).cost <= vertexes.get(factors[1]).cost) {
majorFactor = factors[0];
minorFactor = factors[1];
} else {
majorFactor = factors[1];
minorFactor = factors[0];
}
final Vertex majorVertex = vertexes.get(majorFactor);
final Vertex minorVertex = vertexes.get(minorFactor);
// Find the join conditions. All conditions whose factors are now all in
// the join can now be used.
final int v = vertexes.size();
final ImmutableBitSet newFactors =
majorVertex.factors
.rebuild()
.addAll(minorVertex.factors)
.set(v)
.build();
final List conditions = new ArrayList<>();
final Iterator edgeIterator = unusedEdges.iterator();
while (edgeIterator.hasNext()) {
LoptMultiJoin.Edge edge = edgeIterator.next();
if (newFactors.contains(edge.factors)) {
conditions.add(edge.condition);
edgeIterator.remove();
usedEdges.add(edge);
}
}
double cost =
majorVertex.cost
* minorVertex.cost
* RelMdUtil.guessSelectivity(
RexUtil.composeConjunction(rexBuilder, conditions));
final Vertex newVertex =
new JoinVertex(v, majorFactor, minorFactor, newFactors,
cost, ImmutableList.copyOf(conditions));
vertexes.add(newVertex);
// Re-compute selectivity of edges above the one just chosen.
// Suppose that we just chose the edge between "product" (10k rows) and
// "product_class" (10 rows).
// Both of those vertices are now replaced by a new vertex "P-PC".
// This vertex has fewer rows (1k rows) -- a fact that is critical to
// decisions made later. (Hence "greedy" algorithm not "simple".)
// The adjacent edges are modified.
final ImmutableBitSet merged =
ImmutableBitSet.of(minorFactor, majorFactor);
for (int i = 0; i < unusedEdges.size(); i++) {
final LoptMultiJoin.Edge edge = unusedEdges.get(i);
if (edge.factors.intersects(merged)) {
ImmutableBitSet newEdgeFactors =
edge.factors
.rebuild()
.removeAll(newFactors)
.set(v)
.build();
assert newEdgeFactors.cardinality() == 2;
final LoptMultiJoin.Edge newEdge =
new LoptMultiJoin.Edge(edge.condition, newEdgeFactors,
edge.columns);
unusedEdges.set(i, newEdge);
}
}
}
// We have a winner!
List> relNodes = new ArrayList<>();
for (Vertex vertex : vertexes) {
if (vertex instanceof LeafVertex) {
LeafVertex leafVertex = (LeafVertex) vertex;
final Mappings.TargetMapping mapping =
Mappings.offsetSource(
Mappings.createIdentity(
leafVertex.rel.getRowType().getFieldCount()),
leafVertex.fieldOffset,
multiJoin.getNumTotalFields());
relNodes.add(Pair.of(leafVertex.rel, mapping));
} else {
JoinVertex joinVertex = (JoinVertex) vertex;
final Pair leftPair =
relNodes.get(joinVertex.leftFactor);
RelNode left = leftPair.left;
final Mappings.TargetMapping leftMapping = leftPair.right;
final Pair rightPair =
relNodes.get(joinVertex.rightFactor);
RelNode right = rightPair.left;
final Mappings.TargetMapping rightMapping = rightPair.right;
final Mappings.TargetMapping mapping =
Mappings.merge(leftMapping,
Mappings.offsetTarget(rightMapping,
left.getRowType().getFieldCount()));
if (pw != null) {
pw.println("left: " + leftMapping);
pw.println("right: " + rightMapping);
pw.println("combined: " + mapping);
pw.println();
}
final RexVisitor shuttle =
new RexPermuteInputsShuttle(mapping, left, right);
final RexNode condition =
RexUtil.composeConjunction(rexBuilder, joinVertex.conditions);
final RelNode join = relBuilder.push(left)
.push(right)
.join(JoinRelType.INNER, condition.accept(shuttle))
.build();
relNodes.add(Pair.of(join, mapping));
}
if (pw != null) {
pw.println(Util.last(relNodes));
}
}
final Pair top = Util.last(relNodes);
relBuilder.push(top.left)
.project(relBuilder.fields(top.right));
call.transformTo(relBuilder.build());
}
private void trace(List vertexes,
List unusedEdges, List usedEdges,
int edgeOrdinal, PrintWriter pw) {
pw.println("bestEdge: " + edgeOrdinal);
pw.println("vertexes:");
for (Vertex vertex : vertexes) {
pw.println(vertex);
}
pw.println("unused edges:");
for (LoptMultiJoin.Edge edge : unusedEdges) {
pw.println(edge);
}
pw.println("edges:");
for (LoptMultiJoin.Edge edge : usedEdges) {
pw.println(edge);
}
pw.println();
pw.flush();
}
int chooseBestEdge(List edges,
Comparator comparator) {
return minPos(edges, comparator);
}
/** Returns the index within a list at which compares least according to a
* comparator.
*
* In the case of a tie, returns the earliest such element.
*
* If the list is empty, returns -1.
*/
static int minPos(List list, Comparator fn) {
if (list.isEmpty()) {
return -1;
}
E eBest = list.get(0);
int iBest = 0;
for (int i = 1; i < list.size(); i++) {
E e = list.get(i);
if (fn.compare(e, eBest) < 0) {
eBest = e;
iBest = i;
}
}
return iBest;
}
/** Participant in a join (relation or join). */
abstract static class Vertex {
final int id;
protected final ImmutableBitSet factors;
final double cost;
Vertex(int id, ImmutableBitSet factors, double cost) {
this.id = id;
this.factors = factors;
this.cost = cost;
}
}
/** Relation participating in a join. */
static class LeafVertex extends Vertex {
private final RelNode rel;
final int fieldOffset;
LeafVertex(int id, RelNode rel, double cost, int fieldOffset) {
super(id, ImmutableBitSet.of(id), cost);
this.rel = rel;
this.fieldOffset = fieldOffset;
}
@Override public String toString() {
return "LeafVertex(id: " + id
+ ", cost: " + Util.human(cost)
+ ", factors: " + factors
+ ", fieldOffset: " + fieldOffset
+ ")";
}
}
/** Participant in a join which is itself a join. */
static class JoinVertex extends Vertex {
private final int leftFactor;
private final int rightFactor;
/** Zero or more join conditions. All are in terms of the original input
* columns (not in terms of the outputs of left and right input factors). */
final ImmutableList conditions;
JoinVertex(int id, int leftFactor, int rightFactor, ImmutableBitSet factors,
double cost, ImmutableList conditions) {
super(id, factors, cost);
this.leftFactor = leftFactor;
this.rightFactor = rightFactor;
this.conditions = Objects.requireNonNull(conditions);
}
@Override public String toString() {
return "JoinVertex(id: " + id
+ ", cost: " + Util.human(cost)
+ ", factors: " + factors
+ ", leftFactor: " + leftFactor
+ ", rightFactor: " + rightFactor
+ ")";
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy