// org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel (Maven / Gradle / Ivy)
// The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.cost;
import java.util.ArrayList;
import java.util.List;
import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelDistribution;
import org.apache.calcite.rel.RelDistribution.Type;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.ImmutableIntList;
import org.apache.calcite.util.Pair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin.MapJoinStreamingRelation;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Sets;
/**
* Cost model for Tez execution engine.
*/
public class HiveOnTezCostModel extends HiveCostModel {
// Singleton instance; created on first use inside the synchronized getCostModel(HiveConf).
private static HiveOnTezCostModel INSTANCE;
// Cost helper shared by this class and its nested join algorithms; assigned by the
// private constructor from the HiveConf it receives.
private static HiveAlgorithmsUtil algoUtils;
/**
 * Returns the shared Tez cost model, creating it on the first call.
 *
 * <p>{@code conf} is only read when no instance exists yet; once an instance
 * has been created, it is returned as is and {@code conf} is not consulted.
 *
 * @param conf configuration used to build the instance on the first call
 * @return the singleton cost model
 */
public static synchronized HiveOnTezCostModel getCostModel(HiveConf conf) {
  if (INSTANCE != null) {
    return INSTANCE;
  }
  INSTANCE = new HiveOnTezCostModel(conf);
  return INSTANCE;
}
/**
 * Hands the four Tez join algorithms to the base cost model and builds the
 * shared {@code HiveAlgorithmsUtil} from the given configuration.
 *
 * @param conf configuration passed to {@code HiveAlgorithmsUtil}
 */
private HiveOnTezCostModel(HiveConf conf) {
  super(
      Sets.newHashSet(
          TezCommonJoinAlgorithm.INSTANCE,
          TezMapJoinAlgorithm.INSTANCE,
          TezBucketJoinAlgorithm.INSTANCE,
          TezSMBJoinAlgorithm.INSTANCE));
  // The helper lives in a static field, so it is replaced on every construction.
  HiveOnTezCostModel.algoUtils = new HiveAlgorithmsUtil(conf);
}
/**
 * Returns the default cost, which is the zero cost of {@code HiveCost.FACTORY}.
 *
 * @return a zero cost
 */
@Override
public RelOptCost getDefaultCost() {
  final RelOptCost zeroCost = HiveCost.FACTORY.makeZeroCost();
  return zeroCost;
}
/**
 * Computes the cost of a table scan from its row count and average row size.
 *
 * <p>NOTE(review): unlike {@code getAggregateCost}, the average row size is not
 * null-checked before it is passed on; confirm that {@code computeScanCost}
 * tolerates a missing value.
 *
 * @param ts the table scan to cost
 * @return the cost computed by {@code HiveAlgorithmsUtil.computeScanCost}
 */
@Override
public RelOptCost getScanCost(HiveTableScan ts) {
  return algoUtils.computeScanCost(
      ts.getRows(),
      RelMetadataQuery.getAverageRowSize(ts));
}
/**
 * Computes the cost of an aggregate.
 *
 * <p>An aggregate over bucketed input costs nothing. Otherwise the cost is
 * made of the input row count, the CPU cost of sorting that many rows and the
 * IO cost of the sort for rows of the input's average size.
 *
 * @param aggregate the aggregate to cost
 * @return the computed cost, or {@code null} when the row count or the average
 *         row size of the input is unknown
 */
@Override
public RelOptCost getAggregateCost(HiveAggregate aggregate) {
  if (aggregate.isBucketedInput()) {
    return HiveCost.FACTORY.makeZeroCost();
  }
  final RelNode input = aggregate.getInput();
  // 1. Sum of input cardinalities
  final Double rCount = RelMetadataQuery.getRowCount(input);
  if (rCount == null) {
    return null;
  }
  // 2. CPU cost = sorting cost
  final double cpuCost = algoUtils.computeSortCPUCost(rCount);
  // 3. IO cost = cost of writing intermediary results to local FS +
  //              cost of reading from local FS for transferring to GBy +
  //              cost of transferring map outputs to GBy operator
  final Double rAverageSize = RelMetadataQuery.getAverageRowSize(input);
  if (rAverageSize == null) {
    return null;
  }
  // Explicit type arguments instead of a raw Pair, so the call is type-checked.
  final double ioCost =
      algoUtils.computeSortIOCost(new Pair<Double, Double>(rCount, rAverageSize));
  // 4. Result
  return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost);
}
/**
* COMMON_JOIN is Sort Merge Join. Each parallel computation handles multiple
* splits.
*/
public static class TezCommonJoinAlgorithm implements JoinAlgorithm {
public static final JoinAlgorithm INSTANCE = new TezCommonJoinAlgorithm();
private static final String ALGORITHM_NAME = "CommonJoin";
@Override
public String toString() {
return ALGORITHM_NAME;
}
@Override
public boolean isExecutable(HiveJoin join) {
return true;
}
@Override
public RelOptCost getCost(HiveJoin join) {
// 1. Sum of input cardinalities
final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft());
final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight());
if (leftRCount == null || rightRCount == null) {
return null;
}
final double rCount = leftRCount + rightRCount;
// 2. CPU cost = sorting cost (for each relation) +
// total merge cost
ImmutableList cardinalities = new ImmutableList.Builder().
add(leftRCount).
add(rightRCount).
build();
final double cpuCost = algoUtils.computeSortMergeCPUCost(cardinalities, join.getSortedInputs());
// 3. IO cost = cost of writing intermediary results to local FS +
// cost of reading from local FS for transferring to join +
// cost of transferring map outputs to Join operator
final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft());
final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight());
if (leftRAverageSize == null || rightRAverageSize == null) {
return null;
}
ImmutableList> relationInfos = new ImmutableList.Builder>().
add(new Pair(leftRCount,leftRAverageSize)).
add(new Pair(rightRCount,rightRAverageSize)).
build();
final double ioCost = algoUtils.computeSortMergeIOCost(relationInfos);
// 4. Result
return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost);
}
@Override
public ImmutableList getCollation(HiveJoin join) {
return HiveAlgorithmsUtil.getJoinCollation(join.getJoinPredicateInfo(),
MapJoinStreamingRelation.NONE);
}
@Override
public RelDistribution getDistribution(HiveJoin join) {
return HiveAlgorithmsUtil.getJoinRedistribution(join.getJoinPredicateInfo());
}
@Override
public Double getMemory(HiveJoin join) {
return HiveAlgorithmsUtil.getJoinMemory(join, MapJoinStreamingRelation.NONE);
}
@Override
public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) {
JoinAlgorithm oldAlgo = join.getJoinAlgorithm();
join.setJoinAlgorithm(TezCommonJoinAlgorithm.INSTANCE);
final Double memoryWithinPhase =
RelMetadataQuery.cumulativeMemoryWithinPhase(join);
final Integer splitCount = RelMetadataQuery.splitCount(join);
join.setJoinAlgorithm(oldAlgo);
if (memoryWithinPhase == null || splitCount == null) {
return null;
}
return memoryWithinPhase / splitCount;
}
@Override
public Boolean isPhaseTransition(HiveJoin join) {
return true;
}
@Override
public Integer getSplitCount(HiveJoin join) {
return HiveAlgorithmsUtil.getSplitCountWithRepartition(join);
}
}
/**
* MAP_JOIN is a hash join that keeps the whole data set of the non-streaming
* tables in memory.
*/
public static class TezMapJoinAlgorithm implements JoinAlgorithm {
public static final JoinAlgorithm INSTANCE = new TezMapJoinAlgorithm();
private static final String ALGORITHM_NAME = "MapJoin";
@Override
public String toString() {
return ALGORITHM_NAME;
}
@Override
public boolean isExecutable(HiveJoin join) {
final Double maxMemory = join.getCluster().getPlanner().getContext().
unwrap(HiveAlgorithmsConf.class).getMaxMemory();
// Check streaming side
RelNode smallInput = join.getStreamingInput();
if (smallInput == null) {
return false;
}
return HiveAlgorithmsUtil.isFittingIntoMemory(maxMemory, smallInput, 1);
}
@Override
public RelOptCost getCost(HiveJoin join) {
// 1. Sum of input cardinalities
final Double leftRCount = RelMetadataQuery.getRowCount(join.getLeft());
final Double rightRCount = RelMetadataQuery.getRowCount(join.getRight());
if (leftRCount == null || rightRCount == null) {
return null;
}
final double rCount = leftRCount + rightRCount;
// 2. CPU cost = HashTable construction cost +
// join cost
ImmutableList cardinalities = new ImmutableList.Builder().
add(leftRCount).
add(rightRCount).
build();
ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder();
switch (join.getStreamingSide()) {
case LEFT_RELATION:
streamingBuilder.set(0);
break;
case RIGHT_RELATION:
streamingBuilder.set(1);
break;
default:
return null;
}
ImmutableBitSet streaming = streamingBuilder.build();
final double cpuCost = HiveAlgorithmsUtil.computeMapJoinCPUCost(cardinalities, streaming);
// 3. IO cost = cost of transferring small tables to join node *
// degree of parallelism
final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft());
final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight());
if (leftRAverageSize == null || rightRAverageSize == null) {
return null;
}
ImmutableList> relationInfos = new ImmutableList.Builder>().
add(new Pair(leftRCount,leftRAverageSize)).
add(new Pair(rightRCount,rightRAverageSize)).
build();
JoinAlgorithm oldAlgo = join.getJoinAlgorithm();
join.setJoinAlgorithm(TezMapJoinAlgorithm.INSTANCE);
final int parallelism = RelMetadataQuery.splitCount(join) == null
? 1 : RelMetadataQuery.splitCount(join);
join.setJoinAlgorithm(oldAlgo);
final double ioCost = algoUtils.computeMapJoinIOCost(relationInfos, streaming, parallelism);
// 4. Result
return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost);
}
@Override
public ImmutableList getCollation(HiveJoin join) {
if (join.getStreamingSide() != MapJoinStreamingRelation.LEFT_RELATION
|| join.getStreamingSide() != MapJoinStreamingRelation.RIGHT_RELATION) {
return null;
}
return HiveAlgorithmsUtil.getJoinCollation(join.getJoinPredicateInfo(),
join.getStreamingSide());
}
@Override
public RelDistribution getDistribution(HiveJoin join) {
if (join.getStreamingSide() != MapJoinStreamingRelation.LEFT_RELATION
|| join.getStreamingSide() != MapJoinStreamingRelation.RIGHT_RELATION) {
return null;
}
return HiveAlgorithmsUtil.getJoinDistribution(join.getJoinPredicateInfo(),
join.getStreamingSide());
}
@Override
public Double getMemory(HiveJoin join) {
return HiveAlgorithmsUtil.getJoinMemory(join);
}
@Override
public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) {
// Check streaming side
RelNode inMemoryInput;
if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) {
inMemoryInput = join.getRight();
} else if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) {
inMemoryInput = join.getLeft();
} else {
return null;
}
// If simple map join, the whole relation goes in memory
return RelMetadataQuery.cumulativeMemoryWithinPhase(inMemoryInput);
}
@Override
public Boolean isPhaseTransition(HiveJoin join) {
return false;
}
@Override
public Integer getSplitCount(HiveJoin join) {
return HiveAlgorithmsUtil.getSplitCountWithoutRepartition(join);
}
}
/**
* BUCKET_JOIN is a hash join where one bucket of the non-streaming tables
* is kept in memory at a time.
*/
public static class TezBucketJoinAlgorithm implements JoinAlgorithm {
// Shared instance, exposed through the JoinAlgorithm interface type.
public static final JoinAlgorithm INSTANCE = new TezBucketJoinAlgorithm();
// Name returned by toString().
private static final String ALGORITHM_NAME = "BucketJoin";
/**
 * Returns the name of this join algorithm.
 *
 * @return the value of {@code ALGORITHM_NAME}
 */
@Override
public String toString() {
  final String name = ALGORITHM_NAME;
  return name;
}
/**
 * Decides whether the bucket map join can be used for {@code join}.
 *
 * Visible checks, each returning {@code false} on failure: the join must
 * expose a streaming input, a split count must be available for that input,
 * and {@code HiveAlgorithmsUtil.isFittingIntoMemory} must accept that input
 * for the configured maximum memory and that split count. A per-input loop
 * follows, but its body is not visible here (see the NOTE(review) below).
 */
@Override
public boolean isExecutable(HiveJoin join) {
final Double maxMemory = join.getCluster().getPlanner().getContext().
unwrap(HiveAlgorithmsConf.class).getMaxMemory();
// Check streaming side
RelNode smallInput = join.getStreamingInput();
if (smallInput == null) {
return false;
}
// Get key columns: one key list per input, left first, then right
JoinPredicateInfo joinPredInfo = join.getJoinPredicateInfo();
List joinKeysInChildren = new ArrayList();
joinKeysInChildren.add(
ImmutableIntList.copyOf(
joinPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema()));
joinKeysInChildren.add(
ImmutableIntList.copyOf(
joinPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema()));
// Requirements: for Bucket, bucketed by their keys on both sides and fitting in memory
// Obtain number of buckets
//TODO: In case of non bucketed, splits would be computed based on data size/max part size
// What we need is a way to get buckets not splits
// This algorithm is set on the join only while the split count of the
// streaming input is queried; the previous algorithm is restored right after.
JoinAlgorithm oldAlgo = join.getJoinAlgorithm();
join.setJoinAlgorithm(TezBucketJoinAlgorithm.INSTANCE);
Integer buckets = RelMetadataQuery.splitCount(smallInput);
join.setJoinAlgorithm(oldAlgo);
if (buckets == null) {
return false;
}
if (!HiveAlgorithmsUtil.isFittingIntoMemory(maxMemory, smallInput, buckets)) {
return false;
}
// NOTE(review): the next line is corrupted. Everything between "i" and
// "cardinalities" is missing, so the rest of this loop, the end of
// isExecutable and the opening of getCost (which must declare leftRCount,
// rightRCount and rCount, all used further down) are absent. Presumably the
// text between a '<' and a later '>' was stripped during extraction; restore
// it from the upstream source before compiling. The same stripping appears to
// have removed generic type arguments elsewhere (e.g. "ImmutableList>" below).
for (int i=0; i cardinalities = new ImmutableList.Builder().
add(leftRCount).
add(rightRCount).
build();
// Bit 0 marks the left input as the streaming side, bit 1 the right input;
// any other streaming side aborts costing with null.
ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder();
switch (join.getStreamingSide()) {
case LEFT_RELATION:
streamingBuilder.set(0);
break;
case RIGHT_RELATION:
streamingBuilder.set(1);
break;
default:
return null;
}
ImmutableBitSet streaming = streamingBuilder.build();
final double cpuCost = algoUtils.computeBucketMapJoinCPUCost(cardinalities, streaming);
// 3. IO cost = cost of transferring small tables to join node *
// degree of parallelism
final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft());
final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight());
if (leftRAverageSize == null || rightRAverageSize == null) {
return null;
}
// One (row count, average row size) pair per input, left first
ImmutableList> relationInfos = new ImmutableList.Builder>().
add(new Pair(leftRCount,leftRAverageSize)).
add(new Pair(rightRCount,rightRAverageSize)).
build();
//TODO: No Of buckets is not same as no of splits
// The split count is queried with this algorithm temporarily set on the join;
// an unknown (null) split count is treated as a parallelism of 1.
JoinAlgorithm oldAlgo = join.getJoinAlgorithm();
join.setJoinAlgorithm(TezBucketJoinAlgorithm.INSTANCE);
final int parallelism = RelMetadataQuery.splitCount(join) == null
? 1 : RelMetadataQuery.splitCount(join);
join.setJoinAlgorithm(oldAlgo);
final double ioCost = algoUtils.computeBucketMapJoinIOCost(relationInfos, streaming, parallelism);
// 4. Result
return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost);
}
/**
 * Returns the join collation for the chosen streaming side.
 *
 * @param join the join to inspect
 * @return the collation computed by {@code HiveAlgorithmsUtil}, or
 *         {@code null} when the streaming side is neither left nor right
 */
@Override
public ImmutableList<RelCollation> getCollation(HiveJoin join) {
  final MapJoinStreamingRelation streamingSide = join.getStreamingSide();
  // Must be '&&': with '||' the test holds for every value and the method
  // would always return null.
  if (streamingSide != MapJoinStreamingRelation.LEFT_RELATION
      && streamingSide != MapJoinStreamingRelation.RIGHT_RELATION) {
    return null;
  }
  return HiveAlgorithmsUtil.getJoinCollation(join.getJoinPredicateInfo(), streamingSide);
}
/**
 * Returns the redistribution derived from the join predicate.
 *
 * @param join the join to inspect
 * @return the result of {@code HiveAlgorithmsUtil.getJoinRedistribution}
 */
@Override
public RelDistribution getDistribution(HiveJoin join) {
  final JoinPredicateInfo predicateInfo = join.getJoinPredicateInfo();
  return HiveAlgorithmsUtil.getJoinRedistribution(predicateInfo);
}
/**
 * Returns the memory needed by the join.
 *
 * @param join the join to inspect
 * @return the result of {@code HiveAlgorithmsUtil.getJoinMemory}
 */
@Override
public Double getMemory(HiveJoin join) {
  final Double joinMemory = HiveAlgorithmsUtil.getJoinMemory(join);
  return joinMemory;
}
/**
 * Returns the per-split memory of the input that is not streamed: its
 * cumulative memory within the phase divided by its split count.
 *
 * @param join the join to inspect
 * @return memory per split, or {@code null} when the streaming side is
 *         neither left nor right, or when either metadata value is unknown
 */
@Override
public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) {
  // The in-memory input is the one opposite to the streaming side.
  final MapJoinStreamingRelation side = join.getStreamingSide();
  final boolean leftStreams = side == MapJoinStreamingRelation.LEFT_RELATION;
  if (!leftStreams && side != MapJoinStreamingRelation.RIGHT_RELATION) {
    return null;
  }
  final RelNode inMemoryInput = leftStreams ? join.getRight() : join.getLeft();

  // If bucket map join, only a split goes in memory
  final Double memoryInput = RelMetadataQuery.cumulativeMemoryWithinPhase(inMemoryInput);
  final Integer splitCount = RelMetadataQuery.splitCount(inMemoryInput);
  final boolean known = memoryInput != null && splitCount != null;
  if (!known) {
    return null;
  }
  return memoryInput / splitCount;
}
/**
 * Tells whether this algorithm is a phase transition.
 *
 * @param join the join to inspect (not read)
 * @return always {@code false}
 */
@Override
public Boolean isPhaseTransition(HiveJoin join) {
  return Boolean.FALSE;
}
/**
 * Returns the split count of the join.
 *
 * @param join the join to inspect
 * @return the result of {@code HiveAlgorithmsUtil.getSplitCountWithoutRepartition}
 */
@Override
public Integer getSplitCount(HiveJoin join) {
  return HiveAlgorithmsUtil
      .getSplitCountWithoutRepartition(join);
}
}
/**
* SMB_JOIN is a Sort Merge Join. Each parallel computation handles one bucket.
*/
public static class TezSMBJoinAlgorithm implements JoinAlgorithm {
// Shared instance, exposed through the JoinAlgorithm interface type.
public static final JoinAlgorithm INSTANCE = new TezSMBJoinAlgorithm();
// Name returned by toString().
private static final String ALGORITHM_NAME = "SMBJoin";
/**
 * Returns the name of this join algorithm.
 *
 * @return the value of {@code ALGORITHM_NAME}
 */
@Override
public String toString() {
  final String name = ALGORITHM_NAME;
  return name;
}
/**
 * Decides whether the SMB join can be used for {@code join}.
 *
 * The visible part only collects the join key columns of the left and right
 * inputs. The per-input checks that follow are not visible here (see the
 * NOTE(review) below).
 */
@Override
public boolean isExecutable(HiveJoin join) {
// Requirements: for SMB, sorted by their keys on both sides and bucketed.
// Get key columns: one key list per input, left first, then right
JoinPredicateInfo joinPredInfo = join.getJoinPredicateInfo();
List joinKeysInChildren = new ArrayList();
joinKeysInChildren.add(
ImmutableIntList.copyOf(
joinPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema()));
joinKeysInChildren.add(
ImmutableIntList.copyOf(
joinPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema()));
// NOTE(review): the next line is corrupted. Everything between "i" and
// "cardinalities" is missing, so the rest of this loop, the end of
// isExecutable and the opening of getCost (which must declare leftRCount,
// rightRCount and rCount, all used further down) are absent. Presumably the
// text between a '<' and a later '>' was stripped during extraction; restore
// it from the upstream source before compiling. The same stripping appears to
// have removed generic type arguments elsewhere (e.g. "ImmutableList>" below).
for (int i=0; i cardinalities = new ImmutableList.Builder().
add(leftRCount).
add(rightRCount).
build();
// Bit 0 marks the left input as the streaming side, bit 1 the right input;
// any other streaming side aborts costing with null.
ImmutableBitSet.Builder streamingBuilder = new ImmutableBitSet.Builder();
switch (join.getStreamingSide()) {
case LEFT_RELATION:
streamingBuilder.set(0);
break;
case RIGHT_RELATION:
streamingBuilder.set(1);
break;
default:
return null;
}
ImmutableBitSet streaming = streamingBuilder.build();
// The CPU cost only takes the cardinalities; the streaming set is used for IO below.
final double cpuCost = HiveAlgorithmsUtil.computeSMBMapJoinCPUCost(cardinalities);
// 3. IO cost = cost of transferring small tables to join node *
// degree of parallelism
final Double leftRAverageSize = RelMetadataQuery.getAverageRowSize(join.getLeft());
final Double rightRAverageSize = RelMetadataQuery.getAverageRowSize(join.getRight());
if (leftRAverageSize == null || rightRAverageSize == null) {
return null;
}
// One (row count, average row size) pair per input, left first
ImmutableList> relationInfos = new ImmutableList.Builder>().
add(new Pair(leftRCount,leftRAverageSize)).
add(new Pair(rightRCount,rightRAverageSize)).
build();
// TODO: Split count is not the same as no of buckets
// The split count is queried with this algorithm temporarily set on the join;
// an unknown (null) split count is treated as a parallelism of 1.
JoinAlgorithm oldAlgo = join.getJoinAlgorithm();
join.setJoinAlgorithm(TezSMBJoinAlgorithm.INSTANCE);
final int parallelism = RelMetadataQuery.splitCount(join) == null ? 1 : RelMetadataQuery
.splitCount(join);
join.setJoinAlgorithm(oldAlgo);
final double ioCost = algoUtils.computeSMBMapJoinIOCost(relationInfos, streaming, parallelism);
// 4. Result
return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost);
}
/**
 * Returns the join collation computed with no streaming relation
 * ({@code MapJoinStreamingRelation.NONE}).
 *
 * @param join the join to inspect
 * @return the result of {@code HiveAlgorithmsUtil.getJoinCollation}
 */
@Override
public ImmutableList<RelCollation> getCollation(HiveJoin join) {
  // The element type is spelled out so callers do not receive a raw list.
  return HiveAlgorithmsUtil.getJoinCollation(
      join.getJoinPredicateInfo(), MapJoinStreamingRelation.NONE);
}
/**
 * Returns the redistribution derived from the join predicate.
 *
 * @param join the join to inspect
 * @return the result of {@code HiveAlgorithmsUtil.getJoinRedistribution}
 */
@Override
public RelDistribution getDistribution(HiveJoin join) {
  final JoinPredicateInfo predicateInfo = join.getJoinPredicateInfo();
  return HiveAlgorithmsUtil.getJoinRedistribution(predicateInfo);
}
/**
 * Returns the memory attributed to this join algorithm.
 *
 * @param join the join to inspect (not read)
 * @return always {@code 0.0}
 */
@Override
public Double getMemory(HiveJoin join) {
  return Double.valueOf(0.0);
}
/**
 * Returns the cumulative memory within the phase divided by the split count,
 * both queried while this algorithm is temporarily set on the join.
 *
 * @param join the join to inspect; its algorithm is restored before returning
 * @return memory per split, or {@code null} when either value is unknown
 */
@Override
public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) {
  // TODO: Split count is not same as no of buckets
  final JoinAlgorithm oldAlgo = join.getJoinAlgorithm();
  final Double memoryWithinPhase;
  final Integer splitCount;
  join.setJoinAlgorithm(TezSMBJoinAlgorithm.INSTANCE);
  try {
    memoryWithinPhase = RelMetadataQuery.cumulativeMemoryWithinPhase(join);
    splitCount = RelMetadataQuery.splitCount(join);
  } finally {
    // Restore the previous algorithm even if a metadata query throws.
    join.setJoinAlgorithm(oldAlgo);
  }
  if (memoryWithinPhase == null || splitCount == null) {
    return null;
  }
  return memoryWithinPhase / splitCount;
}
/**
 * Tells whether this algorithm is a phase transition.
 *
 * @param join the join to inspect (not read)
 * @return always {@code false}
 */
@Override
public Boolean isPhaseTransition(HiveJoin join) {
  return Boolean.FALSE;
}
/**
 * Returns the split count of the join.
 *
 * @param join the join to inspect
 * @return the result of {@code HiveAlgorithmsUtil.getSplitCountWithoutRepartition}
 */
@Override
public Integer getSplitCount(HiveJoin join) {
  return HiveAlgorithmsUtil
      .getSplitCountWithoutRepartition(join);
}
}
}