All downloads are FREE. The search and download functionality uses the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsUtil Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.cost;
import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollationTraitDef;
import org.apache.calcite.rel.RelDistribution;
import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Pair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin.MapJoinStreamingRelation;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import com.google.common.collect.ImmutableList;
public class HiveAlgorithmsUtil {
// Cost-model factors. All of them are assigned exactly once, in the constructor,
// from HiveConf settings; each one is the product of the factor it is derived
// from and its own configured multiplier.

// Value of HIVE_CBO_COST_MODEL_CPU.
private final double cpuCost;
// cpuCost multiplied by HIVE_CBO_COST_MODEL_NET.
private final double netCost;
// netCost multiplied by HIVE_CBO_COST_MODEL_LFS_WRITE.
private final double localFSWrite;
// netCost multiplied by HIVE_CBO_COST_MODEL_LFS_READ.
private final double localFSRead;
// localFSWrite multiplied by HIVE_CBO_COST_MODEL_HDFS_WRITE.
private final double hdfsWrite;
// localFSRead multiplied by HIVE_CBO_COST_MODEL_HDFS_READ.
private final double hdfsRead;
HiveAlgorithmsUtil(HiveConf conf) {
cpuCost = Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_CPU));
netCost = cpuCost
* Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_NET));
localFSWrite = netCost
* Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_LFS_WRITE));
localFSRead = netCost
* Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_LFS_READ));
hdfsWrite = localFSWrite
* Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_HDFS_WRITE));
hdfsRead = localFSRead
* Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_HDFS_READ));
}
/**
 * Builds a cost that depends only on the row count of the given node.
 *
 * @param hr node whose {@code getRows()} value becomes the first cost component
 * @return a {@link HiveCost} whose second and third components are zero
 */
public static RelOptCost computeCardinalityBasedCost(HiveRelNode hr) {
  final RelOptCost cardinalityOnlyCost = new HiveCost(hr.getRows(), 0, 0);
  return cardinalityOnlyCost;
}
/**
 * Computes the cost of scanning a relation.
 *
 * @param cardinality number of tuples scanned; passed through as the first cost component
 * @param avgTupleSize average size of one tuple
 * @return a {@link HiveCost} whose second component is zero and whose third
 *         component is {@code hdfsRead * cardinality * avgTupleSize}
 */
public HiveCost computeScanCost(double cardinality, double avgTupleSize) {
  // Multiplication order is kept left-to-right so the floating-point result is unchanged.
  final double readCost = hdfsRead * cardinality * avgTupleSize;
  return new HiveCost(cardinality, 0, readCost);
}
// NOTE(review): this span is damaged and does not compile as shown. The text
// that originally followed `i` in the loop condition below, up to the
// `> relationInfos) {` that now ends the same line, is missing. As a result the
// beginning of computeSortMergeCPUCost (CPU cost over `cardinalities` /
// `sorted`) runs straight into the end of what looks like a different method
// that sums computeSortIOCost over `relationInfos`. Restore the missing text
// from the upstream file before relying on either method.
public double computeSortMergeCPUCost(
ImmutableList cardinalities,
ImmutableBitSet sorted) {
// Sort-merge join
double cpuCost = 0.0;
// NOTE(review): truncated loop header; see the note at the top of this span.
for (int i=0; i> relationInfos) {
// Sort-merge join
double ioCost = 0.0;
// Accumulates the per-relation sort I/O cost of every entry in relationInfos.
for (Pair relationInfo : relationInfos) {
ioCost += computeSortIOCost(relationInfo);
}
return ioCost;
}
/**
 * Computes the I/O cost of sorting one relation.
 *
 * <p>The cost is the sum of three terms, each equal to
 * {@code cardinality * averageTupleSize} multiplied by one factor: the local
 * file system write factor, the local file system read factor, and the
 * network factor.
 *
 * @param relationInfo pair whose {@code left} is the relation's cardinality
 *        and whose {@code right} is its average tuple size; neither may be
 *        {@code null} because both are unboxed
 * @return the summed write, read and network transfer cost
 */
public double computeSortIOCost(Pair<Double, Double> relationInfo) {
  // The parameter must be parameterised: with a raw Pair the two fields are
  // typed Object and cannot be assigned to double.
  final double cardinality = relationInfo.left;
  final double averageTupleSize = relationInfo.right;
  // Shared by all three terms; (cardinality * averageTupleSize) * factor is the
  // same evaluation order as writing the product out each time.
  final double dataVolume = cardinality * averageTupleSize;

  // Sort-merge join
  double ioCost = 0.0;
  // Write cost
  ioCost += dataVolume * localFSWrite;
  // Read cost
  ioCost += dataVolume * localFSRead;
  // Net transfer cost
  ioCost += dataVolume * netCost;
  return ioCost;
}
// NOTE(review): this span is damaged and does not compile as shown. On every
// line that starts with `for (int i=0; i` the text following `i` is missing up
// to a later `>` (or, on the last such line, up to ` maxSize) {`). What remains
// is the header of computeMapJoinCPUCost followed by fragments that appear to
// belong to several further hash-join cost methods (taking `relationInfos`,
// `streaming`, `parallelism`, or `cardinalities`) and, at the end, the tail of
// a method that returns a boolean after a comparison against `maxSize`. The
// missing method names, loop bodies and return statements cannot be recovered
// from this text; restore them from the upstream file.
public static double computeMapJoinCPUCost(
ImmutableList cardinalities,
ImmutableBitSet streaming) {
// Hash-join
double cpuCost = 0.0;
// NOTE(review): truncated; the rest of this method and the next method's header are missing.
for (int i=0; i> relationInfos,
ImmutableBitSet streaming, int parallelism) {
// Hash-join
double ioCost = 0.0;
// NOTE(review): truncated; the rest of this method and the next method's header are missing.
for (int i=0; i cardinalities,
ImmutableBitSet streaming) {
// Hash-join
double cpuCost = 0.0;
// NOTE(review): truncated; the rest of this method and the next method's header are missing.
for (int i=0; i> relationInfos,
ImmutableBitSet streaming, int parallelism) {
// Hash-join
double ioCost = 0.0;
// NOTE(review): truncated; the rest of this method and the next method's header are missing.
for (int i=0; i cardinalities) {
// Hash-join
double cpuCost = 0.0;
// NOTE(review): truncated; the rest of this method and the next method's header are missing.
for (int i=0; i> relationInfos,
ImmutableBitSet streaming, int parallelism) {
// Hash-join
double ioCost = 0.0;
// NOTE(review): truncated; the left-hand side of the comparison against maxSize is missing.
for (int i=0; i maxSize) {
return false;
}
return true;
}
return false;
}
/**
 * Builds the collation of a join from its equi-join keys.
 *
 * <p>For every equi-join predicate element, one {@link RelFieldCollation} is
 * created (single-argument constructor) for each position returned by
 * {@code getProjsFromLeftPartOfJoinKeysInJoinSchema()} and by
 * {@code getProjsFromRightPartOfJoinKeysInJoinSchema()}. The streaming
 * relation selects which of them end up in the result:
 * <ul>
 *   <li>{@code LEFT_RELATION}: only the left-side key collations;</li>
 *   <li>{@code RIGHT_RELATION}: only the right-side key collations;</li>
 *   <li>anything else: left- and right-side key collations, interleaved per
 *       predicate element (left keys first).</li>
 * </ul>
 *
 * @param joinPredInfo join predicate information supplying the equi-join elements
 * @param streamingRelation which side of the join is streamed
 * @return a single-element list holding the canonized {@link HiveRelCollation}
 * @throws NullPointerException if {@code streamingRelation} is {@code null}
 *         (it is the subject of a {@code switch})
 */
public static ImmutableList<RelCollation> getJoinCollation(JoinPredicateInfo joinPredInfo,
    MapJoinStreamingRelation streamingRelation) {
  // Compute collations. The builders are parameterised instead of raw so the
  // element type is checked by the compiler.
  final ImmutableList.Builder<RelFieldCollation> bothSidesBuilder =
      new ImmutableList.Builder<RelFieldCollation>();
  final ImmutableList.Builder<RelFieldCollation> leftSideBuilder =
      new ImmutableList.Builder<RelFieldCollation>();
  final ImmutableList.Builder<RelFieldCollation> rightSideBuilder =
      new ImmutableList.Builder<RelFieldCollation>();
  for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) {
    final JoinLeafPredicateInfo leafPredInfo =
        joinPredInfo.getEquiJoinPredicateElements().get(i);
    for (int leftPos : leafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) {
      final RelFieldCollation leftFieldCollation = new RelFieldCollation(leftPos);
      bothSidesBuilder.add(leftFieldCollation);
      leftSideBuilder.add(leftFieldCollation);
    }
    for (int rightPos : leafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) {
      final RelFieldCollation rightFieldCollation = new RelFieldCollation(rightPos);
      bothSidesBuilder.add(rightFieldCollation);
      rightSideBuilder.add(rightFieldCollation);
    }
  }

  // Pick the field collations that match the streaming side; the wrapping and
  // canonization below is identical for all three cases.
  final ImmutableList.Builder<RelFieldCollation> selectedBuilder;
  switch (streamingRelation) {
    case LEFT_RELATION:
      selectedBuilder = leftSideBuilder;
      break;
    case RIGHT_RELATION:
      selectedBuilder = rightSideBuilder;
      break;
    default:
      selectedBuilder = bothSidesBuilder;
      break;
  }

  // Return join collations
  return ImmutableList.<RelCollation>of(
      RelCollationTraitDef.INSTANCE.canonize(
          new HiveRelCollation(selectedBuilder.build())));
}
/**
 * Builds the hash distribution over all equi-join keys of a join.
 *
 * <p>For every equi-join predicate element, the positions returned by
 * {@code getProjsFromLeftPartOfJoinKeysInJoinSchema()} are appended first,
 * followed by those returned by
 * {@code getProjsFromRightPartOfJoinKeysInJoinSchema()}.
 *
 * @param joinPredInfo join predicate information supplying the equi-join elements
 * @return a {@code HASH_DISTRIBUTED} {@link HiveRelDistribution} on the collected keys
 */
public static RelDistribution getJoinRedistribution(JoinPredicateInfo joinPredInfo) {
  // Compute distribution. The builder is parameterised instead of raw: every
  // element added is an int key position.
  final ImmutableList.Builder<Integer> keysListBuilder =
      new ImmutableList.Builder<Integer>();
  for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) {
    final JoinLeafPredicateInfo leafPredInfo =
        joinPredInfo.getEquiJoinPredicateElements().get(i);
    for (int leftPos : leafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) {
      keysListBuilder.add(leftPos);
    }
    for (int rightPos : leafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) {
      keysListBuilder.add(rightPos);
    }
  }
  return new HiveRelDistribution(
      RelDistribution.Type.HASH_DISTRIBUTED, keysListBuilder.build());
}
/**
 * Builds the hash distribution of a join that keeps the buckets of its
 * streaming relation.
 *
 * @param joinPredInfo join predicate information supplying the equi-join elements
 * @param streamingRelation which side of the join is streamed
 * @return a {@code HASH_DISTRIBUTED} {@link HiveRelDistribution} on the
 *         left-side keys for {@code LEFT_RELATION}, on the right-side keys for
 *         {@code RIGHT_RELATION}, and {@code null} for any other value
 */
public static RelDistribution getJoinDistribution(JoinPredicateInfo joinPredInfo,
    MapJoinStreamingRelation streamingRelation) {
  // Compute distribution. The builders are parameterised instead of raw: every
  // element added is an int key position.
  final ImmutableList.Builder<Integer> leftKeysListBuilder =
      new ImmutableList.Builder<Integer>();
  final ImmutableList.Builder<Integer> rightKeysListBuilder =
      new ImmutableList.Builder<Integer>();
  for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) {
    final JoinLeafPredicateInfo leafPredInfo =
        joinPredInfo.getEquiJoinPredicateElements().get(i);
    for (int leftPos : leafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) {
      leftKeysListBuilder.add(leftPos);
    }
    for (int rightPos : leafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) {
      rightKeysListBuilder.add(rightPos);
    }
  }

  // Keep buckets from the streaming relation
  if (streamingRelation == MapJoinStreamingRelation.LEFT_RELATION) {
    return new HiveRelDistribution(
        RelDistribution.Type.HASH_DISTRIBUTED, leftKeysListBuilder.build());
  }
  if (streamingRelation == MapJoinStreamingRelation.RIGHT_RELATION) {
    return new HiveRelDistribution(
        RelDistribution.Type.HASH_DISTRIBUTED, rightKeysListBuilder.build());
  }
  // Neither side is streamed: there is no distribution to report.
  return null;
}
/**
 * Computes the memory figure of a join using the join's own streaming side.
 *
 * @param join the join to inspect
 * @return the result of {@link #getJoinMemory(HiveJoin, MapJoinStreamingRelation)}
 *         for {@code join.getStreamingSide()}
 */
public static Double getJoinMemory(HiveJoin join) {
  final MapJoinStreamingRelation ownStreamingSide = join.getStreamingSide();
  return getJoinMemory(join, ownStreamingSide);
}
/**
 * Computes the memory figure of a join for a given streaming side.
 *
 * <p>The figure is the sum of {@code averageRowSize * rowCount} over the
 * inputs that are not streamed: the left input when the streaming side is
 * {@code NONE} or {@code RIGHT_RELATION}, and the right input when it is
 * {@code NONE} or {@code LEFT_RELATION}.
 *
 * @param join the join whose inputs are measured
 * @param streamingSide which side of the join is streamed
 * @return the summed size, or {@code null} if the average row size or row
 *         count of a contributing input is unavailable
 */
public static Double getJoinMemory(HiveJoin join, MapJoinStreamingRelation streamingSide) {
  final boolean nothingStreamed = streamingSide == MapJoinStreamingRelation.NONE;
  double memory = 0.0;

  // Left side
  if (nothingStreamed || streamingSide == MapJoinStreamingRelation.RIGHT_RELATION) {
    final Double leftSize = estimateTotalRowSize(join.getLeft());
    if (leftSize == null) {
      return null;
    }
    memory += leftSize;
  }

  // Right side
  if (nothingStreamed || streamingSide == MapJoinStreamingRelation.LEFT_RELATION) {
    final Double rightSize = estimateTotalRowSize(join.getRight());
    if (rightSize == null) {
      return null;
    }
    memory += rightSize;
  }

  return memory;
}

/** Returns averageRowSize * rowCount of the input, or null if either metadata value is missing. */
private static Double estimateTotalRowSize(RelNode input) {
  final Double avgRowSize = RelMetadataQuery.getAverageRowSize(input);
  final Double rowCount = RelMetadataQuery.getRowCount(input);
  if (avgRowSize == null || rowCount == null) {
    return null;
  }
  return avgRowSize * rowCount;
}
/**
 * Computes the number of splits of a join's output when it is repartitioned.
 *
 * <p>The count is {@code averageRowSize * rowCount} of the join divided by the
 * maximum split size taken from the planner context's
 * {@code HiveAlgorithmsConf}, narrowed to {@code int} (the fractional part is
 * discarded).
 *
 * @param join the join to inspect
 * @return the split count, or {@code null} if the join's average row size or
 *         row count is unavailable
 */
public static Integer getSplitCountWithRepartition(HiveJoin join) {
  // Looked up before the metadata queries, as before.
  final HiveAlgorithmsConf algorithmsConf =
      join.getCluster().getPlanner().getContext().unwrap(HiveAlgorithmsConf.class);
  final Double maxSplitSize = algorithmsConf.getMaxSplitSize();

  // We repartition: new number of splits
  final Double avgRowSize = RelMetadataQuery.getAverageRowSize(join);
  final Double rows = RelMetadataQuery.getRowCount(join);
  if (avgRowSize == null || rows == null) {
    return null;
  }

  final double totalSize = avgRowSize * rows;
  // Narrowing cast: same conversion as Double.intValue().
  return (int) (totalSize / maxSplitSize);
}
/**
 * Computes the number of splits of a join when it is not repartitioned.
 *
 * <p>The count is the split count of the streamed input as reported by
 * {@code RelMetadataQuery.splitCount}.
 *
 * @param join the join to inspect
 * @return the split count of the left input for {@code LEFT_RELATION}, of the
 *         right input for {@code RIGHT_RELATION}, and {@code null} for any
 *         other streaming side
 */
public static Integer getSplitCountWithoutRepartition(HiveJoin join) {
  final MapJoinStreamingRelation side = join.getStreamingSide();
  final boolean streamsLeft = side == MapJoinStreamingRelation.LEFT_RELATION;
  final boolean streamsRight = side == MapJoinStreamingRelation.RIGHT_RELATION;
  if (!streamsLeft && !streamsRight) {
    // No streamed input to take the split count from.
    return null;
  }
  final RelNode largeInput = streamsLeft ? join.getLeft() : join.getRight();
  return RelMetadataQuery.splitCount(largeInput);
}
}