// org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsUtil Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer.calcite.cost;

import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollationTraitDef;
import org.apache.calcite.rel.RelDistribution;
import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Pair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin.MapJoinStreamingRelation;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;

import com.google.common.collect.ImmutableList;

public class HiveAlgorithmsUtil {

  // Cost-model constants. Each one is fixed in the constructor from the
  // HIVE_CBO_COST_MODEL_* settings; all but cpuCost are derived by multiplying a
  // configured factor with a previously computed constant.

  // Value of HIVE_CBO_COST_MODEL_CPU, used as-is.
  private final double cpuCost;
  // cpuCost multiplied by the HIVE_CBO_COST_MODEL_NET setting.
  private final double netCost;
  // netCost multiplied by the HIVE_CBO_COST_MODEL_LFS_WRITE setting.
  private final double localFSWrite;
  // netCost multiplied by the HIVE_CBO_COST_MODEL_LFS_READ setting.
  private final double localFSRead;
  // localFSWrite multiplied by the HIVE_CBO_COST_MODEL_HDFS_WRITE setting.
  private final double hdfsWrite;
  // localFSRead multiplied by the HIVE_CBO_COST_MODEL_HDFS_READ setting.
  private final double hdfsRead;

  HiveAlgorithmsUtil(HiveConf conf) {
    cpuCost = Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_CPU));
    netCost = cpuCost
        * Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_NET));
    localFSWrite = netCost
        * Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_LFS_WRITE));
    localFSRead = netCost
        * Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_LFS_READ));
    hdfsWrite = localFSWrite
        * Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_HDFS_WRITE));
    hdfsRead = localFSRead
        * Double.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_CBO_COST_MODEL_HDFS_READ));
  }

  /**
   * Builds a cost that carries only the row count of the given operator.
   *
   * @param hr operator whose {@code getRows()} value becomes the first cost component
   * @return a {@link HiveCost} with that row count and the two remaining components set to zero
   */
  public static RelOptCost computeCardinalityBasedCost(HiveRelNode hr) {
    final double rowCount = hr.getRows();
    return new HiveCost(rowCount, 0, 0);
  }

  /**
   * Computes the cost of scanning a relation.
   *
   * @param cardinality number of rows scanned; passed through as the first cost component
   * @param avgTupleSize average size of one row
   * @return a {@link HiveCost} whose second component is zero and whose third component is
   *         {@code hdfsRead * cardinality * avgTupleSize}
   */
  public HiveCost computeScanCost(double cardinality, double avgTupleSize) {
    final double readCost = hdfsRead * cardinality * avgTupleSize;
    return new HiveCost(cardinality, 0, readCost);
  }

  // NOTE(review): this span is not valid Java as it stands. The loop header
  // "for (int i=0; i> relationInfos) {" looks like the result of text between a '<'
  // and a later '>' being dropped, which would have removed the rest of
  // computeSortMergeCPUCost and the start of whichever method iterates over
  // relationInfos (presumably a sort-merge IO cost method -- confirm). Generic type
  // arguments also appear to be missing. Restore from the upstream source before use.
  public double computeSortMergeCPUCost(
          ImmutableList cardinalities,
          ImmutableBitSet sorted) {
    // Sort-merge join
    double cpuCost = 0.0;
    for (int i=0; i> relationInfos) {
    // Sort-merge join
    double ioCost = 0.0;
    // Sums computeSortIOCost over every relation in relationInfos.
    for (Pair relationInfo : relationInfos) {
      ioCost += computeSortIOCost(relationInfo);
    }
    return ioCost;
  }

  /**
   * Computes the IO cost of sorting one relation.
   *
   * <p>The cost is the sum of three terms, each being
   * {@code cardinality * averageTupleSize} multiplied by one constant:
   * {@code localFSWrite}, {@code localFSRead} and {@code netCost}.
   *
   * @param relationInfo pair whose {@code left} is the relation's cardinality and whose
   *        {@code right} is its average tuple size; neither element may be {@code null}
   * @return the accumulated write, read and network transfer cost
   */
  public double computeSortIOCost(Pair<Double, Double> relationInfo) {
    // The pair is typed so that its elements can be unboxed to double;
    // with a raw Pair both fields are plain Object.
    final double cardinality = relationInfo.left;
    final double averageTupleSize = relationInfo.right;
    final double dataSize = cardinality * averageTupleSize;

    double ioCost = 0.0;
    // Write cost
    ioCost += dataSize * localFSWrite;
    // Read cost
    ioCost += dataSize * localFSRead;
    // Net transfer cost
    ioCost += dataSize * netCost;
    return ioCost;
  }

  // NOTE(review): this span is not valid Java as it stands. Each loop header of the
  // form "for (int i=0; i> relationInfos," or "for (int i=0; i cardinalities" looks
  // like the result of text between a '<' and a later '>' being dropped, fusing the
  // start of one method onto the parameter list of the next. Judging by the repeated
  // "// Hash-join" bodies, several CPU/IO cost methods were merged here, and the tail
  // appears to belong to a boolean check against maxSize -- confirm against the
  // upstream source and restore the missing text before use.
  public static double computeMapJoinCPUCost(
          ImmutableList cardinalities,
          ImmutableBitSet streaming) {
    // Hash-join
    double cpuCost = 0.0;
    for (int i=0; i> relationInfos,
          ImmutableBitSet streaming, int parallelism) {
    // Hash-join
    double ioCost = 0.0;
    for (int i=0; i cardinalities,
          ImmutableBitSet streaming) {
    // Hash-join
    double cpuCost = 0.0;
    for (int i=0; i> relationInfos,
          ImmutableBitSet streaming, int parallelism) {
    // Hash-join
    double ioCost = 0.0;
    for (int i=0; i cardinalities) {
    // Hash-join
    double cpuCost = 0.0;
    for (int i=0; i> relationInfos,
          ImmutableBitSet streaming, int parallelism) {
    // Hash-join
    double ioCost = 0.0;
    for (int i=0; i maxSize) {
        return false;
      }
      return true;
    }
    return false;
  }

  /**
   * Builds the collation of a join from the key columns of its equi-join predicates.
   *
   * <p>For every equi-join predicate element, the left-side key positions are collected
   * first and the right-side key positions second. The streaming relation then selects
   * which keys end up in the result: only the left keys for {@code LEFT_RELATION}, only
   * the right keys for {@code RIGHT_RELATION}, and the combined list for any other value.
   *
   * @param joinPredInfo join predicate information supplying the equi-join key positions
   * @param streamingRelation side of the join that is streamed; must not be {@code null}
   * @return a single-element list holding the canonized collation
   */
  public static ImmutableList<RelCollation> getJoinCollation(JoinPredicateInfo joinPredInfo,
          MapJoinStreamingRelation streamingRelation) {
    // Collect the key columns: all of them, and separately per join side
    final ImmutableList.Builder<RelFieldCollation> allKeys =
            new ImmutableList.Builder<RelFieldCollation>();
    final ImmutableList.Builder<RelFieldCollation> leftKeys =
            new ImmutableList.Builder<RelFieldCollation>();
    final ImmutableList.Builder<RelFieldCollation> rightKeys =
            new ImmutableList.Builder<RelFieldCollation>();
    for (JoinLeafPredicateInfo leafPredInfo : joinPredInfo.getEquiJoinPredicateElements()) {
      for (int leftPos : leafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) {
        final RelFieldCollation leftFieldCollation = new RelFieldCollation(leftPos);
        allKeys.add(leftFieldCollation);
        leftKeys.add(leftFieldCollation);
      }
      for (int rightPos : leafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) {
        final RelFieldCollation rightFieldCollation = new RelFieldCollation(rightPos);
        allKeys.add(rightFieldCollation);
        rightKeys.add(rightFieldCollation);
      }
    }

    // Pick the keys that match the streaming side
    final ImmutableList<RelFieldCollation> fieldCollations;
    switch (streamingRelation) {
      case LEFT_RELATION:
        fieldCollations = leftKeys.build();
        break;
      case RIGHT_RELATION:
        fieldCollations = rightKeys.build();
        break;
      default:
        fieldCollations = allKeys.build();
        break;
    }
    return ImmutableList.of(
            RelCollationTraitDef.INSTANCE.canonize(new HiveRelCollation(fieldCollations)));
  }

  /**
   * Builds a hash distribution over all key columns of the join's equi-join predicates.
   *
   * <p>For every equi-join predicate element, the left-side key positions are added
   * first, followed by the right-side key positions.
   *
   * @param joinPredInfo join predicate information supplying the equi-join key positions
   * @return a {@code HASH_DISTRIBUTED} {@link HiveRelDistribution} on the collected keys
   */
  public static RelDistribution getJoinRedistribution(JoinPredicateInfo joinPredInfo) {
    final ImmutableList.Builder<Integer> keys = new ImmutableList.Builder<Integer>();
    for (JoinLeafPredicateInfo leafPredInfo : joinPredInfo.getEquiJoinPredicateElements()) {
      for (int leftPos : leafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) {
        keys.add(leftPos);
      }
      for (int rightPos : leafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) {
        keys.add(rightPos);
      }
    }
    return new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, keys.build());
  }

  /**
   * Builds the hash distribution of a join on the key columns of its streaming side.
   *
   * @param joinPredInfo join predicate information supplying the equi-join key positions
   * @param streamingRelation side of the join that is streamed
   * @return a {@code HASH_DISTRIBUTED} {@link HiveRelDistribution} on the left keys for
   *         {@code LEFT_RELATION} or on the right keys for {@code RIGHT_RELATION};
   *         {@code null} for any other value
   */
  public static RelDistribution getJoinDistribution(JoinPredicateInfo joinPredInfo,
          MapJoinStreamingRelation streamingRelation) {
    // Collect the key columns of each join side
    final ImmutableList.Builder<Integer> leftKeys = new ImmutableList.Builder<Integer>();
    final ImmutableList.Builder<Integer> rightKeys = new ImmutableList.Builder<Integer>();
    for (JoinLeafPredicateInfo leafPredInfo : joinPredInfo.getEquiJoinPredicateElements()) {
      for (int leftPos : leafPredInfo.getProjsFromLeftPartOfJoinKeysInJoinSchema()) {
        leftKeys.add(leftPos);
      }
      for (int rightPos : leafPredInfo.getProjsFromRightPartOfJoinKeysInJoinSchema()) {
        rightKeys.add(rightPos);
      }
    }

    // Keep buckets from the streaming relation
    if (streamingRelation == MapJoinStreamingRelation.LEFT_RELATION) {
      return new HiveRelDistribution(
              RelDistribution.Type.HASH_DISTRIBUTED, leftKeys.build());
    }
    if (streamingRelation == MapJoinStreamingRelation.RIGHT_RELATION) {
      return new HiveRelDistribution(
              RelDistribution.Type.HASH_DISTRIBUTED, rightKeys.build());
    }
    // No streaming side: no distribution is reported
    return null;
  }

  /**
   * Computes the join memory using the streaming side reported by the join itself.
   *
   * @param join join to inspect
   * @return the result of {@link #getJoinMemory(HiveJoin, MapJoinStreamingRelation)} for
   *         {@code join.getStreamingSide()}
   */
  public static Double getJoinMemory(HiveJoin join) {
    final MapJoinStreamingRelation streamingSide = join.getStreamingSide();
    return getJoinMemory(join, streamingSide);
  }

  /**
   * Sums {@code averageRowSize * rowCount} over the join inputs that are not streamed.
   *
   * <p>The left input is counted when the streaming side is {@code NONE} or
   * {@code RIGHT_RELATION}; the right input is counted when it is {@code NONE} or
   * {@code LEFT_RELATION}.
   *
   * @param join join whose inputs are measured
   * @param streamingSide side of the join that is streamed
   * @return the summed size, or {@code null} if the average row size or the row count of
   *         a counted input is unavailable
   */
  public static Double getJoinMemory(HiveJoin join, MapJoinStreamingRelation streamingSide) {
    final boolean noStreaming = streamingSide == MapJoinStreamingRelation.NONE;
    final boolean countLeft =
            noStreaming || streamingSide == MapJoinStreamingRelation.RIGHT_RELATION;
    final boolean countRight =
            noStreaming || streamingSide == MapJoinStreamingRelation.LEFT_RELATION;

    double total = 0.0;
    if (countLeft) {
      final Double avgRowSize = RelMetadataQuery.getAverageRowSize(join.getLeft());
      final Double rowCount = RelMetadataQuery.getRowCount(join.getLeft());
      if (avgRowSize == null || rowCount == null) {
        return null;
      }
      total += avgRowSize * rowCount;
    }
    if (countRight) {
      final Double avgRowSize = RelMetadataQuery.getAverageRowSize(join.getRight());
      final Double rowCount = RelMetadataQuery.getRowCount(join.getRight());
      if (avgRowSize == null || rowCount == null) {
        return null;
      }
      total += avgRowSize * rowCount;
    }
    return total;
  }

  /**
   * Computes the number of splits of the join output when it is repartitioned.
   *
   * <p>The count is {@code averageRowSize * rowCount} of the join divided by the maximum
   * split size taken from the {@link HiveAlgorithmsConf} in the planner context, truncated
   * to an {@code int}.
   *
   * @param join join whose output is measured
   * @return the split count, or {@code null} if the average row size or the row count of
   *         the join is unavailable
   */
  public static Integer getSplitCountWithRepartition(HiveJoin join) {
    // Looked up first, before any metadata query
    final Double maxSplitSize = join.getCluster().getPlanner().getContext().
            unwrap(HiveAlgorithmsConf.class).getMaxSplitSize();
    final Double avgRowSize = RelMetadataQuery.getAverageRowSize(join);
    final Double rows = RelMetadataQuery.getRowCount(join);
    if (avgRowSize == null || rows == null) {
      return null;
    }
    // We repartition: new number of splits
    return (int) ((avgRowSize * rows) / maxSplitSize);
  }

  /**
   * Returns the split count of the streamed input of the join.
   *
   * @param join join to inspect
   * @return {@code RelMetadataQuery.splitCount} of the left input when the streaming side is
   *         {@code LEFT_RELATION}, of the right input when it is {@code RIGHT_RELATION},
   *         and {@code null} otherwise
   */
  public static Integer getSplitCountWithoutRepartition(HiveJoin join) {
    if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) {
      return RelMetadataQuery.splitCount(join.getLeft());
    }
    if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) {
      return RelMetadataQuery.splitCount(join.getRight());
    }
    return null;
  }

}