org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of hive-exec Show documentation
The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.optimizer;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;

/*
 * This is a pluggable policy to choose the candidate map-join table for converting a join to a
 * sort merge join. The largest table is chosen based on the size of the tables.
 */
public class AvgPartitionSizeBasedBigTableSelectorForAutoSMJ
    extends SizeBasedBigTableSelectorForAutoSMJ
    implements BigTableSelectorForAutoSMJ {

  private static final Log LOG = LogFactory
      .getLog(AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.class.getName());

  public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp,
      Set bigTableCandidates)
    throws SemanticException {
    int bigTablePos = -1;
    long maxSize = -1;
    int numPartitionsCurrentBigTable = 0; // number of partitions for the chosen big table
    HiveConf conf = parseCtx.getConf();

    try {
      List topOps = new ArrayList();
      getListTopOps(joinOp, topOps);
      int currentPos = 0;
      for (TableScanOperator topOp : topOps) {

        if (topOp == null) {
          return -1;
        }

        if (!bigTableCandidates.contains(currentPos)) {
          currentPos++;
          continue;
        }

        int numPartitions = 1; // in case the sizes match, preference is
                               // given to the table with fewer partitions
        Table table = topOp.getConf().getTableMetadata();
        long averageSize = 0;

        if (!table.isPartitioned()) {
          averageSize = getSize(conf, table);
        }
        else {
          // For partitioned tables, get the size of all the partitions
          PrunedPartitionList partsList = PartitionPruner.prune(topOp, parseCtx, null);
          numPartitions = partsList.getNotDeniedPartns().size();
          long totalSize = 0;
          for (Partition part : partsList.getNotDeniedPartns()) {
            totalSize += getSize(conf, part);
          }
          averageSize = numPartitions == 0 ? 0 : totalSize/numPartitions;
        }

        if (averageSize > maxSize) {
          maxSize = averageSize;
          bigTablePos = currentPos;
          numPartitionsCurrentBigTable = numPartitions;
        }
        // If the sizes match, prefer the table with fewer partitions
        else if (averageSize == maxSize) {
          if (numPartitions < numPartitionsCurrentBigTable) {
            bigTablePos = currentPos;
            numPartitionsCurrentBigTable = numPartitions;
          }
        }

        currentPos++;
      }
    } catch (HiveException e) {
      throw new SemanticException(e.getMessage());
    }

    return bigTablePos;
  }
}