All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.ql.plan.GroupByDesc Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.plan;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hive.common.util.AnnotationUtils;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;


/**
 * GroupByDesc.
 *
 */
@Explain(displayName = "Group By Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
public class GroupByDesc extends AbstractOperatorDesc {
  /**
   * Group-by Mode: COMPLETE: complete 1-phase aggregation: iterate, terminate
   * PARTIAL1: partial aggregation - first phase: iterate, terminatePartial
   * PARTIAL2: partial aggregation - second phase: merge, terminatePartial
   * PARTIALS: For non-distinct the same as PARTIAL2, for distinct the same as
   * PARTIAL1
   * FINAL: partial aggregation - final phase: merge, terminate
   * HASH: For non-distinct the same as PARTIAL1 but use hash-table-based aggregation
   * MERGEPARTIAL: FINAL for non-distinct aggregations, COMPLETE for distinct
   * aggregations.
   */
  private static final long serialVersionUID = 1L;

  /**
   * Mode.
   *
   */
  public static enum Mode {
    COMPLETE, PARTIAL1, PARTIAL2, PARTIALS, FINAL, HASH, MERGEPARTIAL
  };

  private Mode mode;

  // no hash aggregations for group by
  private boolean bucketGroup;

  private ArrayList keys;
  private List listGroupingSets;
  private boolean groupingSetsPresent;
  private int groupingSetPosition = -1;
  private ArrayList aggregators;
  private ArrayList outputColumnNames;
  private float groupByMemoryUsage;
  private float memoryThreshold;
  transient private boolean isDistinct;
  private boolean dontResetAggrsDistinct;

  public GroupByDesc() {
    vectorDesc = new VectorGroupByDesc();
  }

  public GroupByDesc(
      final Mode mode,
      final ArrayList outputColumnNames,
      final ArrayList keys,
      final ArrayList aggregators,
      final float groupByMemoryUsage,
      final float memoryThreshold,
      final List listGroupingSets,
      final boolean groupingSetsPresent,
      final int groupingSetsPosition,
      final boolean isDistinct) {
    this(mode, outputColumnNames, keys, aggregators,
        false, groupByMemoryUsage, memoryThreshold, listGroupingSets,
        groupingSetsPresent, groupingSetsPosition, isDistinct);
  }

  public GroupByDesc(
      final Mode mode,
      final ArrayList outputColumnNames,
      final ArrayList keys,
      final ArrayList aggregators,
      final boolean bucketGroup,
      final float groupByMemoryUsage,
      final float memoryThreshold,
      final List listGroupingSets,
      final boolean groupingSetsPresent,
      final int groupingSetsPosition,
      final boolean isDistinct) {
    vectorDesc = new VectorGroupByDesc();
    this.mode = mode;
    this.outputColumnNames = outputColumnNames;
    this.keys = keys;
    this.aggregators = aggregators;
    this.bucketGroup = bucketGroup;
    this.groupByMemoryUsage = groupByMemoryUsage;
    this.memoryThreshold = memoryThreshold;
    this.listGroupingSets = listGroupingSets;
    this.groupingSetsPresent = groupingSetsPresent;
    this.groupingSetPosition = groupingSetsPosition;
    this.isDistinct = isDistinct;
  }

  public Mode getMode() {
    return mode;
  }

  @Explain(displayName = "mode")
  public String getModeString() {
    switch (mode) {
    case COMPLETE:
      return "complete";
    case PARTIAL1:
      return "partial1";
    case PARTIAL2:
      return "partial2";
    case PARTIALS:
      return "partials";
    case HASH:
      return "hash";
    case FINAL:
      return "final";
    case MERGEPARTIAL:
      return "mergepartial";
    }

    return "unknown";
  }

  public void setMode(final Mode mode) {
    this.mode = mode;
  }

  @Explain(displayName = "keys")
  public String getKeyString() {
    return PlanUtils.getExprListString(keys);
  }

  @Explain(displayName = "keys", explainLevels = { Level.USER })
  public String getUserLevelExplainKeyString() {
    return PlanUtils.getExprListString(keys, true);
  }

  public ArrayList getKeys() {
    return keys;
  }

  public void setKeys(final ArrayList keys) {
    this.keys = keys;
  }

  @Explain(displayName = "outputColumnNames")
  public ArrayList getOutputColumnNames() {
    return outputColumnNames;
  }

  @Explain(displayName = "Output", explainLevels = { Level.USER })
  public ArrayList getUserLevelExplainOutputColumnNames() {
    return outputColumnNames;
  }

  @Explain(displayName = "pruneGroupingSetId", displayOnlyOnTrue = true)
  public boolean pruneGroupingSetId() {
    return groupingSetPosition >= 0 &&
        outputColumnNames.size() != keys.size() + aggregators.size();
  }

  public void setOutputColumnNames(
      ArrayList outputColumnNames) {
    this.outputColumnNames = outputColumnNames;
  }

  public float getGroupByMemoryUsage() {
    return groupByMemoryUsage;
  }

  public void setGroupByMemoryUsage(float groupByMemoryUsage) {
    this.groupByMemoryUsage = groupByMemoryUsage;
  }

  public float getMemoryThreshold() {
    return memoryThreshold;
  }

  public void setMemoryThreshold(float memoryThreshold) {
    this.memoryThreshold = memoryThreshold;
  }

  @Explain(displayName = "aggregations", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
  public List getAggregatorStrings() {
    List res = new ArrayList();
    for (AggregationDesc agg: aggregators) {
      res.add(agg.getExprString());
    }
    return res;
  }

  public ArrayList getAggregators() {
    return aggregators;
  }

  public void setAggregators(
      final ArrayList aggregators) {
    this.aggregators = aggregators;
  }

  public boolean isAggregate() {
    if (this.aggregators != null && !this.aggregators.isEmpty()) {
      return true;
    }
    return false;
  }
  
  @Explain(displayName = "bucketGroup", displayOnlyOnTrue = true)
  public boolean getBucketGroup() {
    return bucketGroup;
  }

  public void setBucketGroup(boolean bucketGroup) {
    this.bucketGroup = bucketGroup;
  }

  /**
   * Checks if this grouping is like distinct, which means that all non-distinct grouping
   * columns behave like they were distinct - for example min and max operators.
   */
  public boolean isDistinctLike() {
    ArrayList aggregators = getAggregators();
    for (AggregationDesc ad : aggregators) {
      if (!ad.getDistinct()) {
        GenericUDAFEvaluator udafEval = ad.getGenericUDAFEvaluator();
        UDFType annot = AnnotationUtils.getAnnotation(udafEval.getClass(), UDFType.class);
        if (annot == null || !annot.distinctLike()) {
          return false;
        }
      }
    }
    return true;
  }

  // Consider a query like:
  // select a, b, count(distinct c) from T group by a,b with rollup;
  // Assume that hive.map.aggr is set to true and hive.groupby.skewindata is false,
  // in which case the group by would execute as a single map-reduce job.
  // For the group-by, the group by keys should be: a,b,groupingSet(for rollup), c
  // So, the starting position of grouping set need to be known
  public List getListGroupingSets() {
    return listGroupingSets;
  }

  public void setListGroupingSets(final List listGroupingSets) {
    this.listGroupingSets = listGroupingSets;
  }

  public boolean isGroupingSetsPresent() {
    return groupingSetsPresent;
  }

  public void setGroupingSetsPresent(boolean groupingSetsPresent) {
    this.groupingSetsPresent = groupingSetsPresent;
  }

  public int getGroupingSetPosition() {
    return groupingSetPosition;
  }

  public void setGroupingSetPosition(int groupingSetPosition) {
    this.groupingSetPosition = groupingSetPosition;
  }

  public boolean isDontResetAggrsDistinct() {
    return dontResetAggrsDistinct;
  }

  public void setDontResetAggrsDistinct(boolean dontResetAggrsDistinct) {
    this.dontResetAggrsDistinct = dontResetAggrsDistinct;
  }

  public boolean isDistinct() {
    return isDistinct;
  }

  public void setDistinct(boolean isDistinct) {
    this.isDistinct = isDistinct;
  }

  public class GroupByOperatorExplainVectorization extends OperatorExplainVectorization {

    private final GroupByDesc groupByDesc;
    private final VectorGroupByDesc vectorGroupByDesc;

    public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, VectorDesc vectorDesc) {
      // Native vectorization not supported.
      super(vectorDesc, false);
      this.groupByDesc = groupByDesc;
      vectorGroupByDesc = (VectorGroupByDesc) vectorDesc;
    }

    @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public List getKeysExpression() {
      return vectorExpressionsToStringList(vectorGroupByDesc.getKeyExpressions());
    }

    @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public List getAggregators() {
      VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators();
      List vecAggrList = new ArrayList(vecAggregators.length);
      for (VectorAggregateExpression vecAggr : vecAggregators) {
        vecAggrList.add(vecAggr.toString());
      }
      return vecAggrList;
    }

    @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutput", explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public boolean getGroupByRowOutputCascade() {
      return vectorGroupByDesc.isVectorOutput();
    }

    @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public List getVectorOutputConditionsNotMet() {
      List results = new ArrayList();
      VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators();
      for (VectorAggregateExpression vecAggr : vecAggregators) {
        Category category = Vectorizer.aggregationOutputCategory(vecAggr);
        if (category != ObjectInspector.Category.PRIMITIVE) {
          results.add(
              "Vector output of " + vecAggr.toString() + " output type " + category + " requires PRIMITIVE IS false");
        }
      }
      if (results.size() == 0) {
        return null;
      }
      return results;
    }

    @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public String getProjectedOutputColumns() {
      return Arrays.toString(vectorGroupByDesc.getProjectedOutputColumns());
    }
  }

  @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
  public GroupByOperatorExplainVectorization getGroupByVectorization() {
    if (vectorDesc == null) {
      return null;
    }
    return new GroupByOperatorExplainVectorization(this, vectorDesc);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy