/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.calcite.adapter.druid.DruidQuery;
import org.apache.calcite.adapter.druid.DruidRules.DruidAggregateFilterTransposeRule;
import org.apache.calcite.adapter.druid.DruidRules.DruidAggregateProjectRule;
import org.apache.calcite.adapter.druid.DruidRules.DruidAggregateRule;
import org.apache.calcite.adapter.druid.DruidRules.DruidFilterAggregateTransposeRule;
import org.apache.calcite.adapter.druid.DruidRules.DruidFilterProjectTransposeRule;
import org.apache.calcite.adapter.druid.DruidRules.DruidFilterRule;
import org.apache.calcite.adapter.druid.DruidRules.DruidHavingFilterRule;
import org.apache.calcite.adapter.druid.DruidRules.DruidPostAggregationProjectRule;
import org.apache.calcite.adapter.druid.DruidRules.DruidProjectFilterTransposeRule;
import org.apache.calcite.adapter.druid.DruidRules.DruidProjectRule;
import org.apache.calcite.adapter.druid.DruidRules.DruidProjectSortTransposeRule;
import org.apache.calcite.adapter.druid.DruidRules.DruidSortProjectTransposeRule;
import org.apache.calcite.adapter.druid.DruidRules.DruidSortRule;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.fun.SqlSumEmptyIsZeroAggFunction;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.tools.RelBuilderFactory;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Pair;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
/**
* Druid rules with Hive builder factory.
*/
public class HiveDruidRules {
public static final DruidFilterRule FILTER = new DruidFilterRule(HiveRelFactories.HIVE_BUILDER);
public static final DruidProjectRule PROJECT = new DruidProjectRule(HiveRelFactories.HIVE_BUILDER);
public static final DruidAggregateRule AGGREGATE = new DruidAggregateRule(HiveRelFactories.HIVE_BUILDER);
public static final DruidAggregateProjectRule AGGREGATE_PROJECT =
new DruidAggregateProjectRule(HiveRelFactories.HIVE_BUILDER);
public static final DruidSortRule SORT = new DruidSortRule(HiveRelFactories.HIVE_BUILDER);
public static final DruidSortProjectTransposeRule SORT_PROJECT_TRANSPOSE =
new DruidSortProjectTransposeRule(HiveRelFactories.HIVE_BUILDER);
public static final DruidProjectSortTransposeRule PROJECT_SORT_TRANSPOSE =
new DruidProjectSortTransposeRule(HiveRelFactories.HIVE_BUILDER);
public static final DruidProjectFilterTransposeRule PROJECT_FILTER_TRANSPOSE =
new DruidProjectFilterTransposeRule(HiveRelFactories.HIVE_BUILDER);
public static final DruidFilterProjectTransposeRule FILTER_PROJECT_TRANSPOSE =
new DruidFilterProjectTransposeRule(HiveRelFactories.HIVE_BUILDER);
public static final DruidAggregateFilterTransposeRule AGGREGATE_FILTER_TRANSPOSE =
new DruidAggregateFilterTransposeRule(HiveRelFactories.HIVE_BUILDER);
public static final DruidFilterAggregateTransposeRule FILTER_AGGREGATE_TRANSPOSE =
new DruidFilterAggregateTransposeRule(HiveRelFactories.HIVE_BUILDER);
public static final DruidPostAggregationProjectRule POST_AGGREGATION_PROJECT =
new DruidPostAggregationProjectRule(HiveRelFactories.HIVE_BUILDER);
public static final DruidHavingFilterRule HAVING_FILTER_RULE =
new DruidHavingFilterRule(HiveRelFactories.HIVE_BUILDER);
public static final AggregateExpandDistinctAggregatesDruidRule EXPAND_SINGLE_DISTINCT_AGGREGATES_DRUID_RULE =
new AggregateExpandDistinctAggregatesDruidRule(HiveRelFactories.HIVE_BUILDER);
/**
* This is a simplified version of {@link org.apache.calcite.rel.rules.AggregateExpandDistinctAggregatesRule}
* The goal of this simplified version is to help pushing single count distinct as multi-phase aggregates.
* This is an okay solution before we actually support grouping sets push-down to Druid.
* We are limiting it to one Distinct count to avoid expensive cross join and running into issue
* https://issues.apache.org/jira/browse/HIVE-19601
*/
public static class AggregateExpandDistinctAggregatesDruidRule extends RelOptRule {
public AggregateExpandDistinctAggregatesDruidRule(RelBuilderFactory relBuilderFactory) {
super(operand(Aggregate.class, operand(DruidQuery.class, none())), relBuilderFactory,
null
);
}
@Override public void onMatch(RelOptRuleCall call) {
Aggregate aggregate = call.rel(0);
if (!aggregate.containsDistinctCall()) {
return;
}
final long numCountDistinct = aggregate.getAggCallList()
.stream()
.filter(aggregateCall -> aggregateCall.getAggregation().getKind().equals(SqlKind.COUNT) &&
aggregateCall.isDistinct())
.count();
if (numCountDistinct != 1) {
return;
}
// Find all of the agg expressions. We use a LinkedHashSet to ensure determinism.
int nonDistinctAggCallCount = 0; // find all aggregate calls without distinct
int filterCount = 0;
int unsupportedNonDistinctAggCallCount = 0;
final Set, Integer>> argLists = new LinkedHashSet<>();
for (AggregateCall aggCall : aggregate.getAggCallList()) {
if (aggCall.filterArg >= 0) {
++filterCount;
}
if (!aggCall.isDistinct()) {
++nonDistinctAggCallCount;
final SqlKind aggCallKind = aggCall.getAggregation().getKind();
// We only support COUNT/SUM/MIN/MAX for the "single" count distinct optimization
switch (aggCallKind) {
case COUNT:
case SUM:
case SUM0:
case MIN:
case MAX:
break;
default:
++unsupportedNonDistinctAggCallCount;
}
} else {
argLists.add(Pair.of(aggCall.getArgList(), aggCall.filterArg));
}
}
// If only one distinct aggregate and one or more non-distinct aggregates,
// we can generate multi-phase aggregates
if (numCountDistinct == 1 // one distinct aggregate
&& filterCount == 0 // no filter
&& unsupportedNonDistinctAggCallCount == 0 // sum/min/max/count in non-distinct aggregate
&& nonDistinctAggCallCount > 0) { // one or more non-distinct aggregates
final RelBuilder relBuilder = call.builder();
convertSingletonDistinct(relBuilder, aggregate, argLists);
call.transformTo(relBuilder.build());
return;
}
}
/**
* Converts an aggregate with one distinct aggregate and one or more
* non-distinct aggregates to multi-phase aggregates (see reference example
* below).
*
* @param relBuilder Contains the input relational expression
* @param aggregate Original aggregate
* @param argLists Arguments and filters to the distinct aggregate function
*
*/
private RelBuilder convertSingletonDistinct(RelBuilder relBuilder,
Aggregate aggregate, Set, Integer>> argLists) {
// In this case, we are assuming that there is a single distinct function.
// So make sure that argLists is of size one.
Preconditions.checkArgument(argLists.size() == 1);
// For example,
// SELECT deptno, COUNT(*), SUM(bonus), MIN(DISTINCT sal)
// FROM emp
// GROUP BY deptno
//
// becomes
//
// SELECT deptno, SUM(cnt), SUM(bonus), MIN(sal)
// FROM (
// SELECT deptno, COUNT(*) as cnt, SUM(bonus), sal
// FROM EMP
// GROUP BY deptno, sal) // Aggregate B
// GROUP BY deptno // Aggregate A
relBuilder.push(aggregate.getInput());
final List originalAggCalls = aggregate.getAggCallList();
final ImmutableBitSet originalGroupSet = aggregate.getGroupSet();
// Add the distinct aggregate column(s) to the group-by columns,
// if not already a part of the group-by
final SortedSet bottomGroupSet = new TreeSet<>();
bottomGroupSet.addAll(aggregate.getGroupSet().asList());
for (AggregateCall aggCall : originalAggCalls) {
if (aggCall.isDistinct()) {
bottomGroupSet.addAll(aggCall.getArgList());
break; // since we only have single distinct call
}
}
// Generate the intermediate aggregate B, the one on the bottom that converts
// a distinct call to group by call.
// Bottom aggregate is the same as the original aggregate, except that
// the bottom aggregate has converted the DISTINCT aggregate to a group by clause.
final List bottomAggregateCalls = new ArrayList<>();
for (AggregateCall aggCall : originalAggCalls) {
// Project the column corresponding to the distinct aggregate. Project
// as-is all the non-distinct aggregates
if (!aggCall.isDistinct()) {
final AggregateCall newCall =
AggregateCall.create(aggCall.getAggregation(), false,
aggCall.isApproximate(), aggCall.getArgList(), -1,
ImmutableBitSet.of(bottomGroupSet).cardinality(),
relBuilder.peek(), null, aggCall.name);
bottomAggregateCalls.add(newCall);
}
}
// Generate the aggregate B (see the reference example above)
relBuilder.push(
aggregate.copy(
aggregate.getTraitSet(), relBuilder.build(),
false, ImmutableBitSet.of(bottomGroupSet), null, bottomAggregateCalls));
// Add aggregate A (see the reference example above), the top aggregate
// to handle the rest of the aggregation that the bottom aggregate hasn't handled
final List topAggregateCalls = Lists.newArrayList();
// Use the remapped arguments for the (non)distinct aggregate calls
int nonDistinctAggCallProcessedSoFar = 0;
for (AggregateCall aggCall : originalAggCalls) {
final AggregateCall newCall;
if (aggCall.isDistinct()) {
List newArgList = new ArrayList<>();
for (int arg : aggCall.getArgList()) {
newArgList.add(bottomGroupSet.headSet(arg).size());
}
newCall =
AggregateCall.create(aggCall.getAggregation(),
false,
aggCall.isApproximate(),
newArgList,
-1,
originalGroupSet.cardinality(),
relBuilder.peek(),
aggCall.getType(),
aggCall.name);
} else {
// If aggregate B had a COUNT aggregate call the corresponding aggregate at
// aggregate A must be SUM. For other aggregates, it remains the same.
final List newArgs =
Lists.newArrayList(bottomGroupSet.size() + nonDistinctAggCallProcessedSoFar);
if (aggCall.getAggregation().getKind() == SqlKind.COUNT) {
newCall =
AggregateCall.create(new SqlSumEmptyIsZeroAggFunction(), false,
aggCall.isApproximate(), newArgs, -1,
originalGroupSet.cardinality(), relBuilder.peek(),
aggCall.getType(), aggCall.getName());
} else {
newCall =
AggregateCall.create(aggCall.getAggregation(), false,
aggCall.isApproximate(), newArgs, -1,
originalGroupSet.cardinality(),
relBuilder.peek(), aggCall.getType(), aggCall.name);
}
nonDistinctAggCallProcessedSoFar++;
}
topAggregateCalls.add(newCall);
}
// Populate the group-by keys with the remapped arguments for aggregate A
// The top groupset is basically an identity (first X fields of aggregate B's
// output), minus the distinct aggCall's input.
final Set topGroupSet = new HashSet<>();
int groupSetToAdd = 0;
for (int bottomGroup : bottomGroupSet) {
if (originalGroupSet.get(bottomGroup)) {
topGroupSet.add(groupSetToAdd);
}
groupSetToAdd++;
}
relBuilder.push(
aggregate.copy(aggregate.getTraitSet(),
relBuilder.build(), aggregate.indicator,
ImmutableBitSet.of(topGroupSet), null, topAggregateCalls));
return relBuilder;
}
}
}