org.apache.flink.table.planner.plan.rules.physical.stream.IncrementalAggregateRule.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of flink-table-planner-blink_2.11 Show documentation
Show all versions of flink-table-planner-blink_2.11 Show documentation
This module bridges Table/SQL API and runtime. It contains
all resources that are required during pre-flight and runtime
phase. The content of this module is work-in-progress. It will
replace flink-table-planner once it is stable. See FLINK-11439
and FLIP-32 for more details.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.plan.rules.physical.stream
import org.apache.flink.annotation.Experimental
import org.apache.flink.configuration.ConfigOption
import org.apache.flink.configuration.ConfigOptions.key
import org.apache.flink.table.planner.calcite.{FlinkContext, FlinkTypeFactory}
import org.apache.flink.table.planner.plan.PartialFinalType
import org.apache.flink.table.planner.plan.nodes.physical.stream.{StreamPhysicalExchange, StreamPhysicalGlobalGroupAggregate, StreamPhysicalIncrementalGroupAggregate, StreamPhysicalLocalGroupAggregate}
import org.apache.flink.table.planner.plan.utils.AggregateUtil
import org.apache.flink.util.Preconditions
import org.apache.calcite.plan.RelOptRule.{any, operand}
import org.apache.calcite.plan.{RelOptRule, RelOptRuleCall, RelOptUtil}
import java.lang.{Boolean => JBoolean}
import java.util.Collections
/**
 * Planner rule that fuses two adjacent aggregate nodes into one.
 *
 * The matched sub-plan, read from the top, is a final [[StreamPhysicalGlobalGroupAggregate]],
 * a [[StreamPhysicalExchange]], a final [[StreamPhysicalLocalGroupAggregate]] and a partial
 * [[StreamPhysicalGlobalGroupAggregate]]. The final local aggregate and the partial global
 * aggregate are replaced by a single [[StreamPhysicalIncrementalGroupAggregate]]; the exchange
 * and the final global aggregate are re-created on top of that new node.
 */
class IncrementalAggregateRule
  extends RelOptRule(
    operand(classOf[StreamPhysicalGlobalGroupAggregate], // final global agg
      operand(classOf[StreamPhysicalExchange], // key by
        operand(classOf[StreamPhysicalLocalGroupAggregate], // final local agg
          operand(classOf[StreamPhysicalGlobalGroupAggregate], any())))), // partial global agg
    "IncrementalAggregateRule") {

  /**
   * Accepts the match only when the incremental-aggregate option is switched on and the three
   * aggregates carry the expected partial/final markers (bottom: PARTIAL, middle and top: FINAL).
   */
  override def matches(call: RelOptRuleCall): Boolean = {
    val topGlobal: StreamPhysicalGlobalGroupAggregate = call.rel(0)
    val middleLocal: StreamPhysicalLocalGroupAggregate = call.rel(2)
    val bottomGlobal: StreamPhysicalGlobalGroupAggregate = call.rel(3)

    // read the feature switch from the table configuration carried by the planner context
    val flinkContext = call.getPlanner.getContext.unwrap(classOf[FlinkContext])
    val featureEnabled = flinkContext.getTableConfig.getConfiguration.getBoolean(
      IncrementalAggregateRule.TABLE_OPTIMIZER_INCREMENTAL_AGG_ENABLED)

    val stagesLineUp =
      bottomGlobal.partialFinalType == PartialFinalType.PARTIAL &&
        middleLocal.partialFinalType == PartialFinalType.FINAL &&
        topGlobal.partialFinalType == PartialFinalType.FINAL

    stagesLineUp && featureEnabled
  }

  /**
   * Builds the incremental aggregate from the partial global and final local aggregates,
   * re-parents the exchange onto it, verifies that the accumulator row type expected by the
   * final global aggregate equals the incremental aggregate's output row type, and registers
   * the rewritten final global aggregate as the equivalent plan.
   */
  override def onMatch(call: RelOptRuleCall): Unit = {
    val topGlobal: StreamPhysicalGlobalGroupAggregate = call.rel(0)
    val keyBy: StreamPhysicalExchange = call.rel(1)
    val middleLocal: StreamPhysicalLocalGroupAggregate = call.rel(2)
    val bottomGlobal: StreamPhysicalGlobalGroupAggregate = call.rel(3)

    val bottomInputRowType = bottomGlobal.localAggInputRowType
    val bottomDeclaredCalls = bottomGlobal.aggCalls.toArray
    val bottomActualCalls = bottomGlobal.localAggInfoList.getActualAggregateCalls
    val topActualCalls = topGlobal.globalAggInfoList.getActualAggregateCalls

    val incremental = new StreamPhysicalIncrementalGroupAggregate(
      bottomGlobal.getCluster,
      // reuse the traits of the final local agg (ACC trait)
      middleLocal.getTraitSet,
      bottomGlobal.getInput,
      bottomGlobal.grouping,
      bottomActualCalls,
      middleLocal.grouping,
      topActualCalls,
      bottomDeclaredCalls,
      bottomGlobal.aggCallNeedRetractions,
      bottomGlobal.needRetraction,
      bottomInputRowType,
      bottomGlobal.getRowType)
    val incrementalRowType = incremental.getRowType

    val rewiredKeyBy = keyBy.copy(keyBy.getTraitSet, incremental, keyBy.distribution)

    val countStarInserted = bottomGlobal.globalAggInfoList.countStarInserted

    // a fresh all-false array on every use: none of the final agg calls needs retraction
    def noRetractionFlags = Array.fill(topActualCalls.length)(false)

    // accumulator row type that the final global agg expects from its input
    val expectedAccType = if (countStarInserted) {
      middleLocal.getRowType
    } else {
      val localInfos = AggregateUtil.transformToStreamAggregateInfoList(
        // the final agg consumes the output of the partial agg
        FlinkTypeFactory.toLogicalRowType(bottomGlobal.getRowType),
        topActualCalls,
        noRetractionFlags,
        // no count* is needed here either
        needInputCount = false,
        // the local agg does not operate on state
        isStateBackendDataViews = false)
      AggregateUtil.inferLocalAggRowType(
        localInfos,
        incrementalRowType,
        topGlobal.grouping,
        topGlobal.getCluster.getTypeFactory.asInstanceOf[FlinkTypeFactory])
    }

    // the incremental agg must emit exactly what the final global agg consumes
    Preconditions.checkState(
      RelOptUtil.areRowTypesEqual(incrementalRowType, expectedAccType, false))

    val replacement = if (countStarInserted) {
      // the existing final global agg can be kept; only its input changes
      topGlobal.copy(topGlobal.getTraitSet, Collections.singletonList(rewiredKeyBy))
    } else {
      // rebuild the final global agg from the actual agg calls, with retraction disabled
      new StreamPhysicalGlobalGroupAggregate(
        topGlobal.getCluster,
        topGlobal.getTraitSet,
        rewiredKeyBy,
        topGlobal.getRowType,
        topGlobal.grouping,
        topActualCalls,
        noRetractionFlags,
        topGlobal.localAggInputRowType,
        needRetraction = false,
        topGlobal.partialFinalType)
    }

    call.transformTo(replacement)
  }
}
object IncrementalAggregateRule {

  /** Shared instance of the rule. */
  val INSTANCE = new IncrementalAggregateRule

  /**
   * Switch that controls whether [[IncrementalAggregateRule]] is allowed to fire; it defaults
   * to `true`. This is an experimental option and may be removed in a later release.
   */
  @Experimental
  val TABLE_OPTIMIZER_INCREMENTAL_AGG_ENABLED: ConfigOption[JBoolean] = {
    val description =
      "When both local aggregation and distinct aggregation splitting are enabled, " +
        "a distinct aggregation will be optimized into four aggregations, i.e., " +
        "local-agg1, global-agg1, local-agg2 and global-Agg2. " +
        "We can combine global-agg1 and local-agg2 into a single operator " +
        "(we call it incremental agg because it receives incremental accumulators " +
        "and output incremental results). " +
        "In this way, we can reduce some state overhead and resources. " +
        "Default is enabled."

    key("table.optimizer.incremental-agg-enabled")
      .defaultValue(JBoolean.TRUE)
      .withDescription(description)
  }
}