org.apache.flink.table.planner.plan.optimize.RelNodeBlock.scala Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.plan.optimize
import org.apache.flink.annotation.Experimental
import org.apache.flink.configuration.ConfigOption
import org.apache.flink.configuration.ConfigOptions.key
import org.apache.flink.table.api.TableConfig
import org.apache.flink.table.planner.plan.`trait`.MiniBatchInterval
import org.apache.flink.table.planner.plan.nodes.calcite.LegacySink
import org.apache.flink.table.planner.plan.reuse.SubplanReuser.{SubplanReuseContext, SubplanReuseShuttle}
import org.apache.flink.table.planner.plan.rules.logical.WindowPropertiesRules
import org.apache.flink.table.planner.plan.utils.{DefaultRelShuttle, ExpandTableScanShuttle}
import org.apache.flink.util.Preconditions
import com.google.common.collect.Sets
import org.apache.calcite.rel._
import org.apache.calcite.rel.core.{Aggregate, Project, Snapshot, TableFunctionScan, Union}
import org.apache.calcite.rex.RexNode
import java.lang.{Boolean => JBoolean}
import java.util
import scala.collection.JavaConversions._
import scala.collection.mutable
/**
* A [[RelNodeBlock]] is a sub-tree in the [[RelNode]] DAG, and represents common sub-graph
* in [[CommonSubGraphBasedOptimizer]]. All [[RelNode]]s in each block have
* only one [[LegacySink]] output.
*
* The algorithm works as follows:
* 1. If there is only one tree, the whole tree is in one block. (the next steps is needless.)
* 2. reuse common sub-plan in different RelNode tree, generate a RelNode DAG,
* 3. traverse each tree from root to leaf, and mark the sink RelNode of each RelNode
* 4. traverse each tree from root to leaf again, if meet a RelNode which has multiple sink
* RelNode, the RelNode is the output node of a new block (or named break-point).
* There are several special cases that a RelNode can not be a break-point.
* (1). UnionAll is not a break-point
* when [[RelNodeBlockPlanBuilder.TABLE_OPTIMIZER_UNIONALL_AS_BREAKPOINT_ENABLED]] is false
* (2). [[TableFunctionScan]], [[Snapshot]] or window aggregate ([[Aggregate]] on a [[Project]]
* with window attribute) are not a break-point because their physical RelNodes are a composite
* RelNode, each of them cannot be optimized individually. e.g. FlinkLogicalTableFunctionScan and
* FlinkLogicalCorrelate will be combined into a BatchExecCorrelate or a StreamExecCorrelate.
*
* For example: (Table API)
*
* {{{-
* val sourceTable = tEnv.scan("test_table").select('a, 'b, 'c)
* val leftTable = sourceTable.filter('a > 0).select('a as 'a1, 'b as 'b1)
* val rightTable = sourceTable.filter('c.isNotNull).select('b as 'b2, 'c as 'c2)
* val joinTable = leftTable.join(rightTable, 'a1 === 'b2)
* joinTable.where('a1 >= 70).select('a1, 'b1).writeToSink(sink1)
* joinTable.where('a1 < 70 ).select('a1, 'c2).writeToSink(sink2)
* }}}
*
* the RelNode DAG is:
*
* {{{-
* Sink(sink1) Sink(sink2)
* | |
* Project(a1,b1) Project(a1,c2)
* | |
* Filter(a1>=70) Filter(a1<70)
* \ /
* Join(a1=b2)
* / \
* Project(a1,b1) Project(b2,c2)
* | |
* Filter(a>0) Filter(c is not null)
* \ /
* Project(a,b,c)
* |
* TableScan
* }}}
*
* This [[RelNode]] DAG will be decomposed into three [[RelNodeBlock]]s, the break-point
* is the [[RelNode]](`Join(a1=b2)`) which data outputs to multiple [[LegacySink]]s.
* Notes: Although `Project(a,b,c)` has two parents (outputs),
* they eventually merged at `Join(a1=b2)`. So `Project(a,b,c)` is not a break-point.
*
the first [[RelNodeBlock]] includes TableScan, Project(a,b,c), Filter(a>0),
* Filter(c is not null), Project(a1,b1), Project(b2,c2) and Join(a1=b2)
*
the second one includes Filter(a1>=70), Project(a1,b1) and Sink(sink1)
*
the third one includes Filter(a1<70), Project(a1,c2) and Sink(sink2)
*
And the first [[RelNodeBlock]] is the child of another two.
*
* The [[RelNodeBlock]] plan is:
* {{{-
* RelNodeBlock2 RelNodeBlock3
* \ /
* RelNodeBlock1
* }}}
*
* The optimizing order is from child block to parent. The optimized result (RelNode)
* will be wrapped as an IntermediateRelTable first, and then be converted to a new TableScan
* which is the new output node of current block and is also the input of its parent blocks.
*
* @param outputNode A RelNode of the output in the block, which could be a [[LegacySink]] or
* other RelNode which data outputs to multiple [[LegacySink]]s.
*/
class RelNodeBlock(val outputNode: RelNode) {
// child (or input) blocks
private val childBlocks = mutable.LinkedHashSet[RelNodeBlock]()
// After this block has been optimized, the result will be converted to a new TableScan as
// new output node
private var newOutputNode: Option[RelNode] = None
private var outputTableName: Option[String] = None
private var optimizedPlan: Option[RelNode] = None
// whether any parent block requires UPDATE_BEFORE messages
private var updateBeforeRequired: Boolean = false
private var miniBatchInterval: MiniBatchInterval = MiniBatchInterval.NONE
def addChild(block: RelNodeBlock): Unit = childBlocks += block
def children: Seq[RelNodeBlock] = childBlocks.toSeq
def setNewOutputNode(newNode: RelNode): Unit = newOutputNode = Option(newNode)
def getNewOutputNode: Option[RelNode] = newOutputNode
def setOutputTableName(name: String): Unit = outputTableName = Option(name)
def getOutputTableName: String = outputTableName.orNull
def setOptimizedPlan(rel: RelNode): Unit = this.optimizedPlan = Option(rel)
def getOptimizedPlan: RelNode = optimizedPlan.orNull
def setUpdateBeforeRequired(requireUpdateBefore: Boolean): Unit = {
// set the child block whether need to produce update before messages for updates,
// a child block may have multiple parents (outputs), if one of the parents require
// update before message, then this child block has to produce update before for updates.
if (requireUpdateBefore) {
this.updateBeforeRequired = true
}
}
/**
* Returns true if any parent block requires UPDATE_BEFORE messages for updates.
*/
def isUpdateBeforeRequired: Boolean = updateBeforeRequired
def setMiniBatchInterval(miniBatchInterval: MiniBatchInterval): Unit = {
this.miniBatchInterval = miniBatchInterval
}
def getMiniBatchInterval: MiniBatchInterval = miniBatchInterval
def getChildBlock(node: RelNode): Option[RelNodeBlock] = {
val find = children.filter(_.outputNode.equals(node))
if (find.isEmpty) {
None
} else {
Preconditions.checkArgument(find.size == 1)
Some(find.head)
}
}
/**
* Get new plan of this block. The child blocks (inputs) will be replace with new RelNodes (the
* optimized result of child block).
*
* @return New plan of this block
*/
def getPlan: RelNode = {
val shuttle = new RelNodeBlockShuttle
outputNode.accept(shuttle)
}
private class RelNodeBlockShuttle extends DefaultRelShuttle {
override def visit(rel: RelNode): RelNode = {
val block = getChildBlock(rel)
block match {
case Some(b) => b.getNewOutputNode.get
case _ => super.visit(rel)
}
}
}
}
/**
* Holds information to build [[RelNodeBlock]].
*/
class RelNodeWrapper(relNode: RelNode) {
// parent nodes of `relNode`
private val parentNodes = Sets.newIdentityHashSet[RelNode]()
// output nodes of some blocks that data of `relNode` outputs to
private val blockOutputNodes = Sets.newIdentityHashSet[RelNode]()
// stores visited parent nodes when builds RelNodeBlock
private val visitedParentNodes = Sets.newIdentityHashSet[RelNode]()
def addParentNode(parent: Option[RelNode]): Unit = {
parent match {
case Some(p) => parentNodes.add(p)
case None => // Ignore
}
}
def addVisitedParentNode(parent: Option[RelNode]): Unit = {
parent match {
case Some(p) =>
require(parentNodes.contains(p))
visitedParentNodes.add(p)
case None => // Ignore
}
}
def addBlockOutputNode(blockOutputNode: RelNode): Unit = blockOutputNodes.add(blockOutputNode)
/**
* Returns true if all parent nodes had been visited, else false
*/
def allParentNodesVisited: Boolean = parentNodes.size() == visitedParentNodes.size()
/**
* Returns true if number of `blockOutputNodes` is greater than 1, else false
*/
def hasMultipleBlockOutputNodes: Boolean = blockOutputNodes.size() > 1
/**
* Returns the output node of the block that the `relNode` belongs to
*/
def getBlockOutputNode: RelNode = {
if (hasMultipleBlockOutputNodes) {
// If has multiple block output nodes, the `relNode` is a break-point.
// So the `relNode` is the output node of the block that the `relNode` belongs to
relNode
} else {
// the `relNode` is not a break-point
require(blockOutputNodes.size == 1)
blockOutputNodes.head
}
}
}
/**
* Builds [[RelNodeBlock]] plan
*/
class RelNodeBlockPlanBuilder private(config: TableConfig) {
private val node2Wrapper = new util.IdentityHashMap[RelNode, RelNodeWrapper]()
private val node2Block = new util.IdentityHashMap[RelNode, RelNodeBlock]()
private val isUnionAllAsBreakPointEnabled = config.getConfiguration.getBoolean(
RelNodeBlockPlanBuilder.TABLE_OPTIMIZER_UNIONALL_AS_BREAKPOINT_ENABLED)
/**
* Decompose the [[RelNode]] plan into many [[RelNodeBlock]]s,
* and rebuild [[RelNodeBlock]] plan.
*
* @param sinks RelNode DAG to decompose
* @return Sink-RelNodeBlocks, each Sink-RelNodeBlock is a tree.
*/
def buildRelNodeBlockPlan(sinks: Seq[RelNode]): Seq[RelNodeBlock] = {
sinks.foreach(buildRelNodeWrappers(_, None))
buildBlockOutputNodes(sinks)
sinks.map(buildBlockPlan)
}
private def buildRelNodeWrappers(node: RelNode, parent: Option[RelNode]): Unit = {
node2Wrapper.getOrElseUpdate(node, new RelNodeWrapper(node)).addParentNode(parent)
node.getInputs.foreach(child => buildRelNodeWrappers(child, Some(node)))
}
private def buildBlockPlan(node: RelNode): RelNodeBlock = {
val currentBlock = new RelNodeBlock(node)
buildBlock(node, currentBlock, createNewBlockWhenMeetValidBreakPoint = false)
currentBlock
}
private def buildBlock(
node: RelNode,
currentBlock: RelNodeBlock,
createNewBlockWhenMeetValidBreakPoint: Boolean): Unit = {
val hasDiffBlockOutputNodes = node2Wrapper(node).hasMultipleBlockOutputNodes
val validBreakPoint = isValidBreakPoint(node)
if (validBreakPoint && (createNewBlockWhenMeetValidBreakPoint || hasDiffBlockOutputNodes)) {
val childBlock = node2Block.getOrElseUpdate(node, new RelNodeBlock(node))
currentBlock.addChild(childBlock)
node.getInputs.foreach {
child => buildBlock(child, childBlock, createNewBlockWhenMeetValidBreakPoint = false)
}
} else {
val newCreateNewBlockWhenMeetValidBreakPoint =
createNewBlockWhenMeetValidBreakPoint || hasDiffBlockOutputNodes && !validBreakPoint
node.getInputs.foreach {
child => buildBlock(child, currentBlock, newCreateNewBlockWhenMeetValidBreakPoint)
}
}
}
/**
* TableFunctionScan/Snapshot/Window Aggregate cannot be optimized individually,
* so TableFunctionScan/Snapshot/Window Aggregate is not a valid break-point
* even though it has multiple parents.
*/
private def isValidBreakPoint(node: RelNode): Boolean = node match {
case _: TableFunctionScan | _: Snapshot => false
case union: Union if union.all => isUnionAllAsBreakPointEnabled
case project: Project => project.getProjects.forall(p => !hasWindowGroup(p))
case agg: Aggregate =>
agg.getInput match {
case project: Project =>
agg.getGroupSet.forall { group =>
val p = project.getProjects.get(group)
!hasWindowGroup(p)
}
case _ => true
}
case _ => true
}
private def hasWindowGroup(rexNode: RexNode): Boolean = {
WindowPropertiesRules.hasGroupAuxiliaries(rexNode) ||
WindowPropertiesRules.hasGroupFunction(rexNode)
}
private def buildBlockOutputNodes(sinks: Seq[RelNode]): Unit = {
// init sink block output node
sinks.foreach(sink => node2Wrapper.get(sink).addBlockOutputNode(sink))
val unvisitedNodeQueue: util.Deque[RelNode] = new util.ArrayDeque[RelNode]()
unvisitedNodeQueue.addAll(sinks)
while (unvisitedNodeQueue.nonEmpty) {
val node = unvisitedNodeQueue.removeFirst()
val wrapper = node2Wrapper.get(node)
require(wrapper != null)
val blockOutputNode = wrapper.getBlockOutputNode
buildBlockOutputNodes(None, node, blockOutputNode, unvisitedNodeQueue)
}
}
private def buildBlockOutputNodes(
parent: Option[RelNode],
node: RelNode,
curBlockOutputNode: RelNode,
unvisitedNodeQueue: util.Deque[RelNode]): Unit = {
val wrapper = node2Wrapper.get(node)
require(wrapper != null)
wrapper.addBlockOutputNode(curBlockOutputNode)
wrapper.addVisitedParentNode(parent)
// the node can be visited only when its all parent nodes have been visited
if (wrapper.allParentNodesVisited) {
val newBlockOutputNode = if (wrapper.hasMultipleBlockOutputNodes) {
// if the node has different output node, the node is the output node of current block.
node
} else {
curBlockOutputNode
}
node.getInputs.foreach { input =>
buildBlockOutputNodes(Some(node), input, newBlockOutputNode, unvisitedNodeQueue)
}
unvisitedNodeQueue.remove(node)
} else {
// visit later
unvisitedNodeQueue.addLast(node)
}
}
}
object RelNodeBlockPlanBuilder {
// It is a experimental config, will may be removed later.
@Experimental
val TABLE_OPTIMIZER_UNIONALL_AS_BREAKPOINT_ENABLED: ConfigOption[JBoolean] =
key("table.optimizer.union-all-as-breakpoint-enabled")
.defaultValue(JBoolean.valueOf(true))
.withDescription("When true, the optimizer will breakup the graph at union-all node " +
"when it's a breakpoint. When false, the optimizer will skip the union-all node " +
"even it's a breakpoint, and will try find the breakpoint in its inputs.")
// It is a experimental config, will may be removed later.
@Experimental
val TABLE_OPTIMIZER_REUSE_OPTIMIZE_BLOCK_WITH_DIGEST_ENABLED: ConfigOption[JBoolean] =
key("table.optimizer.reuse-optimize-block-with-digest-enabled")
.defaultValue(JBoolean.valueOf(false))
.withDescription("When true, the optimizer will try to find out duplicated sub-plan by " +
"digest to build optimize block(a.k.a. common sub-graph). " +
"Each optimize block will be optimized independently.")
/**
* Decompose the [[RelNode]] trees into [[RelNodeBlock]] trees. First, convert LogicalNode
* trees to RelNode trees. Second, reuse same sub-plan in different trees. Third, decompose the
* RelNode dag to [[RelNodeBlock]] trees.
*
* @param sinkNodes SinkNodes belongs to a LogicalNode plan.
* @return Sink-RelNodeBlocks, each Sink-RelNodeBlock is a tree.
*/
def buildRelNodeBlockPlan(
sinkNodes: Seq[RelNode],
config: TableConfig): Seq[RelNodeBlock] = {
require(sinkNodes.nonEmpty)
// expand QueryOperationCatalogViewTable in TableScan
val shuttle = new ExpandTableScanShuttle
val convertedRelNodes = sinkNodes.map(_.accept(shuttle))
if (convertedRelNodes.size == 1) {
Seq(new RelNodeBlock(convertedRelNodes.head))
} else {
// merge multiple RelNode trees to RelNode dag
val relNodeDag = reuseRelNodes(convertedRelNodes, config)
val builder = new RelNodeBlockPlanBuilder(config)
builder.buildRelNodeBlockPlan(relNodeDag)
}
}
/**
* Reuse common sub-plan in different RelNode tree, generate a RelNode dag
*
* @param relNodes RelNode trees
* @return RelNode dag which reuse common subPlan in each tree
*/
private def reuseRelNodes(relNodes: Seq[RelNode], tableConfig: TableConfig): Seq[RelNode] = {
val findOpBlockWithDigest = tableConfig.getConfiguration.getBoolean(
RelNodeBlockPlanBuilder.TABLE_OPTIMIZER_REUSE_OPTIMIZE_BLOCK_WITH_DIGEST_ENABLED)
if (!findOpBlockWithDigest) {
return relNodes
}
// reuse sub-plan with same digest in input RelNode trees.
val context = new SubplanReuseContext(true, relNodes: _*)
val reuseShuttle = new SubplanReuseShuttle(context)
relNodes.map(_.accept(reuseShuttle))
}
}