// org.apache.flink.table.plan.subplan.RelNodeBlock.scala Maven / Gradle / Ivy
// The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.flink.table.plan.subplan
import org.apache.flink.table.api.{TableConfigOptions, TableEnvironment, TableException}
import org.apache.flink.table.plan.logical.{LogicalNode, SinkNode}
import org.apache.flink.table.plan.nodes.calcite.Sink
import org.apache.flink.table.plan.rules.logical.WindowPropertiesRules
import org.apache.flink.table.plan.schema.RelTable
import org.apache.flink.table.plan.util.{SubplanReuseContext, SubplanReuseShuttle}
import org.apache.flink.util.Preconditions
import org.apache.calcite.plan.RelOptUtil
import org.apache.calcite.rel._
import org.apache.calcite.rel.core.{Aggregate, Project, Snapshot, TableFunctionScan, TableScan, Union}
import org.apache.calcite.rel.logical.LogicalTableScan
import org.apache.calcite.rex.RexNode
import java.util.IdentityHashMap
import scala.collection.JavaConversions._
import scala.collection.mutable
/**
  * A [[RelNodeBlock]] is a sub-tree in the [[RelNode]] plan. All [[RelNode]]s in
  * each block have at most one parent (output) node.
  * The nodes in different blocks will be optimized independently.
  *
  * For example: (Table API)
  *
  * {{{
  *  val table = tEnv.scan("test_table").select('a, 'b, 'c)
  *  table.where('a >= 70).select('a, 'b).writeToSink(sink1)
  *  table.where('a < 70 ).select('a, 'c).writeToSink(sink2)
  * }}}
  *
  * the RelNode DAG is:
  *
  * {{{
  *          TableScan
  *              |
  *        Project(a,b,c)
  *          /          \
  *  Filter(a>=70)  Filter(a<70)
  *       |              |
  *  Project(a,b)   Project(a,c)
  *       |              |
  *  Sink(sink1)    Sink(sink2)
  * }}}
  *
  * This [[RelNode]] DAG will be decomposed into three [[RelNodeBlock]]s; the break-point
  * is a [[RelNode]] which has more than one output node.
  * The first [[RelNodeBlock]] includes TableScan and Project('a,'b,'c),
  * the second one includes Filter('a>=70), Project('a,'b) and Sink(sink1),
  * the third one includes Filter('a<70), Project('a,'c) and Sink(sink2).
  * The first [[RelNodeBlock]] is the child of the other two.
  *
  * The [[RelNodeBlock]] plan is:
  *
  * {{{
  *         RelNodeBlock1
  *          /         \
  *  RelNodeBlock2  RelNodeBlock3
  * }}}
  *
  * The optimizing order is from child block to parent. The optimized result (DataStream)
  * will be registered into tables first, and then be converted to a new TableScan which is the
  * new output node of the current block and is also the input of its parent blocks.
  *
  * @param outputNode A RelNode of the output in the block, which could be a [[Sink]] or
  *                   another RelNode with more than one parent node.
  */
class RelNodeBlock(val outputNode: RelNode, tEnv: TableEnvironment) {

  // Child (input) blocks. A LinkedHashSet keeps insertion order and ignores duplicates.
  private val childBlocks = mutable.LinkedHashSet[RelNodeBlock]()

  // After this block has been optimized, the result will be converted to a new TableScan as
  // new output node
  private var newOutputNode: Option[RelNode] = None

  private var outputTableName: Option[String] = None

  private var optimizedPlan: Option[RelNode] = None

  private var updateAsRetract: Boolean = false

  /** Registers `block` as a child (input) of this block. */
  def addChild(block: RelNodeBlock): Unit = childBlocks += block

  /** Child (input) blocks in insertion order. */
  def children: Seq[RelNodeBlock] = childBlocks.toSeq

  /** Stores the new output node of this block; a `null` argument is stored as `None`. */
  def setNewOutputNode(newNode: RelNode): Unit = newOutputNode = Option(newNode)

  /** The new output node of this block, or `None` if it has not been set. */
  def getNewOutputNode: Option[RelNode] = newOutputNode

  /** Stores the output table name; a `null` argument is stored as `None`. */
  def setOutputTableName(name: String): Unit = outputTableName = Option(name)

  /** The output table name, or `null` if it has not been set. */
  def getOutputTableName: String = outputTableName.orNull

  /** Stores the optimized plan; a `null` argument is stored as `None`. */
  def setOptimizedPlan(rel: RelNode): Unit = this.optimizedPlan = Option(rel)

  /** The optimized plan, or `null` if it has not been set. */
  def getOptimizedPlan: RelNode = optimizedPlan.orNull

  /**
    * Marks this block as update-as-retraction.
    *
    * The flag is sticky: passing `false` never resets a flag that was already set, because
    * a block may have several parent blocks and any one of them may request retraction.
    */
  def setUpdateAsRetraction(updateAsRetract: Boolean): Unit = {
    if (updateAsRetract) {
      this.updateAsRetract = true
    }
  }

  def isUpdateAsRetraction: Boolean = updateAsRetract

  /**
    * Looks up the child block whose output node equals the given node.
    *
    * @param node the node to look up
    * @return the matching child block, or `None` if no child block outputs `node`
    * @throws IllegalArgumentException if more than one child block matches
    */
  def isChildBlockOutputRelNode(node: RelNode): Option[RelNodeBlock] = {
    val matches = childBlocks.filter(_.outputNode.equals(node))
    // An empty result is fine; anything above one match violates the block invariant.
    Preconditions.checkArgument(matches.size <= 1)
    matches.headOption
  }

  /**
    * Get new plan of this block. The child blocks (inputs) will be replaced with new RelNodes
    * (the optimized result of child block).
    *
    * @return New plan of this block
    */
  def getPlan: RelNode = outputNode.accept(new RelNodeBlockShuttle)

  /**
    * Shuttle that substitutes the output node of a child block with that block's new output
    * node and otherwise falls back to the default traversal.
    */
  private class RelNodeBlockShuttle extends RelShuttleImpl {

    override def visitChild(parent: RelNode, i: Int, child: RelNode): RelNode =
      isChildBlockOutputRelNode(parent) match {
        case Some(block) => newOutputNodeOf(block)
        case None => super.visitChild(parent, i, child)
      }

    override def visitChildren(rel: RelNode): RelNode =
      isChildBlockOutputRelNode(rel) match {
        case Some(block) => newOutputNodeOf(block)
        case None => super.visitChildren(rel)
      }

    // NOTE(review): the replacement expression was missing from this copy of the file; it is
    // restored as the child block's new output node, following the getPlan description above.
    // Confirm against the upstream source.
    private def newOutputNodeOf(block: RelNodeBlock): RelNode =
      block.getNewOutputNode.getOrElse(
        throw new NoSuchElementException(
          "New output node of the child block has not been set; " +
            "child blocks must be optimized before their parent."))
  }
}
/**
  * Builds [[RelNodeBlock]] plan
  */
class RelNodeBlockPlanBuilder private(tEnv: TableEnvironment) {

  // Both maps are keyed by reference identity, not by RelNode#equals.
  private val node2Wrapper = new IdentityHashMap[RelNode, RelNodeWrapper]()
  private val node2Block = new IdentityHashMap[RelNode, RelNodeBlock]()

  // NOTE(review): the option constant was cut off in this copy of the file; the name below is
  // restored from the upstream Blink source and must be confirmed against TableConfigOptions.
  private val isUnionAllAsBreakPointDisabled = tEnv.config.getConf.getBoolean(
    TableConfigOptions.SQL_OPTIMIZER_UNIONALL_AS_BREAKPOINT_DISABLED)

  /**
    * Decompose the [[RelNode]] plan into many [[RelNodeBlock]]s,
    * and rebuild [[RelNodeBlock]] plan.
    *
    * @param sinks RelNode DAG to decompose
    * @return Sink-RelNodeBlocks, each Sink-RelNodeBlock is a tree.
    */
  def buildRelNodeBlockPlan(sinks: Seq[RelNode]): Seq[RelNodeBlock] = {
    // First pass: record the parents of every node so that shared nodes can be detected.
    sinks.foreach(buildRelNodeWrappers(_, None))
    // Second pass: build one block tree per sink.
    sinks.map(buildBlockPlan)
  }

  /** Records `parent` as a parent of `node`, then recurses into the inputs of `node`. */
  private def buildRelNodeWrappers(node: RelNode, parent: Option[RelNode]): Unit = {
    node2Wrapper.getOrElseUpdate(node, new RelNodeWrapper(node)).addParentNode(parent)
    node.getInputs.foreach(child => buildRelNodeWrappers(child, Some(node)))
  }

  /** Creates the block rooted at `node` and fills it by walking the inputs of `node`. */
  private def buildBlockPlan(node: RelNode): RelNodeBlock = {
    val currentBlock = new RelNodeBlock(node, tEnv)
    buildBlock(node, currentBlock, createNewBlockWhenMeetValidBreakPoint = false)
    currentBlock
  }

  /**
    * Assigns `node` and its inputs to blocks.
    *
    * A new child block is started at `node` when it is a valid break-point and either it has
    * multiple parents or a node above it had multiple parents but could not be a break-point
    * itself. In every other case the walk continues inside `currentBlock`.
    *
    * @param node the node being visited
    * @param currentBlock the block that `node` belongs to unless a new one is started
    * @param createNewBlockWhenMeetValidBreakPoint whether the next valid break-point must
    *        start a new block regardless of its own parent count
    */
  private def buildBlock(
      node: RelNode,
      currentBlock: RelNodeBlock,
      createNewBlockWhenMeetValidBreakPoint: Boolean): Unit = {
    val hasMultipleParents = node2Wrapper(node).hasMultipleParents
    val validBreakPoint = isValidBreakPoint(node)

    if (validBreakPoint && (createNewBlockWhenMeetValidBreakPoint || hasMultipleParents)) {
      // Blocks are shared per node, so a node reached from several parents yields one block.
      val childBlock = node2Block.getOrElseUpdate(node, new RelNodeBlock(node, tEnv))
      // NOTE(review): this linking statement was missing from this copy of the file; without
      // it the child block would never be reachable from its parent. Confirm upstream.
      currentBlock.addChild(childBlock)
      node.getInputs.foreach {
        child => buildBlock(child, childBlock, createNewBlockWhenMeetValidBreakPoint = false)
      }
    } else {
      // Carry the request downwards when a shared node cannot be a break-point itself.
      val newCreateNewBlockWhenMeetValidBreakPoint =
        createNewBlockWhenMeetValidBreakPoint || (hasMultipleParents && !validBreakPoint)
      node.getInputs.foreach {
        child => buildBlock(child, currentBlock, newCreateNewBlockWhenMeetValidBreakPoint)
      }
    }
  }

  /**
    * TableFunctionScan/Snapshot/Window Aggregate cannot be optimized individually,
    * so TableFunctionScan/Snapshot/Window Aggregate is not a break-point
    * even though it has multiple parents.
    */
  private def isValidBreakPoint(node: RelNode): Boolean = node match {
    case _: TableFunctionScan | _: Snapshot => false
    case union: Union if union.all => !isUnionAllAsBreakPointDisabled
    case project: Project => project.getProjects.forall(p => !hasWindowGroup(p))
    case agg: Aggregate =>
      agg.getInput match {
        case project: Project =>
          // Only the projections used as grouping keys are inspected.
          agg.getGroupSet.forall { group =>
            val p = project.getProjects.get(group)
            !hasWindowGroup(p)
          }
        case _ => true
      }
    case _ => true
  }

  // NOTE(review): the second operand of `||` was cut off in this copy of the file; the call
  // below is restored from the upstream Blink source and must be confirmed against
  // WindowPropertiesRules.
  private def hasWindowGroup(rexNode: RexNode): Boolean = {
    WindowPropertiesRules.hasGroupAuxiliaries(rexNode) ||
      WindowPropertiesRules.hasGroupFunction(rexNode)
  }
}
object RelNodeBlockPlanBuilder {

  /**
    * Decompose the [[LogicalNode]] trees into [[RelNodeBlock]] trees. First, convert LogicalNode
    * trees to RelNode trees. Second, reuse same sub-plan in different trees. Third, decompose the
    * RelNode dag to [[RelNodeBlock]] trees.
    *
    * @param sinkNodes SinkNodes belonging to a LogicalNode plan.
    * @param tEnv the table environment handed to the builder and to every created block
    * @return Sink-RelNodeBlocks, each Sink-RelNodeBlock is a tree.
    * @throws TableException if any element of `sinkNodes` is not a [[SinkNode]]
    */
  def buildRelNodeBlockPlan(
      sinkNodes: Seq[LogicalNode],
      tEnv: TableEnvironment): Seq[RelNodeBlock] = {

    // checks sink node
    sinkNodes.foreach {
      case _: SinkNode => // do nothing
      case o => throw new TableException(s"Error node: $o, Only SinkNode is supported.")
    }

    // convert LogicalNode tree to RelNode tree
    // NOTE(review): the right-hand side was cut off in this copy of the file; the conversion
    // call is restored from the upstream Blink source and must be confirmed against
    // LogicalNode and TableEnvironment.
    val relNodeTrees = sinkNodes.map(_.toRelNode(tEnv.getRelBuilder))

    // merge RelNode tree to RelNode dag
    val relNodeDag = reuseRelNodes(relNodeTrees)

    val builder = new RelNodeBlockPlanBuilder(tEnv)
    builder.buildRelNodeBlockPlan(relNodeDag)
  }

  /**
    * Reuse common subPlan in different RelNode tree, generate a RelNode dag
    *
    * @param relNodes RelNode trees
    * @return RelNode dag which reuse common subPlan in each tree
    */
  private def reuseRelNodes(relNodes: Seq[RelNode]): Seq[RelNode] = {

    class ExpandTableScanShuttle extends RelShuttleImpl {

      /**
        * Converts a [[LogicalTableScan]] into the resulting [[RelNode]] tree by calling
        * [[RelTable]]#toRel. Scans that are not backed by a [[RelTable]] are returned as is.
        */
      override def visit(scan: TableScan): RelNode = {
        scan match {
          case logicalScan: LogicalTableScan =>
            // unwrap returns null when the table is not a RelTable
            Option(logicalScan.getTable.unwrap(classOf[RelTable])) match {
              case Some(relTable) =>
                val relNode = relTable.toRel(
                  RelOptUtil.getContext(logicalScan.getCluster), logicalScan.getTable)
                // NOTE(review): the result expression was missing from this copy of the
                // file; the expanded tree is visited again so nested scans are expanded too.
                // Confirm against the upstream source.
                relNode.accept(this)
              case None => logicalScan
            }
          case _ => scan
        }
      }
    }

    // expand RelTable in TableScan
    val shuttle = new ExpandTableScanShuttle
    val convertedRelNodes = relNodes.map(_.accept(shuttle))

    // reuse subPlan with same digest in input RelNode trees
    val context = new SubplanReuseContext(false, convertedRelNodes: _*)
    val reuseShuttle = new SubplanReuseShuttle(context)
    convertedRelNodes.map(_.accept(reuseShuttle))
  }
}
/**
  * Tracks the distinct parent nodes of a [[RelNode]] so that shared nodes can be detected.
  *
  * @param relNode the wrapped node
  */
class RelNodeWrapper(relNode: RelNode) {

  // Parents are compared by reference identity, so two equal-but-distinct nodes count twice.
  // Built from the JDK directly because no `Sets` helper is imported in this file.
  private val parentNodes: java.util.Set[RelNode] =
    java.util.Collections.newSetFromMap(new IdentityHashMap[RelNode, java.lang.Boolean]())

  /** Records `parent` as a parent of the wrapped node; `None` (a root node) is ignored. */
  def addParentNode(parent: Option[RelNode]): Unit = parent.foreach(p => parentNodes.add(p))

  /** Whether more than one distinct parent has been recorded. */
  def hasMultipleParents: Boolean = parentNodes.size > 1
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy