All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.table.plan.subplan.RelNodeBlock.scala Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.table.plan.subplan

import org.apache.flink.table.api.{TableConfigOptions, TableEnvironment, TableException}
import org.apache.flink.table.plan.logical.{LogicalNode, SinkNode}
import org.apache.flink.table.plan.nodes.calcite.Sink
import org.apache.flink.table.plan.rules.logical.WindowPropertiesRules
import org.apache.flink.table.plan.schema.RelTable
import org.apache.flink.table.plan.util.{SubplanReuseContext, SubplanReuseShuttle}
import org.apache.flink.util.Preconditions

import com.google.common.collect.Sets
import org.apache.calcite.plan.RelOptUtil
import org.apache.calcite.rel._
import org.apache.calcite.rel.core.{Aggregate, Project, Snapshot, TableFunctionScan, TableScan, Union}
import org.apache.calcite.rel.logical.LogicalTableScan
import org.apache.calcite.rex.RexNode

import java.util.IdentityHashMap

import scala.collection.JavaConversions._
import scala.collection.mutable

/**
  * A [[RelNodeBlock]] is a sub-tree in the [[RelNode]] plan. All [[RelNode]]s in
  * each block have at most one parent (output) node.
  * The nodes in different blocks will be optimized independently.
  *
  * For example: (Table API)
  *
  * {{{-
  *  val table = tEnv.scan("test_table").select('a, 'b, 'c)
  *  table.where('a >= 70).select('a, 'b).writeToSink(sink1)
  *  table.where('a < 70 ).select('a, 'c).writeToSink(sink2)
  * }}}
  *
  * the RelNode DAG is:
  *
  * {{{-
  *        TableScan
  *            |
  *       Project(a,b,c)
  *        /          \
  * Filter(a>=70)  Filter(a<70)
  *     |              |
  * Project(a,b)  Project(a,c)
  *     |              |
  * Sink(sink1)   Sink(sink2)
  * }}}
  *
  * This [[RelNode]] DAG will be decomposed into three [[RelNodeBlock]]s; the break-point
  * is a [[RelNode]] which has more than one output node.
  * The first [[RelNodeBlock]] includes TableScan and Project('a,'b,'c),
  * the second one includes Filter('a>=70), Project('a,'b) and Sink(sink1),
  * the third one includes Filter('a<70), Project('a,'c) and Sink(sink2).
  * The first [[RelNodeBlock]] is the child of the other two.
  * The [[RelNodeBlock]] plan is:
  *
  * {{{-
  *         RelNodeBlock1
  *          /            \
  * RelNodeBlock2  RelNodeBlock3
  * }}}
  *
  * The optimizing order is from child block to parent. The optimized result (DataStream)
  * will be registered into tables first, and then be converted to a new TableScan which is the
  * new output node of current block and is also the input of its parent blocks.
  *
  * @param outputNode A RelNode of the output in the block, which could be a [[Sink]] or
  *                   other RelNode with more than one parent node.
  */
class RelNodeBlock(val outputNode: RelNode, tEnv: TableEnvironment) {
  // Input blocks of this block; a LinkedHashSet keeps insertion order and drops duplicates.
  private val childBlocks = mutable.LinkedHashSet[RelNodeBlock]()

  // Node that stands in for `outputNode` once this block has been optimized.
  private var newOutputNode: Option[RelNode] = None

  // Table name recorded for this block's output, if any has been set.
  private var outputTableName: Option[String] = None

  // Optimized plan recorded for this block, if any has been set.
  private var optimizedPlan: Option[RelNode] = None

  // Sticky flag: once switched on it is never switched off again.
  private var updateAsRetract: Boolean = false

  /** Registers `block` as a child (input) of this block. */
  def addChild(block: RelNodeBlock): Unit = {
    childBlocks += block
  }

  /** The child (input) blocks, in the order they were first added. */
  def children: Seq[RelNodeBlock] = childBlocks.toSeq

  /** Records the node that replaces `outputNode`; passing `null` clears it. */
  def setNewOutputNode(newNode: RelNode): Unit = {
    newOutputNode = Option(newNode)
  }

  /** The replacement output node, or `None` when none has been set. */
  def getNewOutputNode: Option[RelNode] = newOutputNode

  /** Records the output table name; passing `null` clears it. */
  def setOutputTableName(name: String): Unit = {
    outputTableName = Option(name)
  }

  /** The output table name, or `null` when none has been set. */
  def getOutputTableName: String = outputTableName.orNull

  /** Records the optimized plan of this block; passing `null` clears it. */
  def setOptimizedPlan(rel: RelNode): Unit = {
    optimizedPlan = Option(rel)
  }

  /** The optimized plan of this block, or `null` when none has been set. */
  def getOptimizedPlan: RelNode = optimizedPlan.orNull

  /**
    * Switches the update-as-retraction flag on when `updateAsRetract` is true.
    *
    * A child block may have several parent blocks, so a `false` request never resets a flag
    * that an earlier call already switched on.
    */
  def setUpdateAsRetraction(updateAsRetract: Boolean): Unit = {
    this.updateAsRetract = this.updateAsRetract || updateAsRetract
  }

  /** Whether the update-as-retraction flag has been switched on. */
  def isUpdateAsRetraction: Boolean = updateAsRetract

  /**
    * Looks up the child block whose output node equals `node`.
    *
    * @param node the node to look for among the child blocks' output nodes
    * @return the matching child block, or `None` when no child block outputs `node`
    * @throws IllegalArgumentException if more than one child block matches
    */
  def isChildBlockOutputRelNode(node: RelNode): Option[RelNodeBlock] = {
    val candidates = children.filter(child => child.outputNode.equals(node))
    candidates.headOption.map { onlyMatch =>
      Preconditions.checkArgument(candidates.size == 1)
      onlyMatch
    }
  }

  /**
    * Get the new plan of this block. Every node that is the output node of a child block
    * (input) is replaced with that child block's new output node.
    *
    * @return New plan of this block
    */
  def getPlan: RelNode = outputNode.accept(new RelNodeBlockShuttle)

  /**
    * Shuttle that substitutes the output node of a child block with the new output node
    * recorded on that child block, and delegates to [[RelShuttleImpl]] for every other node.
    */
  private class RelNodeBlockShuttle extends RelShuttleImpl {

    override def visitChild(parent: RelNode, i: Int, child: RelNode): RelNode =
      isChildBlockOutputRelNode(parent) match {
        // `.get` fails if the child block has no new output node yet
        case Some(childBlock) => childBlock.getNewOutputNode.get
        case None => super.visitChild(parent, i, child)
      }

    override def visitChildren(rel: RelNode): RelNode =
      isChildBlockOutputRelNode(rel) match {
        // `.get` fails if the child block has no new output node yet
        case Some(childBlock) => childBlock.getNewOutputNode.get
        case None => super.visitChildren(rel)
      }
  }

}

/**
  * Builds the [[RelNodeBlock]] plan for a [[RelNode]] DAG.
  *
  * The build runs in two passes: the first records, for every node, the set of distinct
  * parents it has in the DAG; the second walks down from each sink and starts a new
  * [[RelNodeBlock]] at every valid break-point.
  */
class RelNodeBlockPlanBuilder private(tEnv: TableEnvironment) {

  // Both maps are keyed by node identity, not by equals/hashCode.
  private val node2Wrapper = new IdentityHashMap[RelNode, RelNodeWrapper]()
  private val node2Block = new IdentityHashMap[RelNode, RelNodeBlock]()

  private val isUnionAllAsBreakPointDisabled = tEnv.config.getConf.getBoolean(
    TableConfigOptions.SQL_OPTIMIZER_SUBSECTION_UNIONALL_AS_BREAKPOINT_DISABLED)

  /**
    * Decompose the [[RelNode]] plan into many [[RelNodeBlock]]s,
    * and rebuild [[RelNodeBlock]] plan.
    *
    * @param  sinks RelNode DAG to decompose
    * @return Sink-RelNodeBlocks, each Sink-RelNodeBlock is a tree.
    */
  def buildRelNodeBlockPlan(sinks: Seq[RelNode]): Seq[RelNodeBlock] = {
    sinks.foreach(buildRelNodeWrappers(_, None))
    sinks.map(buildBlockPlan)
  }

  /**
    * Records `parent` as a parent of `node` and does the same for the whole sub-plan
    * below `node`.
    *
    * A node reachable through several parents is descended into only on the first visit.
    * Later visits would add exactly the same (node, parent) pairs to identity sets, so
    * repeating them only costs time, which grows exponentially when shared sub-plans are
    * stacked on top of each other.
    *
    * @param node   the node being visited
    * @param parent the node through which `node` was reached, `None` for a sink
    */
  private def buildRelNodeWrappers(node: RelNode, parent: Option[RelNode]): Unit = {
    val visitedBefore = node2Wrapper.containsKey(node)
    node2Wrapper.getOrElseUpdate(node, new RelNodeWrapper(node)).addParentNode(parent)
    if (!visitedBefore) {
      node.getInputs.foreach(child => buildRelNodeWrappers(child, Some(node)))
    }
  }

  /** Creates the block rooted at the sink `node` and populates its child blocks. */
  private def buildBlockPlan(node: RelNode): RelNodeBlock = {
    val currentBlock = new RelNodeBlock(node, tEnv)
    buildBlock(node, currentBlock, createNewBlockWhenMeetValidBreakPoint = false)
    currentBlock
  }

  /**
    * Walks the sub-plan below `node` and attaches a child block to `currentBlock` at every
    * node where a new block has to start.
    *
    * A new block starts at a valid break-point that either has multiple parents or lies
    * below a multi-parent node that could not be used as a break-point itself.
    *
    * @param node                                  the node being visited
    * @param currentBlock                          the block `node` currently belongs to
    * @param createNewBlockWhenMeetValidBreakPoint whether the next valid break-point must
    *                                              start a new block even if it has a
    *                                              single parent
    */
  private def buildBlock(
      node: RelNode,
      currentBlock: RelNodeBlock,
      createNewBlockWhenMeetValidBreakPoint: Boolean): Unit = {
    val hasMultipleParents = node2Wrapper(node).hasMultipleParents
    val validBreakPoint = isValidBreakPoint(node)

    if (validBreakPoint && (createNewBlockWhenMeetValidBreakPoint || hasMultipleParents)) {
      val blockAlreadyBuilt = node2Block.containsKey(node)
      val childBlock = node2Block.getOrElseUpdate(node, new RelNodeBlock(node, tEnv))
      currentBlock.addChild(childBlock)
      // The sub-plan below an existing block was fully processed when that block was
      // created; walking it again would only repeat idempotent addChild calls.
      if (!blockAlreadyBuilt) {
        node.getInputs.foreach {
          child => buildBlock(child, childBlock, createNewBlockWhenMeetValidBreakPoint = false)
        }
      }
    } else {
      // A multi-parent node that is not a valid break-point pushes the split down to the
      // next valid break-point below it.
      val newCreateNewBlockWhenMeetValidBreakPoint =
        createNewBlockWhenMeetValidBreakPoint || (hasMultipleParents && !validBreakPoint)
      node.getInputs.foreach {
        child => buildBlock(child, currentBlock, newCreateNewBlockWhenMeetValidBreakPoint)
      }
    }
  }

  /**
    * TableFunctionScan/Snapshot/Window Aggregate cannot be optimized individually,
    * so TableFunctionScan/Snapshot/Window Aggregate is not a break-point
    * even though it has multiple parents.
    *
    * A UNION ALL is a break-point unless that has been disabled in the table config.
    * A Project is rejected when any of its expressions is a window group function or
    * auxiliary. An Aggregate on top of a Project is rejected when any of its grouping
    * columns is such an expression.
    */
  private def isValidBreakPoint(node: RelNode): Boolean = node match {
    case _: TableFunctionScan | _: Snapshot => false
    case union: Union if union.all => !isUnionAllAsBreakPointDisabled
    case project: Project => project.getProjects.forall(p => !hasWindowGroup(p))
    case agg: Aggregate =>
      agg.getInput match {
        case project: Project =>
          agg.getGroupSet.forall { group =>
            val p = project.getProjects.get(group)
            !hasWindowGroup(p)
          }
        case _ => true
      }
    case _ => true
  }

  /** Whether `rexNode` contains a window group function or a group auxiliary function. */
  private def hasWindowGroup(rexNode: RexNode): Boolean = {
    WindowPropertiesRules.hasGroupAuxiliaries(rexNode) ||
      WindowPropertiesRules.hasGroupFunction(rexNode)
  }

}

object RelNodeBlockPlanBuilder {

  /**
    * Turns a set of [[LogicalNode]] trees into [[RelNodeBlock]] trees in three steps:
    * convert each LogicalNode tree to a RelNode tree, merge identical sub-plans across the
    * trees into a RelNode dag, then decompose that dag into [[RelNodeBlock]] trees.
    *
    * @param  sinkNodes root nodes of the LogicalNode plan; each one must be a [[SinkNode]]
    * @param  tEnv      table environment supplying the RelBuilder and the configuration
    * @return Sink-RelNodeBlocks, each Sink-RelNodeBlock is a tree.
    * @throws TableException if any element of `sinkNodes` is not a [[SinkNode]]
    */
  def buildRelNodeBlockPlan(
      sinkNodes: Seq[LogicalNode],
      tEnv: TableEnvironment): Seq[RelNodeBlock] = {

    // reject the first root that is not a sink
    val firstNonSink = sinkNodes.find {
      case _: SinkNode => false
      case _ => true
    }
    firstNonSink.foreach { invalid =>
      throw new TableException(s"Error node: $invalid, Only SinkNode is supported.")
    }

    // LogicalNode trees -> RelNode trees -> RelNode dag -> RelNodeBlock trees
    val relBuilder = tEnv.getRelBuilder
    val relNodeTrees = sinkNodes.map(sink => sink.toRelNode(relBuilder))
    val relNodeDag = reuseRelNodes(relNodeTrees)
    new RelNodeBlockPlanBuilder(tEnv).buildRelNodeBlockPlan(relNodeDag)
  }

  /**
    * Merges common sub-plans of the given RelNode trees into a single RelNode dag.
    *
    * @param relNodes RelNode trees
    * @return RelNode dag in which sub-plans common to several trees are shared
    */
  private def reuseRelNodes(relNodes: Seq[RelNode]): Seq[RelNode] = {

    class ExpandTableScanShuttle extends RelShuttleImpl {

      /**
        * Replaces a [[LogicalTableScan]] over a [[RelTable]] with the [[RelNode]] tree
        * returned by [[RelTable]]#toRel, and expands that tree in turn. Any other scan is
        * returned unchanged.
        */
      override def visit(scan: TableScan): RelNode = scan match {
        case logicalScan: LogicalTableScan =>
          Option(logicalScan.getTable.unwrap(classOf[RelTable])) match {
            case Some(relTable) =>
              val toRelContext = RelOptUtil.getContext(logicalScan.getCluster)
              relTable.toRel(toRelContext, logicalScan.getTable).accept(this)
            case None =>
              logicalScan
          }
        case other => other
      }
    }

    // step 1: expand RelTable in TableScan
    val expandShuttle = new ExpandTableScanShuttle
    val expandedTrees = relNodes.map(tree => tree.accept(expandShuttle))

    // step 2: reuse subPlan with same digest in input RelNode trees
    val reuseContext = new SubplanReuseContext(false, expandedTrees: _*)
    val reuseShuttle = new SubplanReuseShuttle(reuseContext)
    expandedTrees.map(tree => tree.accept(reuseShuttle))
  }

}

/**
  * Collects the distinct parents of a [[RelNode]].
  *
  * Parents are held in an identity-based set, so the same parent instance is counted
  * once no matter how often it is added.
  */
class RelNodeWrapper(relNode: RelNode) {
  private val parentNodes = Sets.newIdentityHashSet[RelNode]()

  /** Adds `parent` to the parent set; `None` is ignored. */
  def addParentNode(parent: Option[RelNode]): Unit = {
    parent.foreach(p => parentNodes.add(p))
  }

  /** True when more than one distinct parent has been added. */
  def hasMultipleParents: Boolean = {
    val distinctParents = parentNodes.size
    distinctParents > 1
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy