All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.table.planner.plan.reuse.SubplanReuser.scala Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.table.planner.plan.reuse

import org.apache.flink.table.api.config.OptimizerConfigOptions
import org.apache.flink.table.api.{TableConfig, TableException}
import org.apache.flink.table.planner.plan.nodes.calcite.{LegacySink, Sink}
import org.apache.flink.table.planner.plan.nodes.logical.{FlinkLogicalLegacyTableSourceScan, FlinkLogicalTableSourceScan}
import org.apache.flink.table.planner.plan.nodes.physical.common.{CommonPhysicalLegacyTableSourceScan, CommonPhysicalTableSourceScan}
import org.apache.flink.table.planner.plan.utils.{DefaultRelShuttle, FlinkRelOptUtil}

import com.google.common.collect.{Maps, Sets}
import org.apache.calcite.rel.core.{Exchange, TableFunctionScan}
import org.apache.calcite.rel.{RelNode, RelVisitor}

import java.util

import scala.collection.JavaConversions._

/**
  * An utility class for finding out duplicated sub-plans by digest and reusing them.
  *
  * e.g.
  * if Project1-Scan1 and Project2-Scan2 have same digest, they could be reused,
  * and only one of duplicated sub-plan will be retained.
  * after
  * {{{
  *       Join                      Join
  *     /      \                  /      \
  * Filter1  Filter2          Filter1  Filter2
  *    |        |        =>       \     /
  * Project1 Project2            Project1
  *    |        |                   |
  *  Scan1    Scan2               Scan1
  * }}}
  */
object SubplanReuser {

  /**
    * Finds duplicated sub-plans and return the reused plan.
    *
    * @param rels        the root nodes of the plans to rewrite
    * @param tableConfig the config which is read for
    *                    `TABLE_OPTIMIZER_REUSE_SUB_PLAN_ENABLED` and
    *                    `TABLE_OPTIMIZER_REUSE_SOURCE_ENABLED`
    * @return `rels` itself if sub-plan reuse is disabled, otherwise the result of visiting every
    *         root with one shared [[SubplanReuseShuttle]]
    */
  def reuseDuplicatedSubplan(rels: Seq[RelNode], tableConfig: TableConfig): Seq[RelNode] = {
    val configuration = tableConfig.getConfiguration
    val subplanReuseEnabled = configuration.getBoolean(
      OptimizerConfigOptions.TABLE_OPTIMIZER_REUSE_SUB_PLAN_ENABLED)
    if (!subplanReuseEnabled) {
      rels
    } else {
      val tableSourceReuseEnabled = configuration.getBoolean(
        OptimizerConfigOptions.TABLE_OPTIMIZER_REUSE_SOURCE_ENABLED)
      val context = new SubplanReuseContext(tableSourceReuseEnabled, rels: _*)
      // the shuttle is stateful, so the same instance has to be used for all roots
      val reuseShuttle = new SubplanReuseShuttle(context)
      rels.map(_.accept(reuseShuttle))
    }
  }

  /**
    * The Context holds sub-plan reuse information.
    *
    * If two sub-plan ([[RelNode]] tree with leaf node), even belongs to different tree, have
    * same digest, they are in a reusable sub-plan group.
    * In a reusable sub-plan group, the leftmost sub-plan in the earlier visited tree is reused
    * sub-plan and the remaining will reuse the leftmost one in the earlier visited tree.
    *
    * Uses reuse id to distinguish different reusable sub-plan group,
    * the reuse id of each sub-plan is same in a group.
    *
    * e.g.
    * {{{
    *       Join
    *     /      \
    * Filter1  Filter2
    *    |        |
    * Project1 Project2
    *    |        |
    *  Scan1    Scan2
    * }}}
    * Project1-Scan1 and Project2-Scan2 have same digest, so they are in a reusable sub-plan group.
    *
    * @param tableSourceReuseEnabled whether table source scan nodes are allowed to be reused
    * @param roots                   the root nodes whose trees are visited on construction
    */
  class SubplanReuseContext(tableSourceReuseEnabled: Boolean, roots: RelNode*) {
    // mapping a relNode to its digest (keyed by identity, acts as a digest cache)
    private val mapRelToDigest = Maps.newIdentityHashMap[RelNode, String]()
    // mapping the digest to RelNodes, in the order the nodes were visited
    private val mapDigestToReusableNodes = new util.HashMap[String, util.List[RelNode]]()

    val visitor = new ReusableSubplanVisitor()
    // only the side effect of the traversal is needed, so iterate with foreach
    roots.foreach(root => visitor.go(root))

    /**
      * Return the digest of the given rel node.
      *
      * The digest is computed with `FlinkRelOptUtil.getDigest` on first request and
      * cached per node instance afterwards.
      */
    def getRelDigest(node: RelNode): String = {
      Option(mapRelToDigest.get(node)).getOrElse {
        val newDigest = FlinkRelOptUtil.getDigest(node)
        mapRelToDigest.put(node, newDigest)
        newDigest
      }
    }

    /**
      * Returns true if the given node can reuse other node, else false.
      * The nodes with same digest are reusable,
      * and the non-head node of node-list can reuse the head node.
      */
    def reuseOtherNode(node: RelNode): Boolean = {
      val reusableNodes = getReusableNodes(node)
      // reference comparison: the head node itself is the one being reused by the others
      isReusableNodes(reusableNodes) && (node ne reusableNodes.get(0))
    }

    /**
      * Returns reusable nodes which have same digest,
      * or an empty list if no node was recorded for the digest.
      *
      * The recorded list is returned as is (no copy), callers must not modify it.
      */
    private def getReusableNodes(node: RelNode): util.List[RelNode] = {
      val digest = getRelDigest(node)
      mapDigestToReusableNodes.getOrDefault(digest, util.Collections.emptyList[RelNode]())
    }

    /**
      * Returns true if the given nodes can be reused, else false.
      *
      * The nodes can be reused if there are more than one of them
      * and the head node is not reusable disabled.
      */
    private def isReusableNodes(reusableNodes: util.List[RelNode]): Boolean = {
      // Does not reuse nodes which are reusable disabled
      reusableNodes.size() > 1 && !isNodeReusableDisabled(reusableNodes.get(0))
    }

    /**
      * Returns true if the given node is reusable disabled
      */
    private def isNodeReusableDisabled(node: RelNode): Boolean = {
      node match {
        // TableSourceScan node can not be reused if reuse TableSource disabled
        case _: FlinkLogicalLegacyTableSourceScan | _: CommonPhysicalLegacyTableSourceScan |
             _: FlinkLogicalTableSourceScan | _: CommonPhysicalTableSourceScan =>
          !tableSourceReuseEnabled
        // Exchange node can not be reused if its input is reusable disabled
        case e: Exchange => isNodeReusableDisabled(e.getInput)
        // TableFunctionScan and sink can not be reused
        case _: TableFunctionScan | _: LegacySink | _: Sink => true
        case _ => false
      }
    }

    /**
      * Visitor which records every visited node under its digest
      * in the digest-to-nodes mapping of the enclosing context.
      */
    class ReusableSubplanVisitor extends RelVisitor {
      private val visitedNodes = Sets.newIdentityHashSet[RelNode]()

      override def visit(node: RelNode, ordinal: Int, parent: RelNode): Unit = {
        // `add` returns false for a node which is already visited,
        // such a node does not need to be visited again.
        if (visitedNodes.add(node)) {
          // the same sub-plan should have same digest value
          val digest = getRelDigest(node)
          val nodes = Option(mapDigestToReusableNodes.get(digest)).getOrElse {
            val created = new util.ArrayList[RelNode]()
            mapDigestToReusableNodes.put(digest, created)
            created
          }
          nodes.add(node)
          // the node will be reused if there are more than one nodes with same digest,
          // so there is no need to visit a reused node's inputs.
          if (!isReusableNodes(nodes)) {
            super.visit(node, ordinal, parent)
          }
        }
      }
    }
  }

  /**
    * Rewrite reusable sub-plans with different rel objects to same rel object.
    *
    * e.g.
    * Scan1-Project1 and Scan2-Project2 have same digest, so they can be reused.
    * {{{
    *       Join                      Join
    *     /      \                  /      \
    * Filter1  Filter2          Filter1  Filter2
    *    |        |        =>       \     /
    * Project1 Project2            Project1
    *    |        |                   |
    *  Scan1    Scan2               Scan1
    * }}}
    * After rewrote, Scan2-Project2 is replaced by Scan1-Project1.
    *
    * NOTES: This is a stateful class, please use same shuttle object to reuse multiple trees.
    *
    * @param context the context which decides whether a node reuses another node
    */
  class SubplanReuseShuttle(context: SubplanReuseContext) extends DefaultRelShuttle {
    // mapping the digest to the rewritten node which is retained for that digest
    private val mapDigestToNewNode = new util.HashMap[String, RelNode]()

    /**
      * Returns the retained node of the same digest if `rel` reuses another node,
      * otherwise rewrites the inputs of `rel` and registers it under its digest.
      *
      * @throws TableException if `rel` should reuse a node which was not registered before
      */
    override def visit(rel: RelNode): RelNode = {
      val canReuseOtherNode = context.reuseOtherNode(rel)
      val digest = context.getRelDigest(rel)
      if (canReuseOtherNode) {
        // the head node of the group must have been visited (and registered) earlier
        Option(mapDigestToNewNode.get(digest)).getOrElse {
          throw new TableException("This should not happen")
        }
      } else {
        val newNode = visitInputs(rel)
        mapDigestToNewNode.put(digest, newNode)
        newNode
      }
    }

    /**
      * Parent rel need not change, just replaces inputs if any inputs change.
      *
      * The given rel is modified in place and returned.
      */
    private def visitInputs(rel: RelNode): RelNode = {
      rel.getInputs.zipWithIndex.foreach {
        case (input, index) =>
          val newInput = input.accept(this)
          // reference comparison: only touch the parent if the shuttle returned another object
          if (input ne newInput) {
            rel.replaceInput(index, newInput)
          }
      }
      rel
    }
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy