org.apache.flink.table.plan.CacheAwareRelNodePlanBuilder.scala

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.flink.table.plan

import org.apache.flink.table.api.{RichTableSchema, TableEnvironment}
import org.apache.flink.table.calcite.FlinkTypeFactory
import org.apache.flink.table.factories.{BatchTableSinkFactory, BatchTableSourceFactory, TableFactory}
import org.apache.flink.table.plan.logical.LogicalNode
import org.apache.flink.table.plan.util.LogicalNodeUtil
import org.apache.flink.table.sinks.TableSink
import org.apache.flink.table.sources.TableSource
import org.apache.flink.table.types.{InternalType, TimestampType}
import org.apache.flink.table.temptable.{FlinkTableServiceFactory, TableServiceOptions}
import org.apache.flink.table.util.TableProperties

import _root_.scala.collection.JavaConverters._

/**
  * A helper class that rebuilds the LogicalNode plan when Table.persist/cache is enabled.
  *
  * @param tEnv the TableEnvironment whose cached tables drive the rebuild.
  */
class CacheAwareRelNodePlanBuilder(tEnv: TableEnvironment) {

  /**
    * Stores the mapping from each original node to its rebuilt node, so that a node
    * shared by several parents is cloned only once while building the plan. An
    * IdentityHashMap is used so nodes are matched by reference rather than by equality.
    */
  private val originalNewNodeMap = new java.util.IdentityHashMap[LogicalNode, LogicalNode]

  /**
    * Rebuilds the LogicalNode trees if any table has been cached successfully.
    * @param sinkNodes the LogicalNodes stored in the TableEnvironment.
    * @return the new LogicalNodes after rebuilding.
    */
  def buildPlanIfNeeded(sinkNodes: Seq[LogicalNode]): Seq[LogicalNode] = {
    // clear the map, because the newly created trees may be rewritten again on later calls
    originalNewNodeMap.clear()

    if (tEnv.tableServiceManager.cachedTables.size() > 0) {
      buildPlan(sinkNodes)
    } else {
      sinkNodes
    }
  }
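
  // Note: if no table has been cached yet, buildPlanIfNeeded returns the original
  // sink nodes unchanged, so the rewrite is a no-op for plans without cached tables.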

  private def buildPlan(nodes: Seq[LogicalNode]): Seq[LogicalNode] = {
    nodes.map { node =>
      // if the original node has been rebuilt/cloned, just reuse it.
      Option(originalNewNodeMap.get(node)).getOrElse {
        val tableName = tEnv.tableServiceManager.getCachedTableName(node)
        val newNode = tableName match {
          // if this node's table has been cached successfully, replace the
          // subtree with a scan of the cached table
          case Some(name) => tEnv.scan(name).logicalPlan

          // otherwise clone the node with newly rebuilt children
          case None => LogicalNodeUtil.cloneLogicalNode(node, buildPlan(node.children))
        }
        originalNewNodeMap.put(node, newNode)
        newNode
      }
    }
  }
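
  // A sketch of the rewrite (illustrative plan shapes, not actual API): given a
  // plan Sink(Join(A, B)) where A has been cached under a generated name "t_1",
  // buildPlan yields Sink'(Join'(Scan("t_1"), B')); the identity map above ensures
  // that a node shared by several parents is cloned only once.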

  private class CacheSourceSinkTableBuilder(name: String, logicalPlan: LogicalNode) {
    val tableFactory: Option[TableFactory] = tEnv.tableServiceManager.getTableServiceFactory()
    var schema: Option[RichTableSchema] = None
    val properties: Option[TableProperties] =
      tEnv.tableServiceManager.getTableServiceFactoryProperties()

    tableFactory match {
      case Some(factory: FlinkTableServiceFactory) => factory.setTableEnv(tEnv)
      case _ =>
    }

    def createCacheTableSink(): TableSink[_] = {
      build()
      tableFactory.getOrElse(
        throw new Exception("TableServiceFactory is not configured")
      ) match {
        case sink: BatchTableSinkFactory[_] =>
          val tableProperties = properties.get
          tableProperties.putTableNameIntoProperties(name)
          tableProperties.putSchemaIntoProperties(schema.get)
          tableProperties.setString(
            TableServiceOptions.TABLE_SERVICE_ID,
            tEnv.tableServiceManager.getTableServiceId()
          )
          sink.createBatchTableSink(tableProperties.toMap)
        case _ => throw new RuntimeException("Not supported: " + tableFactory)
      }
    }

    def createCacheTableSource(): TableSource = {
      build()
      tableFactory.getOrElse(
        throw new Exception("TableServiceFactory is not configured")
      ) match {
        case source: BatchTableSourceFactory[_] =>
          val tableProperties = properties.get
          tableProperties.putTableNameIntoProperties(name)
          tableProperties.putSchemaIntoProperties(schema.get)
          tableProperties.setString(
            TableServiceOptions.TABLE_SERVICE_ID,
            tEnv.tableServiceManager.getTableServiceId()
          )
          source.createBatchTableSource(tableProperties.toMap)
        case _ => throw new RuntimeException("Not supported: " + tableFactory)
      }
    }
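
    // Note: the sink and source builders set the same table name, schema, and
    // TABLE_SERVICE_ID, so the source created for a cached table can locate
    // exactly the data that the corresponding sink wrote.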

    /** Derives the table schema (field names and internal types) from the plan's row type. */
    private def build(): Unit = {
      val relNode = logicalPlan.toRelNode(tEnv.getRelBuilder)
      val rowType = relNode.getRowType
      val names: Array[String] = rowType.getFieldNames.asScala.toArray
      val types: Array[InternalType] = rowType.getFieldList.asScala
        .map(field => FlinkTypeFactory.toInternalType(field.getType))
        .map {
          // replace time indicator types by SQL_TIMESTAMP
          case t: TimestampType if FlinkTypeFactory.isTimeIndicatorType(t) =>
            TimestampType.TIMESTAMP
          case t: InternalType => t
        }.toArray

      schema = Some(new RichTableSchema(names, types))
    }
  }

  /**
    * Creates a TableSink for caching a table.
    * @param name a unique generated table name.
    * @param logicalPlan the LogicalNode of the table to cache.
    * @return the TableSink to which the cached table will be written.
    */
  def createCacheTableSink(name: String, logicalPlan: LogicalNode): TableSink[_] = {
    val builder = new CacheSourceSinkTableBuilder(name, logicalPlan)
    builder.createCacheTableSink()
  }

  /**
    * Creates a TableSource for scanning a cached table.
    * @param name a unique generated table name.
    * @param logicalPlan the LogicalNode of the cached table.
    * @return the TableSource from which the cached table's data will be read.
    */
  def createCacheTableSource(name: String, logicalPlan: LogicalNode): TableSource = {
    val builder = new CacheSourceSinkTableBuilder(name, logicalPlan)
    builder.createCacheTableSource()
  }
}
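
// A minimal usage sketch (hypothetical driver code; `tEnv`, `table`, `sinkNodes`,
// and the generated name "t_1" are assumptions, not part of this file's API):
//
//   val builder = new CacheAwareRelNodePlanBuilder(tEnv)
//   // persist a cached table through a generated sink...
//   val sink = builder.createCacheTableSink("t_1", table.logicalPlan)
//   // ...later scan it back through a matching source...
//   val source = builder.createCacheTableSource("t_1", table.logicalPlan)
//   // ...and rewrite the sink trees so cached subtrees become scans.
//   val rewritten = builder.buildPlanIfNeeded(sinkNodes)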