org.apache.flink.table.planner.StreamPlanner.scala

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.table.planner

import org.apache.flink.annotation.VisibleForTesting
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.dag.Transformation
import org.apache.flink.api.java.tuple.{Tuple2 => JTuple2}
import org.apache.flink.streaming.api.datastream.{DataStream, DataStreamSink}
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.table.api._
import org.apache.flink.table.calcite._
import org.apache.flink.table.catalog.{CatalogManager, CatalogManagerCalciteSchema, CatalogTable, ConnectorCatalogTable, _}
import org.apache.flink.table.delegation.{Executor, Parser, Planner}
import org.apache.flink.table.executor.StreamExecutor
import org.apache.flink.table.explain.PlanJsonParser
import org.apache.flink.table.expressions.{ExpressionBridge, PlannerExpression, PlannerExpressionConverter, PlannerTypeInferenceUtilImpl}
import org.apache.flink.table.factories.{TableFactoryService, TableFactoryUtil, TableSinkFactory}
import org.apache.flink.table.operations.OutputConversionModifyOperation.UpdateMode
import org.apache.flink.table.operations._
import org.apache.flink.table.plan.StreamOptimizer
import org.apache.flink.table.plan.nodes.datastream.DataStreamRel
import org.apache.flink.table.plan.util.UpdatingPlanChecker
import org.apache.flink.table.runtime.types.CRow
import org.apache.flink.table.sinks._
import org.apache.flink.table.types.utils.TypeConversions
import org.apache.flink.table.util.JavaScalaConversionUtil

import org.apache.calcite.jdbc.CalciteSchema
import org.apache.calcite.jdbc.CalciteSchemaBuilder.asRootSchema
import org.apache.calcite.plan.RelOptUtil
import org.apache.calcite.rel.RelNode

import _root_.java.lang.{Boolean => JBool}
import _root_.java.util
import _root_.java.util.Objects
import _root_.java.util.function.{Supplier => JSupplier}

import _root_.scala.collection.JavaConversions._
import _root_.scala.collection.JavaConverters._

/**
  * Implementation of [[Planner]] for the legacy Flink planner. It supports only streaming use
  * cases. (The new [[org.apache.flink.table.sources.InputFormatTableSource]] should work, but it
  * will be handled as a streaming source, and no batch-specific optimizations will be applied.)
  *
  * @param executor        instance of [[StreamExecutor]], needed to extract
  *                        [[StreamExecutionEnvironment]] for
  *                        [[org.apache.flink.table.sources.StreamTableSource.getDataStream]]
  * @param config          mutable configuration passed from corresponding [[TableEnvironment]]
  * @param functionCatalog catalog of functions
  * @param catalogManager  manager of catalog meta objects such as tables, views, databases etc.
  */
class StreamPlanner(
    executor: Executor,
    config: TableConfig,
    functionCatalog: FunctionCatalog,
    catalogManager: CatalogManager)
  extends Planner {

  // temporary utility until planner expressions are no longer used
  functionCatalog.setPlannerTypeInferenceUtil(PlannerTypeInferenceUtilImpl.INSTANCE)

  private val internalSchema: CalciteSchema =
    asRootSchema(new CatalogManagerCalciteSchema(catalogManager, true))

  // temporary bridge between API and planner
  private val expressionBridge: ExpressionBridge[PlannerExpression] =
    new ExpressionBridge[PlannerExpression](functionCatalog, PlannerExpressionConverter.INSTANCE)

  private val planningConfigurationBuilder: PlanningConfigurationBuilder =
    new PlanningConfigurationBuilder(
      config,
      functionCatalog,
      internalSchema,
      expressionBridge)

  @VisibleForTesting
  private[flink] val optimizer: StreamOptimizer = new StreamOptimizer(
    () => config.getPlannerConfig
      .unwrap(classOf[CalciteConfig])
      .orElse(CalciteConfig.DEFAULT),
    planningConfigurationBuilder)

  private val parser: Parser = new ParserImpl(
    catalogManager,
    // we do not cache the parser in order to always use the most up-to-date configuration;
    // users might change the parser configuration in TableConfig between parsing statements
    new JSupplier[FlinkPlannerImpl] {
      override def get(): FlinkPlannerImpl = getFlinkPlanner
    },
    new JSupplier[CalciteParser] {
      override def get(): CalciteParser = planningConfigurationBuilder.createCalciteParser()
    }
  )

  override def getParser: Parser = parser

  override def translate(tableOperations: util.List[ModifyOperation])
    : util.List[Transformation[_]] = {
    tableOperations.asScala.map(translate).filter(Objects.nonNull).asJava
  }

  override def explain(operations: util.List[Operation], extended: Boolean): String = {
    operations.asScala.map {
      case queryOperation: QueryOperation =>
        explain(queryOperation, unwrapQueryConfig)
      case operation =>
        throw new TableException(s"${operation.getClass.getCanonicalName} is not supported")
    }.mkString(s"${System.lineSeparator}${System.lineSeparator}")
  }

  override def getCompletionHints(
      statement: String,
      position: Int)
    : Array[String] = {
    val planner = getFlinkPlanner
    planner.getCompletionHints(statement, position)
  }

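  /**
    * Translates a single [[ModifyOperation]] into a [[Transformation]] by dispatching on the
    * concrete operation type (unregistered sink, catalog sink, or output conversion).
    */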
  private def translate(tableOperation: ModifyOperation)
    : Transformation[_] = {
    tableOperation match {
      case s: UnregisteredSinkModifyOperation[_] =>
        writeToSink(s.getChild, s.getSink, unwrapQueryConfig)

      case catalogSink: CatalogSinkModifyOperation =>
        getTableSink(catalogSink.getTableIdentifier)
          .map(sink => {
            TableSinkUtils.validateSink(
              catalogSink.getStaticPartitions,
              catalogSink.getChild,
              catalogSink.getTableIdentifier,
              sink)
            // set static partitions if it is a partitioned sink
            sink match {
              case partitionableSink: PartitionableTableSink =>
                partitionableSink.setStaticPartition(catalogSink.getStaticPartitions)
              case _ =>
            }
            // set whether to overwrite if it's an OverwritableTableSink
            sink match {
              case overwritableTableSink: OverwritableTableSink =>
                overwritableTableSink.setOverwrite(catalogSink.isOverwrite)
              case _ =>
                assert(!catalogSink.isOverwrite, "INSERT OVERWRITE requires " +
                  s"${classOf[OverwritableTableSink].getSimpleName} but actually got " +
                  sink.getClass.getName)
            }
            writeToSink(catalogSink.getChild, sink, unwrapQueryConfig)
          }) match {
          case Some(t) => t
          case None =>
            throw new TableException(s"Sink ${catalogSink.getTableIdentifier} does not exists")
        }

      case outputConversion: OutputConversionModifyOperation =>
        val (isRetract, withChangeFlag) = outputConversion.getUpdateMode match {
          case UpdateMode.RETRACT => (true, true)
          case UpdateMode.APPEND => (false, false)
          case UpdateMode.UPSERT => (false, true)
        }

        translateToType(
          tableOperation.getChild,
          unwrapQueryConfig,
          isRetract,
          withChangeFlag,
          TypeConversions.fromDataTypeToLegacyInfo(outputConversion.getType)).getTransformation

      case _ =>
        throw new TableException(s"Unsupported ModifyOperation: $tableOperation")
    }
  }

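  /**
    * Creates a [[StreamQueryConfig]] from the idle state retention times configured in the
    * [[TableConfig]].
    */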
  private def unwrapQueryConfig = {
    new StreamQueryConfig(
      config.getMinIdleStateRetentionTime,
      config.getMaxIdleStateRetentionTime
    )
  }

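  /**
    * Explains a [[QueryOperation]] by rendering its abstract syntax tree, optimized logical
    * plan, and physical execution plan as a single string.
    */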
  private def explain(tableOperation: QueryOperation, queryConfig: StreamQueryConfig) = {
    val ast = getRelBuilder.tableOperation(tableOperation).build()
    val optimizedPlan = optimizer
      .optimize(ast, updatesAsRetraction = false, getRelBuilder)
    val dataStream = translateToCRow(optimizedPlan, queryConfig)

    val env = dataStream.getExecutionEnvironment
    val jsonSqlPlan = env.getExecutionPlan

    val sqlPlan = PlanJsonParser.getSqlExecutionPlan(jsonSqlPlan, false)

    s"== Abstract Syntax Tree ==" +
      System.lineSeparator +
      s"${RelOptUtil.toString(ast)}" +
      System.lineSeparator +
      s"== Optimized Logical Plan ==" +
      System.lineSeparator +
      s"${RelOptUtil.toString(optimizedPlan)}" +
      System.lineSeparator +
      s"== Physical Execution Plan ==" +
      System.lineSeparator +
      s"$sqlPlan"
  }

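  /** Creates a [[FlinkPlannerImpl]] for the current catalog and database. */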
  private def getFlinkPlanner: FlinkPlannerImpl = {
    val currentCatalogName = catalogManager.getCurrentCatalog
    val currentDatabase = catalogManager.getCurrentDatabase

    planningConfigurationBuilder.createFlinkPlanner(currentCatalogName, currentDatabase)
  }

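  /** Creates a [[FlinkRelBuilder]] for the current catalog and database. */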
  private[flink] def getRelBuilder: FlinkRelBuilder = {
    val currentCatalogName = catalogManager.getCurrentCatalog
    val currentDatabase = catalogManager.getCurrentDatabase

    planningConfigurationBuilder.createRelBuilder(currentCatalogName, currentDatabase)
  }

  private[flink] def getConfig: TableConfig = config

  private[flink] def getExecutionEnvironment: StreamExecutionEnvironment =
    executor.asInstanceOf[StreamExecutor].getExecutionEnvironment

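  /**
    * Translates an optimized [[DataStreamRel]] plan into a [[DataStream]] of [[CRow]] records,
    * configuring the execution environment with the current table configuration first.
    */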
  private def translateToCRow(
      logicalPlan: RelNode,
      queryConfig: StreamQueryConfig): DataStream[CRow] = {

    logicalPlan match {
      case node: DataStreamRel =>
        getExecutionEnvironment.configure(
          config.getConfiguration,
          Thread.currentThread().getContextClassLoader)
        node.translateToPlan(this, queryConfig)
      case _ =>
        throw new TableException("Cannot generate DataStream due to an invalid logical plan. " +
          "This is a bug and should not happen. Please file an issue.")
    }
  }

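  /**
    * Writes a [[QueryOperation]] to the given [[TableSink]], dispatching to the retract,
    * upsert, or append translation depending on the sink type.
    */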
  private def writeToSink[T](
      tableOperation: QueryOperation,
      sink: TableSink[T],
      queryConfig: StreamQueryConfig)
    : Transformation[_] = {

    val resultSink = sink match {
      case retractSink: RetractStreamTableSink[T] =>
        retractSink match {
          case _: PartitionableTableSink =>
            throw new TableException("Partitionable sink in retract stream mode " +
              "is not supported yet!")
          case _ =>
        }
        writeToRetractSink(retractSink, tableOperation, queryConfig)

      case upsertSink: UpsertStreamTableSink[T] =>
        upsertSink match {
          case _: PartitionableTableSink =>
            throw new TableException("Partitionable sink in upsert stream mode " +
              "is not supported yet!")
          case _ =>
        }
        writeToUpsertSink(upsertSink, tableOperation, queryConfig)

      case appendSink: AppendStreamTableSink[T] =>
        writeToAppendSink(appendSink, tableOperation, queryConfig)

      case _ =>
        throw new ValidationException("Stream Tables can only be emitted by AppendStreamTableSink, "
          + "RetractStreamTableSink, or UpsertStreamTableSink.")
    }

    if (resultSink != null) {
      resultSink.getTransformation
    } else {
      null
    }
  }

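  /**
    * Translates the query into a stream of (change flag, record) tuples and emits it to the
    * [[RetractStreamTableSink]].
    */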
  private def writeToRetractSink[T](
      sink: RetractStreamTableSink[T],
      tableOperation: QueryOperation,
      streamQueryConfig: StreamQueryConfig)
    : DataStreamSink[_] = {
    // retraction sink can always be used
    val outputType = TypeConversions.fromDataTypeToLegacyInfo(sink.getConsumedDataType)
      .asInstanceOf[TypeInformation[JTuple2[JBool, T]]]
    // translate the Table into a DataStream and provide the type that the TableSink expects.
    val result: DataStream[JTuple2[JBool, T]] =
      translateToType(
        tableOperation,
        streamQueryConfig,
        updatesAsRetraction = true,
        withChangeFlag = true,
        outputType)
    // Give the DataStream to the TableSink to emit it.
    sink.consumeDataStream(result)
  }

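  /**
    * Translates an insert-only query into a [[DataStream]] and emits it to the
    * [[AppendStreamTableSink]]. Fails if the optimized plan is not append-only.
    */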
  private def writeToAppendSink[T](
      sink: AppendStreamTableSink[T],
      tableOperation: QueryOperation,
      streamQueryConfig: StreamQueryConfig)
    : DataStreamSink[_] = {
    // optimize plan
    val relNode = getRelBuilder.tableOperation(tableOperation).build()
    val optimizedPlan = optimizer.optimize(relNode, updatesAsRetraction = false, getRelBuilder)
    // verify table is an insert-only (append-only) table
    if (!UpdatingPlanChecker.isAppendOnly(optimizedPlan)) {
      throw new TableException(
        "AppendStreamTableSink requires that Table has only insert changes.")
    }
    val outputType = TypeConversions.fromDataTypeToLegacyInfo(sink.getConsumedDataType)
      .asInstanceOf[TypeInformation[T]]
    val resultType = getTableSchema(tableOperation.getTableSchema.getFieldNames, optimizedPlan)
    // translate the Table into a DataStream and provide the type that the TableSink expects.
    val result: DataStream[T] =
      translateOptimized(
        optimizedPlan,
        resultType,
        outputType,
        streamQueryConfig,
        withChangeFlag = false)
    // Give the DataStream to the TableSink to emit it.
    sink.consumeDataStream(result)
  }

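  /**
    * Translates the query into a stream of (change flag, record) tuples and emits it to the
    * [[UpsertStreamTableSink]], configuring the sink's key fields and append-only flag.
    */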
  private def writeToUpsertSink[T](
      sink: UpsertStreamTableSink[T],
      tableOperation: QueryOperation,
      streamQueryConfig: StreamQueryConfig)
    : DataStreamSink[_] = {
    // optimize plan
    val relNode = getRelBuilder.tableOperation(tableOperation).build()
    val optimizedPlan = optimizer.optimize(relNode, updatesAsRetraction = false, getRelBuilder)
    // check for append only table
    val isAppendOnlyTable = UpdatingPlanChecker.isAppendOnly(optimizedPlan)
    sink.setIsAppendOnly(isAppendOnlyTable)
    // extract unique key fields
    val sinkFieldNames = sink.getTableSchema.getFieldNames
    val tableKeys: Option[Array[String]] = UpdatingPlanChecker
      .getUniqueKeyFields(optimizedPlan, sinkFieldNames)
    // check that we have keys if the table has changes (is not append-only)
    tableKeys match {
      case Some(keys) => sink.setKeyFields(keys)
      case None if isAppendOnlyTable => sink.setKeyFields(null)
      case None if !isAppendOnlyTable => throw new TableException(
        "UpsertStreamTableSink requires that Table has full primary keys if it is updated.")
    }
    val outputType = TypeConversions.fromDataTypeToLegacyInfo(sink.getConsumedDataType)
      .asInstanceOf[TypeInformation[JTuple2[JBool, T]]]
    val resultType = getTableSchema(tableOperation.getTableSchema.getFieldNames, optimizedPlan)
    // translate the Table into a DataStream and provide the type that the TableSink expects.
    val result: DataStream[JTuple2[JBool, T]] =
      translateOptimized(
        optimizedPlan,
        resultType,
        outputType,
        streamQueryConfig,
        withChangeFlag = true)
    // Give the DataStream to the TableSink to emit it.
    sink.consumeDataStream(result)
  }

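  /**
    * Optimizes the given [[QueryOperation]] and translates it into a [[DataStream]] of the
    * requested type, optionally emitting a change flag for updating results.
    */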
  private def translateToType[A](
      table: QueryOperation,
      queryConfig: StreamQueryConfig,
      updatesAsRetraction: Boolean,
      withChangeFlag: Boolean,
      tpe: TypeInformation[A])
    : DataStream[A] = {
    val relNode = getRelBuilder.tableOperation(table).build()
    val dataStreamPlan = optimizer.optimize(relNode, updatesAsRetraction, getRelBuilder)
    val rowType = getTableSchema(table.getTableSchema.getFieldNames, dataStreamPlan)

    // if no change flags are requested, verify table is an insert-only (append-only) table.
    if (!withChangeFlag && !UpdatingPlanChecker.isAppendOnly(dataStreamPlan)) {
      throw new ValidationException(
        "Table is not an append-only table. " +
          "Use the toRetractStream() in order to handle add and retract messages.")
    }

    // get CRow plan
    translateOptimized(dataStreamPlan, rowType, tpe, queryConfig, withChangeFlag)
  }

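  /**
    * Converts an already optimized plan into a [[DataStream]] of the requested type by
    * translating it to an internal [[CRow]] stream first.
    */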
  private def translateOptimized[A](
      optimizedPlan: RelNode,
      logicalSchema: TableSchema,
      tpe: TypeInformation[A],
      queryConfig: StreamQueryConfig,
      withChangeFlag: Boolean)
    : DataStream[A] = {
    val dataStream = translateToCRow(optimizedPlan, queryConfig)
    DataStreamConversions.convert(dataStream, logicalSchema, withChangeFlag, tpe, config)
  }

  /**
    * Returns the record type of the optimized plan with field names of the logical plan.
    */
  private def getTableSchema(originalNames: Array[String], optimizedPlan: RelNode): TableSchema = {
    val fieldTypes = optimizedPlan.getRowType.getFieldList.asScala.map(_.getType)
      .map(FlinkTypeFactory.toTypeInfo)
      .map(TypeConversions.fromLegacyInfoToDataType)
      .toArray

    TableSchema.builder().fields(originalNames, fieldTypes).build()
  }

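  /**
    * Looks up the table identified by the given [[ObjectIdentifier]] and creates a
    * [[TableSink]] for it, either from a [[ConnectorCatalogTable]], the catalog's table
    * factory, or via [[TableFactoryService]].
    */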
  private def getTableSink(objectIdentifier: ObjectIdentifier): Option[TableSink[_]] = {
    JavaScalaConversionUtil.toScala(catalogManager.getTable(objectIdentifier))
      .map(_.getTable) match {
      case Some(s) if s.isInstanceOf[ConnectorCatalogTable[_, _]] =>
        JavaScalaConversionUtil.toScala(s.asInstanceOf[ConnectorCatalogTable[_, _]].getTableSink)

      case Some(s) if s.isInstanceOf[CatalogTable] =>
        val catalog = catalogManager.getCatalog(objectIdentifier.getCatalogName)
        val catalogTable = s.asInstanceOf[CatalogTable]
        if (catalog.isPresent && catalog.get().getTableFactory.isPresent) {
          val sink = TableFactoryUtil.createTableSinkForCatalogTable(
            catalog.get(),
            catalogTable,
            objectIdentifier.toObjectPath)
          if (sink.isPresent) {
            return Option(sink.get())
          }
        }
        val sinkProperties = catalogTable.toProperties
        Option(TableFactoryService.find(classOf[TableSinkFactory[_]], sinkProperties)
          .createTableSink(sinkProperties))

      case _ => None
    }
  }
}



