/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.plan.utils
import org.apache.flink.api.dag.Transformation
import org.apache.flink.table.api.TableException
import org.apache.flink.table.data.{GenericRowData, RowData}
import org.apache.flink.table.planner.codegen.CodeGenUtils.{DEFAULT_INPUT1_TERM, GENERIC_ROW}
import org.apache.flink.table.planner.codegen.OperatorCodeGenerator.generateCollect
import org.apache.flink.table.planner.codegen.{CodeGenUtils, CodeGeneratorContext, ExprCodeGenerator, OperatorCodeGenerator}
import org.apache.flink.table.planner.plan.nodes.exec.utils.ExecNodeUtil
import org.apache.flink.table.runtime.operators.CodeGenOperatorFactory
import org.apache.flink.table.runtime.types.LogicalTypeDataTypeConverter.fromDataTypeToLogicalType
import org.apache.flink.table.runtime.typeutils.InternalTypeInfo
import org.apache.flink.table.sources.TableSource
import org.apache.flink.table.types.DataType
import org.apache.flink.table.types.logical.RowType
import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo
import org.apache.calcite.rel.core.TableScan
import org.apache.calcite.rex.RexNode
import java.util
import scala.collection.JavaConversions._
/**
 * Utility methods for [[TableScan]]s.
*/
object ScanUtil {
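  /**
   * Returns true if any of the given field indexes refers to a time attribute
   * (rowtime / proctime) marker instead of a physical field.
   */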
  def hasTimeAttributeField(indexes: Array[Int]): Boolean =
    indexes.contains(TimeIndicatorTypeInfo.ROWTIME_STREAM_MARKER) ||
      indexes.contains(TimeIndicatorTypeInfo.ROWTIME_BATCH_MARKER) ||
      indexes.contains(TimeIndicatorTypeInfo.PROCTIME_STREAM_MARKER) ||
      indexes.contains(TimeIndicatorTypeInfo.PROCTIME_BATCH_MARKER)
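
  /**
   * Returns true if records produced by the given [[TableSource]] must be converted
   * into Flink's internal [[RowData]] representation.
   */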
private[flink] def needsConversion(source: TableSource[_]): Boolean = {
needsConversion(source.getProducedDataType)
}
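
  /**
   * Returns true if the given [[DataType]] is not already backed by Flink's internal
   * row class and therefore requires a conversion step.
   */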
def needsConversion(dataType: DataType): Boolean =
fromDataTypeToLogicalType(dataType) match {
case _: RowType => !CodeGenUtils.isInternalClass(dataType)
case _ => true
}
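
  /**
   * Generates a "SourceConversion" operator that converts records of the given input
   * [[Transformation]] into internal [[RowData]], remapping field indexes and
   * materializing time attributes where necessary.
   *
   * @param rowtimeExpr optional expression computing the rowtime attribute
   * @param beforeConvert code snippet emitted before the conversion code
   * @param afterConvert code snippet emitted after the conversion code
   */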
def convertToInternalRow(
ctx: CodeGeneratorContext,
input: Transformation[Any],
fieldIndexes: Array[Int],
inputType: DataType,
outputRowType: RowType,
qualifiedName: util.List[String],
rowtimeExpr: Option[RexNode] = None,
beforeConvert: String = "",
afterConvert: String = ""): Transformation[RowData] = {
    // name of the generated conversion operator
    val convertName = "SourceConversion"
    // term that refers to the current input record in the generated code
    val inputTerm = DEFAULT_INPUT1_TERM
    val internalInType = fromDataTypeToLogicalType(inputType)
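    // derive a converter from the external input format to internal data structures;
    // non-row inputs are wrapped into a single-field row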
val (inputTermConverter, inputRowType) = {
val convertFunc = CodeGenUtils.genToInternalConverter(ctx, inputType)
internalInType match {
case rt: RowType => (convertFunc, rt)
        case _ =>
          ((record: String) => s"$GENERIC_ROW.of(${convertFunc(record)})",
            RowType.of(internalInType))
}
}
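    // fast path: if input and output row types match and no time attribute has to be
    // materialized, forward records without any conversion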
val processCode =
if ((inputRowType.getChildren == outputRowType.getChildren) &&
(inputRowType.getFieldNames == outputRowType.getFieldNames) &&
!hasTimeAttributeField(fieldIndexes)) {
s"${generateCollect(inputTerm)}"
} else {
        // field indexes changed (e.g. POJO field order) or a time attribute must be materialized
val conversion = new ExprCodeGenerator(ctx, false)
.bindInput(inputRowType, inputTerm = inputTerm, inputFieldMapping = Some(fieldIndexes))
.generateConverterResultExpression(
outputRowType, classOf[GenericRowData], rowtimeExpression = rowtimeExpr)
s"""
|$beforeConvert
|${conversion.code}
|${generateCollect(conversion.resultTerm)}
|$afterConvert
|""".stripMargin
}
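    // generate a one-input operator that applies the conversion code to each record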
val generatedOperator = OperatorCodeGenerator.generateOneInputStreamOperator[Any, RowData](
ctx,
convertName,
processCode,
outputRowType,
converter = inputTermConverter)
val substituteStreamOperator = new CodeGenOperatorFactory[RowData](generatedOperator)
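    // wrap the generated operator into a transformation, keeping the input's parallelism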
ExecNodeUtil.createOneInputTransformation(
input.asInstanceOf[Transformation[RowData]],
getOperatorName(qualifiedName, outputRowType),
substituteStreamOperator,
InternalTypeInfo.of(outputRowType),
input.getParallelism,
0)
}
/**
   * @param qualifiedName the qualified name of the table
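   * @param rowType row type whose field names appear in the generated operator name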
*/
private[flink] def getOperatorName(qualifiedName: Seq[String], rowType: RowType): String = {
val tableQualifiedName = qualifiedName.mkString(".")
val fieldNames = rowType.getFieldNames.mkString(", ")
s"SourceConversion(table=[$tableQualifiedName], fields=[$fieldNames])"
}
/**
   * Returns the field indices of the primary key within the given fields.
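   *
   * A minimal usage sketch (hypothetical values):
   * {{{
   *   import java.util.Arrays.asList
   *   ScanUtil.getPrimaryKeyIndices(asList("id", "name", "ts"), asList("ts", "id"))
   *   // => Array(2, 0)
   * }}}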
*/
def getPrimaryKeyIndices(
fieldNames: util.List[String],
keyFields: util.List[String]): Array[Int] = {
    // We must use the scan node's output field names rather than the original schema
    // to compute the primary key indices, because projection push-down may have
    // changed the scan node's output fields.
keyFields.map { k =>
val index = fieldNames.indexOf(k)
if (index < 0) {
// primary key shouldn't be pruned, otherwise it's a bug
throw new TableException(
s"Can't find primary key field $k in the input fields $fieldNames. " +
s"This is a bug, please file an issue.")
}
index
}.toArray
}
}