/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.sinks
import org.apache.flink.api.java.tuple.{Tuple2 => JTuple2}
import org.apache.flink.api.java.typeutils.{GenericTypeInfo, PojoTypeInfo, TupleTypeInfo}
import org.apache.flink.api.scala.typeutils.CaseClassTypeInfo
import org.apache.flink.table.api._
import org.apache.flink.table.catalog.{CatalogTable, ObjectIdentifier}
import org.apache.flink.table.data.RowData
import org.apache.flink.table.operations.CatalogSinkModifyOperation
import org.apache.flink.table.planner.connectors.DynamicSinkUtils
import org.apache.flink.table.runtime.types.TypeInfoDataTypeConverter.fromDataTypeToTypeInfo
import org.apache.flink.table.runtime.typeutils.InternalTypeInfo
import org.apache.flink.table.sinks._
import org.apache.flink.table.types.DataType
import org.apache.flink.table.types.inference.TypeTransformations.toNullable
import org.apache.flink.table.types.logical.utils.LogicalTypeChecks
import org.apache.flink.table.types.logical.{LegacyTypeInformationType, RowType}
import org.apache.flink.table.types.utils.DataTypeUtils
import org.apache.flink.table.types.utils.TypeConversions.{fromLegacyInfoToDataType, fromLogicalToDataType}
import org.apache.flink.table.utils.{TableSchemaUtils, TypeMappingUtils}
import org.apache.flink.types.Row
import _root_.scala.collection.JavaConversions._
/**
* Note: We aim to gradually port the logic in this class to [[DynamicSinkUtils]].
*/
object TableSinkUtils {
/**
* Checks whether the [[TableSink]] is compatible with the INSERT INTO clause, e.g.
* whether the sink is a [[PartitionableTableSink]] and the partitions are valid.
*
* @param sinkOperation The sink operation with the query that is supposed to be written.
* @param sinkIdentifier The path of the sink. It is used only for logging and does not
* participate in the validation.
* @param sink The sink that we want to write to.
* @param partitionKeys The partition keys of this table.
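*
* A minimal usage sketch (the operation, identifier and sink instances are assumed to come
* from the surrounding planner context; "dt" and "hour" are hypothetical partition keys):
* {{{
*   TableSinkUtils.validateTableSink(sinkOperation, sinkIdentifier, tableSink, Seq("dt", "hour"))
* }}}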
*/
def validateTableSink(
sinkOperation: CatalogSinkModifyOperation,
sinkIdentifier: ObjectIdentifier,
sink: TableSink[_],
partitionKeys: Seq[String]): Unit = {
// check partitions are valid
if (partitionKeys.nonEmpty) {
sink match {
case _: PartitionableTableSink =>
case _ => throw new ValidationException("We need PartitionableTableSink to write data to" +
s" partitioned table: $sinkIdentifier")
}
}
val staticPartitions = sinkOperation.getStaticPartitions
if (staticPartitions != null && !staticPartitions.isEmpty) {
staticPartitions.map(_._1) foreach { p =>
if (!partitionKeys.contains(p)) {
throw new ValidationException(s"Static partition column $p should be in the partition" +
s" fields list $partitionKeys for Table($sinkIdentifier).")
}
}
}
sink match {
case overwritableTableSink: OverwritableTableSink =>
overwritableTableSink.setOverwrite(sinkOperation.isOverwrite)
case _ =>
assert(!sinkOperation.isOverwrite, "INSERT OVERWRITE requires " +
s"${classOf[OverwritableTableSink].getSimpleName} but actually got " +
sink.getClass.getName)
}
}
/**
* Infers the physical schema of a [[TableSink]]. The physical schema ignores the change flag
* field and normalizes physical types (which can be generic or POJO types) into a [[TableSchema]].
*
* @param queryLogicalType the logical type of the query, used to fill in the sink physical
* schema if the sink physical type is not specified.
* @param sink the instance of [[TableSink]]
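*
* A hedged usage sketch, assuming the query row type and the sink instance are available from
* the surrounding planner context:
* {{{
*   // for a RetractStreamTableSink or UpsertStreamTableSink the change flag is taken into
*   // account automatically and stripped from the resulting schema
*   val physicalSchema: TableSchema =
*     TableSinkUtils.inferSinkPhysicalSchema(queryLogicalType, tableSink)
* }}}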
*/
def inferSinkPhysicalSchema(
queryLogicalType: RowType,
sink: TableSink[_]): TableSchema = {
val withChangeFlag = sink match {
case _: RetractStreamTableSink[_] | _: UpsertStreamTableSink[_] => true
case _: StreamTableSink[_] => false
case dsts: DataStreamTableSink[_] => dsts.withChangeFlag
}
inferSinkPhysicalSchema(sink.getConsumedDataType, queryLogicalType, withChangeFlag)
}
/**
* Infers the physical schema of a [[TableSink]]. The physical schema ignores the change flag
* field and normalizes physical types (which can be generic or POJO types) into a [[TableSchema]].
*
* @param consumedDataType the consumed data type of the sink
* @param queryLogicalType the logical type of the query, used to fill in the sink physical
* schema if the sink physical type is not specified.
* @param withChangeFlag true if the emitted records contain change flags.
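*
* A hedged sketch of the atomic-type case, assuming a sink that consumes plain STRING values:
* {{{
*   val schema = TableSinkUtils.inferSinkPhysicalSchema(
*     DataTypes.STRING(), queryLogicalType, withChangeFlag = false)
*   // non-composite output types are wrapped into a single-field schema named "f0"
* }}}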
*/
def inferSinkPhysicalSchema(
consumedDataType: DataType,
queryLogicalType: RowType,
withChangeFlag: Boolean): TableSchema = {
// the requested output physical type which ignores the flag field
val requestedOutputType = inferSinkPhysicalDataType(
consumedDataType,
queryLogicalType,
withChangeFlag)
if (LogicalTypeChecks.isCompositeType(requestedOutputType.getLogicalType)) {
// if the requested output type is a POJO, then we should ignore the POJO field order
// and infer the sink schema via field names, see expandPojoTypeToSchema().
fromDataTypeToTypeInfo(requestedOutputType) match {
case pj: PojoTypeInfo[_] => expandPojoTypeToSchema(pj, queryLogicalType)
case _ =>
TableSchema.fromResolvedSchema(
DataTypeUtils.expandCompositeTypeToSchema(requestedOutputType))
}
} else {
// atomic type
TableSchema.builder().field("f0", requestedOutputType).build()
}
}
/**
* Expands a [[PojoTypeInfo]] to a corresponding [[TableSchema]].
* This is special handling for [[PojoTypeInfo]], because the fields of a [[PojoTypeInfo]] are not
* in the declared order but in alphabetical order. In order to match the query schema, we have to
* reorder the POJO schema.
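*
* A hedged illustration, assuming a hypothetical POJO `class User { var name: String = _;
* var id: Long = _ }`: [[PojoTypeInfo]] lists its fields alphabetically as (id, name), but if
* the query schema is (name, id), the resulting [[TableSchema]] follows the query order:
* {{{
*   // hypothetical: queryLogicalType has fields (name STRING, id BIGINT)
*   // expandPojoTypeToSchema(pojoTypeInfo, queryLogicalType) yields the schema (name, id)
* }}}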
*/
private def expandPojoTypeToSchema(
pojo: PojoTypeInfo[_],
queryLogicalType: RowType): TableSchema = {
val fieldNames = queryLogicalType.getFieldNames
// reorder pojo fields according to the query schema
val reorderedFields = fieldNames.map(name => {
val index = pojo.getFieldIndex(name)
if (index < 0) {
throw new TableException(s"$name is not found in ${pojo.toString}")
}
val fieldTypeInfo = pojo.getPojoFieldAt(index).getTypeInformation
val fieldDataType = fieldTypeInfo match {
case nestedPojo: PojoTypeInfo[_] =>
val nestedLogicalType = queryLogicalType.getFields()(index).getType.asInstanceOf[RowType]
expandPojoTypeToSchema(nestedPojo, nestedLogicalType).toRowDataType
case _ =>
fromLegacyInfoToDataType(fieldTypeInfo)
}
DataTypes.FIELD(name, fieldDataType)
})
TableSchema.fromResolvedSchema(
DataTypeUtils.expandCompositeTypeToSchema(DataTypes.ROW(reorderedFields: _*)))
}
/**
* Infers the physical data type of a [[TableSink]]. The physical data type ignores
* the change flag field.
*
* @param consumedDataType the consumed data type of the sink
* @param queryLogicalType the logical type of the query, used to fill in the sink physical
* schema if the sink physical type is not specified.
* @param withChangeFlag true if the emitted records contain change flags.
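*
* A hedged sketch for a retract sink, assuming the consumed type is a Java Tuple2 whose
* first field is the BOOLEAN change flag:
* {{{
*   val consumed: DataType = tableSink.getConsumedDataType   // e.g. Tuple2<Boolean, Row>
*   val physical = TableSinkUtils.inferSinkPhysicalDataType(
*     consumed, queryLogicalType, withChangeFlag = true)
*   // `physical` describes only the payload field; the Boolean change flag is stripped
* }}}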
*/
def inferSinkPhysicalDataType(
consumedDataType: DataType,
queryLogicalType: RowType,
withChangeFlag: Boolean): DataType = {
val consumedTypeInfo = consumedDataType.getLogicalType match {
case lt: LegacyTypeInformationType[_] => Some(lt.getTypeInformation)
case _ => None
}
if (consumedTypeInfo.isEmpty) {
return consumedDataType
}
val requestedTypeInfo = if (withChangeFlag) {
consumedTypeInfo.get match {
// Scala tuple
case t: CaseClassTypeInfo[_]
if t.getTypeClass == classOf[(_, _)] && t.getTypeAt(0) == Types.BOOLEAN =>
t.getTypeAt[Any](1)
// Java tuple
case t: TupleTypeInfo[_]
if t.getTypeClass == classOf[JTuple2[_, _]] && t.getTypeAt(0) == Types.BOOLEAN =>
t.getTypeAt[Any](1)
case _ => throw new TableException(
"Don't support " + consumedDataType + " conversion for the retract sink")
}
} else {
consumedTypeInfo.get
}
// The type may have been inferred by invoking [[TypeExtractor.createTypeInfo]] based on
// the class of the resulting type, e.g. when converting the given [[Table]] into
// an append [[DataStream]]. If the class is Row, the inferred type is only a
// [[GenericTypeInfo[Row]]], so it should be converted to the [[RowTypeInfo]] in order
// to get better serialization performance.
requestedTypeInfo match {
case gt: GenericTypeInfo[Row] if gt.getTypeClass == classOf[Row] =>
fromLogicalToDataType(queryLogicalType).bridgedTo(classOf[Row])
case gt: GenericTypeInfo[RowData] if gt.getTypeClass == classOf[RowData] =>
fromLogicalToDataType(queryLogicalType).bridgedTo(classOf[RowData])
case bt: InternalTypeInfo[RowData] =>
val fields = bt.toRowFieldNames.zip(bt.toRowFieldTypes).map { case (n, t) =>
DataTypes.FIELD(n, fromLogicalToDataType(t))
}
DataTypes.ROW(fields: _*).bridgedTo(classOf[RowData])
case _ =>
fromLegacyInfoToDataType(requestedTypeInfo)
}
}
/**
* Checks whether the logical schema (from DDL) and the physical schema
* (from TableSink.getConsumedDataType()) of the sink are compatible.
*
* @param catalogTable the catalog table of the sink
* @param sink the instance of [[TableSink]]
* @param queryLogicalType the logical type of the query
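*
* A minimal usage sketch (the catalog table, sink and query type are assumed to come from the
* surrounding planner context); a [[ValidationException]] is thrown if the schemas do not match:
* {{{
*   TableSinkUtils.validateLogicalPhysicalTypesCompatible(catalogTable, tableSink, queryLogicalType)
* }}}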
*/
def validateLogicalPhysicalTypesCompatible(
catalogTable: CatalogTable,
sink: TableSink[_],
queryLogicalType: RowType): Unit = {
// there may be generated columns in the DDL, so only take the physical part of the DDL schema
val logicalSchema = TableSchemaUtils.getPhysicalSchema(catalogTable.getSchema)
// infer the physical schema from TableSink#getConsumedDataType
val physicalSchema = TableSinkUtils.inferSinkPhysicalSchema(
queryLogicalType,
sink)
// check for valid type info
if (logicalSchema.getFieldCount != physicalSchema.getFieldCount) {
throw new ValidationException("The field count of logical schema of the table does" +
" not match with the field count of physical schema\n. " +
s"The logical schema: [${logicalSchema.getFieldDataTypes.mkString(",")}]\n" +
s"The physical schema: [${physicalSchema.getFieldDataTypes.mkString(",")}].")
}
for (i <- 0 until logicalSchema.getFieldCount) {
val logicalFieldType = DataTypeUtils.transform(
logicalSchema.getFieldDataTypes()(i), toNullable) // ignore nullabilities
val logicalFieldName = logicalSchema.getFieldNames()(i)
val physicalFieldType = DataTypeUtils.transform(
physicalSchema.getFieldDataTypes()(i), toNullable) // ignore nullabilities
val physicalFieldName = physicalSchema.getFieldNames()(i)
TypeMappingUtils.checkPhysicalLogicalTypeCompatible(
physicalFieldType.getLogicalType,
logicalFieldType.getLogicalType,
physicalFieldName,
logicalFieldName,
false)
}
}
/**
* Gets the NOT NULL physical field indices of the given [[TableSchema]].
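*
* A hedged usage sketch with a hand-built schema (field names and types are illustrative):
* {{{
*   val schema = TableSchema.builder()
*     .field("id", DataTypes.BIGINT().notNull())
*     .field("name", DataTypes.STRING())
*     .build()
*   TableSinkUtils.getNotNullFieldIndices(schema)   // Array(0): only "id" is NOT NULL
* }}}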
*/
def getNotNullFieldIndices(tableSchema: TableSchema): Array[Int] = {
val rowType = tableSchema.toPhysicalRowDataType.getLogicalType.asInstanceOf[RowType]
val fieldTypes = rowType.getFields.map(_.getType).toArray
fieldTypes.indices.filter { index =>
!fieldTypes(index).isNullable
}.toArray
}
}