com.netease.arctic.spark.sql.ArcticExtensionUtils.scala

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.netease.arctic.spark.sql

import com.netease.arctic.spark.table.{ArcticIcebergSparkTable, ArcticSparkTable, SupportsUpsert}
import com.netease.arctic.spark.{ArcticSparkCatalog, ArcticSparkSessionCatalog}
import org.apache.iceberg.spark.Spark3Util
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.Resolver
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, SubqueryAlias}
import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog}
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
import org.apache.spark.sql.types.{ArrayType, MapType, StructField, StructType}

import scala.collection.JavaConverters.seqAsJavaList


object ArcticExtensionUtils {

  implicit class ArcticTableHelper(table: Table) {
    // Narrow a generic DataSourceV2 Table to an Arctic table, failing fast otherwise.
    def asArcticTable: ArcticSparkTable = {
      table match {
        case arcticTable: ArcticSparkTable => arcticTable
        case _ => throw new IllegalArgumentException(s"$table is not an Arctic table")
      }
    }

    // Narrow to the upsert write interface, failing fast otherwise.
    def asUpsertWrite: SupportsUpsert = {
      table match {
        case arcticTable: SupportsUpsert => arcticTable
        case _ => throw new IllegalArgumentException(s"$table does not support upsert writes")
      }
    }
  }
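
  // Illustrative usage (not part of the original file; `relation` is assumed
  // to be a resolved DataSourceV2Relation):
  //
  //   import com.netease.arctic.spark.sql.ArcticExtensionUtils._
  //   val arctic: ArcticSparkTable = relation.table.asArcticTable
  //   val upsert: SupportsUpsert = relation.table.asUpsertWrite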

  implicit class ArcticRelationHelper(plan: LogicalPlan) {
    def asTableRelation: DataSourceV2Relation = {
      ArcticExtensionUtils.asTableRelation(plan)
    }
  }

  implicit class ArcticStructTypeHelper(struct: StructType) {
    // Resolve a (possibly nested) field path against this struct, mirroring
    // Spark's internal StructType.findNestedField; returns the normalized
    // parent path together with the resolved field.
    def findNestedField(
        fieldNames: Seq[String],
        includeCollections: Boolean = false,
        resolver: Resolver = _ == _): Option[(Seq[String], StructField)] = {
      def prettyFieldName(nameParts: Seq[String]): String = {
        nameParts.map(quoteIfNeeded).mkString(".")
      }

      def quoteIfNeeded(part: String): String = {
        if (part.contains(".") || part.contains("`")) {
          s"`${part.replace("`", "``")}`"
        } else {
          part
        }
      }

      def findField(
          struct: StructType,
          searchPath: Seq[String],
          normalizedPath: Seq[String]): Option[(Seq[String], StructField)] = {
        searchPath.headOption.flatMap { searchName =>
          val found = struct.fields.filter(f => resolver(searchName, f.name))
          if (found.length > 1) {
            val names = found.map(f => prettyFieldName(normalizedPath :+ f.name))
              .mkString("[", ", ", " ]")
            throw new UnsupportedOperationException(
              s"Ambiguous field name: ${prettyFieldName(normalizedPath :+ searchName)}. Found " +
                s"multiple columns that can match: $names")
          } else if (found.isEmpty) {
            None
          } else {
            val field = found.head
            (searchPath.tail, field.dataType, includeCollections) match {
              case (Seq(), _, _) =>
                Some(normalizedPath -> field)

              case (names, struct: StructType, _) =>
                findField(struct, names, normalizedPath :+ field.name)

              case (_, _, false) =>
                None // types nested in maps and arrays are not used

              case (Seq("key"), MapType(keyType, _, _), true) =>
                // return the key type as a struct field to include nullability
                Some((normalizedPath :+ field.name) -> StructField("key", keyType, nullable = false))

              case (Seq("key", names@_*), MapType(struct: StructType, _, _), true) =>
                findField(struct, names, normalizedPath ++ Seq(field.name, "key"))

              case (Seq("value"), MapType(_, valueType, isNullable), true) =>
                // return the value type as a struct field to include nullability
                Some((normalizedPath :+ field.name) ->
                  StructField("value", valueType, nullable = isNullable))

              case (Seq("value", names@_*), MapType(_, struct: StructType, _), true) =>
                findField(struct, names, normalizedPath ++ Seq(field.name, "value"))

              case (Seq("element"), ArrayType(elementType, isNullable), true) =>
                // return the element type as a struct field to include nullability
                Some((normalizedPath :+ field.name) ->
                  StructField("element", elementType, nullable = isNullable))

              case (Seq("element", names@_*), ArrayType(struct: StructType, _), true) =>
                findField(struct, names, normalizedPath ++ Seq(field.name, "element"))

              case _ =>
                None
            }
          }
        }
      }

      findField(struct, fieldNames, Nil)
    }
  }
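
  // A small sketch of findNestedField on a hypothetical schema (illustrative
  // only; assumes the usual org.apache.spark.sql.types imports):
  //
  //   val schema = StructType(Seq(
  //     StructField("user", StructType(Seq(
  //       StructField("name", StringType),
  //       StructField("tags", MapType(StringType, IntegerType))))),
  //     StructField("id", LongType)))
  //
  //   schema.findNestedField(Seq("user", "name"))
  //   // => Some((Seq("user"), StructField("name", StringType)))
  //
  //   schema.findNestedField(Seq("user", "tags", "key"), includeCollections = true)
  //   // => Some((Seq("user", "tags"), StructField("key", StringType, nullable = false)))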

  def isArcticRelation(plan: LogicalPlan): Boolean = {
    def isArcticTable(relation: DataSourceV2Relation): Boolean = relation.table match {
      case _: ArcticSparkTable => true
      case _ => false
    }

    plan.collectLeaves().exists {
      case p: DataSourceV2Relation => isArcticTable(p)
      case s: SubqueryAlias => s.child.children.exists {
        case p: DataSourceV2Relation => isArcticTable(p)
        case _ => false
      }
      case _ => false
    }
  }

  def isArcticIcebergRelation(plan: LogicalPlan): Boolean = {
    def isArcticIcebergTable(relation: DataSourceV2Relation): Boolean = relation.table match {
      case _: ArcticIcebergSparkTable => true
      case _ => false
    }

    plan.collectLeaves().exists {
      case p: DataSourceV2Relation => isArcticIcebergTable(p)
      case s: SubqueryAlias => s.child.children.exists {
        case p: DataSourceV2Relation => isArcticIcebergTable(p)
        case _ => false
      }
      case _ => false
    }
  }

  def isArcticCatalog(catalog: TableCatalog): Boolean = {
    catalog match {
      case _: ArcticSparkCatalog => true
      case _: ArcticSparkSessionCatalog[_] => true
      case _ => false
    }
  }

  def isArcticTable(table: Table): Boolean = table match {
    case _: ArcticSparkTable => true
    case _: ArcticIcebergSparkTable => true
    case _ => false
  }

  def asTableRelation(plan: LogicalPlan): DataSourceV2Relation = {
    plan match {
      case s: SubqueryAlias => asTableRelation(s.child)
      // unwrap projections and keep descending to the underlying relation
      case p: Project => asTableRelation(p.child)
      case r: DataSourceV2Relation => r
      case _ => throw new IllegalArgumentException(s"Expected a DataSourceV2Relation but got: $plan")
    }
  }

  def isKeyedTable(relation: DataSourceV2Relation): Boolean = {
    relation.table match {
      case arctic: ArcticSparkTable =>
        arctic.table().isKeyedTable
      case _ => false
    }
  }

  def buildCatalogAndIdentifier(
      sparkSession: SparkSession,
      originIdentifier: TableIdentifier): (TableCatalog, Identifier) = {
    // `database` may be absent; Spark3Util then resolves the single-part name
    // against the session's current catalog and namespace.
    val identifier: Seq[String] = originIdentifier.database.toSeq :+ originIdentifier.table
    val catalogAndIdentifier = Spark3Util.catalogAndIdentifier(sparkSession, seqAsJavaList(identifier))
    catalogAndIdentifier.catalog() match {
      case a: TableCatalog => (a, catalogAndIdentifier.identifier())
      case _ => throw new UnsupportedOperationException("Only TableCatalog and its implementations are supported")
    }
  }
}
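
// A minimal end-to-end sketch (illustrative only; `spark` is assumed to be an
// active SparkSession and `plan` an analyzed LogicalPlan):
//
//   import com.netease.arctic.spark.sql.ArcticExtensionUtils._
//   import org.apache.spark.sql.catalyst.TableIdentifier
//
//   if (isArcticRelation(plan)) {
//     val relation = plan.asTableRelation
//     if (isKeyedTable(relation)) {
//       // keyed Arctic tables can be narrowed to the upsert write interface
//       val upsert = relation.table.asUpsertWrite
//     }
//   }
//
//   val (catalog, ident) =
//     buildCatalogAndIdentifier(spark, TableIdentifier("tbl", Some("db")))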