org.apache.spark.sql.util.PartitioningUtils.scala

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.util

import org.apache.spark.sql.catalyst.analysis.Resolver
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.DEFAULT_PARTITION_NAME
import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, Literal}
import org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils
import org.apache.spark.sql.catalyst.util.CharVarcharUtils
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy
import org.apache.spark.sql.types.{CharType, DataType, StringType, StructField, StructType, VarcharType}
import org.apache.spark.unsafe.types.UTF8String

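/**
 * Helper methods for normalizing and validating partition specifications, e.g. static
 * partition values in INSERT statements and specs in ALTER TABLE ... PARTITION commands.
 */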
private[sql] object PartitioningUtils {

  def castPartitionSpec(value: String, dt: DataType, conf: SQLConf): Expression = {
    conf.storeAssignmentPolicy match {
      // SPARK-30844: follow StoreAssignmentPolicy for static partition values as closely as
      // possible. We cannot follow it completely, because the parser has already erased the
      // type info of static partition values and converted them to strings, so no static type
      // checking can be done here.
      case StoreAssignmentPolicy.ANSI | StoreAssignmentPolicy.STRICT =>
        val cast = Cast(Literal(value), dt, Option(conf.sessionLocalTimeZone),
          ansiEnabled = true)
        cast.setTagValue(Cast.BY_TABLE_INSERTION, ())
        cast
      case _ =>
        Cast(Literal(value), dt, Option(conf.sessionLocalTimeZone),
          ansiEnabled = false)
    }
  }
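
  // Usage sketch (illustrative, assuming an IntegerType partition column and the active
  // session conf): evaluating the returned Cast yields the typed partition value. Under
  // ANSI/STRICT a malformed string fails at evaluation time; under LEGACY it becomes null.
  //
  //   import org.apache.spark.sql.types.IntegerType
  //   val expr = castPartitionSpec("15", IntegerType, SQLConf.get)
  //   val partValue = expr.eval()   // 15 for a well-formed value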

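  /**
   * Normalizes a raw partition string value by casting it to the partition column's data type
   * (via [[castPartitionSpec]], honoring the store assignment policy) and casting the result
   * back to a string, e.g. "015" becomes "15" for an int column. Returns null if the cast
   * produces null.
   */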
  private def normalizePartitionStringValue(value: String, field: StructField): String = {
    val casted = Cast(
      castPartitionSpec(value, field.dataType, SQLConf.get),
      StringType,
      Option(SQLConf.get.sessionLocalTimeZone)
    ).eval()
    if (casted != null) {
      casted.asInstanceOf[UTF8String].toString
    } else {
      null
    }
  }

  /**
   * Normalize the column names in a partition specification w.r.t. the real partition column
   * names and case sensitivity. E.g., if the partition spec has a column named `monTh`, the
   * table has a partition column named `month`, and the analysis is case-insensitive, we
   * normalize `monTh` to `month`.
   */
  def normalizePartitionSpec[T](
      partitionSpec: Map[String, T],
      partCols: StructType,
      tblName: String,
      resolver: Resolver): Map[String, T] = {
    val rawSchema = CharVarcharUtils.getRawSchema(partCols, SQLConf.get)
    val normalizedPartSpec = partitionSpec.toSeq.map { case (key, value) =>
      val normalizedField = rawSchema.find(f => resolver(f.name, key)).getOrElse {
        throw QueryCompilationErrors.invalidPartitionColumnKeyInTableError(key, tblName)
      }

      val normalizedVal =
        if (SQLConf.get.charVarcharAsString) value else normalizedField.dataType match {
          case CharType(len) if value != null && value != DEFAULT_PARTITION_NAME =>
            val v = value match {
              case Some(str: String) => Some(charTypeWriteSideCheck(str, len))
              case str: String => charTypeWriteSideCheck(str, len)
              case other => other
            }
            v.asInstanceOf[T]
          case VarcharType(len) if value != null && value != DEFAULT_PARTITION_NAME =>
            val v = value match {
              case Some(str: String) => Some(varcharTypeWriteSideCheck(str, len))
              case str: String => varcharTypeWriteSideCheck(str, len)
              case other => other
            }
            v.asInstanceOf[T]
          case _ if !SQLConf.get.getConf(SQLConf.SKIP_TYPE_VALIDATION_ON_ALTER_PARTITION) &&
              value != null && value != DEFAULT_PARTITION_NAME =>
            val v = value match {
              case Some(str: String) => Some(normalizePartitionStringValue(str, normalizedField))
              case str: String => normalizePartitionStringValue(str, normalizedField)
              case other => other
            }
            v.asInstanceOf[T]
          case _ => value
        }
      normalizedField.name -> normalizedVal
    }

    SchemaUtils.checkColumnNameDuplication(normalizedPartSpec.map(_._1), resolver)

    normalizedPartSpec.toMap
  }
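
  // Usage sketch, following the example in the doc above (assumes a case-insensitive
  // resolver and a table partitioned by an int column `month`):
  //
  //   import org.apache.spark.sql.types.IntegerType
  //   val partCols = StructType(Seq(StructField("month", IntegerType)))
  //   normalizePartitionSpec(Map("monTh" -> "5"), partCols, "tbl", SQLConf.get.resolver)
  //   // => Map("month" -> "5")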

  private def charTypeWriteSideCheck(inputStr: String, limit: Int): String = {
    val toUtf8 = UTF8String.fromString(inputStr)
    CharVarcharCodegenUtils.charTypeWriteSideCheck(toUtf8, limit).toString
  }

  private def varcharTypeWriteSideCheck(inputStr: String, limit: Int): String = {
    val toUtf8 = UTF8String.fromString(inputStr)
    CharVarcharCodegenUtils.varcharTypeWriteSideCheck(toUtf8, limit).toString
  }
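
  // Behavior sketch for the write-side checks above, assuming standard CHAR/VARCHAR
  // write-side padding and length-check semantics:
  //
  //   charTypeWriteSideCheck("ab", 4)        // "ab  "  (space-padded to the CHAR(4) length)
  //   varcharTypeWriteSideCheck("abc  ", 3)  // "abc"   (trailing spaces trimmed to fit)
  //   charTypeWriteSideCheck("abcde", 4)     // error: value exceeds the length limit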

  /**
   * Verify that the input partition spec exactly matches the table's defined partition columns.
   * The set of columns must be the same, but their order may differ.
   */
  def requireExactMatchedPartitionSpec(
      tableName: String,
      spec: TablePartitionSpec,
      partitionColumnNames: Seq[String]): Unit = {
    val defined = partitionColumnNames.sorted
    if (spec.keys.toSeq.sorted != defined) {
      throw QueryCompilationErrors.invalidPartitionSpecError(spec.keys.mkString(", "),
        partitionColumnNames, tableName)
    }
  }
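
  // Sketch: the check is order-insensitive but requires exactly the defined partition columns.
  //
  //   requireExactMatchedPartitionSpec("t", Map("b" -> "2", "a" -> "1"), Seq("a", "b")) // ok
  //   requireExactMatchedPartitionSpec("t", Map("a" -> "1"), Seq("a", "b"))             // error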
}