/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.sql.catalyst.expressions

import java.text.{DecimalFormat, DecimalFormatSymbols, ParsePosition}
import java.util.Locale

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.util.ArrayBasedMapData
import org.apache.spark.sql.types.{DataType, MapType, StringType, StructType}
import org.apache.spark.unsafe.types.UTF8String

object ExprUtils {
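
  /**
   * Evaluates a foldable string expression containing a schema in DDL format
   * and returns the corresponding [[DataType]].
   */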
  def evalTypeExpr(exp: Expression): DataType = {
    if (exp.foldable) {
      exp.eval() match {
        case s: UTF8String if s != null => DataType.fromDDL(s.toString)
        case _ => throw new AnalysisException(
          s"The expression '${exp.sql}' is not a valid schema string.")
      }
    } else {
      throw new AnalysisException(
        "Schema should be specified in DDL format as a string literal or output of " +
          s"the schema_of_json/schema_of_csv functions instead of ${exp.sql}")
    }
  }
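
  /**
   * Evaluates a foldable schema expression like [[evalTypeExpr]], additionally
   * requiring the result to be a [[StructType]].
   */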
  def evalSchemaExpr(exp: Expression): StructType = {
    val dataType = evalTypeExpr(exp)
    if (!dataType.isInstanceOf[StructType]) {
      throw new AnalysisException(
        s"Schema should be struct type but got ${dataType.sql}.")
    }
    dataType.asInstanceOf[StructType]
  }
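
  /**
   * Converts a `CreateMap` expression with string keys and values into a Scala
   * `Map[String, String]`, as used for datasource options.
   */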
  def convertToMapData(exp: Expression): Map[String, String] = exp match {
    case m: CreateMap
        if m.dataType.acceptsType(MapType(StringType, StringType, valueContainsNull = false)) =>
      val arrayMap = m.eval().asInstanceOf[ArrayBasedMapData]
      ArrayBasedMapData.toScalaMap(arrayMap).map { case (key, value) =>
        key.toString -> value.toString
      }
    case m: CreateMap =>
      throw new AnalysisException(
        s"A type of keys and values in map() must be string, but got ${m.dataType.catalogString}")
    case _ =>
      throw new AnalysisException("Must use a map() function for options")
  }

  /**
   * A convenient function for schema validation in datasources supporting
   * `columnNameOfCorruptRecord` as an option.
   */
  def verifyColumnNameOfCorruptRecord(
      schema: StructType,
      columnNameOfCorruptRecord: String): Unit = {
    schema.getFieldIndex(columnNameOfCorruptRecord).foreach { corruptFieldIndex =>
      val f = schema(corruptFieldIndex)
      if (f.dataType != StringType || !f.nullable) {
        throw new AnalysisException(
          "The field for corrupt records must be string type and nullable")
      }
    }
  }
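
  /**
   * Returns a locale-aware parser from strings to `java.math.BigDecimal`. For
   * `Locale.US`, grouping commas are stripped and the plain `BigDecimal`
   * constructor is used for backward compatibility.
   */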
  def getDecimalParser(locale: Locale): String => java.math.BigDecimal = {
    if (locale == Locale.US) { // Special handling of the default locale for backward compatibility
      (s: String) => new java.math.BigDecimal(s.replaceAll(",", ""))
    } else {
      val decimalFormat = new DecimalFormat("", new DecimalFormatSymbols(locale))
      decimalFormat.setParseBigDecimal(true)
      (s: String) => {
        val pos = new ParsePosition(0)
        val result = decimalFormat.parse(s, pos).asInstanceOf[java.math.BigDecimal]
        if (pos.getIndex() != s.length() || pos.getErrorIndex() != -1) {
          throw new IllegalArgumentException("Cannot parse any decimal")
        } else {
          result
        }
      }
    }
  }
}
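
// ---------------------------------------------------------------------------
// Usage sketch (not part of the original Spark source). It exercises the
// helpers above with literal expressions; the object name `ExprUtilsExample`
// and the sample values are introduced here purely for illustration.
// ---------------------------------------------------------------------------
object ExprUtilsExample {
  def main(args: Array[String]): Unit = {
    // A foldable string literal in DDL format evaluates to a StructType.
    val schema = ExprUtils.evalSchemaExpr(Literal("a INT, b STRING"))
    println(schema.simpleString) // struct<a:int,b:string>

    // A map() of string literals becomes a Scala Map of datasource options.
    val options = ExprUtils.convertToMapData(
      CreateMap(Seq(Literal("mode"), Literal("PERMISSIVE"))))
    println(options) // Map(mode -> PERMISSIVE)

    // The US-locale parser strips grouping commas before parsing.
    val parseDecimal = ExprUtils.getDecimalParser(Locale.US)
    println(parseDecimal("1,234.56")) // 1234.56
  }
}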