org.apache.spark.sql.catalyst.analysis.unresolved.scala Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst.analysis
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, LeafNode}
import org.apache.spark.sql.catalyst.trees.TreeNode
import org.apache.spark.sql.types.{BooleanType, DataType}
import org.apache.spark.sql.catalyst.{TableIdentifier, errors}
import org.apache.spark.sql.types.{DataType, StructType}
/**
 * Signals that a property was requested from a tree that has not been fully resolved yet.
 *
 * The exception message has the form "Invalid call to <function> on unresolved object".
 *
 * @param tree the tree node on which the invalid call was made
 * @param function name of the member that was invoked; embedded in the message
 * @tparam TreeType concrete type of the offending tree node
 */
class UnresolvedException[TreeType <: TreeNode[_]](tree: TreeType, function: String)
  extends errors.TreeNodeException(
    tree,
    s"Invalid call to $function on unresolved object",
    null)
/**
 * Leaf plan node that only carries the name of a relation which still has to be looked up
 * in a [[Catalog]].
 *
 * @param tableIdentifier identifier of the relation to look up
 * @param alias optional alias for the relation; `None` when not given
 */
case class UnresolvedRelation(
    tableIdentifier: TableIdentifier,
    alias: Option[String] = None)
  extends LeafNode {

  // A relation that is only known by name is never considered resolved.
  override lazy val resolved = false

  // No attributes are exposed until the relation has been looked up.
  override def output: Seq[Attribute] = Nil

  /** The name of this relation, taken from `tableIdentifier.unquotedString`. */
  def tableName: String = tableIdentifier.unquotedString
}
/**
 * Placeholder for an attribute that is only known by name and has yet to be resolved.
 *
 * Every property that requires resolution (`exprId`, `dataType`, `nullable`, `qualifiers`)
 * throws an [[UnresolvedException]].
 *
 * @param nameParts the individual parts of the attribute name
 */
case class UnresolvedAttribute(nameParts: Seq[String]) extends Attribute with Unevaluable {

  /**
   * The name parts joined with '.'. A part that itself contains a '.' is wrapped in backticks
   * so that it stays distinguishable from the separator.
   */
  def name: String = {
    val renderedParts = nameParts.map {
      case part if part.contains(".") => s"`$part`"
      case part => part
    }
    renderedParts.mkString(".")
  }

  override lazy val resolved = false

  // Properties that are unavailable before resolution.
  override def qualifiers: Seq[String] = throw new UnresolvedException(this, "qualifiers")
  override def nullable: Boolean = throw new UnresolvedException(this, "nullable")
  override def dataType: DataType = throw new UnresolvedException(this, "dataType")
  override def exprId: ExprId = throw new UnresolvedException(this, "exprId")

  // Nullability, qualifiers and instance identity carry no meaning here, so these return `this`.
  override def withQualifiers(newQualifiers: Seq[String]): UnresolvedAttribute = this
  override def withNullability(newNullability: Boolean): UnresolvedAttribute = this
  override def newInstance(): UnresolvedAttribute = this

  // The new name is taken verbatim as a single name part (no splitting on dots).
  override def withName(newName: String): UnresolvedAttribute = UnresolvedAttribute.quoted(newName)

  override def toString: String = "'" + name
}
object UnresolvedAttribute {

  /**
   * Creates an [[UnresolvedAttribute]], parsing segments separated by dots ('.').
   */
  def apply(name: String): UnresolvedAttribute = new UnresolvedAttribute(name.split("\\."))

  /**
   * Creates an [[UnresolvedAttribute]] from a single quoted string (for example using backticks
   * in HiveQL). Since the string is considered quoted, no processing is done on the name.
   */
  def quoted(name: String): UnresolvedAttribute = new UnresolvedAttribute(Seq(name))

  /**
   * Creates an [[UnresolvedAttribute]] from a string in an embedded language. In this case
   * we treat it as a quoted identifier, except for '.', which must be further quoted using
   * backticks if it is part of a column name.
   */
  def quotedString(name: String): UnresolvedAttribute =
    new UnresolvedAttribute(parseAttributeName(name))

  /**
   * Splits an attribute name into its parts on '.', honouring backtick quoting.
   *
   * Rules enforced by the parser:
   *  - Backticks must appear in pairs, and a quoted string must be a complete name part,
   *    which means `ab..c`e.f is not allowed.
   *  - Inside backticks every character other than a backtick, including '.', is taken
   *    literally.
   *  - Outside backticks a '.' ends the current name part. A leading dot, a trailing dot and
   *    two consecutive dots are rejected.
   *  - Escape characters are not supported, so a backtick cannot be used inside a name part.
   *
   * @param name the raw attribute name
   * @return the name parts in order of appearance
   * @throws AnalysisException if `name` violates the rules above
   */
  def parseAttributeName(name: String): Seq[String] = {
    // Declared as a `def` so that a fresh exception is created only when it is thrown.
    def e = new AnalysisException(s"syntax error in attribute name: $name")
    val nameParts = scala.collection.mutable.ArrayBuffer.empty[String]
    // Characters of the name part that is currently being read.
    val tmp = scala.collection.mutable.ArrayBuffer.empty[Char]
    var inBacktick = false
    var i = 0
    while (i < name.length) {
      val char = name(i)
      if (inBacktick) {
        if (char == '`') {
          inBacktick = false
          // A closing backtick must end the name part: only '.' or end of input may follow.
          if (i + 1 < name.length && name(i + 1) != '.') throw e
        } else {
          tmp += char
        }
      } else {
        if (char == '`') {
          // An opening backtick is only legal at the very start of a name part.
          if (tmp.nonEmpty) throw e
          inBacktick = true
        } else if (char == '.') {
          // Reject a leading dot, two consecutive dots and a trailing dot. The `i == 0` test
          // comes first so that `name(i - 1)` is never evaluated with a negative index, which
          // would surface as a StringIndexOutOfBoundsException instead of an AnalysisException.
          if (i == 0 || name(i - 1) == '.' || i == name.length - 1) throw e
          nameParts += tmp.mkString
          tmp.clear()
        } else {
          tmp += char
        }
      }
      i += 1
    }
    // Reaching the end of input inside a quoted section means a backtick was never closed.
    if (inBacktick) throw e
    nameParts += tmp.mkString
    nameParts.toSeq
  }
}
/**
 * Placeholder for a function call that has not been resolved yet.
 *
 * @param name name of the function being called
 * @param children argument expressions of the call
 * @param isDistinct flag stored with the call; it is not consulted in this class
 *                   (presumably marks a DISTINCT invocation -- confirm against the analyzer)
 */
case class UnresolvedFunction(
    name: String,
    children: Seq[Expression],
    isDistinct: Boolean)
  extends Expression with Unevaluable {

  override lazy val resolved = false

  // Properties that are unavailable before resolution.
  override def nullable: Boolean = throw new UnresolvedException(this, "nullable")
  override def foldable: Boolean = throw new UnresolvedException(this, "foldable")
  override def dataType: DataType = throw new UnresolvedException(this, "dataType")

  /** Renders the call as `name(arg1,arg2,...)` using the pretty form of every argument. */
  override def prettyString: String = {
    val renderedArgs = children.map(_.prettyString).mkString(",")
    s"$name($renderedArgs)"
  }

  /** Same layout as `prettyString`, with a leading quote and the arguments' `toString`. */
  override def toString: String = "'" + name + children.mkString("(", ",", ")")
}
/**
 * Stands for all of the input attributes of a relational operator, as in "SELECT * FROM ...".
 * A [[Star]] gets automatically expanded during analysis.
 *
 * A star is never resolved, and every [[NamedExpression]] property overridden here throws an
 * [[UnresolvedException]]; only `expand` is meant to be used.
 */
abstract class Star extends LeafExpression with NamedExpression {

  /**
   * Produces the named expressions this star stands for.
   *
   * @param input the plan whose attributes are the source of the expansion
   * @param resolver the function used to compare names
   */
  def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression]

  override lazy val resolved = false

  // Properties that are unavailable on a star.
  override def toAttribute: Attribute = throw new UnresolvedException(this, "toAttribute")
  override def qualifiers: Seq[String] = throw new UnresolvedException(this, "qualifiers")
  override def nullable: Boolean = throw new UnresolvedException(this, "nullable")
  override def dataType: DataType = throw new UnresolvedException(this, "dataType")
  override def exprId: ExprId = throw new UnresolvedException(this, "exprId")
  override def name: String = throw new UnresolvedException(this, "name")
}
/**
 * Represents all of the input attributes to a given relational operator, for example in
 * "SELECT * FROM ...".
 *
 * This is also used to expand structs. For example:
 * "SELECT record.* from (SELECT struct(a,b,c) as record ...)
 *
 * @param target an optional name that should be the target of the expansion. If omitted all
 *               targets' columns are produced. This can either be a table name or struct name.
 *               This is a list of identifiers that is the path of the expansion.
 */
case class UnresolvedStar(target: Option[Seq[String]]) extends Star with Unevaluable {

  /**
   * Expands this star against `input`.
   *
   *  - Without a target, all output attributes of `input` are returned, even when there are
   *    none.
   *  - With a single-part target, the output attributes having a qualifier that matches the
   *    target (according to `resolver`) are returned, if there are any.
   *  - Otherwise the target is resolved as an attribute of `input` and, when it is a struct,
   *    one alias per struct field is returned, named after the field.
   *
   * @param input the plan whose attributes are expanded
   * @param resolver the function used to compare names
   * @throws AnalysisException if the target resolves to a non-struct attribute, or cannot be
   *                           resolved at all
   */
  override def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] = {
    target match {
      // If there is no table specified, use all input attributes. This is answered directly
      // so that an input without any output attribute yields an empty expansion instead of
      // falling through to the struct path, which requires a target.
      case None => input.output

      case Some(nameParts) =>
        // First try to expand assuming it is table.*: pick out attributes that are part of
        // this table. A multi-part target can never be a plain table name.
        val tableAttributes: Seq[Attribute] =
          if (nameParts.size == 1) {
            input.output.filter(_.qualifiers.exists(resolver(_, nameParts.head)))
          } else {
            Nil
          }

        if (tableAttributes.nonEmpty) {
          tableAttributes
        } else {
          // Try to resolve it as a struct expansion. If there is a conflict and both are
          // possible (i.e. [name].* is both a table and a struct), the struct path can always
          // be qualified.
          input.resolve(nameParts, resolver) match {
            case Some(attribute) =>
              // This target resolved to an attribute in child. It must be a struct. Expand it.
              attribute.dataType match {
                case s: StructType =>
                  s.zipWithIndex.map { case (f, i) =>
                    Alias(GetStructField(attribute, i), f.name)()
                  }
                case _ =>
                  throw new AnalysisException(
                    "Can only star expand struct data types. Attribute: `" + nameParts + "`")
              }

            case None =>
              val from = input.inputSet.map(_.name).mkString(", ")
              val targetString = nameParts.mkString(".")
              throw new AnalysisException(
                s"cannot resolve '$targetString.*' give input columns '$from'")
          }
        }
    }
  }

  /** Renders as `*`, or as `a.b.*` when a target path is present. */
  override def toString: String = target.map(_.mkString("", ".", ".")).getOrElse("") + "*"
}
/**
 * Assigns new names to the outputs of a Generator, such as a hive udtf.
 * For example the SQL expression "stack(2, key, value, key, value) as (a, b)" could be
 * represented as follows:
 *   MultiAlias(stack_function, Seq(a, b))
 *
 * The node is never resolved, and every [[NamedExpression]] property overridden here throws an
 * [[UnresolvedException]].
 *
 * @param child the computation being performed
 * @param names the names to be associated with each output of computing [[child]].
 */
case class MultiAlias(child: Expression, names: Seq[String])
  extends UnaryExpression with NamedExpression with CodegenFallback {

  override lazy val resolved = false

  // Properties that are unavailable before the alias has been resolved.
  override def toAttribute: Attribute = throw new UnresolvedException(this, "toAttribute")
  override def qualifiers: Seq[String] = throw new UnresolvedException(this, "qualifiers")
  override def nullable: Boolean = throw new UnresolvedException(this, "nullable")
  override def dataType: DataType = throw new UnresolvedException(this, "dataType")
  override def exprId: ExprId = throw new UnresolvedException(this, "exprId")
  override def name: String = throw new UnresolvedException(this, "name")

  override def toString: String = s"${child} AS ${names}"
}
/**
 * A star whose expansion is already known: all the resolved input attributes to a given
 * relational operator. This is used in the data frame DSL.
 *
 * @param expressions Expressions to expand.
 */
case class ResolvedStar(expressions: Seq[NamedExpression]) extends Star with Unevaluable {

  /** Returns the stored expressions unchanged; `input` and `resolver` are not consulted. */
  override def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] = {
    expressions
  }

  override def toString: String = s"ResolvedStar(${expressions.mkString(", ")})"
}
/**
 * Placeholder for the extraction of a value (or values) from an expression, kept until the
 * kind of extraction can be determined.
 *
 * @param child The expression to extract value from,
 *              can be Map, Array, Struct or array of Structs.
 * @param extraction The expression to describe the extraction,
 *                   can be key of Map, index of Array, field name of Struct.
 */
case class UnresolvedExtractValue(child: Expression, extraction: Expression)
  extends UnaryExpression with Unevaluable {

  override lazy val resolved = false

  // Properties that are unavailable before resolution.
  override def nullable: Boolean = throw new UnresolvedException(this, "nullable")
  override def foldable: Boolean = throw new UnresolvedException(this, "foldable")
  override def dataType: DataType = throw new UnresolvedException(this, "dataType")

  /** Renders as `child[extraction]`. */
  override def toString: String = s"${child}[${extraction}]"
}
/**
 * Wraps an expression that has yet to be aliased.
 *
 * The node is never resolved, and every [[NamedExpression]] property overridden here throws an
 * [[UnresolvedException]].
 *
 * @param child the expression waiting for an alias
 */
case class UnresolvedAlias(child: Expression)
  extends UnaryExpression with NamedExpression with Unevaluable {

  override lazy val resolved = false

  // Properties that are unavailable before an alias has been assigned.
  override def name: String = throw new UnresolvedException(this, "name")
  override def exprId: ExprId = throw new UnresolvedException(this, "exprId")
  override def dataType: DataType = throw new UnresolvedException(this, "dataType")
  override def nullable: Boolean = throw new UnresolvedException(this, "nullable")
  override def qualifiers: Seq[String] = throw new UnresolvedException(this, "qualifiers")
  override def toAttribute: Attribute = throw new UnresolvedException(this, "toAttribute")
}
/**
 * Common contract for expressions that hold a subquery plan.
 *
 * By default such an expression has type `BooleanType` and is neither foldable nor nullable.
 */
trait SubQueryExpression extends Unevaluable {

  /** The logical plan of the subquery held by this expression. */
  def subquery: LogicalPlan

  /**
   * Replaces the subquery with a new one; usually used when resolving the subquery.
   *
   * @param newSubquery the plan that takes the place of the current subquery
   */
  def withNewSubQuery(newSubquery: LogicalPlan): this.type

  override def nullable: Boolean = false

  override def foldable: Boolean = false

  override def dataType: DataType = BooleanType
}
/**
 * EXISTS / NOT EXISTS subquery expression, only used as a subquery predicate.
 *
 * NOTICE: Exists is a LeafExpression and always reports itself as resolved, so the subquery
 * has to be resolved explicitly in an analyzer rule.
 *
 * @param subquery the plan of the subquery
 * @param positive true means EXISTS, otherwise means NOT EXISTS
 */
case class Exists(subquery: LogicalPlan, positive: Boolean)
  extends LeafExpression with SubQueryExpression {

  override lazy val resolved = true

  /** Returns a copy of this expression that holds `newSubquery` instead of the current one. */
  override def withNewSubQuery(newSubquery: LogicalPlan): this.type = {
    val replaced = this.copy(subquery = newSubquery)
    replaced.asInstanceOf[this.type]
  }

  override def toString: String = {
    val label = if (positive) "Exists" else "NotExists"
    s"$label(${subquery.asCode})"
  }
}
/**
 * IN / NOT IN subquery expression, only used as a subquery predicate.
 *
 * NOTICE: InSubquery is a UnaryExpression over `child`, and `resolved` only reflects whether
 * `child` is resolved, so the subquery has to be resolved explicitly in an analyzer rule.
 *
 * @param child The referenced key in WHERE clause for IN / NOT IN
 *              e.g. SELECT value FROM src a WHERE a.key IN (SELECT key FROM src1 b WHERE b.key > 10)
 *              The child expression is the 'a.key'
 * @param subquery the plan of the subquery
 * @param positive true means IN, otherwise means NOT IN
 */
case class InSubquery(child: Expression, subquery: LogicalPlan, positive: Boolean)
  extends UnaryExpression with SubQueryExpression {

  override lazy val resolved = child.resolved

  /** Returns a copy of this expression that holds `newSubquery` instead of the current one. */
  override def withNewSubQuery(newSubquery: LogicalPlan): this.type = {
    val replaced = this.copy(subquery = newSubquery)
    replaced.asInstanceOf[this.type]
  }

  override def toString: String = {
    val label = if (positive) "InSubQuery" else "NotInSubQuery"
    s"$label($child, ${subquery.asCode})"
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy