/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.hive

import java.sql.Date
import java.util.Locale

import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer

import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hadoop.hive.ql.exec.{FunctionInfo, FunctionRegistry}
import org.apache.hadoop.hive.ql.lib.Node
import org.apache.hadoop.hive.ql.parse._
import org.apache.hadoop.hive.ql.plan.PlanUtils
import org.apache.hadoop.hive.ql.session.SessionState
import org.apache.hadoop.hive.ql.{Context, ErrorMsg}
import org.apache.hadoop.hive.serde.serdeConstants
import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe

import org.apache.spark.Logging
import org.apache.spark.sql.{AnalysisException, catalyst}
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate._
import org.apache.spark.sql.catalyst.plans.{logical, _}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.trees.CurrentOrigin
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.ExplainCommand
import org.apache.spark.sql.execution.datasources.DescribeCommand
import org.apache.spark.sql.hive.HiveShim._
import org.apache.spark.sql.hive.client._
import org.apache.spark.sql.hive.execution.{AnalyzeTable, DropTable, HiveNativeCommand, HiveScriptIOSchema}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval
import org.apache.spark.util.random.RandomSampler

/**
 * Used when we need to start parsing the AST before deciding that we are going to pass the command
 * back for Hive to execute natively.  Will be replaced with a native command that contains the
 * cmd string.
 */
private[hive] case object NativePlaceholder extends LogicalPlan {
  override def children: Seq[LogicalPlan] = Seq.empty
  override def output: Seq[Attribute] = Seq.empty
}

private[hive] case class CreateTableAsSelect(
    tableDesc: HiveTable,
    child: LogicalPlan,
    allowExisting: Boolean) extends UnaryNode with Command {

  override def output: Seq[Attribute] = Seq.empty[Attribute]
  override lazy val resolved: Boolean =
    tableDesc.specifiedDatabase.isDefined &&
    tableDesc.schema.nonEmpty &&
    tableDesc.serde.isDefined &&
    tableDesc.inputFormat.isDefined &&
    tableDesc.outputFormat.isDefined &&
    childrenResolved
}

private[hive] case class CreateViewAsSelect(
    tableDesc: HiveTable,
    child: LogicalPlan,
    allowExisting: Boolean,
    replace: Boolean,
    sql: String) extends UnaryNode with Command {
  override def output: Seq[Attribute] = Seq.empty[Attribute]
  override lazy val resolved: Boolean = false
}

/** Provides a mapping from HiveQL statements to catalyst logical plans and expression trees. */
private[hive] object HiveQl extends Logging {
  protected val nativeCommands = Seq(
    "TOK_ALTERDATABASE_OWNER",
    "TOK_ALTERDATABASE_PROPERTIES",
    "TOK_ALTERINDEX_PROPERTIES",
    "TOK_ALTERINDEX_REBUILD",
    "TOK_ALTERTABLE",
    "TOK_ALTERTABLE_ADDCOLS",
    "TOK_ALTERTABLE_ADDPARTS",
    "TOK_ALTERTABLE_ALTERPARTS",
    "TOK_ALTERTABLE_ARCHIVE",
    "TOK_ALTERTABLE_CLUSTER_SORT",
    "TOK_ALTERTABLE_DROPPARTS",
    "TOK_ALTERTABLE_PARTITION",
    "TOK_ALTERTABLE_PROPERTIES",
    "TOK_ALTERTABLE_RENAME",
    "TOK_ALTERTABLE_RENAMECOL",
    "TOK_ALTERTABLE_REPLACECOLS",
    "TOK_ALTERTABLE_SKEWED",
    "TOK_ALTERTABLE_TOUCH",
    "TOK_ALTERTABLE_UNARCHIVE",
    "TOK_ALTERVIEW_ADDPARTS",
    "TOK_ALTERVIEW_AS",
    "TOK_ALTERVIEW_DROPPARTS",
    "TOK_ALTERVIEW_PROPERTIES",
    "TOK_ALTERVIEW_RENAME",

    "TOK_CREATEDATABASE",
    "TOK_CREATEFUNCTION",
    "TOK_CREATEINDEX",
    "TOK_CREATEMACRO",
    "TOK_CREATEROLE",

    "TOK_DESCDATABASE",
    "TOK_DESCFUNCTION",

    "TOK_DROPDATABASE",
    "TOK_DROPFUNCTION",
    "TOK_DROPINDEX",
    "TOK_DROPMACRO",
    "TOK_DROPROLE",
    "TOK_DROPTABLE_PROPERTIES",
    "TOK_DROPVIEW",
    "TOK_DROPVIEW_PROPERTIES",

    "TOK_EXPORT",

    "TOK_GRANT",
    "TOK_GRANT_ROLE",

    "TOK_IMPORT",

    "TOK_LOAD",

    "TOK_LOCKTABLE",

    "TOK_MSCK",

    "TOK_REVOKE",

    "TOK_SHOW_COMPACTIONS",
    "TOK_SHOW_CREATETABLE",
    "TOK_SHOW_GRANT",
    "TOK_SHOW_ROLE_GRANT",
    "TOK_SHOW_ROLE_PRINCIPALS",
    "TOK_SHOW_ROLES",
    "TOK_SHOW_SET_ROLE",
    "TOK_SHOW_TABLESTATUS",
    "TOK_SHOW_TBLPROPERTIES",
    "TOK_SHOW_TRANSACTIONS",
    "TOK_SHOWCOLUMNS",
    "TOK_SHOWDATABASES",
    "TOK_SHOWFUNCTIONS",
    "TOK_SHOWINDEXES",
    "TOK_SHOWLOCKS",
    "TOK_SHOWPARTITIONS",

    "TOK_SWITCHDATABASE",

    "TOK_UNLOCKTABLE"
  )

  // Commands that we do not need to explain.
  protected val noExplainCommands = Seq(
    "TOK_DESCTABLE",
    "TOK_SHOWTABLES",
    "TOK_TRUNCATETABLE"     // truncate table" is a NativeCommand, does not need to explain.
  ) ++ nativeCommands

  protected val hqlParser = new ExtendedHiveQlParser

  /**
   * A set of implicit transformations that allow Hive ASTNodes to be rewritten by transformations
   * similar to [[catalyst.trees.TreeNode]].
   *
   * Note that this should be considered very experimental and is not intended as a replacement
   * for TreeNode.  Primarily, it should be noted that ASTNodes are not immutable and do not
   * appear to have clean copy semantics.  Therefore, users of this class should take care when
   * copying/modifying trees that might be used elsewhere.
   */
  implicit class TransformableNode(n: ASTNode) {
    /**
     * Returns a copy of this node where `rule` has been recursively applied to it and all of its
     * children.  When `rule` does not apply to a given node it is left unchanged.
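     *
     * For example (illustrative sketch; `ast` stands for any parsed ASTNode), to rewrite a
     * specific token's text in place:
     * {{{
     *   val rewritten = ast transform {
     *     case n if n.getText == "foo" => n.withText("FOO")
     *   }
     * }}}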
     * @param rule the function used to transform this node's children
     */
    def transform(rule: PartialFunction[ASTNode, ASTNode]): ASTNode = {
      try {
        val afterRule = rule.applyOrElse(n, identity[ASTNode])
        afterRule.withChildren(
          nilIfEmpty(afterRule.getChildren)
            .asInstanceOf[Seq[ASTNode]]
            .map(ast => Option(ast).map(_.transform(rule)).orNull))
      } catch {
        case e: Exception =>
          logError(dumpTree(n).toString)
          throw e
      }
    }

    /**
     * Returns a scala.Seq equivalent to `s`, or Nil if `s` is null.
     */
    private def nilIfEmpty[A](s: java.util.List[A]): Seq[A] =
      Option(s).map(_.asScala).getOrElse(Nil)

    /**
     * Returns this ASTNode with the text changed to `newText`.
     */
    def withText(newText: String): ASTNode = {
      n.token.asInstanceOf[org.antlr.runtime.CommonToken].setText(newText)
      n
    }

    /**
     * Returns this ASTNode with the children changed to `newChildren`.
     */
    def withChildren(newChildren: Seq[ASTNode]): ASTNode = {
      (1 to n.getChildCount).foreach(_ => n.deleteChild(0))
      n.addChildren(newChildren.asJava)
      n
    }

    /**
     * Throws an error if this is not equal to other.
     *
     * Right now this function only checks the name, type, text and children of the node
     * for equality.
     */
    def checkEquals(other: ASTNode): Unit = {
      def check(field: String, f: ASTNode => Any): Unit = if (f(n) != f(other)) {
        sys.error(s"$field does not match for trees. " +
          s"'${f(n)}' != '${f(other)}' left: ${dumpTree(n)}, right: ${dumpTree(other)}")
      }
      check("name", _.getName)
      check("type", _.getType)
      check("text", _.getText)
      check("numChildren", n => nilIfEmpty(n.getChildren).size)

      val leftChildren = nilIfEmpty(n.getChildren).asInstanceOf[Seq[ASTNode]]
      val rightChildren = nilIfEmpty(other.getChildren).asInstanceOf[Seq[ASTNode]]
      leftChildren zip rightChildren foreach {
        case (l, r) => l checkEquals r
      }
    }
  }

  /**
   * Returns the AST for the given SQL string.
   */
  def getAst(sql: String): ASTNode = {
    /*
     * A Context has to be passed in for Hive 0.13.1.
     * Otherwise, a NullPointerException is thrown
     * when retrieving properties from HiveConf.
     */
    val hContext = createContext()
    val node = getAst(sql, hContext)
    hContext.clear()
    node
  }

  private def createContext(): Context = new Context(hiveConf)

  private def getAst(sql: String, context: Context) =
    ParseUtils.findRootNonNullToken((new ParseDriver).parse(sql, context))

  /**
   * Returns the HiveConf
   */
  private[this] def hiveConf: HiveConf = {
    var ss = SessionState.get()
    // SessionState is lazily initialized, so it can be null here.
    if (ss == null) {
      val original = Thread.currentThread().getContextClassLoader
      val conf = new HiveConf(classOf[SessionState])
      conf.setClassLoader(original)
      ss = new SessionState(conf)
      SessionState.start(ss)
    }
    ss.getConf
  }

  /** Returns a LogicalPlan for a given HiveQL string. */
  def parseSql(sql: String): LogicalPlan = hqlParser.parse(sql)

  val errorRegEx = "line (\\d+):(\\d+) (.*)".r

  /** Creates LogicalPlan for a given HiveQL string. */
  def createPlan(sql: String): LogicalPlan = {
    try {
      val context = createContext()
      val tree = getAst(sql, context)
      val plan = if (nativeCommands contains tree.getText) {
        HiveNativeCommand(sql)
      } else {
        nodeToPlan(tree, context) match {
          case NativePlaceholder => HiveNativeCommand(sql)
          case other => other
        }
      }
      context.clear()
      plan
    } catch {
      case pe: org.apache.hadoop.hive.ql.parse.ParseException =>
        pe.getMessage match {
          case errorRegEx(line, start, message) =>
            throw new AnalysisException(message, Some(line.toInt), Some(start.toInt))
          case otherMessage =>
            throw new AnalysisException(otherMessage)
        }
      case e: MatchError => throw e
      case e: Exception =>
        throw new AnalysisException(e.getMessage)
      case e: NotImplementedError =>
        throw new AnalysisException(
          s"""
            |Unsupported language features in query: $sql
            |${dumpTree(getAst(sql))}
            |$e
            |${e.getStackTrace.head}
          """.stripMargin)
    }
  }

  def parseDdl(ddl: String): Seq[Attribute] = {
    val tree =
      try {
        ParseUtils.findRootNonNullToken(
          (new ParseDriver).parse(ddl, null /* no context required for parsing alone */))
      } catch {
        case pe: org.apache.hadoop.hive.ql.parse.ParseException =>
          throw new RuntimeException(s"Failed to parse ddl: '$ddl'", pe)
      }
    assert(tree.asInstanceOf[ASTNode].getText == "TOK_CREATETABLE", "Only CREATE TABLE supported.")
    val tableOps = tree.getChildren
    val colList =
      tableOps.asScala
        .find(_.asInstanceOf[ASTNode].getText == "TOK_TABCOLLIST")
        .getOrElse(sys.error("No columnList!")).getChildren

    colList.asScala.map(nodeToAttribute)
  }

  /** Extractor for matching Hive's AST Tokens. */
  object Token {
    /** @return matches of the form (tokenName, children). */
    def unapply(t: Any): Option[(String, Seq[ASTNode])] = t match {
      case t: ASTNode =>
        CurrentOrigin.setPosition(t.getLine, t.getCharPositionInLine)
        Some((t.getText,
          Option(t.getChildren).map(_.asScala.toList).getOrElse(Nil).asInstanceOf[Seq[ASTNode]]))
      case _ => None
    }
  }

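  /**
   * Partitions `nodeList` by the given clause names, returning one Option per name in the
   * requested order and erroring out if any node is left unmatched. A typical call site
   * (illustrative sketch only) destructures the result positionally:
   * {{{
   *   val Some(query) :: extended :: Nil =
   *     getClauses(Seq("TOK_QUERY", "EXTENDED"), explainArgs)
   * }}}
   */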
  protected def getClauses(
      clauseNames: Seq[String],
      nodeList: Seq[ASTNode]): Seq[Option[ASTNode]] = {
    var remainingNodes = nodeList
    val clauses = clauseNames.map { clauseName =>
      val (matches, nonMatches) = remainingNodes.partition(_.getText.toUpperCase == clauseName)
      remainingNodes = nonMatches ++ (if (matches.nonEmpty) matches.tail else Nil)
      matches.headOption
    }

    if (remainingNodes.nonEmpty) {
      sys.error(
        s"""Unhandled clauses: ${remainingNodes.map(dumpTree(_)).mkString("\n")}.
           |You are likely trying to use an unsupported Hive feature.""".stripMargin)
    }
    clauses
  }

  def getClause(clauseName: String, nodeList: Seq[Node]): Node =
    getClauseOption(clauseName, nodeList).getOrElse(sys.error(
      s"Expected clause $clauseName missing from ${nodeList.map(dumpTree(_)).mkString("\n")}"))

  def getClauseOption(clauseName: String, nodeList: Seq[Node]): Option[Node] = {
    nodeList.filter { case ast: ASTNode => ast.getText == clauseName } match {
      case Seq(oneMatch) => Some(oneMatch)
      case Seq() => None
      case _ => sys.error(s"Found multiple instances of clause $clauseName")
    }
  }

  protected def nodeToAttribute(node: Node): Attribute = node match {
    case Token("TOK_TABCOL", Token(colName, Nil) :: dataType :: Nil) =>
      AttributeReference(colName, nodeToDataType(dataType), nullable = true)()

    case a: ASTNode =>
      throw new NotImplementedError(s"No parse rules for:\n ${dumpTree(a).toString} ")
  }

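  /**
   * Converts a Hive type AST into the corresponding Catalyst DataType; e.g. the tree for
   * DECIMAL(10, 2) becomes DecimalType(10, 2) and TOK_BIGINT becomes LongType.
   */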
  protected def nodeToDataType(node: Node): DataType = node match {
    case Token("TOK_DECIMAL", precision :: scale :: Nil) =>
      DecimalType(precision.getText.toInt, scale.getText.toInt)
    case Token("TOK_DECIMAL", precision :: Nil) =>
      DecimalType(precision.getText.toInt, 0)
    case Token("TOK_DECIMAL", Nil) => DecimalType.USER_DEFAULT
    case Token("TOK_BIGINT", Nil) => LongType
    case Token("TOK_INT", Nil) => IntegerType
    case Token("TOK_TINYINT", Nil) => ByteType
    case Token("TOK_SMALLINT", Nil) => ShortType
    case Token("TOK_BOOLEAN", Nil) => BooleanType
    case Token("TOK_STRING", Nil) => StringType
    case Token("TOK_VARCHAR", Token(_, Nil) :: Nil) => StringType
    case Token("TOK_FLOAT", Nil) => FloatType
    case Token("TOK_DOUBLE", Nil) => DoubleType
    case Token("TOK_DATE", Nil) => DateType
    case Token("TOK_TIMESTAMP", Nil) => TimestampType
    case Token("TOK_BINARY", Nil) => BinaryType
    case Token("TOK_LIST", elementType :: Nil) => ArrayType(nodeToDataType(elementType))
    case Token("TOK_STRUCT",
           Token("TOK_TABCOLLIST", fields) :: Nil) =>
      StructType(fields.map(nodeToStructField))
    case Token("TOK_MAP",
           keyType ::
           valueType :: Nil) =>
      MapType(nodeToDataType(keyType), nodeToDataType(valueType))
    case a: ASTNode =>
      throw new NotImplementedError(s"No parse rules for DataType:\n ${dumpTree(a).toString} ")
  }

  protected def nodeToStructField(node: Node): StructField = node match {
    case Token("TOK_TABCOL",
           Token(fieldName, Nil) ::
           dataType :: Nil) =>
      StructField(fieldName, nodeToDataType(dataType), nullable = true)
    case Token("TOK_TABCOL",
           Token(fieldName, Nil) ::
             dataType ::
             _ /* comment */:: Nil) =>
      StructField(fieldName, nodeToDataType(dataType), nullable = true)
    case a: ASTNode =>
      throw new NotImplementedError(s"No parse rules for StructField:\n ${dumpTree(a).toString} ")
  }

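  /**
   * Converts a TOK_TABNAME node into a TableIdentifier: "tbl" becomes TableIdentifier("tbl"),
   * "db.tbl" becomes TableIdentifier("tbl", Some("db")), and longer names are rejected.
   */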
  protected def extractTableIdent(tableNameParts: Node): TableIdentifier = {
    tableNameParts.getChildren.asScala.map {
      case Token(part, Nil) => cleanIdentifier(part)
    } match {
      case Seq(tableOnly) => TableIdentifier(tableOnly)
      case Seq(databaseName, table) => TableIdentifier(table, Some(databaseName))
      case other => sys.error("Hive only supports table names like 'tableName' " +
        s"or 'databaseName.tableName', found '$other'")
    }
  }

  /**
   * SELECT MAX(value) FROM src GROUP BY k1, k2, k3 GROUPING SETS((k1, k2), (k2))
   * is equivalent to
   * SELECT MAX(value) FROM src GROUP BY k1, k2 UNION ALL SELECT MAX(value) FROM src GROUP BY k2
   * Check the following link for details.
   *
   * https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C+Grouping+and+Rollup
   *
   * The bitmask denotes which grouping expressions are valid for a grouping set; it is also
   * called the grouping id (`GROUPING__ID`, the virtual column in Hive).
   * E.g. in the superset (k1, k2, k3) (bit 0: k1, bit 1: k2, bit 2: k3), the grouping ids of
   * GROUPING SETS (k1, k2) and (k2) are 3 and 2 respectively.
   */
  protected def extractGroupingSet(children: Seq[ASTNode]): (Seq[Expression], Seq[Int]) = {
    val (keyASTs, setASTs) = children.partition {
      case Token("TOK_GROUPING_SETS_EXPRESSION", _) => false // grouping sets
      case _ => true // grouping keys
    }

    val keys = keyASTs.map(nodeToExpr).toSeq
    val keyMap = keyASTs.map(_.toStringTree).zipWithIndex.toMap

    val bitmasks: Seq[Int] = setASTs.map {
      case Token("TOK_GROUPING_SETS_EXPRESSION", null) => 0
      case Token("TOK_GROUPING_SETS_EXPRESSION", children) =>
        children.foldLeft(0) { (bitmap, col) =>
          val colString = col.asInstanceOf[ASTNode].toStringTree()
          require(keyMap.contains(colString), s"$colString doesn't show up in the GROUP BY list")
          bitmap | 1 << keyMap(colString)
        }
      case _ => sys.error("Expect GROUPING SETS clause")
    }

    (keys, bitmasks)
  }

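  /** Extracts (key, value) pairs from a TOK_TABLEPROPLIST node, unquoting both sides. */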
  protected def getProperties(node: Node): Seq[(String, String)] = node match {
    case Token("TOK_TABLEPROPLIST", list) =>
      list.map {
        case Token("TOK_TABLEPROPERTY", Token(key, Nil) :: Token(value, Nil) :: Nil) =>
          unquoteString(key) -> unquoteString(value)
      }
  }

  private def createView(
      view: ASTNode,
      context: Context,
      viewNameParts: ASTNode,
      query: ASTNode,
      schema: Seq[HiveColumn],
      properties: Map[String, String],
      allowExist: Boolean,
      replace: Boolean): CreateViewAsSelect = {
    val TableIdentifier(viewName, dbName) = extractTableIdent(viewNameParts)

    val originalText = context.getTokenRewriteStream
      .toString(query.getTokenStartIndex, query.getTokenStopIndex)

    val tableDesc = HiveTable(
      specifiedDatabase = dbName,
      name = viewName,
      schema = schema,
      partitionColumns = Seq.empty[HiveColumn],
      properties = properties,
      serdeProperties = Map[String, String](),
      tableType = VirtualView,
      location = None,
      inputFormat = None,
      outputFormat = None,
      serde = None,
      viewText = Some(originalText))

    // We need to keep the original SQL string so that if `spark.sql.nativeView` is
    // false, we can fall back to the Hive native command later.
    // We can remove this once the parser is configurable (i.e. can access SQLConf).
    val sql = context.getTokenRewriteStream
      .toString(view.getTokenStartIndex, view.getTokenStopIndex)
    CreateViewAsSelect(tableDesc, nodeToPlan(query, context), allowExist, replace, sql)
  }

  protected def nodeToPlan(node: ASTNode, context: Context): LogicalPlan = node match {
    // Special drop table that also uncaches.
    case Token("TOK_DROPTABLE",
           Token("TOK_TABNAME", tableNameParts) ::
           ifExists) =>
      val tableName = tableNameParts.map { case Token(p, Nil) => p }.mkString(".")
      DropTable(tableName, ifExists.nonEmpty)
    // Support "ANALYZE TABLE tableNmae COMPUTE STATISTICS noscan"
    case Token("TOK_ANALYZE",
           Token("TOK_TAB", Token("TOK_TABNAME", tableNameParts) :: partitionSpec) ::
           isNoscan) =>
      // Reference:
      // https://cwiki.apache.org/confluence/display/Hive/StatsDev#StatsDev-ExistingTables
      if (partitionSpec.nonEmpty) {
        // Analyzing partitions will be treated as a Hive native command.
        NativePlaceholder
      } else if (isNoscan.isEmpty) {
        // If users do not specify "noscan", it will be treated as a Hive native command.
        NativePlaceholder
      } else {
        val tableName = tableNameParts.map { case Token(p, Nil) => p }.mkString(".")
        AnalyzeTable(tableName)
      }
    // Just fake explain for any of the native commands.
    case Token("TOK_EXPLAIN", explainArgs)
      if noExplainCommands.contains(explainArgs.head.getText) =>
      ExplainCommand(OneRowRelation)
    case Token("TOK_EXPLAIN", explainArgs)
      if "TOK_CREATETABLE" == explainArgs.head.getText =>
      val Some(crtTbl) :: _ :: extended :: Nil =
        getClauses(Seq("TOK_CREATETABLE", "FORMATTED", "EXTENDED"), explainArgs)
      ExplainCommand(
        nodeToPlan(crtTbl, context),
        extended = extended.isDefined)
    case Token("TOK_EXPLAIN", explainArgs) =>
      // Ignore FORMATTED if present.
      val Some(query) :: _ :: extended :: Nil =
        getClauses(Seq("TOK_QUERY", "FORMATTED", "EXTENDED"), explainArgs)
      ExplainCommand(
        nodeToPlan(query, context),
        extended = extended.isDefined)

    case Token("TOK_DESCTABLE", describeArgs) =>
      // Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL
      val Some(tableType) :: formatted :: extended :: pretty :: Nil =
        getClauses(Seq("TOK_TABTYPE", "FORMATTED", "EXTENDED", "PRETTY"), describeArgs)
      if (formatted.isDefined || pretty.isDefined) {
        // FORMATTED and PRETTY are not supported and this statement will be treated as
        // a Hive native command.
        NativePlaceholder
      } else {
        tableType match {
          case Token("TOK_TABTYPE", nameParts) if nameParts.size == 1 => {
            nameParts.head match {
              case Token(".", dbName :: tableName :: Nil) =>
                // It is describing a table with the format like "describe db.table".
                // TODO: Actually, a user may mean tableName.columnName. Need to resolve this issue.
                val tableIdent = extractTableIdent(nameParts.head)
                DescribeCommand(
                  UnresolvedRelation(tableIdent, None), isExtended = extended.isDefined)
              case Token(".", dbName :: tableName :: colName :: Nil) =>
                // It is describing a column with the format like "describe db.table column".
                NativePlaceholder
              case tableName =>
                // It is describing a table with the format like "describe table".
                DescribeCommand(
                  UnresolvedRelation(TableIdentifier(tableName.getText), None),
                  isExtended = extended.isDefined)
            }
          }
          // All other cases.
          case _ => NativePlaceholder
        }
      }

    case view @ Token("TOK_ALTERVIEW", children) =>
      val Some(viewNameParts) :: maybeQuery :: ignores =
        getClauses(Seq(
          "TOK_TABNAME",
          "TOK_QUERY",
          "TOK_ALTERVIEW_ADDPARTS",
          "TOK_ALTERVIEW_DROPPARTS",
          "TOK_ALTERVIEW_PROPERTIES",
          "TOK_ALTERVIEW_RENAME"), children)

      // If ALTER VIEW doesn't have a query part, let Hive handle it.
      maybeQuery.map { query =>
        createView(view, context, viewNameParts, query, Nil, Map(), false, true)
      }.getOrElse(NativePlaceholder)

    case view @ Token("TOK_CREATEVIEW", children)
        if children.collect { case t @ Token("TOK_QUERY", _) => t }.nonEmpty =>
      val Seq(
        Some(viewNameParts),
        Some(query),
        maybeComment,
        replace,
        allowExisting,
        maybeProperties,
        maybeColumns,
        maybePartCols
      ) = getClauses(Seq(
        "TOK_TABNAME",
        "TOK_QUERY",
        "TOK_TABLECOMMENT",
        "TOK_ORREPLACE",
        "TOK_IFNOTEXISTS",
        "TOK_TABLEPROPERTIES",
        "TOK_TABCOLNAME",
        "TOK_VIEWPARTCOLS"), children)

      // If the view is partitioned, we let Hive handle it.
      if (maybePartCols.isDefined) {
        NativePlaceholder
      } else {
        val schema = maybeColumns.map { cols =>
          BaseSemanticAnalyzer.getColumns(cols, true).asScala.map { field =>
            // We can't specify column types when creating a view, so fill them with null
            // first, and update them after the schema has been resolved later.
            HiveColumn(field.getName, null, field.getComment)
          }
        }.getOrElse(Seq.empty[HiveColumn])

        val properties = scala.collection.mutable.Map.empty[String, String]

        maybeProperties.foreach {
          case Token("TOK_TABLEPROPERTIES", list :: Nil) =>
            properties ++= getProperties(list)
        }

        maybeComment.foreach {
          case Token("TOK_TABLECOMMENT", child :: Nil) =>
            val comment = BaseSemanticAnalyzer.unescapeSQLString(child.getText)
            if (comment ne null) {
              properties += ("comment" -> comment)
            }
        }

        createView(view, context, viewNameParts, query, schema, properties.toMap,
          allowExisting.isDefined, replace.isDefined)
      }

    case Token("TOK_CREATETABLE", children)
        if children.collect { case t @ Token("TOK_QUERY", _) => t }.nonEmpty =>
      // Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL
      val (
          Some(tableNameParts) ::
          _ /* likeTable */ ::
          externalTable ::
          Some(query) ::
          allowExisting +:
          ignores) =
        getClauses(
          Seq(
            "TOK_TABNAME",
            "TOK_LIKETABLE",
            "EXTERNAL",
            "TOK_QUERY",
            "TOK_IFNOTEXISTS",
            "TOK_TABLECOMMENT",
            "TOK_TABCOLLIST",
            "TOK_TABLEPARTCOLS", // Partitioned by
            "TOK_TABLEBUCKETS", // Clustered by
            "TOK_TABLESKEWED", // Skewed by
            "TOK_TABLEROWFORMAT",
            "TOK_TABLESERIALIZER",
            "TOK_FILEFORMAT_GENERIC",
            "TOK_TABLEFILEFORMAT", // User-provided InputFormat and OutputFormat
            "TOK_STORAGEHANDLER", // Storage handler
            "TOK_TABLELOCATION",
            "TOK_TABLEPROPERTIES"),
          children)
      val TableIdentifier(tblName, dbName) = extractTableIdent(tableNameParts)

      // TODO add bucket support
      var tableDesc: HiveTable = HiveTable(
        specifiedDatabase = dbName,
        name = tblName,
        schema = Seq.empty[HiveColumn],
        partitionColumns = Seq.empty[HiveColumn],
        properties = Map[String, String](),
        serdeProperties = Map[String, String](),
        tableType = if (externalTable.isDefined) ExternalTable else ManagedTable,
        location = None,
        inputFormat = None,
        outputFormat = None,
        serde = None,
        viewText = None)

      // default storage type abbreviation (e.g. RCFile, ORC, PARQUET)
      val defaultStorageType = hiveConf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT)
      // handle the default format for the storage type abbreviation
      val hiveSerDe = HiveSerDe.sourceToSerDe(defaultStorageType, hiveConf).getOrElse {
        HiveSerDe(
          inputFormat = Option("org.apache.hadoop.mapred.TextInputFormat"),
          outputFormat = Option("org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat"))
      }

      hiveSerDe.inputFormat.foreach(f => tableDesc = tableDesc.copy(inputFormat = Some(f)))
      hiveSerDe.outputFormat.foreach(f => tableDesc = tableDesc.copy(outputFormat = Some(f)))
      hiveSerDe.serde.foreach(f => tableDesc = tableDesc.copy(serde = Some(f)))

      children.collect {
        case list @ Token("TOK_TABCOLLIST", _) =>
          val cols = BaseSemanticAnalyzer.getColumns(list, true)
          if (cols != null) {
            tableDesc = tableDesc.copy(
              schema = cols.asScala.map { field =>
                HiveColumn(field.getName, field.getType, field.getComment)
              })
          }
        case Token("TOK_TABLECOMMENT", child :: Nil) =>
          val comment = BaseSemanticAnalyzer.unescapeSQLString(child.getText)
          // TODO support the sql text
          tableDesc = tableDesc.copy(viewText = Option(comment))
        case Token("TOK_TABLEPARTCOLS", list @ Token("TOK_TABCOLLIST", _) :: Nil) =>
          val cols = BaseSemanticAnalyzer.getColumns(list(0), false)
          if (cols != null) {
            tableDesc = tableDesc.copy(
              partitionColumns = cols.asScala.map { field =>
                HiveColumn(field.getName, field.getType, field.getComment)
              })
          }
        case Token("TOK_TABLEROWFORMAT", Token("TOK_SERDEPROPS", child :: Nil) :: Nil) =>
          val serdeParams = new java.util.HashMap[String, String]()
          child match {
            case Token("TOK_TABLEROWFORMATFIELD", rowChild1 :: rowChild2) =>
              val fieldDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild1.getText)
              serdeParams.put(serdeConstants.FIELD_DELIM, fieldDelim)
              serdeParams.put(serdeConstants.SERIALIZATION_FORMAT, fieldDelim)
              if (rowChild2.length > 1) {
                val fieldEscape = BaseSemanticAnalyzer.unescapeSQLString(rowChild2(0).getText)
                serdeParams.put(serdeConstants.ESCAPE_CHAR, fieldEscape)
              }
            case Token("TOK_TABLEROWFORMATCOLLITEMS", rowChild :: Nil) =>
              val collItemDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText)
              serdeParams.put(serdeConstants.COLLECTION_DELIM, collItemDelim)
            case Token("TOK_TABLEROWFORMATMAPKEYS", rowChild :: Nil) =>
              val mapKeyDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText)
              serdeParams.put(serdeConstants.MAPKEY_DELIM, mapKeyDelim)
            case Token("TOK_TABLEROWFORMATLINES", rowChild :: Nil) =>
              val lineDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText)
              if (lineDelim != "\n" && lineDelim != "10") {
                throw new AnalysisException(
                  SemanticAnalyzer.generateErrorMessage(
                    rowChild,
                    ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg))
              }
              serdeParams.put(serdeConstants.LINE_DELIM, lineDelim)
            case Token("TOK_TABLEROWFORMATNULL", rowChild :: Nil) =>
              val nullFormat = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText)
              // TODO support the nullFormat
            case _ => assert(false)
          }
          tableDesc = tableDesc.copy(
            serdeProperties = tableDesc.serdeProperties ++ serdeParams.asScala)
        case Token("TOK_TABLELOCATION", child :: Nil) =>
          var location = BaseSemanticAnalyzer.unescapeSQLString(child.getText)
          location = EximUtil.relativeToAbsolutePath(hiveConf, location)
          tableDesc = tableDesc.copy(location = Option(location))
        case Token("TOK_TABLESERIALIZER", child :: Nil) =>
          tableDesc = tableDesc.copy(
            serde = Option(BaseSemanticAnalyzer.unescapeSQLString(child.getChild(0).getText)))
          if (child.getChildCount == 2) {
            val serdeParams = new java.util.HashMap[String, String]()
            BaseSemanticAnalyzer.readProps(
              (child.getChild(1).getChild(0)).asInstanceOf[ASTNode], serdeParams)
            tableDesc = tableDesc.copy(
              serdeProperties = tableDesc.serdeProperties ++ serdeParams.asScala)
          }
        case Token("TOK_FILEFORMAT_GENERIC", child :: Nil) =>
          child.getText().toLowerCase(Locale.ENGLISH) match {
            case "orc" =>
              tableDesc = tableDesc.copy(
                inputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"),
                outputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"))
              if (tableDesc.serde.isEmpty) {
                tableDesc = tableDesc.copy(
                  serde = Option("org.apache.hadoop.hive.ql.io.orc.OrcSerde"))
              }

            case "parquet" =>
              tableDesc = tableDesc.copy(
                inputFormat =
                  Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"),
                outputFormat =
                  Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"))
              if (tableDesc.serde.isEmpty) {
                tableDesc = tableDesc.copy(
                  serde = Option("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"))
              }

            case "rcfile" =>
              tableDesc = tableDesc.copy(
                inputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat"),
                outputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"))
              if (tableDesc.serde.isEmpty) {
                tableDesc = tableDesc.copy(
                  serde = Option("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe"))
              }

            case "textfile" =>
              tableDesc = tableDesc.copy(
                inputFormat =
                  Option("org.apache.hadoop.mapred.TextInputFormat"),
                outputFormat =
                  Option("org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat"))

            case "sequencefile" =>
              tableDesc = tableDesc.copy(
                inputFormat = Option("org.apache.hadoop.mapred.SequenceFileInputFormat"),
                outputFormat = Option("org.apache.hadoop.mapred.SequenceFileOutputFormat"))

            case "avro" =>
              tableDesc = tableDesc.copy(
                inputFormat =
                  Option("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat"),
                outputFormat =
                  Option("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat"))
              if (tableDesc.serde.isEmpty) {
                tableDesc = tableDesc.copy(
                  serde = Option("org.apache.hadoop.hive.serde2.avro.AvroSerDe"))
              }

            case _ =>
              throw new SemanticException(
                s"Unrecognized file format in STORED AS clause: ${child.getText}")
          }

        case Token("TOK_TABLESERIALIZER",
               Token("TOK_SERDENAME", Token(serdeName, Nil) :: otherProps) :: Nil) =>
          tableDesc = tableDesc.copy(serde = Option(unquoteString(serdeName)))

          otherProps match {
            case Token("TOK_TABLEPROPERTIES", list :: Nil) :: Nil =>
              tableDesc = tableDesc.copy(
                serdeProperties = tableDesc.serdeProperties ++ getProperties(list))
            case Nil =>
          }

        case Token("TOK_TABLEPROPERTIES", list :: Nil) =>
          tableDesc = tableDesc.copy(properties = tableDesc.properties ++ getProperties(list))
        case list @ Token("TOK_TABLEFILEFORMAT", children) =>
          tableDesc = tableDesc.copy(
            inputFormat =
              Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(0).getText)),
            outputFormat =
              Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(1).getText)))
        case Token("TOK_STORAGEHANDLER", _) =>
          throw new AnalysisException(ErrorMsg.CREATE_NON_NATIVE_AS.getMsg())
        case _ => // Unsupported features
      }

      CreateTableAsSelect(tableDesc, nodeToPlan(query, context), allowExisting.isDefined)

    // If it's not a CTAS like the above, treat it as a native command.
    case Token("TOK_CREATETABLE", _) => NativePlaceholder

    // Support "TRUNCATE TABLE table_name [PARTITION partition_spec]"
    case Token("TOK_TRUNCATETABLE",
          Token("TOK_TABLE_PARTITION", table) :: Nil) => NativePlaceholder

    case Token("TOK_QUERY", queryArgs)
        if Seq("TOK_FROM", "TOK_INSERT").contains(queryArgs.head.getText) =>

      val (fromClause: Option[ASTNode], insertClauses, cteRelations) =
        queryArgs match {
          case Token("TOK_FROM", args: Seq[ASTNode]) :: insertClauses =>
            // Check if the query has a CTE.
            insertClauses.last match {
              case Token("TOK_CTE", cteClauses) =>
                val cteRelations = cteClauses.map { node =>
                  val relation = nodeToRelation(node, context).asInstanceOf[Subquery]
                  (relation.alias, relation)
                }.toMap
                (Some(args.head), insertClauses.init, Some(cteRelations))

              case _ => (Some(args.head), insertClauses, None)
            }

          case Token("TOK_INSERT", _) :: Nil => (None, queryArgs, None)
        }

      // Return one query for each insert clause.
      val queries = insertClauses.map { case Token("TOK_INSERT", singleInsert) =>
        val (
            intoClause ::
            destClause ::
            selectClause ::
            selectDistinctClause ::
            whereClause ::
            groupByClause ::
            rollupGroupByClause ::
            cubeGroupByClause ::
            groupingSetsClause ::
            orderByClause ::
            havingClause ::
            sortByClause ::
            clusterByClause ::
            distributeByClause ::
            limitClause ::
            lateralViewClause ::
            windowClause :: Nil) = {
          getClauses(
            Seq(
              "TOK_INSERT_INTO",
              "TOK_DESTINATION",
              "TOK_SELECT",
              "TOK_SELECTDI",
              "TOK_WHERE",
              "TOK_GROUPBY",
              "TOK_ROLLUP_GROUPBY",
              "TOK_CUBE_GROUPBY",
              "TOK_GROUPING_SETS",
              "TOK_ORDERBY",
              "TOK_HAVING",
              "TOK_SORTBY",
              "TOK_CLUSTERBY",
              "TOK_DISTRIBUTEBY",
              "TOK_LIMIT",
              "TOK_LATERAL_VIEW",
              "WINDOW"),
            singleInsert)
        }

        val relations = fromClause match {
          case Some(f) => nodeToRelation(f, context)
          case None => OneRowRelation
        }

        val withWhere = whereClause.map { whereNode =>
          val Seq(whereExpr) = whereNode.getChildren.asScala
          Filter(nodeToExpr(whereExpr), relations)
        }.getOrElse(relations)

        val select =
          (selectClause orElse selectDistinctClause).getOrElse(sys.error("No select clause."))

        // Script transformations are expressed as a select clause with a single expression of type
        // TOK_TRANSFORM
        val transformation = select.getChildren.iterator().next() match {
          case Token("TOK_SELEXPR",
                 Token("TOK_TRANSFORM",
                   Token("TOK_EXPLIST", inputExprs) ::
                   Token("TOK_SERDE", inputSerdeClause) ::
                   Token("TOK_RECORDWRITER", writerClause) ::
                   // TODO: Need to support other types of (in/out)put
                   Token(script, Nil) ::
                   Token("TOK_SERDE", outputSerdeClause) ::
                   Token("TOK_RECORDREADER", readerClause) ::
                   outputClause) :: Nil) =>

            val (output, schemaLess) = outputClause match {
              case Token("TOK_ALIASLIST", aliases) :: Nil =>
                (aliases.map { case Token(name, Nil) => AttributeReference(name, StringType)() },
                  false)
              case Token("TOK_TABCOLLIST", attributes) :: Nil =>
                (attributes.map { case Token("TOK_TABCOL", Token(name, Nil) :: dataType :: Nil) =>
                  AttributeReference(name, nodeToDataType(dataType))() }, false)
              case Nil =>
                (List(AttributeReference("key", StringType)(),
                  AttributeReference("value", StringType)()), true)
            }

            type SerDeInfo = (
              Seq[(String, String)],  // Row format information
              Option[String],         // Optional SerDe class
              Seq[(String, String)],  // SerDe properties
              Boolean                 // Whether to use default record reader/writer
            )

            def matchSerDe(clause: Seq[ASTNode]): SerDeInfo = clause match {
              case Token("TOK_SERDEPROPS", propsClause) :: Nil =>
                val rowFormat = propsClause.map {
                  case Token(name, Token(value, Nil) :: Nil) => (name, value)
                }
                (rowFormat, None, Nil, false)

              case Token("TOK_SERDENAME", Token(serdeClass, Nil) :: Nil) :: Nil =>
                (Nil, Some(BaseSemanticAnalyzer.unescapeSQLString(serdeClass)), Nil, false)

              case Token("TOK_SERDENAME", Token(serdeClass, Nil) ::
                Token("TOK_TABLEPROPERTIES",
                Token("TOK_TABLEPROPLIST", propsClause) :: Nil) :: Nil) :: Nil =>
                val serdeProps = propsClause.map {
                  case Token("TOK_TABLEPROPERTY", Token(name, Nil) :: Token(value, Nil) :: Nil) =>
                    (BaseSemanticAnalyzer.unescapeSQLString(name),
                      BaseSemanticAnalyzer.unescapeSQLString(value))
                }

                // SPARK-10310: Special cases LazySimpleSerDe
                // TODO Fully support user-defined record reader/writer classes
                val unescapedSerDeClass = BaseSemanticAnalyzer.unescapeSQLString(serdeClass)
                val useDefaultRecordReaderWriter =
                  unescapedSerDeClass == classOf[LazySimpleSerDe].getCanonicalName
                (Nil, Some(unescapedSerDeClass), serdeProps, useDefaultRecordReaderWriter)

              case Nil =>
                // Uses default TextRecordReader/TextRecordWriter, sets field delimiter here
                val serdeProps = Seq(serdeConstants.FIELD_DELIM -> "\t")
                (Nil, Option(hiveConf.getVar(ConfVars.HIVESCRIPTSERDE)), serdeProps, true)
            }

            val (inRowFormat, inSerdeClass, inSerdeProps, useDefaultRecordReader) =
              matchSerDe(inputSerdeClause)

            val (outRowFormat, outSerdeClass, outSerdeProps, useDefaultRecordWriter) =
              matchSerDe(outputSerdeClause)

            val unescapedScript = BaseSemanticAnalyzer.unescapeSQLString(script)

            // TODO Add support for user-defined record reader/writer classes
            val recordReaderClass = if (useDefaultRecordReader) {
              Option(hiveConf.getVar(ConfVars.HIVESCRIPTRECORDREADER))
            } else {
              None
            }

            val recordWriterClass = if (useDefaultRecordWriter) {
              Option(hiveConf.getVar(ConfVars.HIVESCRIPTRECORDWRITER))
            } else {
              None
            }

            val schema = HiveScriptIOSchema(
              inRowFormat, outRowFormat,
              inSerdeClass, outSerdeClass,
              inSerdeProps, outSerdeProps,
              recordReaderClass, recordWriterClass,
              schemaLess)

            Some(
              logical.ScriptTransformation(
                inputExprs.map(nodeToExpr),
                unescapedScript,
                output,
                withWhere, schema))
          case _ => None
        }

        val withLateralView = lateralViewClause.map { lv =>
          val Token("TOK_SELECT",
          Token("TOK_SELEXPR", clauses) :: Nil) = lv.getChildren.iterator().next()

          val alias = getClause("TOK_TABALIAS", clauses).getChildren.iterator().next()
            .asInstanceOf[ASTNode].getText

          val (generator, attributes) = nodesToGenerator(clauses)
            Generate(
              generator,
              join = true,
              outer = false,
              Some(alias.toLowerCase),
              attributes.map(UnresolvedAttribute(_)),
              withWhere)
        }.getOrElse(withWhere)

        // The projection of the query can either be a normal projection, an aggregation
        // (if there is a group by) or a script transformation.
        val withProject: LogicalPlan = transformation.getOrElse {
          val selectExpressions =
            select.getChildren.asScala.flatMap(selExprNodeToExpr).map(UnresolvedAlias)
          Seq(
            groupByClause.map {
              case Token("TOK_GROUPBY", children) =>
                // Not a transformation so must be either project or aggregation.
                Aggregate(children.map(nodeToExpr), selectExpressions, withLateralView)
              case _ => sys.error("Expect GROUP BY")
            },
            groupingSetsClause.map {
              case Token("TOK_GROUPING_SETS", children) =>
                val (groupByExprs, masks) = extractGroupingSet(children)
                GroupingSets(masks, groupByExprs, withLateralView, selectExpressions)
              case _ => sys.error("Expect GROUPING SETS")
            },
            rollupGroupByClause.map {
              case Token("TOK_ROLLUP_GROUPBY", children) =>
                Rollup(children.map(nodeToExpr), withLateralView, selectExpressions)
              case _ => sys.error("Expect WITH ROLLUP")
            },
            cubeGroupByClause.map {
              case Token("TOK_CUBE_GROUPBY", children) =>
                Cube(children.map(nodeToExpr), withLateralView, selectExpressions)
              case _ => sys.error("Expect WITH CUBE")
            },
            Some(Project(selectExpressions, withLateralView))).flatten.head
        }

        // Handle HAVING clause.
        val withHaving = havingClause.map { h =>
          val havingExpr = h.getChildren.asScala match { case Seq(hexpr) => nodeToExpr(hexpr) }
          // Note that we added a cast to boolean. If the expression itself is already boolean,
          // the optimizer will get rid of the unnecessary cast.
          Filter(Cast(havingExpr, BooleanType), withProject)
        }.getOrElse(withProject)

        // Handle SELECT DISTINCT
        val withDistinct =
          if (selectDistinctClause.isDefined) Distinct(withHaving) else withHaving

        // Handle ORDER BY, SORT BY, DISTRIBUTE BY, and CLUSTER BY clause.
        val withSort =
          (orderByClause, sortByClause, distributeByClause, clusterByClause) match {
            case (Some(totalOrdering), None, None, None) =>
              Sort(totalOrdering.getChildren.asScala.map(nodeToSortOrder), true, withDistinct)
            case (None, Some(perPartitionOrdering), None, None) =>
              Sort(
                perPartitionOrdering.getChildren.asScala.map(nodeToSortOrder),
                false, withDistinct)
            case (None, None, Some(partitionExprs), None) =>
              RepartitionByExpression(
                partitionExprs.getChildren.asScala.map(nodeToExpr), withDistinct)
            case (None, Some(perPartitionOrdering), Some(partitionExprs), None) =>
              Sort(
                perPartitionOrdering.getChildren.asScala.map(nodeToSortOrder), false,
                RepartitionByExpression(
                  partitionExprs.getChildren.asScala.map(nodeToExpr),
                  withDistinct))
            case (None, None, None, Some(clusterExprs)) =>
              Sort(
                clusterExprs.getChildren.asScala.map(nodeToExpr).map(SortOrder(_, Ascending)),
                false,
                RepartitionByExpression(
                  clusterExprs.getChildren.asScala.map(nodeToExpr),
                  withDistinct))
            case (None, None, None, None) => withDistinct
            case _ => sys.error("Unsupported set of ordering / distribution clauses.")
          }

        val withLimit =
          limitClause.map(l => nodeToExpr(l.getChildren.iterator().next()))
            .map(Limit(_, withSort))
            .getOrElse(withSort)

        // Collect all window specifications defined in the WINDOW clause.
        val windowDefinitions = windowClause.map(_.getChildren.asScala.collect {
          case Token("TOK_WINDOWDEF",
          Token(windowName, Nil) :: Token("TOK_WINDOWSPEC", spec) :: Nil) =>
            windowName -> nodesToWindowSpecification(spec)
        }.toMap)
        // Handle cases like
        // window w1 as (partition by p_mfgr order by p_name
        //               range between 2 preceding and 2 following),
        //        w2 as w1
        val resolvedCrossReference = windowDefinitions.map {
          windowDefMap => windowDefMap.map {
            case (windowName, WindowSpecReference(other)) =>
              (windowName, windowDefMap(other).asInstanceOf[WindowSpecDefinition])
            case o => o.asInstanceOf[(String, WindowSpecDefinition)]
          }
        }

        val withWindowDefinitions =
          resolvedCrossReference.map(WithWindowDefinition(_, withLimit)).getOrElse(withLimit)

        // TOK_INSERT_INTO means to add files to the table.
        // TOK_DESTINATION means to overwrite the table.
        val resultDestination =
          (intoClause orElse destClause).getOrElse(sys.error("No destination found."))
        val overwrite = intoClause.isEmpty
        nodeToDest(
          resultDestination,
          withWindowDefinitions,
          overwrite)
      }

      // If there are multiple INSERTs, just UNION them together into one query.
      val query = queries.reduceLeft(Union)

      // Return a With plan if there is a CTE.
      cteRelations.map(With(query, _)).getOrElse(query)

    // HIVE-9039 renamed TOK_UNION => TOK_UNIONALL while adding TOK_UNIONDISTINCT
    case Token("TOK_UNIONALL", left :: right :: Nil) =>
      Union(nodeToPlan(left, context), nodeToPlan(right, context))

    case a: ASTNode =>
      throw new NotImplementedError(s"No parse rules for $node:\n ${dumpTree(a).toString} ")
  }

  val allJoinTokens = "(TOK_.*JOIN)".r
  val laterViewToken = "TOK_LATERAL_VIEW(.*)".r
  def nodeToRelation(node: Node, context: Context): LogicalPlan = node match {
    case Token("TOK_SUBQUERY",
           query :: Token(alias, Nil) :: Nil) =>
      Subquery(cleanIdentifier(alias), nodeToPlan(query, context))

    case Token(laterViewToken(isOuter), selectClause :: relationClause :: Nil) =>
      val Token("TOK_SELECT",
            Token("TOK_SELEXPR", clauses) :: Nil) = selectClause

      val alias = getClause("TOK_TABALIAS", clauses).getChildren.iterator().next()
        .asInstanceOf[ASTNode].getText

      val (generator, attributes) = nodesToGenerator(clauses)
        Generate(
          generator,
          join = true,
          outer = isOuter.nonEmpty,
          Some(alias.toLowerCase),
          attributes.map(UnresolvedAttribute(_)),
          nodeToRelation(relationClause, context))

    /* All relations, possibly with aliases or sampling clauses. */
    case Token("TOK_TABREF", clauses) =>
      // If the last clause is not a token then it's the alias of the table.
      val (nonAliasClauses, aliasClause) =
        if (clauses.last.getText.startsWith("TOK")) {
          (clauses, None)
        } else {
          (clauses.dropRight(1), Some(clauses.last))
        }

      val (Some(tableNameParts) ::
          splitSampleClause ::
          bucketSampleClause :: Nil) = {
        getClauses(Seq("TOK_TABNAME", "TOK_TABLESPLITSAMPLE", "TOK_TABLEBUCKETSAMPLE"),
          nonAliasClauses)
      }

      val tableIdent = extractTableIdent(tableNameParts)
      val alias = aliasClause.map { case Token(a, Nil) => cleanIdentifier(a) }
      val relation = UnresolvedRelation(tableIdent, alias)

      // Apply sampling if requested.
      (bucketSampleClause orElse splitSampleClause).map {
        case Token("TOK_TABLESPLITSAMPLE",
               Token("TOK_ROWCOUNT", Nil) ::
               Token(count, Nil) :: Nil) =>
          Limit(Literal(count.toInt), relation)
        case Token("TOK_TABLESPLITSAMPLE",
               Token("TOK_PERCENT", Nil) ::
               Token(fraction, Nil) :: Nil) =>
          // The range of fraction accepted by Sample is [0, 1]. Because Hive's block sampling
          // function takes X PERCENT as the input and the range of X is [0, 100], we need to
          // adjust the fraction.
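          // For example, "TABLESAMPLE (30 PERCENT)" arrives here as fraction = 30 and is
          // passed to Sample as 30 / 100 = 0.3.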
          require(
            fraction.toDouble >= (0.0 - RandomSampler.roundingEpsilon)
              && fraction.toDouble <= (100.0 + RandomSampler.roundingEpsilon),
            s"Sampling fraction ($fraction) must be on interval [0, 100]")
          Sample(0.0, fraction.toDouble / 100, withReplacement = false, (math.random * 1000).toInt,
            relation)
        case Token("TOK_TABLEBUCKETSAMPLE",
               Token(numerator, Nil) ::
               Token(denominator, Nil) :: Nil) =>
          val fraction = numerator.toDouble / denominator.toDouble
          Sample(0.0, fraction, withReplacement = false, (math.random * 1000).toInt, relation)
        case a: ASTNode =>
          throw new NotImplementedError(
            s"""No parse rules for sampling clause: ${a.getType}, text: ${a.getText} :
               |${dumpTree(a).toString}
             """.stripMargin)
      }.getOrElse(relation)

    case Token("TOK_UNIQUEJOIN", joinArgs) =>
      val tableOrdinals =
        joinArgs.zipWithIndex.filter {
          case (arg, i) => arg.getText == "TOK_TABREF"
        }.map(_._2)

      val isPreserved = tableOrdinals.map(i => (i - 1 < 0) || joinArgs(i - 1).getText == "PRESERVE")
      val tables = tableOrdinals.map(i => nodeToRelation(joinArgs(i), context))
      val joinExpressions =
        tableOrdinals.map(i => joinArgs(i + 1).getChildren.asScala.map(nodeToExpr))

      val joinConditions = joinExpressions.sliding(2).map {
        case Seq(c1, c2) =>
          val predicates = (c1, c2).zipped.map { case (e1, e2) => EqualTo(e1, e2): Expression }
          predicates.reduceLeft(And)
      }.toBuffer

      val joinType = isPreserved.sliding(2).map {
        case Seq(true, true) => FullOuter
        case Seq(true, false) => LeftOuter
        case Seq(false, true) => RightOuter
        case Seq(false, false) => Inner
      }.toBuffer
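      // For example, if only the first of two tables is marked PRESERVE, isPreserved is
      // Seq(true, false), so the single join between them becomes a LeftOuter join.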

      val joinedTables = tables.reduceLeft(Join(_, _, Inner, None))

      // Must be a top-down transformation.
      val joinedResult = joinedTables transform {
        case j: Join =>
          j.copy(
            condition = Some(joinConditions.remove(joinConditions.length - 1)),
            joinType = joinType.remove(joinType.length - 1))
      }

      val groups = joinExpressions.head.indices.map(i => Coalesce(joinExpressions.map(_(i))))

      // Unique join is not really the same as an outer join, so we must group together results
      // where the joinExpressions are the same. Taking the First of each value is only okay
      // because the user of a unique join is implicitly promising that there is only one result.
      // TODO: This doesn't actually work since [[Star]] is not a valid aggregate expression.
      // instead we should figure out how important supporting this feature is and whether it is
      // worth the number of hacks that will be required to implement it.  Namely, we need to add
      // some sort of mapped star expansion that would expand all child output row to be similarly
      // named output expressions where some aggregate expression has been applied (i.e. First).
      // Aggregate(groups, Star(None, First(_)) :: Nil, joinedResult)
      throw new UnsupportedOperationException

    case Token(allJoinTokens(joinToken),
           relation1 ::
           relation2 :: other) =>
      if (other.size > 1) {
        sys.error(s"Unsupported join operation: $other")
      }

      val joinType = joinToken match {
        case "TOK_JOIN" => Inner
        case "TOK_CROSSJOIN" => Inner
        case "TOK_RIGHTOUTERJOIN" => RightOuter
        case "TOK_LEFTOUTERJOIN" => LeftOuter
        case "TOK_FULLOUTERJOIN" => FullOuter
        case "TOK_LEFTSEMIJOIN" => LeftSemi
      }
      Join(nodeToRelation(relation1, context),
        nodeToRelation(relation2, context),
        joinType,
        other.headOption.map(nodeToExpr))

    case a: ASTNode =>
      throw new NotImplementedError(s"No parse rules for:\n ${dumpTree(a).toString} ")
  }

  def nodeToSortOrder(node: Node): SortOrder = node match {
    case Token("TOK_TABSORTCOLNAMEASC", sortExpr :: Nil) =>
      SortOrder(nodeToExpr(sortExpr), Ascending)
    case Token("TOK_TABSORTCOLNAMEDESC", sortExpr :: Nil) =>
      SortOrder(nodeToExpr(sortExpr), Descending)

    case a: ASTNode =>
      throw new NotImplementedError(s"No parse rules for:\n ${dumpTree(a).toString} ")
  }

  val destinationToken = "TOK_DESTINATION|TOK_INSERT_INTO".r
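  /**
   * Wraps `query` with the destination described by `node`: a TOK_TMP_FILE destination
   * returns the query unchanged, while a TOK_TAB destination yields an InsertIntoTable
   * plan whose TOK_PARTSPEC partition keys are parsed case-insensitively.
   */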
  protected def nodeToDest(
      node: Node,
      query: LogicalPlan,
      overwrite: Boolean): LogicalPlan = node match {
    case Token(destinationToken(),
           Token("TOK_DIR",
             Token("TOK_TMP_FILE", Nil) :: Nil) :: Nil) =>
      query

    case Token(destinationToken(),
           Token("TOK_TAB",
              tableArgs) :: Nil) =>
      val Some(tableNameParts) :: partitionClause :: Nil =
        getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs)

      val tableIdent = extractTableIdent(tableNameParts)

      val partitionKeys = partitionClause.map(_.getChildren.asScala.map {
        // Parse partitions. We also make keys case insensitive.
        case Token("TOK_PARTVAL", Token(key, Nil) :: Token(value, Nil) :: Nil) =>
          cleanIdentifier(key.toLowerCase) -> Some(PlanUtils.stripQuotes(value))
        case Token("TOK_PARTVAL", Token(key, Nil) :: Nil) =>
          cleanIdentifier(key.toLowerCase) -> None
      }.toMap).getOrElse(Map.empty)

      InsertIntoTable(
        UnresolvedRelation(tableIdent, None), partitionKeys, query, overwrite, ifNotExists = false)

    case Token(destinationToken(),
           Token("TOK_TAB",
             tableArgs) ::
           Token("TOK_IFNOTEXISTS",
             ifNotExists) :: Nil) =>
      val Some(tableNameParts) :: partitionClause :: Nil =
        getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs)

      val tableIdent = extractTableIdent(tableNameParts)

      val partitionKeys = partitionClause.map(_.getChildren.asScala.map {
        // Parse partitions. We also make keys case insensitive.
        case Token("TOK_PARTVAL", Token(key, Nil) :: Token(value, Nil) :: Nil) =>
          cleanIdentifier(key.toLowerCase) -> Some(PlanUtils.stripQuotes(value))
        case Token("TOK_PARTVAL", Token(key, Nil) :: Nil) =>
          cleanIdentifier(key.toLowerCase) -> None
      }.toMap).getOrElse(Map.empty)

      InsertIntoTable(
        UnresolvedRelation(tableIdent, None), partitionKeys, query, overwrite, ifNotExists = true)

    case a: ASTNode =>
      throw new NotImplementedError(s"No parse rules for ${a.getName}:" +
          s"\n ${dumpTree(a).toString}")
  }

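  // A SELECT-list item, e.g. "SELECT key + 1 AS k", arrives as
  //   Token("TOK_SELEXPR", plusExpr :: Token("k", Nil) :: Nil);
  // a UDTF-style multi-alias such as "... AS (a, b)" carries several alias children instead.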
  protected def selExprNodeToExpr(node: Node): Option[Expression] = node match {
    case Token("TOK_SELEXPR", e :: Nil) =>
      Some(nodeToExpr(e))

    case Token("TOK_SELEXPR", e :: Token(alias, Nil) :: Nil) =>
      Some(Alias(nodeToExpr(e), cleanIdentifier(alias))())

    case Token("TOK_SELEXPR", e :: aliasChildren) =>
      val aliasNames = aliasChildren.collect {
        case Token(name, Nil) => cleanIdentifier(name)
      }
      Some(MultiAlias(nodeToExpr(e), aliasNames))

    /* Hints are ignored */
    case Token("TOK_HINTLIST", _) => None

    case a: ASTNode =>
      throw new NotImplementedError(s"No parse rules for ${a.getName}:" +
          s"\n ${dumpTree(a).toString}")
  }

  protected val escapedIdentifier = "`([^`]+)`".r
  protected val doubleQuotedString = "\"([^\"]+)\"".r
  protected val singleQuotedString = "'([^']+)'".r

  protected def unquoteString(str: String) = str match {
    case singleQuotedString(s) => s
    case doubleQuotedString(s) => s
    case other => other
  }
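  // For illustration: unquoteString("'abc'") == "abc" and unquoteString("\"abc\"") == "abc";
  // anything else passes through unchanged.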

  /** Strips backticks from ident if present */
  protected def cleanIdentifier(ident: String): String = ident match {
    case escapedIdentifier(i) => i
    case plainIdent => plainIdent
  }

  val numericAstTypes = Seq(
    HiveParser.Number,
    HiveParser.TinyintLiteral,
    HiveParser.SmallintLiteral,
    HiveParser.BigintLiteral,
    HiveParser.DecimalLiteral)

  /* Case insensitive matches */
  val COUNT = "(?i)COUNT".r
  val SUM = "(?i)SUM".r
  val AND = "(?i)AND".r
  val OR = "(?i)OR".r
  val NOT = "(?i)NOT".r
  val TRUE = "(?i)TRUE".r
  val FALSE = "(?i)FALSE".r
  val LIKE = "(?i)LIKE".r
  val RLIKE = "(?i)RLIKE".r
  val REGEXP = "(?i)REGEXP".r
  val IN = "(?i)IN".r
  val DIV = "(?i)DIV".r
  val BETWEEN = "(?i)BETWEEN".r
  val WHEN = "(?i)WHEN".r
  val CASE = "(?i)CASE".r

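  // Translates a single expression AST into a Catalyst Expression. For illustration,
  // "CAST(a AS INT) + 1" arrives roughly as
  //   Token("+", Token("TOK_FUNCTION", Token("TOK_INT", Nil) :: colRef :: Nil) :: one :: Nil)
  // and becomes Add(Cast(UnresolvedAttribute("a"), IntegerType), Literal(1)).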
  protected def nodeToExpr(node: Node): Expression = node match {
    /* Attribute References */
    case Token("TOK_TABLE_OR_COL",
           Token(name, Nil) :: Nil) =>
      UnresolvedAttribute.quoted(cleanIdentifier(name))
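    // Dotted references nest, e.g. "a.b.c" parses as Token(".", Token(".", a :: b) :: c) and
    // folds into UnresolvedAttribute(Seq("a", "b", "c")); a non-attribute qualifier instead
    // yields UnresolvedExtractValue (struct field access).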
    case Token(".", qualifier :: Token(attr, Nil) :: Nil) =>
      nodeToExpr(qualifier) match {
        case UnresolvedAttribute(nameParts) =>
          UnresolvedAttribute(nameParts :+ cleanIdentifier(attr))
        case other => UnresolvedExtractValue(other, Literal(attr))
      }

    /* Stars (*) */
    case Token("TOK_ALLCOLREF", Nil) => UnresolvedStar(None)
    // The format of dbName.tableName.* cannot be parsed by HiveParser. TOK_TABNAME will only
    // have a single child, which is the tableName.
    case Token("TOK_ALLCOLREF", Token("TOK_TABNAME", Token(name, Nil) :: Nil) :: Nil) =>
      UnresolvedStar(Some(UnresolvedAttribute.parseAttributeName(name)))

    /* Aggregate Functions */
    case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) =>
      Count(args.map(nodeToExpr)).toAggregateExpression(isDistinct = true)
    case Token("TOK_FUNCTIONSTAR", Token(COUNT(), Nil) :: Nil) =>
      Count(Literal(1)).toAggregateExpression()

    /* Casts */
    case Token("TOK_FUNCTION", Token("TOK_STRING", Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), StringType)
    case Token("TOK_FUNCTION", Token("TOK_VARCHAR", _) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), StringType)
    case Token("TOK_FUNCTION", Token("TOK_CHAR", _) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), StringType)
    case Token("TOK_FUNCTION", Token("TOK_INT", Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), IntegerType)
    case Token("TOK_FUNCTION", Token("TOK_BIGINT", Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), LongType)
    case Token("TOK_FUNCTION", Token("TOK_FLOAT", Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), FloatType)
    case Token("TOK_FUNCTION", Token("TOK_DOUBLE", Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), DoubleType)
    case Token("TOK_FUNCTION", Token("TOK_SMALLINT", Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), ShortType)
    case Token("TOK_FUNCTION", Token("TOK_TINYINT", Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), ByteType)
    case Token("TOK_FUNCTION", Token("TOK_BINARY", Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), BinaryType)
    case Token("TOK_FUNCTION", Token("TOK_BOOLEAN", Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), BooleanType)
    case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: scale :: Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), DecimalType(precision.getText.toInt, scale.getText.toInt))
    case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), DecimalType(precision.getText.toInt, 0))
    case Token("TOK_FUNCTION", Token("TOK_DECIMAL", Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), DecimalType.USER_DEFAULT)
    case Token("TOK_FUNCTION", Token("TOK_TIMESTAMP", Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), TimestampType)
    case Token("TOK_FUNCTION", Token("TOK_DATE", Nil) :: arg :: Nil) =>
      Cast(nodeToExpr(arg), DateType)

    /* Arithmetic */
    case Token("+", child :: Nil) => nodeToExpr(child)
    case Token("-", child :: Nil) => UnaryMinus(nodeToExpr(child))
    case Token("~", child :: Nil) => BitwiseNot(nodeToExpr(child))
    case Token("+", left :: right :: Nil) => Add(nodeToExpr(left), nodeToExpr(right))
    case Token("-", left :: right :: Nil) => Subtract(nodeToExpr(left), nodeToExpr(right))
    case Token("*", left :: right :: Nil) => Multiply(nodeToExpr(left), nodeToExpr(right))
    case Token("/", left :: right :: Nil) => Divide(nodeToExpr(left), nodeToExpr(right))
    case Token(DIV(), left :: right :: Nil) =>
      Cast(Divide(nodeToExpr(left), nodeToExpr(right)), LongType)
    case Token("%", left :: right :: Nil) => Remainder(nodeToExpr(left), nodeToExpr(right))
    case Token("&", left :: right :: Nil) => BitwiseAnd(nodeToExpr(left), nodeToExpr(right))
    case Token("|", left :: right :: Nil) => BitwiseOr(nodeToExpr(left), nodeToExpr(right))
    case Token("^", left :: right :: Nil) => BitwiseXor(nodeToExpr(left), nodeToExpr(right))

    /* Comparisons */
    case Token("=", left :: right :: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right))
    case Token("==", left :: right :: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right))
    case Token("<=>", left :: right :: Nil) => EqualNullSafe(nodeToExpr(left), nodeToExpr(right))
    case Token("!=", left :: right :: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
    case Token("<>", left :: right :: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
    case Token(">", left :: right :: Nil) => GreaterThan(nodeToExpr(left), nodeToExpr(right))
    case Token(">=", left :: right :: Nil) =>
      GreaterThanOrEqual(nodeToExpr(left), nodeToExpr(right))
    case Token("<", left :: right :: Nil) => LessThan(nodeToExpr(left), nodeToExpr(right))
    case Token("<=", left :: right :: Nil) => LessThanOrEqual(nodeToExpr(left), nodeToExpr(right))
    case Token(LIKE(), left :: right :: Nil) => Like(nodeToExpr(left), nodeToExpr(right))
    case Token(RLIKE(), left :: right :: Nil) => RLike(nodeToExpr(left), nodeToExpr(right))
    case Token(REGEXP(), left :: right :: Nil) => RLike(nodeToExpr(left), nodeToExpr(right))
    case Token("TOK_FUNCTION", Token("TOK_ISNOTNULL", Nil) :: child :: Nil) =>
      IsNotNull(nodeToExpr(child))
    case Token("TOK_FUNCTION", Token("TOK_ISNULL", Nil) :: child :: Nil) =>
      IsNull(nodeToExpr(child))
    case Token("TOK_FUNCTION", Token(IN(), Nil) :: value :: list) =>
      In(nodeToExpr(value), list.map(nodeToExpr))
    case Token("TOK_FUNCTION",
           Token(BETWEEN(), Nil) ::
           kw ::
           target ::
           minValue ::
           maxValue :: Nil) =>

      val targetExpression = nodeToExpr(target)
      val betweenExpr =
        And(
          GreaterThanOrEqual(targetExpression, nodeToExpr(minValue)),
          LessThanOrEqual(targetExpression, nodeToExpr(maxValue)))
      // The parser prepends a boolean marker token: KW_TRUE flags NOT BETWEEN.
      kw match {
        case Token("KW_FALSE", Nil) => betweenExpr
        case Token("KW_TRUE", Nil) => Not(betweenExpr)
      }

    /* Boolean Logic */
    case Token(AND(), left :: right :: Nil) => And(nodeToExpr(left), nodeToExpr(right))
    case Token(OR(), left :: right :: Nil) => Or(nodeToExpr(left), nodeToExpr(right))
    case Token(NOT(), child :: Nil) => Not(nodeToExpr(child))
    case Token("!", child :: Nil) => Not(nodeToExpr(child))

    /* Case statements */
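    // e.g. "CASE WHEN p THEN x ELSE y END" arrives with branch children p :: x :: y, while
    // "CASE k WHEN v THEN x END" carries the key expression k first (CaseKeyWhen).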
    case Token("TOK_FUNCTION", Token(WHEN(), Nil) :: branches) =>
      CaseWhen(branches.map(nodeToExpr))
    case Token("TOK_FUNCTION", Token(CASE(), Nil) :: branches) =>
      val keyExpr = nodeToExpr(branches.head)
      CaseKeyWhen(keyExpr, branches.drop(1).map(nodeToExpr))

    /* Complex datatype manipulation */
    case Token("[", child :: ordinal :: Nil) =>
      UnresolvedExtractValue(nodeToExpr(child), nodeToExpr(ordinal))

    /* Window Functions */
    case Token("TOK_FUNCTION", Token(name, Nil) +: args :+ Token("TOK_WINDOWSPEC", spec)) =>
      val function = UnresolvedWindowFunction(name, args.map(nodeToExpr))
      nodesToWindowSpecification(spec) match {
        case reference: WindowSpecReference =>
          UnresolvedWindowExpression(function, reference)
        case definition: WindowSpecDefinition =>
          WindowExpression(function, definition)
      }
    case Token("TOK_FUNCTIONSTAR", Token(name, Nil) :: Token("TOK_WINDOWSPEC", spec) :: Nil) =>
      // Safe to use Literal(1)?
      val function = UnresolvedWindowFunction(name, Literal(1) :: Nil)
      nodesToWindowSpecification(spec) match {
        case reference: WindowSpecReference =>
          UnresolvedWindowExpression(function, reference)
        case definition: WindowSpecDefinition =>
          WindowExpression(function, definition)
      }

    /* UDFs - Must be last otherwise will preempt built in functions */
    case Token("TOK_FUNCTION", Token(name, Nil) :: args) =>
      UnresolvedFunction(name, args.map(nodeToExpr), isDistinct = false)
    // Aggregate function with DISTINCT keyword.
    case Token("TOK_FUNCTIONDI", Token(name, Nil) :: args) =>
      UnresolvedFunction(name, args.map(nodeToExpr), isDistinct = true)
    case Token("TOK_FUNCTIONSTAR", Token(name, Nil) :: _) =>
      UnresolvedFunction(name, UnresolvedStar(None) :: Nil, isDistinct = false)

    /* Literals */
    case Token("TOK_NULL", Nil) => Literal.create(null, NullType)
    case Token(TRUE(), Nil) => Literal.create(true, BooleanType)
    case Token(FALSE(), Nil) => Literal.create(false, BooleanType)
    case Token("TOK_STRINGLITERALSEQUENCE", strings) =>
      Literal(strings.map(s => BaseSemanticAnalyzer.unescapeSQLString(s.getText)).mkString)

    // This code is adapted from
    // /ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java#L223
    case ast: ASTNode if numericAstTypes contains ast.getType =>
      var v: Literal = null
      try {
        if (ast.getText.endsWith("L")) {
          // Literal bigint.
          v = Literal.create(ast.getText.substring(0, ast.getText.length() - 1).toLong, LongType)
        } else if (ast.getText.endsWith("S")) {
          // Literal smallint.
          v = Literal.create(ast.getText.substring(0, ast.getText.length() - 1).toShort, ShortType)
        } else if (ast.getText.endsWith("Y")) {
          // Literal tinyint.
          v = Literal.create(ast.getText.substring(0, ast.getText.length() - 1).toByte, ByteType)
        } else if (ast.getText.endsWith("BD") || ast.getText.endsWith("D")) {
          // Literal decimal
          val strVal = ast.getText.stripSuffix("D").stripSuffix("B")
          v = Literal(Decimal(strVal))
        } else {
          // Try progressively narrower types. Each assignment overwrites the previous one; the
          // first conversion that throws NumberFormatException is caught below, leaving the
          // widest successful literal in v.
          v = Literal.create(ast.getText.toDouble, DoubleType)
          v = Literal.create(ast.getText.toLong, LongType)
          v = Literal.create(ast.getText.toInt, IntegerType)
        }
      } catch {
        case nfe: NumberFormatException => // Do nothing
      }

      if (v == null) {
        sys.error(s"Failed to parse number '${ast.getText}'.")
      } else {
        v
      }

    case ast: ASTNode if ast.getType == HiveParser.StringLiteral =>
      Literal(BaseSemanticAnalyzer.unescapeSQLString(ast.getText))

    case ast: ASTNode if ast.getType == HiveParser.TOK_DATELITERAL =>
      Literal(Date.valueOf(ast.getText.substring(1, ast.getText.length - 1)))

    case ast: ASTNode if ast.getType == HiveParser.TOK_CHARSETLITERAL =>
      Literal(BaseSemanticAnalyzer.charSetString(ast.getChild(0).getText, ast.getChild(1).getText))

    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_YEAR_MONTH_LITERAL =>
      Literal(CalendarInterval.fromYearMonthString(ast.getText))

    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_DAY_TIME_LITERAL =>
      Literal(CalendarInterval.fromDayTimeString(ast.getText))

    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_YEAR_LITERAL =>
      Literal(CalendarInterval.fromSingleUnitString("year", ast.getText))

    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_MONTH_LITERAL =>
      Literal(CalendarInterval.fromSingleUnitString("month", ast.getText))

    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_DAY_LITERAL =>
      Literal(CalendarInterval.fromSingleUnitString("day", ast.getText))

    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_HOUR_LITERAL =>
      Literal(CalendarInterval.fromSingleUnitString("hour", ast.getText))

    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_MINUTE_LITERAL =>
      Literal(CalendarInterval.fromSingleUnitString("minute", ast.getText))

    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_SECOND_LITERAL =>
      Literal(CalendarInterval.fromSingleUnitString("second", ast.getText))

    case a: ASTNode =>
      throw new NotImplementedError(
        s"""No parse rules for ASTNode type: ${a.getType}, text: ${a.getText} :
           |${dumpTree(a).toString}
         """.stripMargin)
  }

  /* Case insensitive matches for Window Specification */
  val PRECEDING = "(?i)preceding".r
  val FOLLOWING = "(?i)following".r
  val CURRENT = "(?i)current".r
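  // An OVER clause, e.g. "OVER (PARTITION BY a ORDER BY b ROWS BETWEEN 1 PRECEDING AND
  // CURRENT ROW)", arrives as the TOK_WINDOWSPEC child list handled here: an optional
  // TOK_PARTITIONINGSPEC plus an optional TOK_WINDOWRANGE (ROWS) or TOK_WINDOWVALUES (RANGE)
  // frame.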
  def nodesToWindowSpecification(nodes: Seq[ASTNode]): WindowSpec = nodes match {
    case Token(windowName, Nil) :: Nil =>
      // Refer to a window spec defined in the window clause.
      WindowSpecReference(windowName)
    case Nil =>
      // OVER()
      WindowSpecDefinition(
        partitionSpec = Nil,
        orderSpec = Nil,
        frameSpecification = UnspecifiedFrame)
    case spec =>
      val (partitionClause :: rowFrame :: rangeFrame :: Nil) =
        getClauses(
          Seq(
            "TOK_PARTITIONINGSPEC",
            "TOK_WINDOWRANGE",
            "TOK_WINDOWVALUES"),
          spec)

      // Handle Partition By and Order By.
      val (partitionSpec, orderSpec) = partitionClause.map { partitionAndOrdering =>
        val (partitionByClause :: orderByClause :: sortByClause :: clusterByClause :: Nil) =
          getClauses(
            Seq("TOK_DISTRIBUTEBY", "TOK_ORDERBY", "TOK_SORTBY", "TOK_CLUSTERBY"),
            partitionAndOrdering.getChildren.asScala.asInstanceOf[Seq[ASTNode]])

        (partitionByClause, orderByClause.orElse(sortByClause), clusterByClause) match {
          case (Some(partitionByExpr), Some(orderByExpr), None) =>
            (partitionByExpr.getChildren.asScala.map(nodeToExpr),
              orderByExpr.getChildren.asScala.map(nodeToSortOrder))
          case (Some(partitionByExpr), None, None) =>
            (partitionByExpr.getChildren.asScala.map(nodeToExpr), Nil)
          case (None, Some(orderByExpr), None) =>
            (Nil, orderByExpr.getChildren.asScala.map(nodeToSortOrder))
          case (None, None, Some(clusterByExpr)) =>
            val expressions = clusterByExpr.getChildren.asScala.map(nodeToExpr)
            (expressions, expressions.map(SortOrder(_, Ascending)))
          case _ =>
            throw new NotImplementedError(
              s"No parse rules for Node ${partitionAndOrdering.getName}")
        }
      }.getOrElse {
        (Nil, Nil)
      }

      // Handle Window Frame
      val windowFrame =
        if (rowFrame.isEmpty && rangeFrame.isEmpty) {
          UnspecifiedFrame
        } else {
          val frameType = rowFrame.map(_ => RowFrame).getOrElse(RangeFrame)
          def nodeToBoundary(node: Node): FrameBoundary = node match {
            case Token(PRECEDING(), Token(count, Nil) :: Nil) =>
              if (count.toLowerCase() == "unbounded") {
                UnboundedPreceding
              } else {
                ValuePreceding(count.toInt)
              }
            case Token(FOLLOWING(), Token(count, Nil) :: Nil) =>
              if (count.toLowerCase() == "unbounded") {
                UnboundedFollowing
              } else {
                ValueFollowing(count.toInt)
              }
            case Token(CURRENT(), Nil) => CurrentRow
            case _ =>
              throw new NotImplementedError(
                s"No parse rules for the Window Frame Boundary based on Node ${node.getName}")
          }

          rowFrame.orElse(rangeFrame).map { frame =>
            frame.getChildren.asScala.toList match {
              case precedingNode :: followingNode :: Nil =>
                SpecifiedWindowFrame(
                  frameType,
                  nodeToBoundary(precedingNode),
                  nodeToBoundary(followingNode))
              case precedingNode :: Nil =>
                SpecifiedWindowFrame(frameType, nodeToBoundary(precedingNode), CurrentRow)
              case _ =>
                throw new NotImplementedError(
                  s"No parse rules for the Window Frame based on Node ${frame.getName}")
            }
          }.getOrElse(sys.error("If you see this, please file a bug report with your query."))
        }

      WindowSpecDefinition(partitionSpec, orderSpec, windowFrame)
  }

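  // Generators come from LATERAL VIEW, e.g. (illustrative):
  //   SELECT ... FROM src LATERAL VIEW explode(arr) t AS elem
  // where the head node is the generator call and any bare identifier tokens alongside it are
  // collected below as lower-cased output attribute names.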
  val explode = "(?i)explode".r
  val jsonTuple = "(?i)json_tuple".r
  def nodesToGenerator(nodes: Seq[Node]): (Generator, Seq[String]) = {
    val function = nodes.head

    val attributes = nodes.flatMap {
      case Token(a, Nil) => a.toLowerCase :: Nil
      case _ => Nil
    }

    function match {
      case Token("TOK_FUNCTION", Token(explode(), Nil) :: child :: Nil) =>
        (Explode(nodeToExpr(child)), attributes)

      case Token("TOK_FUNCTION", Token(jsonTuple(), Nil) :: children) =>
        (JsonTuple(children.map(nodeToExpr)), attributes)

      case Token("TOK_FUNCTION", Token(functionName, Nil) :: children) =>
        val functionInfo: FunctionInfo =
          Option(FunctionRegistry.getFunctionInfo(functionName.toLowerCase)).getOrElse(
            sys.error(s"Couldn't find function $functionName"))
        val functionClassName = functionInfo.getFunctionClass.getName

        (HiveGenericUDTF(
          new HiveFunctionWrapper(functionClassName),
          children.map(nodeToExpr)), attributes)

      case a: ASTNode =>
        throw new NotImplementedError(
          s"""No parse rules for ASTNode type: ${a.getType}, text: ${a.getText}, tree:
             |${dumpTree(a).toString}
           """.stripMargin)
    }
  }

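  // Renders the AST one node per line for error messages, indented two spaces per level and
  // annotated with line and token-position information, e.g.
  //   TOK_QUERY ...
  //     TOK_FROM ...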
  def dumpTree(node: Node, builder: StringBuilder = new StringBuilder, indent: Int = 0)
    : StringBuilder = {
    node match {
      case a: ASTNode => builder.append(
        ("  " * indent) + a.getText + " " +
          a.getLine + ", " +
          a.getTokenStartIndex + ", " +
          a.getTokenStopIndex + ", " +
          a.getCharPositionInLine + "\n")
      case other => sys.error(s"Non ASTNode encountered: $other")
    }

    Option(node.getChildren).map(_.asScala).getOrElse(Nil).foreach(dumpTree(_, builder, indent + 1))
    builder
  }
}