/*
 * Copyright (C) 2012 The Regents of The University of California.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package shark.util

import java.util.{Arrays => JArrays, ArrayList => JArrayList}
import java.util.{HashMap => JHashMap, HashSet => JHashSet}
import java.util.Properties

import scala.reflect.ClassTag
import scala.collection.JavaConversions._

import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.metastore.api.FieldSchema
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS
import org.apache.hadoop.hive.serde2.Deserializer
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector
import org.apache.hadoop.hive.serde2.objectinspector.UnionStructObjectInspector
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory
import org.apache.hadoop.hive.ql.exec.DDLTask
import org.apache.hadoop.hive.ql.hooks.{ReadEntity, WriteEntity}
import org.apache.hadoop.hive.ql.plan.{CreateTableDesc, DDLWork, DropTableDesc}

import shark.api.{DataType, DataTypes}
import shark.memstore2.SharkTblProperties

private[shark] object HiveUtils {
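
  /**
   * Returns the Hive PrimitiveObjectInspector for the Java primitive corresponding to the
   * given Scala ClassTag, e.g. classTag[Int] maps to the javaIntObjectInspector.
   */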
def getJavaPrimitiveObjectInspector(c: ClassTag[_]): PrimitiveObjectInspector = {
getJavaPrimitiveObjectInspector(DataTypes.fromClassTag(c))
}
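
  /**
   * Returns the Hive PrimitiveObjectInspector for the Java primitive corresponding to the
   * given Shark DataType. The match is not exhaustive: any DataType not listed below throws
   * a scala.MatchError.
   */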
def getJavaPrimitiveObjectInspector(t: DataType): PrimitiveObjectInspector = t match {
case DataTypes.BOOLEAN => PrimitiveObjectInspectorFactory.javaBooleanObjectInspector
case DataTypes.TINYINT => PrimitiveObjectInspectorFactory.javaByteObjectInspector
case DataTypes.SMALLINT => PrimitiveObjectInspectorFactory.javaShortObjectInspector
case DataTypes.INT => PrimitiveObjectInspectorFactory.javaIntObjectInspector
case DataTypes.BIGINT => PrimitiveObjectInspectorFactory.javaLongObjectInspector
case DataTypes.FLOAT => PrimitiveObjectInspectorFactory.javaFloatObjectInspector
case DataTypes.DOUBLE => PrimitiveObjectInspectorFactory.javaDoubleObjectInspector
case DataTypes.TIMESTAMP => PrimitiveObjectInspectorFactory.javaTimestampObjectInspector
case DataTypes.STRING => PrimitiveObjectInspectorFactory.javaStringObjectInspector
  }

  /**
   * Returns a StandardStructObjectInspector over the given field names, pairing each field with
   * its PrimitiveObjectInspector.
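   *
   * For example (the field names here are illustrative):
   * {{{
   *   makeStandardStructObjectInspector(
   *     Seq("key", "value"),
   *     Seq(PrimitiveObjectInspectorFactory.javaStringObjectInspector,
   *         PrimitiveObjectInspectorFactory.javaIntObjectInspector))
   * }}}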
*/
  def makeStandardStructObjectInspector(
      fieldNames: Seq[String],
      ois: Seq[PrimitiveObjectInspector]): StructObjectInspector = {
    // The implicit JavaConversions in scope convert these Scala Lists into the java.util.Lists
    // that the Hive factory method expects.
    ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames.toList, ois.toList)
  }

/**
* Return a UnionStructObjectInspector that combines the StructObjectInspectors for the table
* schema and the partition columns, which are virtual in Hive.
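   * Hive stores the partition column names under META_TABLE_PARTITION_COLUMNS as a single
   * "/"-separated string (e.g. "ds/hr" for a table partitioned by ds and hr), hence the
   * split on "/" below.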
*/
def makeUnionOIForPartitionedTable(
partProps: Properties,
tableSerDe: Deserializer): UnionStructObjectInspector = {
val partCols = partProps.getProperty(META_TABLE_PARTITION_COLUMNS)
val partColNames = new JArrayList[String]
val partColObjectInspectors = new JArrayList[ObjectInspector]
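    // Hive materializes partition column values as strings, so each partition column gets a
    // string ObjectInspector.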
partCols.trim().split("/").foreach { colName =>
partColNames.add(colName)
partColObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector)
}
val partColObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
partColNames, partColObjectInspectors)
val oiList = JArrays.asList(
tableSerDe.getObjectInspector.asInstanceOf[StructObjectInspector],
partColObjectInspector.asInstanceOf[StructObjectInspector])
// New oi is union of table + partition object inspectors
ObjectInspectorFactory.getUnionStructObjectInspector(oiList)
  }

/**
* Execute the create table DDL operation against Hive's metastore.
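   *
   * A minimal usage sketch; the table name and columns below are made up for illustration, and
   * a reachable Hive metastore (via the default HiveConf) is assumed:
   * {{{
   *   import scala.reflect.classTag
   *   val created = HiveUtils.createTableInHive(
   *     "demo_table",
   *     Seq("key", "value"),
   *     Seq(classTag[Int], classTag[String]))
   * }}}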
*/
def createTableInHive(
tableName: String,
columnNames: Seq[String],
columnTypes: Seq[ClassTag[_]],
hiveConf: HiveConf = new HiveConf): Boolean = {
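    // Map each (column name, ClassTag) pair to a Hive FieldSchema, using the corresponding
    // Shark DataType's Hive type name; the comment field is left empty.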
val schema = columnNames.zip(columnTypes).map { case (colName, classTag) =>
new FieldSchema(colName, DataTypes.fromClassTag(classTag).hiveName, "")
}
// Setup the create table descriptor with necessary information.
val createTableDesc = new CreateTableDesc()
createTableDesc.setTableName(tableName)
createTableDesc.setCols(new JArrayList[FieldSchema](schema))
createTableDesc.setTblProps(
SharkTblProperties.initializeWithDefaults(new JHashMap[String, String]()))
createTableDesc.setInputFormat("org.apache.hadoop.mapred.TextInputFormat")
createTableDesc.setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")
createTableDesc.setSerName(classOf[shark.memstore2.ColumnarSerDe].getName)
createTableDesc.setNumBuckets(-1)
// Execute the create table against the Hive metastore.
val work = new DDLWork(new JHashSet[ReadEntity], new JHashSet[WriteEntity], createTableDesc)
val taskExecutionStatus = executeDDLTaskDirectly(work, hiveConf)
taskExecutionStatus == 0
}
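
  /**
   * Execute the drop table DDL operation against Hive's metastore. A minimal usage sketch,
   * mirroring createTableInHive above (the table name is illustrative):
   * {{{
   *   val dropped = HiveUtils.dropTableInHive("demo_table")
   * }}}
   */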
def dropTableInHive(tableName: String, hiveConf: HiveConf = new HiveConf): Boolean = {
// Setup the drop table descriptor with necessary information.
val dropTblDesc = new DropTableDesc(
tableName,
false /* expectView */,
false /* ifExists */,
false /* stringPartitionColumns */)
// Execute the drop table against the metastore.
val work = new DDLWork(new JHashSet[ReadEntity], new JHashSet[WriteEntity], dropTblDesc)
val taskExecutionStatus = executeDDLTaskDirectly(work, hiveConf)
taskExecutionStatus == 0
}

  /**
   * Creates a DDLTask from the given DDLWork and directly calls DDLTask#execute(). Returns 0 if
   * the DDL task executes successfully (createTableInHive and dropTableInHive above show
   * typical usage).
   * This is safe to use for all DDL commands except for AlterTableTypes.ARCHIVE, which actually
   * requires the DriverContext created in Hive Driver#execute().
   */
def executeDDLTaskDirectly(ddlWork: DDLWork, hiveConf: HiveConf): Int = {
val task = new DDLTask()
task.initialize(hiveConf, null /* queryPlan */, null /* ctx: DriverContext */)
task.setWork(ddlWork)
task.execute(null /* driverContext */)
}
}