All downloads are free. Search and download functionality uses the official Maven repository.

shark.execution.UDTFOperator.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2012 The Regents of The University of California.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package shark.execution

import java.util.{List => JavaList}

import scala.collection.mutable.ArrayBuffer
import scala.collection.JavaConversions._
import scala.reflect.BeanProperty

import org.apache.hadoop.hive.ql.plan.UDTFDesc
import org.apache.hadoop.hive.ql.udf.generic.Collector
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector
import org.apache.hadoop.hive.serde2.objectinspector.StructField


/**
 * Operator that passes every input row through the GenericUDTF held by its
 * descriptor and emits the rows that the UDTF hands to the collector.
 */
class UDTFOperator extends UnaryOperator[UDTFDesc] {

  @BeanProperty var conf: UDTFDesc = _

  // Scratch array of field values passed to the UDTF; refilled for each row.
  @transient var objToSendToUDTF: Array[java.lang.Object] = _
  // Struct inspector for the input rows (first entry of objectInspectors).
  @transient var soi: StandardStructObjectInspector = _
  // Field references of the input struct, in the order reported by soi.
  @transient var inputFields: JavaList[_ <: StructField] = _
  // Receives the rows produced by the UDTF.
  @transient var collector: UDTFCollector = _
  // Inspector returned by the UDTF's initialize call.
  @transient var outputObjInspector: ObjectInspector = _

  /** Copies the descriptor into conf, then performs the slave-side setup. */
  override def initializeOnMaster(): Unit = {
    super.initializeOnMaster()
    conf = desc
    initializeOnSlave()
  }

  /**
   * Attaches a fresh collector to the UDTF, derives the argument inspectors
   * from the input struct, and initializes the UDTF with them.
   */
  override def initializeOnSlave(): Unit = {
    collector = new UDTFCollector
    val udtf = conf.getGenericUDTF()
    udtf.setCollector(collector)

    // The UDTF arguments are the fields of the incoming struct.
    soi = objectInspectors.head.asInstanceOf[StandardStructObjectInspector]
    inputFields = soi.getAllStructFieldRefs()

    // One object inspector per input field, in field order.
    val argInspectors: Array[ObjectInspector] =
      inputFields.map(field => field.getFieldObjectInspector()).toArray

    objToSendToUDTF = new Array[java.lang.Object](inputFields.size)
    outputObjInspector = udtf.initialize(argInspectors)
  }

  /** Returns the inspector obtained when the UDTF was initialized. */
  override def outputObjectInspector(): ObjectInspector = outputObjInspector

  /** Expands every row of the partition into the rows the UDTF emits for it. */
  override def processPartition(split: Int, iter: Iterator[_]): Iterator[_] = {
    iter.flatMap(row => explode(row))
  }

  /**
   * Runs the UDTF on a single row.
   *
   * @param row input row, read through soi
   * @return the rows gathered by the collector while this row was processed
   */
  def explode[T](row: T): ArrayBuffer[java.lang.Object] = {
    // Fill the shared scratch array with this row's field values.
    val fieldCount = inputFields.size
    var idx = 0
    while (idx < fieldCount) {
      objToSendToUDTF(idx) = soi.getStructFieldData(row, inputFields.get(idx))
      idx += 1
    }
    conf.getGenericUDTF().process(objToSendToUDTF)
    collector.collectRows()
  }
}

/**
 * Collector that buffers the rows handed to it until they are drained with
 * collectRows().
 */
class UDTFCollector extends Collector {

  // Rows received since the last call to collectRows().
  var collected: ArrayBuffer[java.lang.Object] = new ArrayBuffer[java.lang.Object]

  /**
   * Stores a copy of the given row.
   *
   * @param input the emitted row; it is cast to an array before being cloned
   */
  override def collect(input: java.lang.Object): Unit = {
    // GenericUDTF implementations reuse the same output object, so a copy has
    // to be kept. The object is always an array, which makes cloning simple.
    val rowCopy = input.asInstanceOf[Array[_]].clone
    collected += rowCopy
  }

  /**
   * Hands back everything buffered so far and starts a new, empty buffer.
   *
   * @return the rows collected since the previous call
   */
  def collectRows(): ArrayBuffer[java.lang.Object] = {
    val drained = collected
    collected = new ArrayBuffer[java.lang.Object]
    drained
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy