All Downloads are FREE. Search and download functionalities are using the official Maven repository.

shark.memstore2.column.ColumnType.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2012 The Regents of The University of California.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package shark.memstore2.column

import java.nio.ByteBuffer
import java.sql.Timestamp

import scala.reflect.ClassTag

import org.apache.hadoop.hive.serde2.ByteStream
import org.apache.hadoop.hive.serde2.`lazy`.{ByteArrayRef, LazyBinary}
import org.apache.hadoop.hive.serde2.io.ByteWritable
import org.apache.hadoop.hive.serde2.io.DoubleWritable
import org.apache.hadoop.hive.serde2.io.ShortWritable
import org.apache.hadoop.hive.serde2.io.TimestampWritable
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector
import org.apache.hadoop.hive.serde2.objectinspector.primitive._
import org.apache.hadoop.io._


/**
 * Describes one column data type: how a value is written to and read from a ByteBuffer, how it
 * is obtained from an object through an ObjectInspector, and which writable type accompanies it.
 *
 * @param typeID A unique ID representing the type.
 * @param defaultSize Default size in bytes for one element of type T (e.g. Int = 4).
 * @tparam T Scala data type for the column.
 * @tparam V Writable data type for the column.
 */
sealed abstract class ColumnType[T : ClassTag, V : ClassTag](
    val typeID: Int, val defaultSize: Int) {

  /**
   * ClassTag of the Scala data type T. Can be used to create primitive arrays and hash tables.
   */
  def scalaTag: ClassTag[T] = implicitly[ClassTag[T]]

  /**
   * ClassTag of the writable data type V.
   */
  def writableScalaTag: ClassTag[V] = implicitly[ClassTag[V]]

  /**
   * Extract a value out of the buffer at the buffer's current position.
   */
  def extract(buffer: ByteBuffer): T

  /**
   * Append the given value v of type T into the given ByteBuffer.
   */
  def append(v: T, buffer: ByteBuffer): Unit

  /**
   * Return the Scala data representation of the given object, using an object inspector.
   */
  def get(o: Object, oi: ObjectInspector): T

  /**
   * Return the size of the value. This is used to calculate the size of variable length types
   * such as byte arrays and strings. Unless overridden, this is `defaultSize`.
   */
  def actualSize(v: T): Int = defaultSize

  /**
   * Extract a value out of the buffer at the buffer's current position, and put it in the writable
   * object. This is used as an optimization to reduce the temporary objects created, since the
   * writable object can be reused.
   */
  def extractInto(buffer: ByteBuffer, writable: V): Unit

  /**
   * Create a new writable object corresponding to this type.
   */
  def newWritable(): V

  /**
   * Create a duplicated copy of the value. Unless overridden, the value itself is returned.
   */
  def clone(v: T): T = v
}


/**
 * Column type for `Int` values (type ID 0, 4 bytes per value), paired with `IntWritable`.
 */
object INT extends ColumnType[Int, IntWritable](0, 4) {

  /** Writes `v` at the buffer's current position. */
  override def append(v: Int, buffer: ByteBuffer): Unit = buffer.putInt(v)

  /** Reads one Int from the buffer's current position. */
  override def extract(buffer: ByteBuffer): Int = buffer.getInt()

  /** Reads the Int held by `o`; `oi` is cast to `IntObjectInspector`. */
  override def get(o: Object, oi: ObjectInspector): Int = getInt(o, oi)

  // Primitive version: same lookup as get, declared directly with an Int result type.
  def getInt(o: Object, oi: ObjectInspector): Int = {
    val inspector = oi.asInstanceOf[IntObjectInspector]
    inspector.get(o)
  }

  /** Reads one Int from the buffer and stores it in the supplied writable. */
  override def extractInto(buffer: ByteBuffer, writable: IntWritable): Unit = {
    val value = buffer.getInt()
    writable.set(value)
  }

  /** Creates a fresh `IntWritable`. */
  override def newWritable(): IntWritable = new IntWritable
}


/**
 * Column type for `Long` values (type ID 1, 8 bytes per value), paired with `LongWritable`.
 */
object LONG extends ColumnType[Long, LongWritable](1, 8) {

  /** Writes `v` at the buffer's current position. */
  override def append(v: Long, buffer: ByteBuffer): Unit = buffer.putLong(v)

  /** Reads one Long from the buffer's current position. */
  override def extract(buffer: ByteBuffer): Long = buffer.getLong()

  /** Reads the Long held by `o`; `oi` is cast to `LongObjectInspector`. */
  override def get(o: Object, oi: ObjectInspector): Long = getLong(o, oi)

  // Primitive version: same lookup as get, declared directly with a Long result type.
  def getLong(o: Object, oi: ObjectInspector): Long = {
    val inspector = oi.asInstanceOf[LongObjectInspector]
    inspector.get(o)
  }

  /** Reads one Long from the buffer and stores it in the supplied writable. */
  override def extractInto(buffer: ByteBuffer, writable: LongWritable): Unit = {
    val value = buffer.getLong()
    writable.set(value)
  }

  /** Creates a fresh `LongWritable`. */
  override def newWritable(): LongWritable = new LongWritable
}


/**
 * Column type for `Float` values (type ID 2, 4 bytes per value), paired with `FloatWritable`.
 */
object FLOAT extends ColumnType[Float, FloatWritable](2, 4) {

  /** Writes `v` at the buffer's current position. */
  override def append(v: Float, buffer: ByteBuffer): Unit = buffer.putFloat(v)

  /** Reads one Float from the buffer's current position. */
  override def extract(buffer: ByteBuffer): Float = buffer.getFloat()

  /** Reads the Float held by `o`; `oi` is cast to `FloatObjectInspector`. */
  override def get(o: Object, oi: ObjectInspector): Float = {
    val inspector = oi.asInstanceOf[FloatObjectInspector]
    inspector.get(o)
  }

  /** Reads one Float from the buffer and stores it in the supplied writable. */
  override def extractInto(buffer: ByteBuffer, writable: FloatWritable): Unit = {
    val value = buffer.getFloat()
    writable.set(value)
  }

  /** Creates a fresh `FloatWritable`. */
  override def newWritable(): FloatWritable = new FloatWritable
}


/**
 * Column type for `Double` values (type ID 3, 8 bytes per value), paired with `DoubleWritable`.
 */
object DOUBLE extends ColumnType[Double, DoubleWritable](3, 8) {

  /** Writes `v` at the buffer's current position. */
  override def append(v: Double, buffer: ByteBuffer): Unit = buffer.putDouble(v)

  /** Reads one Double from the buffer's current position. */
  override def extract(buffer: ByteBuffer): Double = buffer.getDouble()

  /** Reads the Double held by `o`; `oi` is cast to `DoubleObjectInspector`. */
  override def get(o: Object, oi: ObjectInspector): Double = {
    val inspector = oi.asInstanceOf[DoubleObjectInspector]
    inspector.get(o)
  }

  /** Reads one Double from the buffer and stores it in the supplied writable. */
  override def extractInto(buffer: ByteBuffer, writable: DoubleWritable): Unit = {
    val value = buffer.getDouble()
    writable.set(value)
  }

  /** Creates a fresh `DoubleWritable`. */
  override def newWritable(): DoubleWritable = new DoubleWritable
}


/**
 * Column type for `Boolean` values (type ID 4), stored as a single byte: 1 for true, 0 for
 * false. Paired with `BooleanWritable`.
 */
object BOOLEAN extends ColumnType[Boolean, BooleanWritable](4, 1) {

  /** Writes one byte at the buffer's current position: 1 when `v` is true, otherwise 0. */
  override def append(v: Boolean, buffer: ByteBuffer): Unit = {
    val encoded = (if (v) 1 else 0).toByte
    buffer.put(encoded)
  }

  /** Reads one byte from the buffer; only the value 1 is decoded as true. */
  override def extract(buffer: ByteBuffer): Boolean = buffer.get() == 1

  /** Reads the Boolean held by `o`; `oi` is cast to `BooleanObjectInspector`. */
  override def get(o: Object, oi: ObjectInspector): Boolean = {
    val inspector = oi.asInstanceOf[BooleanObjectInspector]
    inspector.get(o)
  }

  /** Decodes one Boolean from the buffer and stores it in the supplied writable. */
  override def extractInto(buffer: ByteBuffer, writable: BooleanWritable): Unit = {
    val decoded = extract(buffer)
    writable.set(decoded)
  }

  /** Creates a fresh `BooleanWritable`. */
  override def newWritable(): BooleanWritable = new BooleanWritable
}


/**
 * Column type for `Byte` values (type ID 5, 1 byte per value), paired with `ByteWritable`.
 */
object BYTE extends ColumnType[Byte, ByteWritable](5, 1) {

  /** Writes `v` at the buffer's current position. */
  override def append(v: Byte, buffer: ByteBuffer): Unit = buffer.put(v)

  /** Reads one byte from the buffer's current position. */
  override def extract(buffer: ByteBuffer): Byte = buffer.get()

  /** Reads the Byte held by `o`; `oi` is cast to `ByteObjectInspector`. */
  override def get(o: Object, oi: ObjectInspector): Byte = {
    val inspector = oi.asInstanceOf[ByteObjectInspector]
    inspector.get(o)
  }

  /** Reads one byte from the buffer and stores it in the supplied writable. */
  override def extractInto(buffer: ByteBuffer, writable: ByteWritable): Unit = {
    val value = buffer.get()
    writable.set(value)
  }

  /** Creates a fresh `ByteWritable`. */
  override def newWritable(): ByteWritable = new ByteWritable
}


/**
 * Column type for `Short` values (type ID 6, 2 bytes per value), paired with `ShortWritable`.
 */
object SHORT extends ColumnType[Short, ShortWritable](6, 2) {

  /** Writes `v` at the buffer's current position. */
  override def append(v: Short, buffer: ByteBuffer): Unit = {
    buffer.putShort(v)
  }

  /** Reads one Short from the buffer's current position. */
  override def extract(buffer: ByteBuffer): Short = {
    buffer.getShort()
  }

  /** Reads the Short held by `o`; `oi` is cast to `ShortObjectInspector`. */
  override def get(o: Object, oi: ObjectInspector): Short = {
    oi.asInstanceOf[ShortObjectInspector].get(o)
  }

  // Primitive version: same lookup as get, declared directly with a Short result type.
  def getShort(o: Object, oi: ObjectInspector): Short = oi.asInstanceOf[ShortObjectInspector].get(o)

  /** Reads one Short from the buffer and stores it in the supplied writable. */
  // Marked `override` so the compiler checks it against ColumnType's abstract method.
  override def extractInto(buffer: ByteBuffer, writable: ShortWritable): Unit = {
    writable.set(extract(buffer))
  }

  /** Creates a fresh `ShortWritable`. */
  override def newWritable(): ShortWritable = new ShortWritable
}


/**
 * Column type for void columns (type ID 7, size 0), paired with `NullWritable`. Nothing is
 * written to or read from the buffer for this type.
 */
object VOID extends ColumnType[Void, NullWritable](7, 0) {

  /** No-op: the buffer is left untouched. */
  override def append(v: Void, buffer: ByteBuffer): Unit = ()

  /** Not supported for this type; always throws UnsupportedOperationException. */
  // Result type intentionally left inferred, as in the original declaration.
  override def extract(buffer: ByteBuffer) = throw new UnsupportedOperationException()

  /** Always yields null, ignoring both arguments. */
  // Result type intentionally left inferred, as in the original declaration.
  override def get(o: Object, oi: ObjectInspector) = null

  /** No-op: neither the buffer nor the writable is touched. */
  override def extractInto(buffer: ByteBuffer, writable: NullWritable): Unit = ()

  /** Returns the instance obtained from `NullWritable.get`. */
  override def newWritable() = NullWritable.get
}


/**
 * Column type for string columns (type ID 8), using Hadoop `Text` both as the value type and as
 * the writable type. A value is stored as a 4-byte length followed by that many bytes.
 */
object STRING extends ColumnType[Text, Text](8, 8) {

  // Looks up a declared field of Text by name and makes it accessible through reflection.
  private def accessibleTextField(name: String) = {
    val field = classOf[Text].getDeclaredField(name)
    field.setAccessible(true)
    field
  }

  // Reflective handles on Text's "bytes" and "length" fields, written directly by extractInto.
  private val _bytesFld = accessibleTextField("bytes")

  private val _lengthFld = accessibleTextField("length")

  /** Writes the length of `v` as an Int, followed by the first `length` bytes of its data. */
  override def append(v: Text, buffer: ByteBuffer): Unit = {
    val numBytes = v.getLength()
    buffer.putInt(numBytes)
    buffer.put(v.getBytes(), 0, numBytes)
  }

  /** Reads one value from the buffer into a newly allocated `Text`. */
  override def extract(buffer: ByteBuffer): Text = {
    val result = new Text()
    extractInto(buffer, result)
    result
  }

  /** Returns the writable produced by `oi`, which is cast to `StringObjectInspector`. */
  override def get(o: Object, oi: ObjectInspector): Text = {
    val inspector = oi.asInstanceOf[StringObjectInspector]
    inspector.getPrimitiveWritableObject(o)
  }

  /** Size of the stored form: the value's bytes plus 4 bytes for the length prefix. */
  override def actualSize(v: Text): Int = v.getLength() + 4

  /**
   * Reads the length prefix and the bytes that follow it into `writable`, setting its "bytes"
   * and "length" fields through reflection. The existing backing array is reused when it is
   * large enough; otherwise a new array of exactly the required length is installed.
   */
  override def extractInto(buffer: ByteBuffer, writable: Text): Unit = {
    val numBytes = buffer.getInt()
    val current = _bytesFld.get(writable).asInstanceOf[Array[Byte]]
    val target =
      if (current != null && current.length >= numBytes) {
        current
      } else {
        val fresh = new Array[Byte](numBytes)
        _bytesFld.set(writable, fresh)
        fresh
      }
    buffer.get(target, 0, numBytes)
    _lengthFld.set(writable, numBytes)
  }

  /** Creates a fresh, empty `Text`. */
  override def newWritable(): Text = new Text

  /** Returns a new `Text` whose content has been set from `v`. */
  override def clone(v: Text): Text = {
    val copy = new Text()
    copy.set(v)
    copy
  }
}


/**
 * Column type for `java.sql.Timestamp` values (type ID 9), paired with `TimestampWritable`.
 * A value is stored in 12 bytes: the Long from `getTime` followed by the Int from `getNanos`.
 */
object TIMESTAMP extends ColumnType[Timestamp, TimestampWritable](9, 12) {

  /** Writes the timestamp's time as a Long, then its nanos as an Int. */
  override def append(v: Timestamp, buffer: ByteBuffer): Unit = {
    val millis = v.getTime()
    val nanos = v.getNanos()
    buffer.putLong(millis)
    buffer.putInt(nanos)
  }

  /** Reads a Long and an Int from the buffer and builds a new Timestamp from them. */
  override def extract(buffer: ByteBuffer): Timestamp = {
    val millis = buffer.getLong()
    val nanos = buffer.getInt()
    val result = new Timestamp(millis)
    // The nanos field is stored separately and must be applied after the time.
    result.setNanos(nanos)
    result
  }

  /** Returns the Java object produced by `oi`, which is cast to `TimestampObjectInspector`. */
  override def get(o: Object, oi: ObjectInspector): Timestamp = {
    val inspector = oi.asInstanceOf[TimestampObjectInspector]
    inspector.getPrimitiveJavaObject(o)
  }

  /** Extracts one Timestamp from the buffer and stores it in the supplied writable. */
  override def extractInto(buffer: ByteBuffer, writable: TimestampWritable): Unit = {
    val decoded = extract(buffer)
    writable.set(decoded)
  }

  /** Creates a fresh `TimestampWritable`. */
  override def newWritable(): TimestampWritable = new TimestampWritable
}


/**
 * Column type for binary columns (type ID 10), using `BytesWritable` both as the value type and
 * as the writable type. A value is stored as a 4-byte length followed by that many bytes.
 */
object BINARY extends ColumnType[BytesWritable, BytesWritable](10, 16) {

  // Looks up a declared field of BytesWritable by name and makes it accessible via reflection.
  private def accessibleBytesWritableField(name: String) = {
    val field = classOf[BytesWritable].getDeclaredField(name)
    field.setAccessible(true)
    field
  }

  // Reflective handles on BytesWritable's "bytes" and "size" fields, written by extractInto.
  private val _bytesFld = accessibleBytesWritableField("bytes")

  private val _lengthFld = accessibleBytesWritableField("size")

  /** Writes the length of `v` as an Int, followed by the first `length` bytes of its data. */
  override def append(v: BytesWritable, buffer: ByteBuffer): Unit = {
    val numBytes = v.getLength()
    buffer.putInt(numBytes)
    buffer.put(v.getBytes(), 0, numBytes)
  }

  /** Not supported for this type; always throws UnsupportedOperationException. */
  // Result type intentionally left inferred, as in the original declaration.
  override def extract(buffer: ByteBuffer) = throw new UnsupportedOperationException()

  /**
   * Returns the `BytesWritable` for `o`: the writable object of a `LazyBinary`, or `o` itself
   * when it already is a `BytesWritable`. Any other input raises UnsupportedOperationException.
   */
  override def get(o: Object, oi: ObjectInspector): BytesWritable = o match {
    case lazyValue: LazyBinary => lazyValue.getWritableObject()
    case writable: BytesWritable => writable
    case _ => throw new UnsupportedOperationException("Unknown binary type " + oi)
  }

  /**
   * Reads the length prefix and the bytes that follow it into `writable`, setting its "bytes"
   * and "size" fields through reflection. The existing backing array is reused when it is
   * large enough; otherwise a new array of exactly the required length is installed.
   */
  override def extractInto(buffer: ByteBuffer, writable: BytesWritable): Unit = {
    val numBytes = buffer.getInt()
    val current = _bytesFld.get(writable).asInstanceOf[Array[Byte]]
    val target =
      if (current != null && current.length >= numBytes) {
        current
      } else {
        val fresh = new Array[Byte](numBytes)
        _bytesFld.set(writable, fresh)
        fresh
      }
    buffer.get(target, 0, numBytes)
    _lengthFld.set(writable, numBytes)
  }

  /** Creates a fresh, empty `BytesWritable`. */
  override def newWritable(): BytesWritable = new BytesWritable

  /** Size of the stored form: the value's bytes plus 4 bytes for the length prefix. */
  override def actualSize(v: BytesWritable): Int = v.getLength() + 4
}


/**
 * Column type for values handled as raw bytes (type ID 11): the value type is
 * `ByteStream.Output` and the writable type is `ByteArrayRef`. A value is stored as a 4-byte
 * length followed by that many bytes.
 */
object GENERIC extends ColumnType[ByteStream.Output, ByteArrayRef](11, 16) {

  /** Writes the stream's byte count as an Int, followed by that many bytes of its data. */
  override def append(v: ByteStream.Output, buffer: ByteBuffer): Unit = {
    val numBytes = v.getCount()
    buffer.putInt(numBytes)
    buffer.put(v.getData(), 0, numBytes)
  }

  /** Not supported for this type; always throws UnsupportedOperationException. */
  // Result type intentionally left inferred, as in the original declaration.
  override def extract(buffer: ByteBuffer) = throw new UnsupportedOperationException()

  /** Casts `o` to `ByteStream.Output`; the object inspector is not consulted. */
  override def get(o: Object, oi: ObjectInspector): ByteStream.Output =
    o.asInstanceOf[ByteStream.Output]

  /**
   * Reads the length prefix, copies that many bytes out of the buffer into a newly allocated
   * array, and hands the array to `writable` via `setData`.
   */
  override def extractInto(buffer: ByteBuffer, writable: ByteArrayRef): Unit = {
    val numBytes = buffer.getInt()
    val payload = new Array[Byte](numBytes)
    buffer.get(payload, 0, numBytes)
    writable.setData(payload)
  }

  /** Creates a fresh `ByteArrayRef`. */
  override def newWritable(): ByteArrayRef = new ByteArrayRef

  /** Size of the stored form: the stream's byte count plus 4 bytes for the length prefix. */
  override def actualSize(v: ByteStream.Output): Int = v.getCount() + 4
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy