All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.gravity.hbase.schema.Schema.scala Maven / Gradle / Ivy

/** Licensed to Gravity.com under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership. Gravity.com licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License. You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

package com.gravity.hbase.schema

import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util._
import scala.collection.JavaConversions._
import org.apache.hadoop.conf.Configuration
import java.io._
import org.apache.hadoop.io.Writable
import scala.collection._
import mutable.{ArrayBuffer, Buffer}
import org.joda.time.DateTime
import com.gravity.hbase.schema._
import java.math.BigInteger
import java.nio.ByteBuffer
import org.apache.commons.lang.ArrayUtils
import java.util.{Arrays, HashMap}
import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator
import com.gravity.hbase.AnyNotSupportedException
import org.apache.hadoop.hbase.{HColumnDescriptor, KeyValue}

/*             )\._.,--....,'``.
.b--.        /;   _.. \   _\  (`._ ,.
`=,-,-'~~~   `----(,_..'--(,_..'`-.;.'  */


/** When a query comes back, there are a bucket of column families and columns to retrieve.  This class retrieves them.
  *
  * @tparam T the source [[com.gravity.hbase.schema.HbaseTable]] this result came from
  * @tparam R the `type` of the table's rowid
  *
  * @param result the raw [[org.apache.hadoop.hbase.client.Result]] returned from the `hbase` [[org.apache.hadoop.hbase.client.Get]]
  * @param table the underlying [[com.gravity.hbase.schema.HbaseTable]]
  * @param tableName the name of the actual table
  */
class QueryResult[T <: HbaseTable[T, R, _], R](val result: DeserializedResult, val table: HbaseTable[T, R, _], val tableName: String) extends Serializable {


  /** This is a convenience method to allow consumers to check
    * if a column has a value present in the result without
    * invoking the deserialization of the value
    *
    * @tparam F the type of the column family name
    * @tparam K the type of the column family qualifier
    * @tparam V the type of the column family value
    *
    * @param column the underlying table's column `val`
    *
    * @return `true` if the column value is present and otherwise `false`
    */
  def isColumnPresent[F, K, V](column: (T) => Column[T, R, F, K, V]): Boolean = {
    val co = column(table.pops)
    result.hasColumn(co)
  }

  /** Extracts and deserializes the value of the `column` specified
    *
    * @tparam F the type of the column family name
    * @tparam K the type of the column family qualifier
    * @tparam V the type of the column family value
    *
    * @param column the underlying table's column `val`
    *
    * @return `Some` value of type `V` if the column value is present, otherwise `None`
    *
    * @note if there is no explicitly defined `val` for the desired column, use `columnFromFamily`
    */
  def column[F, K, V](column: (T) => Column[T, R, F, K, V]) = {
    val co = column(table.pops)
    val colVal = result.columnValueSpecific(co)
    if (colVal == null) {
      None
    }
    else {
      Some[V](colVal.asInstanceOf[V])
    }
  }

  /** Extracts and deserializes the value of the `family` + `columnName` specified
    *
    * @tparam F the type of the column family name
    * @tparam K the type of the column family qualifier
    * @tparam V the type of the column family value
    *
    * @param family the underlying table's family `val`
    * @param columnName value of the desired column's qualifier
    *
    * @return `Some` value of type `V` if the column value is present, otherwise `None`
    */
  def columnFromFamily[F, K, V](family: (T) => ColumnFamily[T, R, F, K, V], columnName: K) = {
    val fam = family(table.pops)
    val colVal = result.columnValue(fam, columnName.asInstanceOf[AnyRef])
    if (colVal == null) {
      None
    }
    else {
      Some[V](colVal.asInstanceOf[V])
    }
  }

  /** Extracts and deserializes the Timestamp of the `family` + `columnName` specified
    *
    * @tparam F the type of the column family name
    * @tparam K the type of the column family qualifier
    * @tparam V the type of the column family value
    *
    * @param family the underlying table's family `val`
    * @param columnName value of the desired column's qualifier
    *
    * @return `Some` [[org.joda.time.DateTime]] if the column value is present, otherwise `None`
    */
  def columnFromFamilyTimestamp[F, K, V](family: (T) => ColumnFamily[T, R, F, K, V], columnName: K) = {
    val fam = family(table.pops)
    val colVal = result.columnTimestampByNameAsDate(fam, columnName.asInstanceOf[AnyRef])
    if (colVal == null) {
      None
    }
    else {
      Some(colVal)
    }
  }

  /** Extracts column timestamp of the specified `column`
    *
    * @tparam F the type of the column family name
    * @tparam K the type of the column family qualifier
    * @tparam V the type of the column family value
    *
    * @param column the underlying table's column `val`
    *
    * @return `Some` [[org.joda.time.DateTime]] if the column value is present, otherwise `None`
    */
  def columnTimestamp[F, K, V](column: (T) => Column[T, R, F, K, V]): Option[DateTime] = {
    val co = column(table.pops)
    val res = result.columnTimestampAsDate(co)
    if (res == null) {
      None
    }
    else {
      Some(res)
    }
  }

  /** Extracts most recent column timestamp of the specified `family`
    *
    * @tparam F the type of the column family name
    * @tparam K the type of the column family qualifier
    * @tparam V the type of the column family value
    *
    * @param family the underlying table's family `val`
    *
    * @return `Some` [[org.joda.time.DateTime]] if at least one column value is present, otherwise `None`
    */
  def familyLatestTimestamp[F, K, V](family: (T) => ColumnFamily[T, R, F, K, V]): Option[DateTime] = {
    val fam = family(table.pops)
    val familyPairs = result.familyMap(fam)
    if (familyPairs != null) {
      var ts = -1l
      for (kv <- familyPairs) {
        val tsn = result.columnTimestampByName(fam, kv._1)
        if (tsn > ts) ts = tsn
      }
      if (ts > 0) {
        Some(new DateTime(ts))
      }
      else {
        None
      }

    } else {
      None
    }
  }

  /** Extracts and deserializes the entire family as a `Map[K, V]`
    *
    * @tparam F the type of the column family name
    * @tparam K the type of the column family qualifier
    * @tparam V the type of the column family value
    *
    * @param family the underlying table's family `val`
    *
    */
  def family[F, K, V](family: (T) => ColumnFamily[T, R, F, K, V]): Map[K, V] = {
    val fm = family(table.pops)
    result.familyValueMap[K, V](fm)

  }

  /** Extracts and deserializes only the keys (qualifiers) of the family as a `Set[K]`
    *
    * @tparam F the type of the column family name
    * @tparam K the type of the column family qualifier
    *
    * @param family the underlying table's family `val`
    *
    */
  def familyKeySet[F, K](family: (T) => ColumnFamily[T, R, F, K, _]): Set[K] = {
    val fm = family(table.pops)
    result.familyKeySet[K](fm)
  }

  /** The row identifier deserialized as type `R`
    *
    */
  def rowid = result.getRow[R]()

  def getTableName = tableName
}

/**
  * A query for setting up a scanner across the whole table or key subsets.
  * There is a lot of room for expansion in this class -- caching parameters, scanner specs, key-only, etc.
  */


/**
  * An individual data modification operation (put, increment, or delete usually)
  * These operations are chained together by the client, and then executed in bulk.
  */
class OpBase[T <: HbaseTable[T, R, _], R](val table: HbaseTable[T, R, _], key: Array[Byte], previous: Buffer[OpBase[T, R]] = Buffer[OpBase[T, R]]()) {

  previous += this

  def put(key: R, writeToWAL: Boolean = true) = {
    val po = new PutOp(table, table.rowKeyConverter.toBytes(key), previous, writeToWAL)
    po
  }

  def increment(key: R) = {
    val inc = new IncrementOp(table, table.rowKeyConverter.toBytes(key), previous)
    inc
  }

  def delete(key: R) = {
    val del = new DeleteOp(table, table.rowKeyConverter.toBytes(key), previous)
    del
  }

  def size = previous.size

  def getOperations: Iterable[Writable] = {
    val calls = Buffer[Writable]()
    previous.foreach {
      case put: PutOp[T, R] => {
        calls += put.put
      }
      case delete: DeleteOp[T, R] => {
        calls += delete.delete
      }
      case increment: IncrementOp[T, R] => {
        calls += increment.increment
      }
    }

    calls

  }

  /**
    * This is an experimental call that utilizes a shared instance of a table to flush writes.
    */
  def executeBuffered(tableName: String = table.tableName)  {

    val (deletes, puts, increments) = prepareOperations

    if (deletes.size == 0 && puts.size == 0 && increments.size == 0) {
    } else {
      table.withBufferedTable(tableName) {
        bufferTable =>
          if (puts.size > 0) {
            bufferTable.put(puts)
          }
          if (deletes.size > 0) {
            bufferTable.delete(deletes)
          }
          if (increments.size > 0) {
            increments.foreach {
              increment =>
                bufferTable.increment(increment)
            }
          }
      }
    }

  }

  def prepareOperations = {
    val puts = Buffer[Put]()
    val deletes = Buffer[Delete]()
    val increments = Buffer[Increment]()

    previous.foreach {
      case put: PutOp[T, R] => {
        if (!put.put.isEmpty) {
          puts += put.put
        }
      }
      case delete: DeleteOp[T, R] => {
        if (!delete.delete.isEmpty) {
          deletes += delete.delete
        }
      }
      case increment: IncrementOp[T, R] => {
        increments += increment.increment
      }
    }

    (deletes, puts, increments)
  }

  def execute(tableName: String = table.tableName) = {
    val (deletes, puts, increments) = prepareOperations

    if (deletes.size == 0 && puts.size == 0 && increments.size == 0) {
      //No need to do anything if there are no real operations to execute
    } else {
      table.withTable(tableName) {
        table =>
          if (puts.size > 0) {
            table.put(puts)
            //IN THEORY, the operations will happen in order.  If not, break this into two different batched calls for deletes and puts
          }
          if (deletes.size > 0) {
            table.delete(deletes)
          }
          if (increments.size > 0) {
            increments.foreach(increment => table.increment(increment))
          }
      }
    }


    OpsResult(0, puts.size, increments.size)
  }
}

case class OpsResult(numDeletes: Int, numPuts: Int, numIncrements: Int)

/**
  * An increment operation -- can increment multiple columns in a single go.
  */
class IncrementOp[T <: HbaseTable[T, R, _], R](table: HbaseTable[T, R, _], key: Array[Byte], previous: Buffer[OpBase[T, R]] = Buffer[OpBase[T, R]]()) extends OpBase[T, R](table, key, previous) {
  val increment = new Increment(key)
  increment.setWriteToWAL(false)

  def value[F, K, Long](column: (T) => Column[T, R, F, K, Long], value: java.lang.Long) = {
    val col = column(table.pops)
    increment.addColumn(col.familyBytes, col.columnBytes, value)
    this
  }

  def valueMap[F, K, Long](family: (T) => ColumnFamily[T, R, F, K, Long], values: Map[K, Long]) = {
    val fam = family(table.pops)
    for ((key, value) <- values) {
      increment.addColumn(fam.familyBytes, fam.keyConverter.toBytes(key), value.asInstanceOf[java.lang.Long])
    }
    this
  }
}

/**
  * A Put operation.  Can work across multiple columns or entire column families treated as Maps.
  */
class PutOp[T <: HbaseTable[T, R, _], R](table: HbaseTable[T, R, _], key: Array[Byte], previous: Buffer[OpBase[T, R]] = Buffer[OpBase[T, R]](), writeToWAL: Boolean = true) extends OpBase[T, R](table, key, previous) {
  val put = new Put(key)
  put.setWriteToWAL(writeToWAL)

  def value[F, K, V](column: (T) => Column[T, R, F, K, V], value: V, timeStamp: DateTime = null) = {
    val col = column(table.asInstanceOf[T])
    if (timeStamp == null) {
      put.add(col.familyBytes, col.columnBytes, col.valueConverter.toBytes(value))
    } else {
      put.add(col.familyBytes, col.columnBytes, timeStamp.getMillis, col.valueConverter.toBytes(value))
    }
    this
  }

  def valueMap[F, K, V](family: (T) => ColumnFamily[T, R, F, K, V], values: Map[K, V]) = {
    val fam = family(table.pops)
    for ((key, value) <- values) {
      put.add(fam.familyBytes, fam.keyConverter.toBytes(key), fam.valueConverter.toBytes(value))
    }
    this
  }
}

/**
  * A deletion operation.  If nothing is specified but a key, will delete the whole row.  If a family is specified, will just delete the values in
  * that family.
  */
class DeleteOp[T <: HbaseTable[T, R, _], R](table: HbaseTable[T, R, _], key: Array[Byte], previous: Buffer[OpBase[T, R]] = Buffer[OpBase[T, R]]()) extends OpBase[T, R](table, key, previous) {
  val delete = new Delete(key)

  def family[F, K, V](family: (T) => ColumnFamily[T, R, F, K, V]) = {
    val fam = family(table.pops)
    delete.deleteFamily(fam.familyBytes)
    this
  }

  def values[F, K, V](family: (T) => ColumnFamily[T, R, F, K, V], qualifiers: Set[K]) = {
    val fam = family(table.pops)
    for (q <- qualifiers) {
      delete.deleteColumns(fam.familyBytes, fam.keyConverter.toBytes(q))
    }
    this
  }
}

/**
  * A query for retrieving values.  It works somewhat differently than the data modification operations, in that you do the following:
  * 1. Specify one or more keys
  * 2. Specify columns and families to scan in for ALL the specified keys
  *
  * In other words there's no concept of having multiple rows fetched with different columns for each row (that seems to be a rare use-case and
  * would make the API very complex).
  */

trait KeyValueConvertible[F, K, V] {
  val familyConverter: ByteConverter[F]
  val keyConverter: ByteConverter[K]
  val valueConverter: ByteConverter[V]

  def keyToBytes(key: K) = keyConverter.toBytes(key)

  def valueToBytes(value: V) = valueConverter.toBytes(value)

  def keyToBytesUnsafe(key: AnyRef) = keyConverter.toBytes(key.asInstanceOf[K])

  def valueToBytesUnsafe(value: AnyRef) = valueConverter.toBytes(value.asInstanceOf[V])

  def keyFromBytesUnsafe(bytes: Array[Byte]) = keyConverter.fromBytes(bytes).asInstanceOf[AnyRef]

  def valueFromBytesUnsafe(bytes: Array[Byte]) = valueConverter.fromBytes(bytes).asInstanceOf[AnyRef]

  def family: ColumnFamily[_, _, _, _, _]
}

/**
  * Represents the specification of a Column Family
  */
class ColumnFamily[T <: HbaseTable[T, R, _], R, F, K, V](val table: HbaseTable[T, R, _], val familyName: F, val compressed: Boolean = false, val versions: Int = 1, val index: Int, val ttlInSeconds: Int = HColumnDescriptor.DEFAULT_TTL)(implicit c: ByteConverter[F], d: ByteConverter[K], e: ByteConverter[V]) extends KeyValueConvertible[F, K, V] {
  val familyConverter = c
  val keyConverter = d
  val valueConverter = e
  val familyBytes = c.toBytes(familyName)


  def family = this
}

/**
  * Represents the specification of a Column.
  */
class Column[T <: HbaseTable[T, R, _], R, F, K, V](table: HbaseTable[T, R, _], columnFamily: ColumnFamily[T, R, F, K, _], val columnName: K, val columnIndex: Int)(implicit fc: ByteConverter[F], kc: ByteConverter[K], kv: ByteConverter[V]) extends KeyValueConvertible[F, K, V] {
  val columnBytes = kc.toBytes(columnName)
  val familyBytes = columnFamily.familyBytes
  val columnNameRef = columnName.asInstanceOf[AnyRef]

  val familyConverter = fc
  val keyConverter = kc
  val valueConverter = kv

  def getQualifier: K = columnName

  def family = columnFamily.asInstanceOf[ColumnFamily[_, _, _, _, _]]


}

trait Schema {
  val tables = scala.collection.mutable.Set[HbaseTable[_, _, _]]()

  def table[T <: HbaseTable[T, _, _]](table: T) = {
    tables += table
    table
  }

}


case class DeserializedResult(rowid: AnyRef, famCount: Int) {

  def isEmpty = values.size == 0

  def getRow[R]() = rowid.asInstanceOf[R]


  def familyValueMap[K, V](fam: ColumnFamily[_, _, _, _, _]) = {
    val famMap = family(fam)
    if (famMap != null) {
      famMap.asInstanceOf[java.util.HashMap[K, V]]
    } else {
      new java.util.HashMap[K, V]()
    }
  }

  def familyKeySet[K](fam: ColumnFamily[_, _, _, _, _]) = {
    val famMap = family(fam)
    if (famMap != null) {
      famMap.keySet.asInstanceOf[java.util.Set[K]]
    } else {
      new java.util.HashSet[K]()
    }
  }

  def family(family: ColumnFamily[_, _, _, _, _]) = {
    values(family.index)
  }

  def familyOf(column: Column[_, _, _, _, _]) = family(column.family)

  def familyMap(fam: ColumnFamily[_, _, _, _, _]) = family(fam)

  def hasColumn(column: Column[_, _, _, _, _]) = {
    val valueMap = familyOf(column)
    if (valueMap == null || valueMap.size == 0) false else true
  }

  def columnValue(fam: ColumnFamily[_, _, _, _, _], columnName: AnyRef) = {
    val valueMap = family(fam)
    if (valueMap == null) {
      null
    } else {
      valueMap.get(columnName)
    }
  }

  def columnTimestamp(fam: ColumnFamily[_, _, _, _, _], columnName: AnyRef) = {
    val res = timestampLookaside(fam.index)
    if (res != null) {
      val colRes = res.get(columnName)
      colRes
    }
    else {
      0l
    }
  }


  def columnTimestampAsDate(column: Column[_, _, _, _, _]) = {
    val cts = columnTimestamp(column.family, column.columnNameRef)
    if (cts > 0) {
      new DateTime(cts)
    } else {
      null
    }
  }

  def columnTimestampByName(fam: ColumnFamily[_, _, _, _, _], columnName: AnyRef) = {
    val cts = columnTimestamp(fam, columnName)
    cts
  }

  def columnTimestampByNameAsDate(fam: ColumnFamily[_, _, _, _, _], columnName: AnyRef) = {
    val cts = columnTimestamp(fam, columnName)
    if (cts > 0) {
      new DateTime(cts)
    }
    else {
      null
    }
  }


  def columnValueSpecific(column: Column[_, _, _, _, _]) = {
    columnValue(column.family, column.columnNameRef)
  }


  var values = new Array[java.util.HashMap[AnyRef, AnyRef]](famCount)

  var timestampLookaside = new Array[java.util.HashMap[AnyRef, Long]](famCount)


  /** This is a map whose key is the family type, and whose values are maps of column keys to columnvalues paired with their timestamps */
  //  val values = new java.util.HashMap[ColumnFamily[_, _, _, _, _], java.util.HashMap[AnyRef, AnyRef]]()

  //  val timestampLookaside = new java.util.HashMap[ColumnFamily[_, _, _, _, _], java.util.HashMap[AnyRef, Long]]()

  def add(family: ColumnFamily[_, _, _, _, _], qualifier: AnyRef, value: AnyRef, timeStamp: Long) {
    var map = values(family.index)
    if (map == null) {
      map = new java.util.HashMap[AnyRef, AnyRef]()
      values(family.index) = map
    }
    map.put(qualifier, value)

    var tsMap = timestampLookaside(family.index)
    if (tsMap == null) {
      tsMap = new java.util.HashMap[AnyRef, Long]()
      timestampLookaside(family.index) = tsMap
    }
    tsMap.put(qualifier, timeStamp)
    //Add timestamp lookaside
  }

  var errorBuffer: Buffer[(Array[Byte], Array[Byte], Array[Byte], Long)] = _

  def addErrorBuffer(family: Array[Byte], qualifier: Array[Byte], value: Array[Byte], timestamp: Long) {
    if (errorBuffer == null) {
      errorBuffer = Buffer()
    }
    errorBuffer.append((family, qualifier, value, timestamp))
  }

  def hasErrors = (errorBuffer != null)
}

/** Standard base class for all Row objects.
  *
  * Inside of a *Row object, it is good to use lazy val and def as opposed to val.
  * Because HRow objects are now the first-class instantiation of a query result, and because they are the type cached in Ehcache, they are good places to cache values.
  */
abstract class HRow[T <: HbaseTable[T, R, _], R](result: DeserializedResult, table: HbaseTable[T, R, _]) extends QueryResult[T, R](result, table, table.tableName) {

  def prettyPrint() {println(prettyFormat())}

  def prettyPrintNoValues() {println(prettyFormatNoValues())}

  def prettyFormatNoValues() = {
    val sb = new StringBuilder()
    sb.append("Row Key: " + result.rowid + " (" + result.values.size + " families)" + "\n")
    for (i <- 0 until result.values.length) {
      val familyMap = result.values(i)
      if (familyMap != null) {
        val family = table.familyByIndex(i)
        sb.append("\tFamily: " + family.familyName + " (" + familyMap.values.size + " items)\n")
      }
    }
    sb.toString
  }


  def prettyFormat() = {
    val sb = new StringBuilder()
    sb.append("Row Key: " + result.rowid + " (" + result.values.size + " families)" + "\n")
    for (i <- 0 until result.values.length) {
      val familyMap = result.values(i)
      if (familyMap != null) {
        val family = table.familyByIndex(i)
        sb.append("\tFamily: " + family.familyName + " (" + familyMap.values.size + " items)\n")
        for ((key, value) <- familyMap) {
          sb.append("\t\tColumn: " + key + "\n")
          sb.append("\t\t\tValue: " + value + "\n")
          sb.append("\t\t\tTimestamp: " + result.columnTimestampByNameAsDate(family, key) + "\n")
        }

      }
    }
    sb.toString
  }
}


/**
  * Represents a Table.  Expects an instance of HBaseConfiguration to be present.
  * A parameter-type T should be the actual table that is implementing this one (this is to allow syntactic sugar for easily specifying columns during
  * queries).
  * A parameter-type R should be the type of the key for the table.
  */
abstract class HbaseTable[T <: HbaseTable[T, R, RR], R, RR <: HRow[T, R]](val tableName: String, var cache: QueryResultCache[T, R, RR] = new NoOpCache[T, R, RR](), rowKeyClass: Class[R])(implicit conf: Configuration, keyConverter: ByteConverter[R]) {

  def rowBuilder(result: DeserializedResult): RR

  val rowKeyConverter = keyConverter
  /** Provides the client with an instance of the superclass this table was defined against. */
  def pops = this.asInstanceOf[T]

  /** A method injected by the super class that will build a strongly-typed row object.  */
  def buildRow(result: Result): RR = {
    rowBuilder(convertResult(result))
  }

  /** A pool of table objects with AutoFlush set to true */
  val tablePool = new HTablePool(conf, 50)

  /** A pool of table objects with AutoFlush set to false --therefore usable for asynchronous write buffering */
  val bufferTablePool = new HTablePool(conf, 1, new HTableInterfaceFactory {
    def createHTableInterface(config: Configuration, tableName: Array[Byte]): HTableInterface = {
      val table = new HTable(conf, tableName)
      table.setWriteBufferSize(2000000L)
      table.setAutoFlush(false)
      table
    }

    def releaseHTableInterface(table: HTableInterface) {
      try {
        table.close()
      } catch {
        case ex: IOException => throw new RuntimeException(ex)
      }
    }
  })


  var famLookup: Array[Array[Byte]] = null
  var colFamLookup: Array[Array[Byte]] = null
  var famIdx: IndexedSeq[KeyValueConvertible[_, _, _]] = null
  var colFamIdx: IndexedSeq[KeyValueConvertible[_, _, _]] = null

  val bc = new ByteArrayComparator()

  implicit val o = new math.Ordering[Array[Byte]] {
    def compare(a: Array[Byte], b: Array[Byte]): Int = {
      bc.compare(a, b)
    }
  }

  /** Looks up a KeyValueConvertible by the family and column bytes provided.
    * Because of the rules of the system, the lookup goes as follows:
    * 1. Find a column first.  If you find a column first, it means there is a strongly-typed column defined.
    * 2. If no column, then find the family.
    *
    */
  def converterByBytes(famBytes: Array[Byte], colBytes: Array[Byte]): KeyValueConvertible[_, _, _] = {

    if (famLookup == null) {
      famLookup = Array.ofDim[Array[Byte]](families.size)
      for ((fam, idx) <- families.zipWithIndex) {
        famLookup(idx) = fam.familyBytes
      }
      Arrays.sort(famLookup, bc)
      famIdx = families.sortBy(_.familyBytes).toIndexedSeq
    }

    if (colFamLookup == null) {
      colFamLookup = Array.ofDim[Array[Byte]](columns.size)


      for ((col, idx) <- columns.zipWithIndex) {
        colFamLookup(idx) = ArrayUtils.addAll(col.familyBytes, col.columnBytes)
      }
      Arrays.sort(colFamLookup, bc)
      colFamIdx = columns.sortBy(col => ArrayUtils.addAll(col.familyBytes, col.columnBytes)).toIndexedSeq
    }

    val fullKey = ArrayUtils.addAll(famBytes, colBytes)
    val resIdx = Arrays.binarySearch(colFamLookup, fullKey, bc)
    if (resIdx > -1) {
      colFamIdx(resIdx)
    } else {
      val resFamIdx = Arrays.binarySearch(famLookup, famBytes, bc)
      if (resFamIdx > -1) {
        famIdx(resFamIdx)
      }
      else {
        throw new RuntimeException("Unable to locate family or column definition")
      }
    }


  }

  /** Converts a result to a DeserializedObject. A conservative implementation that is slower than convertResultRaw but will always be more stable against
    * binary changes to Hbase's KeyValue format.
    */
  def convertResult(result: Result) = {
    if (result.isEmpty) {
      throw new RuntimeException("Attempting to deserialize an empty result.  If you want to handle the eventuality of an empty result, call singleOption() instead of single()")
    }
    val keyValues = result.raw()
    val buff = result.getBytes.get()

    val rowId = keyConverter.fromBytes(buff, keyValues(0).getRowOffset, keyValues(0).getRowLength).asInstanceOf[AnyRef]

    val ds = DeserializedResult(rowId, families.size)

    var itr = 0

    while (itr < keyValues.length) {
      val kv = keyValues(itr)
      val family = kv.getFamily
      val key = kv.getQualifier
      try {
        val c = converterByBytes(family, key)
        if (!c.isInstanceOf[ByteConverter[Any]]) {
          val f = c.family
          val k = c.keyConverter.fromBytes(buff, kv.getQualifierOffset, kv.getQualifierLength).asInstanceOf[AnyRef]
          val r = c.valueConverter.fromBytes(buff, kv.getValueOffset, kv.getValueLength).asInstanceOf[AnyRef]
          val ts = kv.getTimestamp

          ds.add(f, k, r, ts)
        } else {
          //TODO: Just like AnyNotSupportException, add a counter here because this means a column was removed, but the data is still in the database.
        }
      } catch {
        case ex: AnyNotSupportedException => {
          //This means a column came back that is no longer part of the specification
          //TODO: Keep counters of columns that were encountered and we were unable to deserialize
          //          println("Attempted to lookup column: " + new String(key) + " in family: " + new String(family) + " and didn't find a serializer")
          //          ds.addErrorBuffer(family, key, value, kv.getTimestamp)
        }
        case ex: Exception => {
          //          println(ex.getMessage)
          //          println(ex.getStackTraceString)
          //        ds.addErrorBuffer(family, key, value, kv.getTimestamp)
        }
      } finally {
        itr = itr + 1
      }

    }
    ds
  }

  /** Converts a result to a DeserializedObject, avoiding the Result object's inner parsing by moving it out and directly into the deserializers.
    *
    */
  def convertResultRaw(result: Result) = {


    val bytes = result.getBytes()
    val buf = bytes.get()
    var offset = bytes.getOffset
    val finalOffset = bytes.getSize + offset
    var row: Array[Byte] = null
    var ds: DeserializedResult = null

    while (offset < finalOffset) {
      val keyLength = Bytes.toInt(buf, offset)
      offset = offset + Bytes.SIZEOF_INT

      val keyOffset = offset + KeyValue.ROW_OFFSET
      val rowLength = Bytes.toShort(buf, keyOffset)
      val familyOffset = offset + KeyValue.ROW_OFFSET + Bytes.SIZEOF_SHORT + rowLength + Bytes.SIZEOF_BYTE
      val familyLength = buf(familyOffset - 1)
      val family = new Array[Byte](familyLength)
      System.arraycopy(buf, familyOffset, family, 0, familyLength)

      val qualifierOffset = familyOffset + familyLength
      val qualifierLength = keyLength - (KeyValue.KEY_INFRASTRUCTURE_SIZE + rowLength + familyLength)
      val key = new Array[Byte](qualifierLength)
      System.arraycopy(buf, qualifierOffset, key, 0, qualifierLength)

      val valueOffset = keyOffset + keyLength
      val valueLength = Bytes.toInt(buf, offset + Bytes.SIZEOF_INT)
      val value = new Array[Byte](valueLength)
      System.arraycopy(buf, valueOffset, value, 0, valueLength)

      val tsOffset = keyOffset + keyLength - KeyValue.TIMESTAMP_TYPE_SIZE
      val ts = Bytes.toLong(buf, tsOffset)

      if (row == null) {
        val rowOffset = keyOffset + Bytes.SIZEOF_SHORT
        row = new Array[Byte](rowLength)
        System.arraycopy(buf, rowOffset, row, 0, rowLength)
        val rowId = keyConverter.fromBytes(result.getRow).asInstanceOf[AnyRef]
        ds = DeserializedResult(rowId, families.size)
      }

      try {
        val c = converterByBytes(family, key)
        val f = c.family
        val k = c.keyConverter.fromBytes(key).asInstanceOf[AnyRef]
        val r = c.valueConverter.fromBytes(value).asInstanceOf[AnyRef]
        println("Adding value " + r)
        ds.add(f, k, r, ts)
      } catch {
        case ex: Exception => {
          println("Adding error buffer")
          ds.addErrorBuffer(family, key, value, ts)
        }
      }

      offset = offset + keyLength
    }
    ds
  }


  def familyBytes = families.map(family => family.familyBytes)

  def familyByIndex(idx: Int) = familyArray(idx)

  lazy val familyArray = {
    val arr = new Array[ColumnFamily[_, _, _, _, _]](families.length)
    families.foreach {
      fam =>
        arr(fam.index) = fam
    }
    arr
  }

  def columnByIndex(idx: Int) = columnArray(idx)

  lazy val columnArray = {
    val arr = new Array[Column[_, _, _, _, _]](columns.length)
    columns.foreach {col => arr(col.columnIndex) = col}
    arr
  }


  //alter 'articles', NAME => 'html', VERSIONS =>1, COMPRESSION=>'lzo'

  /*
  WARNING - Currently assumes the family names are strings (which is probably a best practice, but we support byte families)
   */
  def createScript(tableNameOverride: String = tableName) = {
    val create = "create '" + tableNameOverride + "', "
    create + (for (family <- families) yield {
      familyDef(family)
    }).mkString(",")
  }

  def deleteScript(tableNameOverride: String = tableName) = {
    val delete = "disable '" + tableNameOverride + "'\n"

    delete + "delete '" + tableNameOverride + "'"
  }

  def alterScript(tableNameOverride: String = tableName, families: Seq[ColumnFamily[T, _, _, _, _]] = families) = {

    var alter = "flush '" + tableNameOverride + "'\n"
    alter += "disable '" + tableNameOverride + "'\n"
    alter += "alter '" + tableNameOverride + "', "
    alter += (for (family <- families) yield {
      familyDef(family)
    }).mkString(",")
    alter += "\nenable '" + tableNameOverride + "'"
    alter
  }

  def familyDef(family: ColumnFamily[T, _, _, _, _]) = {
    val compression = if (family.compressed) ", COMPRESSION=>'lzo'" else ""
    val ttl = if (family.ttlInSeconds < HColumnDescriptor.DEFAULT_TTL) ", TTL=>'" + family.ttlInSeconds + "'" else ""
    "{NAME => '%s', VERSIONS => %d%s%s}".format(Bytes.toString(family.familyBytes), family.versions, compression, ttl)
  }


  def getTable(name: String) = tablePool.getTable(name)

  def getBufferedTable(name: String) = bufferTablePool.getTable(name)

  private val columns = ArrayBuffer[Column[T, R, _, _, _]]()
  val families = ArrayBuffer[ColumnFamily[T, R, _, _, _]]()

  val columnsByName = mutable.Map[AnyRef, Column[T, R, _, _, _]]()

  private val columnsByBytes = mutable.Map[ByteBuffer, KeyValueConvertible[_, _, _]]()
  private val familiesByBytes = mutable.Map[ByteBuffer, KeyValueConvertible[_, _, _]]()

  var columnIdx = 0

  def column[F, K, V](columnFamily: ColumnFamily[T, R, F, K, _], columnName: K, valueClass: Class[V])(implicit fc: ByteConverter[F], kc: ByteConverter[K], kv: ByteConverter[V]) = {
    val c = new Column[T, R, F, K, V](this, columnFamily, columnName, columnIdx)
    columns += c

    val famBytes = columnFamily.familyBytes
    val colBytes = c.columnBytes
    val fullKey = ArrayUtils.addAll(famBytes, colBytes)
    val bufferKey = ByteBuffer.wrap(fullKey)

    columnsByName.put(columnName.asInstanceOf[AnyRef], c)
    columnsByBytes.put(bufferKey, c)
    columnIdx = columnIdx + 1
    c
  }

  var familyIdx = 0

  def family[F, K, V](familyName: F, compressed: Boolean = false, versions: Int = 1, rowTtlInSeconds: Int = Int.MaxValue)(implicit c: ByteConverter[F], d: ByteConverter[K], e: ByteConverter[V]) = {
    val family = new ColumnFamily[T, R, F, K, V](this, familyName, compressed, versions, familyIdx, rowTtlInSeconds)
    familyIdx = familyIdx + 1
    families += family
    familiesByBytes.put(ByteBuffer.wrap(family.familyBytes), family)
    family
  }

  def getTableOption(name: String) = {
    try {
      Some(getTable(name))
    } catch {
      case e: Exception => None
    }
  }


  def withTableOption[Q](name: String)(work: (Option[HTableInterface]) => Q): Q = {
    val table = getTableOption(name)
    try {
      work(table)
    } finally {
      table foreach (tbl => tablePool.putTable(tbl))
    }
  }

  def withBufferedTable[Q](mytableName: String = tableName)(work: (HTableInterface) => Q): Q = {
    val table = getBufferedTable(mytableName)
    try {
      work(table)
    } finally {
      bufferTablePool.putTable(table)
    }
  }

  def withTable[Q](mytableName: String = tableName)(funct: (HTableInterface) => Q): Q = {
    withTableOption(mytableName) {
      case Some(table) => {
        funct(table)
      }
      case None => throw new RuntimeException("Table " + tableName + " does not exist")
    }
  }

  def scan = new ScanQuery(this)

  def query = new Query(this)

  def query2 = new Query2(this)

  def put(key: R, writeToWAL: Boolean = true) = new PutOp(this, keyConverter.toBytes(key))

  def delete(key: R) = new DeleteOp(this, keyConverter.toBytes(key))

  def increment(key: R) = new IncrementOp(this, keyConverter.toBytes(key))


}

case class YearDay(year: Int, day: Int)

case class CommaSet(items: Set[String]) {
  def mkString: String = items.mkString

  def mkString(sep: String): String = items.mkString(sep)

  def mkString(start: String, sep: String, end: String): String = items.mkString(start, sep, end)
}

object CommaSet {
  val empty = CommaSet(Set.empty[String])

  def apply(items: String*): CommaSet = CommaSet(items.toSet)
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy