com.gravity.hbase.schema.Serialization.scala Maven / Gradle / Ivy
The newest version!
/** Licensed to Gravity.com under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Gravity.com licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.gravity.hbase.schema
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util._
import scala.collection.JavaConversions._
import org.apache.hadoop.conf.Configuration
import java.io._
import org.apache.hadoop.io.{BytesWritable, Writable}
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp
import org.apache.hadoop.hbase.filter.{Filter, FilterList, SingleColumnValueFilter}
import scala.collection._
import org.joda.time.DateTime
import scala.collection.mutable.{ListBuffer, Buffer}
import java.util.{HashMap, NavigableSet}
/* )\._.,--....,'``.
.b--. /; _.. \ _\ (`._ ,.
`=,-,-'~~~ `----(,_..'--(,_..'`-.;.' */
/** Expresses an input stream that can read ordered primitives from a binary input, and can also use the ByteConverter[T] interface to read serializable objects.
*
*/
class PrimitiveInputStream(input: InputStream) extends DataInputStream(input) {
/**
* Read an object, assuming the existence of a ComplexByteConverter[T] implementation
* The byte converter is stateless and should be therefore defined somewhere as an implicit object
*/
def readObj[T](implicit c: ComplexByteConverter[T]) = {
c.read(this)
}
def skipLong() {this.skipBytes(8)}
//WORK IN PROGRESS
def readRow[T <: HbaseTable[T, R, RR], R, RR <: HRow[T, R]](table: HbaseTable[T, R, RR]) = {
val rowBytesLength = readInt()
val rowBytes = new Array[Byte](rowBytesLength)
read(rowBytes)
val rowId = table.rowKeyConverter.fromBytes(rowBytes)
val ds = DeserializedResult(rowId.asInstanceOf[AnyRef], table.families.length)
val famCount = readInt()
for (i <- 0 until famCount) {
val fam = table.familyByIndex(i)
val kvLength = readInt()
for (ii <- 0 until kvLength) {
val isTypedColumn = readBoolean
val converter = if (isTypedColumn) {
val colIdx = readInt
val col = table.columnByIndex(colIdx)
col
} else {
fam
}
val keyLength = readInt
val keyBytes = new Array[Byte](keyLength)
read(keyBytes)
val valueLength = readInt
val valueBytes = new Array[Byte](valueLength)
read(valueBytes)
val key = converter.keyFromBytesUnsafe(keyBytes)
val value = converter.valueFromBytesUnsafe(valueBytes)
ds.add(fam,key,value,0l)
}
}
table.rowBuilder(ds)
}
}
/** Expresses an output stream that can write ordered primitives into a binary output, and can also use the ByteConverter[T] interface to write serializable objects.
*/
class PrimitiveOutputStream(output: OutputStream) extends DataOutputStream(output) {
//WORK IN PROGRESS
def writeRow[T <: HbaseTable[T, R, RR], R, RR <: HRow[T, R]](table: HbaseTable[T,R,RR],row: RR) {
//Serialize row id
val rowIdBytes = row.table.rowKeyConverter.toBytes(row.rowid)
writeInt(rowIdBytes.length)
write(rowIdBytes)
//Write number of families
writeInt(row.result.values.length)
var idx = 0
while (idx < row.result.values.length) {
val family = row.result.values(idx)
val colFam = row.table.familyByIndex(idx)
if(family == null) {
writeInt(0)
}else {
writeInt(family.size())
family.foreach {
case (colKey: AnyRef, colVal: AnyRef) =>
//See if it's a strongly typed column
val converters: KeyValueConvertible[_, _, _] = row.table.columnsByName.get(colKey) match {
case Some(col) => {
writeBoolean(true)
writeInt(col.columnIndex)
col
}
case None => {
writeBoolean(false)
colFam
}
}
val keyBytes = converters.keyToBytesUnsafe(colKey)
writeInt(keyBytes.length)
write(keyBytes)
val valBytes = converters.valueToBytesUnsafe(colVal)
writeInt(valBytes.length)
write(valBytes)
}
}
idx += 1
}
}
/**
* Write an object, assuming the existence of a ComplexByteConverter[T] implementation.
* The byte converter is stateless and should be therefore defined somewhere as an implicit object
*/
def writeObj[T](obj: T)(implicit c: ComplexByteConverter[T]) {
c.write(obj, this)
}
}
/**
* Class to be implemented by custom converters
*/
abstract class ByteConverter[T] {
def toBytes(t: T): Array[Byte]
def fromBytes(bytes: Array[Byte]): T = fromBytes(bytes, 0, bytes.length)
def fromBytes(bytes: Array[Byte], offset: Int, length: Int): T
def fromByteString(str: String): T = {
fromBytes(Bytes.toBytesBinary(str))
}
def toByteString(item: T) = {
Bytes.toStringBinary(toBytes(item))
}
def toBytesWritable(t: T): BytesWritable = {
new BytesWritable(toBytes(t))
}
def fromBytesWritable(bytes: BytesWritable): T = {
fromBytes(bytes.getBytes)
}
}
/**
* Simple high performance conversions from complex types to bytes
*/
abstract class ComplexByteConverter[T] extends ByteConverter[T] {
override def toBytes(t: T): Array[Byte] = {
val bos = new ByteArrayOutputStream()
val dout = new PrimitiveOutputStream(bos)
write(t, dout)
bos.toByteArray
}
def write(data: T, output: PrimitiveOutputStream)
override def fromBytes(bytes: Array[Byte], offset: Int, length: Int): T = {
val din = new PrimitiveInputStream(new ByteArrayInputStream(bytes, offset, length))
read(din)
}
override def fromBytes(bytes: Array[Byte]): T = {
val din = new PrimitiveInputStream(new ByteArrayInputStream(bytes))
read(din)
}
def read(input: PrimitiveInputStream): T
def safeReadField[A](input: PrimitiveInputStream)(readField: (PrimitiveInputStream)=>A, valueOnFail: A): A = {
if (input.available() < 1) return valueOnFail
try {
readField(input)
}
catch {
case _: IOException => valueOnFail
}
}
}
class MapConverter[K, V](implicit c: ByteConverter[K], d: ByteConverter[V]) extends ComplexByteConverter[Map[K, V]] {
override def write(map: Map[K, V], output: PrimitiveOutputStream) {
val length = map.size
output.writeInt(length)
for ((k, v) <- map) {
val keyBytes = c.toBytes(k)
val valBytes = d.toBytes(v)
output.writeInt(keyBytes.length)
output.write(keyBytes)
output.writeInt(valBytes.length)
output.write(valBytes)
}
}
override def read(input: PrimitiveInputStream) = {
val length = input.readInt()
val kvarr = Array.ofDim[(K, V)](length)
var i = 0
while (i < length) {
val keyLength = input.readInt
val keyArr = new Array[Byte](keyLength)
input.read(keyArr)
val key = c.fromBytes(keyArr)
val valLength = input.readInt
val valArr = new Array[Byte](valLength)
input.read(valArr)
val value = d.fromBytes(valArr)
kvarr(i) = (key -> value)
i = i + 1
}
Map[K, V](kvarr: _*)
}
}
//TODO: T is not available at runtime, and Arrays are not generic. Figure out classmanifest workaround
//class ArrayConverter[T](implicit c: ByteConverter[T]) extends ComplexByteConverter[Array[T]] {
// override def write(set: Array[T], output: PrimitiveOutputStream) {
// val length = set.length
// output.writeInt(length)
// var i = 0
// while(i < length) {
// val itm = set(i)
// val bytes = c.toBytes(itm)
// output.writeInt(bytes.length)
// output.write(bytes)
// i = i+1
// }
// }
//
// override def read(input: PrimitiveInputStream) : Array[T] = {
// val length = input.readInt()
// val arr = Array.ofDim[T](length)
// var i = 0
// while(i < length) {
// val byteLength = input.readInt()
// val itmArr = new Array[Byte](byteLength)
// val itm = c.fromBytes(itmArr)
// arr(i) = itm
// i = i + 1
// }
// arr
// }
//}
class SetConverter[T](implicit c: ByteConverter[T]) extends ComplexByteConverter[Set[T]] with CollStream[T] {
override def write(set: Set[T], output: PrimitiveOutputStream) {
writeColl(set, set.size, output, c)
}
override def read(input: PrimitiveInputStream): Set[T] = {
readColl(input, c).toSet
}
}
class SeqConverter[T](implicit c: ByteConverter[T]) extends ComplexByteConverter[Seq[T]] with CollStream[T] {
override def write(seq: Seq[T], output: PrimitiveOutputStream) {
writeColl(seq, seq.length, output, c)
}
override def read(input: PrimitiveInputStream) = readColl(input, c).toSeq
}
class BufferConverter[T](implicit c: ByteConverter[T]) extends ComplexByteConverter[Buffer[T]] with CollStream[T] {
override def write(buf: Buffer[T], output: PrimitiveOutputStream) {
writeBuf(buf, output)
}
def writeBuf(buf: Buffer[T], output: PrimitiveOutputStream) {
writeColl(buf, buf.length, output, c)
}
override def read(input: PrimitiveInputStream) = readColl(input, c)
}
trait CollStream[T] {
def writeColl(items: Iterable[T], length: Int, output: PrimitiveOutputStream, c: ByteConverter[T]) {
output.writeInt(length)
val iter = items.iterator
while (iter.hasNext) {
val t = iter.next()
val bytes = c.toBytes(t)
output.writeInt(bytes.length)
output.write(bytes)
}
}
def readColl(input: PrimitiveInputStream, c: ByteConverter[T]): Buffer[T] = {
val length = input.readInt()
val cpx = if (c.isInstanceOf[ComplexByteConverter[T]]) c.asInstanceOf[ComplexByteConverter[T]] else null
var i = 0
val buff = Buffer[T]()
while (i < length) {
val arrLength = input.readInt()
if (cpx != null) {
buff += cpx.read(input)
} else {
val arr = new Array[Byte](arrLength)
input.read(arr)
buff += c.fromBytes(arr)
}
i = i + 1
}
buff
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy