All Downloads are FREE. Search and download functionalities are using the official Maven repository.

xerial.compress.FPC.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2012 Taro L. Saito
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//--------------------------------------
//
// FPC.scala
// Since: 2012/12/06 12:46 PM
//
//--------------------------------------

package xerial.compress


import java.nio.ByteBuffer
import xerial.core.log.Logger

/**
 * An implementation of the FPC 64-bit floating point value compression algorithm:
 * M. Burtscher and P. Ratanaworaban. High Troughput Compression of Double-Precision Floating-Point Data. DCC2007
 * http://csl.cornell.edu/~burtscher/research/FPC
 *
 *
 * @author Taro L. Saito
 */
object FPC extends Logger {

  def compress(input:Array[Double], tableSize:Int = 8 * 1024) : Array[Byte] = {

    val hashTableSizeInLog2 = math.max(4, (32 - Integer.numberOfLeadingZeros(tableSize)))  // truncate to 2^i value
    val hashTableSize = 1 << hashTableSizeInLog2

    trace(f"hash table size: $hashTableSize%,d")
    val tableMask = hashTableSize - 1
    trace(s"table mask:${(for(i <- 0 until 32) yield { if(((1 << i) & tableMask) == 0) "1" else "0"}).mkString}")
    val fcm = new Array[Long](hashTableSize)
    val dfcm = new Array[Long](hashTableSize)

    val N = input.length
    //val M = 1024  // num elements in a block
    // output buffer that has enough size for compressing M Double elements
    val buf = new Array[Byte](6 + ((N + 1)/ 2) + (N * 8))
    val bufOut = ByteBuffer.wrap(buf)

    // write hash table size (1 byte)
    bufOut.put(hashTableSizeInLog2.toByte)
    // write element size (4 bytes)
    bufOut.putInt(N)

    var residualOffset = 6 + ((N + 1)/ 2)
    var pred1 = 0L
    var pred2 = 0L
    var c  = 0
    var last = 0L
    var hash1 = 0
    var hash2 = 0
    while(c < N) {
      // Read Double value as Long
      val v = java.lang.Double.doubleToLongBits(input(c))

      // FCM
      val xor1 = v ^ pred1 // Take XOR with the prediction
      fcm(hash1) = v       // Update the hash table with the current value
      // Predict the next value
      hash1 = ((hash1 << 6) ^ (v >>> 48L).toInt) & tableMask
      pred1 = fcm(hash1)

      // dFCM
      val diff = v - last
      val xor2 = v ^ (last + pred2) // Take XOR with the prediction
      dfcm(hash2) = diff   // Update the hash table
      // Predict the next value
      hash2 = ((hash2 << 2) ^ (diff >>> 40L).toInt) & tableMask
      pred2 = dfcm(hash2)

      var code : Int = 0
      var xor : Long = xor1
      if(xor1 > xor2) {
        code = 0x8
        xor = xor2
      }
      // bcode encodes the residual block size.
      // We give up using 4 bytes as a block size since its frequency is quite low
      var bcode = 7 // 8 bytes
      if((xor >> 56) == 0) bcode = 6  // 7 bytes
      if((xor >> 48) == 0) bcode = 5 // 6 bytes
      if((xor >> 40) == 0) bcode = 4 // 5 bytes
      if((xor >> 24) == 0) bcode = 3 // 3 bytes
      if((xor >> 16) == 0) bcode = 2 // 2 bytes
      if((xor >> 8) == 0) bcode = 1 // 1 byte
      if(xor == 0) bcode = 0 // 0 byte

      code |= bcode
      val pos = 6 + (c >> 1)
      buf(pos) = (buf(pos) | (code << ((1 - (c & 1)) << 2))).toByte
      val residualSize = bcode + (bcode >> 2) // The last term is a compensation for missing 4 bytes code
      var i = 0
      while(i < residualSize) {
        val vi = ((xor >>> ((residualSize - i - 1) << 3)) & 0xFF).toByte
        buf(residualOffset + i) = vi
        i += 1
      }
      residualOffset += residualSize
      last = v
      c += 1
    }

    val compressed = new Array[Byte](residualOffset)
    Array.copy(buf, 0, compressed, 0, compressed.length)
    compressed
  }

  def decompress(compressed:Array[Byte]) : Array[Double] = {

    val in = ByteBuffer.wrap(compressed)
    val hashTableSizeInLog2 = in.get()
    val N = in.getInt
    val hashTableSize = 1 << hashTableSizeInLog2
    trace(f"hash table size: $hashTableSize%,d")

    val tableMask = hashTableSize - 1
    val fcm = new Array[Long](hashTableSize)
    val dfcm = new Array[Long](hashTableSize)

    val decompressed = new Array[Double](N)

    var c = 0
    var residualOffset = 6 + ((N +1) / 2)
    var pred1 = 0L
    var pred2 = 0L
    var last = 0L
    var hash1 = 0
    var hash2 = 0
    while(c < N) {
      val pos = 6 + (c >> 1)
      val code = (compressed(pos) >>> ((1 - (c & 1)) << 2)) & 0x0F
      val bcode = code & 0x7
      val residualSize = bcode + (bcode >> 2)

      var xor = 0L
      var r = 0
      while(r < residualSize) {
        xor <<= 8L
        val vi = compressed(residualOffset + r) & 0xFF
        xor |= vi
        r += 1
      }
      val pred = if((code & 0x8) == 0) pred1 else pred2
      val v = xor ^ pred

      fcm(hash1) = v
      hash1 = ((hash1 << 6) ^ (v >>> 48L).toInt) & tableMask
      pred1 = fcm(hash1)

      val diff = v - last
      dfcm(hash2) = diff
      hash2 = ((hash2 << 2) ^ (diff >>> 40L).toInt) & tableMask
      pred2 = v + dfcm(hash2)

      decompressed(c) = java.lang.Double.longBitsToDouble(v)

      last = v
      residualOffset += residualSize
      c += 1
    }

    decompressed
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy