All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.comcast.xfinity.sirius.uberstore.seqindex.DiskOnlySeqIndex.scala Maven / Gradle / Ivy

The newest version!
/*
 *  Copyright 2012-2014 Comcast Cable Communications Management, LLC
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package com.comcast.xfinity.sirius.uberstore.seqindex

import java.io.RandomAccessFile
import com.comcast.xfinity.sirius.uberstore.common.Fnv1aChecksummer
import scala.annotation.tailrec

object DiskOnlySeqIndex {

  /**
   * Create an instance from sequence file name
   *
   * The file is opened through a `RandomAccessFile` in "rw" mode.  Building
   * the index reads from that handle right away, so if construction fails the
   * handle is closed here before the failure is rethrown; otherwise ownership
   * of the handle passes to the returned index, which releases it in `close()`.
   *
   * @param seqIndexFileName the file name to use
   * @return a DiskOnlySeqIndex backed by the named file
   */
  def apply(seqIndexFileName: String): DiskOnlySeqIndex = {
    import scala.util.control.NonFatal

    val handle = new RandomAccessFile(seqIndexFileName, "rw")
    try {
      new DiskOnlySeqIndex(handle, SeqIndexBinaryFileOps())
    } catch {
      case NonFatal(e) =>
        // Nobody else holds a reference to the handle at this point, so it
        // would leak if we did not close it.  The close is best effort: the
        // construction failure is the error the caller needs to see.
        try handle.close() catch { case NonFatal(_) => }
        throw e
    }
  }
}

/**
 * UberStore SeqIndex implementation that has next to no memory overhead
 * at the expense of relying heavily on disk operations.  This implementation
 * should play well with the file system cache, which probably does a better
 * job than anything we could put together at the JVM level, so disk operations
 * shouldn't be too terrible.
 *
 * Offset lookups are done using an on disk binary search.
 *
 * @param handle the RandomAccessFile associated with the index file.
 * @param fileOps the SeqIndexBinaryFileOps to use when accessing the index file,
 *          DiskOnlySeqIndex does all disk access through this helper
 */
class DiskOnlySeqIndex private(handle: RandomAccessFile,
                               fileOps: SeqIndexBinaryFileOps) extends SeqIndex {

  // Width in bytes of one index entry as this class addresses the file: every
  // position it seeks to is a multiple of this value.
  // NOTE(review): must agree with what fileOps.readEntry/put actually
  // read and write -- confirm against SeqIndexBinaryFileOps.
  private[this] final val EntrySizeBytes = 24L

  // via some scala magic this is also exposed as a method :)
  var isClosed: Boolean = false

  // Number of entries, derived from the file length at construction time and
  // bumped by one on every put.
  var size: Long = handle.length() / EntrySizeBytes

  // Seq of the last entry in the file, or None for an empty file.  Read once
  // at construction time and afterwards maintained by put.
  var maxSeq: Option[Long] = {
    if (handle.length == 0)
      None
    else {
      handle.seek(handle.length - EntrySizeBytes)
      val (seq, _) = fileOps.readEntry(handle)
      Some(seq)
    }
  }

  /**
   * Look up the offset recorded for a sequence number using an on disk
   * binary search.  The search is only meaningful if the entries are stored
   * in ascending seq order.
   *
   * @param soughtSeq the sequence number to look up
   * @return Some(offset) if an entry with exactly soughtSeq was found,
   *           None otherwise
   */
  def getOffsetFor(soughtSeq: Long): Option[Long] = synchronized {
    searchForOffset(soughtSeq, rememberAbove = false, rememberBelow = false)
  }

  /**
   * @return the seq of the last entry in the index, None if the index
   *           is empty
   */
  def getMaxSeq(): Option[Long] = maxSeq

  /**
   * Append an entry to the end of the index file, then record seq as the
   * new maximum and increment size.  No ordering check is made here: the
   * binary searches rely on callers only ever adding increasing seqs.
   *
   * @param seq the sequence number of the entry
   * @param offset the offset to associate with seq
   */
  def put(seq: Long, offset: Long): Unit = synchronized {
    handle.seek(handle.length)
    fileOps.put(handle, seq, offset)
    maxSeq = Some(seq)
    size += 1
  }

  /**
   * Find the offsets bounding all entries with firstSeq <= seq <= lastSeq.
   *
   * @param firstSeq lower end of the sequence range, inclusive
   * @param lastSeq upper end of the sequence range, inclusive
   * @return (offset of the first entry in range, offset of the last entry
   *           in range), or (0, -1) if either bound cannot be found or the
   *           bounds are out of order
   */
  def getOffsetRange(firstSeq: Long, lastSeq: Long): (Long, Long) = synchronized {
    val rangeOpt = for {
      lowerBound <- getLowerBoundOffset(firstSeq)
      upperBound <- getUpperBoundOffset(lastSeq)
      if lowerBound <= upperBound
    } yield (lowerBound, upperBound)
    rangeOpt.getOrElse((0L, -1L))
  }

  /**
   * Close the underlying file handle.  Calling this more than once is
   * harmless, only the first call touches the handle.
   */
  def close(): Unit = synchronized {
    if (!isClosed) {
      handle.close()
      isClosed = true
    }
  }

  // Offset of the entry with seq == soughtSeq, or failing that of the closest
  // entry above it.  Callers must hold this object's lock.
  private def getLowerBoundOffset(soughtSeq: Long): Option[Long] =
    searchForOffset(soughtSeq, rememberAbove = true, rememberBelow = false)

  // Offset of the entry with seq == soughtSeq, or failing that of the closest
  // entry below it.  Callers must hold this object's lock.
  private def getUpperBoundOffset(soughtSeq: Long): Option[Long] =
    searchForOffset(soughtSeq, rememberAbove = false, rememberBelow = true)

  /**
   * The one on disk binary search shared by all lookups.  An exact match
   * always wins; the two flags decide what is returned when there is none.
   *
   * This moves the file pointer of the shared handle, so it must only be
   * called while holding this object's lock.
   *
   * @param soughtSeq the sequence number to search for
   * @param rememberAbove if true, fall back to the offset of the most
   *          recently probed entry whose seq is greater than soughtSeq
   * @param rememberBelow if true, fall back to the offset of the most
   *          recently probed entry whose seq is less than soughtSeq
   * @return offset of the exact match if any, else the remembered fallback,
   *           else None
   */
  private def searchForOffset(soughtSeq: Long,
                              rememberAbove: Boolean,
                              rememberBelow: Boolean): Option[Long] = {
    // begin is inclusive, end is exclusive, both are byte positions
    @tailrec
    def search(begin: Long, end: Long, closest: Option[Long]): Option[Long] = {
      if (begin >= end) {
        closest
      } else {
        // work in whole entries so that mid lands on the start of an entry
        val mid = ((begin + end) / EntrySizeBytes / 2) * EntrySizeBytes
        handle.seek(mid)
        val (seq, offset) = fileOps.readEntry(handle)
        if (seq == soughtSeq) {
          Some(offset)
        } else if (soughtSeq < seq) {
          search(begin, mid, if (rememberAbove) Some(offset) else closest)
        } else {
          search(mid + EntrySizeBytes, end, if (rememberBelow) Some(offset) else closest)
        }
      }
    }

    search(0, handle.length, None)
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy