com.comcast.xfinity.sirius.uberstore.UberTool.scala

/*
 *  Copyright 2012-2014 Comcast Cable Communications Management, LLC
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package com.comcast.xfinity.sirius.uberstore

import com.comcast.xfinity.sirius.writeaheadlog.SiriusLog
import scala.collection.mutable.{HashMap => MutableHashMap}
import com.comcast.xfinity.sirius.api.impl.{Put, Delete, OrderedEvent}
import scala.collection.mutable.WrappedArray
import java.util.Arrays
import java.io.File
import com.comcast.xfinity.sirius.uberstore.segmented.SegmentedUberStore

object UberTool {

  def isLegacy(walDirName: String): Boolean =
    new File(walDirName, "1.data").exists()

  def isSegmented(walDirName: String): Boolean =
    new File(walDirName, SegmentedUberStore.versionId).exists()
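
  // Illustrative sketch, not part of the original source: these checks can be
  // used to decide how to treat an existing write-ahead log directory before
  // converting or compacting it. The path below is hypothetical.
  //
  //   val walDir = "/var/lib/sirius/uber"
  //   if (UberTool.isSegmented(walDir))    println("already segmented")
  //   else if (UberTool.isLegacy(walDir))  println("legacy log, candidate for conversion")
  //   else                                 println("no recognizable log found")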

  /**
   * Copies data from inLog to outLog.  outLog is appended to.
   *
   * Has the side effect of writing data into outLog.
   *
   * @param inLog the SiriusLog to copy data from; this log is
   *          not modified
   * @param outLog the SiriusLog to copy data into
   */
  def copyLog(inLog: SiriusLog, outLog: SiriusLog) {
    inLog.foldLeft(())(
      (_, evt) => outLog.writeEntry(evt)
    )
  }
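
  // Illustrative sketch, not part of the original source: copyLog is typically
  // used to migrate one on-disk log into another, e.g. a legacy UberStore into
  // a SegmentedUberStore. The factory calls below are assumptions about the
  // companion objects' apply methods, not verified signatures.
  //
  //   val inLog  = UberStore("/path/to/legacy-wal")              // assumed factory
  //   val outLog = SegmentedUberStore("/path/to/segmented-wal")  // assumed factory
  //   UberTool.copyLog(inLog, outLog)                            // appends every event to outLog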

  /**
   * Compacts events from inLog into outLog, appending.
   *
   * Has the side effect of writing data into outLog.
   *
   * NOTE: this is an unoptimized, high level compaction
   * algorithm and can take a lot of memory.  Make sure your
   * JVM is configured properly when you use this.
   *
   * @param inLog the SiriusLog to compact events from
   * @param outLog the SiriusLog to write the compacted
   *          log into
   * @param deleteCutoff deletes with a timestamp at or before this
   *          point are completely removed from the log. Defaults
   *          to 0, meaning any deletes from 12AM Jan 1, 1970 (the
   *          epoch) or earlier are completely removed from the log.
   */
  def compact(inLog: SiriusLog, outLog: SiriusLog, deleteCutoff: Long = 0) {
    val toKeep = new MutableHashMap[WrappedArray[Byte], OrderedEvent]

    // Fold over the log, retaining only the most recent event per key:
    // a delete at or before the cutoff evicts the key entirely, while a
    // newer delete or any put becomes the key's surviving event.
    inLog.foldLeft(()) {
      case (_, evt @ OrderedEvent(_, ts, Delete(key))) =>
        if (ts <= deleteCutoff) {
          toKeep.remove(WrappedArray.make(key.getBytes))
        } else {
          toKeep.put(WrappedArray.make(key.getBytes), evt)
        }
      case (_, evt @ OrderedEvent(_, _, Put(key, _))) =>
        toKeep.put(WrappedArray.make(key.getBytes), evt)
    }

    // copy the surviving events into an array; sorting by sequence below
    // keeps the output log in order
    val toWrite = new Array[OrderedEvent](toKeep.size)
    var i = 0
    toKeep.foreach(kv => { toWrite(i) = kv._2; i += 1 })

    toWrite.sortWith(_.sequence < _.sequence).foreach(outLog.writeEntry(_))
  }
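
  // Illustrative sketch, not part of the original source: compacting into a
  // placeholder output log, dropping deletes older than one hour. This assumes
  // OrderedEvent timestamps are epoch milliseconds; inLog and outLog stand in
  // for real SiriusLog instances.
  //
  //   val cutoff = System.currentTimeMillis() - (60 * 60 * 1000L)
  //   UberTool.compact(inLog, outLog, cutoff)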

  /**
   * Not perfect either, and slower than the single-pass compact above, but
   * this compaction implementation runs with a more reasonable memory footprint.
   *
   * @param inLog input log
   * @param outLog output log; needs to be empty (or you're likely to get an
   *               "out of order write" exception)
   * @param deleteCutoff deletes with a timestamp at or before this
   *          point are completely removed from the log. Defaults
   *          to 0, meaning any deletes from 12AM Jan 1, 1970 (the
   *          epoch) or earlier are completely removed from the log.
   */
  def twoPassCompact(inLog: SiriusLog, outLog: SiriusLog, deleteCutoff: Long = 0) {
    // Pass 1: get offsets of all keepable events (ie: last delete/put per key)
    val keepableOffsetIterator = gatherKeepableEventOffsets(inLog, deleteCutoff).iterator

    // Pass 2: write it out, skip if there's nothing worth keeping, saves time
    //          and energy- Sirius is a green product
    if (!keepableOffsetIterator.isEmpty) {
      writeKeepableEvents(inLog, outLog, keepableOffsetIterator)
    }
  }
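
  // Illustrative sketch, not part of the original source: twoPassCompact has
  // the same call shape as compact, but the output log must be empty. The
  // createEmptyLog helper named below is hypothetical.
  //
  //   val emptyOut = createEmptyLog("/path/to/compacted-wal")  // hypothetical helper
  //   UberTool.twoPassCompact(inLog, emptyOut, deleteCutoff = 0L)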

  private def gatherKeepableEventOffsets(inLog: SiriusLog, deleteCutoff: Long) = {
    val toKeep = new MutableHashMap[WrappedArray[Byte], Long]()

    // generate map of Id (Key) -> EntryNum (logical offset)
    var index = 1
    inLog.foldLeft(()) {
      case (_, OrderedEvent(_, ts, Delete(key))) =>
        if (ts <= deleteCutoff) {
          toKeep.remove(WrappedArray.make(key.getBytes))
        } else {
          toKeep.put(WrappedArray.make(key.getBytes), index)
        }
        index += 1
      case (_, OrderedEvent(_, _, Put(key, _))) =>
        toKeep.put(WrappedArray.make(key.getBytes), index)
        index += 1
    }

    // keys are useless, only need offsets, sorted
    val keepableOffsets = new Array[Long](toKeep.size)
    var i = 0
    toKeep.foreach(kv => { keepableOffsets(i) = kv._2; i += 1})
    Arrays.sort(keepableOffsets)
    keepableOffsets
  }
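
  // Worked example, illustrative only: for the input events
  //   1: Put("a"), 2: Put("b"), 3: Delete("a") with ts > cutoff, 4: Put("b")
  // the map ends up as { "a" -> 3, "b" -> 4 }, so this returns the sorted
  // logical offsets Array(3L, 4L).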

  // don't call with an empty iterator- you will have a bad time
  private def writeKeepableEvents(inLog: SiriusLog, outLog: SiriusLog, toWriteIterator: Iterator[Long]) {
    var nextWrite = toWriteIterator.next()

    // write events whose positions appear in toWriteIterator
    var index = 1
    inLog.foldLeft(())(
      (_, evt) => {
        if (index == nextWrite) {
          outLog.writeEntry(evt)
          if (toWriteIterator.hasNext) {
            nextWrite = toWriteIterator.next()
          }
        }
        index += 1
      }
    )
  }
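
  // Continuing the worked example above (illustrative only): with the iterator
  // yielding 3 then 4, the fold skips events 1 and 2, writes event 3, advances
  // nextWrite to 4, and then writes event 4. Offsets are 1-based here, matching
  // the index initialization in gatherKeepableEventOffsets.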

}



