org.apache.activemq.leveldb.RecordLog.scala

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.activemq.leveldb

import java.{lang=>jl}
import java.{util=>ju}

import java.util.zip.CRC32
import java.util.Map.Entry
import java.util.concurrent.atomic.{AtomicInteger, AtomicLong}
import java.io._
import org.fusesource.hawtbuf.{DataByteArrayInputStream, DataByteArrayOutputStream, Buffer}
import org.fusesource.hawtdispatch.BaseRetained
import org.apache.activemq.leveldb.util.FileSupport._
import org.apache.activemq.util.LRUCache
import util.TimeMetric._
import util.{TimeMetric, Log}
import java.util.TreeMap
import java.util.concurrent.locks.{ReentrantReadWriteLock, ReadWriteLock}
import java.util.concurrent.CountDownLatch

object RecordLog extends Log {

  // The log files contain a sequence of variable length log records:
  // record := header + data
  //
  // header :=
  //   '*'      : int8       // Start of Record Magic
  //   kind     : int8       // Help identify content type of the data.
  //   checksum : uint32     // crc32 of the data[]
  //   length   : uint32     // the length of the data
  //
  // (An illustrative header-encoding sketch follows the constants below.)

  val LOG_HEADER_PREFIX = '*'.toByte
  val UOW_END_RECORD = -1.toByte

  val LOG_HEADER_SIZE = 10

  val BUFFER_SIZE = 1024*512
  val BYPASS_BUFFER_SIZE = 1024*16
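
  // Illustrative sketch only (not part of the original API): one way to build
  // the 10-byte record header described above, using the same CRC32 and
  // big-endian int encoding as the appender below. The name example_encode_header
  // is ours, not ActiveMQ's.
  def example_encode_header(kind:Byte, data:Buffer):Buffer = {
    val crc = new CRC32
    crc.update(data.data, data.offset, data.length)
    val out = new DataByteArrayOutputStream(LOG_HEADER_SIZE)
    out.writeByte(LOG_HEADER_PREFIX)                  // Start of Record Magic
    out.writeByte(kind)                               // content type of the data
    out.writeInt((crc.getValue & 0xFFFFFFFF).toInt)   // crc32 of the data
    out.writeInt(data.length)                         // length of the data
    out.toBuffer                                      // exactly LOG_HEADER_SIZE bytes
  }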

  case class LogInfo(file:File, position:Long, length:Long) {
    def limit = position+length
  }

  def encode_long(a1:Long) = {
    val out = new DataByteArrayOutputStream(8)
    out.writeLong(a1)
    out.toBuffer
  }

  def decode_long(value:Buffer):Long = {
    val in = new DataByteArrayInputStream(value)
    in.readLong()
  }

}

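// Test hook: suspend() grabs the write lock on a helper thread so that any
// work wrapped in call { ... } (which needs the read lock) blocks until
// resume() is invoked.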
class SuspendCallSupport {

  val lock = new ReentrantReadWriteLock()
  var resumeLatch:CountDownLatch = _
  var resumedLatch:CountDownLatch = _
  @volatile
  var threads = new AtomicInteger()

  def suspend = this.synchronized {
    val suspended = new CountDownLatch(1)
    resumeLatch = new CountDownLatch(1)
    resumedLatch = new CountDownLatch(1)
    new Thread("Suspend Lock") {
      override def run = {
        try {
          lock.writeLock().lock()
          suspended.countDown()
          resumeLatch.await()
        } finally {
          lock.writeLock().unlock();
          resumedLatch.countDown()
        }
      }
    }.start()
    suspended.await()
  }

  def resume = this.synchronized {
    if( resumedLatch != null ) {
      resumeLatch.countDown()
      resumedLatch.await();
      resumeLatch = null
      resumedLatch = null
    }
  }

  def call[T](func: =>T):T= {
    threads.incrementAndGet()
    lock.readLock().lock()
    try {
      func
    } finally {
      threads.decrementAndGet()
      lock.readLock().unlock()
    }
  }

}

class RecordLogTestSupport {

  val forceCall = new SuspendCallSupport()
  val writeCall = new SuspendCallSupport()
  val deleteCall = new SuspendCallSupport()

}

case class RecordLog(directory: File, logSuffix:String) {
  import RecordLog._

  directory.mkdirs()

  var logSize = 1024 * 1024 * 100L
  var current_appender:LogAppender = _
  var verify_checksums = false
  val log_infos = new TreeMap[Long, LogInfo]()

  var recordLogTestSupport:RecordLogTestSupport =
    if( java.lang.Boolean.getBoolean("org.apache.activemq.leveldb.test") ) {
      new RecordLogTestSupport()
    } else {
      null
    }


  object log_mutex

  def delete(id:Long) = {
    log_mutex.synchronized {
      // We can't delete the current appender.
      if( current_appender.position != id ) {
        Option(log_infos.get(id)).foreach { info =>
          onDelete(info.file)
          onDelete(id)
          log_infos.remove(id)
        }
      }
    }
  }

  protected def onDelete(file:Long) = {
  }

  protected def onDelete(file:File) = {
    if( recordLogTestSupport!=null ) {
      recordLogTestSupport.deleteCall.call {
        file.delete()
      }
    } else {
      file.delete()
    }
  }

  def checksum(data: Buffer): Int = {
    val checksum = new CRC32
    checksum.update(data.data, data.offset, data.length)
    (checksum.getValue & 0xFFFFFFFF).toInt
  }

  class LogAppender(file:File, position:Long, var append_offset:Long=0L) extends LogReader(file, position) {

    val info = new LogInfo(file, position, 0)

    override def open = new RandomAccessFile(file, "rw")

    override def on_close ={
      force
    }

    val flushed_offset = new AtomicLong(append_offset)

    def append_position = {
      position+append_offset
    }

    // set the file size ahead of time so that we don't have to sync the file
    // meta-data on every log sync.
    if( append_offset==0 ) {
      channel.position(logSize-1)
      channel.write(new Buffer(1).toByteBuffer)
      channel.force(true)
      channel.position(0)
    }

    val write_buffer = new DataByteArrayOutputStream(BUFFER_SIZE+LOG_HEADER_SIZE)

    def force = {
      flush
      max_log_flush_latency {
        // We only need to sync the file metadata when the file has grown past the pre-allocated size.
        if( recordLogTestSupport!=null ) {
          recordLogTestSupport.forceCall.call {
            channel.force(append_offset > logSize)
          }
        } else {
          channel.force(append_offset > logSize)
        }
      }
    }


    def skip(length:Long) = this.synchronized {
      flush
      append_offset += length
      flushed_offset.addAndGet(length)
    }

    /**
     * returns the offset position of the data record.
     */
    def append(id:Byte, data: Buffer) = this.synchronized {
      val record_position = append_position
      val data_length = data.length
      val total_length = LOG_HEADER_SIZE + data_length

      if( write_buffer.position() + total_length > BUFFER_SIZE ) {
        flush
      }

      val cs: Int = checksum(data)
//      trace("Writing at: "+record_position+" len: "+data_length+" with checksum: "+cs)

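      // Note: the large-record bypass path below is disabled by the `false &&`
      // guard, so every record currently goes through the write buffer.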
      if( false && total_length > BYPASS_BUFFER_SIZE ) {

        // Write the header and flush..
        write_buffer.writeByte(LOG_HEADER_PREFIX)
        write_buffer.writeByte(id)
        write_buffer.writeInt(cs)
        write_buffer.writeInt(data_length)

        append_offset += LOG_HEADER_SIZE
        flush

        // Directly write the data to the channel since it's large.
        val buffer = data.toByteBuffer
        val pos = append_offset+LOG_HEADER_SIZE
        val remaining = buffer.remaining

        if( recordLogTestSupport!=null ) {
          recordLogTestSupport.writeCall.call {
            channel.write(buffer, pos)
          }
        } else {
          channel.write(buffer, pos)
        }

        flushed_offset.addAndGet(remaining)
        if( buffer.hasRemaining ) {
          throw new IOException("Short write")
        }
        append_offset += data_length

      } else {
        write_buffer.writeByte(LOG_HEADER_PREFIX)
        write_buffer.writeByte(id)
        write_buffer.writeInt(cs)
        write_buffer.writeInt(data_length)
        write_buffer.write(data.data, data.offset, data_length)
        append_offset += total_length
      }
      (record_position, info)
    }

    def flush = max_log_flush_latency { this.synchronized {
      if( write_buffer.position() > 0 ) {
        val buffer = write_buffer.toBuffer.toByteBuffer
        val remaining = buffer.remaining
        val pos = append_offset-remaining

        if( recordLogTestSupport!=null ) {
          recordLogTestSupport.writeCall.call {
            channel.write(buffer, pos)
          }
        } else {
          channel.write(buffer, pos)
        }

        flushed_offset.addAndGet(remaining)
        if( buffer.hasRemaining ) {
          throw new IOException("Short write")
        }
        write_buffer.reset()
      } }
    }

    override def check_read_flush(end_offset:Long) = {
      if( flushed_offset.get() < end_offset )  {
        flush
      }
    }

  }

  case class LogReader(file:File, position:Long) extends BaseRetained {

    def open = new RandomAccessFile(file, "r")

    val fd = open
    val channel = fd.getChannel

    override def dispose() {
      on_close
      fd.close()
    }

    def on_close = {}

    def check_read_flush(end_offset:Long) = {}

    def read(record_position:Long, length:Int) = {
      val offset = record_position-position
      assert(offset >=0 )

      check_read_flush(offset+LOG_HEADER_SIZE+length)

      if(verify_checksums) {

        val record = new Buffer(LOG_HEADER_SIZE+length)

        def record_is_not_changing = {
          using(open) { fd =>
            val channel = fd.getChannel
            val new_record = new Buffer(LOG_HEADER_SIZE+length)
            channel.read(new_record.toByteBuffer, offset)
            // the record is unchanged if the bytes read now match the original read
            record == new_record
          }
        }

        if( channel.read(record.toByteBuffer, offset) != record.length ) {
          assert( record_is_not_changing )
          throw new IOException("short record at position: "+record_position+" in file: "+file+", offset: "+offset)
        }

        val is = new DataByteArrayInputStream(record)
        val prefix = is.readByte()
        if( prefix != LOG_HEADER_PREFIX ) {
          assert(record_is_not_changing)
          throw new IOException("invalid record at position: "+record_position+" in file: "+file+", offset: "+offset)
        }

        val id = is.readByte()
        val expectedChecksum = is.readInt()
        val expectedLength = is.readInt()
        val data = is.readBuffer(length)

        // If we're reading the whole record we can verify the data checksum
        if( expectedLength == length ) {
          if( expectedChecksum != checksum(data) ) {
            assert(record_is_not_changing)
            throw new IOException("checksum does not match at position: "+record_position+" in file: "+file+", offset: "+offset)
          }
        }

        data
      } else {
        val data = new Buffer(length)
        var bb = data.toByteBuffer
        var position = offset+LOG_HEADER_SIZE
        while( bb.hasRemaining  ) {
          var count = channel.read(bb, position)
          if( count == 0 ) {
            throw new IOException("zero read at file '%s' offset: %d".format(file, position))
          }
          if( count < 0 ) {
            throw new EOFException("File '%s' offset: %d".format(file, position))
          }
          position += count
        }
        data
      }
    }

    def read(record_position:Long) = {
      val offset = record_position-position
      val header = new Buffer(LOG_HEADER_SIZE)
      check_read_flush(offset+LOG_HEADER_SIZE)
      channel.read(header.toByteBuffer, offset)
      val is = header.bigEndianEditor();
      val prefix = is.readByte()
      if( prefix != LOG_HEADER_PREFIX ) {
        // Does not look like a record.
        throw new IOException("invalid record position %d (file: %s, offset: %d)".format(record_position, file.getAbsolutePath, offset))
      }
      val id = is.readByte()
      val expectedChecksum = is.readInt()
      val length = is.readInt()
      val data = new Buffer(length)

      check_read_flush(offset+LOG_HEADER_SIZE+length)
      if( channel.read(data.toByteBuffer, offset+LOG_HEADER_SIZE) != length ) {
        throw new IOException("short record")
      }

      if(verify_checksums) {
        if( expectedChecksum != checksum(data) ) {
          throw new IOException("checksum does not match")
        }
      }
      (id, data, record_position+LOG_HEADER_SIZE+length)
    }

    def check(record_position:Long):Option[(Long, Option[Long])] = {
      var offset = record_position-position
      val header = new Buffer(LOG_HEADER_SIZE)
      channel.read(header.toByteBuffer, offset)
      val is = header.bigEndianEditor();
      val prefix = is.readByte()
      if( prefix != LOG_HEADER_PREFIX ) {
        return None // Does not look like a record.
      }
      val kind = is.readByte()
      val expectedChecksum = is.readInt()
      val length = is.readInt()

      val chunk = new Buffer(1024*4)
      val chunkbb = chunk.toByteBuffer
      offset += LOG_HEADER_SIZE

      // Read the data in chunks to avoid
      // OOME if we are checking an invalid record
      // with a bad record length
      val checksumer = new CRC32
      var remaining = length
      while( remaining > 0 ) {
        val chunkSize = remaining.min(1024*4);
        chunkbb.position(0)
        chunkbb.limit(chunkSize)
        channel.read(chunkbb, offset)
        if( chunkbb.hasRemaining ) {
          return None
        }
        checksumer.update(chunk.data, 0, chunkSize)
        offset += chunkSize
        remaining -= chunkSize
      }

      val checksum = ( checksumer.getValue & 0xFFFFFFFF).toInt
      if( expectedChecksum !=  checksum ) {
        return None
      }
      val uow_start_pos = if(kind == UOW_END_RECORD && length==8) Some(decode_long(chunk)) else None
      return Some(record_position+LOG_HEADER_SIZE+length, uow_start_pos)
    }

    def verifyAndGetEndOffset:Long = {
      var pos = position;
      var current_uow_start = pos
      val limit = position+channel.size()
      while(pos < limit) {
        check(pos) match {
          case Some((next, uow_start_pos)) =>
            uow_start_pos.foreach { uow_start_pos =>
              if( uow_start_pos == current_uow_start ) {
                current_uow_start = next
              } else {
                return current_uow_start-position
              }
            }
            pos = next
          case None =>
            return current_uow_start-position
        }
      }
      return current_uow_start-position
    }
  }

  def create_log_appender(position: Long, offset:Long) = {
    new LogAppender(next_log(position), position, offset)
  }

  def create_appender(position: Long, offset:Long): Any = {
    log_mutex.synchronized {
      if(current_appender!=null) {
        log_infos.put (position, new LogInfo(current_appender.file, current_appender.position, current_appender.append_offset))
      }
      current_appender = create_log_appender(position, offset)
      log_infos.put(position, new LogInfo(current_appender.file, position, 0))
    }
  }

  val max_log_write_latency = TimeMetric()
  val max_log_flush_latency = TimeMetric()
  val max_log_rotate_latency = TimeMetric()

  def open(appender_size:Long= -1) = {
    log_mutex.synchronized {
      log_infos.clear()
      LevelDBClient.find_sequence_files(directory, logSuffix).foreach { case (position,file) =>
        log_infos.put(position, LogInfo(file, position, file.length()))
      }

      if( log_infos.isEmpty ) {
        create_appender(0,0)
      } else {
        val file = log_infos.lastEntry().getValue
        if( appender_size == -1 ) {
          val r = LogReader(file.file, file.position)
          try {
            val endOffset = r.verifyAndGetEndOffset
            using(new RandomAccessFile(file.file, "rw")) { file=>
              try {
                file.getChannel.truncate(endOffset)
              }
              catch {
                case e:Throwable =>
                  e.printStackTrace()
              }
              file.getChannel.force(true)
            }
            create_appender(file.position,endOffset)
          } finally {
            r.release()
          }
        } else {
          create_appender(file.position,appender_size)
        }
      }
    }
  }

  def isOpen = {
    log_mutex.synchronized {
      current_appender!=null;
    }
  }

  def close = {
    log_mutex.synchronized {
      if( current_appender!=null ) {
        current_appender.release
      }
    }
  }

  def appender_limit = current_appender.append_position
  def appender_start = current_appender.position

  def next_log(position:Long) = LevelDBClient.create_sequence_file(directory, position, logSuffix)

  def appender[T](func: (LogAppender)=>T):T= {
    val initial_position = current_appender.append_position
    try {
      max_log_write_latency {
        val rc = func(current_appender)
        if( current_appender.append_position != initial_position ) {
          // Record a UOW_END_RECORD so that on recovery we only replay full
          // units of work (a usage sketch follows this method).
          current_appender.append(UOW_END_RECORD, encode_long(initial_position))
        }
        rc
      }
    } finally {
      current_appender.flush
      max_log_rotate_latency {
        log_mutex.synchronized {
          if ( current_appender.append_offset >= logSize ) {
            rotate
          }
        }
      }
    }
  }
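
  // Illustrative usage sketch (not part of the original API): records appended
  // inside a single appender { ... } block form one unit of work; the trailing
  // UOW_END_RECORD written above lets recovery discard a partially written batch.
  // The record kind 0x01 below is an arbitrary example value.
  def example_append_uow(payloads: Seq[Buffer]): Seq[Long] = {
    appender { log =>
      payloads.map { payload =>
        val (record_position, _) = log.append(0x01.toByte, payload)
        record_position
      }
    }
  }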


  def rotate[T] = log_mutex.synchronized {
    current_appender.release()
    on_log_rotate()
    create_appender(current_appender.append_position, 0)
  }

  var on_log_rotate: ()=>Unit = ()=>{}

  private val reader_cache = new LRUCache[File, LogReader](100) {
    protected override def onCacheEviction(entry: Entry[File, LogReader]) = {
      entry.getValue.release()
    }
  }

  def log_info(pos:Long) = log_mutex.synchronized { Option(log_infos.floorEntry(pos)).map(_.getValue) }

  def log_file_positions = log_mutex.synchronized {
    import collection.JavaConversions._
    log_infos.map(_._2.position).toArray
  }

  private def get_reader[T](record_position:Long)(func: (LogReader)=>T):Option[T] = {

    val (info, appender) = log_mutex.synchronized {
      log_info(record_position) match {
        case None =>
          warn("No reader available for position: %x, log_infos: %s", record_position, log_infos)
          return None
        case Some(info) =>
          if(info.position == current_appender.position) {
            current_appender.retain()
            (info, current_appender)
          } else {
            (info, null)
          }
      }
    }

    val reader = if( appender!=null ) {
      // read from the current appender.
      appender
    } else {
      // Check out a reader from the cache...
      reader_cache.synchronized {
        var reader = reader_cache.get(info.file)
        if(reader==null) {
          reader = LogReader(info.file, info.position)
          reader_cache.put(info.file, reader)
        }
        reader.retain()
        reader
      }
    }

    try {
      Some(func(reader))
    } finally {
      reader.release
    }
  }

  def read(pos:Long) = {
    get_reader(pos)(_.read(pos))
  }
  def read(pos:Long, length:Int) = {
    get_reader(pos)(_.read(pos, length))
  }

}
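
// Illustrative round-trip sketch (not part of the original file): open a
// RecordLog, append a payload inside a unit of work, and read it back.
// The directory path, suffix, and record kind are arbitrary example values.
object RecordLogExample {
  def main(args: Array[String]): Unit = {
    val log = RecordLog(new File("/tmp/example-record-log"), ".log")
    log.open()
    val payload = new Buffer("hello".getBytes("UTF-8"))
    val pos = log.appender { a => a.append(1.toByte, payload)._1 }
    log.read(pos).foreach { case (kind, data, next) =>
      println("kind=%d bytes=%d next=%d".format(kind, data.length, next))
    }
    log.close
  }
}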
