org.apache.spark.streaming.util.BatchedWriteAheadLog.scala

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.streaming.util

import java.nio.ByteBuffer
import java.util.{Iterator => JIterator}
import java.util.concurrent.LinkedBlockingQueue
import java.util.concurrent.atomic.AtomicBoolean

import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
import scala.concurrent.Promise
import scala.concurrent.duration._
import scala.util.control.NonFatal

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.network.util.JavaUtils
import org.apache.spark.util.{ThreadUtils, Utils}

/**
 * A wrapper for a WriteAheadLog that batches records before writing data. It handles
 * aggregation during writes and de-aggregation in the `readAll` method. Reading individual
 * records via `read` is not supported, because the `WriteAheadLogRecordHandle` returned
 * after a write refers to the whole batch of records rather than to an individual record.
 *
 * When writing a batch of records, the `time` passed to the `wrappedLog` will be the timestamp
 * of the latest record in the batch. This is essential for correctness. Consider the following
 * example: we receive records with timestamps 1, 3, 5, and 7, and name the log file after the
 * earliest record, i.e. "log-1". Once we receive a clean-up request for timestamp 3, we would
 * delete the file "log-1" and thereby lose the data for timestamps 5 and 7. Naming the file
 * after the latest record ("log-7") instead avoids this.
 *
 * This means the caller can assume the same write semantics as any other WriteAheadLog
 * implementation despite the batching in the background: when `write()` returns, the data is
 * written to the WAL and is durable. To take advantage of the batching, the caller can write
 * from multiple threads, each of which stays blocked until the corresponding data has been
 * written.
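 *
 * A minimal usage sketch (illustrative only; `parentLog` stands for any concrete
 * WriteAheadLog implementation, `sparkConf` for the SparkConf in use, and `payload`
 * for the caller's serialized bytes):
 * {{{
 *   val batchedLog = new BatchedWriteAheadLog(parentLog, sparkConf)
 *   // Several threads may call write() concurrently; each call blocks until the
 *   // batch containing its record has been durably written by the wrapped log.
 *   val handle = batchedLog.write(ByteBuffer.wrap(payload), System.currentTimeMillis())
 * }}}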
 *
 * All other methods of the WriteAheadLog interface will be passed on to the wrapped WriteAheadLog.
 */
private[util] class BatchedWriteAheadLog(val wrappedLog: WriteAheadLog, conf: SparkConf)
  extends WriteAheadLog with Logging {

  import BatchedWriteAheadLog._

  private val walWriteQueue = new LinkedBlockingQueue[Record]()

  // Whether the writer thread is active
  private val active: AtomicBoolean = new AtomicBoolean(true)
  private val buffer = new ArrayBuffer[Record]()

  private val batchedWriterThread = startBatchedWriterThread()

  /**
   * Write a byte buffer to the log file. This method adds the byteBuffer to a queue and blocks
   * until the record has actually been written by the wrapped log.
   */
  override def write(byteBuffer: ByteBuffer, time: Long): WriteAheadLogRecordHandle = {
    val promise = Promise[WriteAheadLogRecordHandle]()
    val putSuccessfully = synchronized {
      if (active.get()) {
        walWriteQueue.offer(Record(byteBuffer, time, promise))
        true
      } else {
        false
      }
    }
    if (putSuccessfully) {
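      // Block until the batched writer thread completes the promise in flushRecords(),
      // bounded by the batching timeout from WriteAheadLogUtils.getBatchingTimeout(conf).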
      ThreadUtils.awaitResult(
        promise.future, WriteAheadLogUtils.getBatchingTimeout(conf).milliseconds)
    } else {
      throw new IllegalStateException("close() was called on BatchedWriteAheadLog before " +
        s"write request with time $time could be fulfilled.")
    }
  }

  /**
   * This method is not supported: the handle produced by `write` refers to an aggregated
   * batch, so the resulting ByteBuffer would require de-aggregation. `read` is primarily
   * used in testing, and to ensure that it is not used in production, we throw an
   * UnsupportedOperationException.
   */
  override def read(segment: WriteAheadLogRecordHandle): ByteBuffer = {
    throw new UnsupportedOperationException("read() is not supported for BatchedWriteAheadLog " +
      "as the data may require de-aggregation.")
  }

  /**
   * Read all the existing logs from the log directory. The output of the wrapped WriteAheadLog
   * will be de-aggregated.
   */
  override def readAll(): JIterator[ByteBuffer] = {
    wrappedLog.readAll().asScala.flatMap(deaggregate).asJava
  }

  /**
   * Delete the log files that are older than the threshold time.
   *
   * This call is delegated entirely to the wrapped WriteAheadLog.
   */
  override def clean(threshTime: Long, waitForCompletion: Boolean): Unit = {
    wrappedLog.clean(threshTime, waitForCompletion)
  }
  /**
   * Stop the batched writer thread, fulfill promises with failures and close the wrapped WAL.
   */
  override def close(): Unit = {
    logInfo(s"BatchedWriteAheadLog shutting down at time: ${System.currentTimeMillis()}.")
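    // getAndSet makes close() idempotent: only the first caller proceeds with the shutdown.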
    if (!active.getAndSet(false)) return
    batchedWriterThread.interrupt()
    batchedWriterThread.join()
    while (!walWriteQueue.isEmpty) {
      val Record(_, time, promise) = walWriteQueue.poll()
      promise.failure(new IllegalStateException("close() was called on BatchedWriteAheadLog " +
        s"before write request with time $time could be fulfilled."))
    }
    wrappedLog.close()
  }

  /** Start the actual log writer on a separate thread. */
  private def startBatchedWriterThread(): Thread = {
    val thread = new Thread(() => {
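      // Loop until close() flips `active` to false; close() also interrupts this thread so
      // that a take() blocked inside flushRecords() wakes up promptly.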
      while (active.get()) {
        try {
          flushRecords()
        } catch {
          case NonFatal(e) =>
            logWarning("Encountered exception in Batched Writer Thread.", e)
        }
      }
      logInfo("BatchedWriteAheadLog Writer thread exiting.")
    }, "BatchedWriteAheadLog Writer")
    thread.setDaemon(true)
    thread.start()
    thread
  }

  /** Write all the records in the buffer to the write ahead log. */
  private def flushRecords(): Unit = {
    try {
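      // take() blocks until at least one record is available; drainTo() then moves any
      // other records already queued into the same batch without blocking.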
      buffer += walWriteQueue.take()
      val numBatched = walWriteQueue.drainTo(buffer.asJava) + 1
      logDebug(s"Received $numBatched records from queue")
    } catch {
      case _: InterruptedException =>
        logWarning("BatchedWriteAheadLog Writer queue interrupted.")
    }
    try {
      var segment: WriteAheadLogRecordHandle = null
      if (buffer.nonEmpty) {
        logDebug(s"Batched ${buffer.length} records for Write Ahead Log write")
        // threads may not be able to add items in order by time
        val sortedByTime = buffer.sortBy(_.time)
        // We take the latest record for the timestamp. Please refer to the class Javadoc for
        // detailed explanation
        val time = sortedByTime.last.time
        segment = wrappedLog.write(aggregate(sortedByTime.toSeq), time)
      }
      buffer.foreach(_.promise.success(segment))
    } catch {
      case e: InterruptedException =>
        logWarning("BatchedWriteAheadLog Writer queue interrupted.", e)
        buffer.foreach(_.promise.failure(e))
      case NonFatal(e) =>
        logWarning(s"BatchedWriteAheadLog Writer failed to write $buffer", e)
        buffer.foreach(_.promise.failure(e))
    } finally {
      buffer.clear()
    }
  }

  /** Method for querying the queue length. Should only be used in tests. */
  private def getQueueLength(): Int = walWriteQueue.size()
}

/** Static methods for aggregating and de-aggregating records. */
private[util] object BatchedWriteAheadLog {

  /**
   * Wrapper class representing a record to be written to the WriteAheadLog, coupled with the
   * timestamp of the write request and the promise that blocks the requesting thread until a
   * separate thread has actually performed the write.
   */
  case class Record(data: ByteBuffer, time: Long, promise: Promise[WriteAheadLogRecordHandle])

  /** Aggregate multiple serialized ReceivedBlockTrackerLogEvents into a single ByteBuffer. */
  def aggregate(records: Seq[Record]): ByteBuffer = {
    ByteBuffer.wrap(Utils.serialize[Array[Array[Byte]]](
      records.map(record => JavaUtils.bufferToArray(record.data)).toArray))
  }

  /**
   * De-aggregate serialized ReceivedBlockTrackerLogEvents packed into a single ByteBuffer.
   * A stream may not have used batching initially but may start using it after a restart, so
   * this method needs to be backwards compatible with non-batched records.
   */
  def deaggregate(buffer: ByteBuffer): Array[ByteBuffer] = {
    val prevPosition = buffer.position()
    try {
      Utils.deserialize[Array[Array[Byte]]](JavaUtils.bufferToArray(buffer)).map(ByteBuffer.wrap)
    } catch {
      case _: ClassCastException => // users may restart a stream with batching enabled
        // Restore `position` so that the user can read `buffer` later
        buffer.position(prevPosition)
        Array(buffer)
    }
  }
}
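
// ---------------------------------------------------------------------------------------------
// Illustrative sketch, NOT part of Spark: a round trip through the aggregate/deaggregate
// helpers above, including the backwards-compatibility path for a legacy (non-batched)
// buffer. The object name is hypothetical; it sits in this package so it can access the
// private[util] members, and it reuses the imports at the top of this file.
// ---------------------------------------------------------------------------------------------
private[util] object BatchedWriteAheadLogRoundTripExample {
  import java.nio.charset.StandardCharsets.UTF_8

  import BatchedWriteAheadLog._

  def main(args: Array[String]): Unit = {
    // Three records with increasing timestamps, as the writer thread would batch them.
    val records = Seq("a", "b", "c").zipWithIndex.map { case (payload, i) =>
      Record(ByteBuffer.wrap(payload.getBytes(UTF_8)), i.toLong,
        Promise[WriteAheadLogRecordHandle]())
    }

    // aggregate() packs every record's bytes into one buffer; deaggregate() restores them.
    val restored = deaggregate(aggregate(records))
    assert(restored.length == records.length)
    assert(new String(JavaUtils.bufferToArray(restored.head), UTF_8) == "a")

    // A buffer holding a single serialized, non-batched object comes back unchanged,
    // because the cast to Array[Array[Byte]] inside deaggregate() fails.
    val legacy = ByteBuffer.wrap(Utils.serialize("a single legacy event"))
    assert(deaggregate(legacy).head eq legacy)
  }
}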



