/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.flume.channel.file;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.LongBuffer;
import java.security.MessageDigest;
import java.util.Arrays;
import java.util.Collection;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.ArrayUtils;
import org.mapdb.DB;
import org.mapdb.DBMaker;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.SetMultimap;

/**
 * Queue of events in the channel. This queue stores only
 * {@link FlumeEventPointer} objects which are represented
 * as 8 byte longs internally. Additionally, the queue of longs
 * itself is stored as a memory-mapped file with a fixed
 * header and circular queue semantics. The header of the queue
 * contains the timestamp of last sync, the queue size and
 * the head position.
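 * <p>
 * A {@link FlumeEventPointer} packs a data file ID and an offset into a
 * single long (roughly {@code ((long) fileID << 32) + offset}; see
 * {@link FlumeEventPointer#toLong()} for the authoritative encoding), so
 * the queue body is simply a run of longs after the header.
 * <p>
 * A sketch of the assumed usage, driven in practice by FileChannel
 * (variable names are illustrative):
 * <pre>{@code
 *   FlumeEventQueue queue = new FlumeEventQueue(store, takesFile,
 *       putsFile, queueSetDir);
 *   queue.addWithoutCommit(ptr, txnID);  // record an uncommitted put
 *   queue.addTail(ptr);                  // commit: pointer joins the queue
 *   queue.completeTransaction(txnID);    // clear the inflight record
 *   FlumeEventPointer head = queue.removeHead(takeTxnID);  // take
 *   queue.checkpoint(false);             // periodic checkpoint
 * }</pre>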
 */
final class FlumeEventQueue {
  private static final Logger LOG =
      LoggerFactory.getLogger(FlumeEventQueue.class);
  private static final int EMPTY = 0;
  private final EventQueueBackingStore backingStore;
  private final String channelNameDescriptor;
  private final InflightEventWrapper inflightTakes;
  private final InflightEventWrapper inflightPuts;
  private long searchTime = 0;
  private long searchCount = 0;
  private long copyTime = 0;
  private long copyCount = 0;
  private DB db;
  private Set<Long> queueSet;

  /**
   * @param backingStore backing store that provides the capacity and
   * persistence for the queue of event pointers
   * @param inflightTakesFile file used to persist in-flight takes
   * @param inflightPutsFile file used to persist in-flight puts
   * @param queueSetDBDir directory for the transient queue-set database
   * @throws Exception if the inflight event files cannot be read or created
   */
  FlumeEventQueue(EventQueueBackingStore backingStore, File inflightTakesFile,
          File inflightPutsFile, File queueSetDBDir) throws Exception {
    Preconditions.checkNotNull(backingStore, "backingStore");
    Preconditions.checkArgument(backingStore.getCapacity() > 0,
        "Capacity must be greater than zero");
    Preconditions.checkNotNull(inflightTakesFile, "inflightTakesFile");
    Preconditions.checkNotNull(inflightPutsFile, "inflightPutsFile");
    Preconditions.checkNotNull(queueSetDBDir, "queueSetDBDir");
    this.channelNameDescriptor = "[channel=" + backingStore.getName() + "]";
    this.backingStore = backingStore;
    try {
      inflightPuts = new InflightEventWrapper(inflightPutsFile);
      inflightTakes = new InflightEventWrapper(inflightTakesFile);
    } catch (Exception e) {
      LOG.error("Could not read checkpoint.", e);
      throw e;
    }
    if (queueSetDBDir.isDirectory()) {
      FileUtils.deleteDirectory(queueSetDBDir);
    } else if (queueSetDBDir.isFile() && !queueSetDBDir.delete()) {
      throw new IOException("QueueSetDir " + queueSetDBDir + " is a file and"
          + " could not be deleted");
    }
    if (!queueSetDBDir.mkdirs()) {
      throw new IllegalStateException("Could not create QueueSet Dir "
          + queueSetDBDir);
    }
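    // The MapDB-backed set below is a transient mirror of the pointers on
    // the queue. It exists only so that remove(FlumeEventPointer) can test
    // membership during replay without scanning the whole queue; it is
    // rebuilt from the backing store on every start (hence the directory
    // wipe above) and discarded in replayComplete().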
    File dbFile = new File(queueSetDBDir, "db");
    db = DBMaker.newFileDB(dbFile)
        .closeOnJvmShutdown()
        .transactionDisable()
        .syncOnCommitDisable()
        .deleteFilesAfterClose()
        .cacheDisable()
        .make();
    queueSet = db.createTreeSet("QueueSet").make();
    long start = System.currentTimeMillis();
    for (int i = 0; i < backingStore.getSize(); i++) {
      queueSet.add(get(i));
    }
    LOG.info("QueueSet population inserting " + backingStore.getSize()
        + " took " + (System.currentTimeMillis() - start));
  }

  SetMultimap<Long, Long> deserializeInflightPuts()
          throws IOException, BadCheckpointException {
    return inflightPuts.deserialize();
  }

  SetMultimap<Long, Long> deserializeInflightTakes()
          throws IOException, BadCheckpointException {
    return inflightTakes.deserialize();
  }

  synchronized long getLogWriteOrderID() {
    return backingStore.getLogWriteOrderID();
  }

  synchronized boolean checkpoint(boolean force) throws Exception {
    if (!backingStore.syncRequired()
            && !inflightTakes.syncRequired()
            && !force) { //No need to check inflight puts, since that would
                         //cause elements.syncRequired() to return true.
      LOG.debug("Checkpoint not required");
      return false;
    }
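    // beginCheckpoint() and checkpoint() bracket the inflight snapshots so
    // that a crash between them leaves a detectably incomplete checkpoint
    // (a sketch of the intent; the marker mechanics live in the
    // EventQueueBackingStore implementation).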
    backingStore.beginCheckpoint();
    inflightPuts.serializeAndWrite();
    inflightTakes.serializeAndWrite();
    backingStore.checkpoint();
    return true;
  }

  /**
   * Retrieve and remove the head of the queue.
   *
   * @return FlumeEventPointer or null if queue is empty
   */
  synchronized FlumeEventPointer removeHead(long transactionID) {
    if (backingStore.getSize() == 0) {
      return null;
    }

    long value = remove(0, transactionID);
    Preconditions.checkState(value != EMPTY, "Empty value "
          + channelNameDescriptor);

    FlumeEventPointer ptr = FlumeEventPointer.fromLong(value);
    backingStore.decrementFileID(ptr.getFileID());
    return ptr;
  }

  /**
   * Add a FlumeEventPointer to the head of the queue.
   * Called during rollbacks.
   * @param e the FlumeEventPointer to be added
   * @return true if space was available and pointer was
   * added to the queue
   */
  synchronized boolean addHead(FlumeEventPointer e) {
    //Called only during rollback, so the size of the inflight takes need not
    //be considered: normal puts through addTail already account for these
    //events while they sit in the inflight takes, so puts cannot consume the
    //space these takes need to go back in. If this condition is ever true,
    //there is a bug!
    if (backingStore.getSize() == backingStore.getCapacity()) {
      LOG.error("Could not reinsert to queue, events which were taken but "
              + "not committed. Please report this issue.");
      return false;
    }

    long value = e.toLong();
    Preconditions.checkArgument(value != EMPTY);
    backingStore.incrementFileID(e.getFileID());

    add(0, value);
    return true;
  }
  /**
   * Add a FlumeEventPointer to the tail of the queue.
   * @param e the FlumeEventPointer to be added
   * @return true if space was available and pointer
   * was added to the queue
   */
  synchronized boolean addTail(FlumeEventPointer e) {
    if (getSize() == backingStore.getCapacity()) {
      return false;
    }

    long value = e.toLong();
    Preconditions.checkArgument(value != EMPTY);
    backingStore.incrementFileID(e.getFileID());

    add(backingStore.getSize(), value);
    return true;
  }

  /**
   * Must be called when a put happens to the log. This ensures that a put
   * committed after a checkpoint can still recover all events in that
   * transaction on replay.
   * @param e the FlumeEventPointer that was put
   * @param transactionID the transaction the put belongs to
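   * <p>
   * A sketch of the assumed caller lifecycle (driven in practice by
   * FileChannel; only the ordering matters):
   * <pre>{@code
   *   queue.addWithoutCommit(ptr, txnID); // during the put
   *   // ... later, on commit:
   *   queue.addTail(ptr);                 // pointer becomes takeable
   *   queue.completeTransaction(txnID);   // inflight record cleared
   * }</pre>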
   */
  synchronized void addWithoutCommit(FlumeEventPointer e, long transactionID) {
    inflightPuts.addEvent(transactionID, e.toLong());
  }

  /**
   * Remove a FlumeEventPointer from the queue. This is
   * only used when recovering from a crash. It is not
   * legal to call this method after replayComplete has been
   * called.
   * @param e the FlumeEventPointer to be removed
   * @return true if the FlumeEventPointer was found
   * and removed
   */
  synchronized boolean remove(FlumeEventPointer e) {
    long value = e.toLong();
    Preconditions.checkArgument(value != EMPTY);
    if (queueSet == null) {
      throw new IllegalStateException("QueueSet is null, thus replayComplete"
          + " has been called which is illegal");
    }
    if (!queueSet.contains(value)) {
      return false;
    }
    searchCount++;
    long start = System.currentTimeMillis();
    for (int i = 0; i < backingStore.getSize(); i++) {
      if (get(i) == value) {
        remove(i, 0);
        FlumeEventPointer ptr = FlumeEventPointer.fromLong(value);
        backingStore.decrementFileID(ptr.getFileID());
        searchTime += System.currentTimeMillis() - start;
        return true;
      }
    }
    searchTime += System.currentTimeMillis() - start;
    return false;
  }

  /**
   * @return a copy of the set of fileIDs which are currently on the queue;
   * normally used when deciding which data files can
   * be deleted
   */
  synchronized SortedSet<Integer> getFileIDs() {
    //Java implements clone pretty well. The main place this is used is
    //in checkpointing and deleting old files, so it is best
    //to use a sorted set implementation.
    SortedSet<Integer> fileIDs =
        new TreeSet<Integer>(backingStore.getReferenceCounts());
    fileIDs.addAll(inflightPuts.getFileIDs());
    fileIDs.addAll(inflightTakes.getFileIDs());
    return fileIDs;
  }

  protected long get(int index) {
    if (index < 0 || index > backingStore.getSize() - 1) {
      throw new IndexOutOfBoundsException(String.valueOf(index)
          + channelNameDescriptor);
    }
    return backingStore.get(index);
  }

  private void set(int index, long value) {
    if (index < 0 || index > backingStore.getSize() - 1) {
      throw new IndexOutOfBoundsException(String.valueOf(index)
          + channelNameDescriptor);
    }
    backingStore.put(index, value);
  }

  protected boolean add(int index, long value) {
    if (index < 0 || index > backingStore.getSize()) {
      throw new IndexOutOfBoundsException(String.valueOf(index)
          + channelNameDescriptor);
    }

    if (backingStore.getSize() == backingStore.getCapacity()) {
      return false;
    }

    backingStore.setSize(backingStore.getSize() + 1);

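    // Insert by shifting whichever side of the queue is shorter. For
    // example, adding at index 0 just moves the head back one slot (with
    // wrap-around) and copies nothing, while adding at the next-to-last
    // position copies a single trailing element to the right.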
    if (index <= backingStore.getSize()/2) {
      // Shift left
      backingStore.setHead(backingStore.getHead() - 1);
      if (backingStore.getHead() < 0) {
        backingStore.setHead(backingStore.getCapacity() - 1);
      }
      for (int i = 0; i < index; i++) {
        set(i, get(i+1));
      }
    } else {
      // Shift right
      for (int i = backingStore.getSize() - 1; i > index; i--) {
        set(i, get(i-1));
      }
    }
    set(index, value);
    if (queueSet != null) {
      queueSet.add(value);
    }
    return true;
  }

  /**
   * Must be called when a transaction is being committed or rolled back.
   * @param transactionID
   */
  synchronized void completeTransaction(long transactionID) {
    if (!inflightPuts.completeTransaction(transactionID)) {
      inflightTakes.completeTransaction(transactionID);
    }
  }

  protected synchronized long remove(int index, long transactionID) {
    if (index < 0 || index > backingStore.getSize() - 1) {
      throw new IndexOutOfBoundsException("index = " + index
          + ", queueSize " + backingStore.getSize() +" " + channelNameDescriptor);
    }
    copyCount++;
    long start = System.currentTimeMillis();
    long value = get(index);
    if (queueSet != null) {
      queueSet.remove(value);
    }
    //if txn id = 0, we are recovering from a crash.
    if (transactionID != 0) {
      inflightTakes.addEvent(transactionID, value);
    }
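    // Close the gap left at 'index' by moving whichever side of the queue
    // is shorter: either the tail half slides one slot left, or the head
    // half slides one slot right and the head advances (with wrap-around).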
    if (index > backingStore.getSize()/2) {
      // Move tail part to left
      for (int i = index; i < backingStore.getSize() - 1; i++) {
        long rightValue = get(i+1);
        set(i, rightValue);
      }
      set(backingStore.getSize() - 1, EMPTY);
    } else {
      // Move head part to right
      for (int i = index - 1; i >= 0; i--) {
        long leftValue = get(i);
        set(i+1, leftValue);
      }
      set(0, EMPTY);
      backingStore.setHead(backingStore.getHead() + 1);
      if (backingStore.getHead() == backingStore.getCapacity()) {
        backingStore.setHead(0);
      }
    }
    backingStore.setSize(backingStore.getSize() - 1);
    copyTime += System.currentTimeMillis() - start;
    return value;
  }

  protected synchronized int getSize() {
    return backingStore.getSize() + inflightTakes.getSize();
  }

  /**
   * @return max capacity of the queue
   */
  public int getCapacity() {
    return backingStore.getCapacity();
  }

  synchronized void close() throws IOException {
    try {
      if (db != null) {
        db.close();
      }
    } catch(Exception ex) {
      LOG.warn("Error closing db", ex);
    }
    try {
      backingStore.close();
      inflightPuts.close();
      inflightTakes.close();
    } catch (IOException e) {
      LOG.warn("Error closing backing store", e);
    }
  }

  /**
   * Called when ReplayHandler has completed and thus remove(FlumeEventPointer)
   * will no longer be called.
   */
  synchronized void replayComplete() {
    String msg = "Search Count = " + searchCount + ", Search Time = " +
        searchTime + ", Copy Count = " + copyCount + ", Copy Time = " +
        copyTime;
    LOG.info(msg);
    if (db != null) {
      db.close();
    }
    queueSet = null;
    db = null;
  }

  @VisibleForTesting
  long getSearchCount() {
    return searchCount;
  }

  @VisibleForTesting
  long getCopyCount() {
    return copyCount;
  }

  /**
   * A representation of in flight events which have not yet been committed.
   * None of the methods are thread safe, and should be called from thread
   * safe methods only.
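   * <p>
   * On-disk format, written by serializeAndWrite() and read back by
   * deserialize(): a 16 byte MD5 checksum of the serialized body, followed
   * by one record per transaction, each a run of 8 byte longs:
   * transactionID, the number of events in that transaction, then that
   * many event pointers.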
   */
  class InflightEventWrapper {
    private SetMultimap<Long, Long> inflightEvents = HashMultimap.create();
    // Both of these are volatile for safe publication; they are never
    // accessed by more than one thread at a time.
    private volatile RandomAccessFile file;
    private volatile java.nio.channels.FileChannel fileChannel;
    private final MessageDigest digest;
    private final File inflightEventsFile;
    private volatile boolean syncRequired = false;
    private SetMultimap<Long, Integer> inflightFileIDs = HashMultimap.create();

    public InflightEventWrapper(File inflightEventsFile) throws Exception {
      if (!inflightEventsFile.exists()) {
        Preconditions.checkState(inflightEventsFile.createNewFile(), "Could not"
                + " create inflight events file: "
                + inflightEventsFile.getCanonicalPath());
      }
      this.inflightEventsFile = inflightEventsFile;
      file = new RandomAccessFile(inflightEventsFile, "rw");
      fileChannel = file.getChannel();
      digest = MessageDigest.getInstance("MD5");
    }

    /**
     * Complete the transaction, and remove all events from inflight list.
     * @param transactionID
     */
    public boolean completeTransaction(Long transactionID) {
      if (!inflightEvents.containsKey(transactionID)) {
        return false;
      }
      inflightEvents.removeAll(transactionID);
      inflightFileIDs.removeAll(transactionID);
      syncRequired = true;
      return true;
    }

    /**
     * Add an event pointer to the inflights list.
     * @param transactionID
     * @param pointer
     */
    public void addEvent(Long transactionID, Long pointer) {
      inflightEvents.put(transactionID, pointer);
      inflightFileIDs.put(transactionID,
              FlumeEventPointer.fromLong(pointer).getFileID());
      syncRequired = true;
    }

    /**
     * Serialize the set of in-flight events into a byte buffer and write it
     * to disk, prefixed by an MD5 checksum of the buffer contents. The
     * write is forced to disk before this method returns.
     */
    public void serializeAndWrite() throws Exception {
      Collection<Long> values = inflightEvents.values();
      if (!fileChannel.isOpen()) {
        file = new RandomAccessFile(inflightEventsFile, "rw");
        fileChannel = file.getChannel();
      }
      if (values.isEmpty()) {
        file.setLength(0L);
      }
      //What is written out?
      //Checksum - 16 bytes
      //and then each key-value pair from the map:
      //transactionid numberofeventsforthistxn listofeventpointers
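      //Worked example: two transactions holding 3 and 1 events give
      //expectedFileSize = ((2 * 2) + 4) * 8 + 16 = 80 bytes: a 16 byte
      //checksum, then (txnID, 3, p1, p2, p3) and (txnID, 1, p1) at eight
      //bytes per value.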

      try {
        int expectedFileSize = (((inflightEvents.keySet().size() * 2) //For transactionIDs and events per txn ID
                + values.size()) * 8) //Event pointers
                + 16; //Checksum
        //There is no real need to fill the channel with 0s, since we
        //will write the exact number of bytes as the expected file size.
        file.setLength(expectedFileSize);
        Preconditions.checkState(file.length() == expectedFileSize,
                "Expected File size of inflight events file does not match the "
                + "current file size. Checkpoint is incomplete.");
        file.seek(0);
        final ByteBuffer buffer = ByteBuffer.allocate(expectedFileSize);
        LongBuffer longBuffer = buffer.asLongBuffer();
        for (Long txnID : inflightEvents.keySet()) {
          Set<Long> pointers = inflightEvents.get(txnID);
          longBuffer.put(txnID);
          longBuffer.put((long) pointers.size());
          LOG.debug("Number of events inserted into "
                  + "inflights file: " + String.valueOf(pointers.size())
                  + " file: " + inflightEventsFile.getCanonicalPath());
          long[] written = ArrayUtils.toPrimitive(
                  pointers.toArray(new Long[0]));
          longBuffer.put(written);
        }
        byte[] checksum = digest.digest(buffer.array());
        file.write(checksum);
        buffer.position(0);
        fileChannel.write(buffer);
        fileChannel.force(true);
        syncRequired = false;
      } catch (IOException ex) {
        LOG.error("Error while writing checkpoint to disk.", ex);
        throw ex;
      }
    }

    /**
     * Read the inflights file and return a
     * {@link com.google.common.collect.SetMultimap}
     * of transactionIDs to events that were inflight.
     *
     * @return - map of inflight events per txnID.
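     *
     * The layout read here mirrors serializeAndWrite(): a 16 byte MD5
     * checksum, then (transactionID, event count, event pointers...)
     * records of 8 byte longs, consumed until the buffer underflows at
     * the end of the data.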
     *
     */
    public SetMultimap<Long, Long> deserialize()
            throws IOException, BadCheckpointException {
      SetMultimap<Long, Long> inflights = HashMultimap.create();
      if (!fileChannel.isOpen()) {
        file = new RandomAccessFile(inflightEventsFile, "rw");
        fileChannel = file.getChannel();
      }
      if (file.length() == 0) {
        return inflights;
      }
      file.seek(0);
      byte[] checksum = new byte[16];
      // readFully, rather than read, so a truncated checkpoint fails loudly
      // instead of yielding a partially filled checksum.
      file.readFully(checksum);
      ByteBuffer buffer = ByteBuffer.allocate(
              (int)(file.length() - file.getFilePointer()));
      fileChannel.read(buffer);
      byte[] fileChecksum = digest.digest(buffer.array());
      if (!Arrays.equals(checksum, fileChecksum)) {
        throw new BadCheckpointException("Checksum of inflights file differs"
                + " from the checksum expected.");
      }
      buffer.position(0);
      LongBuffer longBuffer = buffer.asLongBuffer();
      try {
        while (true) {
          long txnID = longBuffer.get();
          int numEvents = (int)(longBuffer.get());
          for (int i = 0; i < numEvents; i++) {
            long val = longBuffer.get();
            inflights.put(txnID, val);
          }
        }
      } catch (BufferUnderflowException ex) {
        LOG.debug("Reached end of inflights buffer. Long buffer position ="
                + String.valueOf(longBuffer.position()));
      }
      return inflights;
    }

    public int getSize() {
      return inflightEvents.size();
    }

    public boolean syncRequired() {
      return syncRequired;
    }

    public Collection<Integer> getFileIDs() {
      return inflightFileIDs.values();
    }

    //Needed for testing.
    public Collection<Long> getInFlightPointers() {
      return inflightEvents.values();
    }

    public void close() throws IOException {
      file.close();
    }
  }
}