org.apache.flume.sink.hbase.AsyncHBaseSink Maven / Gradle / Ivy

Go to download
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.flume.sink.hbase;

import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Maps;
import com.google.common.primitives.UnsignedBytes;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.FlumeException;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.zookeeper.ZKConfig;
import org.hbase.async.AtomicIncrementRequest;
import org.hbase.async.HBaseClient;
import org.hbase.async.PutRequest;
import org.jboss.netty.channel.socket.nio
  .NioClientSocketChannelFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.stumbleupon.async.Callback;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.flume.ChannelException;
import org.apache.flume.instrumentation.SinkCounter;

/**
*
* A simple sink which reads events from a channel and writes them to HBase.
* This Sink uses an aysnchronous API internally and is likely to
* perform better.
* The Hbase configution is picked up from the first hbase-site.xml
* encountered in the classpath. This sink supports batch reading of
* events from the channel, and writing them to Hbase, to minimize the number
* of flushes on the hbase tables. To use this sink, it has to be configured
* with certain mandatory parameters:
*
* table:  The name of the table in Hbase to write to. 

* columnFamily:  The column family in Hbase to write to.

* Other optional parameters are:

* serializer: A class implementing
*  {@link AsyncHbaseEventSerializer}.
*  An instance of
* this class will be used to serialize events which are written to hbase.

* serializer.*: Passed in the configure() method to
* serializer
* as an object of {@link org.apache.flume.Context}.

* batchSize: This is the batch size used by the client. This is the
* maximum number of events the sink will commit per transaction. The default
* batch size is 100 events.
* 

* timeout:  The length of time in milliseconds the sink waits for
* callbacks from hbase for all events in a transaction.
* If no timeout is specified, the sink will wait forever.
*
* Note:  Hbase does not guarantee atomic commits on multiple
* rows. So if a subset of events in a batch are written to disk by Hbase and
* Hbase fails, the flume transaction is rolled back, causing flume to write
* all the events in the transaction all over again, which will cause
* duplicates. The serializer is expected to take care of the handling of
* duplicates etc. HBase also does not support batch increments, so if
* multiple increments are returned by the serializer, then HBase failure
* will cause them to be re-written, when HBase comes back up.
*/
public class AsyncHBaseSink extends AbstractSink implements Configurable {

  private String tableName;
  private byte[] columnFamily;
  private long batchSize;
  private static final Logger logger =
          LoggerFactory.getLogger(AsyncHBaseSink.class);
  private AsyncHbaseEventSerializer serializer;
  private String eventSerializerType;
  private Context serializerContext;
  private HBaseClient client;
  private Configuration conf;
  private Transaction txn;
  private volatile boolean open = false;
  private SinkCounter sinkCounter;
  private long timeout;
  private String zkQuorum;
  private String zkBaseDir;
  private ExecutorService sinkCallbackPool;
  private boolean isTimeoutTest;
  private boolean isCoalesceTest;
  private boolean enableWal = true;
  private boolean batchIncrements = false;
  private volatile int totalCallbacksReceived = 0;
  private Map incrementBuffer;

  // Does not need to be thread-safe. Always called only from the sink's
  // process method.
  private final Comparator COMPARATOR = UnsignedBytes
    .lexicographicalComparator();

  public AsyncHBaseSink(){
    this(null);
  }

  public AsyncHBaseSink(Configuration conf) {
    this(conf, false, false);
  }

  @VisibleForTesting
  AsyncHBaseSink(Configuration conf, boolean isTimeoutTest,
    boolean isCoalesceTest) {
    this.conf = conf;
    this.isTimeoutTest = isTimeoutTest;
    this.isCoalesceTest = isCoalesceTest;
  }

  @Override
  public Status process() throws EventDeliveryException {
    /*
     * Reference to the boolean representing failure of the current transaction.
     * Since each txn gets a new boolean, failure of one txn will not affect
     * the next even if errbacks for the current txn get called while
     * the next one is being processed.
     *
     */
    if (!open) {
      throw new EventDeliveryException("Sink was never opened. " +
          "Please fix the configuration.");
    }
    AtomicBoolean txnFail = new AtomicBoolean(false);
    AtomicInteger callbacksReceived = new AtomicInteger(0);
    AtomicInteger callbacksExpected = new AtomicInteger(0);
    final Lock lock = new ReentrantLock();
    final Condition condition = lock.newCondition();
    if (incrementBuffer != null) {
      incrementBuffer.clear();
    }
    /*
     * Callbacks can be reused per transaction, since they share the same
     * locks and conditions.
     */
    Callback putSuccessCallback =
            new SuccessCallback(
            lock, callbacksReceived, condition);
    Callback putFailureCallback =
            new FailureCallback(
            lock, callbacksReceived, txnFail, condition);

    Callback incrementSuccessCallback =
            new SuccessCallback(
            lock, callbacksReceived, condition);
    Callback incrementFailureCallback =
            new FailureCallback(
            lock, callbacksReceived, txnFail, condition);

    Status status = Status.READY;
    Channel channel = getChannel();
    int i = 0;
    try {
      txn = channel.getTransaction();
      txn.begin();
      for (; i < batchSize; i++) {
        Event event = channel.take();
        if (event == null) {
          status = Status.BACKOFF;
          if (i == 0) {
            sinkCounter.incrementBatchEmptyCount();
          } else {
            sinkCounter.incrementBatchUnderflowCount();
          }
          break;
        } else {
          serializer.setEvent(event);
          List actions = serializer.getActions();
          List increments = serializer.getIncrements();
          callbacksExpected.addAndGet(actions.size());
          if (!batchIncrements) {
            callbacksExpected.addAndGet(increments.size());
          }

          for (PutRequest action : actions) {
            action.setDurable(enableWal);
            client.put(action).addCallbacks(putSuccessCallback, putFailureCallback);
          }
          for (AtomicIncrementRequest increment : increments) {
            if (batchIncrements) {
              CellIdentifier identifier = new CellIdentifier(increment.key(),
                increment.qualifier());
              AtomicIncrementRequest request
                = incrementBuffer.get(identifier);
              if (request == null) {
                incrementBuffer.put(identifier, increment);
              } else {
                request.setAmount(request.getAmount() + increment.getAmount());
              }
            } else {
              client.atomicIncrement(increment).addCallbacks(
                incrementSuccessCallback, incrementFailureCallback);
            }
          }
        }
      }
      if (batchIncrements) {
        Collection increments = incrementBuffer.values();
        for (AtomicIncrementRequest increment : increments) {
          client.atomicIncrement(increment).addCallbacks(
            incrementSuccessCallback, incrementFailureCallback);
        }
        callbacksExpected.addAndGet(increments.size());
      }
      client.flush();
    } catch (Throwable e) {
      this.handleTransactionFailure(txn);
      this.checkIfChannelExceptionAndThrow(e);
    }
    if (i == batchSize) {
      sinkCounter.incrementBatchCompleteCount();
    }
    sinkCounter.addToEventDrainAttemptCount(i);

    lock.lock();
    long startTime = System.nanoTime();
    long timeRemaining;
    try {
      while ((callbacksReceived.get() < callbacksExpected.get())
              && !txnFail.get()) {
        timeRemaining = timeout - (System.nanoTime() - startTime);
        timeRemaining = (timeRemaining >= 0) ? timeRemaining : 0;
        try {
          if (!condition.await(timeRemaining, TimeUnit.NANOSECONDS)) {
            txnFail.set(true);
            logger.warn("HBase callbacks timed out. "
                    + "Transaction will be rolled back.");
          }
        } catch (Exception ex) {
          logger.error("Exception while waiting for callbacks from HBase.");
          this.handleTransactionFailure(txn);
          Throwables.propagate(ex);
        }
      }
    } finally {
      lock.unlock();
    }

    if (isCoalesceTest) {
      totalCallbacksReceived += callbacksReceived.get();
    }

    /*
     * At this point, either the txn has failed
     * or all callbacks received and txn is successful.
     *
     * This need not be in the monitor, since all callbacks for this txn
     * have been received. So txnFail will not be modified any more(even if
     * it is, it is set from true to true only - false happens only
     * in the next process call).
     *
     */
    if (txnFail.get()) {
      this.handleTransactionFailure(txn);
      throw new EventDeliveryException("Could not write events to Hbase. " +
          "Transaction failed, and rolled back.");
    } else {
      try {
        txn.commit();
        txn.close();
        sinkCounter.addToEventDrainSuccessCount(i);
      } catch (Throwable e) {
        this.handleTransactionFailure(txn);
        this.checkIfChannelExceptionAndThrow(e);
      }
    }

    return status;
  }

  @Override
  public void configure(Context context) {
    tableName = context.getString(HBaseSinkConfigurationConstants.CONFIG_TABLE);
    String cf = context.getString(
        HBaseSinkConfigurationConstants.CONFIG_COLUMN_FAMILY);
    batchSize = context.getLong(
        HBaseSinkConfigurationConstants.CONFIG_BATCHSIZE, new Long(100));
    serializerContext = new Context();
    //If not specified, will use HBase defaults.
    eventSerializerType = context.getString(
        HBaseSinkConfigurationConstants.CONFIG_SERIALIZER);
    Preconditions.checkNotNull(tableName,
        "Table name cannot be empty, please specify in configuration file");
    Preconditions.checkNotNull(cf,
        "Column family cannot be empty, please specify in configuration file");
    //Check foe event serializer, if null set event serializer type
    if(eventSerializerType == null || eventSerializerType.isEmpty()) {
      eventSerializerType =
          "org.apache.flume.sink.hbase.SimpleAsyncHbaseEventSerializer";
      logger.info("No serializer defined, Will use default");
    }
    serializerContext.putAll(context.getSubProperties(
            HBaseSinkConfigurationConstants.CONFIG_SERIALIZER_PREFIX));
    columnFamily = cf.getBytes(Charsets.UTF_8);
    try {
      @SuppressWarnings("unchecked")
      Class clazz =
      (Class)
      Class.forName(eventSerializerType);
      serializer = clazz.newInstance();
      serializer.configure(serializerContext);
      serializer.initialize(tableName.getBytes(Charsets.UTF_8), columnFamily);
    } catch (Exception e) {
      logger.error("Could not instantiate event serializer." , e);
      Throwables.propagate(e);
    }

    if(sinkCounter == null) {
      sinkCounter = new SinkCounter(this.getName());
    }
    timeout = context.getLong(HBaseSinkConfigurationConstants.CONFIG_TIMEOUT,
            HBaseSinkConfigurationConstants.DEFAULT_TIMEOUT);
    if(timeout <= 0){
      logger.warn("Timeout should be positive for Hbase sink. "
              + "Sink will not timeout.");
      timeout = HBaseSinkConfigurationConstants.DEFAULT_TIMEOUT;
    }
    //Convert to nanos.
    timeout = TimeUnit.MILLISECONDS.toNanos(timeout);

    zkQuorum = context.getString(
        HBaseSinkConfigurationConstants.ZK_QUORUM, "").trim();
    if(!zkQuorum.isEmpty()) {
      zkBaseDir = context.getString(
          HBaseSinkConfigurationConstants.ZK_ZNODE_PARENT,
          HBaseSinkConfigurationConstants.DEFAULT_ZK_ZNODE_PARENT);
    } else {
      if (conf == null) { //In tests, we pass the conf in.
        conf = HBaseConfiguration.create();
      }
      zkQuorum = ZKConfig.getZKQuorumServersString(conf);
      zkBaseDir = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
        HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
    }
    Preconditions.checkState(zkQuorum != null && !zkQuorum.isEmpty(),
        "The Zookeeper quorum cannot be null and should be specified.");

    enableWal = context.getBoolean(HBaseSinkConfigurationConstants
      .CONFIG_ENABLE_WAL, HBaseSinkConfigurationConstants.DEFAULT_ENABLE_WAL);
    logger.info("The write to WAL option is set to: " + String.valueOf(enableWal));
    if(!enableWal) {
      logger.warn("AsyncHBaseSink's enableWal configuration is set to false. " +
        "All writes to HBase will have WAL disabled, and any data in the " +
        "memstore of this region in the Region Server could be lost!");
    }

    batchIncrements = context.getBoolean(
      HBaseSinkConfigurationConstants.CONFIG_COALESCE_INCREMENTS,
      HBaseSinkConfigurationConstants.DEFAULT_COALESCE_INCREMENTS);

    if(batchIncrements) {
      incrementBuffer = Maps.newHashMap();
      logger.info("Increment coalescing is enabled. Increments will be " +
        "buffered.");
    }
  }

  @VisibleForTesting
  int getTotalCallbacksReceived() {
    return totalCallbacksReceived;
  }

  @VisibleForTesting
  boolean isConfNull() {
    return conf == null;
  }
  @Override
  public void start(){
    Preconditions.checkArgument(client == null, "Please call stop "
            + "before calling start on an old instance.");
    sinkCounter.start();
    sinkCounter.incrementConnectionCreatedCount();
      sinkCallbackPool = Executors.newCachedThreadPool(new ThreadFactoryBuilder()
        .setNameFormat(this.getName() + " HBase Call Pool").build());
    logger.info("Callback pool created");
    if(!isTimeoutTest) {
      client = new HBaseClient(zkQuorum, zkBaseDir, sinkCallbackPool);
    } else {
      client = new HBaseClient(zkQuorum, zkBaseDir,
        new NioClientSocketChannelFactory(Executors
          .newSingleThreadExecutor(),
          Executors.newSingleThreadExecutor()));
    }
    final CountDownLatch latch = new CountDownLatch(1);
    final AtomicBoolean fail = new AtomicBoolean(false);
    client.ensureTableFamilyExists(
            tableName.getBytes(Charsets.UTF_8), columnFamily).addCallbacks(
            new Callback() {
              @Override
              public Object call(Object arg) throws Exception {
                latch.countDown();
                logger.info("table found");
                return null;
              }
            },
            new Callback() {
              @Override
              public Object call(Object arg) throws Exception {
                fail.set(true);
                latch.countDown();
                return null;
              }
            });

    try {
      logger.info("waiting on callback");
      latch.await();
      logger.info("callback received");
    } catch (InterruptedException e) {
      sinkCounter.incrementConnectionFailedCount();
      throw new FlumeException(
          "Interrupted while waiting for Hbase Callbacks", e);
    }
    if(fail.get()){
      sinkCounter.incrementConnectionFailedCount();
      client.shutdown();
      client = null;
      throw new FlumeException(
          "Could not start sink. " +
          "Table or column family does not exist in Hbase.");
    } else {
      open = true;
    }
    client.setFlushInterval((short) 0);
    super.start();
  }

  @Override
  public void stop(){
    serializer.cleanUp();
    if (client != null) {
      client.shutdown();
    }
    sinkCounter.incrementConnectionClosedCount();
    sinkCounter.stop();

    try {
      if (sinkCallbackPool != null) {
        sinkCallbackPool.shutdown();
        if (!sinkCallbackPool.awaitTermination(5, TimeUnit.SECONDS)) {
          sinkCallbackPool.shutdownNow();
        }
      }
    } catch (InterruptedException e) {
      logger.error("Interrupted while waiting for asynchbase sink pool to " +
        "die", e);
      if (sinkCallbackPool != null) {
        sinkCallbackPool.shutdownNow();
      }
    }
    sinkCallbackPool = null;
    client = null;
    conf = null;
    open = false;
    super.stop();
  }

  private void handleTransactionFailure(Transaction txn)
      throws EventDeliveryException {
    try {
      txn.rollback();
    } catch (Throwable e) {
      logger.error("Failed to commit transaction." +
          "Transaction rolled back.", e);
      if(e instanceof Error || e instanceof RuntimeException){
        logger.error("Failed to commit transaction." +
            "Transaction rolled back.", e);
        Throwables.propagate(e);
      } else {
        logger.error("Failed to commit transaction." +
            "Transaction rolled back.", e);
        throw new EventDeliveryException("Failed to commit transaction." +
            "Transaction rolled back.", e);
      }
    } finally {
      txn.close();
    }
  }
  private class SuccessCallback implements Callback {
    private Lock lock;
    private AtomicInteger callbacksReceived;
    private Condition condition;
    private final boolean isTimeoutTesting;

    public SuccessCallback(Lock lck, AtomicInteger callbacksReceived,
            Condition condition) {
      lock = lck;
      this.callbacksReceived = callbacksReceived;
      this.condition = condition;
      isTimeoutTesting = isTimeoutTest;
    }

    @Override
    public R call(T arg) throws Exception {
      if (isTimeoutTesting) {
        try {
          //tests set timeout to 10 seconds, so sleep for 4 seconds
          TimeUnit.NANOSECONDS.sleep(TimeUnit.SECONDS.toNanos(4));
        } catch (InterruptedException e) {
          //ignore
        }
      }
      doCall();
      return null;
    }

    private void doCall() throws Exception {
      callbacksReceived.incrementAndGet();
      lock.lock();
      try{
        condition.signal();
      } finally {
        lock.unlock();
      }
    }
  }

  private class FailureCallback implements Callback {
    private Lock lock;
    private AtomicInteger callbacksReceived;
    private AtomicBoolean txnFail;
    private Condition condition;
    private final boolean isTimeoutTesting;
    public FailureCallback(Lock lck, AtomicInteger callbacksReceived,
            AtomicBoolean txnFail, Condition condition){
      this.lock = lck;
      this.callbacksReceived = callbacksReceived;
      this.txnFail = txnFail;
      this.condition = condition;
      isTimeoutTesting = isTimeoutTest;
    }

    @Override
    public R call(T arg) throws Exception {
      if (isTimeoutTesting) {
        //tests set timeout to 10 seconds, so sleep for 4 seconds
        try {
          TimeUnit.NANOSECONDS.sleep(TimeUnit.SECONDS.toNanos(4));
        } catch (InterruptedException e) {
          //ignore
        }
      }
      doCall();
      return null;
    }

    private void doCall() throws Exception {
      callbacksReceived.incrementAndGet();
      this.txnFail.set(true);
      lock.lock();
      try {
        condition.signal();
      } finally {
        lock.unlock();
      }
    }
  }

  private void checkIfChannelExceptionAndThrow(Throwable e)
          throws EventDeliveryException {
    if (e instanceof ChannelException) {
      throw new EventDeliveryException("Error in processing transaction.", e);
    } else if (e instanceof Error || e instanceof RuntimeException) {
      Throwables.propagate(e);
    }
    throw new EventDeliveryException("Error in processing transaction.", e);
  }

  private class CellIdentifier {
    private final byte[] row;
    private final byte[] column;
    private final int hashCode;
    // Since the sink operates only on one table and one cf,
    // we use the data from the owning sink
    public CellIdentifier(byte[] row, byte[] column) {
      this.row = row;
      this.column = column;
      this.hashCode =
        (Arrays.hashCode(row) * 31) * (Arrays.hashCode(column) * 31);
    }

    @Override
    public int hashCode() {
      return hashCode;
    }

    // Since we know that this class is used from only this class,
    // skip the class comparison to save time
    @Override
    public boolean equals(Object other) {
      CellIdentifier o = (CellIdentifier) other;
      if (other == null) {
        return false;
      } else {
        return (COMPARATOR.compare(row, o.row) == 0
          && COMPARATOR.compare(column, o.column) == 0);
      }
    }
  }
}