// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only 1 $
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.flume.sink.hbase;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Maps;
import com.google.common.primitives.UnsignedBytes;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.stumbleupon.async.Callback;
import org.apache.flume.Channel;
import org.apache.flume.ChannelException;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.FlumeException;
import org.apache.flume.Transaction;
import org.apache.flume.conf.BatchSizeSupported;
import org.apache.flume.conf.Configurable;
import org.apache.flume.conf.ConfigurationException;
import org.apache.flume.instrumentation.SinkCounter;
import org.apache.flume.sink.AbstractSink;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.zookeeper.ZKConfig;
import org.hbase.async.AtomicIncrementRequest;
import org.hbase.async.Config;
import org.hbase.async.HBaseClient;
import org.hbase.async.PutRequest;
import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
/**
* A simple sink which reads events from a channel and writes them to HBase.
* This Sink uses an asynchronous API internally and is therefore likely to
* perform better than a sink that writes to HBase synchronously.
* The Hbase configuration is picked up from the first hbase-site.xml
* encountered in the classpath. This sink supports batch reading of
* events from the channel, and writing them to Hbase, to minimize the number
* of flushes on the hbase tables. To use this sink, it has to be configured
* with certain mandatory parameters:
*
* table: The name of the table in Hbase to write to.
* columnFamily: The column family in Hbase to write to.
* Other optional parameters are:
* serializer: A class implementing
* {@link AsyncHbaseEventSerializer}.
* An instance of
* this class will be used to serialize events which are written to hbase.
* serializer.*: Passed in the configure() method to
* serializer
* as an object of {@link org.apache.flume.Context}.
* batchSize: This is the batch size used by the client. This is the
* maximum number of events the sink will commit per transaction. The default
* batch size is 100 events.
*
* timeout: The length of time in milliseconds the sink waits for
* callbacks from hbase for all events in a transaction.
* If no timeout is specified, the sink will wait forever.
*
* Note: Hbase does not guarantee atomic commits on multiple
* rows. So if a subset of events in a batch are written to disk by Hbase and
* Hbase fails, the flume transaction is rolled back, causing flume to write
* all the events in the transaction all over again, which will cause
* duplicates. The serializer is expected to take care of the handling of
* duplicates etc. HBase also does not support batch increments, so if
* multiple increments are returned by the serializer, then HBase failure
* will cause them to be re-written, when HBase comes back up.
*/
public class AsyncHBaseSink extends AbstractSink implements Configurable, BatchSizeSupported {
// ---- Sink configuration state ----
// Presumably populated from the "table", "columnFamily" and "batchSize" parameters
// described in the class Javadoc; the configure() method is not visible here -- confirm.
private String tableName;
private byte[] columnFamily;
private long batchSize;
// Class-wide SLF4J logger.
private static final Logger logger = LoggerFactory.getLogger(AsyncHBaseSink.class);
// Serializer used for events; see the "serializer" parameter in the class Javadoc.
private AsyncHbaseEventSerializer serializer;
// Package-private (no access modifier) so that tests can reach it.
@VisibleForTesting
Config asyncClientConfig;
private String eventSerializerType;
private Context serializerContext;
// Created on demand: process() calls initHBaseClient() whenever this is null.
private HBaseClient client;
// Hadoop configuration handed in through the constructor; null when the no-arg
// constructor is used.
private Configuration conf;
private Transaction txn;
// process() throws EventDeliveryException while this is false.
private volatile boolean open = false;
private SinkCounter sinkCounter;
// Presumably the "timeout" parameter (milliseconds) from the class Javadoc -- confirm.
private long timeout;
private String zkQuorum;
private String zkBaseDir;
private ExecutorService sinkCallbackPool;
// Test-only switches: the public constructors always pass false for both; only the
// @VisibleForTesting constructor can set them.
private boolean isTimeoutTest;
private boolean isCoalesceTest;
// Defaults: enableWal is on, batchIncrements is off.
private boolean enableWal = true;
private boolean batchIncrements = false;
// NOTE(review): volatile only guarantees visibility; if this counter is incremented
// from more than one callback thread, the increment is not atomic -- confirm usage.
private volatile int totalCallbacksReceived = 0;
private int maxConsecutiveFails;
// Cleared at the start of every process() call when non-null.
// NOTE(review): raw Map -- the type parameters appear to have been lost; restore them
// once the key/value types used elsewhere in the file are confirmed.
private Map incrementBuffer;
// The HBaseClient buffers the requests until a callback is received. In the event of a
// timeout, there is no way to clear these buffers. If there is a major cluster issue, this
// buffer can become too big and cause crashes. So if we hit a fixed number of HBase write
// failures/timeouts, then close the HBase Client (gracefully or not) and force a GC to get rid
// of the buffered data.
private int consecutiveHBaseFailures = 0;
private boolean lastTxnFailed = false;
// Does not need to be thread-safe. Always called only from the sink's
// process method.
// NOTE(review): raw Comparator, and an instance field despite the constant-style name;
// UnsignedBytes.lexicographicalComparator() compares byte arrays, so this is presumably
// meant to be Comparator<byte[]> -- confirm against the call sites.
private final Comparator COMPARATOR = UnsignedBytes.lexicographicalComparator();
/**
 * Creates a sink without a caller-supplied Hadoop configuration.
 * Delegates to {@link #AsyncHBaseSink(Configuration)} with {@code null}.
 */
public AsyncHBaseSink() {
this(null);
}
/**
 * Creates a sink that keeps the given Hadoop configuration, with both
 * test-only flags ({@code isTimeoutTest}, {@code isCoalesceTest}) disabled.
 *
 * @param conf configuration to store on the sink; the no-arg constructor
 *             passes {@code null} here
 */
public AsyncHBaseSink(Configuration conf) {
this(conf, false, false);
}
/**
 * Package-private constructor that the public constructors delegate to;
 * exposed so tests can switch on the test-only flags.
 * It only records its arguments in the corresponding fields.
 *
 * @param conf           configuration to store on the sink (may be {@code null})
 * @param isTimeoutTest  value for the {@code isTimeoutTest} field
 * @param isCoalesceTest value for the {@code isCoalesceTest} field
 */
@VisibleForTesting
AsyncHBaseSink(Configuration conf, boolean isTimeoutTest,
boolean isCoalesceTest) {
this.conf = conf;
this.isTimeoutTest = isTimeoutTest;
this.isCoalesceTest = isCoalesceTest;
}
@Override
public Status process() throws EventDeliveryException {
/*
* Reference to the boolean representing failure of the current transaction.
* Since each txn gets a new boolean, failure of one txn will not affect
* the next even if errbacks for the current txn get called while
* the next one is being processed.
*
*/
if (!open) {
throw new EventDeliveryException("Sink was never opened. " +
"Please fix the configuration.");
}
if (client == null) {
client = initHBaseClient();
if (client == null) {
throw new EventDeliveryException("Could not establish connection to HBase!");
}
}
AtomicBoolean txnFail = new AtomicBoolean(false);
AtomicInteger callbacksReceived = new AtomicInteger(0);
AtomicInteger callbacksExpected = new AtomicInteger(0);
final Lock lock = new ReentrantLock();
final Condition condition = lock.newCondition();
if (incrementBuffer != null) {
incrementBuffer.clear();
}
/*
* Callbacks can be reused per transaction, since they share the same
* locks and conditions.
*/
Callback