// com.hmsonline.storm.cassandra.bolt.AbstractBatchingBolt Maven / Gradle / Ivy
package com.hmsonline.storm.cassandra.bolt;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.hmsonline.storm.cassandra.bolt.mapper.TupleMapper;
import com.hmsonline.storm.cassandra.StormCassandraConstants;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.tuple.Tuple;
import backtype.storm.utils.Utils;
/**
 * Abstract {@code IRichBolt} implementation that caches/batches
 * {@code backtype.storm.tuple.Tuple} objects and processes them on a separate
 * thread.
 * <p>
 * Subclasses are obligated to implement the
 * {@code executeBatch(List<Tuple> inputs)} method, called when a batch of
 * tuples should be processed.
 * <p>
 * Subclasses that override the {@code prepare()} and {@code cleanup()}
 * methods must call the corresponding methods on the superclass
 * (i.e. {@code super.prepare()} and {@code super.cleanup()}) to
 * ensure proper initialization and termination.
 *
 * @author ptgoetz
 */
@SuppressWarnings("serial")
public abstract class AbstractBatchingBolt extends CassandraBolt implements IRichBolt {

    private static final Logger LOG = LoggerFactory.getLogger(AbstractBatchingBolt.class);

    /** Acknowledgement strategy; defaults to {@code AckStrategy.ACK_IGNORE}. */
    protected AckStrategy ackStrategy = AckStrategy.ACK_IGNORE;

    /** Collector handed to {@link #prepare}; used to ack tuples on receipt. */
    protected OutputCollector collector;

    /** Tuples received by {@link #execute} and not yet picked up by the batch thread. */
    protected LinkedBlockingQueue<Tuple> queue;

    /** Background thread that drains {@link #queue}; created and started in {@link #prepare}. */
    private BatchThread batchThread;

    public AbstractBatchingBolt(String clientConfigKey, TupleMapper tupleMapper) {
        super(clientConfigKey, tupleMapper);
    }

    /**
     * Initializes the superclass, creates the tuple queue and starts the
     * batch thread.
     * <p>
     * The maximum batch size is read from {@code stormConf} under
     * {@code StormCassandraConstants.CASSANDRA_BATCH_MAX_SIZE} (default
     * {@code 0}). A value of zero or less means a batch takes everything that
     * is queued at the time it is assembled.
     *
     * @param stormConf topology configuration
     * @param context   topology context, passed on to the superclass
     * @param collector collector stored for later acknowledgements
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        super.prepare(stormConf, context);
        int batchMaxSize = Utils.getInt(Utils.get(stormConf, StormCassandraConstants.CASSANDRA_BATCH_MAX_SIZE, 0));
        this.collector = collector;
        this.queue = new LinkedBlockingQueue<Tuple>();
        this.batchThread = new BatchThread(batchMaxSize);
        this.batchThread.start();
    }

    /**
     * Queues the tuple for batch processing. When the ack strategy is
     * {@code AckStrategy.ACK_ON_RECEIVE} the tuple is acked before it is
     * queued.
     *
     * @param input the tuple to queue
     */
    @Override
    public void execute(Tuple input) {
        if (this.ackStrategy == AckStrategy.ACK_ON_RECEIVE) {
            this.collector.ack(input);
        }
        this.queue.offer(input);
    }

    /**
     * Asks the batch thread to stop and then delegates to the superclass.
     */
    @Override
    public void cleanup() {
        // prepare() may never have been called, in which case there is no thread to stop.
        if (this.batchThread != null) {
            this.batchThread.stopRunning();
        }
        super.cleanup();
    }

    /**
     * Process a {@code java.util.List} of
     * {@code backtype.storm.tuple.Tuple} objects that have been
     * cached/batched.
     * <p>
     * This method is analogous to the {@code execute(Tuple input)} method
     * defined in the bolt interface. Subclasses are responsible for processing
     * and/or ack'ing tuples as necessary. The only difference is that tuples
     * are passed in as a list, as opposed to one at a time.
     * <p>
     * This method is invoked on the batch thread, not on the thread that
     * calls {@code execute(Tuple input)}.
     *
     * @param inputs the tuples making up the batch; never empty
     */
    public abstract void executeBatch(List<Tuple> inputs);

    /**
     * Daemon thread that blocks until at least one tuple is queued, gathers a
     * batch and hands it to {@link #executeBatch(List)}.
     */
    private class BatchThread extends Thread {

        /** Upper bound on tuples per batch; zero or less means no bound. */
        private final int batchMaxSize;

        /** Written by the thread calling {@link #stopRunning()}, read by this thread. */
        private volatile boolean stopRequested = false;

        BatchThread(int batchMaxSize) {
            super("batch-bolt-thread");
            setDaemon(true);
            this.batchMaxSize = batchMaxSize;
        }

        @Override
        public void run() {
            while (!stopRequested) {
                try {
                    List<Tuple> batch = new ArrayList<Tuple>();
                    // drainTo() does not block, take() does.
                    batch.add(queue.take());
                    if (batchMaxSize > 0) {
                        // One slot is already used by the tuple obtained from take().
                        queue.drainTo(batch, batchMaxSize - 1);
                    } else {
                        queue.drainTo(batch);
                    }
                    executeBatch(batch);
                } catch (InterruptedException e) {
                    if (stopRequested) {
                        // Expected wake-up from stopRunning(): keep the flag set and exit.
                        Thread.currentThread().interrupt();
                        break;
                    }
                    LOG.error("Interupted in batching bolt.", e);
                } catch (RuntimeException e) {
                    // Keep the thread alive; otherwise the queue would grow without ever being drained.
                    LOG.error("Batch processing failed in batching bolt.", e);
                }
            }
        }

        /**
         * Requests termination and interrupts the thread so that a blocked
         * {@code queue.take()} returns promptly.
         */
        void stopRunning() {
            this.stopRequested = true;
            interrupt();
        }
    }

    public AckStrategy getAckStrategy() {
        return ackStrategy;
    }

    public void setAckStrategy(AckStrategy ackStrategy) {
        this.ackStrategy = ackStrategy;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy