All downloads are free. Search and download functionality uses the official Maven repository.

com.hmsonline.storm.cassandra.bolt.AbstractBatchingBolt Maven / Gradle / Ivy

package com.hmsonline.storm.cassandra.bolt;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.hmsonline.storm.cassandra.bolt.mapper.TupleMapper;
import com.hmsonline.storm.cassandra.StormCassandraConstants;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.tuple.Tuple;
import backtype.storm.utils.Utils;

/**
 * Abstract IRichBolt implementation that caches/batches
 * backtype.storm.tuple.Tuple and processes them on a separate
 * thread.
 * 

*

* Subclasses are obligated to implement the * executeBatch(List inputs) method, called when a batch of * tuples should be processed. *

* Subclasses that overide the prepare() and cleanup() * methods must call the corresponding methods on the superclass * (i.e. super.prepare() and super.cleanup()) to * ensure proper initialization and termination. * * @author ptgoetz */ @SuppressWarnings("serial") public abstract class AbstractBatchingBolt extends CassandraBolt implements IRichBolt { private static final Logger LOG = LoggerFactory.getLogger(AbstractBatchingBolt.class); protected AckStrategy ackStrategy = AckStrategy.ACK_IGNORE; protected OutputCollector collector; protected LinkedBlockingQueue queue; private BatchThread batchThread; public AbstractBatchingBolt(String clientConfigKey, TupleMapper tupleMapper) { super(clientConfigKey, tupleMapper); } @SuppressWarnings({ "rawtypes", "unchecked" }) @Override public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { super.prepare(stormConf, context); int batchMaxSize = Utils.getInt(Utils.get(stormConf, StormCassandraConstants.CASSANDRA_BATCH_MAX_SIZE, 0)); this.collector = collector; this.queue = new LinkedBlockingQueue(); this.batchThread = new BatchThread(batchMaxSize); this.batchThread.start(); } @Override public void execute(Tuple input) { if (this.ackStrategy == AckStrategy.ACK_ON_RECEIVE) { this.collector.ack(input); } this.queue.offer(input); } @Override public void cleanup() { this.batchThread.stopRunning(); super.cleanup(); } /** * Process a java.util.List of * backtype.storm.tuple.Tuple objects that have been * cached/batched. *

* This method is analagous to the execute(Tuple input) method * defined in the bolt interface. Subclasses are responsible for processing * and/or ack'ing tuples as necessary. The only difference is that tuples * are passed in as a list, as opposed to one at a time. *

* * * @param inputs */ public abstract void executeBatch(List inputs); private class BatchThread extends Thread { int batchMaxSize; boolean stopRequested = false; BatchThread(int batchMaxSize) { super("batch-bolt-thread"); super.setDaemon(true); this.batchMaxSize = batchMaxSize; } @Override public void run() { while (!stopRequested) { try { ArrayList batch = new ArrayList(); // drainTo() does not block, take() does. Tuple t = queue.take(); batch.add(t); if (batchMaxSize > 0) { queue.drainTo(batch, batchMaxSize); } else { queue.drainTo(batch); } executeBatch(batch); } catch (InterruptedException e) { LOG.error("Interupted in batching bolt.", e); } } } void stopRunning() { this.stopRequested = true; } } public AckStrategy getAckStrategy() { return ackStrategy; } public void setAckStrategy(AckStrategy ackStrategy) { this.ackStrategy = ackStrategy; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy