com.yahoo.bullet.storm.drpc.DRPCQuerySubscriber Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of bullet-storm Show documentation
Show all versions of bullet-storm Show documentation
This is the implementation of Bullet - a real-time query engine - in Apache Storm.
The newest version!
/*
* Copyright 2017, Yahoo Inc.
* Licensed under the terms of the Apache License, Version 2.0.
* See the LICENSE file associated with the project for terms.
*/
package com.yahoo.bullet.storm.drpc;
import com.yahoo.bullet.common.BulletConfig;
import com.yahoo.bullet.pubsub.BufferingSubscriber;
import com.yahoo.bullet.pubsub.Metadata;
import com.yahoo.bullet.pubsub.PubSubMessage;
import com.yahoo.bullet.storm.drpc.utils.DRPCOutputCollector;
import lombok.extern.slf4j.Slf4j;
import org.apache.storm.drpc.DRPCSpout;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* This class wraps a {@link DRPCSpout} and uses it to read messages from Storm DRPC. It needs all the Storm config to
* be able to connect to and read from the DRPC servers using Thrift.
*
* It buffers read queries in memory up to a specified limit (and stops reading until further commits are received)
* and can re-emit failed queries. However, it is not resilient if the Subscriber is closed or reinitialized elsewhere.
*/
@Slf4j
public class DRPCQuerySubscriber extends BufferingSubscriber {
// The wrapped Storm DRPC spout. It is opened in the constructor and polled with nextTuple() when reading messages.
private DRPCSpout spout;
// The collector handed to the spout (wrapped in a SpoutOutputCollector); output from the spout is read back from it.
private DRPCOutputCollector collector;
// PubSubMessage id to DRPCMessageIds. For failing requests if the subscriber is closed.
private Map emittedIDs;
/**
* Creates and initializes a Subscriber that reads from the DRPC servers. Intended to be used inside a Storm
* spout in a Storm topology.
*
* @param config The config containing the String function in {@link DRPCConfig#DRPC_FUNCTION}, the Storm configuration
* {@link Map} as {@link com.yahoo.bullet.storm.BulletStormConfig#STORM_CONFIG} and the Storm
* {@link TopologyContext} as {@link com.yahoo.bullet.storm.BulletStormConfig#STORM_CONTEXT}.
* @param maxUnCommittedQueries The maximum number of queries that can be read without committing them.
*/
public DRPCQuerySubscriber(BulletConfig config, int maxUnCommittedQueries) {
    // Delegate to the package-private constructor, supplying a fresh collector and a spout that is
    // created for the DRPC function name read from the config under DRPCConfig.DRPC_FUNCTION.
    this(
        config,
        maxUnCommittedQueries,
        new DRPCOutputCollector(),
        new DRPCSpout(config.getRequiredConfigAs(DRPCConfig.DRPC_FUNCTION, String.class))
    );
}
/**
* Exposed for testing.
*
* @param config The config containing the String function in {@link DRPCConfig#DRPC_FUNCTION}, the Storm configuration
* {@link Map} as {@link com.yahoo.bullet.storm.BulletStormConfig#STORM_CONFIG} and the Storm
* {@link TopologyContext} as {@link com.yahoo.bullet.storm.BulletStormConfig#STORM_CONTEXT}.
* @param maxUnCommittedQueries The maximum number of queries that can be read without committing them.
* @param collector The {@link DRPCOutputCollector} to use.
* @param spout The {@link DRPCSpout} to use.
*/
DRPCQuerySubscriber(BulletConfig config, int maxUnCommittedQueries, DRPCOutputCollector collector, DRPCSpout spout) {
    super(maxUnCommittedQueries);
    this.collector = collector;
    this.spout = spout;
    this.emittedIDs = new HashMap<>();

    // Pull the two things the spout needs in order to open: the Storm config carrying the cluster settings
    // and properties, and the TopologyContext. Both are read as required entries of the config.
    Map clusterSettings = config.getRequiredConfigAs(DRPCConfig.STORM_CONFIG, Map.class);
    TopologyContext topologyContext = config.getRequiredConfigAs(DRPCConfig.STORM_CONTEXT, TopologyContext.class);

    // The spout is opened with a SpoutOutputCollector wrapped around our DRPCOutputCollector, so that
    // whatever the spout emits can be read back from the collector held by this subscriber.
    this.spout.open(clusterSettings, topologyContext, new SpoutOutputCollector(this.collector));
}
@Override
public List getMessages() {
// Try and read from DRPC. The DRPCSpout does a sleep for 1 ms if there are no tuples, so we don't have to do it.
spout.nextTuple();
if (!collector.haveOutput()) {
return null;
}
// The DRPCSpout only should have emitted one tuple
List> tuples = collector.reset();
log.debug("Have a message through DRPC {}", tuples);
List