
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.datatorrent.stram.engine;
import java.io.IOException;
import java.lang.Thread.State;
import java.lang.management.GarbageCollectorMXBean;
import java.lang.management.ManagementFactory;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.SocketAddress;
import java.net.UnknownHostException;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.log4j.DTLoggerFactory;
import org.apache.log4j.LogManager;
import com.datatorrent.api.Attribute;
import com.datatorrent.api.Component;
import com.datatorrent.api.Context;
import com.datatorrent.api.DAG.Locality;
import com.datatorrent.api.Operator;
import com.datatorrent.api.Operator.InputPort;
import com.datatorrent.api.Operator.OutputPort;
import com.datatorrent.api.Operator.ProcessingMode;
import com.datatorrent.api.StatsListener;
import com.datatorrent.api.StatsListener.OperatorRequest;
import com.datatorrent.api.StorageAgent;
import com.datatorrent.api.StreamCodec;
import com.datatorrent.api.StreamingApplication;
import com.datatorrent.api.StringCodec;
import com.datatorrent.api.annotation.Stateless;
import com.datatorrent.bufferserver.server.Server;
import com.datatorrent.bufferserver.storage.DiskStorage;
import com.datatorrent.bufferserver.util.Codec;
import com.datatorrent.common.util.ScheduledThreadPoolExecutor;
import com.datatorrent.netlet.DefaultEventLoop;
import com.datatorrent.netlet.util.Slice;
import com.datatorrent.stram.ComponentContextPair;
import com.datatorrent.stram.RecoverableRpcProxy;
import com.datatorrent.stram.StramUtils;
import com.datatorrent.stram.StramUtils.YarnContainerMain;
import com.datatorrent.stram.StringCodecs;
import com.datatorrent.stram.api.Checkpoint;
import com.datatorrent.stram.api.ContainerContext;
import com.datatorrent.stram.api.ContainerEvent;
import com.datatorrent.stram.api.ContainerEvent.ContainerStatsEvent;
import com.datatorrent.stram.api.ContainerEvent.NodeActivationEvent;
import com.datatorrent.stram.api.ContainerEvent.NodeDeactivationEvent;
import com.datatorrent.stram.api.ContainerEvent.StreamActivationEvent;
import com.datatorrent.stram.api.ContainerEvent.StreamDeactivationEvent;
import com.datatorrent.stram.api.OperatorDeployInfo;
import com.datatorrent.stram.api.OperatorDeployInfo.OperatorType;
import com.datatorrent.stram.api.OperatorDeployInfo.UnifierDeployInfo;
import com.datatorrent.stram.api.RequestFactory;
import com.datatorrent.stram.api.StramToNodeChangeLoggersRequest;
import com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol;
import com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerHeartbeat;
import com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerHeartbeatResponse;
import com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.ContainerStats;
import com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.OperatorHeartbeat;
import com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.OperatorHeartbeat.DeployState;
import com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.StramToNodeRequest;
import com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.StreamingContainerContext;
import com.datatorrent.stram.debug.StdOutErrLog;
import com.datatorrent.stram.plan.logical.LogicalPlan;
import com.datatorrent.stram.plan.logical.Operators.PortContextPair;
import com.datatorrent.stram.plan.logical.Operators.PortMappingDescriptor;
import com.datatorrent.stram.plan.logical.StreamCodecWrapperForPersistance;
import com.datatorrent.stram.security.StramUserLogin;
import com.datatorrent.stram.stream.BufferServerPublisher;
import com.datatorrent.stram.stream.BufferServerSubscriber;
import com.datatorrent.stram.stream.FastPublisher;
import com.datatorrent.stram.stream.FastSubscriber;
import com.datatorrent.stram.stream.InlineStream;
import com.datatorrent.stram.stream.MuxStream;
import com.datatorrent.stram.stream.OiOStream;
import com.datatorrent.stram.stream.PartitionAwareSink;
import com.datatorrent.stram.stream.PartitionAwareSinkForPersistence;
import net.engio.mbassy.bus.MBassador;
import net.engio.mbassy.bus.config.BusConfiguration;
/**
* Object which controls the container process launched by {@link com.datatorrent.stram.StreamingAppMaster}.
*
* @since 0.3.2
*/
public class StreamingContainer extends YarnContainerMain
{
public static final String PROP_APP_PATH = StreamingApplication.DT_PREFIX + Context.DAGContext.APPLICATION_PATH.getName();
private final transient String jvmName;
private final String containerId;
private final transient StreamingContainerUmbilicalProtocol umbilical;
protected final Map<Integer, Node<?>> nodes = new ConcurrentHashMap<>();
protected final Set<Integer> failedNodes = Collections.newSetFromMap(new ConcurrentHashMap<Integer, Boolean>());
private final Map<String, ComponentContextPair<Stream, StreamContext>> streams = new ConcurrentHashMap<>();
protected final Map<Integer, WindowGenerator> generators = new ConcurrentHashMap<>();
private transient List<GarbageCollectorMXBean> garbageCollectorMXBeans;
/**
 * OIO groups map
 * key: operator id of the node owning the OIO thread
 * value: list of nodes which run in OIO mode on the owning node's thread
 */
protected final Map<Integer, ArrayList<Integer>> oioGroups = new ConcurrentHashMap<>();
private final Map<Stream, StreamContext> activeStreams = new ConcurrentHashMap<>();
private final Map<WindowGenerator, Object> activeGenerators = new ConcurrentHashMap<>();
private int heartbeatIntervalMillis = 1000;
private volatile boolean exitHeartbeatLoop = false;
private final Object heartbeatTrigger = new Object();
public static DefaultEventLoop eventloop;
/**
 * Window timing and buffer server settings for this container.
 */
private long firstWindowMillis;
private int windowWidthMillis;
private InetSocketAddress bufferServerAddress;
private com.datatorrent.bufferserver.server.Server bufferServer;
private int checkpointWindowCount;
private boolean fastPublisherSubscriber;
private StreamingContainerContext containerContext;
private List<StramToNodeRequest> nodeRequests;
private final HashMap<String, Object> singletons;
private final MBassador<ContainerEvent> eventBus; // event bus for publishing container events
HashSet<Component<ContainerContext>> components;
private RequestFactory requestFactory;
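// A single process-wide event loop is shared by the buffer server and by all
// buffer server publishers/subscribers in this container; failing to create it
// leaves the container unusable, hence the RuntimeException below.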
static {
try {
eventloop = DefaultEventLoop.createEventLoop("ProcessWideEventLoop");
} catch (IOException io) {
throw new RuntimeException(io);
}
}
protected StreamingContainer(String containerId, StreamingContainerUmbilicalProtocol umbilical)
{
this.jvmName = java.lang.management.ManagementFactory.getRuntimeMXBean().getName();
this.components = new HashSet<>();
this.eventBus = new MBassador<>(BusConfiguration.Default(1, 1, 1));
this.singletons = new HashMap<>();
this.nodeRequests = new ArrayList<>();
logger.debug("instantiated StramChild {}", containerId);
this.umbilical = umbilical;
this.containerId = containerId;
}
@SuppressWarnings("unchecked")
public void setup(StreamingContainerContext ctx)
{
containerContext = ctx;
/* add a request factory local to this container */
this.requestFactory = new RequestFactory();
ctx.attributes.put(ContainerContext.REQUEST_FACTORY, requestFactory);
heartbeatIntervalMillis = ctx.getValue(Context.DAGContext.HEARTBEAT_INTERVAL_MILLIS);
firstWindowMillis = ctx.startWindowMillis;
windowWidthMillis = ctx.getValue(Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS);
checkpointWindowCount = ctx.getValue(Context.DAGContext.CHECKPOINT_WINDOW_COUNT);
fastPublisherSubscriber = ctx.getValue(LogicalPlan.FAST_PUBLISHER_SUBSCRIBER);
Map<Class<?>, Class<? extends StringCodec<?>>> codecs = ctx.getValue(Context.DAGContext.STRING_CODECS);
StringCodecs.loadConverters(codecs);
try {
if (ctx.deployBufferServer) {
eventloop.start();
int bufferServerRAM = ctx.getValue(ContainerContext.BUFFER_SERVER_MB);
logger.debug("buffer server memory {}", bufferServerRAM);
int blockCount;
int blocksize;
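// Sizing sketch: below the default memory budget the block count is fixed at 8
// and the block size shrinks with the budget (clamped to at least 1 MB); at or
// above the default the block size is fixed at 64 MB and the count grows instead.
// For example, a 256 MB budget at or above the default yields 4 blocks of 64 MB.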
if (bufferServerRAM < ContainerContext.BUFFER_SERVER_MB.defaultValue) {
blockCount = 8;
blocksize = bufferServerRAM / blockCount;
if (blocksize < 1) {
blocksize = 1;
}
} else {
blocksize = 64;
blockCount = bufferServerRAM / blocksize;
}
// start buffer server, if it was not set externally
bufferServer = new Server(0, blocksize * 1024 * 1024, blockCount);
bufferServer.setAuthToken(ctx.getValue(StreamingContainerContext.BUFFER_SERVER_TOKEN));
if (ctx.getValue(Context.DAGContext.BUFFER_SPOOLING)) {
bufferServer.setSpoolStorage(new DiskStorage());
}
SocketAddress bindAddr = bufferServer.run(eventloop);
logger.debug("Buffer server started: {}", bindAddr);
this.bufferServerAddress = NetUtils.getConnectAddress(((InetSocketAddress)bindAddr));
}
} catch (IOException ex) {
logger.warn("deploy request failed due to {}", ex);
throw new IllegalStateException("Failed to deploy buffer server", ex);
}
for (Class<?> clazz : ContainerEvent.CONTAINER_EVENTS_LISTENERS) {
try {
Object newInstance = clazz.newInstance();
singletons.put(clazz.getName(), newInstance);
if (newInstance instanceof Component) {
components.add((Component<ContainerContext>)newInstance);
}
eventBus.subscribe(newInstance);
} catch (InstantiationException ex) {
logger.warn("Container Event Listener Instantiation", ex);
} catch (IllegalAccessException ex) {
logger.warn("Container Event Listener Instantiation", ex);
}
}
operateListeners(ctx, true);
}
public String getContainerId()
{
return this.containerId;
}
/**
 * This method was introduced as a replacement for the getTupleRecorder method, which was cluttering the code.
 *
 * @param classname fully qualified class name of the registered singleton
 * @return the singleton registered under the given class name, or null if there is none
 */
public Object getInstance(String classname)
{
return singletons.get(classname);
}
/**
 * Initialize the container. Establishes the heartbeat connection to the master
 * through the callback address provided on the command line. Deploys the
 * initial modules, then enters the heartbeat loop, which terminates only
 * once the container receives a shutdown request from the master. On shutdown,
 * after exiting the heartbeat loop, all modules are shut down and the
 * processing threads are terminated.
 *
 * @param args command line arguments
 * @throws Throwable
 */
public static void main(String[] args) throws Throwable
{
StdOutErrLog.tieSystemOutAndErrToLog();
logger.debug("PID: " + System.getenv().get("JVM_PID"));
logger.info("Child starting with classpath: {}", System.getProperty("java.class.path"));
String appPath = System.getProperty(PROP_APP_PATH);
if (appPath == null) {
logger.error("{} not set in container environment.", PROP_APP_PATH);
System.exit(1);
}
int exitStatus = 1; // interpreted as unrecoverable container failure
RecoverableRpcProxy rpcProxy = new RecoverableRpcProxy(appPath, new Configuration());
final StreamingContainerUmbilicalProtocol umbilical = rpcProxy.getProxy();
final String childId = System.getProperty(StreamingApplication.DT_PREFIX + "cid");
try {
StreamingContainerContext ctx = umbilical.getInitContext(childId);
StreamingContainer stramChild = new StreamingContainer(childId, umbilical);
logger.debug("Container Context = {}", ctx);
stramChild.setup(ctx);
try {
/* main thread enters heartbeat loop */
stramChild.heartbeatLoop();
exitStatus = 0;
} finally {
stramChild.teardown();
}
} catch (Error error) {
logger.error("Fatal error in container!", error);
/* Report back any failures, for diagnostic purposes */
String msg = ExceptionUtils.getStackTrace(error);
umbilical.reportError(childId, null, "FATAL: " + msg);
} catch (Exception exception) {
logger.error("Fatal exception in container!", exception);
/* Report back any failures, for diagnostic purposes */
String msg = ExceptionUtils.getStackTrace(exception);
umbilical.reportError(childId, null, msg);
} finally {
rpcProxy.close();
DefaultMetricsSystem.shutdown();
logger.info("Exit status for container: {}", exitStatus);
LogManager.shutdown();
}
if (exitStatus != 0) {
System.exit(exitStatus);
}
}
public synchronized void deactivate()
{
ArrayList<Thread> activeThreads = new ArrayList<>();
ArrayList<Integer> activeOperators = new ArrayList<>();
for (Map.Entry<Integer, Node<?>> e : nodes.entrySet()) {
Thread t = e.getValue().context.getThread();
if (t == null || !t.isAlive()) {
disconnectNode(e.getKey());
} else {
activeThreads.add(t);
activeOperators.add(e.getKey());
e.getValue().shutdown();
}
}
try {
Iterator<Integer> iterator = activeOperators.iterator();
for (Thread t : activeThreads) {
t.join(1000);
if (!t.getState().equals(State.TERMINATED)) {
t.interrupt();
}
disconnectNode(iterator.next());
}
} catch (InterruptedException ex) {
logger.warn("Aborting wait for operators to get deactivated!", ex);
}
for (WindowGenerator wg : activeGenerators.keySet()) {
wg.deactivate();
}
activeGenerators.clear();
for (Stream stream : activeStreams.keySet()) {
stream.deactivate();
}
activeStreams.clear();
}
private void disconnectNode(int nodeid)
{
Node<?> node = nodes.get(nodeid);
disconnectWindowGenerator(nodeid, node);
PortMappingDescriptor portMappingDescriptor = node.getPortMappingDescriptor();
Iterator<String> outputPorts = portMappingDescriptor.outputPorts.keySet().iterator();
while (outputPorts.hasNext()) {
String sourceIdentifier = String.valueOf(nodeid).concat(Component.CONCAT_SEPARATOR).concat(outputPorts.next());
ComponentContextPair<Stream, StreamContext> pair = streams.remove(sourceIdentifier);
if (pair != null) {
if (activeStreams.remove(pair.component) != null) {
pair.component.deactivate();
eventBus.publish(new StreamDeactivationEvent(pair));
}
if (pair.component instanceof Stream.MultiSinkCapableStream) {
String sinks = pair.context.getSinkId();
if (sinks == null) {
logger.error("mux sinks found connected at {} with sink id null", sourceIdentifier);
} else {
String[] split = sinks.split(MuxStream.MULTI_SINK_ID_CONCAT_SEPARATOR);
for (int i = split.length; i-- > 0; ) {
ComponentContextPair<Stream, StreamContext> spair = streams.remove(split[i]);
if (spair == null) {
logger.error("mux is missing the stream for sink {}", split[i]);
} else {
if (activeStreams.remove(spair.component) != null) {
spair.component.deactivate();
eventBus.publish(new StreamDeactivationEvent(spair));
}
spair.component.teardown();
}
}
}
} else {
// it's either an inline stream or a buffer server publisher.
}
pair.component.teardown();
}
}
Iterator<String> inputPorts = portMappingDescriptor.inputPorts.keySet().iterator();
while (inputPorts.hasNext()) {
String sinkIdentifier = String.valueOf(nodeid).concat(Component.CONCAT_SEPARATOR).concat(inputPorts.next());
ComponentContextPair<Stream, StreamContext> pair = streams.remove(sinkIdentifier);
if (pair != null) {
if (activeStreams.remove(pair.component) != null) {
pair.component.deactivate();
eventBus.publish(new StreamDeactivationEvent(pair));
}
pair.component.teardown();
/*
 * If this stream is connected to a mux stream, we should also make sure to
 * deregister it from the mux stream to avoid the clogged sink problem.
 */
ComponentContextPair<Stream, StreamContext> sourcePair = streams.get(pair.context.getSourceId());
if (sourcePair != null) {
if (sourcePair == pair) {
/* for some reason we had the stream stored against both source and sink identifiers */
streams.remove(pair.context.getSourceId());
} else {
/* the stream was one of the many streams sourced by a muxstream */
unregisterSinkFromMux(sourcePair, sinkIdentifier);
}
}
}
}
}
private boolean unregisterSinkFromMux(ComponentContextPair<Stream, StreamContext> muxpair, String sinkIdentifier)
{
String[] sinks = muxpair.context.getSinkId().split(MuxStream.MULTI_SINK_ID_CONCAT_SEPARATOR);
boolean found = false;
for (int i = sinks.length; i-- > 0; ) {
if (sinks[i].equals(sinkIdentifier)) {
sinks[i] = null;
found = true;
break;
}
}
if (found) {
((Stream.MultiSinkCapableStream)muxpair.component).setSink(sinkIdentifier, null);
if (sinks.length == 1) {
muxpair.context.setSinkId(null);
streams.remove(muxpair.context.getSourceId());
if (activeStreams.remove(muxpair.component) != null) {
muxpair.component.deactivate();
eventBus.publish(new StreamDeactivationEvent(muxpair));
}
muxpair.component.teardown();
} else {
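// Rebuild the concatenated sink id without the removed sink. The initial
// capacity assumes we drop exactly one sink id plus one separator from the
// original string.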
StringBuilder builder = new StringBuilder(muxpair.context.getSinkId().length() - MuxStream.MULTI_SINK_ID_CONCAT_SEPARATOR.length() - sinkIdentifier.length());
found = false;
for (int i = sinks.length; i-- > 0; ) {
if (sinks[i] != null) {
if (found) {
builder.append(MuxStream.MULTI_SINK_ID_CONCAT_SEPARATOR).append(sinks[i]);
} else {
builder.append(sinks[i]);
found = true;
}
}
}
muxpair.context.setSinkId(builder.toString());
}
} else {
logger.error("{} was not connected to stream connected to {}", sinkIdentifier, muxpair.context.getSourceId());
}
return found;
}
private void disconnectWindowGenerator(int nodeid, Node<?> node)
{
WindowGenerator chosen1 = generators.remove(nodeid);
if (chosen1 != null) {
chosen1.releaseReservoir(Integer.toString(nodeid).concat(Component.CONCAT_SEPARATOR).concat(Node.INPUT));
// should we send the closure of the port to the node?
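// A single WindowGenerator may be shared by several input operators in this
// container; tear it down only when no other entry in generators still maps to it.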
int count = 0;
for (WindowGenerator wg : generators.values()) {
if (chosen1 == wg) {
count++;
}
}
if (count == 0) {
activeGenerators.remove(chosen1);
chosen1.deactivate();
chosen1.teardown();
}
}
}
private synchronized void undeploy(List<Integer> nodeList)
{
/*
 * Make sure that all the operators which we are asked to undeploy are in this container.
 */
HashMap<Integer, Node<?>> toUndeploy = new HashMap<>();
for (Integer operatorId : nodeList) {
Node<?> node = nodes.get(operatorId);
if (node == null) {
throw new IllegalArgumentException("Node " + operatorId + " is not hosted in this container!");
} else if (toUndeploy.containsKey(operatorId)) {
throw new IllegalArgumentException("Node " + operatorId + " is requested to be undeployed more than once");
} else {
toUndeploy.put(operatorId, node);
}
}
ArrayList<Thread> joinList = new ArrayList<>();
ArrayList<Integer> discoList = new ArrayList<>();
for (Integer operatorId : nodeList) {
Thread t = nodes.get(operatorId).context.getThread();
if (t == null || !t.isAlive()) {
disconnectNode(operatorId);
} else {
joinList.add(t);
discoList.add(operatorId);
nodes.get(operatorId).shutdown();
}
}
try {
Iterator<Integer> iterator = discoList.iterator();
for (Thread t : joinList) {
t.join(1000);
if (!t.getState().equals(State.TERMINATED)) {
t.interrupt();
}
disconnectNode(iterator.next());
}
logger.info("Undeploy complete.");
} catch (InterruptedException ex) {
logger.warn("Aborting wait for operators to get deactivated!", ex);
}
for (Integer operatorId : nodeList) {
nodes.remove(operatorId);
}
}
public void teardown()
{
operateListeners(containerContext, false);
deactivate();
assert (streams.isEmpty());
eventBus.shutdown();
nodes.clear();
HashSet<WindowGenerator> gens = new HashSet<>();
gens.addAll(generators.values());
generators.clear();
for (WindowGenerator wg : gens) {
wg.teardown();
}
if (bufferServer != null) {
eventloop.stop(bufferServer);
eventloop.stop();
}
gens.clear();
}
public void triggerHeartbeat()
{
synchronized (heartbeatTrigger) {
heartbeatTrigger.notifyAll();
}
}
public void heartbeatLoop() throws Exception
{
umbilical.log(containerId, "[" + containerId + "] Entering heartbeat loop..");
logger.debug("Entering heartbeat loop (interval is {} ms)", this.heartbeatIntervalMillis);
final YarnConfiguration conf = new YarnConfiguration();
long tokenLifeTime = (long)(containerContext.getValue(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR) * containerContext.getValue(LogicalPlan.HDFS_TOKEN_LIFE_TIME));
long expiryTime = System.currentTimeMillis();
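// Token refresh bookkeeping: the anticipatory factor scales the configured token
// lifetime down so the refresh happens before actual expiry. Starting expiryTime
// at "now" makes the first loop iteration refresh immediately in secure mode.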
final Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
String stackTrace = null;
Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
while (iter.hasNext()) {
Token<?> token = iter.next();
logger.debug("token: {}", token);
}
String hdfsKeyTabFile = containerContext.getValue(LogicalPlan.KEY_TAB_FILE);
while (!exitHeartbeatLoop) {
if (UserGroupInformation.isSecurityEnabled() && System.currentTimeMillis() >= expiryTime && hdfsKeyTabFile != null) {
expiryTime = StramUserLogin.refreshTokens(tokenLifeTime, FileUtils.getTempDirectoryPath(), containerId, conf, hdfsKeyTabFile, credentials, null, false);
}
synchronized (this.heartbeatTrigger) {
try {
this.heartbeatTrigger.wait(heartbeatIntervalMillis);
} catch (InterruptedException e1) {
logger.warn("Interrupted in heartbeat loop, exiting..");
break;
}
}
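// The timed wait doubles as the heartbeat timer: triggerHeartbeat() notifies
// this monitor so a heartbeat can be sent immediately instead of waiting out
// the remainder of the interval.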
long currentTime = System.currentTimeMillis();
ContainerHeartbeat msg = new ContainerHeartbeat();
msg.jvmName = jvmName;
if (this.bufferServerAddress != null) {
msg.bufferServerHost = this.bufferServerAddress.getHostName();
msg.bufferServerPort = this.bufferServerAddress.getPort();
if (bufferServer != null && !eventloop.isActive()) {
logger.warn("Requesting restart due to terminated event loop");
msg.restartRequested = true;
}
}
msg.memoryMBFree = ((int)(Runtime.getRuntime().freeMemory() / (1024 * 1024)));
garbageCollectorMXBeans = ManagementFactory.getGarbageCollectorMXBeans();
for (GarbageCollectorMXBean bean : garbageCollectorMXBeans) {
msg.gcCollectionTime += bean.getCollectionTime();
msg.gcCollectionCount += bean.getCollectionCount();
}
ContainerHeartbeatResponse rsp;
do {
ContainerStats stats = new ContainerStats(containerId);
// gather heartbeat info for all operators
for (Map.Entry<Integer, Node<?>> e : nodes.entrySet()) {
OperatorHeartbeat hb = new OperatorHeartbeat();
hb.setNodeId(e.getKey());
hb.setGeneratedTms(currentTime);
hb.setIntervalMs(heartbeatIntervalMillis);
if (e.getValue().commandResponse.size() > 0) {
BlockingQueue<StatsListener.OperatorResponse> commandResponse = e.getValue().commandResponse;
ArrayList<StatsListener.OperatorResponse> response = new ArrayList<>();
// snapshot the size first: polling shrinks the queue, so comparing against
// size() on every iteration would drain only about half of the responses
int responseCount = commandResponse.size();
for (int i = 0; i < responseCount; i++) {
response.add(commandResponse.poll());
}
hb.requestResponse = response;
}
OperatorContext context = e.getValue().context;
context.drainStats(hb.getOperatorStatsContainer());
if (context.getThread() == null || context.getThread().getState() != Thread.State.TERMINATED) {
hb.setState(DeployState.ACTIVE);
} else if (failedNodes.contains(hb.nodeId)) {
hb.setState(DeployState.FAILED);
} else {
logger.debug("Reporting SHUTDOWN state because thread is {} and failedNodes is {}", context.getThread(), failedNodes);
hb.setState(DeployState.SHUTDOWN);
}
stats.addNodeStats(hb);
}
/*
 * Container stats are published for whoever is interested in listening.
 * Currently the interested candidates are TupleRecorderCollection and BufferServerStatsSubscriber.
 */
eventBus.publish(new ContainerStatsEvent(stats));
msg.setContainerStats(stats);
// heartbeat call and follow-up processing
//logger.debug("Sending heartbeat for {} operators.", msg.getContainerStats().size());
msg.sentTms = System.currentTimeMillis();
msg.stackTrace = stackTrace;
rsp = umbilical.processHeartbeat(msg);
if (rsp.stackTraceRequired) {
stackTrace = StramUtils.getStackTrace().toString();
} else {
stackTrace = null;
}
processHeartbeatResponse(rsp);
if (rsp.hasPendingRequests) {
logger.info("Waiting for pending request.");
synchronized (this.heartbeatTrigger) {
try {
this.heartbeatTrigger.wait(500);
} catch (InterruptedException ie) {
logger.warn("Interrupted in heartbeat loop", ie);
break;
}
}
}
} while (rsp.hasPendingRequests);
}
logger.debug("Exiting hearbeat loop");
umbilical.log(containerId, "[" + containerId + "] Exiting heartbeat loop..");
}
private long lastCommittedWindowId = WindowGenerator.MIN_WINDOW_ID - 1;
private void processNodeRequests(boolean flagInvalid)
{
for (StramToNodeRequest req : nodeRequests) {
if (req.isDeleted()) {
continue;
}
if (req instanceof StramToNodeChangeLoggersRequest) {
handleChangeLoggersRequest((StramToNodeChangeLoggersRequest)req);
continue;
}
Node<?> node = nodes.get(req.getOperatorId());
if (node == null) {
logger.warn("Node for operator {} is not found, probably not deployed yet", req.getOperatorId());
continue;
}
Thread thread = node.context.getThread();
if (thread == null || !thread.isAlive()) {
if (flagInvalid) {
logger.warn("Received request with invalid operator id {} ({})", req.getOperatorId(), req);
req.setDeleted(true);
}
} else {
logger.debug("request received: {}", req);
OperatorRequest requestExecutor = requestFactory.getRequestExecutor(nodes.get(req.operatorId), req);
if (requestExecutor != null) {
node.context.request(requestExecutor);
} else {
logger.warn("No executor identified for the request {}", req);
}
req.setDeleted(true);
}
}
}
public void processHeartbeatResponse(ContainerHeartbeatResponse rsp)
{
if (rsp.nodeRequests != null) {
nodeRequests = rsp.nodeRequests;
}
if (rsp.committedWindowId != lastCommittedWindowId) {
lastCommittedWindowId = rsp.committedWindowId;
OperatorRequest nr = null;
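// Lazily create a single shared request that notifies CheckpointListener
// operators of the newly committed window; the same instance is queued on the
// context of every live operator that implements the interface.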
for (Entry<Integer, Node<?>> e : nodes.entrySet()) {
final Thread thread = e.getValue().context.getThread();
if (thread == null || !thread.isAlive()) {
continue;
}
if (e.getValue().getOperator() instanceof Operator.CheckpointListener) {
if (nr == null) {
nr = new OperatorRequest()
{
@Override
public StatsListener.OperatorResponse execute(Operator operator, int operatorId, long windowId) throws IOException
{
((Operator.CheckpointListener)operator).committed(lastCommittedWindowId);
return null;
}
};
}
e.getValue().context.request(nr);
}
}
}
if (rsp.undeployRequest != null) {
logger.info("Undeploy request: {}", rsp.undeployRequest);
processNodeRequests(false);
undeploy(rsp.undeployRequest);
}
if (rsp.shutdown) {
logger.info("Received shutdown request");
processNodeRequests(false);
this.exitHeartbeatLoop = true;
return;
}
if (rsp.deployRequest != null) {
logger.info("Deploy request: {}", rsp.deployRequest);
try {
deploy(rsp.deployRequest);
} catch (Exception e) {
logger.error("deploy request failed", e);
try {
umbilical.log(this.containerId, "deploy request failed: " + rsp.deployRequest + " " + ExceptionUtils.getStackTrace(e));
} catch (IOException ioe) {
// ignore
}
this.exitHeartbeatLoop = true;
throw new IllegalStateException("Deploy request failed: " + rsp.deployRequest, e);
}
}
processNodeRequests(true);
}
private int getOutputQueueCapacity(List<OperatorDeployInfo> operatorList, int sourceOperatorId, String sourcePortName)
{
for (OperatorDeployInfo odi : operatorList) {
if (odi.id == sourceOperatorId) {
for (OperatorDeployInfo.OutputDeployInfo odiodi : odi.outputs) {
if (odiodi.portName.equals(sourcePortName)) {
return getValue(PortContext.QUEUE_CAPACITY, odiodi, odi);
}
}
}
}
return PortContext.QUEUE_CAPACITY.defaultValue;
}
private synchronized void deploy(List<OperatorDeployInfo> nodeList) throws Exception
{
/*
* A little bit of up front sanity check would reduce the percentage of deploy failures later.
*/
for (OperatorDeployInfo ndi : nodeList) {
if (nodes.containsKey(ndi.id)) {
throw new IllegalStateException("Node with id: " + ndi.id + " already present in container " + containerId + "!");
}
}
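// Deployment proceeds in phases: instantiate the nodes from their checkpoints,
// wire the output streams, wire the input streams, set up all newly created
// streams, and only then activate the operators against the new streams.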
deployNodes(nodeList);
HashMap<String, ArrayList<String>> groupedInputStreams = new HashMap<>();
for (OperatorDeployInfo ndi : nodeList) {
groupInputStreams(groupedInputStreams, ndi);
}
HashMap<String, ComponentContextPair<Stream, StreamContext>> newStreams = deployOutputStreams(nodeList, groupedInputStreams);
deployInputStreams(nodeList, newStreams);
for (ComponentContextPair<Stream, StreamContext> pair : newStreams.values()) {
pair.component.setup(pair.context);
}
streams.putAll(newStreams);
HashMap<Integer, OperatorDeployInfo> operatorMap = new HashMap<>(nodeList.size());
for (OperatorDeployInfo o : nodeList) {
operatorMap.put(o.id, o);
}
activate(operatorMap, newStreams);
}
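// Illustration (assuming Component.CONCAT_SEPARATOR is "."): for portName "input",
// sourceNodeId 3 and sourcePortName "out", the unifier input port is named
// "input(3.out)", keeping one uniquely named port per upstream partition.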
public static String getUnifierInputPortName(String portName, int sourceNodeId, String sourcePortName)
{
return portName + "(" + sourceNodeId + Component.CONCAT_SEPARATOR + sourcePortName + ")";
}
private void massageUnifierDeployInfo(OperatorDeployInfo odi)
{
for (OperatorDeployInfo.InputDeployInfo idi : odi.inputs) {
idi.portName = getUnifierInputPortName(idi.portName, idi.sourceNodeId, idi.sourcePortName);
}
}
private void deployNodes(List<OperatorDeployInfo> nodeList) throws IOException
{
for (OperatorDeployInfo ndi : nodeList) {
StorageAgent backupAgent = getValue(OperatorContext.STORAGE_AGENT, ndi);
assert (backupAgent != null);
Context parentContext;
if (ndi instanceof UnifierDeployInfo) {
OperatorContext unifiedOperatorContext = new OperatorContext(0, ((UnifierDeployInfo)ndi).operatorAttributes, containerContext);
parentContext = new PortContext(ndi.inputs.get(0).contextAttributes, unifiedOperatorContext);
massageUnifierDeployInfo(ndi);
} else {
parentContext = containerContext;
}
OperatorContext ctx = new OperatorContext(ndi.id, ndi.contextAttributes, parentContext);
ctx.attributes.put(OperatorContext.ACTIVATION_WINDOW_ID, ndi.checkpoint.windowId);
logger.debug("Restoring operator {} to checkpoint {} stateless={}.", ndi.id, Codec.getStringWindowId(ndi.checkpoint.windowId), ctx.stateless);
Node<?> node = Node.retrieveNode(backupAgent.load(ndi.id, ctx.stateless ? Stateless.WINDOW_ID : ndi.checkpoint.windowId), ctx, ndi.type);
node.currentWindowId = ndi.checkpoint.windowId;
node.applicationWindowCount = ndi.checkpoint.applicationWindowCount;
node.firstWindowMillis = firstWindowMillis;
node.windowWidthMillis = windowWidthMillis;
node.setId(ndi.id);
nodes.put(ndi.id, node);
logger.debug("Marking operator {} as deployed.", node);
}
}
private HashMap.SimpleEntry<String, ComponentContextPair<Stream, StreamContext>> deployBufferServerPublisher(
String connIdentifier, StreamCodec<?> streamCodec, long finishedWindowId, int queueCapacity,
OperatorDeployInfo.OutputDeployInfo nodi)
throws UnknownHostException
{
String sinkIdentifier = "tcp://".concat(nodi.bufferServerHost).concat(":").concat(String.valueOf(nodi.bufferServerPort)).concat("/").concat(connIdentifier);
StreamContext bssc = new StreamContext(nodi.declaredStreamId);
bssc.setPortId(nodi.portName);
bssc.setSourceId(connIdentifier);
bssc.setSinkId(sinkIdentifier);
bssc.setFinishedWindowId(finishedWindowId);
bssc.put(StreamContext.CODEC, streamCodec);
bssc.put(StreamContext.EVENT_LOOP, eventloop);
bssc.setBufferServerAddress(InetSocketAddress.createUnresolved(nodi.bufferServerHost, nodi.bufferServerPort));
bssc.put(StreamContext.BUFFER_SERVER_TOKEN, nodi.bufferServerToken);
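// If the buffer server runs on this host, swap the advertised address for the
// loopback address so local traffic does not go through the external interface.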
InetAddress inetAddress = bssc.getBufferServerAddress().getAddress();
if (inetAddress != null && NetUtils.isLocalAddress(inetAddress)) {
bssc.setBufferServerAddress(new InetSocketAddress(InetAddress.getByName(null), nodi.bufferServerPort));
}
Stream publisher = fastPublisherSubscriber ? new FastPublisher(connIdentifier, queueCapacity * 256) : new BufferServerPublisher(connIdentifier, queueCapacity);
return new HashMap.SimpleEntry<>(sinkIdentifier, new ComponentContextPair<>(publisher, bssc));
}
private HashMap<String, ComponentContextPair<Stream, StreamContext>> deployOutputStreams(
List<OperatorDeployInfo> nodeList, HashMap<String, ArrayList<String>> groupedInputStreams)
throws Exception
{
HashMap<String, ComponentContextPair<Stream, StreamContext>> newStreams = new HashMap<>();
/*
* We proceed to deploy all the output streams. At the end of this block, our streams collection
* will contain all the streams which originate at the output port of the operators. The streams
* are generally mapped against the "nodename.portname" string. But the BufferServerPublishers which
* share the output port with other inline streams are mapped against the Buffer Server port to
* avoid collision and at the same time keep track of these buffer streams.
*/
for (OperatorDeployInfo ndi : nodeList) {
Node<?> node = nodes.get(ndi.id);
long checkpointWindowId = ndi.checkpoint.windowId;
for (OperatorDeployInfo.OutputDeployInfo nodi : ndi.outputs) {
String sourceIdentifier = Integer.toString(ndi.id).concat(Component.CONCAT_SEPARATOR).concat(nodi.portName);
int queueCapacity = getValue(PortContext.QUEUE_CAPACITY, nodi, ndi);
logger.debug("for stream {} the queue capacity is {}", sourceIdentifier, queueCapacity);
ArrayList<String> collection = groupedInputStreams.get(sourceIdentifier);
Map<Integer, StreamCodec<?>> streamCodecs = nodi.streamCodecs;
if ((collection == null) && (streamCodecs.size() == 1)) {
assert (nodi.bufferServerHost != null) : "resulting stream cannot be inline: " + nodi;
/*
* Let's create a stream to carry the data to the Buffer Server.
* Nobody in this container is interested in the output placed on this stream, but
* this stream exists. That means someone outside of this container must be interested.
*/
Map.Entry<Integer, StreamCodec<?>> entry = streamCodecs.entrySet().iterator().next();
StreamCodec<?> streamCodec = entry.getValue();
Integer streamCodecIdentifier = entry.getKey();
String connIdentifier = sourceIdentifier + Component.CONCAT_SEPARATOR + streamCodecIdentifier;
SimpleEntry<String, ComponentContextPair<Stream, StreamContext>> deployBufferServerPublisher =
deployBufferServerPublisher(connIdentifier, streamCodec, checkpointWindowId, queueCapacity, nodi);
newStreams.put(sourceIdentifier, deployBufferServerPublisher.getValue());
node.connectOutputPort(nodi.portName, deployBufferServerPublisher.getValue().component);
} else {
/*
 * In this case we have two possibilities: either we have one inline stream or multiple
 * streams. Since we cannot tell at this point, we assume that we will have multiple
 * streams and plan accordingly. We will possibly come to this code block multiple times,
 * so we create the MuxStream only the first time and use it for subsequent calls of this block.
 *
 * There is also the possibility that we have a stream with multiple sinks having distinct codecs.
 */
ComponentContextPair<Stream, StreamContext> pair = newStreams.get(sourceIdentifier);
if (pair == null) {
/*
 * Let's multiplex the output placed on this stream.
 * This container itself contains more than one interested party.
 */
StreamContext context = new StreamContext(nodi.declaredStreamId);
context.setSourceId(sourceIdentifier);
context.setFinishedWindowId(checkpointWindowId);
Stream stream = new MuxStream();
newStreams.put(sourceIdentifier, pair = new ComponentContextPair<>(stream, context));
node.connectOutputPort(nodi.portName, stream);
}
if (nodi.bufferServerHost != null) {
/*
* Although there is a node in this container interested in the output placed on this stream,
* there seems to be at least one more interested party located in a container other than this one.
*/
for (Map.Entry<Integer, StreamCodec<?>> entry : streamCodecs.entrySet()) {
Integer streamCodecIdentifier = entry.getKey();
StreamCodec<?> streamCodec = entry.getValue();
String connIdentifier = sourceIdentifier + Component.CONCAT_SEPARATOR + streamCodecIdentifier;
SimpleEntry<String, ComponentContextPair<Stream, StreamContext>> deployBufferServerPublisher =
deployBufferServerPublisher(connIdentifier, streamCodec, checkpointWindowId, queueCapacity, nodi);
newStreams.put(deployBufferServerPublisher.getKey(), deployBufferServerPublisher.getValue());
String sinkIdentifier = pair.context.getSinkId();
if (sinkIdentifier == null) {
pair.context.setSinkId(deployBufferServerPublisher.getKey());
} else {
pair.context.setSinkId(sinkIdentifier.concat(", ").concat(deployBufferServerPublisher.getKey()));
}
((Stream.MultiSinkCapableStream)pair.component).setSink(deployBufferServerPublisher.getKey(), deployBufferServerPublisher.getValue().component);
}
}
}
}
}
return newStreams;
}
/**
* If the port is connected, return the declared stream Id.
*
* @param operatorId id of the operator to which the port belongs.
* @param portname name of port to which the stream is connected.
* @return Stream Id if connected, null otherwise.
*/
public final String getDeclaredStreamId(int operatorId, String portname)
{
String identifier = String.valueOf(operatorId).concat(Component.CONCAT_SEPARATOR).concat(portname);
ComponentContextPair<Stream, StreamContext> spair = streams.get(identifier);
if (spair == null) {
return null;
}
return spair.context.getId();
}
@SuppressWarnings("unchecked")
private void deployInputStreams(List<OperatorDeployInfo> operatorList, HashMap<String, ComponentContextPair<Stream, StreamContext>> newStreams) throws UnknownHostException
{
/*
* collect any input operators along with their smallest window id,
* those are subsequently used to setup the window generator
*/
ArrayList<OperatorDeployInfo> inputNodes = new ArrayList<>();
long smallestCheckpointedWindowId = Long.MAX_VALUE;
// a simple map which maps each OIO node to the node that owns its thread
Map<Integer, Integer> oioNodes = new ConcurrentHashMap<>();
/*
* Hook up all the downstream ports. There are two places where we deal with more than one
* downstream port. The first one follows immediately, for the WindowGenerator. The second
* case is when the source for the input port of some node in this container is in another
* container, so we need to create the stream. We need to track this stream along with the
* other streams, and since many such streams may exist, we hash them against the buffer
* server info as we did for the outputs, but throw the sink id into the mix as well.
*/
for (OperatorDeployInfo ndi : operatorList) {
if (ndi.inputs == null || ndi.inputs.isEmpty()) {
/*
* This has to be an InputNode, so let's hook the WindowGenerator to it.
* A node which does not take any input cannot exist in the DAG since it would be completely
* unaware of the windows; for that reason AbstractInputNode allows the Component.INPUT port.
*/
inputNodes.add(ndi);
/*
* When we activate the window generator, we plan to activate it only from the required windowId.
*/
ndi.checkpoint = getFinishedCheckpoint(ndi);
if (ndi.checkpoint.windowId < smallestCheckpointedWindowId) {
smallestCheckpointedWindowId = ndi.checkpoint.windowId;
}
} else {
Node<?> node = nodes.get(ndi.id);
for (OperatorDeployInfo.InputDeployInfo nidi : ndi.inputs) {
if (nidi.streamCodecs.size() != 1) {
throw new IllegalStateException("Only one input codec configuration should be present");
}
Map.Entry<Integer, StreamCodec<?>> entry = nidi.streamCodecs.entrySet().iterator().next();
Integer streamCodecIdentifier = entry.getKey();
StreamCodec<?> streamCodec = entry.getValue();
String sourceIdentifier = Integer.toString(nidi.sourceNodeId).concat(Component.CONCAT_SEPARATOR).concat(nidi.sourcePortName);
String sinkIdentifier = Integer.toString(ndi.id).concat(Component.CONCAT_SEPARATOR).concat(nidi.portName);
int queueCapacity = getValue(PortContext.QUEUE_CAPACITY, nidi, ndi);
Checkpoint checkpoint = getFinishedCheckpoint(ndi);
ComponentContextPair<Stream, StreamContext> pair = streams.get(sourceIdentifier);
if (pair == null) {
pair = newStreams.get(sourceIdentifier);
}
if (pair == null) {
/*
* We connect to the buffer server for the input on this port.
* We have already placed all the output streams for all the operators in this container.
* Yet, there is no stream which can source this port so it has to come from the buffer
* server, so let's make a connection to it.
*/
assert (nidi.locality != Locality.CONTAINER_LOCAL && nidi.locality != Locality.THREAD_LOCAL);
StreamContext context = new StreamContext(nidi.declaredStreamId);
context.setBufferServerAddress(InetSocketAddress.createUnresolved(nidi.bufferServerHost, nidi.bufferServerPort));
InetAddress inetAddress = context.getBufferServerAddress().getAddress();
if (inetAddress != null && NetUtils.isLocalAddress(inetAddress)) {
context.setBufferServerAddress(new InetSocketAddress(InetAddress.getByName(null), nidi.bufferServerPort));
}
context.put(StreamContext.BUFFER_SERVER_TOKEN, nidi.bufferServerToken);
String connIdentifier = sourceIdentifier + Component.CONCAT_SEPARATOR + streamCodecIdentifier;
context.setPortId(nidi.portName);
context.put(StreamContext.CODEC, streamCodec);
context.put(StreamContext.EVENT_LOOP, eventloop);
context.setPartitions(nidi.partitionMask, nidi.partitionKeys);
//context.setSourceId(sourceIdentifier);
context.setSourceId(connIdentifier);
context.setSinkId(sinkIdentifier);
context.setFinishedWindowId(checkpoint.windowId);
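// The subscriber connects with an identifier of the form
// tcp://<host>:<port>/<connIdentifier>, where connIdentifier combines the source
// operator id, source port name and stream codec id, mirroring the identifier
// under which the upstream publisher registered with the buffer server.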
BufferServerSubscriber subscriber = fastPublisherSubscriber
? new FastSubscriber("tcp://".concat(nidi.bufferServerHost).concat(":").concat(String.valueOf(nidi.bufferServerPort)).concat("/").concat(connIdentifier), queueCapacity)
: new BufferServerSubscriber("tcp://".concat(nidi.bufferServerHost).concat(":").concat(String.valueOf(nidi.bufferServerPort)).concat("/").concat(connIdentifier), queueCapacity);
if (streamCodec instanceof StreamCodecWrapperForPersistance) {
subscriber.acquireReservoirForPersistStream(sinkIdentifier, queueCapacity, streamCodec);
}
SweepableReservoir reservoir = subscriber.acquireReservoir(sinkIdentifier, queueCapacity);
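// Wrap the reservoir so that tuples are skipped until the recovery window id has
// passed; this avoids re-feeding tuples the operator had already processed
// before its checkpoint.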
if (checkpoint.windowId >= 0) {
node.connectInputPort(nidi.portName, new WindowIdActivatedReservoir(sinkIdentifier, reservoir, checkpoint.windowId));
}
node.connectInputPort(nidi.portName, reservoir);
newStreams.put(sinkIdentifier, new ComponentContextPair<Stream, StreamContext>(subscriber, context));
logger.debug("put input stream {} against key {}", subscriber, sinkIdentifier);
} else {
assert (nidi.locality == Locality.CONTAINER_LOCAL || nidi.locality == Locality.THREAD_LOCAL);
/* we are still dealing with the MuxStream originating at the output of the source port */
StreamContext inlineContext = new StreamContext(nidi.declaredStreamId);
inlineContext.setSourceId(sourceIdentifier);
inlineContext.setSinkId(sinkIdentifier);
Stream stream;
SweepableReservoir reservoir;
switch (nidi.locality) {
case CONTAINER_LOCAL:
int outputQueueCapacity = getOutputQueueCapacity(operatorList, nidi.sourceNodeId, nidi.sourcePortName);
if (outputQueueCapacity > queueCapacity) {
queueCapacity = outputQueueCapacity;
}
stream = new InlineStream(queueCapacity);
reservoir = ((InlineStream)stream).getReservoir();
if (checkpoint.windowId >= 0) {
node.connectInputPort(nidi.portName, new WindowIdActivatedReservoir(sinkIdentifier, reservoir,
checkpoint.windowId));
}
break;
case THREAD_LOCAL:
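// THREAD_LOCAL (OiO) streams run the downstream operator on the upstream
// operator's thread; record the ownership so activation can group these
// nodes under the owning thread.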
stream = new OiOStream();
reservoir = ((OiOStream)stream).getReservoir();
((OiOStream.OiOReservoir)reservoir).setControlSink(((OiONode)node).getControlSink(reservoir));
oioNodes.put(ndi.id, nidi.sourceNodeId);
break;
default:
throw new IllegalStateException("Locality can be either ContainerLocal or ThreadLocal");
}
node.connectInputPort(nidi.portName, reservoir);
newStreams.put(sinkIdentifier, new ComponentContextPair<>(stream, inlineContext));
if (!(pair.component instanceof Stream.MultiSinkCapableStream)) {
String originalSinkId = pair.context.getSinkId();
/* we come here only if we are trying to augment the dag */
StreamContext muxContext = new StreamContext(nidi.declaredStreamId);
muxContext.setSourceId(sourceIdentifier);
muxContext.setFinishedWindowId(checkpoint.windowId);
muxContext.setSinkId(originalSinkId);
MuxStream muxStream = new MuxStream();
muxStream.setSink(originalSinkId, pair.component);
streams.put(originalSinkId, pair);
Node> sourceNode = nodes.get(nidi.sourceNodeId);
sourceNode.connectOutputPort(nidi.sourcePortName, muxStream);
newStreams.put(sourceIdentifier, pair = new ComponentContextPair<Stream, StreamContext>(muxStream, muxContext));
}
/* here everything should be multisink capable */
if (streamCodec instanceof StreamCodecWrapperForPersistance) {
PartitionAwareSinkForPersistence pas;
if (nidi.partitionKeys == null) {
pas = new PartitionAwareSinkForPersistence((StreamCodecWrapperForPersistance