/*
 * com.lumiomedical.flow.impl.parallel.ParallelRuntime (Maven / Gradle / Ivy artifact: lumio-flow)
 *
 * lumio-flow is a library providing an opinionated way of structuring data processing
 * programs such as ETLs. This listing is the newest version.
 */
package com.lumiomedical.flow.impl.parallel;
import com.lumiomedical.flow.actor.generator.Generator;
import com.lumiomedical.flow.compiler.FlowRuntime;
import com.lumiomedical.flow.compiler.RunException;
import com.lumiomedical.flow.impl.parallel.compiler.ParallelIndexes;
import com.lumiomedical.flow.impl.parallel.runtime.executor.ExecutorServiceProvider;
import com.lumiomedical.flow.impl.parallel.runtime.heap.ConcurrentHashHeap;
import com.lumiomedical.flow.impl.parallel.runtime.state.RuntimeState;
import com.lumiomedical.flow.impl.pipeline.runtime.execution.Execution;
import com.lumiomedical.flow.impl.pipeline.runtime.heap.Heap;
import com.lumiomedical.flow.impl.pipeline.runtime.node.OffsetNode;
import com.lumiomedical.flow.io.input.Input;
import com.lumiomedical.flow.io.output.Output;
import com.lumiomedical.flow.node.Node;
import com.lumiomedical.flow.stream.StreamAccumulator;
import com.lumiomedical.flow.stream.StreamGenerator;
import com.lumiomedical.flow.stream.StreamNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.*;
/**
 * {@link FlowRuntime} implementation that executes a compiled flow on an {@link ExecutorService}.
 *
 * Nodes are kept in a waiting list held by a per-run {@link RuntimeState}; each node is submitted to the pool
 * as soon as {@link #isReady(Node, RuntimeState, Heap)} reports it as READY, and the nodes downstream of a
 * completed node are queued in turn.
 *
 * {@link #run(Input)} and the pool lifecycle methods are synchronized on the instance, so a given runtime
 * executes at most one run at a time.
 *
 * @author Pierre Lecerf ([email protected])
 * Created on 2020/03/03
 */
public class ParallelRuntime implements FlowRuntime
{
    private static final Logger logger = LoggerFactory.getLogger(ParallelRuntime.class);

    private final Execution execution;
    private final List<Node> startNodes;
    private final ParallelIndexes indexes;
    private final ExecutorServiceProvider poolProvider;
    private final boolean autoRefresh;
    private ExecutorService pool;
    private CompletionService<Node> completionService;

    /**
     * Creates the runtime and immediately requests a first thread pool from the provider.
     *
     * @param compiledNodes nodes queued at the beginning of every run
     * @param executorServiceProvider source of the {@link ExecutorService} used to execute nodes
     * @param autoRefresh when true, the pool is shut down at the end of each run and a new one is requested on the next run
     * @param indexes indexes handed to the {@link RuntimeState} created for each run
     */
    protected ParallelRuntime(List<Node> compiledNodes, ExecutorServiceProvider executorServiceProvider, boolean autoRefresh, ParallelIndexes indexes)
    {
        this.execution = new Execution();
        this.startNodes = compiledNodes;
        this.indexes = indexes;
        this.poolProvider = executorServiceProvider;
        this.autoRefresh = autoRefresh;

        this.regenerateThreadPool();
    }

    /**
     * Requests a pool from the provider and binds a new completion service to it.
     */
    private synchronized void regenerateThreadPool()
    {
        this.pool = this.poolProvider.provide();
        this.completionService = new ExecutorCompletionService<>(this.pool);
    }

    /**
     * Shuts the current pool down (if any) and drops the references to it and to its completion service.
     * The next call to {@link #run(Input)} requests a new pool from the provider.
     */
    public synchronized void shutdownThreadPool()
    {
        if (this.pool != null)
        {
            this.pool.shutdown();
            this.pool = null;
            this.completionService = null;
        }
    }

    /**
     * Executes the flow with the provided input and blocks until no node is left waiting or submitted.
     *
     * @param input input the run's heap is initialized with
     * @return the output held by the heap once the run is over
     * @throws RunException a ParallelRunException is thrown if the calling thread is interrupted while waiting
     *                      for a node to complete, or if a submitted node task fails
     */
    @Override
    public synchronized Output run(Input input) throws RunException
    {
        Heap heap = new ConcurrentHashHeap(input);
        RuntimeState state = new RuntimeState(this.indexes);

        if (this.pool == null)
            this.regenerateThreadPool();
        else {
            /*
             * A previous run that aborted may have left tasks in flight on the reused pool; their futures
             * would land in the old completion queue. Using a fresh completion service per run makes sure
             * take() only ever returns futures submitted by this run.
             */
            this.completionService = new ExecutorCompletionService<>(this.pool);
        }

        /* Add all start nodes to the waiting queue. */
        state.queueAll(this.startNodes);

        try {
            /* We loop as long as we have nodes to process (submitted to the pool or awaiting submission) */
            while (state.hasSubmitted() || state.hasWaiting())
            {
                /* First, we go over the whole waiting list */
                Iterator<Node> waitingIterator = state.waitingIterator();
                while (waitingIterator.hasNext())
                {
                    Node waitingNode = waitingIterator.next();
                    boolean removeIfSubmitted = true;

                    /* If this is a stream generator, its prolonged presence in the waiting list is conditioned by special clauses */
                    if (waitingNode instanceof StreamGenerator)
                    {
                        StreamGenerator generatorNode = (StreamGenerator) waitingNode;
                        Generator generator = heap.getStreamGenerator(generatorNode);

                        /* If the generator is exhausted, we remove it from the waiting pool and skip it */
                        if (!generator.hasNext())
                        {
                            waitingIterator.remove();
                            continue;
                        }
                        /* Generators can't be run concurrently (as they are expected to be stateful and generated once per run) */
                        if (state.isSubmitted(waitingNode))
                            continue;
                        /* If the stream has reached the max parallelism factor defined on the generator, we keep the node on the waiting pool */
                        if (state.hasReachedMaxParallelism(generatorNode))
                            continue;

                        /* If the generator is to be executed, we do not want to remove it from the waiting pool */
                        removeIfSubmitted = false;
                    }

                    NodeState readiness = this.isReady(waitingNode, state, heap);
                    if (readiness == NodeState.READY)
                    {
                        if (removeIfSubmitted)
                            waitingIterator.remove();

                        this.submitNode(waitingNode, heap, state);
                    }
                    else if (readiness == NodeState.BLOCKED)
                        waitingIterator.remove();
                }

                /* If we have submitted nodes, we use the blocking completion service in order to wait for the first completed node */
                if (state.hasSubmitted())
                {
                    Future<Node> future = this.completionService.take();
                    Node completedNode = future.get();

                    /* We update the state collections */
                    state.unsubmit(completedNode);
                    state.complete(completedNode);

                    /* For each node downstream from the one that just completed, we push it to the waiting list, if it wasn't already */
                    for (Node downstream : completedNode.getDownstream())
                    {
                        /* Non-encapsulated StreamNodes should be ignored (they should already be added as OffsetNodes by the stream trunk) */
                        if (downstream instanceof StreamNode)
                            continue;
                        if (state.isWaiting(downstream) || state.isSubmitted(downstream))
                            continue;

                        state.queue(downstream);
                    }
                }
            }

            return heap.getOutput();
        }
        catch (InterruptedException e) {
            /* Restore the interrupt flag so that code further up the stack can still observe the interruption */
            Thread.currentThread().interrupt();
            throw new ParallelRunException(e.getMessage(), e, heap);
        }
        catch (ExecutionException e) {
            /* Failures already expressed as ParallelRunException are rethrown as-is instead of being wrapped again */
            if (e.getCause() instanceof ParallelRunException)
                throw (ParallelRunException) e.getCause();
            throw new ParallelRunException(e.getMessage(), e, heap);
        }
        finally {
            if (this.autoRefresh)
                this.shutdownThreadPool();
        }
    }

    /**
     * Marks the node as submitted and hands its execution over to the completion service.
     *
     * For a StreamGenerator, the next stream offset is drawn from the heap and the generator is executed wrapped
     * in an OffsetNode carrying that offset. In every case, the downstream nodes are blocked when the execution
     * reports a failure (launch returning false).
     *
     * @param node node to submit
     * @param heap heap of the current run
     * @param state state of the current run
     */
    private void submitNode(Node node, Heap heap, RuntimeState state)
    {
        if (node instanceof OffsetNode)
        {
            Node actualNode = ((OffsetNode) node).getNode();
            logger.debug("Submitting flow node #{} offset {} ({})", actualNode.getUid(), ((OffsetNode) node).getOffset(), actualNode.getClass().getSimpleName());
        }
        else
            logger.debug("Submitting flow node #{} ({})", node.getUid(), node.getClass().getSimpleName());

        if (node instanceof StreamGenerator)
        {
            int offset = heap.getNextStreamOffset((StreamGenerator) node);
            OffsetNode offsetNode = new OffsetNode(node, offset);

            state.submit(offsetNode);
            state.initiateStream(offsetNode);

            this.completionService.submit(() -> {
                if (!this.execution.launch(offsetNode, heap))
                    state.blockAll(offsetNode.getDownstream());

                state.completeStreamItem(offsetNode);

                return offsetNode;
            });
        }
        else {
            state.submit(node);

            this.completionService.submit(() -> {
                boolean isSuccess = this.execution.launch(node, heap);

                if (!isSuccess)
                    state.blockAll(node.getDownstream());

                /* Stream items additionally report their outcome to the stream bookkeeping */
                if (node instanceof OffsetNode)
                {
                    if (!isSuccess)
                        state.terminateStream((OffsetNode) node);
                    else
                        state.completeStreamItem((OffsetNode) node);
                }

                return node;
            });
        }
    }

    /**
     * Computes whether a waiting node can be submitted.
     *
     * OffsetNodes are READY once the heap holds a value for each upstream node at the node's offset.
     * StreamAccumulators are READY once their stream is complete and all their non-stream requirements are completed.
     * Any other node is READY once all its requirements are completed.
     * In all three cases a node flagged as blocked in the state is reported as BLOCKED.
     *
     * @param node Target node
     * @param state state of the current run
     * @param heap heap of the current run
     * @return The current NodeState for the Node
     */
    private NodeState isReady(Node node, RuntimeState state, Heap heap)
    {
        if (node instanceof OffsetNode)
        {
            OffsetNode offsetNode = (OffsetNode) node;
            Node actualNode = offsetNode.getNode();

            if (state.isBlocked(offsetNode) || state.isBlocked(actualNode))
                return NodeState.BLOCKED;

            for (Node usn : actualNode.getUpstream())
            {
                if (!heap.has(usn.getUid(), offsetNode.getOffset()))
                    return NodeState.NOT_READY;
            }

            return NodeState.READY;
        }
        else if (node instanceof StreamAccumulator)
        {
            if (state.isBlocked(node))
                return NodeState.BLOCKED;
            if (!state.isStreamComplete((StreamAccumulator) node))
                return NodeState.NOT_READY;

            for (Node nr : node.getRequirements())
            {
                /* We already take care of stream requirements by checking for the stream completion */
                if (nr instanceof StreamGenerator || nr instanceof StreamNode)
                    continue;
                if (!state.isCompleted(nr))
                    return NodeState.NOT_READY;
            }

            return NodeState.READY;
        }
        else {
            if (state.isBlocked(node))
                return NodeState.BLOCKED;

            for (Node nr : node.getRequirements())
            {
                if (!state.isCompleted(nr))
                    return NodeState.NOT_READY;
            }

            return NodeState.READY;
        }
    }

    /**
     * The NodeState enum represents the different states a Node can be in at runtime before its execution.
     * READY is for Nodes that are ready to be executed
     * NOT_READY is for Nodes that aren't yet ready to be executed
     * BLOCKED is for Nodes that will not be executed either because they will never get all of their inputs, or because a parent Node blocked their branch
     */
    protected enum NodeState
    {
        READY, NOT_READY, BLOCKED
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy (footer of the hosting page, not part of the source file)