com.marklogic.hub.flow.impl.FlowRunnerImpl Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of marklogic-data-hub Show documentation
Show all versions of marklogic-data-hub Show documentation
Library for Creating an Operational Data Hub on MarkLogic
package com.marklogic.hub.flow.impl;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.marklogic.hub.FlowManager;
import com.marklogic.hub.HubClient;
import com.marklogic.hub.HubConfig;
import com.marklogic.hub.dataservices.JobService;
import com.marklogic.hub.flow.Flow;
import com.marklogic.hub.flow.FlowInputs;
import com.marklogic.hub.flow.FlowRunner;
import com.marklogic.hub.flow.FlowStatusListener;
import com.marklogic.hub.flow.RunFlowResponse;
import com.marklogic.hub.impl.FlowManagerImpl;
import com.marklogic.hub.impl.HubConfigImpl;
import com.marklogic.hub.job.JobStatus;
import com.marklogic.hub.step.RunStepResponse;
import com.marklogic.hub.step.StepRunner;
import com.marklogic.hub.step.StepRunnerFactory;
import com.marklogic.hub.step.impl.Step;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Queue;
import java.util.UUID;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
@Component
public class FlowRunnerImpl implements FlowRunner {
@Autowired
private HubConfig hubConfig;
@Autowired
private FlowManager flowManager;
// Only populated when constructed with a HubClient, which implies that a HubProject is not available
private HubClient hubClient;
final AtomicBoolean isRunning = new AtomicBoolean(false);
final AtomicBoolean isJobCancelled = new AtomicBoolean(false);
final AtomicBoolean isJobSuccess = new AtomicBoolean(true);
final AtomicBoolean jobStoppedOnError = new AtomicBoolean(false);
protected final Logger logger = LoggerFactory.getLogger(getClass());
String runningJobId;
Step runningStep;
Flow runningFlow;
StepRunner stepRunner;
/*
* Using concrete type ConcurrentHashMap instead of Map so coverity static scan does not complain about
* "modification without proper synchronization" when we call `remove(key)` on the `stepsMap` later in the code.
*/
final ConcurrentHashMap> stepsMap = new ConcurrentHashMap<>();
final Map flowMap = new ConcurrentHashMap<>();
final Map flowResp = new ConcurrentHashMap<>();
// TODO Hoping to combine these maps together into a single one soon
final Map flowContextMap = new ConcurrentHashMap<>();
final Queue jobQueue = new ConcurrentLinkedQueue<>();
final List flowStatusListeners = new ArrayList<>();
ThreadPoolExecutor threadPool;
public FlowRunnerImpl() {
}
/**
* Simulates construction of this object via Spring, where the Autowired objects are set manually.
*
* @param hubConfig
* @param flowManager
*/
public FlowRunnerImpl(HubConfig hubConfig, FlowManager flowManager) {
this.hubConfig = hubConfig;
this.flowManager = flowManager;
}
/**
* Convenience constructor for running flows with no dependency on project files on the filesystem, and where a
* user can authenticate with just a username and a password.
*
* @param host the host of the Data Hub instance to connect to
* @param username the username of the MarkLogic user for running a flow
* @param password the password of the MarkLogic user for running a flow
*/
public FlowRunnerImpl(String host, String username, String password) {
this(new HubConfigImpl(host, username, password).newHubClient());
}
/**
* Constructs a FlowRunnerImpl that can be used for running flows without any reference to project files on a
* filesystem - and thus, this constructor will not instantiate an instance of FlowManager, which is used for reading
* project files from the filesystem.
*
* This constructor handles ensuring that step definitions are retrieved from MarkLogic as opposed to from the
* filesystem. It is expected that the "runFlow(FlowInputs)" method will then be used, which ensures that flow
* artifacts are also retrieved from MarkLogic as opposed to from the filesystem.
*
* @param hubClient
*/
public FlowRunnerImpl(HubClient hubClient) {
this.hubClient = hubClient;
this.flowManager = new FlowManagerImpl(hubClient);
}
@Deprecated // since 5.3.0-beta; must be retained because the 5.2 dh-5-example project used it in an example. Should use FlowRunnerImpl(HubClient) instead.
public FlowRunnerImpl(HubConfig hubConfig) {
this(hubConfig.newHubClient());
}
@Override
public FlowRunner onStatusChanged(FlowStatusListener listener) {
this.flowStatusListeners.add(listener);
return this;
}
@Deprecated
public RunFlowResponse runFlow(String flowName) {
return runFlow(flowName, null, null, new HashMap<>(), new HashMap<>());
}
@Deprecated
public RunFlowResponse runFlow(String flowName, List stepNums) {
return runFlow(flowName, stepNums, null, new HashMap<>(), new HashMap<>());
}
@Deprecated
public RunFlowResponse runFlow(String flowName, String jobId) {
return runFlow(flowName, null, jobId, new HashMap<>(), new HashMap<>());
}
@Deprecated
public RunFlowResponse runFlow(String flowName, List stepNums, String jobId) {
return runFlow(flowName, stepNums, jobId, new HashMap<>(), new HashMap<>());
}
@Deprecated
public RunFlowResponse runFlow(String flowName, String jobId, Map runtimeOptions) {
return runFlow(flowName, null, jobId, runtimeOptions, new HashMap<>());
}
@Deprecated
public RunFlowResponse runFlow(String flowName, List stepNums, String jobId, Map runtimeOptions) {
return runFlow(flowName, stepNums, jobId, runtimeOptions, new HashMap<>());
}
@Deprecated
public RunFlowResponse runFlow(String flowName, List stepNums, String jobId, Map runtimeOptions, Map stepConfig) {
Flow flow = flowManager.getFullFlow(flowName);
if (flow == null) {
throw new RuntimeException("Flow " + flowName + " not found");
}
return runFlow(flow, stepNums, jobId, runtimeOptions, stepConfig);
}
/**
* Retrieves the given flow from the staging database in MarkLogic, and then proceeds with the normal execution of
* the flow.
*
* @param flowInputs
* @return
*/
@Override
public RunFlowResponse runFlow(FlowInputs flowInputs) {
final String flowName = flowInputs.getFlowName();
if (StringUtils.isEmpty(flowName)) {
throw new IllegalArgumentException("Cannot run flow; no flow name provided");
}
Flow flow = flowManager.getFullFlow(flowName);
return runFlow(flow, flowInputs.getSteps(), flowInputs.getJobId(), flowInputs.getOptions(), flowInputs.getStepConfig());
}
protected RunFlowResponse runFlow(Flow flow, List stepNumbers, String jobId, Map runtimeOptions, Map stepConfig) {
FlowContext flowContext = new FlowContext(flow, runtimeOptions);
if (flowContext.jobOutputIsEnabled) {
flowContext.jobService = JobService.on(hubClient != null ? hubClient.getJobsClient() : hubConfig.newJobDbClient());
}
configureStopOnError(flow, runtimeOptions);
if(stepNumbers == null) {
stepNumbers = new ArrayList<>(flow.getSteps().keySet());
}
if(stepConfig != null && !stepConfig.isEmpty()) {
flow.setOverrideStepConfig(stepConfig);
}
flow.setRuntimeOptions(runtimeOptions);
Iterator stepItr = stepNumbers.iterator();
Queue stepsQueue = new ConcurrentLinkedQueue<>();
while(stepItr.hasNext()) {
String stepNum = stepItr.next();
Step tmpStep = flow.getStep(stepNum);
if(tmpStep == null){
throw new RuntimeException("Step " + stepNum + " not found in the flow");
}
stepsQueue.add(stepNum);
}
if(jobId == null) {
jobId = UUID.randomUUID().toString();
}
RunFlowResponse response = new RunFlowResponse(jobId);
response.setFlowName(flow.getName());
//Put response, steps and flow in maps with jobId as key
flowResp.put(jobId, response);
stepsMap.put(jobId, stepsQueue);
flowMap.put(jobId, flow);
flowContextMap.put(jobId, flowContext);
//add jobId to a queue
jobQueue.add(jobId);
if(!isRunning.get()){
// Construct a stepRunnerFactory for the execution of this flow. It will be passed to additional instances
// of FlowRunnerTask that need to be created.
StepRunnerFactory stepRunnerFactory = hubClient != null ?
new StepRunnerFactory(hubClient) : new StepRunnerFactory(hubConfig);
initializeFlow(stepRunnerFactory, jobId);
}
return response;
}
/**
* To support the "failHard" parameter in CommandLineFlowInputs, this method checks for stopOnError in the options
* map. If true, this will set stopOnError in the flow. That will cause no other steps to be run once a step fails.
*
* @param flow
* @param options
*/
protected static void configureStopOnError(Flow flow, Map options) {
if (options != null) {
Object value = options.get("stopOnError");
if (Boolean.TRUE.equals(value) || "true".equals(value)) {
flow.setStopOnError(true);
}
}
}
void initializeFlow(StepRunnerFactory stepRunnerFactory, String jobId) {
//Reset the states to initial values before starting a flow run
isRunning.set(true);
isJobSuccess.set(true);
isJobCancelled.set(false);
jobStoppedOnError.set(false);
runningJobId = jobId;
runningFlow = flowMap.get(runningJobId);
FlowContext flowContext = flowContextMap.get(jobId);
if (flowContext.jobOutputIsEnabled) {
flowContext.jobService.startJob(jobId, runningFlow.getName());
}
if(threadPool == null || threadPool.isTerminated()) {
// thread pool size needs to be at least 2, so the current step thread can kick-off the next step thread
int maxPoolSize = Math.max(Runtime.getRuntime().availableProcessors()/2, 2);
threadPool = new CustomPoolExecutor(2, maxPoolSize, 0L, TimeUnit.MILLISECONDS
, new LinkedBlockingQueue<>());
}
threadPool.execute(new FlowRunnerTask(stepRunnerFactory, runningFlow, runningJobId));
}
public void stopJob(String jobId) {
if (stepsMap.containsKey(jobId)) {
stepsMap.get(jobId).clear();
stepsMap.remove(jobId);
}
isJobCancelled.set(true);
if (jobId.equals(runningJobId)) {
if (stepRunner != null) {
stepRunner.stop();
}
}
}
protected static void copyJobDataToResponse(RunFlowResponse response, RunFlowResponse jobDocument) {
response.setStartTime(jobDocument.getStartTime());
response.setEndTime(jobDocument.getEndTime());
response.setUser(jobDocument.getUser());
response.setLastAttemptedStep(jobDocument.getLastAttemptedStep());
response.setLastCompletedStep(jobDocument.getLastCompletedStep());
}
private class FlowRunnerTask implements Runnable {
final StepRunnerFactory stepRunnerFactory;
private final String jobId;
private final Flow flow;
private Queue stepQueue;
public Queue getStepQueue() {
return stepQueue;
}
FlowRunnerTask(StepRunnerFactory stepRunnerFactory, Flow flow, String jobId) {
this.stepRunnerFactory = stepRunnerFactory;
this.jobId = jobId;
this.flow = flow;
}
@Override
public void run() {
RunFlowResponse resp = flowResp.get(runningJobId);
resp.setFlowName(runningFlow.getName());
stepQueue = stepsMap.get(jobId);
FlowContext flowContext = flowContextMap.get(jobId);
Map stepOutputs = new HashMap<>();
String stepNum;
final long[] currSuccessfulEvents = {0};
final long[] currFailedEvents = {0};
final int[] currPercentComplete = {0};
while (! stepQueue.isEmpty()) {
stepNum = stepQueue.poll();
runningStep = runningFlow.getSteps().get(stepNum);
Map runtimeOptions = flow.getRuntimeOptions() != null ?
new HashMap<>(flow.getRuntimeOptions()) : new HashMap<>();
AtomicLong errorCount = new AtomicLong();
AtomicLong successCount = new AtomicLong();
RunStepResponse stepResp = null;
try {
stepRunner = stepRunnerFactory.getStepRunner(runningFlow, stepNum)
.withJobId(jobId)
.withRuntimeOptions(runtimeOptions)
.onItemComplete((jobID, itemID) -> {
successCount.incrementAndGet();
})
.onItemFailed((jobId, itemId)-> {
errorCount.incrementAndGet();
if(flow.isStopOnError()){
stopJobOnError(jobId);
}
})
.onStatusChanged((jobId, percentComplete, jobStatus, successfulEvents, failedEvents, message) ->{
flowStatusListeners.forEach((FlowStatusListener listener) -> {
currSuccessfulEvents[0] = successfulEvents;
currFailedEvents[0] = failedEvents;
currPercentComplete[0] = percentComplete;
listener.onStatusChanged(jobId, runningStep, jobStatus, percentComplete, successfulEvents, failedEvents, runningStep.getName() + " : " + message);
});
});
//If property values are overriden in UI, use those values over any other.
if(flow.getOverrideStepConfig() != null) {
stepRunner.withStepConfig(flow.getOverrideStepConfig());
}
stepResp = stepRunner.run();
stepRunner.awaitCompletion();
final boolean stepFailed = stepResp.getStatus() != null && stepResp.getStatus().startsWith("failed");
if (stepFailed && runningFlow.isStopOnError()) {
stopJobOnError(runningJobId);
}
}
catch (Exception e) {
stepResp = RunStepResponse.withFlow(flow).withStep(stepNum);
stepResp.withJobId(runningJobId);
if(stepRunner != null){
stepResp.setCounts(successCount.get() + errorCount.get(), successCount.get(), errorCount.get(), (long) Math.ceil((double) successCount.get() / stepRunner.getBatchSize()), (long) Math.ceil((double) errorCount.get() / stepRunner.getBatchSize()));
}
else {
stepResp.setCounts(0, 0, 0, 0, 0);
}
StringWriter errors = new StringWriter();
e.printStackTrace(new PrintWriter(errors));
stepResp.withStepOutput(errors.toString());
stepResp.withSuccess(false);
if(successCount.get() > 0) {
stepResp.withStatus(JobStatus.COMPLETED_WITH_ERRORS_PREFIX + stepNum);
}
else{
stepResp.withStatus(JobStatus.FAILED_PREFIX + stepNum);
}
RunStepResponse finalStepResp = stepResp;
try {
flowStatusListeners.forEach((FlowStatusListener listener) -> {
listener.onStatusChanged(jobId, runningStep, JobStatus.FAILED.toString(), currPercentComplete[0], currSuccessfulEvents[0], currFailedEvents[0],
runningStep.getName() + " " + Arrays.toString(finalStepResp.stepOutput.toArray()));
});
} catch (Exception ex) {
logger.error("Unable to invoke FlowStatusListener: " + ex.getMessage());
}
if(runningFlow.isStopOnError()) {
stopJobOnError(runningJobId);
}
}
finally {
stepOutputs.put(stepNum, stepResp);
if(stepResp != null && !stepResp.isSuccess()) {
isJobSuccess.set(false);
}
}
}
resp.setStepResponses(stepOutputs);
final JobStatus jobStatus;
//Update status of job
if (isJobCancelled.get()) {
if(runningFlow.isStopOnError() && jobStoppedOnError.get()){
jobStatus = JobStatus.STOP_ON_ERROR;
}
else {
jobStatus = JobStatus.CANCELED;
}
}
else if (!isJobSuccess.get()) {
Collection stepResps = stepOutputs.values();
long failedStepCount = stepResps.stream().filter((stepResp) -> stepResp.getStatus()
.contains(JobStatus.FAILED_PREFIX)).count();
if(failedStepCount == stepResps.size()){
jobStatus = JobStatus.FAILED;
}
else {
jobStatus = JobStatus.FINISHED_WITH_ERRORS;
}
}
else {
jobStatus = JobStatus.FINISHED;
}
resp.setJobStatus(jobStatus.toString());
try {
if (flowContext.jobOutputIsEnabled) {
flowContext.jobService.finishJob(jobId, jobStatus.toString());
}
}
catch (Exception e) {
logger.error("Unable to finish job with ID: " + jobId + "; cause: " + e.getMessage());
}
finally {
JsonNode jobNode = null;
if (flowContext.jobOutputIsEnabled) {
try {
jobNode = flowContext.jobService.getJob(jobId);
} catch (Exception e) {
logger.error("Unable to get job document with ID: " + jobId + ": cause: " + e.getMessage());
}
}
if(jobNode != null) {
try {
RunFlowResponse jobDoc = new ObjectMapper().treeToValue(jobNode.get("job"), RunFlowResponse.class);
copyJobDataToResponse(resp, jobDoc);
}
catch (Exception e) {
logger.error("Unable to copy job data to RunFlowResponse, cause: " + e.getMessage());
}
}
if (!isJobSuccess.get()) {
try {
flowStatusListeners.forEach((FlowStatusListener listener) -> {
listener.onStatusChanged(jobId, runningStep, jobStatus.toString(), currPercentComplete[0], currSuccessfulEvents[0], currFailedEvents[0], JobStatus.FAILED.toString());
});
} catch (Exception ex) {
logger.error("Unable to invoke FlowStatusListener: " + ex.getMessage());
}
} else {
try {
flowStatusListeners.forEach((FlowStatusListener listener) -> {
listener.onStatusChanged(jobId, runningStep, jobStatus.toString(), currPercentComplete[0], currSuccessfulEvents[0], currFailedEvents[0], JobStatus.FINISHED.toString());
});
} catch (Exception ex) {
logger.error("Unable to invoke FlowStatusListener: " + ex.getMessage());
}
}
jobQueue.remove();
stepsMap.remove(jobId);
flowMap.remove(jobId);
flowContextMap.remove(jobId);
flowResp.remove(runningJobId);
if (!jobQueue.isEmpty()) {
initializeFlow(stepRunnerFactory, jobQueue.peek());
} else {
isRunning.set(false);
threadPool.shutdownNow();
runningFlow = null;
}
}
}
private void stopJobOnError(String jobId) {
jobStoppedOnError.set(true);
stopJob(jobId);
}
}
public void awaitCompletion() {
try {
awaitCompletion(Long.MAX_VALUE, TimeUnit.DAYS);
}
catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
public void awaitCompletion(long timeout, TimeUnit unit) throws InterruptedException {
if (threadPool != null) {
threadPool.awaitTermination(timeout, unit);
}
}
class CustomPoolExecutor extends ThreadPoolExecutor {
public CustomPoolExecutor(int corePoolSize, int maximumPoolSize, long keepAliveTime,
TimeUnit unit, BlockingQueue workQueue) {
super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue);
}
@Override
public void afterExecute(Runnable r, Throwable t) {
super.afterExecute(r, t);
// If submit() method is called instead of execute()
if (t == null && r instanceof Future>) {
try {
Object result = ((Future>) r).get();
} catch (CancellationException e) {
t = e;
} catch (ExecutionException e) {
t = e.getCause();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
if (t != null) {
logger.error("Caught error while running FlowRunnerTask: " + t.getMessage());
FlowRunnerTask flowRunnerTask = (FlowRunnerTask)r;
//Run the next queued flow if stop-on-error is set or if the step queue is empty
if (flowRunnerTask.getStepQueue() == null || flowRunnerTask.getStepQueue().isEmpty() || runningFlow.isStopOnError()) {
jobQueue.remove();
if (!jobQueue.isEmpty()) {
initializeFlow(flowRunnerTask.stepRunnerFactory, jobQueue.peek());
} else {
isRunning.set(false);
threadPool.shutdownNow();
}
}
//Run the next step
else {
if (threadPool != null && !threadPool.isTerminating()) {
threadPool.execute(new FlowRunnerTask(flowRunnerTask.stepRunnerFactory, runningFlow, runningJobId));
}
}
}
}
}
//These methods are used by UI.
public List getQueuedJobIdsFromFlow(String flowName) {
return flowMap
.entrySet()
.stream()
.filter(entry -> flowName.equals(entry.getValue().getName()))
.map(Map.Entry::getKey)
.collect(Collectors.toList());
}
public RunFlowResponse getJobResponseById(String jobId) {
return flowResp.get(jobId);
}
public boolean isJobRunning() {
return isRunning.get();
}
public String getRunningStepKey() {
return runningFlow.getSteps().entrySet()
.stream()
.filter(entry -> Objects.equals(entry.getValue(), runningStep))
.map(Map.Entry::getKey)
.collect(Collectors.joining());
}
public Flow getRunningFlow() {
return this.runningFlow;
}
static class FlowContext {
boolean jobOutputIsEnabled = true;
JobService jobService;
FlowContext(Flow flow, Map runtimeOptions) {
calculateJobOutputIsEnabled(flow, runtimeOptions);
}
private void calculateJobOutputIsEnabled(Flow flow, Map runtimeOptions) {
final String optionName = "disableJobOutput";
if (runtimeOptions != null && runtimeOptions.containsKey(optionName)) {
this.jobOutputIsEnabled = !Boolean.parseBoolean(runtimeOptions.get(optionName).toString());
} else {
JsonNode flowOptions = flow.getOptions();
if (flowOptions != null && flowOptions.has(optionName)) {
this.jobOutputIsEnabled = !flowOptions.get(optionName).asBoolean();
}
}
}
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy