/*
* Copyright (c) 2008-2023, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.jet.impl;
import com.hazelcast.cluster.ClusterState;
import com.hazelcast.cluster.Member;
import com.hazelcast.cluster.impl.MemberImpl;
import com.hazelcast.core.HazelcastInstanceNotActiveException;
import com.hazelcast.function.FunctionEx;
import com.hazelcast.instance.impl.Node;
import com.hazelcast.internal.cluster.ClusterService;
import com.hazelcast.internal.metrics.MetricDescriptor;
import com.hazelcast.internal.metrics.MetricsRegistry;
import com.hazelcast.internal.metrics.Probe;
import com.hazelcast.internal.partition.impl.InternalPartitionServiceImpl;
import com.hazelcast.internal.partition.impl.PartitionServiceState;
import com.hazelcast.internal.serialization.Data;
import com.hazelcast.internal.util.counters.Counter;
import com.hazelcast.internal.util.counters.MwCounter;
import com.hazelcast.internal.util.executor.ManagedExecutorService;
import com.hazelcast.jet.JetException;
import com.hazelcast.jet.JobAlreadyExistsException;
import com.hazelcast.jet.config.DeltaJobConfig;
import com.hazelcast.jet.config.JetConfig;
import com.hazelcast.jet.config.JobConfig;
import com.hazelcast.jet.config.JobConfigArguments;
import com.hazelcast.jet.core.DAG;
import com.hazelcast.jet.core.JobNotFoundException;
import com.hazelcast.jet.core.JobStatus;
import com.hazelcast.jet.core.JobSuspensionCause;
import com.hazelcast.jet.core.TopologyChangedException;
import com.hazelcast.jet.core.Vertex;
import com.hazelcast.jet.core.metrics.MetricNames;
import com.hazelcast.jet.core.metrics.MetricTags;
import com.hazelcast.jet.datamodel.Tuple2;
import com.hazelcast.jet.impl.MasterJobContext.TerminationRequest;
import com.hazelcast.jet.impl.exception.EnteringPassiveClusterStateException;
import com.hazelcast.jet.impl.execution.DoneItem;
import com.hazelcast.jet.impl.metrics.RawJobMetrics;
import com.hazelcast.jet.impl.observer.ObservableImpl;
import com.hazelcast.jet.impl.observer.WrappedThrowable;
import com.hazelcast.jet.impl.operation.GetJobIdsOperation.GetJobIdsResult;
import com.hazelcast.jet.impl.operation.NotifyMemberShutdownOperation;
import com.hazelcast.jet.impl.pipeline.PipelineImpl;
import com.hazelcast.jet.impl.pipeline.PipelineImpl.Context;
import com.hazelcast.jet.impl.util.ExceptionUtil;
import com.hazelcast.jet.impl.util.LoggingUtil;
import com.hazelcast.jet.impl.util.Util;
import com.hazelcast.logging.ILogger;
import com.hazelcast.ringbuffer.OverflowPolicy;
import com.hazelcast.ringbuffer.Ringbuffer;
import com.hazelcast.security.SecurityContext;
import com.hazelcast.spi.exception.RetryableHazelcastException;
import com.hazelcast.spi.impl.NodeEngineImpl;
import com.hazelcast.spi.impl.eventservice.impl.Registration;
import com.hazelcast.spi.impl.executionservice.ExecutionService;
import com.hazelcast.spi.properties.HazelcastProperties;
import com.hazelcast.version.Version;
import javax.annotation.CheckReturnValue;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.security.auth.Subject;
import java.security.Permission;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.Spliterators;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import static com.hazelcast.cluster.ClusterState.IN_TRANSITION;
import static com.hazelcast.cluster.ClusterState.PASSIVE;
import static com.hazelcast.cluster.memberselector.MemberSelectors.DATA_MEMBER_SELECTOR;
import static com.hazelcast.internal.util.executor.ExecutorType.CACHED;
import static com.hazelcast.jet.Util.idToString;
import static com.hazelcast.jet.core.JobStatus.COMPLETING;
import static com.hazelcast.jet.core.JobStatus.FAILED;
import static com.hazelcast.jet.core.JobStatus.NOT_RUNNING;
import static com.hazelcast.jet.core.JobStatus.RUNNING;
import static com.hazelcast.jet.core.JobStatus.SUSPENDED;
import static com.hazelcast.jet.datamodel.Tuple2.tuple2;
import static com.hazelcast.jet.impl.AbstractJobProxy.cannotAddStatusListener;
import static com.hazelcast.jet.impl.JobClassLoaderService.JobPhase.COORDINATOR;
import static com.hazelcast.jet.impl.TerminationMode.CANCEL_FORCEFUL;
import static com.hazelcast.jet.impl.execution.init.CustomClassLoadedObject.deserializeWithCustomClassLoader;
import static com.hazelcast.jet.impl.operation.GetJobIdsOperation.ALL_JOBS;
import static com.hazelcast.jet.impl.util.ExceptionUtil.sneakyThrow;
import static com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch;
import static com.hazelcast.jet.impl.util.LoggingUtil.logFine;
import static com.hazelcast.jet.impl.util.LoggingUtil.logFinest;
import static com.hazelcast.spi.properties.ClusterProperty.JOB_SCAN_PERIOD;
import static java.util.Collections.emptyList;
import static java.util.Comparator.comparing;
import static java.util.concurrent.CompletableFuture.completedFuture;
import static java.util.concurrent.TimeUnit.HOURS;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
import static java.util.function.Function.identity;
import static java.util.stream.Collectors.toList;
/**
* A service that handles MasterContexts on the coordinator member.
* Job-control operations from clients are handled here.
*/
public class JobCoordinationService {
private static final String COORDINATOR_EXECUTOR_NAME = "jet:coordinator";
/**
* The delay before retrying to start/scale up a job.
*/
private static final long RETRY_DELAY_IN_MILLIS = SECONDS.toMillis(2);
private static final ThreadLocal<Boolean> IS_JOB_COORDINATOR_THREAD = ThreadLocal.withInitial(() -> false);
private static final int COORDINATOR_THREADS_POOL_SIZE = 4;
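/**
* Lower bound on the job-scan delay; used instead of {@link #maxJobScanPeriodInMillis}
* while the cluster is not yet in a state to start jobs (see {@link #scanJobs()}).
*/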
private static final int MIN_JOB_SCAN_PERIOD_MILLIS = 100;
/**
* Inserted temporarily into {@link #lightMasterContexts} to safely check for double job submission.
* When reading, it's treated as if the job doesn't exist.
*/
private static final Object UNINITIALIZED_LIGHT_JOB_MARKER = new Object();
private final NodeEngineImpl nodeEngine;
private final JetServiceBackend jetServiceBackend;
private final JetConfig config;
private final Context pipelineToDagContext;
private final ILogger logger;
private final JobRepository jobRepository;
private final ConcurrentMap<Long, MasterContext> masterContexts = new ConcurrentHashMap<>();
private final ConcurrentMap<Long, Object> lightMasterContexts = new ConcurrentHashMap<>();
private final ConcurrentMap<UUID, CompletableFuture<Void>> membersShuttingDown = new ConcurrentHashMap<>();
private final ConcurrentMap<Long, ScheduledFuture<?>> scheduledJobTimeouts = new ConcurrentHashMap<>();
/**
* Map of {memberUuid -> removeTime (in nanos)}.
*
* UUIDs of members which left the cluster and for which we didn't receive
* a {@link NotifyMemberShutdownOperation}, mapped to the time they were removed.
*/
private final Map<UUID, Long> removedMembers = new ConcurrentHashMap<>();
private final Object lock = new Object();
private volatile boolean isClusterEnteringPassiveState;
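// Becomes true after the first job scan on this master. Until then, duplicate
// job-name checks cannot be performed and named submissions are rejected with a
// RetryableHazelcastException (see hasActiveJobWithName()).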
private volatile boolean jobsScanned;
private final AtomicInteger scaleUpScheduledCount = new AtomicInteger();
@Probe(name = MetricNames.JOBS_SUBMITTED)
private final Counter jobSubmitted = MwCounter.newMwCounter();
@Probe(name = MetricNames.JOBS_COMPLETED_SUCCESSFULLY)
private final Counter jobCompletedSuccessfully = MwCounter.newMwCounter();
@Probe(name = MetricNames.JOBS_COMPLETED_WITH_FAILURE)
private final Counter jobCompletedWithFailure = MwCounter.newMwCounter();
private long maxJobScanPeriodInMillis;
JobCoordinationService(
NodeEngineImpl nodeEngine, JetServiceBackend jetServiceBackend, JetConfig config, JobRepository jobRepository
) {
this.nodeEngine = nodeEngine;
this.jetServiceBackend = jetServiceBackend;
this.config = config;
this.pipelineToDagContext = () -> this.config.getCooperativeThreadCount();
this.logger = nodeEngine.getLogger(getClass());
this.jobRepository = jobRepository;
ExecutionService executionService = nodeEngine.getExecutionService();
executionService.register(COORDINATOR_EXECUTOR_NAME, COORDINATOR_THREADS_POOL_SIZE, Integer.MAX_VALUE, CACHED);
// register metrics
MetricsRegistry registry = nodeEngine.getMetricsRegistry();
MetricDescriptor descriptor = registry.newMetricDescriptor()
.withTag(MetricTags.MODULE, "jet");
registry.registerStaticMetrics(descriptor, this);
}
public JobRepository jobRepository() {
return jobRepository;
}
public void startScanningForJobs() {
ExecutionService executionService = nodeEngine.getExecutionService();
HazelcastProperties properties = nodeEngine.getProperties();
maxJobScanPeriodInMillis = properties.getMillis(JOB_SCAN_PERIOD);
try {
executionService.schedule(COORDINATOR_EXECUTOR_NAME, this::scanJobs, 0, MILLISECONDS);
logger.info("Jet started scanning for jobs");
} catch (RejectedExecutionException ex) {
logger.info("Scan jobs task is rejected on the execution service since the executor service" +
" has shutdown", ex);
}
}
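/**
* Submits a job defined by the given serialized DAG or pipeline. The returned
* future completes when the submission has been processed (or failed), not when
* the job completes; use {@link #joinSubmittedJob} to await the job's result.
*/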
public CompletableFuture<Void> submitJob(
long jobId,
Data serializedJobDefinition,
JobConfig jobConfig,
Subject subject
) {
CompletableFuture<Void> res = new CompletableFuture<>();
submitToCoordinatorThread(() -> {
MasterContext masterContext;
try {
assertIsMaster("Cannot submit job " + idToString(jobId) + " to non-master node");
checkOperationalState();
// the order of operations is important.
// first, check if the job is already completed
JobResult jobResult = jobRepository.getJobResult(jobId);
if (jobResult != null) {
logger.fine("Not starting job " + idToString(jobId) + " since already completed with result: "
+ jobResult);
return;
}
if (!config.isResourceUploadEnabled() && !jobConfig.getResourceConfigs().isEmpty()) {
throw new JetException(Util.JET_RESOURCE_UPLOAD_DISABLED_MESSAGE);
}
int quorumSize = jobConfig.isSplitBrainProtectionEnabled() ? getQuorumSize() : 0;
Object jobDefinition = deserializeJobDefinition(jobId, jobConfig, serializedJobDefinition);
DAG dag;
Data serializedDag;
if (jobDefinition instanceof PipelineImpl) {
dag = ((PipelineImpl) jobDefinition).toDag(pipelineToDagContext);
serializedDag = nodeEngine().getSerializationService().toData(dag);
} else {
dag = (DAG) jobDefinition;
serializedDag = serializedJobDefinition;
}
checkPermissions(subject, dag);
Set<String> ownedObservables = ownedObservables(dag);
JobRecord jobRecord = new JobRecord(nodeEngine.getClusterService().getClusterVersion(), jobId, serializedDag,
dagToJson(dag), jobConfig, ownedObservables, subject);
JobExecutionRecord jobExecutionRecord = new JobExecutionRecord(jobId, quorumSize);
masterContext = createMasterContext(jobRecord, jobExecutionRecord);
boolean hasDuplicateJobName;
synchronized (lock) {
assertIsMaster("Cannot submit job " + idToString(jobId) + " to non-master node");
checkOperationalState();
hasDuplicateJobName = jobConfig.getName() != null && hasActiveJobWithName(jobConfig.getName());
if (!hasDuplicateJobName) {
// just try to initiate the coordination
MasterContext prev = masterContexts.putIfAbsent(jobId, masterContext);
if (prev != null) {
logger.fine("Joining to already existing masterContext " + prev.jobIdString());
return;
}
}
}
if (hasDuplicateJobName) {
jobRepository.deleteJob(jobId);
throw new JobAlreadyExistsException("Another active job with equal name (" + jobConfig.getName()
+ ") exists: " + idToString(jobId));
}
// If the job is not currently running, it might have just completed
if (completeMasterContextIfJobAlreadyCompleted(masterContext)) {
return;
}
// If there is neither a master context nor a job result, this is the first submission
jobSubmitted.inc();
jobRepository.putNewJobRecord(jobRecord);
logger.info("Starting job " + idToString(masterContext.jobId()) + " based on submit request");
} catch (Throwable e) {
jetServiceBackend.getJobClassLoaderService()
.tryRemoveClassloadersForJob(jobId, COORDINATOR);
res.completeExceptionally(e);
throw e;
} finally {
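// If res was already completed exceptionally in the catch block, this
// complete() is a no-op: a CompletableFuture can be completed only once.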
res.complete(null);
}
masterContext.jobContext().tryStartJob();
});
return res;
}
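/**
* Submits a light job. Unlike {@link #submitJob}, the returned future completes
* when the light job itself completes, mirroring {@link #joinLightJob}.
*/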
public CompletableFuture<Void> submitLightJob(
long jobId,
Object deserializedJobDefinition,
Data serializedJobDefinition,
JobConfig jobConfig,
Subject subject
) {
if (deserializedJobDefinition == null) {
deserializedJobDefinition = nodeEngine().getSerializationService().toObject(serializedJobDefinition);
}
DAG dag;
if (deserializedJobDefinition instanceof DAG) {
dag = (DAG) deserializedJobDefinition;
} else {
dag = ((PipelineImpl) deserializedJobDefinition).toDag(pipelineToDagContext);
}
// First insert just a marker into the map. This is to prevent initializing the light job if the jobId
// was submitted twice. This can happen e.g. if the client retries.
Object oldContext = lightMasterContexts.putIfAbsent(jobId, UNINITIALIZED_LIGHT_JOB_MARKER);
if (oldContext != null) {
throw new JetException("duplicate jobId " + idToString(jobId));
}
checkPermissions(subject, dag);
// Initialize and start the job. We do this before adding the actual
// LightMasterContext to the map to avoid possible races between job initialization and cancellation.
return LightMasterContext.createContext(nodeEngine, this, dag, jobId, jobConfig, subject)
.thenComposeAsync(mc -> {
Object oldCtx = lightMasterContexts.put(jobId, mc);
assert oldCtx == UNINITIALIZED_LIGHT_JOB_MARKER;
scheduleJobTimeout(jobId, jobConfig.getTimeoutMillis());
return mc.getCompletionFuture()
.whenComplete((r, t) -> {
Object removed = lightMasterContexts.remove(jobId);
assert removed instanceof LightMasterContext : "LMC not found: " + removed;
unscheduleJobTimeout(jobId);
});
}, coordinationExecutor());
}
public long getJobSubmittedCount() {
return jobSubmitted.get();
}
public JobConfig getLightJobConfig(long jobId) {
Object mc = lightMasterContexts.get(jobId);
if (mc == null || mc == UNINITIALIZED_LIGHT_JOB_MARKER) {
throw new JobNotFoundException(jobId);
}
return ((LightMasterContext) mc).getJobConfig();
}
private void checkPermissions(Subject subject, DAG dag) {
SecurityContext securityContext = nodeEngine.getNode().securityContext;
if (securityContext == null || subject == null) {
return;
}
for (Vertex vertex : dag) {
Permission requiredPermission = vertex.getMetaSupplier().getRequiredPermission();
if (requiredPermission != null) {
securityContext.checkPermission(subject, requiredPermission);
}
}
}
private static Set<String> ownedObservables(DAG dag) {
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(dag.iterator(), 0), false)
.map(vertex -> vertex.getMetaSupplier().getTags().get(ObservableImpl.OWNED_OBSERVABLE))
.filter(Objects::nonNull)
.collect(Collectors.toSet());
}
@SuppressWarnings("WeakerAccess") // used by jet-enterprise
MasterContext createMasterContext(JobRecord jobRecord, JobExecutionRecord jobExecutionRecord) {
return new MasterContext(nodeEngine, this, jobRecord, jobExecutionRecord);
}
private boolean hasActiveJobWithName(@Nonnull String jobName) {
// if scanJobs() has not run yet, master context objects may not be initialized.
// in this case, we cannot check if the new job submission has a duplicate job name.
// therefore, we will retry until the scanJobs() task runs at least once.
if (!jobsScanned) {
throw new RetryableHazelcastException("Cannot submit job with name '" + jobName
+ "' before the master node initializes job coordination service's state");
}
return masterContexts.values()
.stream()
.anyMatch(ctx -> jobName.equals(ctx.jobConfig().getName()));
}
public CompletableFuture<Void> prepareForPassiveClusterState() {
assertIsMaster("Cannot prepare for passive cluster state on a non-master node");
synchronized (lock) {
isClusterEnteringPassiveState = true;
}
return submitToCoordinatorThread(() -> {
CompletableFuture[] futures = masterContexts
.values().stream()
.map(mc -> mc.jobContext().gracefullyTerminate())
.toArray(CompletableFuture[]::new);
return CompletableFuture.allOf(futures);
}).thenCompose(identity());
}
public void clusterChangeDone() {
synchronized (lock) {
isClusterEnteringPassiveState = false;
}
}
public void reset() {
assert !isMaster() : "this member is a master";
List<MasterContext> contexts;
synchronized (lock) {
contexts = new ArrayList<>(masterContexts.values());
masterContexts.clear();
jobsScanned = false;
}
contexts.forEach(ctx -> ctx.jobContext().setFinalResult(new CancellationException()));
}
public CompletableFuture<Void> joinSubmittedJob(long jobId) {
checkOperationalState();
CompletableFuture<CompletableFuture<Void>> future = callWithJob(jobId,
mc -> mc.jobContext().jobCompletionFuture()
.handle((r, t) -> {
if (t == null) {
return null;
}
if (t instanceof CancellationException || t instanceof JetException) {
throw sneakyThrow(t);
}
throw new JetException(ExceptionUtil.stackTraceToString(t));
}),
JobResult::asCompletableFuture,
jobRecord -> {
JobExecutionRecord jobExecutionRecord = ensureExecutionRecord(jobId,
jobRepository.getJobExecutionRecord(jobId));
return startJobIfNotStartedOrCompleted(jobRecord, jobExecutionRecord, "join request from client");
},
null
);
return future
.thenCompose(identity()); // unwrap the inner future
}
public CompletableFuture<Void> joinLightJob(long jobId) {
Object mc = lightMasterContexts.get(jobId);
if (mc == null || mc == UNINITIALIZED_LIGHT_JOB_MARKER) {
throw new JobNotFoundException(jobId);
}
return ((LightMasterContext) mc).getCompletionFuture();
}
public CompletableFuture<Void> terminateJob(long jobId, TerminationMode terminationMode, boolean userInitiated) {
return runWithJob(jobId,
masterContext -> {
// User can cancel in any state, other terminations are allowed only when running.
// This is not technically required (we can request termination in any state),
// but this method is only called by the user. It would be weird for the client to
// request a restart if the job didn't start yet etc.
// Also, it would be weird to restart the job during STARTING: as soon as it starts,
// it would immediately restart.
// In any case, it doesn't make sense to restart a suspended job.
JobStatus jobStatus = masterContext.jobStatus();
if (jobStatus != RUNNING && terminationMode != CANCEL_FORCEFUL) {
throw new IllegalStateException("Cannot " + terminationMode + ", job status is " + jobStatus
+ ", should be " + RUNNING);
}
String terminationResult = masterContext.jobContext()
.requestTermination(terminationMode, false, userInitiated)
.f1();
if (terminationResult != null) {
throw new IllegalStateException("Cannot " + terminationMode + ": " + terminationResult);
}
},
jobResult -> {
if (terminationMode != CANCEL_FORCEFUL) {
throw new IllegalStateException("Cannot " + terminationMode + " job " + idToString(jobId)
+ " because it already has a result: " + jobResult);
}
logger.fine("Ignoring cancellation of a completed job " + idToString(jobId));
},
jobRecord -> {
// we'll eventually learn of the job through scanning of records or from a join operation
throw new RetryableHazelcastException("No MasterContext found for job " + idToString(jobId) + " for "
+ terminationMode);
}
);
}
public void terminateLightJob(long jobId, boolean userInitiated) {
Object mc = lightMasterContexts.get(jobId);
if (mc == null || mc == UNINITIALIZED_LIGHT_JOB_MARKER) {
throw new JobNotFoundException(jobId);
}
((LightMasterContext) mc).requestTermination(userInitiated);
}
/**
* Return the job IDs of jobs with the given name, sorted by
* {active/completed, creation time}, active & newest first.
*/
public CompletableFuture<GetJobIdsResult> getJobIds(@Nullable String onlyName, long onlyJobId) {
if (onlyName != null) {
assertIsMaster("Cannot query list of job IDs by name on non-master node");
}
return submitToCoordinatorThread(() -> {
if (onlyJobId != ALL_JOBS) {
Object lmc = lightMasterContexts.get(onlyJobId);
if (lmc != null && lmc != UNINITIALIZED_LIGHT_JOB_MARKER) {
return new GetJobIdsResult(onlyJobId, true);
}
if (isMaster()) {
try {
callWithJob(onlyJobId, mc -> null, jobResult -> null, jobRecord -> null, null)
.get();
} catch (ExecutionException e) {
if (e.getCause() instanceof JobNotFoundException) {
return GetJobIdsResult.EMPTY;
}
throw e;
}
return new GetJobIdsResult(onlyJobId, false);
}
return GetJobIdsResult.EMPTY;
}
List<Tuple2<Long, Boolean>> result = new ArrayList<>();
// add light jobs - only if no name is requested, light jobs can't have a name
if (onlyName == null) {
for (Object ctx : lightMasterContexts.values()) {
if (ctx != UNINITIALIZED_LIGHT_JOB_MARKER) {
result.add(tuple2(((LightMasterContext) ctx).getJobId(), true));
}
}
}
// add normal jobs - only on master
if (isMaster()) {
if (onlyName != null) {
// we first need to collect to a map where the jobId is the key to eliminate possible duplicates
// in JobResult and also to be able to sort from newest to oldest
Map<Long, Long> jobs = new HashMap<>();
for (MasterContext ctx : masterContexts.values()) {
if (onlyName.equals(ctx.jobConfig().getName())) {
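// an active job gets Long.MAX_VALUE as its "creation time" so that the
// reversed sort below puts active jobs ahead of all completed ones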
jobs.put(ctx.jobId(), Long.MAX_VALUE);
}
}
for (JobResult jobResult : jobRepository.getJobResults(onlyName)) {
jobs.put(jobResult.getJobId(), jobResult.getCreationTime());
}
jobs.entrySet().stream()
.sorted(
comparing(Entry::getValue)
.thenComparing(Entry::getKey)
.reversed()
)
.forEach(entry -> result.add(tuple2(entry.getKey(), false)));
} else {
for (Long jobId : jobRepository.getAllJobIds()) {
result.add(tuple2(jobId, false));
}
}
}
return new GetJobIdsResult(result);
});
}
/**
* Returns the job status or fails with {@link JobNotFoundException}
* if the requested job is not found.
*/
public CompletableFuture<JobStatus> getJobStatus(long jobId) {
// Logic of determining job status should be in sync
// with getJobAndSqlSummary and getJobAndSqlSummaryList.
return callWithJob(jobId,
JobCoordinationService::determineJobStatusFromMasterContext,
JobResult::getJobStatus,
jobRecord -> NOT_RUNNING,
jobExecutionRecord -> jobExecutionRecord.isSuspended() ? SUSPENDED : NOT_RUNNING
);
}
private static JobStatus determineJobStatusFromMasterContext(MasterContext mc) {
// When the job finishes running, we write NOT_RUNNING to jobStatus first and then
// write null to terminationRequest (see MasterJobContext.finalizeJob()). We
// have to read them in the opposite order.
Optional<TerminationRequest> maybeTerminationRequest = mc.jobContext().getTerminationRequest();
JobStatus jobStatus = mc.jobStatus();
return jobStatus == RUNNING && maybeTerminationRequest.isPresent()
? COMPLETING
: jobStatus;
}
private static boolean determineIsJobUserCancelledFromMasterContext(MasterContext mc) {
// order of reads is important, see comment in determineJobStatusFromMasterContext
boolean userInitiatedTermination = mc.jobContext().isUserInitiatedTermination();
JobStatus jobStatus = mc.jobStatus();
switch (jobStatus) {
case COMPLETED:
return false;
case FAILED:
return userInitiatedTermination;
default:
throw new IllegalStateException("Job not finished");
}
}
/**
* Returns the reason why this job has been suspended in a human-readable
* form.
*
* Fails with {@link JobNotFoundException} if the requested job is not found.
*
* Fails with {@link IllegalStateException} if the requested job is not
* currently in a suspended state.
*/
public CompletableFuture<JobSuspensionCause> getJobSuspensionCause(long jobId) {
FunctionEx<JobExecutionRecord, JobSuspensionCause> jobExecutionRecordHandler = jobExecutionRecord -> {
JobSuspensionCause cause = jobExecutionRecord.getSuspensionCause();
if (cause == null) {
throw new IllegalStateException("Job not suspended");
}
return cause;
};
return callWithJob(jobId,
mc -> {
JobExecutionRecord jobExecutionRecord = mc.jobExecutionRecord();
return jobExecutionRecordHandler.apply(jobExecutionRecord);
},
jobResult -> {
throw new IllegalStateException("Job not suspended");
},
jobRecord -> {
throw new IllegalStateException("Job not suspended");
},
jobExecutionRecordHandler
);
}
public CompletableFuture<Boolean> isJobUserCancelled(long jobId) {
// Logic of determining userCancelled should be in sync
// with getJobAndSqlSummary and getJobAndSqlSummaryList.
return callWithJob(jobId,
JobCoordinationService::determineIsJobUserCancelledFromMasterContext,
JobResult::isUserCancelled,
// If we do not have a result, the job has not finished yet, so it cannot have been cancelled.
jobRecord -> {
throw new IllegalStateException("Job not finished");
},
null
);
}
/**
* Returns the latest metrics for a job or fails with {@link JobNotFoundException}
* if the requested job is not found.
*/
public CompletableFuture<List<RawJobMetrics>> getJobMetrics(long jobId) {
CompletableFuture<List<RawJobMetrics>> cf = new CompletableFuture<>();
runWithJob(jobId,
mc -> mc.jobContext().collectMetrics(cf),
jobResult -> {
List<RawJobMetrics> metrics = jobRepository.getJobMetrics(jobId);
cf.complete(metrics != null ? metrics : emptyList());
},
jobRecord -> cf.complete(emptyList())
);
return cf;
}
/**
* Returns the job submission time or fails with {@link JobNotFoundException}
* if the requested job is not found.
*/
public CompletableFuture<Long> getJobSubmissionTime(long jobId, boolean isLightJob) {
if (isLightJob) {
Object mc = lightMasterContexts.get(jobId);
if (mc == null || mc == UNINITIALIZED_LIGHT_JOB_MARKER) {
throw new JobNotFoundException(jobId);
}
return completedFuture(((LightMasterContext) mc).getStartTime());
}
return callWithJob(jobId,
mc -> mc.jobRecord().getCreationTime(),
JobResult::getCreationTime,
JobRecord::getCreationTime,
null
);
}
public CompletableFuture<Void> resumeJob(long jobId) {
return runWithJob(jobId,
masterContext -> masterContext.jobContext().resumeJob(),
jobResult -> {
throw new IllegalStateException("Job already completed");
},
jobRecord -> {
throw new RetryableHazelcastException("Job " + idToString(jobId) + " not yet discovered");
}
);
}
/**
* Return a summary of all jobs
* @deprecated Since 5.3, to be removed in 6.0. Use {@link #getJobAndSqlSummaryList()} instead
*/
@Deprecated
public CompletableFuture<List<JobSummary>> getJobSummaryList() {
return getJobAndSqlSummaryList().thenApply(jobAndSqlSummaries -> jobAndSqlSummaries.stream()
.map(this::toJobSummary)
.collect(toList()));
}
private JobSummary toJobSummary(JobAndSqlSummary jobAndSqlSummary) {
return new JobSummary(jobAndSqlSummary.isLightJob(), jobAndSqlSummary.getJobId(), jobAndSqlSummary.getExecutionId(),
jobAndSqlSummary.getNameOrId(), jobAndSqlSummary.getStatus(), jobAndSqlSummary.getSubmissionTime(),
jobAndSqlSummary.getCompletionTime(), jobAndSqlSummary.getFailureText());
}
/**
* Return a summary of all jobs with sql data
*/
public CompletableFuture<List<JobAndSqlSummary>> getJobAndSqlSummaryList() {
return submitToCoordinatorThread(() -> {
Map<Long, JobAndSqlSummary> jobs = new HashMap<>();
if (isMaster()) {
// running jobs
jobRepository.getJobRecords().stream()
.map(this::getJobAndSqlSummary)
.forEach(s -> jobs.put(s.getJobId(), s));
// completed jobs
// (can overwrite entries created from JobRecords but that is fine and in fact desired
// because JobResult is always more recent than JobRecord for given job)
jobRepository.getJobResults().stream()
.map(r -> {
// Note: getJobExecutionRecord() involves a volatile read; it is deliberately
// invoked here, inside the mapping function, and not under a lock.
JobExecutionRecord executionRecord = jobRepository.getJobExecutionRecord(r.getJobId());
return new JobAndSqlSummary(
false, r.getJobId(), 0, r.getJobNameOrId(), r.getJobStatus(), r.getCreationTime(),
r.getCompletionTime(), r.getFailureText(), null,
executionRecord == null || executionRecord.getSuspensionCause() == null ? null :
executionRecord.getSuspensionCause().description(),
r.isUserCancelled());
})
.forEach(s -> jobs.put(s.getJobId(), s));
}
// light jobs
lightMasterContexts.values().stream()
.filter(lmc -> lmc != UNINITIALIZED_LIGHT_JOB_MARKER)
.map(LightMasterContext.class::cast)
.map(this::getJobAndSqlSummary)
.forEach(s -> jobs.put(s.getJobId(), s));
return jobs.values().stream().sorted(comparing(JobAndSqlSummary::getSubmissionTime).reversed()).collect(toList());
});
}
private JobAndSqlSummary getJobAndSqlSummary(LightMasterContext lmc) {
String query = lmc.getJobConfig().getArgument(JobConfigArguments.KEY_SQL_QUERY_TEXT);
Object unbounded = lmc.getJobConfig().getArgument(JobConfigArguments.KEY_SQL_UNBOUNDED);
SqlSummary sqlSummary = query != null && unbounded != null ?
new SqlSummary(query, Boolean.TRUE.equals(unbounded)) : null;
// For simplicity, we assume here that a light job is running iff its LightMasterContext
// exists: running jobs are not cancelled and others are not visible.
//
// It is possible that LightMasterContext still exists (for a short period of time)
// when the job is already terminated.
// LightMasterContext is removed from map in submitLightJob() _after_ setting result
// on the jobCompletionFuture in LightMasterContext.finalizeJob().
// jobCompletionFuture is also used in join operation so join operation sees
// finished job even though master context still exists.
// Also, future completion handlers (thenApply etc.) are not guaranteed to run in
// any particular order and can be executed in parallel.
//
// This scenario is unlikely and we tolerate it, however it is possible:
// 1. user submits a light job
// 2. user gets the job by id and joins it (separate Job proxy instance is necessary
// because different future will be used than for submit)
// 3. job finishes (either normally or via error or cancellation)
// 4. join finishes - user gets information that the job completed (from join, not submit)
// 5. user asks for jobs list and the job is reported as running
//
// In such a scenario the finished job will be reported as running.
//
// Note: suspensionCause is not supported for light jobs.
return new JobAndSqlSummary(
true, lmc.getJobId(), lmc.getJobId(), idToString(lmc.getJobId()),
RUNNING, lmc.getStartTime(), 0, null, sqlSummary, null,
false);
}
/**
* Applies the specified delta configuration if the job is suspended.
* Otherwise, an {@link IllegalStateException} is thrown by the returned future.
*/
public CompletableFuture<Void> updateJobConfig(long jobId, @Nonnull DeltaJobConfig deltaConfig) {
return callWithJob(jobId,
masterContext -> masterContext.updateJobConfig(deltaConfig),
jobResult -> {
throw new IllegalStateException("Job not suspended, but " + jobResult.getJobStatus());
},
jobRecord -> {
throw new IllegalStateException("Job not suspended");
},
null
);
}
/**
* Applies the specified listener registration if the job is not completed/failed.
* Otherwise, an {@link IllegalStateException} is thrown by the returned future.
*/
public CompletableFuture<UUID> addJobStatusListener(long jobId, boolean isLightJob, Registration registration) {
if (isLightJob) {
Object mc = lightMasterContexts.get(jobId);
if (mc == null || mc == UNINITIALIZED_LIGHT_JOB_MARKER) {
throw new JobNotFoundException(jobId);
} else {
return completedFuture(((LightMasterContext) mc).addStatusListener(registration));
}
}
return callWithJob(jobId,
masterContext -> masterContext.addStatusListener(registration),
jobResult -> {
throw cannotAddStatusListener(jobResult.getJobStatus());
},
jobRecord -> {
JobEventService jobEventService = nodeEngine.getService(JobEventService.SERVICE_NAME);
return jobEventService.handleAllRegistrations(jobId, registration).getId();
},
null);
}
/**
* Add the given member to shutting down members. This will prevent
* submission of more executions until the member actually leaves the
* cluster. The returned future will complete when all executions of which
* the member is a participant terminate.
*
* The method is idempotent; the {@link NotifyMemberShutdownOperation}
* which calls it can be retried.
*/
@Nonnull
public CompletableFuture<Void> addShuttingDownMember(UUID uuid) {
CompletableFuture<Void> future = new CompletableFuture<>();
CompletableFuture<Void> oldFuture = membersShuttingDown.putIfAbsent(uuid, future);
if (oldFuture != null) {
return oldFuture;
}
if (removedMembers.containsKey(uuid)) {
logFine(logger, "NotifyMemberShutdownOperation received for a member that was already " +
"removed from the cluster: %s", uuid);
return completedFuture(null);
}
logFine(logger, "Added a shutting-down member: %s", uuid);
CompletableFuture[] futures = masterContexts.values().stream()
.map(mc -> mc.jobContext().onParticipantGracefulShutdown(uuid))
.toArray(CompletableFuture[]::new);
// Need to do this even if futures.length == 0, we need to perform the action in whenComplete
CompletableFuture.allOf(futures)
.whenComplete(withTryCatch(logger, (r, e) -> future.complete(null)));
return future;
}
// only for testing
public Map<Long, MasterContext> getMasterContexts() {
return new HashMap<>(masterContexts);
}
// only for testing
public Map<Long, Object> getLightMasterContexts() {
return new HashMap<>(lightMasterContexts);
}
// only for testing
public MasterContext getMasterContext(long jobId) {
return masterContexts.get(jobId);
}
JetServiceBackend getJetServiceBackend() {
return jetServiceBackend;
}
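/**
* Returns true if this member is the master and the cluster is in a state in
* which (non-light) jobs may be started: not passive or in transition, no
* members shutting down, no rolling upgrade in progress, partitions initialized
* and in a safe replication state and, under automatic cluster state
* management, a stable cluster topology.
*/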
boolean shouldStartJobs() {
if (!isMaster() || !nodeEngine.isRunning()) {
return false;
}
ClusterState clusterState = nodeEngine.getClusterService().getClusterState();
if (isClusterEnteringPassiveState || clusterState == PASSIVE || clusterState == IN_TRANSITION) {
logger.fine("Not starting jobs because cluster is in passive state or in transition.");
return false;
}
// if there are any members in a shutdown process, don't start jobs
if (!membersShuttingDown.isEmpty()) {
LoggingUtil.logFine(logger, "Not starting jobs because members are shutting down: %s",
membersShuttingDown.keySet());
return false;
}
Version clusterVersion = nodeEngine.getClusterService().getClusterVersion();
for (Member m : nodeEngine.getClusterService().getMembers()) {
if (!clusterVersion.equals(m.getVersion().asVersion())) {
logger.fine("Not starting non-light jobs because rolling upgrade is in progress");
return false;
}
}
PartitionServiceState state =
getInternalPartitionService().getPartitionReplicaStateChecker().getPartitionServiceState();
if (state != PartitionServiceState.SAFE) {
logger.fine("Not starting jobs because partition replication is not in safe state, but in " + state);
return false;
}
if (!getInternalPartitionService().getPartitionStateManager().isInitialized()) {
logger.fine("Not starting jobs because partitions are not yet initialized.");
return false;
}
if (nodeEngine.getNode().isClusterStateManagementAutomatic()
&& !nodeEngine.getNode().isManagedClusterStable()) {
LoggingUtil.logFine(logger, "Not starting jobs because cluster is running in managed context "
+ "and is not yet stable. Current cluster topology intentL %s, "
+ "expected cluster size: %d, current: %d.",
nodeEngine.getNode().getClusterTopologyIntent(),
nodeEngine.getNode().currentSpecifiedReplicaCount(), nodeEngine.getClusterService().getSize());
return false;
}
return true;
}
private CompletableFuture<Void> runWithJob(
long jobId,
@Nonnull Consumer<MasterContext> masterContextHandler,
@Nonnull Consumer<JobResult> jobResultHandler,
@Nonnull Consumer<JobRecord> jobRecordHandler
) {
return callWithJob(jobId,
toNullFunction(masterContextHandler),
toNullFunction(jobResultHandler),
toNullFunction(jobRecordHandler),
null
);
}
/**
* Returns a function that passes its argument to the given {@code
* consumer} and returns {@code null}.
*/
@Nonnull
private <T, R> Function<T, R> toNullFunction(@Nonnull Consumer<T> consumer) {
return val -> {
consumer.accept(val);
return null;
};
}
private <T> CompletableFuture<T> callWithJob(
long jobId,
@Nonnull Function<MasterContext, T> masterContextHandler,
@Nonnull Function<JobResult, T> jobResultHandler,
@Nonnull Function<JobRecord, T> jobRecordHandler,
@Nullable Function<JobExecutionRecord, T> jobExecutionRecordHandler
) {
assertIsMaster("Cannot do this task on non-master. jobId=" + idToString(jobId));
return submitToCoordinatorThread(() -> {
// when job is finalized, actions happen in this order:
// - JobResult and JobMetrics are created
// - JobRecord and JobExecutionRecord are deleted (asynchronously and in parallel)
// - masterContext is removed from the map
// We check them in reverse order so that no race is possible.
//
// We check the MasterContext before JobResult for optimization. In
// most cases there will either be MasterContext or JobResult.
// Neither of them is present only after master failed and the new
// master didn't yet scan jobs. We check the JobResult again at the
// end for correctness to avoid race with job completion.
//
// We check the JobResult before JobRecord and JobExecutionRecord
// because JobResult is more recent and contains more information.
// In some cases (slow deleteJob execution) there can exist
// JobResult, one or both JobRecord and JobExecutionRecord, and no
// MasterContext.
// check masterContext first
MasterContext mc = masterContexts.get(jobId);
if (mc != null) {
return masterContextHandler.apply(mc);
}
// early check of JobResult.
JobResult jobResult = jobRepository.getJobResult(jobId);
if (jobResult != null) {
return jobResultHandler.apply(jobResult);
}
// the job might not be yet discovered by job record scanning
JobExecutionRecord jobExRecord;
if (jobExecutionRecordHandler != null && (jobExRecord = jobRepository.getJobExecutionRecord(jobId)) != null) {
return jobExecutionRecordHandler.apply(jobExRecord);
}
JobRecord jobRecord;
if ((jobRecord = jobRepository.getJobRecord(jobId)) != null) {
return jobRecordHandler.apply(jobRecord);
}
// second check for JobResult, see comment at the top of the method
jobResult = jobRepository.getJobResult(jobId);
if (jobResult != null) {
return jobResultHandler.apply(jobResult);
}
// job doesn't exist
throw new JobNotFoundException(jobId);
});
}
void onMemberAdded(MemberImpl addedMember) {
// the member can re-join with the same UUID in certain scenarios
removedMembers.remove(addedMember.getUuid());
if (addedMember.isLiteMember()) {
return;
}
updateQuorumValues();
scheduleScaleUp(config.getScaleUpDelayMillis());
}
void onMemberRemoved(UUID uuid) {
if (membersShuttingDown.remove(uuid) != null) {
logFine(logger, "Removed a shutting-down member: %s, now shuttingDownMembers=%s",
uuid, membersShuttingDown.keySet());
} else {
removedMembers.put(uuid, System.nanoTime());
}
// clean up old entries from removedMembers (the value is time when the member was removed)
long removeThreshold = System.nanoTime() - HOURS.toNanos(1);
removedMembers.entrySet().removeIf(en -> en.getValue() < removeThreshold);
}
boolean isQuorumPresent(int quorumSize) {
return getDataMemberCount() >= quorumSize;
}
/**
* Completes the job which is coordinated with the given master context object.
*/
@CheckReturnValue
CompletableFuture<Void> completeJob(MasterContext masterContext, Throwable error, long completionTime,
boolean userCancelled) {
return submitToCoordinatorThread(() -> {
// the order of operations is important.
List<RawJobMetrics> jobMetrics =
masterContext.jobConfig().isStoreMetricsAfterJobCompletion()
? masterContext.jobContext().jobMetrics()
: null;
jobRepository.completeJob(masterContext, jobMetrics, error, completionTime, userCancelled);
if (removeMasterContext(masterContext)) {
completeObservables(masterContext.jobRecord().getOwnedObservables(), error);
logger.fine(masterContext.jobIdString() + " is completed");
(error == null ? jobCompletedSuccessfully : jobCompletedWithFailure).inc();
} else {
MasterContext existing = masterContexts.get(masterContext.jobId());
if (existing != null) {
logger.severe("Different master context found to complete " + masterContext.jobIdString()
+ ", master context execution " + idToString(existing.executionId()));
} else {
logger.severe("No master context found to complete " + masterContext.jobIdString());
}
}
unscheduleJobTimeout(masterContext.jobId());
});
}
private boolean removeMasterContext(MasterContext masterContext) {
synchronized (lock) {
return masterContexts.remove(masterContext.jobId(), masterContext);
}
}
/**
* Schedules a restart task that will be run in the future for the given job.
*/
void scheduleRestart(long jobId) {
MasterContext masterContext = masterContexts.get(jobId);
if (masterContext == null) {
logger.severe("Master context for job " + idToString(jobId) + " not found to schedule restart");
return;
}
logger.fine("Scheduling restart on master for job " + masterContext.jobName());
nodeEngine.getExecutionService().schedule(COORDINATOR_EXECUTOR_NAME, () -> restartJob(jobId),
RETRY_DELAY_IN_MILLIS, MILLISECONDS);
}
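/**
* Schedules the next snapshot of the given execution after the job-configured
* snapshot interval elapses.
*/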
void scheduleSnapshot(MasterContext mc, long executionId) {
long snapshotInterval = mc.jobConfig().getSnapshotIntervalMillis();
ExecutionService executionService = nodeEngine.getExecutionService();
if (logger.isFineEnabled()) {
logger.fine(mc.jobIdString() + " snapshot is scheduled in " + snapshotInterval + "ms");
}
executionService.schedule(COORDINATOR_EXECUTOR_NAME,
() -> mc.snapshotContext().startScheduledSnapshot(executionId),
snapshotInterval, MILLISECONDS);
}
/**
* Restarts a job for a new execution if the cluster is stable.
* Otherwise, it reschedules the restart task.
*/
void restartJob(long jobId) {
MasterContext masterContext = masterContexts.get(jobId);
if (masterContext == null) {
logger.severe("Master context for job " + idToString(jobId) + " not found to restart");
return;
}
masterContext.jobContext().tryStartJob();
}
private void checkOperationalState() {
if (isClusterEnteringPassiveState) {
throw new EnteringPassiveClusterStateException();
}
}
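// Each call bumps scaleUpScheduledCount; scaleJobsUpNow() runs only if its
// captured counter value is still current, so of several scheduled scale-ups
// only the most recently scheduled one takes effect.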
private void scheduleScaleUp(long delay) {
int counter = scaleUpScheduledCount.incrementAndGet();
nodeEngine.getExecutionService().schedule(() -> scaleJobsUpNow(counter), delay, MILLISECONDS);
}
private void scaleJobsUpNow(int counter) {
// if another scale-up was scheduled after this one, ignore this one
if (scaleUpScheduledCount.get() != counter) {
return;
}
// if we can't start jobs yet, we also won't tear them down
if (!shouldStartJobs()) {
scheduleScaleUp(RETRY_DELAY_IN_MILLIS);
return;
}
submitToCoordinatorThread(() -> {
boolean allSucceeded = true;
int dataMembersCount = nodeEngine.getClusterService().getMembers(DATA_MEMBER_SELECTOR).size();
int partitionCount = nodeEngine.getPartitionService().getPartitionCount();
// If the number of partitions is lower than the data member count, some members won't have
// any partitions assigned. Jet doesn't use such members.
int dataMembersWithPartitionsCount = Math.min(dataMembersCount, partitionCount);
for (MasterContext mc : masterContexts.values()) {
allSucceeded &= mc.jobContext().maybeScaleUp(dataMembersWithPartitionsCount);
}
if (!allSucceeded) {
scheduleScaleUp(RETRY_DELAY_IN_MILLIS);
}
});
}
/**
* Scans all job records and updates the quorum size of each split-brain protection
* enabled job to the current cluster quorum size, if the latter is larger.
*/
private void updateQuorumValues() {
if (!shouldCheckQuorumValues()) {
return;
}
submitToCoordinatorThread(() -> {
try {
int currentQuorumSize = getQuorumSize();
for (JobRecord jobRecord : jobRepository.getJobRecords()) {
try {
if (!jobRecord.getConfig().isSplitBrainProtectionEnabled()) {
continue;
}
MasterContext masterContext = masterContexts.get(jobRecord.getJobId());
// if MasterContext doesn't exist, update in the IMap directly, using a sync method
if (masterContext == null) {
jobRepository.updateJobQuorumSizeIfSmaller(jobRecord.getJobId(), currentQuorumSize);
// check the master context again, it might have been just created and have picked
// up the JobRecord before being updated
masterContext = masterContexts.get(jobRecord.getJobId());
}
if (masterContext != null) {
masterContext.updateQuorumSize(currentQuorumSize);
}
} catch (Exception e) {
logger.severe("Quorum of job " + idToString(jobRecord.getJobId())
+ " could not be updated to " + currentQuorumSize, e);
}
}
} catch (Exception e) {
logger.severe("update quorum values task failed", e);
}
});
}
private boolean shouldCheckQuorumValues() {
return isMaster() && nodeEngine.isRunning()
&& getInternalPartitionService().getPartitionStateManager().isInitialized();
}
private Object deserializeJobDefinition(long jobId, JobConfig jobConfig, Data jobDefinitionData) {
JobClassLoaderService jobClassLoaderService = jetServiceBackend.getJobClassLoaderService();
ClassLoader classLoader = jobClassLoaderService.getOrCreateClassLoader(jobConfig, jobId, COORDINATOR);
try {
jobClassLoaderService.prepareProcessorClassLoaders(jobId);
return deserializeWithCustomClassLoader(nodeEngine().getSerializationService(), classLoader, jobDefinitionData);
} finally {
jobClassLoaderService.clearProcessorClassLoaders();
}
}
private String dagToJson(DAG dag) {
int coopThreadCount = config.getCooperativeThreadCount();
return dag.toJson(coopThreadCount).toString();
}
private CompletableFuture<Void> startJobIfNotStartedOrCompleted(
@Nonnull JobRecord jobRecord,
@Nonnull JobExecutionRecord jobExecutionRecord, String reason
) {
// the order of operations is important.
long jobId = jobRecord.getJobId();
MasterContext masterContext;
MasterContext oldMasterContext;
synchronized (lock) {
// We check the JobResult while holding the lock to avoid this scenario:
// 1. We find no job result
// 2. Another thread creates the result and removes the master context in completeJob
// 3. We re-create the master context below
JobResult jobResult = jobRepository.getJobResult(jobId);
if (jobResult != null) {
logger.fine("Not starting job " + idToString(jobId) + ", already has result: " + jobResult);
return jobResult.asCompletableFuture();
}
checkOperationalState();
masterContext = createMasterContext(jobRecord, jobExecutionRecord);
oldMasterContext = masterContexts.putIfAbsent(jobId, masterContext);
}
if (oldMasterContext != null) {
return oldMasterContext.jobContext().jobCompletionFuture();
}
assert jobRepository.getJobResult(jobId) == null : "jobResult should not exist at this point";
if (finalizeJobIfAutoScalingOff(masterContext)) {
return masterContext.jobContext().jobCompletionFuture();
}
if (jobExecutionRecord.isSuspended()) {
logFinest(logger, "MasterContext for suspended %s is created", masterContext.jobIdString());
} else {
logger.info("Starting job " + idToString(jobId) + ": " + reason);
masterContext.jobContext().tryStartJob();
}
return masterContext.jobContext().jobCompletionFuture();
}
// If a job result is present, it completes the master context using the job result
private boolean completeMasterContextIfJobAlreadyCompleted(MasterContext masterContext) {
long jobId = masterContext.jobId();
JobResult jobResult = jobRepository.getJobResult(jobId);
if (jobResult != null) {
logger.fine("Completing master context for " + masterContext.jobIdString()
+ " since already completed with result: " + jobResult);
masterContext.jobContext().setFinalResult(jobResult.getFailureAsThrowable());
return removeMasterContext(masterContext);
}
return finalizeJobIfAutoScalingOff(masterContext);
}
private boolean finalizeJobIfAutoScalingOff(MasterContext masterContext) {
if (!masterContext.jobConfig().isAutoScaling() && masterContext.jobExecutionRecord().executed()) {
logger.info("Suspending or failing " + masterContext.jobIdString()
+ " since auto-restart is disabled and the job has been executed before");
masterContext.jobContext().finalizeExecution(new TopologyChangedException());
return true;
}
return false;
}
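/**
* Returns the simple majority of the current data member count,
* e.g. (5 / 2) + 1 = 3 for a cluster of 5 data members.
*/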
int getQuorumSize() {
return (getDataMemberCount() / 2) + 1;
}
private int getDataMemberCount() {
ClusterService clusterService = nodeEngine.getClusterService();
return clusterService.getMembers(DATA_MEMBER_SELECTOR).size();
}
private JobAndSqlSummary getJobAndSqlSummary(JobRecord record) {
MasterContext ctx = masterContexts.get(record.getJobId());
long execId = ctx == null ? 0 : ctx.executionId();
JobExecutionRecord executionRecord = jobRepository.getJobExecutionRecord(record.getJobId());
String suspensionCause = executionRecord != null && executionRecord.getSuspensionCause() != null
? executionRecord.getSuspensionCause().description() : null;
JobStatus status;
boolean userCancelled;
if (ctx == null) {
// If we have a JobRecord but not the MasterContext, it may mean that:
// 1) job has not yet created MasterContext => NOT_RUNNING
// 2) job is suspended => SUSPENDED
// 3) job has already ended but JobRecord has not yet been deleted =>
// do not care, result will be overwritten by the one obtained from JobResult
// which is guaranteed to exist in this case
status = executionRecord != null && executionRecord.isSuspended()
? JobStatus.SUSPENDED : JobStatus.NOT_RUNNING;
userCancelled = false;
} else {
// order of reads is important, see comment in determineJobStatusFromMasterContext
// for consistent result we must use single instance of TerminationRequest for all checks
Optional<TerminationRequest> maybeTerminationRequest = ctx.jobContext().getTerminationRequest();
JobStatus jobStatus = ctx.jobStatus();
status = jobStatus == RUNNING && maybeTerminationRequest.isPresent()
? COMPLETING
: jobStatus;
// job is running, so not cancelled
// or has just ended but MasterContext still exists
userCancelled = status == FAILED &&
maybeTerminationRequest.map(TerminationRequest::isUserInitiated).orElse(false);
}
return new JobAndSqlSummary(false, record.getJobId(), execId, record.getJobNameOrId(), status,
record.getCreationTime(), 0, null, null, suspensionCause,
userCancelled);
}
private InternalPartitionServiceImpl getInternalPartitionService() {
Node node = nodeEngine.getNode();
return (InternalPartitionServiceImpl) node.getPartitionService();
}
// runs periodically to restart jobs on coordinator failure and perform GC
private void scanJobs() {
long scanStart = System.currentTimeMillis();
long nextScanDelay = maxJobScanPeriodInMillis;
try {
// explicit check for master because we don't want to use shorter delay on non-master nodes
// it will be checked again in shouldStartJobs()
if (isMaster()) {
if (shouldStartJobs()) {
doScanJobs();
} else {
// use a smaller delay when cluster is not in ready state
nextScanDelay = MIN_JOB_SCAN_PERIOD_MILLIS;
}
}
} catch (HazelcastInstanceNotActiveException ignored) {
// ignore this exception
} catch (Throwable e) {
logger.severe("Scanning jobs failed", e);
}
// Adjust the delay by the time taken by the scan to avoid accumulating more and more job results with each scan
long scanTime = System.currentTimeMillis() - scanStart;
nextScanDelay = Math.max(0, nextScanDelay - scanTime);
ExecutionService executionService = nodeEngine.getExecutionService();
executionService.schedule(this::scanJobs, nextScanDelay, MILLISECONDS);
}
private void doScanJobs() {
Collection<JobRecord> jobs = jobRepository.getJobRecords();
for (JobRecord jobRecord : jobs) {
JobExecutionRecord jobExecutionRecord = ensureExecutionRecord(jobRecord.getJobId(),
jobRepository.getJobExecutionRecord(jobRecord.getJobId()));
startJobIfNotStartedOrCompleted(jobRecord, jobExecutionRecord, "discovered by scanning of JobRecords");
}
jobRepository.cleanup(nodeEngine);
if (!jobsScanned) {
synchronized (lock) {
// Note that setting jobsScanned is required for Jet to accept submitted jobs.
// When a new cluster is started, job records IMap does not exist until first job is submitted.
// This causes a slight possibility of accepting a duplicated job in case of HotRestart,
// but that is acceptable risk.
// See comment in JobRepository.cleanup().
jobsScanned = true;
}
}
}
private JobExecutionRecord ensureExecutionRecord(long jobId, JobExecutionRecord record) {
return record != null ? record : new JobExecutionRecord(jobId, getQuorumSize());
}
@SuppressWarnings("WeakerAccess")
// used by jet-enterprise
void assertIsMaster(String error) {
if (!isMaster()) {
throw new JetException(error + ". Master address: " + nodeEngine.getClusterService().getMasterAddress());
}
}
private boolean isMaster() {
return nodeEngine.getClusterService().isMaster();
}
@SuppressWarnings("unused") // used in jet-enterprise
NodeEngineImpl nodeEngine() {
return nodeEngine;
}
ManagedExecutorService coordinationExecutor() {
return nodeEngine.getExecutionService().getExecutor(COORDINATOR_EXECUTOR_NAME);
}
CompletableFuture<Void> submitToCoordinatorThread(Runnable action) {
return submitToCoordinatorThread(() -> {
action.run();
return null;
});
}
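/**
* Runs the action on a coordinator thread and returns a future with its result.
* If the current thread is already a coordinator thread, the action runs inline
* and blocking, avoiding another round trip through the bounded pool. E.g.:
* <pre>{@code
* CompletableFuture<MasterContext> f =
*         submitToCoordinatorThread(() -> masterContexts.get(jobId));
* }</pre>
*/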
<T> CompletableFuture<T> submitToCoordinatorThread(Callable<T> action) {
// if we are on our thread already, execute directly in a blocking way
if (IS_JOB_COORDINATOR_THREAD.get()) {
try {
return completedFuture(action.call());
} catch (Throwable e) {
// most callers ignore the failure on the returned future, let's log it at least
logger.warning(null, e);
return com.hazelcast.jet.impl.util.Util.exceptionallyCompletedFuture(e);
}
}
Future<T> future = nodeEngine.getExecutionService().submit(COORDINATOR_EXECUTOR_NAME, () -> {
assert !IS_JOB_COORDINATOR_THREAD.get() : "flag already raised";
IS_JOB_COORDINATOR_THREAD.set(true);
try {
return action.call();
} catch (Throwable e) {
// most callers ignore the failure on the returned future, let's log it at least
logger.warning(null, e);
throw e;
} finally {
IS_JOB_COORDINATOR_THREAD.set(false);
}
});
return nodeEngine.getExecutionService().asCompletableFuture(future);
}
void assertOnCoordinatorThread() {
assert IS_JOB_COORDINATOR_THREAD.get() : "not on coordinator thread";
}
private void completeObservables(Set<String> observables, Throwable error) {
for (String observable : observables) {
try {
String ringbufferName = ObservableImpl.ringbufferName(observable);
Ringbuffer