/*
 * Copyright (c) 2008-2023, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.impl;

import com.hazelcast.cluster.ClusterState;
import com.hazelcast.cluster.Member;
import com.hazelcast.cluster.impl.MemberImpl;
import com.hazelcast.core.HazelcastInstanceNotActiveException;
import com.hazelcast.function.FunctionEx;
import com.hazelcast.instance.impl.Node;
import com.hazelcast.internal.cluster.ClusterService;
import com.hazelcast.internal.metrics.MetricDescriptor;
import com.hazelcast.internal.metrics.MetricsRegistry;
import com.hazelcast.internal.metrics.Probe;
import com.hazelcast.internal.partition.impl.InternalPartitionServiceImpl;
import com.hazelcast.internal.partition.impl.PartitionServiceState;
import com.hazelcast.internal.serialization.Data;
import com.hazelcast.internal.util.counters.Counter;
import com.hazelcast.internal.util.counters.MwCounter;
import com.hazelcast.internal.util.executor.ManagedExecutorService;
import com.hazelcast.jet.JetException;
import com.hazelcast.jet.JobAlreadyExistsException;
import com.hazelcast.jet.config.DeltaJobConfig;
import com.hazelcast.jet.config.JetConfig;
import com.hazelcast.jet.config.JobConfig;
import com.hazelcast.jet.config.JobConfigArguments;
import com.hazelcast.jet.core.DAG;
import com.hazelcast.jet.core.JobNotFoundException;
import com.hazelcast.jet.core.JobStatus;
import com.hazelcast.jet.core.JobSuspensionCause;
import com.hazelcast.jet.core.TopologyChangedException;
import com.hazelcast.jet.core.Vertex;
import com.hazelcast.jet.core.metrics.MetricNames;
import com.hazelcast.jet.core.metrics.MetricTags;
import com.hazelcast.jet.datamodel.Tuple2;
import com.hazelcast.jet.impl.MasterJobContext.TerminationRequest;
import com.hazelcast.jet.impl.exception.EnteringPassiveClusterStateException;
import com.hazelcast.jet.impl.execution.DoneItem;
import com.hazelcast.jet.impl.metrics.RawJobMetrics;
import com.hazelcast.jet.impl.observer.ObservableImpl;
import com.hazelcast.jet.impl.observer.WrappedThrowable;
import com.hazelcast.jet.impl.operation.GetJobIdsOperation.GetJobIdsResult;
import com.hazelcast.jet.impl.operation.NotifyMemberShutdownOperation;
import com.hazelcast.jet.impl.pipeline.PipelineImpl;
import com.hazelcast.jet.impl.pipeline.PipelineImpl.Context;
import com.hazelcast.jet.impl.util.ExceptionUtil;
import com.hazelcast.jet.impl.util.LoggingUtil;
import com.hazelcast.jet.impl.util.Util;
import com.hazelcast.logging.ILogger;
import com.hazelcast.ringbuffer.OverflowPolicy;
import com.hazelcast.ringbuffer.Ringbuffer;
import com.hazelcast.security.SecurityContext;
import com.hazelcast.spi.exception.RetryableHazelcastException;
import com.hazelcast.spi.impl.NodeEngineImpl;
import com.hazelcast.spi.impl.eventservice.impl.Registration;
import com.hazelcast.spi.impl.executionservice.ExecutionService;
import com.hazelcast.spi.properties.HazelcastProperties;
import com.hazelcast.version.Version;

import javax.annotation.CheckReturnValue;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.security.auth.Subject;
import java.security.Permission;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.Spliterators;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import static com.hazelcast.cluster.ClusterState.IN_TRANSITION;
import static com.hazelcast.cluster.ClusterState.PASSIVE;
import static com.hazelcast.cluster.memberselector.MemberSelectors.DATA_MEMBER_SELECTOR;
import static com.hazelcast.internal.util.executor.ExecutorType.CACHED;
import static com.hazelcast.jet.Util.idToString;
import static com.hazelcast.jet.core.JobStatus.COMPLETING;
import static com.hazelcast.jet.core.JobStatus.FAILED;
import static com.hazelcast.jet.core.JobStatus.NOT_RUNNING;
import static com.hazelcast.jet.core.JobStatus.RUNNING;
import static com.hazelcast.jet.core.JobStatus.SUSPENDED;
import static com.hazelcast.jet.datamodel.Tuple2.tuple2;
import static com.hazelcast.jet.impl.AbstractJobProxy.cannotAddStatusListener;
import static com.hazelcast.jet.impl.JobClassLoaderService.JobPhase.COORDINATOR;
import static com.hazelcast.jet.impl.TerminationMode.CANCEL_FORCEFUL;
import static com.hazelcast.jet.impl.execution.init.CustomClassLoadedObject.deserializeWithCustomClassLoader;
import static com.hazelcast.jet.impl.operation.GetJobIdsOperation.ALL_JOBS;
import static com.hazelcast.jet.impl.util.ExceptionUtil.sneakyThrow;
import static com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch;
import static com.hazelcast.jet.impl.util.LoggingUtil.logFine;
import static com.hazelcast.jet.impl.util.LoggingUtil.logFinest;
import static com.hazelcast.spi.properties.ClusterProperty.JOB_SCAN_PERIOD;
import static java.util.Collections.emptyList;
import static java.util.Comparator.comparing;
import static java.util.concurrent.CompletableFuture.completedFuture;
import static java.util.concurrent.TimeUnit.HOURS;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
import static java.util.function.Function.identity;
import static java.util.stream.Collectors.toList;

/**
 * A service that handles MasterContexts on the coordinator member.
 * Job-control operations from clients are handled here.
 */
public class JobCoordinationService {

    private static final String COORDINATOR_EXECUTOR_NAME = "jet:coordinator";

    /**
     * The delay before retrying to start/scale up a job.
     */
    private static final long RETRY_DELAY_IN_MILLIS = SECONDS.toMillis(2);
    private static final ThreadLocal<Boolean> IS_JOB_COORDINATOR_THREAD = ThreadLocal.withInitial(() -> false);
    private static final int COORDINATOR_THREADS_POOL_SIZE = 4;

    private static final int MIN_JOB_SCAN_PERIOD_MILLIS = 100;

    /**
     * Inserted temporarily to {@link #lightMasterContexts} to safely check for double job submission.
     * When reading, it's treated as if the job doesn't exist.
     */
    private static final Object UNINITIALIZED_LIGHT_JOB_MARKER = new Object();

    private final NodeEngineImpl nodeEngine;
    private final JetServiceBackend jetServiceBackend;
    private final JetConfig config;
    private final Context pipelineToDagContext;
    private final ILogger logger;
    private final JobRepository jobRepository;
    private final ConcurrentMap<Long, MasterContext> masterContexts = new ConcurrentHashMap<>();
    private final ConcurrentMap<Long, Object> lightMasterContexts = new ConcurrentHashMap<>();
    private final ConcurrentMap<UUID, CompletableFuture<Void>> membersShuttingDown = new ConcurrentHashMap<>();
    private final ConcurrentMap<Long, ScheduledFuture<?>> scheduledJobTimeouts = new ConcurrentHashMap<>();
    /**
     * Map of {memberUuid; removeTime}.
     *
     * A collection of UUIDs of members which left the cluster and for which we
     * didn't receive {@link NotifyMemberShutdownOperation}.
     */
    private final Map<UUID, Long> removedMembers = new ConcurrentHashMap<>();
    private final Object lock = new Object();
    private volatile boolean isClusterEnteringPassiveState;
    private volatile boolean jobsScanned;

    private final AtomicInteger scaleUpScheduledCount = new AtomicInteger();

    @Probe(name = MetricNames.JOBS_SUBMITTED)
    private final Counter jobSubmitted = MwCounter.newMwCounter();
    @Probe(name = MetricNames.JOBS_COMPLETED_SUCCESSFULLY)
    private final Counter jobCompletedSuccessfully = MwCounter.newMwCounter();
    @Probe(name = MetricNames.JOBS_COMPLETED_WITH_FAILURE)
    private final Counter jobCompletedWithFailure = MwCounter.newMwCounter();

    private long maxJobScanPeriodInMillis;

    JobCoordinationService(
            NodeEngineImpl nodeEngine, JetServiceBackend jetServiceBackend, JetConfig config, JobRepository jobRepository
    ) {
        this.nodeEngine = nodeEngine;
        this.jetServiceBackend = jetServiceBackend;
        this.config = config;
        this.pipelineToDagContext = () -> this.config.getCooperativeThreadCount();
        this.logger = nodeEngine.getLogger(getClass());
        this.jobRepository = jobRepository;

        ExecutionService executionService = nodeEngine.getExecutionService();
        executionService.register(COORDINATOR_EXECUTOR_NAME, COORDINATOR_THREADS_POOL_SIZE, Integer.MAX_VALUE, CACHED);

        // register metrics
        MetricsRegistry registry = nodeEngine.getMetricsRegistry();
        MetricDescriptor descriptor = registry.newMetricDescriptor()
                .withTag(MetricTags.MODULE, "jet");
        registry.registerStaticMetrics(descriptor, this);
    }

    public JobRepository jobRepository() {
        return jobRepository;
    }

    public void startScanningForJobs() {
        ExecutionService executionService = nodeEngine.getExecutionService();
        HazelcastProperties properties = nodeEngine.getProperties();
        maxJobScanPeriodInMillis = properties.getMillis(JOB_SCAN_PERIOD);
        try {
            executionService.schedule(COORDINATOR_EXECUTOR_NAME, this::scanJobs, 0, MILLISECONDS);
            logger.info("Jet started scanning for jobs");
        } catch (RejectedExecutionException ex) {
            logger.info("Scan jobs task is rejected on the execution service since the executor service" +
                    " has shutdown", ex);
        }
    }

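    /**
     * Submits a normal (non-light) job on the coordinator thread: verifies that this
     * member is the master, rejects duplicate active job names, persists the
     * {@link JobRecord} and creates the {@link MasterContext} that drives the job.
     * The returned future completes when the submission is accepted, not when the
     * job itself completes.
     */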
    public CompletableFuture<Void> submitJob(
            long jobId,
            Data serializedJobDefinition,
            JobConfig jobConfig,
            Subject subject
    ) {
        CompletableFuture<Void> res = new CompletableFuture<>();
        submitToCoordinatorThread(() -> {
            MasterContext masterContext;
            try {
                assertIsMaster("Cannot submit job " + idToString(jobId) + " to non-master node");
                checkOperationalState();

                // the order of operations is important.

                // first, check if the job is already completed
                JobResult jobResult = jobRepository.getJobResult(jobId);
                if (jobResult != null) {
                    logger.fine("Not starting job " + idToString(jobId) + " since already completed with result: "
                            + jobResult);
                    return;
                }
                if (!config.isResourceUploadEnabled() && !jobConfig.getResourceConfigs().isEmpty()) {
                    throw new JetException(Util.JET_RESOURCE_UPLOAD_DISABLED_MESSAGE);
                }

                int quorumSize = jobConfig.isSplitBrainProtectionEnabled() ? getQuorumSize() : 0;
                Object jobDefinition = deserializeJobDefinition(jobId, jobConfig, serializedJobDefinition);
                DAG dag;
                Data serializedDag;
                if (jobDefinition instanceof PipelineImpl) {
                    dag = ((PipelineImpl) jobDefinition).toDag(pipelineToDagContext);
                    serializedDag = nodeEngine().getSerializationService().toData(dag);
                } else {
                    dag = (DAG) jobDefinition;
                    serializedDag = serializedJobDefinition;
                }

                checkPermissions(subject, dag);

                Set<String> ownedObservables = ownedObservables(dag);
                JobRecord jobRecord = new JobRecord(nodeEngine.getClusterService().getClusterVersion(), jobId, serializedDag,
                        dagToJson(dag), jobConfig, ownedObservables, subject);
                JobExecutionRecord jobExecutionRecord = new JobExecutionRecord(jobId, quorumSize);
                masterContext = createMasterContext(jobRecord, jobExecutionRecord);

                boolean hasDuplicateJobName;
                synchronized (lock) {
                    assertIsMaster("Cannot submit job " + idToString(jobId) + " to non-master node");
                    checkOperationalState();
                    hasDuplicateJobName = jobConfig.getName() != null && hasActiveJobWithName(jobConfig.getName());
                    if (!hasDuplicateJobName) {
                        // just try to initiate the coordination
                        MasterContext prev = masterContexts.putIfAbsent(jobId, masterContext);
                        if (prev != null) {
                            logger.fine("Joining to already existing masterContext " + prev.jobIdString());
                            return;
                        }
                    }
                }

                if (hasDuplicateJobName) {
                    jobRepository.deleteJob(jobId);
                    throw new JobAlreadyExistsException("Another active job with equal name (" + jobConfig.getName()
                            + ") exists: " + idToString(jobId));
                }

                // If job is not currently running, it might be that it is just completed
                if (completeMasterContextIfJobAlreadyCompleted(masterContext)) {
                    return;
                }

                // If there is no master context and job result at the same time, it means this is the first submission
                jobSubmitted.inc();
                jobRepository.putNewJobRecord(jobRecord);
                logger.info("Starting job " + idToString(masterContext.jobId()) + " based on submit request");
            } catch (Throwable e) {
                jetServiceBackend.getJobClassLoaderService()
                                 .tryRemoveClassloadersForJob(jobId, COORDINATOR);

                res.completeExceptionally(e);
                throw e;
            } finally {
                res.complete(null);
            }
            masterContext.jobContext().tryStartJob();
        });
        return res;
    }

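    /**
     * Submits a light job. Unlike {@link #submitJob}, no {@link JobRecord} is
     * persisted; a {@link LightMasterContext} is created and started immediately.
     * The returned future completes when the light job itself completes.
     */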
    public CompletableFuture<Void> submitLightJob(
            long jobId,
            Object deserializedJobDefinition,
            Data serializedJobDefinition,
            JobConfig jobConfig,
            Subject subject
    ) {
        if (deserializedJobDefinition == null) {
            deserializedJobDefinition = nodeEngine().getSerializationService().toObject(serializedJobDefinition);
        }

        DAG dag;
        if (deserializedJobDefinition instanceof DAG) {
            dag = (DAG) deserializedJobDefinition;
        } else {
            dag = ((PipelineImpl) deserializedJobDefinition).toDag(pipelineToDagContext);
        }

        // First insert just a marker into the map. This is to prevent initializing the light job if the jobId
        // was submitted twice. This can happen e.g. if the client retries.
        Object oldContext = lightMasterContexts.putIfAbsent(jobId, UNINITIALIZED_LIGHT_JOB_MARKER);
        if (oldContext != null) {
            throw new JetException("duplicate jobId " + idToString(jobId));
        }

        checkPermissions(subject, dag);

        // Initialize and start the job. We do this before adding the actual
        // LightMasterContext to the map to avoid possible races of the job initialization and cancellation.
        return LightMasterContext.createContext(nodeEngine, this, dag, jobId, jobConfig, subject)
                .thenComposeAsync(mc -> {
                    Object oldCtx = lightMasterContexts.put(jobId, mc);
                    assert oldCtx == UNINITIALIZED_LIGHT_JOB_MARKER;
                    scheduleJobTimeout(jobId, jobConfig.getTimeoutMillis());

                    return mc.getCompletionFuture()
                      .whenComplete((r, t) -> {
                          Object removed = lightMasterContexts.remove(jobId);
                          assert removed instanceof LightMasterContext : "LMC not found: " + removed;
                          unscheduleJobTimeout(jobId);
                      });
                }, coordinationExecutor());
    }

    public long getJobSubmittedCount() {
        return jobSubmitted.get();
    }

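    /**
     * Returns the {@link JobConfig} of a running light job or throws
     * {@link JobNotFoundException} if no such light job is known to this coordinator.
     */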
    public JobConfig getLightJobConfig(long jobId) {
        Object mc = lightMasterContexts.get(jobId);
        if (mc == null || mc == UNINITIALIZED_LIGHT_JOB_MARKER) {
            throw new JobNotFoundException(jobId);
        }
        return ((LightMasterContext) mc).getJobConfig();
    }

    private void checkPermissions(Subject subject, DAG dag) {
        SecurityContext securityContext = nodeEngine.getNode().securityContext;
        if (securityContext == null || subject == null) {
            return;
        }
        for (Vertex vertex : dag) {
            Permission requiredPermission = vertex.getMetaSupplier().getRequiredPermission();
            if (requiredPermission != null) {
                securityContext.checkPermission(subject, requiredPermission);
            }
        }
    }

    private static Set<String> ownedObservables(DAG dag) {
        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(dag.iterator(), 0), false)
                .map(vertex -> vertex.getMetaSupplier().getTags().get(ObservableImpl.OWNED_OBSERVABLE))
                .filter(Objects::nonNull)
                .collect(Collectors.toSet());
    }

    @SuppressWarnings("WeakerAccess") // used by jet-enterprise
    MasterContext createMasterContext(JobRecord jobRecord, JobExecutionRecord jobExecutionRecord) {
        return new MasterContext(nodeEngine, this, jobRecord, jobExecutionRecord);
    }

    private boolean hasActiveJobWithName(@Nonnull String jobName) {
        // if scanJob() has not run yet, master context objects may not be initialized.
        // in this case, we cannot check if the new job submission has a duplicate job name.
        // therefore, we will retry until scanJob() task runs at least once.
        if (!jobsScanned) {
            throw new RetryableHazelcastException("Cannot submit job with name '" + jobName
                    + "' before the master node initializes job coordination service's state");
        }

        return masterContexts.values()
                             .stream()
                             .anyMatch(ctx -> jobName.equals(ctx.jobConfig().getName()));
    }

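    /**
     * Gracefully terminates all jobs coordinated by this member in preparation for
     * the cluster entering the PASSIVE state. New job submissions are rejected until
     * {@link #clusterChangeDone()} is called.
     */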
    public CompletableFuture<Void> prepareForPassiveClusterState() {
        assertIsMaster("Cannot prepare for passive cluster state on a non-master node");
        synchronized (lock) {
            isClusterEnteringPassiveState = true;
        }
        return submitToCoordinatorThread(() -> {
            CompletableFuture[] futures = masterContexts
                    .values().stream()
                    .map(mc -> mc.jobContext().gracefullyTerminate())
                    .toArray(CompletableFuture[]::new);
            return CompletableFuture.allOf(futures);
        }).thenCompose(identity());
    }

    public void clusterChangeDone() {
        synchronized (lock) {
            isClusterEnteringPassiveState = false;
        }
    }

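    /**
     * Called when this member is no longer the master: discards all local
     * {@link MasterContext} instances and completes their futures with a
     * {@link CancellationException}.
     */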
    public void reset() {
        assert !isMaster() : "this member is a master";
        List<MasterContext> contexts;
        synchronized (lock) {
            contexts = new ArrayList<>(masterContexts.values());
            masterContexts.clear();
            jobsScanned = false;
        }

        contexts.forEach(ctx -> ctx.jobContext().setFinalResult(new CancellationException()));
    }

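    /**
     * Returns a future that completes when the given job completes. If the job is not
     * yet started on this coordinator, it is started (or restored from its
     * {@link JobExecutionRecord}) as a side effect of the join request.
     */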
    public CompletableFuture<Void> joinSubmittedJob(long jobId) {
        checkOperationalState();
        CompletableFuture<CompletableFuture<Void>> future = callWithJob(jobId,
                mc -> mc.jobContext().jobCompletionFuture()
                        .handle((r, t) -> {
                            if (t == null) {
                                return null;
                            }
                            if (t instanceof CancellationException || t instanceof JetException) {
                                throw sneakyThrow(t);
                            }
                            throw new JetException(ExceptionUtil.stackTraceToString(t));
                        }),
                JobResult::asCompletableFuture,
                jobRecord -> {
                    JobExecutionRecord jobExecutionRecord = ensureExecutionRecord(jobId,
                            jobRepository.getJobExecutionRecord(jobId));
                    return startJobIfNotStartedOrCompleted(jobRecord, jobExecutionRecord, "join request from client");
                },
                null
        );

        return future
                .thenCompose(identity()); // unwrap the inner future
    }

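    /**
     * Returns the completion future of a running light job or throws
     * {@link JobNotFoundException} if the job is not known to this coordinator.
     */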
    public CompletableFuture<Void> joinLightJob(long jobId) {
        Object mc = lightMasterContexts.get(jobId);
        if (mc == null || mc == UNINITIALIZED_LIGHT_JOB_MARKER) {
            throw new JobNotFoundException(jobId);
        }
        return ((LightMasterContext) mc).getCompletionFuture();
    }

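    /**
     * Requests termination of a job in the given {@link TerminationMode}. Forceful
     * cancellation is allowed in any state; other termination modes require the job
     * to be {@link JobStatus#RUNNING}.
     */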
    public CompletableFuture<Void> terminateJob(long jobId, TerminationMode terminationMode, boolean userInitiated) {
        return runWithJob(jobId,
                masterContext -> {
                    // User can cancel in any state, other terminations are allowed only when running.
                    // This is not technically required (we can request termination in any state),
                    // but this method is only called by the user. It would be weird for the client to
                    // request a restart if the job didn't start yet etc.
                    // Also, it would be weird to restart the job during STARTING: as soon as it will start,
                    // it will restart.
                    // In any case, it doesn't make sense to restart a suspended job.
                    JobStatus jobStatus = masterContext.jobStatus();
                    if (jobStatus != RUNNING && terminationMode != CANCEL_FORCEFUL) {
                        throw new IllegalStateException("Cannot " + terminationMode + ", job status is " + jobStatus
                                + ", should be " + RUNNING);
                    }

                    String terminationResult = masterContext.jobContext()
                            .requestTermination(terminationMode, false, userInitiated)
                            .f1();
                    if (terminationResult != null) {
                        throw new IllegalStateException("Cannot " + terminationMode + ": " + terminationResult);
                    }
                },
                jobResult -> {
                    if (terminationMode != CANCEL_FORCEFUL) {
                        throw new IllegalStateException("Cannot " + terminationMode + " job " + idToString(jobId)
                                + " because it already has a result: " + jobResult);
                    }
                    logger.fine("Ignoring cancellation of a completed job " + idToString(jobId));
                },
                jobRecord -> {
                    // we'll eventually learn of the job through scanning of records or from a join operation
                    throw new RetryableHazelcastException("No MasterContext found for job " + idToString(jobId) + " for "
                            + terminationMode);
                }
        );
    }

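    /**
     * Requests termination of a light job or throws {@link JobNotFoundException} if
     * the job is not known to this coordinator.
     */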
    public void terminateLightJob(long jobId, boolean userInitiated) {
        Object mc = lightMasterContexts.get(jobId);
        if (mc == null || mc == UNINITIALIZED_LIGHT_JOB_MARKER) {
            throw new JobNotFoundException(jobId);
        }
        ((LightMasterContext) mc).requestTermination(userInitiated);
    }

    /**
     * Return the job IDs of jobs with the given name, sorted by
     * {active/completed, creation time}, active & newest first.
     */
    public CompletableFuture<GetJobIdsResult> getJobIds(@Nullable String onlyName, long onlyJobId) {
        if (onlyName != null) {
            assertIsMaster("Cannot query list of job IDs by name on non-master node");
        }

        return submitToCoordinatorThread(() -> {
            if (onlyJobId != ALL_JOBS) {
                Object lmc = lightMasterContexts.get(onlyJobId);
                if (lmc != null && lmc != UNINITIALIZED_LIGHT_JOB_MARKER) {
                    return new GetJobIdsResult(onlyJobId, true);
                }

                if (isMaster()) {
                    try {
                        callWithJob(onlyJobId, mc -> null, jobResult -> null, jobRecord -> null, null)
                                .get();
                    } catch (ExecutionException e) {
                        if (e.getCause() instanceof JobNotFoundException) {
                            return GetJobIdsResult.EMPTY;
                        }
                        throw e;
                    }
                    return new GetJobIdsResult(onlyJobId, false);
                }
                return GetJobIdsResult.EMPTY;
            }

            List<Tuple2<Long, Boolean>> result = new ArrayList<>();

            // add light jobs - only if no name is requested, light jobs can't have a name
            if (onlyName == null) {
                for (Object ctx : lightMasterContexts.values()) {
                    if (ctx != UNINITIALIZED_LIGHT_JOB_MARKER) {
                        result.add(tuple2(((LightMasterContext) ctx).getJobId(), true));
                    }
                }
            }

            // add normal jobs - only on master
            if (isMaster()) {
                if (onlyName != null) {
                    // we first need to collect to a map where the jobId is the key to eliminate possible duplicates
                    // in JobResult and also to be able to sort from newest to oldest
                    Map<Long, Long> jobs = new HashMap<>();
                    for (MasterContext ctx : masterContexts.values()) {
                        if (onlyName.equals(ctx.jobConfig().getName())) {
                            jobs.put(ctx.jobId(), Long.MAX_VALUE);
                        }
                    }

                    for (JobResult jobResult : jobRepository.getJobResults(onlyName)) {
                        jobs.put(jobResult.getJobId(), jobResult.getCreationTime());
                    }

                    jobs.entrySet().stream()
                        .sorted(
                                comparing(Entry::getValue)
                                        .thenComparing(Entry::getKey)
                                        .reversed()
                        )
                        .forEach(entry -> result.add(tuple2(entry.getKey(), false)));
                } else {
                    for (Long jobId : jobRepository.getAllJobIds()) {
                        result.add(tuple2(jobId, false));
                    }
                }
            }

            return new GetJobIdsResult(result);
        });
    }

    /**
     * Returns the job status or fails with {@link JobNotFoundException}
     * if the requested job is not found.
     */
    public CompletableFuture<JobStatus> getJobStatus(long jobId) {
        // Logic of determining job status should be in sync
        // with getJobAndSqlSummary and getJobAndSqlSummaryList.
        return callWithJob(jobId,
                JobCoordinationService::determineJobStatusFromMasterContext,
                JobResult::getJobStatus,
                jobRecord -> NOT_RUNNING,
                jobExecutionRecord -> jobExecutionRecord.isSuspended() ? SUSPENDED : NOT_RUNNING
        );
    }

    private static JobStatus determineJobStatusFromMasterContext(MasterContext mc) {
        // When the job finishes running, we write NOT_RUNNING to jobStatus first and then
        // write null to terminationRequest (see MasterJobContext.finalizeJob()). We
        // have to read them in the opposite order.
        Optional<TerminationRequest> maybeTerminationRequest = mc.jobContext().getTerminationRequest();
        JobStatus jobStatus = mc.jobStatus();
        return jobStatus == RUNNING && maybeTerminationRequest.isPresent()
                ? COMPLETING
                : jobStatus;
    }

    private static boolean determineIsJobUserCancelledFromMasterContext(MasterContext mc) {
        // order of reads is important, see comment in determineJobStatusFromMasterContext
        boolean userInitiatedTermination = mc.jobContext().isUserInitiatedTermination();
        JobStatus jobStatus = mc.jobStatus();
        switch (jobStatus) {
            case COMPLETED:
                return false;
            case FAILED:
                return userInitiatedTermination;
            default:
                throw new IllegalStateException("Job not finished");
        }
    }

    /**
     * Returns the reason why this job has been suspended in a human-readable
     * form.
     * <p>
     * Fails with {@link JobNotFoundException} if the requested job is not found.
     * <p>
     * Fails with {@link IllegalStateException} if the requested job is not
     * currently in a suspended state.
     */
    public CompletableFuture<JobSuspensionCause> getJobSuspensionCause(long jobId) {
        FunctionEx<JobExecutionRecord, JobSuspensionCause> jobExecutionRecordHandler = jobExecutionRecord -> {
            JobSuspensionCause cause = jobExecutionRecord.getSuspensionCause();
            if (cause == null) {
                throw new IllegalStateException("Job not suspended");
            }
            return cause;
        };
        return callWithJob(jobId,
                mc -> {
                    JobExecutionRecord jobExecutionRecord = mc.jobExecutionRecord();
                    return jobExecutionRecordHandler.apply(jobExecutionRecord);
                },
                jobResult -> {
                    throw new IllegalStateException("Job not suspended");
                },
                jobRecord -> {
                    throw new IllegalStateException("Job not suspended");
                },
                jobExecutionRecordHandler
        );
    }

    public CompletableFuture<Boolean> isJobUserCancelled(long jobId) {
        // Logic of determining userCancelled should be in sync
        // with getJobAndSqlSummary and getJobAndSqlSummaryList.
        return callWithJob(jobId,
                JobCoordinationService::determineIsJobUserCancelledFromMasterContext,
                JobResult::isUserCancelled,
                // If we do not have result, the job has not finished yet so cannot be cancelled.
                jobRecord -> {
                    throw new IllegalStateException("Job not finished");
                },
                null
        );
    }

    /**
     * Returns the latest metrics for a job or fails with {@link JobNotFoundException}
     * if the requested job is not found.
     */
    public CompletableFuture<List<RawJobMetrics>> getJobMetrics(long jobId) {
        CompletableFuture<List<RawJobMetrics>> cf = new CompletableFuture<>();
        runWithJob(jobId,
                mc -> mc.jobContext().collectMetrics(cf),
                jobResult -> {
                    List<RawJobMetrics> metrics = jobRepository.getJobMetrics(jobId);
                    cf.complete(metrics != null ? metrics : emptyList());
                },
                jobRecord -> cf.complete(emptyList())
        );
        return cf;
    }

    /**
     * Returns the job submission time or fails with {@link JobNotFoundException}
     * if the requested job is not found.
     */
    public CompletableFuture<Long> getJobSubmissionTime(long jobId, boolean isLightJob) {
        if (isLightJob) {
            Object mc = lightMasterContexts.get(jobId);
            if (mc == null || mc == UNINITIALIZED_LIGHT_JOB_MARKER) {
                throw new JobNotFoundException(jobId);
            }
            return completedFuture(((LightMasterContext) mc).getStartTime());
        }
        return callWithJob(jobId,
                mc -> mc.jobRecord().getCreationTime(),
                JobResult::getCreationTime,
                JobRecord::getCreationTime,
                null
        );
    }

    public CompletableFuture<Void> resumeJob(long jobId) {
        return runWithJob(jobId,
                masterContext -> masterContext.jobContext().resumeJob(),
                jobResult -> {
                    throw new IllegalStateException("Job already completed");
                },
                jobRecord -> {
                    throw new RetryableHazelcastException("Job " + idToString(jobId) + " not yet discovered");
                }
        );
    }

    /**
     * Return a summary of all jobs
     * @deprecated Since 5.3, to be removed in 6.0.
     * Use {@link #getJobAndSqlSummaryList()} instead
     */
    @Deprecated
    public CompletableFuture<List<JobSummary>> getJobSummaryList() {
        return getJobAndSqlSummaryList().thenApply(jobAndSqlSummaries -> jobAndSqlSummaries.stream()
                .map(this::toJobSummary)
                .collect(toList()));
    }

    private JobSummary toJobSummary(JobAndSqlSummary jobAndSqlSummary) {
        return new JobSummary(jobAndSqlSummary.isLightJob(), jobAndSqlSummary.getJobId(),
                jobAndSqlSummary.getExecutionId(), jobAndSqlSummary.getNameOrId(), jobAndSqlSummary.getStatus(),
                jobAndSqlSummary.getSubmissionTime(), jobAndSqlSummary.getCompletionTime(),
                jobAndSqlSummary.getFailureText());
    }

    /**
     * Return a summary of all jobs with sql data
     */
    public CompletableFuture<List<JobAndSqlSummary>> getJobAndSqlSummaryList() {
        return submitToCoordinatorThread(() -> {
            Map<Long, JobAndSqlSummary> jobs = new HashMap<>();
            if (isMaster()) {
                // running jobs
                jobRepository.getJobRecords().stream()
                        .map(this::getJobAndSqlSummary)
                        .forEach(s -> jobs.put(s.getJobId(), s));

                // completed jobs
                // (can overwrite entries created from JobRecords but that is fine and in fact desired
                // because JobResult is always more recent than JobRecord for given job)
                jobRepository.getJobResults().stream()
                        .map(r -> {
                            // Pre-review note : volatile read at supplier, should not read under lock path.
                            // Q: Any other better way to get executionRecord?
                            JobExecutionRecord executionRecord = jobRepository.getJobExecutionRecord(r.getJobId());
                            return new JobAndSqlSummary(
                                    false, r.getJobId(), 0, r.getJobNameOrId(), r.getJobStatus(), r.getCreationTime(),
                                    r.getCompletionTime(), r.getFailureText(), null,
                                    executionRecord == null || executionRecord.getSuspensionCause() == null
                                            ? null
                                            : executionRecord.getSuspensionCause().description(),
                                    r.isUserCancelled());
                        })
                        .forEach(s -> jobs.put(s.getJobId(), s));
            }

            // light jobs
            lightMasterContexts.values().stream()
                    .filter(lmc -> lmc != UNINITIALIZED_LIGHT_JOB_MARKER)
                    .map(LightMasterContext.class::cast)
                    .map(this::getJobAndSqlSummary)
                    .forEach(s -> jobs.put(s.getJobId(), s));

            return jobs.values().stream().sorted(comparing(JobAndSqlSummary::getSubmissionTime).reversed()).collect(toList());
        });
    }

    private JobAndSqlSummary getJobAndSqlSummary(LightMasterContext lmc) {
        String query = lmc.getJobConfig().getArgument(JobConfigArguments.KEY_SQL_QUERY_TEXT);
        Object unbounded = lmc.getJobConfig().getArgument(JobConfigArguments.KEY_SQL_UNBOUNDED);
        SqlSummary sqlSummary = query != null && unbounded != null
                ? new SqlSummary(query, Boolean.TRUE.equals(unbounded)) : null;

        // For simplicity, we assume here that light job is running iff LightMasterContext exists:
        // running jobs are not cancelled and others are not visible.
        //
        // It is possible that LightMasterContext still exists (for a short period of time)
        // when the job is already terminated.
        // LightMasterContext is removed from map in submitLightJob() _after_ setting result
        // on the jobCompletionFuture in LightMasterContext.finalizeJob().
        // jobCompletionFuture is also used in join operation so join operation sees
        // finished job even though master context still exists.
        // Also, future completion handlers (thenApply etc.) are not guaranteed to run in
        // any particular order and can be executed in parallel.
        //
        // This is unlikely and we do not care however such scenario is possible:
        // 1. user submits a light job
        // 2. user gets the job by id and joins it (separate Job proxy instance is necessary
        //    because different future will be used than for submit)
        // 3. job finishes (either normally or via error or cancellation)
        // 4. join finishes - user gets information that the job completed (from join, not submit)
        // 5. user asks for jobs list and the job is reported as running
        //
        // In such scenario finished job will be reported as running.
        //
        // Note: suspensionCause is not supported for light jobs.
        return new JobAndSqlSummary(
                true, lmc.getJobId(), lmc.getJobId(), idToString(lmc.getJobId()), RUNNING, lmc.getStartTime(),
                0, null, sqlSummary, null, false);
    }

    /**
     * Applies the specified delta configuration if the job is suspended.
     * Otherwise, an {@link IllegalStateException} is thrown by the returned future.
     */
    public CompletableFuture updateJobConfig(long jobId, @Nonnull DeltaJobConfig deltaConfig) {
        return callWithJob(jobId,
                masterContext -> masterContext.updateJobConfig(deltaConfig),
                jobResult -> {
                    throw new IllegalStateException("Job not suspended, but " + jobResult.getJobStatus());
                },
                jobRecord -> {
                    throw new IllegalStateException("Job not suspended");
                },
                null
        );
    }

    /**
     * Applies the specified listener registration if the job is not completed/failed.
     * Otherwise, an {@link IllegalStateException} is thrown by the returned future.
     */
    public CompletableFuture<UUID> addJobStatusListener(long jobId, boolean isLightJob, Registration registration) {
        if (isLightJob) {
            Object mc = lightMasterContexts.get(jobId);
            if (mc == null || mc == UNINITIALIZED_LIGHT_JOB_MARKER) {
                throw new JobNotFoundException(jobId);
            } else {
                return completedFuture(((LightMasterContext) mc).addStatusListener(registration));
            }
        }
        return callWithJob(jobId,
                masterContext -> masterContext.addStatusListener(registration),
                jobResult -> {
                    throw cannotAddStatusListener(jobResult.getJobStatus());
                },
                jobRecord -> {
                    JobEventService jobEventService = nodeEngine.getService(JobEventService.SERVICE_NAME);
                    return jobEventService.handleAllRegistrations(jobId, registration).getId();
                },
                null);
    }

    /**
     * Add the given member to shutting down members. This will prevent
     * submission of more executions until the member actually leaves the
     * cluster. The returned future will complete when all executions of which
     * the member is a participant terminate.
     * <p>
     * The method is idempotent, the {@link NotifyMemberShutdownOperation}
     * which calls it can be retried.
     */
    @Nonnull
    public CompletableFuture<Void> addShuttingDownMember(UUID uuid) {
        CompletableFuture<Void> future = new CompletableFuture<>();
        CompletableFuture<Void> oldFuture = membersShuttingDown.putIfAbsent(uuid, future);
        if (oldFuture != null) {
            return oldFuture;
        }
        if (removedMembers.containsKey(uuid)) {
            logFine(logger, "NotifyMemberShutdownOperation received for a member that was already " +
                    "removed from the cluster: %s", uuid);
            return completedFuture(null);
        }
        logFine(logger, "Added a shutting-down member: %s", uuid);
        CompletableFuture[] futures = masterContexts.values().stream()
                .map(mc -> mc.jobContext().onParticipantGracefulShutdown(uuid))
                .toArray(CompletableFuture[]::new);
        // Need to do this even if futures.length == 0, we need to perform the action in whenComplete
        CompletableFuture.allOf(futures)
                .whenComplete(withTryCatch(logger, (r, e) -> future.complete(null)));
        return future;
    }

    // only for testing
    public Map<Long, MasterContext> getMasterContexts() {
        return new HashMap<>(masterContexts);
    }

    // only for testing
    public Map<Long, Object> getLightMasterContexts() {
        return new HashMap<>(lightMasterContexts);
    }

    // only for testing
    public MasterContext getMasterContext(long jobId) {
        return masterContexts.get(jobId);
    }

    JetServiceBackend getJetServiceBackend() {
        return jetServiceBackend;
    }

    boolean shouldStartJobs() {
        if (!isMaster() || !nodeEngine.isRunning()) {
            return false;
        }

        ClusterState clusterState = nodeEngine.getClusterService().getClusterState();
        if (isClusterEnteringPassiveState || clusterState == PASSIVE || clusterState == IN_TRANSITION) {
            logger.fine("Not starting jobs because cluster is in passive state or in transition.");
            return false;
        }
        // if there are any members in a shutdown process, don't start jobs
        if (!membersShuttingDown.isEmpty()) {
            LoggingUtil.logFine(logger, "Not starting jobs because members are shutting down: %s",
                    membersShuttingDown.keySet());
            return false;
        }

        Version clusterVersion = nodeEngine.getClusterService().getClusterVersion();
        for (Member m : nodeEngine.getClusterService().getMembers()) {
            if (!clusterVersion.equals(m.getVersion().asVersion())) {
                logger.fine("Not starting non-light jobs because rolling upgrade is in progress");
                return false;
            }
        }

        PartitionServiceState state =
                getInternalPartitionService().getPartitionReplicaStateChecker().getPartitionServiceState();
        if (state != PartitionServiceState.SAFE) {
            logger.fine("Not starting jobs because partition replication is not in safe state, but in " + state);
            return false;
        }
        if (!getInternalPartitionService().getPartitionStateManager().isInitialized()) {
            logger.fine("Not starting jobs because partitions are not yet initialized.");
            return false;
        }
        if (nodeEngine.getNode().isClusterStateManagementAutomatic() && !nodeEngine.getNode().isManagedClusterStable()) {
            LoggingUtil.logFine(logger, "Not starting jobs because cluster is running in managed context "
                            + "and is not yet stable. Current cluster topology intent: %s, "
                            + "expected cluster size: %d, current: %d.",
                    nodeEngine.getNode().getClusterTopologyIntent(),
                    nodeEngine.getNode().currentSpecifiedReplicaCount(),
                    nodeEngine.getClusterService().getSize());
            return false;
        }
        return true;
    }

    private CompletableFuture<Void> runWithJob(
            long jobId,
            @Nonnull Consumer<MasterContext> masterContextHandler,
            @Nonnull Consumer<JobResult> jobResultHandler,
            @Nonnull Consumer<JobRecord> jobRecordHandler
    ) {
        return callWithJob(jobId,
                toNullFunction(masterContextHandler),
                toNullFunction(jobResultHandler),
                toNullFunction(jobRecordHandler),
                null
        );
    }

    /**
     * Returns a function that passes its argument to the given {@code
     * consumer} and returns {@code null}.
     */
    @Nonnull
    private <T> Function<T, Void> toNullFunction(@Nonnull Consumer<T> consumer) {
        return val -> {
            consumer.accept(val);
            return null;
        };
    }

    private <T> CompletableFuture<T> callWithJob(
            long jobId,
            @Nonnull Function<MasterContext, T> masterContextHandler,
            @Nonnull Function<JobResult, T> jobResultHandler,
            @Nonnull Function<JobRecord, T> jobRecordHandler,
            @Nullable Function<JobExecutionRecord, T> jobExecutionRecordHandler
    ) {
        assertIsMaster("Cannot do this task on non-master. jobId=" + idToString(jobId));

        return submitToCoordinatorThread(() -> {
            // when job is finalized, actions happen in this order:
            // - JobResult and JobMetrics are created
            // - JobRecord and JobExecutionRecord are deleted (asynchronously and in parallel)
            // - masterContext is removed from the map
            // We check them in reverse order so that no race is possible.
            //
            // We check the MasterContext before JobResult for optimization. In
            // most cases there will either be MasterContext or JobResult.
            // Neither of them is present only after master failed and the new
            // master didn't yet scan jobs. We check the JobResult again at the
            // end for correctness to avoid race with job completion.
            //
            // We check the JobResult before JobRecord and JobExecutionRecord
            // because JobResult is more recent and contains more information.
            // In some cases (slow deleteJob execution) there can exist
            // JobResult, one or both JobRecord and JobExecutionRecord, and no
            // MasterContext.

            // check masterContext first
            MasterContext mc = masterContexts.get(jobId);
            if (mc != null) {
                return masterContextHandler.apply(mc);
            }

            // early check of JobResult.
            JobResult jobResult = jobRepository.getJobResult(jobId);
            if (jobResult != null) {
                return jobResultHandler.apply(jobResult);
            }

            // the job might not be yet discovered by job record scanning
            JobExecutionRecord jobExRecord;
            if (jobExecutionRecordHandler != null && (jobExRecord = jobRepository.getJobExecutionRecord(jobId)) != null) {
                return jobExecutionRecordHandler.apply(jobExRecord);
            }
            JobRecord jobRecord;
            if ((jobRecord = jobRepository.getJobRecord(jobId)) != null) {
                return jobRecordHandler.apply(jobRecord);
            }

            // second check for JobResult, see comment at the top of the method
            jobResult = jobRepository.getJobResult(jobId);
            if (jobResult != null) {
                return jobResultHandler.apply(jobResult);
            }

            // job doesn't exist
            throw new JobNotFoundException(jobId);
        });
    }

    void onMemberAdded(MemberImpl addedMember) {
        // the member can re-join with the same UUID in certain scenarios
        removedMembers.remove(addedMember.getUuid());
        if (addedMember.isLiteMember()) {
            return;
        }

        updateQuorumValues();
        scheduleScaleUp(config.getScaleUpDelayMillis());
    }

    void onMemberRemoved(UUID uuid) {
        if (membersShuttingDown.remove(uuid) != null) {
            logFine(logger, "Removed a shutting-down member: %s, now shuttingDownMembers=%s",
                    uuid, membersShuttingDown.keySet());
        } else {
            removedMembers.put(uuid, System.nanoTime());
        }

        // clean up old entries from removedMembers (the value is time when the member was removed)
        long removeThreshold = System.nanoTime() - HOURS.toNanos(1);
        removedMembers.entrySet().removeIf(en -> en.getValue() < removeThreshold);
    }

    boolean isQuorumPresent(int quorumSize) {
        return getDataMemberCount() >= quorumSize;
    }

    /**
     * Completes the job which is coordinated with the given master context object.
     */
    @CheckReturnValue
    CompletableFuture<Void> completeJob(MasterContext masterContext, Throwable error, long completionTime, boolean userCancelled) {
        return submitToCoordinatorThread(() -> {
            // the order of operations is important.
            List<RawJobMetrics> jobMetrics = masterContext.jobConfig().isStoreMetricsAfterJobCompletion()
                    ? masterContext.jobContext().jobMetrics()
                    : null;
            jobRepository.completeJob(masterContext, jobMetrics, error, completionTime, userCancelled);
            if (removeMasterContext(masterContext)) {
                completeObservables(masterContext.jobRecord().getOwnedObservables(), error);
                logger.fine(masterContext.jobIdString() + " is completed");
                (error == null ? jobCompletedSuccessfully : jobCompletedWithFailure).inc();
            } else {
                MasterContext existing = masterContexts.get(masterContext.jobId());
                if (existing != null) {
                    logger.severe("Different master context found to complete " + masterContext.jobIdString()
                            + ", master context execution " + idToString(existing.executionId()));
                } else {
                    logger.severe("No master context found to complete " + masterContext.jobIdString());
                }
            }
            unscheduleJobTimeout(masterContext.jobId());
        });
    }

    private boolean removeMasterContext(MasterContext masterContext) {
        synchronized (lock) {
            return masterContexts.remove(masterContext.jobId(), masterContext);
        }
    }

    /**
     * Schedules a restart task that will be run in future for the given job
     */
    void scheduleRestart(long jobId) {
        MasterContext masterContext = masterContexts.get(jobId);
        if (masterContext == null) {
            logger.severe("Master context for job " + idToString(jobId) + " not found to schedule restart");
            return;
        }
        logger.fine("Scheduling restart on master for job " + masterContext.jobName());
        nodeEngine.getExecutionService().schedule(COORDINATOR_EXECUTOR_NAME, () -> restartJob(jobId),
                RETRY_DELAY_IN_MILLIS, MILLISECONDS);
    }

    void scheduleSnapshot(MasterContext mc, long executionId) {
        long snapshotInterval = mc.jobConfig().getSnapshotIntervalMillis();
        ExecutionService executionService = nodeEngine.getExecutionService();
        if (logger.isFineEnabled()) {
            logger.fine(mc.jobIdString() + " snapshot is scheduled in " + snapshotInterval + "ms");
        }
        executionService.schedule(COORDINATOR_EXECUTOR_NAME, () -> mc.snapshotContext().startScheduledSnapshot(executionId),
                snapshotInterval, MILLISECONDS);
    }

    /**
     * Restarts a job for a new execution if the cluster is stable.
     * Otherwise, it reschedules the restart task.
     */
    void restartJob(long jobId) {
        MasterContext masterContext = masterContexts.get(jobId);
        if (masterContext == null) {
            logger.severe("Master context for job " + idToString(jobId) + " not found to restart");
            return;
        }
        masterContext.jobContext().tryStartJob();
    }

    private void checkOperationalState() {
        if (isClusterEnteringPassiveState) {
            throw new EnteringPassiveClusterStateException();
        }
    }

    private void scheduleScaleUp(long delay) {
        int counter = scaleUpScheduledCount.incrementAndGet();
        nodeEngine.getExecutionService().schedule(() -> scaleJobsUpNow(counter), delay, MILLISECONDS);
    }

    private void scaleJobsUpNow(int counter) {
        // if another scale-up was scheduled after this one, ignore this one
        if (scaleUpScheduledCount.get() != counter) {
            return;
        }
        // if we can't start jobs yet, we also won't tear them down
        if (!shouldStartJobs()) {
            scheduleScaleUp(RETRY_DELAY_IN_MILLIS);
            return;
        }

        submitToCoordinatorThread(() -> {
            boolean allSucceeded = true;
            int dataMembersCount = nodeEngine.getClusterService().getMembers(DATA_MEMBER_SELECTOR).size();
            int partitionCount = nodeEngine.getPartitionService().getPartitionCount();
            // If the number of partitions is lower than the data member count, some members won't have
            // any partitions assigned. Jet doesn't use such members.
            int dataMembersWithPartitionsCount = Math.min(dataMembersCount, partitionCount);
            for (MasterContext mc : masterContexts.values()) {
                allSucceeded &= mc.jobContext().maybeScaleUp(dataMembersWithPartitionsCount);
            }
            if (!allSucceeded) {
                scheduleScaleUp(RETRY_DELAY_IN_MILLIS);
            }
        });
    }

    /**
     * Scans all job records and updates quorum size of a split-brain protection enabled
     * job with current cluster quorum size if the current cluster quorum size is larger
     */
    private void updateQuorumValues() {
        if (!shouldCheckQuorumValues()) {
            return;
        }

        submitToCoordinatorThread(() -> {
            try {
                int currentQuorumSize = getQuorumSize();
                for (JobRecord jobRecord : jobRepository.getJobRecords()) {
                    try {
                        if (!jobRecord.getConfig().isSplitBrainProtectionEnabled()) {
                            continue;
                        }
                        MasterContext masterContext = masterContexts.get(jobRecord.getJobId());
                        // if MasterContext doesn't exist, update in the IMap directly, using a sync method
                        if (masterContext == null) {
                            jobRepository.updateJobQuorumSizeIfSmaller(jobRecord.getJobId(), currentQuorumSize);
                            // check the master context again, it might have been just created and have picked
                            // up the JobRecord before being updated
                            masterContext = masterContexts.get(jobRecord.getJobId());
                        }
                        if (masterContext != null) {
                            masterContext.updateQuorumSize(currentQuorumSize);
                        }
                    } catch (Exception e) {
                        logger.severe("Quorum of job " + idToString(jobRecord.getJobId())
                                + " could not be updated to " + currentQuorumSize, e);
                    }
                }
            } catch (Exception e) {
                logger.severe("update quorum values task failed", e);
            }
        });
    }

    private boolean shouldCheckQuorumValues() {
        return isMaster() && nodeEngine.isRunning()
                && getInternalPartitionService().getPartitionStateManager().isInitialized();
    }

    private Object deserializeJobDefinition(long jobId, JobConfig jobConfig, Data jobDefinitionData) {
        JobClassLoaderService jobClassLoaderService = jetServiceBackend.getJobClassLoaderService();
        ClassLoader classLoader = jobClassLoaderService.getOrCreateClassLoader(jobConfig, jobId, COORDINATOR);
        try {
            jobClassLoaderService.prepareProcessorClassLoaders(jobId);
            return deserializeWithCustomClassLoader(nodeEngine().getSerializationService(), classLoader, jobDefinitionData);
        } finally {
            jobClassLoaderService.clearProcessorClassLoaders();
        }
    }

    private String dagToJson(DAG dag) {
        int coopThreadCount = config.getCooperativeThreadCount();
        return dag.toJson(coopThreadCount).toString();
    }

    private CompletableFuture<Void> startJobIfNotStartedOrCompleted(
            @Nonnull JobRecord jobRecord,
            @Nonnull JobExecutionRecord jobExecutionRecord,
            String reason
    ) {
        // the order of operations is important.
        long jobId = jobRecord.getJobId();
        MasterContext masterContext;
        MasterContext oldMasterContext;
        synchronized (lock) {
            // We check the JobResult while holding the lock to avoid this scenario:
            // 1. We find no job result
            // 2. Another thread creates the result and removes the master context in completeJob
            // 3. We re-create the master context below
            JobResult jobResult = jobRepository.getJobResult(jobId);
            if (jobResult != null) {
                logger.fine("Not starting job " + idToString(jobId) + ", already has result: " + jobResult);
                return jobResult.asCompletableFuture();
            }

            checkOperationalState();

            masterContext = createMasterContext(jobRecord, jobExecutionRecord);
            oldMasterContext = masterContexts.putIfAbsent(jobId, masterContext);
        }

        if (oldMasterContext != null) {
            return oldMasterContext.jobContext().jobCompletionFuture();
        }

        assert jobRepository.getJobResult(jobId) == null : "jobResult should not exist at this point";

        if (finalizeJobIfAutoScalingOff(masterContext)) {
            return masterContext.jobContext().jobCompletionFuture();
        }

        if (jobExecutionRecord.isSuspended()) {
            logFinest(logger, "MasterContext for suspended %s is created", masterContext.jobIdString());
        } else {
            logger.info("Starting job " + idToString(jobId) + ": " + reason);
            masterContext.jobContext().tryStartJob();
        }

        return masterContext.jobContext().jobCompletionFuture();
    }

    // If a job result is present, it completes the master context using the job result
    private boolean completeMasterContextIfJobAlreadyCompleted(MasterContext masterContext) {
        long jobId = masterContext.jobId();
        JobResult jobResult = jobRepository.getJobResult(jobId);
        if (jobResult != null) {
            logger.fine("Completing master context for " + masterContext.jobIdString()
                    + " since already completed with result: " + jobResult);
            masterContext.jobContext().setFinalResult(jobResult.getFailureAsThrowable());
            return removeMasterContext(masterContext);
        }

        return finalizeJobIfAutoScalingOff(masterContext);
    }

    private boolean finalizeJobIfAutoScalingOff(MasterContext masterContext) {
        if (!masterContext.jobConfig().isAutoScaling() && masterContext.jobExecutionRecord().executed()) {
            logger.info("Suspending or failing " + masterContext.jobIdString()
                    + " since auto-restart is disabled and the job has been executed before");
            masterContext.jobContext().finalizeExecution(new TopologyChangedException());
            return true;
        }

        return false;
    }

    int getQuorumSize() {
        return (getDataMemberCount() / 2) + 1;
    }

    private int getDataMemberCount() {
        ClusterService clusterService = nodeEngine.getClusterService();
        return clusterService.getMembers(DATA_MEMBER_SELECTOR).size();
    }

    private JobAndSqlSummary getJobAndSqlSummary(JobRecord record) {
        MasterContext ctx = masterContexts.get(record.getJobId());
        long execId = ctx == null ? 0 : ctx.executionId();
        JobExecutionRecord executionRecord = jobRepository.getJobExecutionRecord(record.getJobId());
        String suspensionCause = executionRecord != null && executionRecord.getSuspensionCause() != null
                ? executionRecord.getSuspensionCause().description()
                : null;
        JobStatus status;
        boolean userCancelled;
        if (ctx == null) {
            // If we have a JobRecord but not the MasterContext, it may mean that:
            // 1) job has not yet created MasterContext => NOT_RUNNING
            // 2) job is suspended => SUSPENDED
            // 3) job has already ended but JobRecord has not yet been deleted =>
            //    do not care, result will be overwritten by the one obtained from JobResult
            //    which is guaranteed to exist in this case
            status = executionRecord != null && executionRecord.isSuspended()
                    ? JobStatus.SUSPENDED : JobStatus.NOT_RUNNING;
            userCancelled = false;
        } else {
            // order of reads is important, see comment in determineJobStatusFromMasterContext
            // for consistent result we must use single instance of TerminationRequest for all checks
            Optional<TerminationRequest> maybeTerminationRequest = ctx.jobContext().getTerminationRequest();
            JobStatus jobStatus = ctx.jobStatus();
            status = jobStatus == RUNNING && maybeTerminationRequest.isPresent() ? COMPLETING : jobStatus;
            // job is running, so not cancelled
            // or has just ended but MasterContext still exists
            userCancelled = status == FAILED
                    && maybeTerminationRequest.map(TerminationRequest::isUserInitiated).orElse(false);
        }
        return new JobAndSqlSummary(false, record.getJobId(), execId, record.getJobNameOrId(), status,
                record.getCreationTime(), 0, null, null, suspensionCause, userCancelled);
    }

    private InternalPartitionServiceImpl getInternalPartitionService() {
        Node node = nodeEngine.getNode();
        return (InternalPartitionServiceImpl) node.getPartitionService();
    }

    // runs periodically to restart jobs on coordinator failure and perform GC
    private void scanJobs() {
        long scanStart = System.currentTimeMillis();
        long nextScanDelay = maxJobScanPeriodInMillis;
        try {
            // explicit check for master because we don't want to use shorter delay on non-master nodes
            // it will be checked again in shouldStartJobs()
            if (isMaster()) {
                if (shouldStartJobs()) {
                    doScanJobs();
                } else {
                    // use a smaller delay when cluster is not in ready state
                    nextScanDelay = MIN_JOB_SCAN_PERIOD_MILLIS;
                }
            }
        } catch (HazelcastInstanceNotActiveException ignored) {
            // ignore this exception
        } catch (Throwable e) {
            logger.severe("Scanning jobs failed", e);
        }

        // Adjust the delay by the time taken by the scan to avoid accumulating more and more job results with each scan
        long scanTime = System.currentTimeMillis() - scanStart;
        nextScanDelay = Math.max(0, nextScanDelay - scanTime);

        ExecutionService executionService = nodeEngine.getExecutionService();
        executionService.schedule(this::scanJobs, nextScanDelay, MILLISECONDS);
    }

    private void doScanJobs() {
        Collection<JobRecord> jobs = jobRepository.getJobRecords();
        for (JobRecord jobRecord : jobs) {
            JobExecutionRecord jobExecutionRecord = ensureExecutionRecord(jobRecord.getJobId(),
                    jobRepository.getJobExecutionRecord(jobRecord.getJobId()));
            startJobIfNotStartedOrCompleted(jobRecord, jobExecutionRecord, "discovered by scanning of JobRecords");
        }
        jobRepository.cleanup(nodeEngine);
        if (!jobsScanned) {
            synchronized (lock) {
                // Note that setting jobsScanned is required for Jet to accept submitted jobs.
                // When a new cluster is started, job records IMap does not exist until first job is submitted.
                // This causes slight possibility of accepting duplicated job in case of HotRestart,
                // but that is acceptable risk.
                // See comment in JobRepository.cleanup().
                jobsScanned = true;
            }
        }
    }

    private JobExecutionRecord ensureExecutionRecord(long jobId, JobExecutionRecord record) {
        return record != null ? record : new JobExecutionRecord(jobId, getQuorumSize());
    }

    @SuppressWarnings("WeakerAccess") // used by jet-enterprise
    void assertIsMaster(String error) {
        if (!isMaster()) {
            throw new JetException(error + ". Master address: " + nodeEngine.getClusterService().getMasterAddress());
        }
    }

    private boolean isMaster() {
        return nodeEngine.getClusterService().isMaster();
    }

    @SuppressWarnings("unused") // used in jet-enterprise
    NodeEngineImpl nodeEngine() {
        return nodeEngine;
    }

    ManagedExecutorService coordinationExecutor() {
        return nodeEngine.getExecutionService().getExecutor(COORDINATOR_EXECUTOR_NAME);
    }

    CompletableFuture<Void> submitToCoordinatorThread(Runnable action) {
        return submitToCoordinatorThread(() -> {
            action.run();
            return null;
        });
    }

    <T> CompletableFuture<T> submitToCoordinatorThread(Callable<T> action) {
        // if we are on our thread already, execute directly in a blocking way
        if (IS_JOB_COORDINATOR_THREAD.get()) {
            try {
                return completedFuture(action.call());
            } catch (Throwable e) {
                // most callers ignore the failure on the returned future, let's log it at least
                logger.warning(null, e);
                return com.hazelcast.jet.impl.util.Util.exceptionallyCompletedFuture(e);
            }
        }

        Future<T> future = nodeEngine.getExecutionService().submit(COORDINATOR_EXECUTOR_NAME, () -> {
            assert !IS_JOB_COORDINATOR_THREAD.get() : "flag already raised";
            IS_JOB_COORDINATOR_THREAD.set(true);
            try {
                return action.call();
            } catch (Throwable e) {
                // most callers ignore the failure on the returned future, let's log it at least
                logger.warning(null, e);
                throw e;
            } finally {
                IS_JOB_COORDINATOR_THREAD.set(false);
            }
        });
        return nodeEngine.getExecutionService().asCompletableFuture(future);
    }

    void assertOnCoordinatorThread() {
        assert IS_JOB_COORDINATOR_THREAD.get() : "not on coordinator thread";
    }

    private void completeObservables(Set<String> observables, Throwable error) {
        for (String observable : observables) {
            try {
                String ringbufferName = ObservableImpl.ringbufferName(observable);
                Ringbuffer<Object> ringbuffer = nodeEngine.getHazelcastInstance().getRingbuffer(ringbufferName);
                Object completion = error == null ? DoneItem.DONE_ITEM : WrappedThrowable.of(error);
                ringbuffer.addAsync(completion, OverflowPolicy.OVERWRITE);
            } catch (Exception e) {
                logger.severe("Failed to complete observable '" + observable + "': " + e, e);
            }
        }
    }

    /**
     * From the given list of execution IDs returns those which are unknown to
     * this coordinator.
     */
    public long[] findUnknownExecutions(long[] executionIds) {
        return Arrays.stream(executionIds).filter(key -> {
            Object lmc = lightMasterContexts.get(key);
            return lmc == null || lmc instanceof LightMasterContext && ((LightMasterContext) lmc).isCancelled();
        }).toArray();
    }

    void scheduleJobTimeout(final long jobId, final long timeout) {
        if (timeout <= 0) {
            return;
        }
        scheduledJobTimeouts.computeIfAbsent(jobId, id -> scheduleJobTimeoutTask(id, timeout));
    }

    private void unscheduleJobTimeout(final long jobId) {
        final ScheduledFuture<?> timeoutFuture = scheduledJobTimeouts.remove(jobId);
        if (timeoutFuture != null) {
            timeoutFuture.cancel(true);
        }
    }

    private ScheduledFuture<?> scheduleJobTimeoutTask(final long jobId, final long timeout) {
        return this.nodeEngine().getExecutionService().schedule(() -> {
            final MasterContext mc = masterContexts.get(jobId);
            final LightMasterContext lightMc = (LightMasterContext) lightMasterContexts.get(jobId);

            try {
                if (mc != null && isMaster() && !mc.jobStatus().isTerminal()) {
                    terminateJob(jobId, CANCEL_FORCEFUL, false);
                } else if (lightMc != null && !lightMc.isCancelled()) {
                    lightMc.requestTermination(false);
                }
            } finally {
                scheduledJobTimeouts.remove(jobId);
            }
        }, timeout, MILLISECONDS);
    }

    boolean isMemberShuttingDown(UUID uuid) {
        return membersShuttingDown.containsKey(uuid);
    }
}