/*
* Copyright 2019 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.mantisrx.master.jobcluster.job;
import static io.mantisrx.master.StringConstants.MANTIS_MASTER_USER;
import static io.mantisrx.master.StringConstants.MANTIS_STAGE_CONTAINER_SIZE_NAME_KEY;
import static io.mantisrx.master.events.LifecycleEventsProto.StatusEvent.StatusEventType.ERROR;
import static io.mantisrx.master.events.LifecycleEventsProto.StatusEvent.StatusEventType.INFO;
import static io.mantisrx.master.events.LifecycleEventsProto.StatusEvent.StatusEventType.WARN;
import static io.mantisrx.master.jobcluster.job.worker.MantisWorkerMetadataImpl.MANTIS_SYSTEM_ALLOCATED_NUM_PORTS;
import static io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.CLIENT_ERROR;
import static io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SERVER_ERROR;
import static io.mantisrx.master.jobcluster.proto.BaseResponse.ResponseCode.SUCCESS;
import static java.util.Optional.empty;
import static java.util.Optional.of;
import static java.util.Optional.ofNullable;
import akka.actor.AbstractActorWithTimers;
import akka.actor.ActorRef;
import akka.actor.PoisonPill;
import akka.actor.Props;
import akka.actor.SupervisorStrategy;
import com.netflix.spectator.api.BasicTag;
import io.mantisrx.common.WorkerPorts;
import io.mantisrx.common.metrics.Counter;
import io.mantisrx.common.metrics.Metrics;
import io.mantisrx.common.metrics.MetricsRegistry;
import io.mantisrx.common.metrics.spectator.MetricGroupId;
import io.mantisrx.master.akka.MantisActorSupervisorStrategy;
import io.mantisrx.master.events.LifecycleEventPublisher;
import io.mantisrx.master.events.LifecycleEventsProto;
import io.mantisrx.master.jobcluster.WorkerInfoListHolder;
import io.mantisrx.master.jobcluster.job.worker.IMantisWorkerMetadata;
import io.mantisrx.master.jobcluster.job.worker.JobWorker;
import io.mantisrx.master.jobcluster.job.worker.WorkerHeartbeat;
import io.mantisrx.master.jobcluster.job.worker.WorkerState;
import io.mantisrx.master.jobcluster.job.worker.WorkerStatus;
import io.mantisrx.master.jobcluster.job.worker.WorkerTerminate;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDefinitionUpdatedFromJobActorRequest;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsRequest;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobSchedInfoRequest;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobSchedInfoResponse;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLatestJobDiscoveryInfoRequest;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLatestJobDiscoveryInfoResponse;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.KillJobResponse;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListWorkersRequest;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListWorkersResponse;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ResubmitWorkerRequest;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ResubmitWorkerResponse;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ScaleStageRequest;
import io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ScaleStageResponse;
import io.mantisrx.master.jobcluster.proto.JobClusterProto;
import io.mantisrx.master.jobcluster.proto.JobProto;
import io.mantisrx.master.jobcluster.proto.JobProto.InitJob;
import io.mantisrx.master.jobcluster.proto.JobProto.JobInitialized;
import io.mantisrx.runtime.JobConstraints;
import io.mantisrx.runtime.JobSla;
import io.mantisrx.runtime.MachineDefinition;
import io.mantisrx.runtime.MantisJobDurationType;
import io.mantisrx.runtime.MantisJobState;
import io.mantisrx.runtime.MigrationStrategy;
import io.mantisrx.runtime.WorkerMigrationConfig;
import io.mantisrx.runtime.descriptor.SchedulingInfo;
import io.mantisrx.runtime.descriptor.StageScalingPolicy;
import io.mantisrx.runtime.descriptor.StageSchedulingInfo;
import io.mantisrx.server.core.JobCompletedReason;
import io.mantisrx.server.core.JobSchedulingInfo;
import io.mantisrx.server.core.Status;
import io.mantisrx.server.core.WorkerAssignments;
import io.mantisrx.server.core.WorkerHost;
import io.mantisrx.server.core.domain.ArtifactID;
import io.mantisrx.server.core.domain.JobArtifact;
import io.mantisrx.server.core.domain.JobMetadata;
import io.mantisrx.server.core.domain.WorkerId;
import io.mantisrx.server.core.scheduler.SchedulingConstraints;
import io.mantisrx.server.master.agentdeploy.MigrationStrategyFactory;
import io.mantisrx.server.master.config.ConfigurationProvider;
import io.mantisrx.server.master.config.MasterConfiguration;
import io.mantisrx.server.master.domain.DataFormatAdapter;
import io.mantisrx.server.master.domain.IJobClusterDefinition;
import io.mantisrx.server.master.domain.JobDefinition;
import io.mantisrx.server.master.domain.JobId;
import io.mantisrx.server.master.persistence.MantisJobStore;
import io.mantisrx.server.master.persistence.exceptions.InvalidJobException;
import io.mantisrx.server.master.persistence.exceptions.InvalidWorkerStateChangeException;
import io.mantisrx.server.master.resourcecluster.ClusterID;
import io.mantisrx.server.master.scheduler.BatchScheduleRequest;
import io.mantisrx.server.master.scheduler.MantisScheduler;
import io.mantisrx.server.master.scheduler.ScheduleRequest;
import io.mantisrx.server.master.scheduler.WorkerEvent;
import io.mantisrx.server.master.scheduler.WorkerOnDisabledVM;
import io.mantisrx.server.master.scheduler.WorkerUnscheduleable;
import io.mantisrx.shaded.com.fasterxml.jackson.databind.ObjectMapper;
import io.mantisrx.shaded.com.google.common.base.Preconditions;
import io.mantisrx.shaded.com.google.common.cache.Cache;
import io.mantisrx.shaded.com.google.common.cache.CacheBuilder;
import io.mantisrx.shaded.com.google.common.collect.Lists;
import java.io.IOException;
import java.net.URL;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import rx.Observable;
import rx.schedulers.Schedulers;
import rx.subjects.BehaviorSubject;
/**
* Actor responsible for handling all operations for a given JobID.
* Related REST endpoints: /api/submit, /api/jobs/kill, /api/jobs/scaleStage, /api/jobs/resubmitWorker.
*
* @author njoshi
*/
public class JobActor extends AbstractActorWithTimers implements IMantisJobManager {
private static final String CHECK_HB_TIMER_KEY = "CHECK_HB";
private static final String REFRESH_SEND_STAGE_ASSIGNEMNTS_KEY = "REFRESH_SEND_STAGE_ASSIGNMENTS";
private static final Logger LOGGER = LoggerFactory.getLogger(JobActor.class);
private static final double DEFAULT_JOB_MASTER_CORES = 1;
private static final double DEFAULT_JOB_MASTER_MEM = 1024;
private static final double DEFAULT_JOB_MASTER_NW = 128;
private static final double DEFAULT_JOB_MASTER_DISK = 1024;
private final Metrics metrics;
private final MetricGroupId metricsGroupId;
private final Counter numWorkerResubmissions;
private final Counter numWorkerResubmitLimitReached;
private final Counter numWorkerTerminated;
private final Counter numScaleStage;
private final Counter numWorkersCompletedNotTerminal;
private final Counter numSchedulingChangesRefreshed;
private final Counter numMissingWorkerPorts;
/**
* Behavior after being initialized.
*/
private Receive initializedBehavior;
/**
* Behavior once active.
*/
private Receive activeBehavior;
/**
* Behavior during termination.
*/
private Receive terminatingBehavior;
/**
* Behavior after termination waiting for JCA to terminate actor.
*/
private Receive terminatedBehavior;
private final String clusterName;
private final JobId jobId;
private final IJobClusterDefinition jobClusterDefinition;
private volatile MantisJobMetadataImpl mantisJobMetaData;
private final MantisJobStore jobStore;
// load from config
private int workerWritesBatchSize = 10;
// Manages life cycle of worker
private IWorkerManager workerManager = null;
// Used to schedule and unschedule workers
private final MantisScheduler mantisScheduler;
private final LifecycleEventPublisher eventPublisher;
private final CostsCalculator costsCalculator;
private boolean hasJobMaster;
private volatile boolean allWorkersCompleted = false;
/**
* Used by the JobCluster Actor to create this Job Actor.
*
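* <p>Minimal usage sketch (the actor name and the surrounding variables in the caller are illustrative):
* <pre>{@code
* ActorRef jobActor = getContext().actorOf(
*     JobActor.props(jobClusterDefinition, jobMetadata, jobStore,
*         mantisScheduler, eventPublisher, costsCalculator),
*     "JobActor-" + jobMetadata.getJobId().getId());
* // the parent then sends a JobProto.InitJob message to trigger initialize()
* }</pre>
*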
* @param jobClusterDefinition The job cluster definition to be used while creating this job.
* @param jobMetadata The job metadata provided by the user.
* @param jobStore Reference to the persistence store {@link MantisJobStore}.
* @param mantisScheduler Reference to the {@link MantisScheduler} to be used to schedule work
* @param eventPublisher Reference to the event publisher {@link LifecycleEventPublisher} where lifecycle
* events are to be published.
* @return Akka {@link Props} used to create this JobActor.
*/
public static Props props(
final IJobClusterDefinition jobClusterDefinition,
final MantisJobMetadataImpl jobMetadata,
final MantisJobStore jobStore,
final MantisScheduler mantisScheduler,
final LifecycleEventPublisher eventPublisher,
final CostsCalculator costsCalculator) {
return Props.create(JobActor.class, jobClusterDefinition, jobMetadata, jobStore,
mantisScheduler, eventPublisher, costsCalculator);
}
/**
* This is invoked indirectly via props method to create an instance of this class.
*
* @param jobClusterDefinition
* @param jobMetadata
* @param jobStore
* @param scheduler
* @param eventPublisher
*/
public JobActor(
final IJobClusterDefinition jobClusterDefinition,
final MantisJobMetadataImpl jobMetadata,
final MantisJobStore jobStore,
final MantisScheduler scheduler,
final LifecycleEventPublisher eventPublisher,
final CostsCalculator costsCalculator) {
this.clusterName = jobMetadata.getClusterName();
this.jobId = jobMetadata.getJobId();
this.jobStore = jobStore;
this.jobClusterDefinition = jobClusterDefinition;
this.mantisScheduler = scheduler;
this.eventPublisher = eventPublisher;
this.mantisJobMetaData = jobMetadata;
this.costsCalculator = costsCalculator;
initializedBehavior = getInitializedBehavior();
activeBehavior = getActiveBehavior();
terminatingBehavior = getTerminatingBehavior();
terminatedBehavior = getTerminatedBehavior();
this.metricsGroupId = getMetricGroupId(jobId.getId(), getResourceCluster());
Metrics m = new Metrics.Builder()
.id(metricsGroupId)
.addCounter("numWorkerResubmissions")
.addCounter("numWorkerResubmitLimitReached")
.addCounter("numWorkerTerminated")
.addCounter("numScaleStage")
.addCounter("numWorkersCompletedNotTerminal")
.addCounter("numSchedulingChangesRefreshed")
.addCounter("numMissingWorkerPorts")
.build();
this.metrics = MetricsRegistry.getInstance().registerAndGet(m);
this.numWorkerResubmissions = metrics.getCounter("numWorkerResubmissions");
this.numWorkerResubmitLimitReached = metrics.getCounter("numWorkerResubmitLimitReached");
this.numWorkerTerminated = metrics.getCounter("numWorkerTerminated");
this.numScaleStage = metrics.getCounter("numScaleStage");
this.numWorkersCompletedNotTerminal = metrics.getCounter("numWorkersCompletedNotTerminal");
this.numSchedulingChangesRefreshed = metrics.getCounter("numSchedulingChangesRefreshed");
this.numMissingWorkerPorts = metrics.getCounter("numMissingWorkerPorts");
}
/**
* Create a MetricGroupId using the given job Id.
*
* @param id
* @param resourceCluster
* @return
*/
MetricGroupId getMetricGroupId(String id, String resourceCluster) {
return new MetricGroupId("JobActor", new BasicTag("jobId", id), new BasicTag("resourceCluster", resourceCluster));
}
/**
* Validates the job definition, stores the job to persistence. Instantiates the SubscriptionManager to keep track
* of subscription and runtime timeouts. Instantiates the WorkerManager, which manages the worker life cycle.
*
* @throws InvalidJobRequest
* @throws InvalidJobException
*/
void initialize(boolean isSubmit) throws Exception {
LOGGER.info("Initializing Job {}", jobId);
if (isSubmit) {
eventPublisher.publishStatusEvent(new LifecycleEventsProto.JobStatusEvent(INFO,
"Job request received", getJobId(), getJobState()));
// Ignore isReady flag, if the job is autoscaled it gets a Job Master
// this.jobClusterDefinition.getIsReadyForJobMaster() &&
if (isAutoscaled(mantisJobMetaData.getSchedulingInfo())) {
LOGGER.info("Job is autoscaled, setting up Job Master");
setupJobMasterStage(mantisJobMetaData.getSchedulingInfo());
}
LOGGER.info("Storing job");
jobStore.storeNewJob(mantisJobMetaData);
}
LOGGER.info("Stored mantis job");
this.workerManager = new WorkerManager(this, jobClusterDefinition.getWorkerMigrationConfig(),
this.mantisScheduler, isSubmit, ConfigurationProvider.getConfig().isBatchSchedulingEnabled());
long checkAgainInSeconds = getWorkerTimeoutSecs();
long refreshStageAssignmentsDurationMs = ConfigurationProvider.getConfig()
.getStageAssignmentRefreshIntervalMs();
getTimers().startPeriodicTimer(CHECK_HB_TIMER_KEY, new JobProto.CheckHeartBeat(),
Duration.ofSeconds(checkAgainInSeconds));
// -1 indicates disabled, which means all updates will be sent immediately
if (refreshStageAssignmentsDurationMs > 0) {
getTimers().startPeriodicTimer(
REFRESH_SEND_STAGE_ASSIGNEMNTS_KEY,
new JobProto.SendWorkerAssignementsIfChanged(),
Duration.ofMillis(refreshStageAssignmentsDurationMs));
}
mantisJobMetaData.getJobDefinition().getJobSla().getRuntimeLimitSecs();
LOGGER.info("Job {} initialized", this.jobId);
}
private long getWorkerTimeoutSecs() {
if (mantisJobMetaData.getWorkerTimeoutSecs() > 0) {
return mantisJobMetaData.getWorkerTimeoutSecs();
} else {
return ConfigurationProvider.getConfig().getDefaultWorkerTimeoutSecs();
}
}
private void setupJobMasterStage(SchedulingInfo schedulingInfo)
throws io.mantisrx.runtime.command.InvalidJobException {
LOGGER.info("Job {} is autoscaled setting up Job Master", this.jobId);
if (schedulingInfo.forStage(0) == null) {
// create stage 0 schedulingInfo only if not already provided
// jobMaster stage itself is not scaled
schedulingInfo.addJobMasterStage(StageSchedulingInfo.builder()
.numberOfInstances(1)
.machineDefinition(getJobMasterMachineDef())
.build());
// Update jobMetadata with the new stage added
mantisJobMetaData = new MantisJobMetadataImpl.Builder(mantisJobMetaData)
.withJobDefinition(
new JobDefinition.Builder()
.from(mantisJobMetaData.getJobDefinition())
.withSchedulingInfo(schedulingInfo)
.withNumberOfStages(schedulingInfo.getStages().size())
.build())
.build();
}
hasJobMaster = true;
}
private MachineDefinition getJobMasterMachineDef() {
MasterConfiguration config = ConfigurationProvider.getConfig();
if (config != null) {
return new MachineDefinition(
config.getJobMasterCores(), config.getJobMasterMemoryMB(), config.getJobMasterNetworkMbps(),
config.getJobMasterDiskMB(), 1
);
} else {
return new MachineDefinition(
DEFAULT_JOB_MASTER_CORES, DEFAULT_JOB_MASTER_MEM, DEFAULT_JOB_MASTER_NW,
DEFAULT_JOB_MASTER_DISK, 1);
}
}
@Override
public void preStart() throws Exception {
LOGGER.info("Job Actor {}-{} started", clusterName, jobId);
}
@Override
public void postStop() throws Exception {
LOGGER.info("Job Actor {} stopped invoking cleanup logic", jobId);
if (metricsGroupId != null) {
MetricsRegistry.getInstance().remove(metricsGroupId);
}
//shutdown();
}
@Override
public SupervisorStrategy supervisorStrategy() {
// custom supervisor strategy to resume the child actors on Exception instead of the default restart
return MantisActorSupervisorStrategy.getInstance().create();
}
@Override
public Receive createReceive() {
return getInitializingBehavior();
}
private String genUnexpectedMsg(String event, String cluster, String state) {
return String.format("Unexpected message %s received by Job actor %s in %s State", event, cluster, state);
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/*
Job Actor behaviors 12 total
* - Init
* - GET
* - LIST workers
* - GET SCHED INFO
* - SCALE
* - KILL
* - RESUBMIT WORKER
* - WorkerEvent
*
* // SELF SENT
* - HB enforcement
* - Runtime enforcement
* - Self Destruct
* - Refresh Stage Assignments
*/
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
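/*
* Behavior transitions, as wired up by the handlers below:
*
* initializing --(InitJob / onJobInitialize)--> initialized (job newly accepted)
* initializing --(InitJob, job metadata already in a running state)--> active (e.g. on master failover)
* initialized --(onAllWorkersStarted)--> active
* initialized/active --(KillJobRequest / onJobKill)--> terminating
* terminating --(performFinalShutdown)--> terminated, then PoisonPill to self
*/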
/**
* A Terminating Job allows: GET, LIST workers, WorkerEvent.
*
* @return
*/
private Receive getTerminatingBehavior() {
String state = "terminating";
return receiveBuilder()
// EXPECTED MESSAGES BEGIN//
// get Job Details
.match(GetJobDetailsRequest.class, this::onGetJobDetails)
// process request to get the given job definition updated with this job actor.
.match(GetJobDefinitionUpdatedFromJobActorRequest.class, this::onGetJobDefinitionUpdatedFromJobActor)
// list active workers request
.match(ListWorkersRequest.class, this::onListActiveWorkers)
// EXPECTED MESSAGES END//
// UNEXPECTED MESSAGES BEGIN //
// Worker related events
.match(WorkerEvent.class, (x) -> LOGGER.warn("Job {} is Terminating, ignoring worker Events {}",
this.jobId.getId(), x))
.match(InitJob.class, (x) -> getSender().tell(new JobInitialized(x.requestId, SUCCESS,
genUnexpectedMsg(x.toString(), this.jobId.getId(), state), this.jobId, x.requstor), getSelf()))
// explicit resubmit worker
.match(ResubmitWorkerRequest.class, (x) -> getSender().tell(new ResubmitWorkerResponse(x.requestId,
CLIENT_ERROR, genUnexpectedMsg(x.toString(), this.jobId.getId(), state)), getSelf()))
// Heart beat accounting timers
.match(JobProto.CheckHeartBeat.class, (x) -> LOGGER.warn(genUnexpectedMsg(x.toString(),
this.jobId.getId(), state)))
// runtime limit reached
.match(JobProto.RuntimeLimitReached.class, (x) -> LOGGER.warn(genUnexpectedMsg(x.toString(),
this.jobId.getId(), state)))
// Kill job request
.match(JobClusterProto.KillJobRequest.class, (x) -> getSender().tell(new KillJobResponse(x.requestId,
SUCCESS, JobState.Noop, genUnexpectedMsg(x.toString(), this.jobId.getId(), state),
this.jobId, x.user), getSelf()))
// scale stage request
.match(ScaleStageRequest.class, (x) -> getSender().tell(new ScaleStageResponse(x.requestId,
CLIENT_ERROR, genUnexpectedMsg(x.toString(), this.jobId.getId(), state),
0), getSelf()))
// scheduling Info observable
.match(GetJobSchedInfoRequest.class, (x) -> getSender().tell(
new GetJobSchedInfoResponse(x.requestId, CLIENT_ERROR,
genUnexpectedMsg(x.toString(), this.jobId.getId(), state), empty()), getSelf()))
.match(GetLatestJobDiscoveryInfoRequest.class, (x) -> getSender().tell(
new GetLatestJobDiscoveryInfoResponse(x.requestId, CLIENT_ERROR,
genUnexpectedMsg(x.toString(), this.jobId.getId(), state), empty()), getSelf()))
.match(
JobProto.SendWorkerAssignementsIfChanged.class,
(x) -> LOGGER.warn(genUnexpectedMsg(x.toString(), this.jobId.getId(), state)))
.match(KillJobResponse.class, (x) -> LOGGER.info("Received Kill Job Response in"
+ "Terminating State Ignoring"))
.matchAny(x -> LOGGER.warn(genUnexpectedMsg(x.toString(), this.jobId.getId(), state)))
// UNEXPECTED MESSAGES END
.build();
}
/**
* A Terminated Job allows: GET, LIST workers.
*
* @return
*/
private Receive getTerminatedBehavior() {
String state = "terminated";
return receiveBuilder()
// EXPECTED MESSAGES BEGIN//
// get Job Details
.match(GetJobDetailsRequest.class, this::onGetJobDetails)
// process request to get the given job definition updated by this job actor.
.match(GetJobDefinitionUpdatedFromJobActorRequest.class, this::onGetJobDefinitionUpdatedFromJobActor)
// list active workers request
.match(ListWorkersRequest.class, this::onListActiveWorkers)
// EXPECTED MESSAGES END//
// UNEXPECTED MESSAGES BEGIN //
.match(InitJob.class, (x) -> getSender().tell(
new JobInitialized(x.requestId, SUCCESS, genUnexpectedMsg(
x.toString(), this.jobId.getId(), state), this.jobId, x.requstor), getSelf()))
// explicit resubmit worker
.match(ResubmitWorkerRequest.class, (x) -> getSender().tell(
new ResubmitWorkerResponse(x.requestId, CLIENT_ERROR,
genUnexpectedMsg(x.toString(), this.jobId.getId(), state)), getSelf()))
// Heart beat accounting timers
.match(JobProto.CheckHeartBeat.class, (x) -> LOGGER.warn(
genUnexpectedMsg(x.toString(), this.jobId.getId(), state)))
// Migrate worker request
.match(JobProto.MigrateDisabledVmWorkersRequest.class, (x) -> LOGGER.warn(
genUnexpectedMsg(x.toString(), this.jobId.getId(), state)))
// runtime limit reached
.match(JobProto.RuntimeLimitReached.class, (x) -> LOGGER.warn(
genUnexpectedMsg(x.toString(), this.jobId.getId(), state)))
// Kill job request
.match(JobClusterProto.KillJobRequest.class, (x) -> getSender().tell(
new KillJobResponse(x.requestId, SUCCESS, JobState.Noop,
genUnexpectedMsg(x.toString(), this.jobId.getId(), state), this.jobId, x.user),
getSelf()))
// scale stage request
.match(ScaleStageRequest.class, (x) -> getSender().tell(
new ScaleStageResponse(x.requestId, CLIENT_ERROR,
genUnexpectedMsg(x.toString(), this.jobId.getId(), state), 0),
getSelf()))
// scheduling Info observable
.match(GetJobSchedInfoRequest.class, (x) -> getSender().tell(
new GetJobSchedInfoResponse(x.requestId, CLIENT_ERROR, genUnexpectedMsg(
x.toString(), this.jobId.getId(), state), empty()), getSelf()))
.match(GetLatestJobDiscoveryInfoRequest.class, (x) -> getSender().tell(
new GetLatestJobDiscoveryInfoResponse(x.requestId, CLIENT_ERROR, genUnexpectedMsg(
x.toString(), this.jobId.getId(), state), empty()), getSelf()))
.match(KillJobResponse.class, (x) -> LOGGER.info("Received Kill Job Response in"
+ "Terminating State Ignoring"))
.match(JobProto.SendWorkerAssignementsIfChanged.class, (x) -> LOGGER.warn(genUnexpectedMsg(
x.toString(), this.jobId.getId(), state)))
// Worker related events
.match(WorkerEvent.class, (x) -> LOGGER.info("Received worker event in Terminated State Ignoring"))
.matchAny(x -> LOGGER.warn(genUnexpectedMsg(x.toString(), this.jobId.getId(), state)))
// UNEXPECTED MESSAGES END
.build();
}
/**
* An active job allows: GET, LIST workers, GET SCHED INFO, SCALE, KILL, RESUBMIT WORKER, WorkerEvent,
* HB enforcement, Runtime enforcement, Refresh Stage Assignments.
*
* @return
*/
private Receive getActiveBehavior() {
String state = "active";
// get Job Details
return receiveBuilder()
// EXPECTED MESSAGES BEGIN//
.match(GetJobDetailsRequest.class, this::onGetJobDetails)
// process request to get the given job definition updated by this job actor.
.match(GetJobDefinitionUpdatedFromJobActorRequest.class, this::onGetJobDefinitionUpdatedFromJobActor)
// Worker related events
.match(WorkerEvent.class, r -> processWorkerEvent(r))
// explicit resubmit worker
.match(ResubmitWorkerRequest.class, this::onResubmitWorker)
// Heart beat accounting timers
.match(JobProto.CheckHeartBeat.class, this::onCheckHeartBeats)
// Migrate workers from disabled VMs
.match(JobProto.MigrateDisabledVmWorkersRequest.class, this::onMigrateWorkers)
// runtime limit reached
.match(JobProto.RuntimeLimitReached.class, this::onRuntimeLimitReached)
// Kill job request
.match(JobClusterProto.KillJobRequest.class, this::onJobKill)
// scale stage request
.match(ScaleStageRequest.class, this::onScaleStage)
// list active workers request
.match(ListWorkersRequest.class, this::onListActiveWorkers)
// scheduling Info observable
.match(GetJobSchedInfoRequest.class, this::onGetJobStatusSubject)
.match(GetLatestJobDiscoveryInfoRequest.class, this::onGetLatestJobDiscoveryInfo)
.match(JobProto.SendWorkerAssignementsIfChanged.class, this::onSendWorkerAssignments)
// EXPECTED MESSAGES END//
// UNEXPECTED MESSAGES BEGIN //
.match(InitJob.class, (x) -> getSender().tell(new JobInitialized(x.requestId, SUCCESS,
genUnexpectedMsg(x.toString(), this.jobId.getId(), state), this.jobId, x.requstor), getSelf()))
.matchAny(x -> LOGGER.warn(genUnexpectedMsg(x.toString(), this.jobId.getId(), state)))
// UNEXPECTED MESSAGES END //
.build();
}
/**
* An INITIALIZED job allows: GET, LIST workers, GET SCHED INFO, KILL, WorkerEvent, HB enforcement,
* REFRESH STAGE scheduling info.
*
* @return
*/
private Receive getInitializedBehavior() {
String state = "initialized";
return receiveBuilder()
// EXPECTED MESSAGES BEGIN//
// get Job Details
.match(GetJobDetailsRequest.class, this::onGetJobDetails)
// process request to get the given job definition updated by this job actor.
.match(GetJobDefinitionUpdatedFromJobActorRequest.class, this::onGetJobDefinitionUpdatedFromJobActor)
// Worker related events
.match(WorkerEvent.class, r -> processWorkerEvent(r))
// Heart beat accounting timers
.match(JobProto.CheckHeartBeat.class, this::onCheckHeartBeats)
// Migrate workers from disabled VMs
.match(JobProto.MigrateDisabledVmWorkersRequest.class, this::onMigrateWorkers)
// Kill job request
.match(JobClusterProto.KillJobRequest.class, this::onJobKill)
// list active workers request
.match(ListWorkersRequest.class, this::onListActiveWorkers)
.match(GetJobSchedInfoRequest.class, this::onGetJobStatusSubject)
.match(GetLatestJobDiscoveryInfoRequest.class, this::onGetLatestJobDiscoveryInfo)
.match(JobProto.SendWorkerAssignementsIfChanged.class, this::onSendWorkerAssignments)
// EXPECTED MESSAGES END//
// UNEXPECTED MESSAGES BEGIN //
// explicit resubmit worker
.match(ResubmitWorkerRequest.class, (x) -> getSender().tell(
new ResubmitWorkerResponse(x.requestId, CLIENT_ERROR, genUnexpectedMsg(
x.toString(), this.jobId.getId(), state)), getSelf()))
// runtime limit reached
.match(JobProto.RuntimeLimitReached.class, (x) -> LOGGER.warn(genUnexpectedMsg(
x.toString(), this.jobId.getId(), state)))
// scale stage request
.match(ScaleStageRequest.class, (x) -> getSender().tell(
new ScaleStageResponse(x.requestId, CLIENT_ERROR, genUnexpectedMsg(
x.toString(), this.jobId.getId(), state), 0), getSelf()))
.match(InitJob.class, (x) -> getSender().tell(new JobInitialized(x.requestId, SUCCESS,
genUnexpectedMsg(x.toString(), this.jobId.getId(), state), this.jobId, x.requstor), getSelf()))
.matchAny(x -> LOGGER.warn(genUnexpectedMsg(x.toString(), this.jobId.getId(), state)))
// UNEXPECTED MESSAGES END //
.build();
}
/**
* An INITIALIZING job allows: Init Job.
*
* @return
*/
private Receive getInitializingBehavior() {
String state = "initializing";
return receiveBuilder()
// EXPECTED MESSAGES BEGIN//
.match(InitJob.class, this::onJobInitialize)
// EXPECTED MESSAGES END//
//UNEXPECTED MESSAGES BEGIN //
// get Job Details
.match(GetJobDetailsRequest.class, (x) -> getSender().tell(
new GetJobDetailsResponse(x.requestId, CLIENT_ERROR, genUnexpectedMsg(
x.toString(), this.jobId.getId(), state), empty()), getSelf()))
// no invalid metadata to use, return intermediate job definition directly
.match(
GetJobDefinitionUpdatedFromJobActorRequest.class,
(r) -> getSender().tell(
new JobClusterManagerProto.GetJobDefinitionUpdatedFromJobActorResponse(
r.requestId, SUCCESS, "", r.getUser(), r.getJobDefinition(),
r.isAutoResubmit(), r.isQuickSubmit(), r.getOriginalSender()),
getSelf()))
// Worker related events
.match(WorkerEvent.class, (x) -> LOGGER.warn(genUnexpectedMsg(x.toString(), this.jobId.getId(), state)))
// explicit resubmit worker
.match(ResubmitWorkerRequest.class, (x) -> getSender().tell(
new ResubmitWorkerResponse(x.requestId, CLIENT_ERROR, genUnexpectedMsg(
x.toString(), this.jobId.getId(), state)), getSelf()))
// Heart beat accounting timers
.match(JobProto.CheckHeartBeat.class, (x) -> LOGGER.warn(genUnexpectedMsg(
x.toString(), this.jobId.getId(), state)))
// Migrate workers request
.match(JobProto.MigrateDisabledVmWorkersRequest.class, (x) -> LOGGER.warn(genUnexpectedMsg(
x.toString(), this.jobId.getId(), state)))
// runtime limit reached
.match(JobProto.RuntimeLimitReached.class, (x) -> LOGGER.warn(genUnexpectedMsg(
x.toString(), this.jobId.getId(), state)))
// Kill job request
.match(JobClusterProto.KillJobRequest.class, (x) -> getSender().tell(
new KillJobResponse(x.requestId, CLIENT_ERROR, JobState.Noop, genUnexpectedMsg(
x.toString(), this.jobId.getId(), state), this.jobId, x.user), getSelf()))
// scale stage request
.match(ScaleStageRequest.class, (x) -> getSender().tell(
new ScaleStageResponse(x.requestId, CLIENT_ERROR, genUnexpectedMsg(
x.toString(), this.jobId.getId(), state), 0), getSelf()))
// list active workers request
.match(ListWorkersRequest.class, (x) -> getSender().tell(
new ListWorkersResponse(x.requestId, CLIENT_ERROR, genUnexpectedMsg(
x.toString(), this.jobId.getId(), state), Lists.newArrayList()), getSelf()))
// scheduling Info observable
.match(GetJobSchedInfoRequest.class, (x) -> getSender().tell(
new GetJobSchedInfoResponse(x.requestId, CLIENT_ERROR, genUnexpectedMsg(
x.toString(), this.jobId.getId(), state), empty()), getSelf()))
// latest scheduling Info
.match(GetLatestJobDiscoveryInfoRequest.class, (x) -> getSender().tell(
new GetLatestJobDiscoveryInfoResponse(x.requestId, CLIENT_ERROR, genUnexpectedMsg(
x.toString(), this.jobId.getId(), state), empty()), getSelf()))
//UNEXPECTED MESSAGES END //
.matchAny(x -> LOGGER.warn(genUnexpectedMsg(x.toString(), this.jobId.getId(), state)))
.build();
}
//////////////////////////////////////////// Akka Messages sent to the Job Actor Begin/////////////////////
@Override
public void onJobInitialize(InitJob i) {
ActorRef sender = getSender();
try {
initialize(i.isSubmit);
if (JobState.isRunningState(mantisJobMetaData.getState())) {
getContext().become(activeBehavior);
setRuntimeLimitTimersIfRequired(Instant.now());
} else {
getContext().become(initializedBehavior);
}
sender.tell(new JobInitialized(i.requestId, SUCCESS, String.format(
"Job %s initialized successfully", jobId), jobId, i.requstor), getSelf());
} catch (Exception e) {
LOGGER.error("Exception initializing job ", e);
sender.tell(
new JobInitialized(i.requestId, SERVER_ERROR, "" + e.getMessage(), jobId, i.requstor),
getSelf());
}
}
/**
* Return information related to this job.
*
* @param r
*/
@Override
public void onGetJobDetails(GetJobDetailsRequest r) {
ActorRef sender = getSender();
sender.tell(new GetJobDetailsResponse(r.requestId, SUCCESS, "", of(getJobDetails())), getSelf());
}
public void onGetJobDefinitionUpdatedFromJobActor(GetJobDefinitionUpdatedFromJobActorRequest r) {
ActorRef sender = getSender();
sender.tell(
getIntermediateJobDefinition(r),
getSelf());
}
/**
* Returns a BehaviorSubject that streams this job's scheduling info (worker assignment changes) to the caller.
*
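* <p>Minimal sketch of consuming the response; the accessor name on the response below is an assumption,
* not verified in this file:
* <pre>{@code
* // response is the GetJobSchedInfoResponse received by the requesting actor
* response.getJobSchedInfoSubject().ifPresent(subject ->
*     subject.subscribe(schedInfo -> LOGGER.info("scheduling info changed: {}", schedInfo)));
* }</pre>
*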
* @param r
*/
@Override
public void onGetJobStatusSubject(GetJobSchedInfoRequest r) {
ActorRef sender = getSender();
if (r.getJobId().equals(this.jobId)) {
sender.tell(new GetJobSchedInfoResponse(r.requestId, SUCCESS, "",
of(workerManager.getJobStatusSubject())), getSelf());
} else {
String msg = "JobId in the request " + r.getJobId() + " does not match Job Actors job Id " + this.jobId;
LOGGER.warn(msg);
sender.tell(new GetJobSchedInfoResponse(r.requestId, CLIENT_ERROR, msg, empty()), getSelf());
}
}
@Override
public void onGetLatestJobDiscoveryInfo(GetLatestJobDiscoveryInfoRequest r) {
ActorRef sender = getSender();
if (r.getJobCluster().equals(this.jobId.getCluster())) {
JobSchedulingInfo schedulingInfo = workerManager.getJobStatusSubject().getValue();
if (schedulingInfo != null) {
sender.tell(new GetLatestJobDiscoveryInfoResponse(r.requestId, SUCCESS, "",
ofNullable(schedulingInfo)), getSelf());
} else {
LOGGER.info("discoveryInfo from BehaviorSubject is null {}", jobId);
sender.tell(new GetLatestJobDiscoveryInfoResponse(r.requestId,
SERVER_ERROR,
"discoveryInfo from BehaviorSubject is null " + jobId,
empty()), getSelf());
}
} else {
String msg = "JobCluster in the request " + r.getJobCluster() + " does not match Job Actors job ID "
+ this.jobId;
LOGGER.warn(msg);
sender.tell(new GetLatestJobDiscoveryInfoResponse(r.requestId, SERVER_ERROR, msg, empty()), getSelf());
}
}
/**
* Worker Events sent by the worker itself or the Scheduling Service.
*/
@Override
public void processWorkerEvent(final WorkerEvent e) {
this.workerManager.processEvent(e, mantisJobMetaData.getState());
}
/**
* Resubmit a specific worker Index.
*/
@Override
public void onResubmitWorker(final ResubmitWorkerRequest r) {
ActorRef sender = getSender();
try {
eventPublisher.publishStatusEvent(new LifecycleEventsProto.JobStatusEvent(INFO,
r.getWorkerNum() + " workerNum resubmit requested by " + r.getUser() + " , reason: "
+ r.getReason(),
getJobId(), getJobState()));
this.workerManager.resubmitWorker(r.getWorkerNum());
numWorkerResubmissions.increment();
sender.tell(new ResubmitWorkerResponse(r.requestId, SUCCESS,
String.format("Worker %d of job %s resubmitted", r.getWorkerNum(), r.getJobId())), getSelf());
} catch (Exception e) {
sender.tell(new ResubmitWorkerResponse(r.requestId, SERVER_ERROR, e.getMessage()), getSelf());
}
}
@Override
public void onMigrateWorkers(final JobProto.MigrateDisabledVmWorkersRequest r) {
workerManager.migrateDisabledVmWorkers(r.time);
}
/**
* Invoked periodically to check heart beat status of the workers.
*
* @param r
*/
@Override
public void onCheckHeartBeats(final JobProto.CheckHeartBeat r) {
this.workerManager.checkHeartBeats(r.getTime());
}
@Override
public void onRuntimeLimitReached(final JobProto.RuntimeLimitReached r) {
LOGGER.info("In onRuntimeLimitReached {} for Job {} ", Instant.now(), this.jobId);
LOGGER.info("Job {} Started at {} and killed at {} due to Runtime limit reached", jobId,
mantisJobMetaData.getStartedAtInstant().orElse(Instant.now()), Instant.now());
getContext().getParent().tell(new JobClusterProto.KillJobRequest(jobId,
"runtime limit reached", JobCompletedReason.Killed,
MANTIS_MASTER_USER, ActorRef.noSender()), getSelf());
}
@Override
public void onSendWorkerAssignments(final JobProto.SendWorkerAssignementsIfChanged r) {
this.workerManager.refreshAndSendWorkerAssignments();
}
/**
* Will update Job state to terminal, unschedule all workers, update worker state as failed in the DB, archive the
* job and self destruct.
*
* Worker terminated events will get ignored.
*
* @param req
*/
@Override
public void onJobKill(JobClusterProto.KillJobRequest req) {
ActorRef sender = getSender();
LOGGER.info("Shutting down job {} on request by {}", jobId, sender);
try {
eventPublisher.publishStatusEvent(new LifecycleEventsProto.JobStatusEvent(INFO,
"Killing job, reason: " + req.reason,
getJobId(), getJobState()));
JobState newState;
if (req.jobCompletedReason.equals(JobCompletedReason.Error)
|| req.jobCompletedReason.equals(JobCompletedReason.Lost)) {
newState = JobState.Failed;
} else {
newState = JobState.Completed;
}
// update job state
updateStateAndPersist(newState);
// inform caller
sender.tell(new JobClusterProto.KillJobResponse(req.requestId, SUCCESS, getJobState(), getJobId()
+ " terminated", getJobId(), this.mantisJobMetaData, req.user, req.requestor), getSelf());
// continue with rest of the shutdown
getTimers().cancel(CHECK_HB_TIMER_KEY);
getContext().become(terminatingBehavior);
// shutdown workers
shutdown(newState, req.reason);
// take poison pill
performFinalShutdown();
} catch (Exception e) {
LOGGER.error("Failed to kill job {}", jobId, e);
sender.tell(new JobClusterProto.KillJobResponse(req.requestId, SERVER_ERROR, getJobState(),
getJobId() + " Could not be terminated due to " + e.getMessage(), getJobId(),
this.mantisJobMetaData, req.user, req.requestor), getSelf());
}
}
@Override
public void onScaleStage(ScaleStageRequest scaleStage) {
LOGGER.info("In Scale stage {} for Job {}", scaleStage, this.jobId);
ActorRef sender = getSender();
Optional<IMantisStageMetadata> stageMeta = this.mantisJobMetaData.getStageMetadata(scaleStage.getStageNum());
// Make sure stage is valid
if (!stageMeta.isPresent()) {
LOGGER.warn("Stage {} does not exist in Job {}", scaleStage.getStageNum(), this.jobId);
sender.tell(new ScaleStageResponse(scaleStage.requestId, CLIENT_ERROR, "Non existent stage "
+ scaleStage.getStageNum(), 0), getSelf());
return;
}
// Make sure stage is scalable
MantisStageMetadataImpl stageMetaData = (MantisStageMetadataImpl) stageMeta.get();
if (!stageMetaData.getScalable()) {
LOGGER.warn("Stage {} is not scalable in Job {}", scaleStage.getStageNum(), this.jobId);
eventPublisher.publishStatusEvent(new LifecycleEventsProto.JobStatusEvent(
LifecycleEventsProto.StatusEvent.StatusEventType.WARN,
"Can't change #workers to " + scaleStage.getNumWorkers() + ", stage "
+ scaleStage.getStageNum() + " is not scalable", getJobId(), getJobState()));
sender.tell(new ScaleStageResponse(scaleStage.requestId, CLIENT_ERROR, "Stage "
+ scaleStage.getStageNum() + " is not scalable", 0), getSelf());
return;
}
try {
int actualScaleup = this.workerManager.scaleStage(stageMetaData, scaleStage.getNumWorkers(),
scaleStage.getReason());
LOGGER.info("Scaled stage {} to {} workers for Job {}", scaleStage.getStageNum(), actualScaleup,
this.jobId);
numScaleStage.increment();
sender.tell(new ScaleStageResponse(scaleStage.requestId, SUCCESS,
String.format("Scaled stage %d to %d workers", scaleStage.getStageNum(), actualScaleup),
actualScaleup), getSelf());
} catch (Exception e) {
String msg = String.format("Stage %d scale failed due to %s", scaleStage.getStageNum(), e.getMessage());
LOGGER.error(msg, e);
sender.tell(new ScaleStageResponse(scaleStage.requestId, SERVER_ERROR, msg, 0), getSelf());
}
}
/**
* Responds with {@link ListWorkersResponse} object containing data about all active workers.
*
* @param listWorkersRequest
*/
public void onListActiveWorkers(ListWorkersRequest listWorkersRequest) {
ActorRef sender = getSender();
List<IMantisWorkerMetadata> activeWorkers = this.workerManager.getActiveWorkers(listWorkersRequest.getLimit());
sender.tell(new ListWorkersResponse(listWorkersRequest.requestId, SUCCESS, "",
Collections.unmodifiableList(activeWorkers)), getSelf());
}
//////////////////////////////////////////// Akka Messages sent to the Job Actor End/////////////////////////
/////////////////////////////////////////// Internal State change events Begin //////////////////////////////
private void performFinalShutdown() {
try {
LOGGER.info("Archiving Job {}", this.jobId);
jobStore.archiveJob(mantisJobMetaData);
} catch (IOException e) {
LOGGER.warn("Exception archiving job " + mantisJobMetaData.getJobId(), e);
}
getContext().become(terminatedBehavior);
// commit suicide
getSelf().tell(PoisonPill.getInstance(), ActorRef.noSender());
}
/**
* Invoked when all workers are in terminal state. Should get called only during shutdown process
*/
@Override
public void onAllWorkersCompleted() {
LOGGER.info("JobActor: onAllWorkersCompleted with current state {}", mantisJobMetaData.getState());
if (!JobState.isTerminalState(mantisJobMetaData.getState()) && !allWorkersCompleted) {
LOGGER.info("All workers completed but job {} in {} state. Request termination", jobId, getJobState());
allWorkersCompleted = true;
getContext().parent().tell(
new JobClusterProto.KillJobRequest(
jobId, "Job Completed", JobCompletedReason.Normal, MANTIS_MASTER_USER,
ActorRef.noSender()), getSelf());
numWorkersCompletedNotTerminal.increment();
} else {
// job kill has already been requested, ignore
LOGGER.debug("Job {} Kill already requested", this.jobId);
}
}
/**
* Should get called only once after all workers have started.
*/
@Override
public boolean onAllWorkersStarted() {
LOGGER.info("In onAllWorkersStarted for Job {}", jobId);
boolean isSuccess = true;
if (mantisJobMetaData.getState() == JobState.Accepted) {
try {
// update record in storage
updateStateAndPersist(JobState.Launched);
// update behavior to active
getContext().become(activeBehavior);
eventPublisher.publishStatusEvent(new LifecycleEventsProto.JobStatusEvent(INFO,
"all workers started, job transitioning to Active", getJobId(), getJobState()));
// inform job cluster manager that the job has started
getContext().getParent().tell(new JobClusterProto.JobStartedEvent(getJobId()), getSelf());
// kick off max runtime timer if needed
Instant currentTime = Instant.now();
// Update start time and persist state
mantisJobMetaData.setStartedAt(currentTime.toEpochMilli(), jobStore);
setRuntimeLimitTimersIfRequired(currentTime);
} catch (Exception e) {
LOGGER.error("Error processing all worker started event ", e);
isSuccess = false;
}
} else if (mantisJobMetaData.getState() == JobState.Launched) {
// no op
LOGGER.info("Job is already in launched state");
isSuccess = false;
} else {
// something is wrong!
LOGGER.warn("Unexpected all Workers Started Event while job in {} state", mantisJobMetaData.getState());
isSuccess = false;
}
return isSuccess;
}
/**
* Invoked if workers have been relaunched too many times. Request this job to be terminated and marked as failed
*/
@Override
public boolean onTooManyWorkerResubmits() {
LOGGER.warn("Too many worker resubmits detected for Job {}. Requesting job shutdown", jobId);
boolean isSuccess = true;
eventPublisher.publishStatusEvent(new LifecycleEventsProto.JobStatusEvent(ERROR,
"Worker Resubmit limit reached, shutting down job", getJobId(), getJobState()));
numWorkerResubmitLimitReached.increment();
//updateStateAndPersist(JobState.Terminating_abnormal);
// ask Parent to shut it down
getContext().parent().tell(
new JobClusterProto.KillJobRequest(
jobId, "Too many worker resubmits", JobCompletedReason.Error, MANTIS_MASTER_USER,
ActorRef.noSender()), getSelf());
return isSuccess;
}
//////////////////////////////////Internal State Change Events END //////////////////////////////////////
/**
* Returns the details of this job.
*/
@Override
public IMantisJobMetadata getJobDetails() {
return this.mantisJobMetaData;
}
public JobClusterManagerProto.GetJobDefinitionUpdatedFromJobActorResponse getIntermediateJobDefinition(
GetJobDefinitionUpdatedFromJobActorRequest r) {
final JobDefinition givenJobDefn = r.getJobDefinition();
final boolean forceInheritance = r.isQuickSubmit();
IMantisJobMetadata lastJobMeta = this.mantisJobMetaData;
JobDefinition.Builder jobDefnBuilder = new JobDefinition.Builder().fromWithInstanceCountInheritance(
givenJobDefn,
forceInheritance,
(stageId) -> lastJobMeta.getStageMetadata(stageId).map(IMantisStageMetadata::getNumWorkers));
try {
JobDefinition mergedJobDefn = jobDefnBuilder.build();
return new JobClusterManagerProto.GetJobDefinitionUpdatedFromJobActorResponse(
r.requestId, SUCCESS, "", r.getUser(), mergedJobDefn, r.isAutoResubmit(),
r.isQuickSubmit(), r.getOriginalSender());
} catch (io.mantisrx.runtime.command.InvalidJobException ije) {
LOGGER.error("Failed to build job definition with inheritance:", ije);
return new JobClusterManagerProto.GetJobDefinitionUpdatedFromJobActorResponse(
r.requestId, SERVER_ERROR, ije.getMessage(), r.getUser(), null, r.isAutoResubmit(),
r.isQuickSubmit(), r.getOriginalSender());
}
}
/**
* Triggered when the JobActor receives the Job Kill message. It will update the state of the job to terminating in
* the persistence layer and request the workers to be terminated.
*
* @param state
*/
@Override
public void shutdown(JobState state, String reason) {
LOGGER.info("Entering JobActor:shutdown {}", jobId);
workerManager.shutdown();
eventPublisher.publishStatusEvent(new LifecycleEventsProto.JobStatusEvent(INFO,
"job shutdown, reason: " + reason,
getJobId(), state));
eventPublisher.publishAuditEvent(new LifecycleEventsProto.AuditEvent(
LifecycleEventsProto.AuditEvent.AuditEventType.JOB_TERMINATE,
jobId.getId(), "job shutdown, reason: " + reason));
}
@Override
public JobId getJobId() {
return this.jobId;
}
private void updateStateAndPersist(JobState newState) throws Exception {
mantisJobMetaData.setJobState(newState, jobStore);
}
/**
* Always invoked after the job has transitioned to started state.
*
* @param currentTime
*/
private void setRuntimeLimitTimersIfRequired(Instant currentTime) {
long maxRuntimeSecs = mantisJobMetaData.getJobDefinition().getJobSla().getRuntimeLimitSecs();
Instant startedAt = mantisJobMetaData.getStartedAtInstant().orElse(currentTime);
long terminateJobInSecs;
if (maxRuntimeSecs > 0) {
terminateJobInSecs = JobHelper.calculateRuntimeDuration(maxRuntimeSecs, startedAt);
LOGGER.info("Will terminate Job {} at {} ", jobId, (currentTime.plusSeconds(terminateJobInSecs)));
getTimers().startSingleTimer("RUNTIME_LIMIT", new JobProto.RuntimeLimitReached(),
Duration.ofSeconds(terminateJobInSecs));
} else {
LOGGER.info("maxRuntime for Job {} is {} ignore ", jobId, mantisJobMetaData.getJobDefinition()
.getJobSla().getRuntimeLimitSecs());
}
}
@Override
public JobState getJobState() {
return mantisJobMetaData.getState();
}
private boolean isAutoscaled(SchedulingInfo schedulingInfo) {
for (Map.Entry<Integer, StageSchedulingInfo> entry : schedulingInfo.getStages().entrySet()) {
final StageScalingPolicy scalingPolicy = entry.getValue().getScalingPolicy();
if (scalingPolicy != null && scalingPolicy.isEnabled()) {
LOGGER.info("Job {} is autoscaleable", jobId);
return true;
}
}
LOGGER.info("Job {} is NOT scaleable", jobId);
return false;
}
/*package protected*/
/**
* Returns the calculated subscription timeout in seconds for this job.
*
* @param mjmd
* @return
*/
static long getSubscriptionTimeoutSecs(final IMantisJobMetadata mjmd) {
// if perpetual job there is no subscription timeout
if (mjmd.getJobDefinition().getJobSla().getDurationType() == MantisJobDurationType.Perpetual) {
return 0;
}
return mjmd.getSubscriptionTimeoutSecs() == 0
? ConfigurationProvider.getConfig().getEphemeralJobUnsubscribedTimeoutSecs()
: mjmd.getSubscriptionTimeoutSecs();
}
static long getHeartbeatIntervalSecs(final IMantisJobMetadata mjmd) {
if (mjmd.getHeartbeatIntervalSecs() > 0) {
return mjmd.getHeartbeatIntervalSecs();
}
return ConfigurationProvider.getConfig().getDefaultWorkerHeartbeatIntervalSecs();
}
/**
* Keeps track of the last used worker number and mints a new one every time a worker is scheduled.
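*
* <p>Illustrative use (the job metadata and store instances are assumed to exist in the caller):
* <pre>{@code
* WorkerNumberGenerator gen = new WorkerNumberGenerator(0, 10);
* int first = gen.getNextWorkerNumber(jobMetadata, jobStore);  // persists nextWorkerNumberToUse=10, returns 1
* int second = gen.getNextWorkerNumber(jobMetadata, jobStore); // no store write, returns 2
* }</pre>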
*/
static class WorkerNumberGenerator {
private static final Logger LOGGER = LoggerFactory.getLogger(WorkerNumberGenerator.class);
private static final int MAX_ATTEMPTS = 10;
private static final long SLEEP_DURATION_MS = Duration.ofSeconds(2).toMillis();
private static final int DEFAULT_INCREMENT_STEP = 10;
private final int incrementStep;
private int lastUsed;
private int currLimit;
private volatile boolean hasErrored = false;
/**
* Creates an instance of this class.
*
* @param lastUsed
* @param incrementStep
*/
WorkerNumberGenerator(int lastUsed, int incrementStep) {
Preconditions.checkArgument(lastUsed >= 0,
"Last Used worker Number cannot be negative {} ", lastUsed);
Preconditions.checkArgument(incrementStep >= 1,
"incrementStepcannot be less than 1 {} ", incrementStep);
this.lastUsed = lastUsed;
this.currLimit = lastUsed;
this.incrementStep = incrementStep;
}
/**
* Default constructor sets last used number to 0.
*/
WorkerNumberGenerator() {
this(0, DEFAULT_INCREMENT_STEP);
}
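/*
* Invariant: the number persisted via setNextWorkerNumberToUse is always >= any number already handed out
* by getNextWorkerNumber. Numbers are reserved in blocks of incrementStep, so at most one store write is
* needed per incrementStep workers, and after a restart the generator resumes from the persisted limit
* without re-issuing a number.
*/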
private void advance(MantisJobMetadataImpl mantisJobMetaData, MantisJobStore jobStore) {
try {
final int value = currLimit + incrementStep;
// If store operations fail, extraneous workers will be killed since currLimit would be lower
setNextWorkerNumberWithRetries(mantisJobMetaData, jobStore, value);
currLimit = value;
} catch (Exception e) {
hasErrored = true;
LOGGER.error("Exception setting nextWorkerNumberToUse after {} consecutive attempts", MAX_ATTEMPTS, e);
throw new RuntimeException("Unexpected error setting next worker number to use", e);
}
}
private void setNextWorkerNumberWithRetries(MantisJobMetadataImpl mantisJobMetaData, MantisJobStore jobStore, int value) throws Exception {
int attempts = 0;
Exception exception = null;
while (attempts < MAX_ATTEMPTS) {
try {
mantisJobMetaData.setNextWorkerNumberToUse(value, jobStore);
return;
} catch (Exception e) {
LOGGER.warn("Failed to setNextWorkerNumberToUse to {} (attempt {}/{})", value, attempts, MAX_ATTEMPTS, e);
exception = e;
}
Thread.sleep(SLEEP_DURATION_MS);
attempts++;
}
throw exception;
}
/**
* Get the next unused worker number.
*
* For performance reasons, this object updates state in persistence every N calls made to this method.
*
* @return The next worker number to use for new workers
* @throws IllegalStateException if there was an error saving the next worker number to use to the job store
*/
int getNextWorkerNumber(MantisJobMetadataImpl mantisJobMetaData, MantisJobStore jobStore) {
if (hasErrored) {
throw new IllegalStateException("Unexpected: Invalid state likely due to getting/setting"
+ " next worker number");
}
if (lastUsed == currLimit) {
advance(mantisJobMetaData, jobStore);
}
return ++lastUsed;
}
}
/**
* Responsible for managing worker related state of this job.
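* Covers submitting the initial workers on job submit, rebuilding worker state on master failover,
* heartbeat accounting, worker resubmits, migration off disabled VMs, stage scaling, and publishing
* refreshed worker assignments on the job scheduling info subject.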
*/
class WorkerManager implements IWorkerManager {
private static final int WORKER_RESUBMIT_LIMIT = 100;
private ObjectMapper mapper = new ObjectMapper();
private final WorkerNumberGenerator workerNumberGenerator;
private boolean allWorkersStarted = false;
private final IMantisJobManager jobMgr;
private ConcurrentSkipListSet<Integer> workersToMigrate = new ConcurrentSkipListSet<>();
private int sinkStageNum;
private final MigrationStrategy migrationStrategy;
private final MantisScheduler scheduler;
private long lastWorkerMigrationTimestamp = Long.MIN_VALUE;
private Map<Integer, WorkerAssignments> stageAssignments = new HashMap<>();
private BehaviorSubject<JobSchedulingInfo> jobSchedulingInfoBehaviorSubject;
private String currentJobSchedulingInfoStr = null;
private final WorkerResubmitRateLimiter resubmitRateLimiter = new WorkerResubmitRateLimiter();
// Use expiring cache to effectively track worker resubmitted in the last hour.
private Cache recentErrorWorkersCache = CacheBuilder.newBuilder()
.expireAfterWrite(1, TimeUnit.HOURS)
.build();
private volatile boolean stageAssignmentPotentiallyChanged;
private final boolean batchSchedulingEnabled;
/**
* Creates an instance of this class.
*
* @param jobMgr
* @param migrationConfig
* @param scheduler
* @param isSubmit
* @throws Exception
*/
WorkerManager(
IMantisJobManager jobMgr, WorkerMigrationConfig migrationConfig, MantisScheduler scheduler,
boolean isSubmit, boolean batchSchedulingEnabled) throws Exception {
workerNumberGenerator = new WorkerNumberGenerator((isSubmit) ? 0
: jobMgr.getJobDetails().getNextWorkerNumberToUse(), WorkerNumberGenerator.DEFAULT_INCREMENT_STEP);
this.scheduler = scheduler;
this.jobMgr = jobMgr;
this.batchSchedulingEnabled = batchSchedulingEnabled;
migrationStrategy = MigrationStrategyFactory.getStrategy(jobId.getId(), migrationConfig);
int noOfStages = mantisJobMetaData.getStageMetadata().size();
if (noOfStages == 1) {
sinkStageNum = 1;
} else {
sinkStageNum = noOfStages - 1;
}
JobSchedulingInfo initialJS = new JobSchedulingInfo(jobMgr.getJobId().getId(), new HashMap<>());
currentJobSchedulingInfoStr = mapper.writeValueAsString(initialJS);
jobSchedulingInfoBehaviorSubject = BehaviorSubject.create(initialJS);
initialize(isSubmit);
}
/**
* Initializes a worker manager.
*
* A WorkerManager can get initialized on a job submission or a failover.
*
* Init from Job submission: submits initial workers which each go through their startup lifecycle.
*
* Init from Master failover: workers are already running; gets state from Mesos and updates its view of the
* world. If worker information from Mesos is bad, these workers are gathered up and resubmitted together
* after the running workers have been initialized.
*
* @param isSubmit specifies if this initialization is due to job submission or a master failover.
* @throws Exception
*/
void initialize(boolean isSubmit) throws Exception {
if (isSubmit) {
submitInitialWorkers();
} else {
initializeRunningWorkers();
}
mantisJobMetaData.setJobCosts(costsCalculator.calculateCosts(mantisJobMetaData));
}
private void initializeRunningWorkers() {
// Scan for the list of all corrupted workers to be resubmitted.
List<JobWorker> workersToResubmit = markCorruptedWorkers();
List<IMantisWorkerMetadata> workersToSubmit = new ArrayList<>();
// publish a refresh before enqueuing tasks to the Scheduler, as there is a potential race between
// WorkerRegistryV2 getting updated and isWorkerValid being called from SchedulingService loop
// If worker is not found in the SchedulingService loop, it is considered invalid and prematurely
// removed from Fenzo state.
markStageAssignmentsChanged(true);
for (IMantisStageMetadata stageMeta : mantisJobMetaData.getStageMetadata().values()) {
Map<Integer, WorkerHost> workerHosts = new HashMap<>();
for (JobWorker worker : stageMeta.getAllWorkers()) {
IMantisWorkerMetadata wm = worker.getMetadata();
if (WorkerState.isRunningState(wm.getState())) {
// send fake heartbeat
try {
WorkerEvent fakeHB = new WorkerHeartbeat(new Status(jobId.getId(), stageMeta.getStageNum(),
wm.getWorkerIndex(), wm.getWorkerNumber(), Status.TYPE.HEARTBEAT, "",
MantisJobState.Started, System.currentTimeMillis()));
worker.processEvent(fakeHB, jobStore);
} catch (InvalidWorkerStateChangeException | IOException e) {
LOGGER.error("problem sending initial heartbeat for Job {} during initialization",
worker.getMetadata().getJobId(), e);
}
workerHosts.put(
wm.getWorkerNumber(),
new WorkerHost(
wm.getSlave(),
wm.getWorkerIndex(),
wm.getWorkerPorts().getPorts(),
DataFormatAdapter.convertWorkerStateToMantisJobState(wm.getState()),
wm.getWorkerNumber(),
wm.getMetricsPort(),
wm.getCustomPort()));
ScheduleRequest scheduleRequest = createSchedulingRequest(wm, empty());
scheduler.initializeRunningWorker(scheduleRequest, wm.getSlave(), wm.getSlaveID());
} else if (wm.getState().equals(WorkerState.Accepted)) {
// If the job is in accepted state, queue all its pending workers at once in a batch request.
// This is important when there were pending batch requests before the master failover
if (batchSchedulingEnabled && JobState.isAcceptedState(mantisJobMetaData.getState())) {
workersToSubmit.add(wm);
} else {
queueTask(wm);
}
}
}
if (stageMeta.getStageNum() > 0) {
stageAssignments.put(stageMeta.getStageNum(), new WorkerAssignments(stageMeta.getStageNum(),
stageMeta.getNumWorkers(), workerHosts));
}
}
if (JobState.isAcceptedState(mantisJobMetaData.getState()) && !workersToSubmit.isEmpty()) {
queueTasks(workersToSubmit, empty());
}
// publish another update after queuing tasks to Fenzo (in case some workers were marked Started
// due to the Fake heartbeat in above loop)
markStageAssignmentsChanged(true);
// Resubmit workers with missing ports so they can be reassigned new resources.
for (JobWorker jobWorker : workersToResubmit) {
LOGGER.warn("discovered workers with missing ports during initialization: {}", jobWorker);
try {
resubmitWorker(jobWorker);
} catch (Exception e) {
LOGGER.warn("Exception resubmitting worker {} during initializeRunningWorkers due to {}",
jobWorker, e.getMessage(), e);
}
}
}
private List<JobWorker> markCorruptedWorkers() {
List<JobWorker> corruptedWorkers = new ArrayList<>();
for (IMantisStageMetadata stageMeta : mantisJobMetaData.getStageMetadata().values()) {
for (JobWorker worker : stageMeta.getAllWorkers()) {
IMantisWorkerMetadata wm = worker.getMetadata();
Optional<WorkerPorts> workerPortsOptional = wm.getPorts();
if (WorkerState.isRunningState(wm.getState()) &&
(!workerPortsOptional.isPresent())) {
LOGGER.info("marking corrupted worker {} for Job ID {} as {}",
worker.getMetadata().getWorkerId(), jobId, WorkerState.Failed);
numMissingWorkerPorts.increment();
// Mark this worker as corrupted.
corruptedWorkers.add(worker);
// Send initial status event to signal to the worker to mark itself as failed.
try {
WorkerStatus status = new WorkerStatus(new Status(jobId.getId(), stageMeta.getStageNum(),
wm.getWorkerIndex(), wm.getWorkerNumber(), Status.TYPE.HEARTBEAT, "",
MantisJobState.Failed, System.currentTimeMillis()));
worker.processEvent(status, jobStore);
} catch (InvalidWorkerStateChangeException | IOException e) {
LOGGER.error("problem sending initial heartbeat for Job {} during initialization",
worker.getMetadata().getJobId(), e);
}
}
}
}
return corruptedWorkers;
}
private void markStageAssignmentsChanged(boolean forceRefresh) {
this.stageAssignmentPotentiallyChanged = true;
long refreshInterval = ConfigurationProvider.getConfig().getStageAssignmentRefreshIntervalMs();
if (refreshInterval == -1 || forceRefresh) {
refreshStageAssignmentsAndPush();
}
}
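/*
* Rebuilds the per-stage WorkerAssignments map from current worker metadata and, if assignments may have
* changed, pushes a fresh JobSchedulingInfo snapshot onto the BehaviorSubject so that discovery consumers
* (GetJobSchedInfoRequest / GetLatestJobDiscoveryInfoRequest handlers) see the latest worker hosts. Also
* publishes a WorkerListChangedEvent on the lifecycle event stream.
*/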
private void refreshStageAssignmentsAndPush() {
if (!stageAssignmentPotentiallyChanged) {
return;
}
List<IMantisWorkerMetadata> acceptedAndActiveWorkers = new ArrayList<>();
List<IMantisWorkerMetadata> activeWorkers = new ArrayList<>();
for (IMantisStageMetadata stageMeta : mantisJobMetaData.getStageMetadata().values()) {
Map<Integer, WorkerHost> workerHosts = new HashMap<>();
for (JobWorker worker : stageMeta.getAllWorkers()) {
IMantisWorkerMetadata wm = worker.getMetadata();
if (WorkerState.isRunningState(wm.getState())) {
workerHosts.put(
wm.getWorkerNumber(),
new WorkerHost(
wm.getSlave(),
wm.getWorkerIndex(),
wm.getWorkerPorts().getPorts(),
DataFormatAdapter.convertWorkerStateToMantisJobState(wm.getState()),
wm.getWorkerNumber(),
wm.getMetricsPort(),
wm.getCustomPort()));
activeWorkers.add(wm);
acceptedAndActiveWorkers.add(wm);
} else if (wm.getState().equals(WorkerState.Accepted)) {
acceptedAndActiveWorkers.add(wm);
}
}
stageAssignments.put(stageMeta.getStageNum(), new WorkerAssignments(stageMeta.getStageNum(),
stageMeta.getNumWorkers(), workerHosts));
}
JobSchedulingInfo jobSchedulingInfo = new JobSchedulingInfo(jobId.getId(), stageAssignments);
jobSchedulingInfoBehaviorSubject.onNext(jobSchedulingInfo);
eventPublisher.publishWorkerListChangedEvent(new LifecycleEventsProto.WorkerListChangedEvent(
new WorkerInfoListHolder(this.jobMgr.getJobId(), acceptedAndActiveWorkers)));
numSchedulingChangesRefreshed.increment();
stageAssignmentPotentiallyChanged = false;
}
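/**
* Creates the initial set of workers for all stages as defined by the job's scheduling info, persists them
* to the job store and queues them to the scheduler (as a single batch if batch scheduling is enabled).
*/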
private void submitInitialWorkers() throws Exception {
List<IMantisWorkerMetadata> workers = getInitialWorkers(
mantisJobMetaData.getJobDefinition(),
System.currentTimeMillis());
try {
jobStore.storeNewWorkers(jobMgr.getJobDetails(), workers);
LOGGER.info("Stored workers {} for Job {}", workers, jobId);
// refresh Worker Registry state before enqueuing task to Scheduler
markStageAssignmentsChanged(true);
if (!workers.isEmpty()) {
// queue to scheduler
if (batchSchedulingEnabled) {
queueTasks(workers, empty());
} else {
workers.forEach(this::queueTask);
}
}
} catch (Exception e) {
LOGGER.error("Error {} storing workers of job {}", e.getMessage(), jobId.getId(), e);
throw new RuntimeException("Exception saving worker for Job " + jobId, e);
}
}
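/**
* Converts the given worker metadata into schedule requests and submits them to the scheduler as one
* batch schedule request.
*/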
private void queueTasks(final List<IMantisWorkerMetadata> workerRequests, final Optional<Long> readyAt) {
final List scheduleRequests = workerRequests
.stream()
.map(wR -> createSchedulingRequest(wR, readyAt))
.collect(Collectors.toList());
LOGGER.info("Queueing up batch schedule request for {} workers", workerRequests.size());
try {
scheduler.scheduleWorkers(new BatchScheduleRequest(scheduleRequests));
} catch (Exception e) {
LOGGER.error("Exception queueing tasks", e);
}
}
private void queueTask(final IMantisWorkerMetadata workerRequest) {
queueTasks(Collections.singletonList(workerRequest), empty());
}
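/**
* Builds a ScheduleRequest for the given worker, carrying the job metadata, stage constraints and the
* scheduling constraints merged from the job definition and the artifact tags.
*/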
private ScheduleRequest createSchedulingRequest(
final IMantisWorkerMetadata workerRequest,
final Optional<Long> readyAt) {
try {
final WorkerId workerId = workerRequest.getWorkerId();
// setup constraints
Optional<IMantisStageMetadata> stageMetadataOp =
mantisJobMetaData.getStageMetadata(workerRequest.getStageNum());
if (!stageMetadataOp.isPresent()) {
throw new RuntimeException(String.format("No such stage %d", workerRequest.getStageNum()));
}
IMantisStageMetadata stageMetadata = stageMetadataOp.get();
List<JobConstraints> stageHC = stageMetadata.getHardConstraints();
List<JobConstraints> stageSC = stageMetadata.getSoftConstraints();
final Set<String> coTasks = new HashSet<>();
if ((stageHC != null && !stageHC.isEmpty())
|| (stageSC != null && !stageSC.isEmpty())) {
for (JobWorker jobWorker : stageMetadata.getAllWorkers()) {
if (jobWorker.getMetadata().getWorkerNumber() != workerId.getWorkerNum()) {
coTasks.add(workerId.getId());
}
}
}
JobMetadata jobMetadata = new JobMetadata(
mantisJobMetaData.getJobId().getId(),
mantisJobMetaData.getJobJarUrl(),
mantisJobMetaData.getJobDefinition().getVersion(),
mantisJobMetaData.getTotalStages(),
mantisJobMetaData.getUser(),
mantisJobMetaData.getSchedulingInfo(),
mantisJobMetaData.getParameters(),
getSubscriptionTimeoutSecs(mantisJobMetaData),
getHeartbeatIntervalSecs(mantisJobMetaData),
mantisJobMetaData.getMinRuntimeSecs()
);
ScheduleRequest sr = new ScheduleRequest(
workerId,
workerRequest.getStageNum(),
jobMetadata,
mantisJobMetaData.getSla().orElse(new JobSla.Builder().build()).getDurationType(),
// TODO(fdichiara): make this a property of JobStageMetadata. https://github.com/Netflix/mantis/pull/629/files#r1487043262
SchedulingConstraints.of(
stageMetadata.getMachineDefinition(),
// Fetch the 'sizeName' for the given stage among its container attributes
stageMetadata.getSizeAttribute(),
mergeJobDefAndArtifactAssigmentAttributes(jobMetadata.getJobJarUrl())),
readyAt.orElse(0L));
return sr;
} catch (Exception e) {
LOGGER.error("Exception creating scheduleRequest ", e);
throw e;
}
}
/**
* Merges the scheduling attribute assignments from the job definition and the artifact. It does so by first
* fetching the associated JobArtifact tags using the artifact name derived from the job's artifact URL, and then
* merging them with the assignment attributes from the job definition itself. Keys from the job definition take
* precedence over keys from the artifact's tags.
*
* @param artifactUrl The URL of the artifact leveraged by the job for which the attributes are to be collated
* @return A merged map of scheduling attributes. The precedence of keys follows: job definition > artifact's tags.
*/
private Map<String, String> mergeJobDefAndArtifactAssigmentAttributes(URL artifactUrl) {
try {
Optional<String> artifactName = DataFormatAdapter.extractArtifactBaseName(artifactUrl);
if (artifactName.isPresent()) {
JobArtifact artifact = jobStore.getJobArtifact(ArtifactID.of(artifactName.get()));
if (artifact != null && artifact.getTags() != null) {
Map<String, String> mergedMap = new HashMap<>(artifact.getTags());
mergedMap.putAll(mantisJobMetaData.getJobDefinition().getSchedulingConstraints());
return mergedMap;
}
}
} catch (Exception e) {
LOGGER.warn("Couldn't find job artifact by id: {}", artifactUrl, e);
}
return mantisJobMetaData.getJobDefinition().getSchedulingConstraints();
}
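/**
* Creates worker metadata for every instance of every stage defined in the job's scheduling info.
*/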
private List<IMantisWorkerMetadata> getInitialWorkers(JobDefinition jobDetails, long submittedAt)
throws Exception {
List<IMantisWorkerMetadata> workerRequests = Lists.newLinkedList();
SchedulingInfo schedulingInfo = jobDetails.getSchedulingInfo();
int totalStages = schedulingInfo.getStages().size();
Iterator<Integer> it = schedulingInfo.getStages().keySet().iterator();
while (it.hasNext()) {
int stageNum = it.next();
List<IMantisWorkerMetadata> stageWorkers = setupStageWorkers(schedulingInfo, totalStages,
stageNum, submittedAt);
workerRequests.addAll(stageWorkers);
}
return workerRequests;
}
private List<IMantisWorkerMetadata> setupStageWorkers(
SchedulingInfo schedulingInfo, int totalStages,
int stageNum, long submittedAt) throws Exception {
List<IMantisWorkerMetadata> workerRequests = new LinkedList<>();
StageSchedulingInfo stage = schedulingInfo.getStages().get(stageNum);
if (stage == null) {
LOGGER.error("StageSchedulingInfo cannot be null for Stage {}", stageNum);
throw new Exception("StageSchedulingInfo cannot be null for Stage " + stageNum);
//return workerRequests; // can happen when stageNum=0 and there is no jobMaster defined
}
int numInstancesAtStage = stage.getNumberOfInstances();
// add worker request for each instance required in stage
int stageIndex = 0;
for (int i = 0; i < numInstancesAtStage; i++) {
// during initialization worker number and index are identical
int workerIndex = stageIndex++;
if (!mantisJobMetaData.getStageMetadata(stageNum).isPresent()) {
IMantisStageMetadata msmd = new MantisStageMetadataImpl.Builder().
withJobId(jobId)
.withStageNum(stageNum)
.withNumStages(totalStages)
.withMachineDefinition(stage.getMachineDefinition())
.withNumWorkers(numInstancesAtStage)
.withHardConstraints(stage.getHardConstraints())
.withSoftConstraints(stage.getSoftConstraints())
.withScalingPolicy(stage.getScalingPolicy())
.withSizeAttribute(Optional.ofNullable(stage.getContainerAttributes()).map(attrs -> attrs.get(MANTIS_STAGE_CONTAINER_SIZE_NAME_KEY)).orElse(null))
.isScalable(stage.getScalable())
.build();
mantisJobMetaData.addJobStageIfAbsent(msmd);
jobStore.updateStage(msmd);
}
IMantisWorkerMetadata mwmd = addWorker(schedulingInfo, stageNum, workerIndex);
workerRequests.add(mwmd);
}
return workerRequests;
}
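/**
* Creates a new JobWorker for the given stage and worker index, registers it in the job metadata and
* recomputes the job costs. Throws InvalidJobException if the index is already occupied or invalid.
*/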
private IMantisWorkerMetadata addWorker(SchedulingInfo schedulingInfo, int stageNo, int workerIndex)
throws InvalidJobException {
StageSchedulingInfo stageSchedInfo = schedulingInfo.getStages().get(stageNo);
int workerNumber = workerNumberGenerator.getNextWorkerNumber(mantisJobMetaData, jobStore);
JobWorker jw = new JobWorker.Builder()
.withJobId(jobId)
.withWorkerIndex(workerIndex)
.withWorkerNumber(workerNumber)
.withNumberOfPorts(stageSchedInfo.getMachineDefinition().getNumPorts()
+ MANTIS_SYSTEM_ALLOCATED_NUM_PORTS)
.withStageNum(stageNo)
.withLifecycleEventsPublisher(eventPublisher)
.build();
if (!mantisJobMetaData.addWorkerMetadata(stageNo, jw)) {
Optional<JobWorker> tmp = mantisJobMetaData.getWorkerByIndex(stageNo, workerIndex);
if (tmp.isPresent()) {
throw new InvalidJobException(mantisJobMetaData.getJobId().getId(), stageNo, workerIndex,
new Exception("Couldn't add worker " + workerNumber + " as index " + workerIndex
+ ", that index already has worker " + tmp.get().getMetadata().getWorkerNumber()));
} else {
throw new InvalidJobException(mantisJobMetaData.getJobId().getId(), stageNo, workerIndex,
new Exception("Couldn't add worker " + workerNumber + " as index "
+ workerIndex + " doesn't exist"));
}
}
mantisJobMetaData.setJobCosts(costsCalculator.calculateCosts(mantisJobMetaData));
return jw.getMetadata();
}
@Override
public void shutdown() {
scheduler.unscheduleJob(jobId.getId());
// if workers have not already completed
if (!allWorkerCompleted()) {
// kill workers
terminateAllWorkersAsync();
}
//send empty schedulingInfo changes so downstream jobs would explicitly disconnect
jobSchedulingInfoBehaviorSubject.onNext(new JobSchedulingInfo(
this.jobMgr.getJobId().getId(),
new HashMap<>()));
jobSchedulingInfoBehaviorSubject.onCompleted();
}
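/**
* Asynchronously terminates all workers of this job that are not already in a terminal state and refreshes
* the stage assignments once done.
*/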
private void terminateAllWorkersAsync() {
LOGGER.info("Terminating all workers of job {}", jobId);
Observable.from(mantisJobMetaData.getStageMetadata().values())
.flatMap((st) -> Observable.from(st.getAllWorkers()))
.filter((worker) -> !WorkerState.isTerminalState(worker.getMetadata().getState()))
.map((worker) -> {
LOGGER.info("Terminating " + worker);
terminateWorker(worker.getMetadata(), WorkerState.Completed, JobCompletedReason.Killed);
return worker;
})
.doOnCompleted(() -> markStageAssignmentsChanged(true))
.subscribeOn(Schedulers.io())
.subscribe();
LOGGER.info("Terminated all workers of job {}", jobId);
}
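/**
* Terminates the given worker: asks the scheduler to unschedule and kill the task, transitions the worker
* to the given final state and archives it in the job store. The worker index itself is left in place.
*/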
private void terminateWorker(
IMantisWorkerMetadata workerMeta, WorkerState finalWorkerState,
JobCompletedReason reason) {
LOGGER.info("Terminating worker {} with number {}", workerMeta, workerMeta.getWorkerNumber());
try {
WorkerId workerId = workerMeta.getWorkerId();
// call vmservice terminate
scheduler.unscheduleAndTerminateWorker(
workerMeta.getWorkerId(),
Optional.ofNullable(workerMeta.getSlave()));
int stageNum = mantisJobMetaData.getWorkerNumberToStageMap().get(workerMeta.getWorkerNumber());
Optional<IMantisStageMetadata> stageMetaOp = mantisJobMetaData.getStageMetadata(stageNum);
if (stageMetaOp.isPresent()) {
// Mark worker as terminal
WorkerTerminate terminateEvent = new WorkerTerminate(workerId, finalWorkerState, reason);
MantisStageMetadataImpl stageMetaData = (MantisStageMetadataImpl) stageMetaOp.get();
Optional<JobWorker> jobWorkerOp = stageMetaData.processWorkerEvent(terminateEvent, jobStore);
// Archive the terminated worker and publish a status event
if (jobWorkerOp.isPresent()) {
jobStore.archiveWorker(jobWorkerOp.get().getMetadata());
eventPublisher.publishStatusEvent(new LifecycleEventsProto.WorkerStatusEvent(INFO,
"Terminated worker, reason: " + reason.name(),
workerMeta.getStageNum(), workerMeta.getWorkerId(), workerMeta.getState()));
}
} else {
LOGGER.error("Stage {} not found while terminating worker {}", stageNum, workerId);
}
} catch (Exception e) {
LOGGER.error("Error terminating worker {}", workerMeta.getWorkerId(), e);
}
}
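/**
* Terminates the given worker, removes its index from the stage (archiving it in the job store) and
* refreshes the stage assignments. Used when scaling a stage down, unlike terminateWorker which keeps
* the worker index in place.
*/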
private void terminateAndRemoveWorker(
IMantisWorkerMetadata workerMeta, WorkerState finalWorkerState,
JobCompletedReason reason) {
LOGGER.info("Terminating and removing worker {}", workerMeta.getWorkerId().getId());
try {
WorkerId workerId = workerMeta.getWorkerId();
int stageNum = mantisJobMetaData.getWorkerNumberToStageMap().get(workerMeta.getWorkerNumber());
Optional<IMantisStageMetadata> stageMetaOp = mantisJobMetaData.getStageMetadata(stageNum);
if (stageMetaOp.isPresent()) {
// Mark worker as terminal
WorkerTerminate terminateEvent = new WorkerTerminate(workerId, finalWorkerState, reason);
MantisStageMetadataImpl stageMetaData = (MantisStageMetadataImpl) stageMetaOp.get();
Optional<JobWorker> workerOp = stageMetaData.processWorkerEvent(terminateEvent, jobStore);
eventPublisher.publishStatusEvent(new LifecycleEventsProto.WorkerStatusEvent(INFO,
"Removing worker, reason: " + reason.name(),
workerMeta.getStageNum(), workerMeta.getWorkerId(), workerMeta.getState()));
// remove this worker index and archive the worker
stageMetaData.unsafeRemoveWorker(workerId.getWorkerIndex(), workerId.getWorkerNum(), jobStore);
// call vmservice terminate
scheduler.unscheduleAndTerminateWorker(workerMeta.getWorkerId(), Optional.ofNullable(
workerMeta.getSlave()));
//remove from workerNumber to stage map
mantisJobMetaData.removeWorkerMetadata(workerMeta.getWorkerNumber());
mantisJobMetaData.setJobCosts(costsCalculator.calculateCosts(mantisJobMetaData));
LOGGER.info("Terminated worker {}", workerMeta);
markStageAssignmentsChanged(true);
} else {
LOGGER.error("Stage {} not found while terminating worker {}", stageNum, workerId);
}
} catch (Exception e) {
LOGGER.error("Error terminating worker {}", workerMeta.getWorkerId(), e);
}
}
@Override
public void refreshAndSendWorkerAssignments() {
refreshStageAssignmentsAndPush();
}
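/**
* Periodically verifies worker liveness: workers whose last heartbeat is older than the missed-heartbeat
* tolerance are resubmitted (if heartbeat based termination is enabled); workers that never sent a
* heartbeat are only logged, since the scheduler owns their retry logic. Also triggers migration of
* workers on disabled VMs.
*/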
@Override
public void checkHeartBeats(Instant currentTime) {
LOGGER.debug("Using worker timeout {} for job {}", getWorkerTimeoutSecs(), this.jobMgr.getJobId());
// a heartbeat is considered missed after 3 * heartbeatInterval; use a 1.5 multiplier of the worker timeout for this check interval
long missedHeartBeatToleranceSecs = (long) (1.5 * getWorkerTimeoutSecs());
// Allow more time for workers to start
long stuckInSubmitToleranceSecs =
missedHeartBeatToleranceSecs + ConfigurationProvider.getConfig().getWorkerInitTimeoutSecs();
List<JobWorker> workersToResubmit = Lists.newArrayList();
// expire worker resubmit entries
resubmitRateLimiter.expireResubmitRecords(currentTime.toEpochMilli());
// For each stage
for (IMantisStageMetadata stage : mantisJobMetaData.getStageMetadata().values()) {
// For each worker in the stage
for (JobWorker worker : stage.getAllWorkers()) {
IMantisWorkerMetadata workerMeta = worker.getMetadata();
if (!workerMeta.getLastHeartbeatAt().isPresent()) {
// the worker is still waiting for resource allocation and the scheduler should take care of
// the retry logic.
Instant acceptedAt = Instant.ofEpochMilli(workerMeta.getAcceptedAt());
LOGGER.warn("Job {}, Worker {} stuck in accepted state since {}",
this.jobMgr.getJobId(),
workerMeta.getWorkerId(),
acceptedAt);
} else {
if (Duration.between(workerMeta.getLastHeartbeatAt().get(), currentTime).getSeconds()
> missedHeartBeatToleranceSecs) {
// heartbeat too old
LOGGER.info("Job {}, Worker {} Duration between last heartbeat and now {} "
+ "missed heart beat threshold {} exceeded", this.jobMgr.getJobId(),
workerMeta.getWorkerId(), Duration.between(
workerMeta.getLastHeartbeatAt().get(),
currentTime).getSeconds(), missedHeartBeatToleranceSecs);
if (ConfigurationProvider.getConfig().isHeartbeatTerminationEnabled()) {
eventPublisher.publishStatusEvent(new LifecycleEventsProto.WorkerStatusEvent(WARN,
"heartbeat too old, resubmitting worker", workerMeta.getStageNum(),
workerMeta.getWorkerId(), workerMeta.getState()));
workersToResubmit.add(worker);
} else {
LOGGER.warn(
"Heart beat based termination is disabled. Skipping termination of "
+ "worker {} Please see mantis.worker.heartbeat.termination.enabled",
workerMeta);
}
}
}
}
}
for (JobWorker worker : workersToResubmit) {
try {
resubmitWorker(worker);
} catch (Exception e) {
LOGGER.warn(
"Exception {} occurred resubmitting Worker {}",
e.getMessage(),
worker.getMetadata(),
e);
}
}
migrateDisabledVmWorkers(currentTime);
}
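/**
* Resubmits a batch of workers that are running on disabled VMs, as selected by the configured
* migration strategy.
*/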
@Override
public void migrateDisabledVmWorkers(Instant currentTime) {
if (!workersToMigrate.isEmpty()) {
Map<Integer, Integer> workerToStageMap = mantisJobMetaData.getWorkerNumberToStageMap();
final List<Integer> workers = migrationStrategy.execute(workersToMigrate,
getNumberOfWorkersInStartedState(), getTotalWorkerCount(), lastWorkerMigrationTimestamp);
if (!workers.isEmpty()) {
LOGGER.info("Job {} Going to migrate {} workers in this iteration", jobId, workers.size());
}
workers.forEach((w) -> {
if (workerToStageMap.containsKey(w)) {
int stageNo = workerToStageMap.get(w);
Optional<IMantisStageMetadata> stageMetaOp = mantisJobMetaData.getStageMetadata(stageNo);
if (stageMetaOp.isPresent()) {
JobWorker jobWorker = null;
try {
jobWorker = stageMetaOp.get().getWorkerByWorkerNumber(w);
IMantisWorkerMetadata wm = jobWorker.getMetadata();
LOGGER.info("Moving worker {} of job {} away from disabled VM", wm.getWorkerId(),
jobId);
eventPublisher.publishStatusEvent(new LifecycleEventsProto.WorkerStatusEvent(INFO,
" Moving out of disabled VM " + wm.getSlave(), wm.getStageNum(),
wm.getWorkerId(), wm.getState()));
resubmitWorker(jobWorker);
lastWorkerMigrationTimestamp = System.currentTimeMillis();
} catch (Exception e) {
LOGGER.warn("Exception resubmitting worker {} during migration due to {}",
jobWorker, e.getMessage(), e);
}
} else {
LOGGER.warn("Stage {} Not Found. Skip move for worker {} in Job {}", stageNo, w, jobId);
}
} else {
LOGGER.warn("worker {} not found in workerToStageMap {} for Job {}", w, workerToStageMap,
jobId);
}
});
}
}
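/**
* Looks up the stage that owns the worker referenced by the given event; returns empty if the worker
* number or its stage is unknown to this job.
*/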
private Optional<IMantisStageMetadata> getStageForWorker(WorkerEvent event) {
// Make sure we know about this worker. If not terminate it
Map<Integer, Integer> workerToStageMap = mantisJobMetaData.getWorkerNumberToStageMap();
if (!workerToStageMap.containsKey(event.getWorkerId().getWorkerNum())) {
LOGGER.warn("Event {} from Unknown worker {} ", event.getWorkerId(), event);
return empty();
}
// Find stage associated with this worker
Integer stageNum = workerToStageMap.get(event.getWorkerId().getWorkerNum());
Optional<IMantisStageMetadata> stageMetaOp = mantisJobMetaData.getStageMetadata(stageNum);
if (!stageMetaOp.isPresent()) {
LOGGER.warn("Stage {} not found in Job {} while processing event {}", stageNum, jobId, event);
}
return stageMetaOp;
}
private void terminateUnknownWorkerIfNonTerminal(final WorkerEvent event) {
if (!JobHelper.isTerminalWorkerEvent(event)) {
LOGGER.warn("Non terminal event from Unknown worker {} in Job {}. Request Termination",
event.getWorkerId(), this.jobMgr.getJobId());
Optional host = JobHelper.getWorkerHostFromWorkerEvent(event);
scheduler.unscheduleAndTerminateWorker(event.getWorkerId(), host);
} else {
LOGGER.warn("Job {} Terminal event from Unknown worker {}. Ignoring", jobId, event.getWorkerId());
}
}
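/**
* Entry point for worker events: routes the event to the owning stage, handles unknown or unschedulable
* workers, resubmits workers that transition into an error state, archives completed workers and updates
* the job-level state (all workers started / all workers completed).
*/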
@Override
public void processEvent(WorkerEvent event, JobState jobState) {
try {
Optional<IMantisStageMetadata> stageMetaOp = getStageForWorker(event);
if (!stageMetaOp.isPresent()) {
terminateUnknownWorkerIfNonTerminal(event);
return;
}
// If worker cannot be scheduled currently, then put it back on the queue with delay and don't update
// its state
if (event instanceof WorkerUnscheduleable) {
scheduler.updateWorkerSchedulingReadyTime(
event.getWorkerId(),
resubmitRateLimiter.getWorkerResubmitTime(
event.getWorkerId(),
stageMetaOp.get().getStageNum()));
eventPublisher.publishStatusEvent(new LifecycleEventsProto.WorkerStatusEvent(
LifecycleEventsProto.StatusEvent.StatusEventType.ERROR,
"rate limiting: no resources to fit worker",
((WorkerUnscheduleable) event).getStageNum(), event.getWorkerId(), WorkerState.Accepted));
return;
}
MantisStageMetadataImpl stageMeta = (MantisStageMetadataImpl) stageMetaOp.get();
// Check if stage worker state (worker index -> worker number) is consistent with the worker event.
// TODO: add termination once confirmed the actual corruption scenario.
try {
if (event instanceof WorkerHeartbeat) {
int eventWorkerIndex = event.getWorkerId().getWorkerIndex();
int eventWorkerNum = event.getWorkerId().getWorkerNum();
int currentWorkerNum = stageMeta.getWorkerByIndex(eventWorkerIndex).getMetadata().getWorkerNumber();
if (currentWorkerNum > eventWorkerNum) {
// event is from a different worker number on same worker index
LOGGER.error(
"[Corrupted state] StaleWorkerEvent: {}, current worker at {}, Terminate stale "
+ "worker",
event.getWorkerId(),
currentWorkerNum);
} else if (currentWorkerNum < eventWorkerNum) {
// this case should not happen as new worker assignment should update state and persist first.
LOGGER.error(
"[Corrupted state] Newer worker num received: {}, Current stage worker: {}",
event,
currentWorkerNum);
}
}
} catch (InvalidJobException ije) {
LOGGER.error("Invalid job error when checking event: {}", event, ije);
}
try {
// Delegate processing of the event to the stage
Optional<JobWorker> workerOp = stageMeta.processWorkerEvent(event, jobStore);
if (!workerOp.isPresent()) {
terminateUnknownWorkerIfNonTerminal(event);
return;
}
IMantisWorkerMetadata wm = workerOp.get().getMetadata();
// If we need to migrate off of disabled VM add it to the queue
if (event instanceof WorkerOnDisabledVM) {
workersToMigrate.add(wm.getWorkerNumber());
return;
}
// Worker transitioned to an error state, resubmit it
if (WorkerState.isErrorState(wm.getState()) && !JobState.isTerminalState(jobState)) {
eventPublisher.publishStatusEvent(new LifecycleEventsProto.WorkerStatusEvent(WARN,
"resubmitting lost worker ", wm.getStageNum(),
wm.getWorkerId(), wm.getState()));
recentErrorWorkersCache.put(wm.getWorkerNumber(), true);
resubmitWorker(workerOp.get());
return;
} else if (WorkerState.isTerminalState(wm.getState())) {
// worker has explicitly completed, complete the job once all workers are done
jobStore.archiveWorker(wm);
LOGGER.info("Received Worker Complete signal. Wait for all workers to complete before "
+ "terminating Job {}", jobId);
}
if (!(event instanceof WorkerHeartbeat)) {
markStageAssignmentsChanged(false);
}
} catch (Exception e) {
LOGGER.warn("Exception saving worker update", e);
}
if (!allWorkersStarted && !JobState.isTerminalState(jobState)) {
if (allWorkerStarted()) {
allWorkersStarted = true;
jobMgr.onAllWorkersStarted();
scheduler.unscheduleJob(jobId.getId());
markStageAssignmentsChanged(true);
} else if (allWorkerCompleted()) {
LOGGER.info("Job {} All workers completed1", jobId);
allWorkersStarted = false;
jobMgr.onAllWorkersCompleted();
}
} else {
if (allWorkerCompleted()) {
LOGGER.info("Job {} All workers completed", jobId);
allWorkersStarted = false;
jobMgr.onAllWorkersCompleted();
}
}
} catch (Exception e1) {
LOGGER.error("Job {} Exception occurred in process worker event ", jobId, e1);
}
}
private boolean allWorkerStarted() {
Iterator<? extends IMantisStageMetadata> iterator =
mantisJobMetaData.getStageMetadata().values().iterator();
while (iterator.hasNext()) {
MantisStageMetadataImpl stageMeta = (MantisStageMetadataImpl) iterator.next();
if (!stageMeta.isAllWorkerStarted()) {
return false;
}
}
return true;
}
private int getNumberOfWorkersInStartedState() {
return mantisJobMetaData.getStageMetadata().values().stream()
.map((stageMeta) -> ((MantisStageMetadataImpl) stageMeta).getNumStartedWorkers())
.reduce(0, (acc, num) -> acc + num);
}
private int getTotalWorkerCount() {
return mantisJobMetaData.getStageMetadata().values().stream()
.map(IMantisStageMetadata::getNumWorkers)
.reduce(0, (acc, num) -> acc + num);
}
private boolean allWorkerCompleted() {
Iterator<? extends IMantisStageMetadata> iterator =
mantisJobMetaData.getStageMetadata().values().iterator();
while (iterator.hasNext()) {
MantisStageMetadataImpl stageMeta = (MantisStageMetadataImpl) iterator.next();
// skip job master worker
if (stageMeta.getStageNum() == 0) {
continue;
}
if (!stageMeta.isAllWorkerCompleted()) {
return false;
}
}
return true;
}
@Override
public void resubmitWorker(int workerNum) throws Exception {
Map<Integer, Integer> workerToStageMap = mantisJobMetaData.getWorkerNumberToStageMap();
if (workerToStageMap.containsKey(workerNum)) {
int stageNum = workerToStageMap.get(workerNum);
Optional<IMantisStageMetadata> stageMeta = mantisJobMetaData.getStageMetadata(stageNum);
if (stageMeta.isPresent()) {
JobWorker worker = stageMeta.get().getWorkerByWorkerNumber(workerNum);
resubmitWorker(worker);
} else {
throw new Exception(String.format("Invalid stage %d in resubmit Worker request %d", stageNum,
workerNum));
}
} else {
LOGGER.warn("No such Worker number {} in Job with ID {}", workerNum, jobId);
throw new Exception(String.format("No such worker number %d in resubmit Worker request", workerNum));
}
}
@Override
public List<IMantisWorkerMetadata> getActiveWorkers(int limit) {
List<IMantisWorkerMetadata> workers = mantisJobMetaData.getStageMetadata().values()
.stream()
.flatMap((st) -> st.getAllWorkers().stream())
.filter((worker) -> !WorkerState.isTerminalState(worker.getMetadata().getState()))
.map(JobWorker::getMetadata)
.collect(Collectors.toList());
if (workers.size() > limit) {
return workers.subList(0, limit);
} else {
return workers;
}
}
@Override
public BehaviorSubject<JobSchedulingInfo> getJobStatusSubject() {
return this.jobSchedulingInfoBehaviorSubject;
}
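/**
* Replaces the given worker with a brand new worker (new worker number) on the same index, terminates the
* old task and queues the replacement with a rate-limited delay. If the resubmission limit has been
* reached, the job manager is notified instead.
*/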
private void resubmitWorker(JobWorker oldWorker) throws Exception {
LOGGER.info("Resubmitting worker {}", oldWorker.getMetadata());
Map<Integer, Integer> workerToStageMap = mantisJobMetaData.getWorkerNumberToStageMap();
IMantisWorkerMetadata oldWorkerMetadata = oldWorker.getMetadata();
if (recentErrorWorkersCache.size()
< ConfigurationProvider.getConfig().getMaximumResubmissionsPerWorker()) {
Integer stageNo = workerToStageMap.get(oldWorkerMetadata.getWorkerId().getWorkerNum());
if (stageNo == null) {
String errMsg = String.format("Stage %d not found in Job %s while resubmitting worker %s",
stageNo, jobId, oldWorker);
LOGGER.warn(errMsg);
throw new Exception(errMsg);
}
Optional<IMantisStageMetadata> stageMetaOp = mantisJobMetaData.getStageMetadata(stageNo);
if (!stageMetaOp.isPresent()) {
String errMsg = String.format("Stage %d not found in Job %s while resubmitting worker %s",
stageNo, jobId, oldWorker);
LOGGER.warn(errMsg);
throw new Exception(errMsg);
}
MantisStageMetadataImpl stageMeta = (MantisStageMetadataImpl) stageMetaOp.get();
JobWorker newWorker = new JobWorker.Builder()
.withJobId(jobId)
.withWorkerIndex(oldWorkerMetadata.getWorkerIndex())
.withWorkerNumber(workerNumberGenerator.getNextWorkerNumber(mantisJobMetaData, jobStore))
.withNumberOfPorts(stageMeta.getMachineDefinition().getNumPorts()
+ MANTIS_SYSTEM_ALLOCATED_NUM_PORTS)
.withStageNum(oldWorkerMetadata.getStageNum())
.withResubmitCount(oldWorkerMetadata.getTotalResubmitCount() + 1)
.withResubmitOf(oldWorkerMetadata.getWorkerNumber())
.withLifecycleEventsPublisher(eventPublisher)
.build();
mantisJobMetaData.replaceWorkerMetaData(oldWorkerMetadata.getStageNum(), newWorker, oldWorker,
jobStore);
mantisJobMetaData.setJobCosts(costsCalculator.calculateCosts(mantisJobMetaData));
// kill the task if it is still running
scheduler.unscheduleAndTerminateWorker(
oldWorkerMetadata.getWorkerId(),
Optional.ofNullable(oldWorkerMetadata.getSlave()));
long workerResubmitTime = resubmitRateLimiter.getWorkerResubmitTime(
newWorker.getMetadata().getWorkerId(), stageMeta.getStageNum());
Optional<Long> delayDuration = of(workerResubmitTime);
// publish a refresh before enqueuing new Task to Scheduler
markStageAssignmentsChanged(true);
// queue the new worker for execution
queueTasks(Collections.singletonList(newWorker.getMetadata()), delayDuration);
LOGGER.info("Worker {} successfully queued for scheduling", newWorker);
numWorkerResubmissions.increment();
} else {
// todo numWorkerResubmitLimitReached.increment();
LOGGER.error("Resubmit count exceeded");
jobMgr.onTooManyWorkerResubmits();
}
}
/**
* Preconditions: the stage is valid and scalable. Determines the actual number of workers for this stage
* within the configured min and max, updates the expected number of workers first and saves it to the store
* (if that fails the operation is aborted), then continues adding/terminating workers one by one. If an
* exception occurs while adding/removing any worker, we continue with the others; the heartbeat check should
* kick in and resubmit any workers that didn't get scheduled.
*/
@Override
public int scaleStage(MantisStageMetadataImpl stageMetaData, int numWorkers, String reason) {
LOGGER.info("Scaling stage {} to {} workers", stageMetaData.getStageNum(), numWorkers);
final int oldNumWorkers = stageMetaData.getNumWorkers();
int max = ConfigurationProvider.getConfig().getMaxWorkersPerStage();
int min = 0;
if (stageMetaData.getScalingPolicy() != null) {
max = stageMetaData.getScalingPolicy().getMax();
min = stageMetaData.getScalingPolicy().getMin();
}
// sanitize input worker count to be between min and max
int newNumWorkerCount = Math.max(Math.min(numWorkers, max), min);
if (newNumWorkerCount != oldNumWorkers) {
try {
stageMetaData.unsafeSetNumWorkers(newNumWorkerCount, jobStore);
eventPublisher.publishStatusEvent(new LifecycleEventsProto.JobStatusEvent(INFO,
String.format("Setting #workers to %d for stage %d, reason=%s", newNumWorkerCount,
stageMetaData.getStageNum(), reason), getJobId(), getJobState()));
} catch (Exception e) {
String error = String.format("Exception updating stage %d worker count for Job %s due to %s",
stageMetaData.getStageNum(), jobId, e.getMessage());
LOGGER.warn(error);
eventPublisher.publishStatusEvent(new LifecycleEventsProto.JobStatusEvent(WARN,
String.format("Scaling stage failed for stage %d reason: %s", stageMetaData.getStageNum(), e.getMessage()),
getJobId(), getJobState()));
throw new RuntimeException(error);
}
if (newNumWorkerCount > oldNumWorkers) {
for (int i = 0; i < newNumWorkerCount - oldNumWorkers; i++) {
try {
int newWorkerIndex = oldNumWorkers + i;
SchedulingInfo schedInfo = mantisJobMetaData.getJobDefinition().getSchedulingInfo();
IMantisWorkerMetadata workerRequest = addWorker(schedInfo, stageMetaData.getStageNum(),
newWorkerIndex);
jobStore.storeNewWorker(workerRequest);
markStageAssignmentsChanged(true);
queueTask(workerRequest);
} catch (Exception e) {
// creating a worker failed, but the expected number of workers was already persisted;
// the heartbeat check will retry launching this worker
LOGGER.warn("Exception adding new worker for {}", stageMetaData.getJobId().getId(), e);
}
}
} else {
// potential bulk removal opportunity?
for (int i = 0; i < oldNumWorkers - newNumWorkerCount; i++) {
try {
final JobWorker w = stageMetaData.getWorkerByIndex(oldNumWorkers - i - 1);
terminateAndRemoveWorker(w.getMetadata(), WorkerState.Completed, JobCompletedReason.Killed);
} catch (InvalidJobException e) {
// deleting a worker failed, but the expected number of workers was already persisted;
// the heartbeat check will retry killing this worker
LOGGER.warn("Exception terminating worker for {}", stageMetaData.getJobId().getId(), e);
}
}
}
}
LOGGER.info("{} Scaled stage to {} workers", stageMetaData.getJobId().getId(), newNumWorkerCount);
return newNumWorkerCount;
}
}
private String getResourceCluster() {
return mantisJobMetaData.getJobDefinition().getResourceCluster().map(ClusterID::getResourceID).orElse("mesos");
}
}