Please wait. This can take a few minutes ...
Downloading a project requires significant server resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
com.hubspot.singularity.scheduler.SingularityDeployChecker Maven / Gradle / Ivy
package com.hubspot.singularity.scheduler;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import javax.inject.Singleton;
import org.apache.commons.lang3.time.DurationFormatUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Optional;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.inject.Inject;
import com.hubspot.baragon.models.BaragonRequestState;
import com.hubspot.mesos.JavaUtils;
import com.hubspot.singularity.DeployState;
import com.hubspot.singularity.LoadBalancerRequestType;
import com.hubspot.singularity.LoadBalancerRequestType.LoadBalancerRequestId;
import com.hubspot.singularity.RequestState;
import com.hubspot.singularity.RequestType;
import com.hubspot.singularity.SingularityDeploy;
import com.hubspot.singularity.SingularityDeployFailure;
import com.hubspot.singularity.SingularityDeployFailureReason;
import com.hubspot.singularity.SingularityDeployKey;
import com.hubspot.singularity.SingularityDeployMarker;
import com.hubspot.singularity.SingularityDeployProgress;
import com.hubspot.singularity.SingularityDeployResult;
import com.hubspot.singularity.SingularityLoadBalancerUpdate;
import com.hubspot.singularity.SingularityPendingDeploy;
import com.hubspot.singularity.SingularityPendingRequest;
import com.hubspot.singularity.SingularityPendingRequest.PendingType;
import com.hubspot.singularity.SingularityPendingRequestBuilder;
import com.hubspot.singularity.SingularityPendingTaskId;
import com.hubspot.singularity.SingularityRequest;
import com.hubspot.singularity.SingularityRequestDeployState;
import com.hubspot.singularity.SingularityRequestHistory.RequestHistoryType;
import com.hubspot.singularity.SingularityRequestWithState;
import com.hubspot.singularity.SingularityTask;
import com.hubspot.singularity.SingularityTaskCleanup;
import com.hubspot.singularity.SingularityTaskId;
import com.hubspot.singularity.SingularityTaskShellCommandRequestId;
import com.hubspot.singularity.SingularityUpdatePendingDeployRequest;
import com.hubspot.singularity.TaskCleanupType;
import com.hubspot.singularity.api.SingularityRunNowRequest;
import com.hubspot.singularity.config.SingularityConfiguration;
import com.hubspot.singularity.data.DeployManager;
import com.hubspot.singularity.data.RequestManager;
import com.hubspot.singularity.data.TaskManager;
import com.hubspot.singularity.expiring.SingularityExpiringPause;
import com.hubspot.singularity.expiring.SingularityExpiringScale;
import com.hubspot.singularity.hooks.LoadBalancerClient;
import com.hubspot.singularity.mesos.SingularitySchedulerLock;
import com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth;
/**
 * Examines pending deploys and, for each one, works out whether it is still waiting, has
 * succeeded, or has failed / been canceled, then persists the outcome through the injected
 * managers.
 */
@Singleton
public class SingularityDeployChecker {
private static final Logger LOG = LoggerFactory.getLogger(SingularityDeployChecker.class);
// Source of pending deploys, cancel requests, deploy update requests and deploy state.
private final DeployManager deployManager;
// Task lookups, task cleanups and per-task load balancer state.
private final TaskManager taskManager;
// Computes the health of the tasks belonging to a deploy.
private final SingularityDeployHealthHelper deployHealthHelper;
// Request lookups, the pending request queue and request state changes.
private final RequestManager requestManager;
private final SingularityConfiguration configuration;
// Client used to query and cancel load balancer requests.
private final LoadBalancerClient lbClient;
// Provides the per-request lock under which each deploy is checked.
private final SingularitySchedulerLock lock;
// Cached thread pool with threads named "deploy-checker-N".
// NOTE(review): not referenced in the part of the class visible here — confirm it is used further down.
private final ExecutorService deployCheckerExecutor = Executors.newCachedThreadPool(new ThreadFactoryBuilder().setNameFormat("deploy-checker-%d").build());
/**
 * Wires the checker to its collaborators; every argument is stored as-is in the field of the
 * same name.
 */
@Inject
public SingularityDeployChecker(DeployManager deployManager, SingularityDeployHealthHelper deployHealthHelper, LoadBalancerClient lbClient, RequestManager requestManager, TaskManager taskManager,
    SingularityConfiguration configuration, SingularitySchedulerLock lock) {
  // Data managers.
  this.deployManager = deployManager;
  this.requestManager = requestManager;
  this.taskManager = taskManager;

  // Helpers and external clients.
  this.deployHealthHelper = deployHealthHelper;
  this.lbClient = lbClient;

  // Configuration and locking.
  this.configuration = configuration;
  this.lock = lock;
}
/**
 * Runs one pass over all pending deploys.
 *
 * <p>Each pending deploy is checked under its request's lock. Once every deploy has been
 * checked, the cancel requests and pending-deploy update requests that were read at the start of
 * the pass are deleted.
 *
 * @return the number of pending deploys that were checked; {@code 0} when there were neither
 *     pending deploys nor cancel requests (in which case nothing is deleted either)
 */
public int checkDeploys() {
  final List<SingularityPendingDeploy> pendingDeploys = deployManager.getPendingDeploys();
  final List<SingularityDeployMarker> cancelDeploys = deployManager.getCancelDeploys();
  final List<SingularityUpdatePendingDeployRequest> updateRequests = deployManager.getPendingDeployUpdates();

  if (pendingDeploys.isEmpty() && cancelDeploys.isEmpty()) {
    return 0;
  }

  // Resolve the deploy objects for all pending deploys up front, in a single lookup.
  final Map<SingularityPendingDeploy, SingularityDeployKey> pendingDeployToKey = SingularityDeployKey.fromPendingDeploys(pendingDeploys);
  final Map<SingularityDeployKey, SingularityDeploy> deployKeyToDeploy = deployManager.getDeploysForKeys(pendingDeployToKey.values());

  pendingDeploys.parallelStream().forEach((pendingDeploy) -> {
    lock.runWithRequestLock(() -> {
      checkDeploy(pendingDeploy, cancelDeploys, pendingDeployToKey, deployKeyToDeploy, updateRequests);
    }, pendingDeploy.getDeployMarker().getRequestId(), getClass().getSimpleName());
  });

  // The cancel / update requests have been consumed by this pass (whether or not they matched).
  cancelDeploys.forEach(deployManager::deleteCancelDeployRequest);
  updateRequests.forEach(deployManager::deleteUpdatePendingDeployRequest);

  return pendingDeploys.size();
}
/**
 * Evaluates a single pending deploy and, when its result is final, persists that result.
 *
 * @param pendingDeploy the pending deploy to evaluate
 * @param cancelDeploys outstanding cancel requests, searched for one matching this deploy
 * @param pendingDeployToKey lookup from pending deploy to its deploy key
 * @param deployKeyToDeploy lookup from deploy key to the deploy; may lack an entry for this deploy
 * @param updateRequests outstanding pending-deploy update requests, searched for a match
 */
private void checkDeploy(final SingularityPendingDeploy pendingDeploy, final List cancelDeploys,
final Map pendingDeployToKey, final Map deployKeyToDeploy,
List updateRequests) {
final SingularityDeployKey deployKey = pendingDeployToKey.get(pendingDeploy);
final Optional deploy = Optional.fromNullable(deployKeyToDeploy.get(deployKey));
Optional maybeRequestWithState = requestManager.getRequest(pendingDeploy.getDeployMarker().getRequestId());
// Continue only when the request is FINISHED, is PAUSED while deploys of paused requests are
// allowed by configuration, or is active; otherwise the deploy is failed and removed.
// NOTE(review): the unguarded get() further down presumes isActive(...) is false for an absent request — confirm.
if (!(maybeRequestWithState.isPresent() && maybeRequestWithState.get().getState() == RequestState.FINISHED)
&& !(configuration.isAllowDeployOfPausedRequests() && maybeRequestWithState.isPresent() && maybeRequestWithState.get().getState() == RequestState.PAUSED)
&& !SingularityRequestWithState.isActive(maybeRequestWithState)) {
LOG.warn("Deploy {} request was {}, removing deploy", pendingDeploy, SingularityRequestWithState.getRequestState(maybeRequestWithState));
if (shouldCancelLoadBalancer(pendingDeploy)) {
cancelLoadBalancer(pendingDeploy, SingularityDeployFailure.deployRemoved());
}
failPendingDeployDueToState(pendingDeploy, maybeRequestWithState, deploy);
return;
}
final SingularityDeployMarker pendingDeployMarker = pendingDeploy.getDeployMarker();
final Optional cancelRequest = findCancel(cancelDeploys, pendingDeployMarker);
final Optional updatePendingDeployRequest = findUpdateRequest(updateRequests, pendingDeploy);
final SingularityRequestWithState requestWithState = maybeRequestWithState.get();
// An updated request carried by the pending deploy takes precedence over the stored request.
final SingularityRequest request = pendingDeploy.getUpdatedRequest().or(requestWithState.getRequest());
final List requestTasks = taskManager.getTaskIdsForRequest(request.getId());
final List activeTasks = taskManager.filterActiveTaskIds(requestTasks);
// Tasks of this deploy that are no longer active.
final List inactiveDeployMatchingTasks = new ArrayList<>(requestTasks.size());
for (SingularityTaskId taskId : requestTasks) {
if (taskId.getDeployId().equals(pendingDeployMarker.getDeployId()) && !activeTasks.contains(taskId)) {
inactiveDeployMatchingTasks.add(taskId);
}
}
// Split the active tasks into those of this deploy and those of any other deploy.
final List deployMatchingTasks = new ArrayList<>(activeTasks.size());
final List allOtherMatchingTasks = new ArrayList<>(activeTasks.size());
for (SingularityTaskId taskId : activeTasks) {
if (taskId.getDeployId().equals(pendingDeployMarker.getDeployId())) {
deployMatchingTasks.add(taskId);
} else {
allOtherMatchingTasks.add(taskId);
}
}
SingularityDeployResult deployResult =
getDeployResult(request, requestWithState.getState(), cancelRequest, pendingDeploy, updatePendingDeployRequest, deploy, deployMatchingTasks, allOtherMatchingTasks, inactiveDeployMatchingTasks);
LOG.info("Deploy {} had result {} after {}", pendingDeployMarker, deployResult, JavaUtils.durationFromMillis(System.currentTimeMillis() - pendingDeployMarker.getTimestamp()));
if (deployResult.getDeployState() == DeployState.SUCCEEDED) {
// Record this deploy as the active one; only when that is saved is the deploy finished as a success.
if (saveNewDeployState(pendingDeployMarker, Optional.of(pendingDeployMarker))) {
if (!(request.getRequestType() == RequestType.RUN_ONCE)) {
deleteObsoletePendingTasks(pendingDeploy);
}
// On success the tasks handed to finishDeploy for cleanup are the active tasks of other deploys.
finishDeploy(requestWithState, deploy, pendingDeploy, allOtherMatchingTasks, deployResult);
return;
} else {
LOG.warn("Failing deploy {} because it failed to save deploy state", pendingDeployMarker);
deployResult =
new SingularityDeployResult(DeployState.FAILED_INTERNAL_STATE, Optional.of(String.format("Deploy had state %s but failed to persist it correctly", deployResult.getDeployState())),
deployResult.getLbUpdate(), SingularityDeployFailure.failedToSave(), deployResult.getTimestamp());
}
} else if (!deployResult.getDeployState().isDeployFinished()) {
// Not finished yet: leave the pending deploy in place for a later check.
return;
}
// success case is handled, handle failure cases:
// Passing an absent new active deploy keeps whatever active deploy the stored state already has.
saveNewDeployState(pendingDeployMarker, Optional. absent());
// On failure the tasks handed to finishDeploy for cleanup are this deploy's own active tasks.
finishDeploy(requestWithState, deploy, pendingDeploy, deployMatchingTasks, deployResult);
}
/**
 * Deletes every pending task that belongs to the same request as {@code pendingDeploy} but to a
 * different deploy.
 *
 * @param pendingDeploy the deploy whose request's stale pending tasks should be removed
 */
private void deleteObsoletePendingTasks(SingularityPendingDeploy pendingDeploy) {
  final SingularityDeployMarker marker = pendingDeploy.getDeployMarker();

  // First pass: pick out the pending tasks of this request that target another deploy.
  final List<SingularityPendingTaskId> stalePendingTasks = new ArrayList<>();
  for (SingularityPendingTaskId candidate : taskManager.getPendingTaskIds()) {
    final boolean sameRequest = candidate.getRequestId().equals(marker.getRequestId());
    if (sameRequest && !candidate.getDeployId().equals(marker.getDeployId())) {
      stalePendingTasks.add(candidate);
    }
  }

  // Second pass: delete them.
  for (SingularityPendingTaskId stale : stalePendingTasks) {
    LOG.debug("Deleting obsolete pending task {}", stale.getId());
    taskManager.deletePendingTask(stale);
  }
}
/**
 * Looks for a cancel request that targets the given deploy.
 *
 * @param cancelDeploys the outstanding cancel requests
 * @param activeDeploy marker of the deploy being checked
 * @return the first cancel request with the same request id and deploy id, or absent if none
 */
private Optional<SingularityDeployMarker> findCancel(List<SingularityDeployMarker> cancelDeploys, SingularityDeployMarker activeDeploy) {
  for (SingularityDeployMarker cancelDeploy : cancelDeploys) {
    if (cancelDeploy.getRequestId().equals(activeDeploy.getRequestId()) && cancelDeploy.getDeployId().equals(activeDeploy.getDeployId())) {
      return Optional.of(cancelDeploy);
    }
  }
  return Optional.absent();
}
/**
 * Looks for a pending-deploy update request that targets the given pending deploy.
 *
 * @param updateRequests the outstanding update requests
 * @param pendingDeploy the deploy being checked
 * @return the first update request with the same request id and deploy id, or absent if none
 */
private Optional<SingularityUpdatePendingDeployRequest> findUpdateRequest(List<SingularityUpdatePendingDeployRequest> updateRequests, SingularityPendingDeploy pendingDeploy) {
  final SingularityDeployMarker marker = pendingDeploy.getDeployMarker();
  for (SingularityUpdatePendingDeployRequest updateRequest : updateRequests) {
    if (updateRequest.getRequestId().equals(marker.getRequestId()) && updateRequest.getDeployId().equals(marker.getDeployId())) {
      return Optional.of(updateRequest);
    }
  }
  return Optional.absent();
}
/**
 * Saves the same load balancer update for each of the given tasks.
 *
 * @param taskIds tasks whose load balancer state is written
 * @param type the load balancer request type the state is stored under
 * @param update the update to store
 */
private void updateLoadBalancerStateForTasks(Collection<SingularityTaskId> taskIds, LoadBalancerRequestType type, SingularityLoadBalancerUpdate update) {
  for (SingularityTaskId taskId : taskIds) {
    taskManager.saveLoadBalancerState(taskId, type, update);
  }
}
/**
 * Saves a task cleanup for every task in {@code tasksToKill}.
 *
 * <p>Each cleanup carries the deploying user, the cleanup type from
 * {@link #getCleanupType}, the deploy result's timestamp and a message of the form
 * {@code "Deploy <deployId> - <STATE>"}.
 *
 * @param pendingDeploy the deploy that produced the result
 * @param request the request being deployed
 * @param deployResult the deploy's result
 * @param tasksToKill tasks to create cleanups for
 */
private void cleanupTasks(SingularityPendingDeploy pendingDeploy, SingularityRequest request, SingularityDeployResult deployResult, Iterable<SingularityTaskId> tasksToKill) {
  for (SingularityTaskId matchingTask : tasksToKill) {
    taskManager.saveTaskCleanup(new SingularityTaskCleanup(pendingDeploy.getDeployMarker().getUser(), getCleanupType(pendingDeploy, request, deployResult), deployResult.getTimestamp(), matchingTask,
        Optional.of(String.format("Deploy %s - %s", pendingDeploy.getDeployMarker().getDeployId(), deployResult.getDeployState().name())), Optional.absent(), Optional.absent()));
  }
}
/**
 * Picks the cleanup type for tasks killed because of this deploy result.
 *
 * <p>A deploy counts as incremental when it has progress data and its instance count per step
 * differs from the request's instance count. A FAILED or CANCELED incremental deploy gets a
 * dedicated cleanup type; everything else uses the deploy state's own cleanup type.
 */
private TaskCleanupType getCleanupType(SingularityPendingDeploy pendingDeploy, SingularityRequest request, SingularityDeployResult deployResult) {
  final DeployState state = deployResult.getDeployState();
  final boolean incremental = pendingDeploy.getDeployProgress().isPresent()
      && pendingDeploy.getDeployProgress().get().getDeployInstanceCountPerStep() != request.getInstancesSafe();

  if (incremental && state == DeployState.FAILED) {
    return TaskCleanupType.INCREMENTAL_DEPLOY_FAILED;
  }
  if (incremental && state == DeployState.CANCELED) {
    return TaskCleanupType.INCREMENTAL_DEPLOY_CANCELLED;
  }
  return state.getCleanupType();
}
/**
 * Rewrites the request's deploy state with no pending deploy and, optionally, a new active
 * deploy.
 *
 * @param pendingDeployMarker marker of the deploy being resolved; its request id selects the state
 * @param newActiveDeploy the deploy to record as active; when absent the currently stored active
 *     deploy (if any) is kept
 * @return {@code false} when no deploy state exists for the request (nothing is saved),
 *     {@code true} after the new state has been saved
 */
private boolean saveNewDeployState(SingularityDeployMarker pendingDeployMarker, Optional<SingularityDeployMarker> newActiveDeploy) {
  Optional<SingularityRequestDeployState> deployState = deployManager.getRequestDeployState(pendingDeployMarker.getRequestId());
  if (!deployState.isPresent()) {
    LOG.error("Expected deploy state for deploy marker: {} but didn't find it", pendingDeployMarker);
    return false;
  }
  deployManager.saveNewRequestDeployState(new SingularityRequestDeployState(deployState.get().getRequestId(), newActiveDeploy.or(deployState.get().getActiveDeploy()),
      Optional.absent()));
  return true;
}
/**
 * Applies all follow-up actions for a deploy that has reached a final result and then removes
 * the pending deploy.
 *
 * @param requestWithState the request and its state as read when the deploy was checked
 * @param deploy the deploy object, if it could be found
 * @param pendingDeploy the pending deploy being resolved
 * @param tasksToKill tasks to create cleanups for (skipped for one-off and RUN_ONCE requests)
 * @param deployResult the final result of the deploy
 */
private void finishDeploy(SingularityRequestWithState requestWithState, Optional deploy, SingularityPendingDeploy pendingDeploy, Iterable tasksToKill,
SingularityDeployResult deployResult) {
SingularityRequest request = requestWithState.getRequest();
if (!request.isOneOff() && !(request.getRequestType() == RequestType.RUN_ONCE)) {
cleanupTasks(pendingDeploy, request, deployResult, tasksToKill);
}
// The deploy asked to be run immediately: build a pending request from its run-now settings.
if (deploy.isPresent() && deploy.get().getRunImmediately().isPresent()) {
String requestId = deploy.get().getRequestId();
String deployId = deploy.get().getId();
SingularityRunNowRequest runNowRequest = deploy.get().getRunImmediately().get();
List activeTasks = taskManager.getActiveTaskIdsForRequest(requestId);
List pendingTasks = taskManager.getPendingTaskIdsForRequest(requestId);
SingularityPendingRequestBuilder builder = new SingularityPendingRequestBuilder()
.setRequestId(requestId)
.setDeployId(deployId)
.setTimestamp(deployResult.getTimestamp())
.setUser(pendingDeploy.getDeployMarker().getUser())
.setCmdLineArgsList(runNowRequest.getCommandLineArgs())
.setRunId(runNowRequest.getRunId().or(Optional.of(UUID.randomUUID().toString())))
.setSkipHealthchecks(runNowRequest.getSkipHealthchecks().or(request.getSkipHealthchecks()))
.setMessage(runNowRequest.getMessage()
.or(pendingDeploy.getDeployMarker().getMessage()))
.setResources(runNowRequest.getResources())
.setRunAsUserOverride(runNowRequest.getRunAsUserOverride())
.setEnvOverrides(runNowRequest.getEnvOverrides())
.setExtraArtifacts(runNowRequest.getExtraArtifacts())
.setRunAt(runNowRequest.getRunAt());
// Stays null for long-running, non-scheduled requests; nothing is queued in that case.
PendingType pendingType = null;
if (request.isScheduled()) {
if (activeTasks.isEmpty()) {
pendingType = PendingType.IMMEDIATE;
} else {
// Don't run scheduled task over a running task. Will be picked up on the next run.
pendingType = PendingType.NEW_DEPLOY;
}
} else if (!request.isLongRunning()) {
if (request.getInstances().isPresent()
&& (activeTasks.size() + pendingTasks.size() < request.getInstances().get())) {
pendingType = PendingType.ONEOFF;
} else {
// Don't run one-off / on-demand task when already at instance count cap
pendingType = PendingType.NEW_DEPLOY;
}
}
if (pendingType != null) {
builder.setPendingType(canceledOr(deployResult.getDeployState(), pendingType));
requestManager.addToPendingQueue(builder.build());
} else {
LOG.warn("Could not determine pending type for deploy {}.", deployId);
}
} else if (!request.isDeployable() && !request.isOneOff()) {
// Non-deployable, non-one-off request: queue it for the new deploy (or as canceled).
PendingType pendingType = canceledOr(deployResult.getDeployState(), PendingType.NEW_DEPLOY);
requestManager.addToPendingQueue(new SingularityPendingRequest(request.getId(), pendingDeploy.getDeployMarker().getDeployId(),
deployResult.getTimestamp(), pendingDeploy.getDeployMarker().getUser(), pendingType,
deploy.isPresent() ? deploy.get().getSkipHealthchecksOnDeploy() : Optional.absent(), pendingDeploy.getDeployMarker().getMessage()));
}
if (deployResult.getDeployState() == DeployState.SUCCEEDED) {
if (request.isDeployable() && !request.isOneOff()) {
// remove the lock on bounces in case we deployed during a bounce
requestManager.markBounceComplete(request.getId());
}
if (requestWithState.getState() == RequestState.FINISHED) {
// A FINISHED request is moved to ACTIVE state so we can reevaluate the schedule
requestManager.activate(
request,
RequestHistoryType.UPDATED,
System.currentTimeMillis(),
deploy.isPresent() ? deploy.get().getUser() : Optional.absent(),
Optional.absent());
}
}
deployManager.saveDeployResult(pendingDeploy.getDeployMarker(), deploy, deployResult);
// A deployable request whose deploy was canceled, failed or went overdue is re-queued against
// its currently active deploy, unless the request is PAUSED or DEPLOYING_TO_UNPAUSE.
if (request.isDeployable() && (deployResult.getDeployState() == DeployState.CANCELED || deployResult.getDeployState() == DeployState.FAILED || deployResult.getDeployState() == DeployState.OVERDUE)) {
Optional maybeRequestDeployState = deployManager.getRequestDeployState(request.getId());
if (maybeRequestDeployState.isPresent()
&& maybeRequestDeployState.get().getActiveDeploy().isPresent()
&& !(requestWithState.getState() == RequestState.PAUSED || requestWithState.getState() == RequestState.DEPLOYING_TO_UNPAUSE)) {
requestManager.addToPendingQueue(new SingularityPendingRequest(
request.getId(),
maybeRequestDeployState.get().getActiveDeploy().get().getDeployId(),
deployResult.getTimestamp(),
pendingDeploy.getDeployMarker().getUser(),
deployResult.getDeployState() == DeployState.CANCELED ? PendingType.DEPLOY_CANCELLED : PendingType.DEPLOY_FAILED,
request.getSkipHealthchecks(),
pendingDeploy.getDeployMarker().getMessage()));
}
}
if (deployResult.getDeployState() == DeployState.SUCCEEDED) {
// Cleanups that already exist for tasks of the deploy that just succeeded.
List newDeployCleaningTasks = taskManager.getCleanupTaskIds().stream()
.filter((t) -> t.getDeployId().equals(pendingDeploy.getDeployMarker().getDeployId()))
.collect(Collectors.toList());
// Account for any bounce/decom that may have happened during the deploy
if (!newDeployCleaningTasks.isEmpty()) {
requestManager.addToPendingQueue(new SingularityPendingRequest(
request.getId(),
pendingDeploy.getDeployMarker().getDeployId(),
deployResult.getTimestamp(),
pendingDeploy.getDeployMarker().getUser(),
PendingType.DEPLOY_FINISHED,
request.getSkipHealthchecks(),
pendingDeploy.getDeployMarker().getMessage()
));
}
}
// A successful deploy whose final target instance count differs from the request's instance
// count queues an UPDATED_REQUEST so the difference gets picked up (not while PAUSED).
if (request.isDeployable() && deployResult.getDeployState() == DeployState.SUCCEEDED && pendingDeploy.getDeployProgress().isPresent() && requestWithState.getState() != RequestState.PAUSED) {
if (pendingDeploy.getDeployProgress().get().getTargetActiveInstances() != request.getInstancesSafe()) {
requestManager.addToPendingQueue(new SingularityPendingRequest(request.getId(), pendingDeploy.getDeployMarker().getDeployId(), deployResult.getTimestamp(),
pendingDeploy.getDeployMarker().getUser(), PendingType.UPDATED_REQUEST, request.getSkipHealthchecks(), pendingDeploy.getDeployMarker().getMessage()));
}
}
// A deploy made to unpause a request activates it on success and pauses it again otherwise.
if (requestWithState.getState() == RequestState.DEPLOYING_TO_UNPAUSE) {
if (deployResult.getDeployState() == DeployState.SUCCEEDED) {
requestManager.activate(request, RequestHistoryType.DEPLOYED_TO_UNPAUSE, deployResult.getTimestamp(), pendingDeploy.getDeployMarker().getUser(), Optional. absent());
requestManager.deleteExpiringObject(SingularityExpiringPause.class, request.getId());
} else {
requestManager.pause(request, deployResult.getTimestamp(), pendingDeploy.getDeployMarker().getUser(), Optional. absent());
}
}
// A request update that came with the deploy is only applied once the deploy succeeded.
if (pendingDeploy.getUpdatedRequest().isPresent() && deployResult.getDeployState() == DeployState.SUCCEEDED) {
requestManager.update(pendingDeploy.getUpdatedRequest().get(), System.currentTimeMillis(), pendingDeploy.getDeployMarker().getUser(), Optional.absent());
requestManager.deleteExpiringObject(SingularityExpiringScale.class, request.getId());
}
removePendingDeploy(pendingDeploy);
}
/**
 * Returns {@code DEPLOY_CANCELLED} for a canceled deploy and the supplied pending type for any
 * other deploy state.
 */
private PendingType canceledOr(DeployState deployState, PendingType pendingType) {
  return deployState == DeployState.CANCELED ? PendingType.DEPLOY_CANCELLED : pendingType;
}
/** Deletes the pending deploy stored for the request that {@code pendingDeploy} belongs to. */
private void removePendingDeploy(SingularityPendingDeploy pendingDeploy) {
  final SingularityDeployMarker marker = pendingDeploy.getDeployMarker();
  deployManager.deletePendingDeploy(marker.getRequestId());
}
/**
 * Fails a pending deploy because its request is not in a deployable state.
 *
 * <p>When the request no longer exists, only the FAILED result is saved and the pending deploy
 * removed. Otherwise the deploy state is rewritten (keeping the current active deploy) and the
 * deploy is finished with no tasks to clean up.
 *
 * @param pendingDeploy the deploy to fail
 * @param maybeRequestWithState the request, if it still exists
 * @param deploy the deploy object, if it could be found
 */
private void failPendingDeployDueToState(SingularityPendingDeploy pendingDeploy, Optional<SingularityRequestWithState> maybeRequestWithState, Optional<SingularityDeploy> deploy) {
  SingularityDeployResult deployResult = new SingularityDeployResult(DeployState.FAILED, Optional.of(String.format("Request in state %s is not deployable", SingularityRequestWithState.getRequestState(maybeRequestWithState))), Optional.absent());
  if (!maybeRequestWithState.isPresent()) {
    // No request left to update: just record the failure and drop the pending deploy.
    deployManager.saveDeployResult(pendingDeploy.getDeployMarker(), deploy, deployResult);
    removePendingDeploy(pendingDeploy);
    return;
  }
  saveNewDeployState(pendingDeploy.getDeployMarker(), Optional.absent());
  finishDeploy(maybeRequestWithState.get(), deploy, pendingDeploy, Collections.emptyList(), deployResult);
}
/**
 * Computes how long a deploy (or deploy step) may take before it is considered overdue.
 *
 * <p>The base is the deploy's health timeout, falling back to the configured default. On top of
 * that comes either the maximum healthcheck timeout (when the deploy has a healthcheck and does
 * not skip healthchecks on deploy) or the "consider healthy after running for" period.
 *
 * @return the allowed duration in milliseconds
 */
private long getAllowedMillis(SingularityDeploy deploy) {
  final boolean usesHealthchecks = deploy.getHealthcheck().isPresent() && !deploy.getSkipHealthchecksOnDeploy().or(false);

  long allowedSeconds = deploy.getDeployHealthTimeoutSeconds().or(configuration.getDeployHealthyBySeconds());
  if (!usesHealthchecks) {
    allowedSeconds += deploy.getConsiderHealthyAfterRunningForSeconds().or(configuration.getConsiderTaskHealthyAfterRunningForSeconds());
  } else {
    allowedSeconds += deployHealthHelper.getMaxHealthcheckTimeoutSeconds(deploy.getHealthcheck().get());
  }

  return TimeUnit.SECONDS.toMillis(allowedSeconds);
}
/**
 * Tells whether the deploy has been running longer than {@link #getAllowedMillis} permits.
 *
 * <p>A deploy whose object is missing, or whose current step is already complete, is never
 * reported as overdue.
 *
 * @param pendingDeploy the deploy being checked; its start time comes from {@link #getStartTime}
 * @param deploy the deploy object, if it could be found
 * @return {@code true} when the elapsed time exceeds the allowed time
 */
private boolean isDeployOverdue(SingularityPendingDeploy pendingDeploy, Optional<SingularityDeploy> deploy) {
  if (!deploy.isPresent()) {
    LOG.warn("Can't determine if deploy {} is overdue because it was missing", pendingDeploy);
    return false;
  }
  if (pendingDeploy.getDeployProgress().isPresent() && pendingDeploy.getDeployProgress().get().isStepComplete()) {
    return false;
  }
  final long startTime = getStartTime(pendingDeploy);
  final long deployDuration = System.currentTimeMillis() - startTime;
  final long allowedTime = getAllowedMillis(deploy.get());
  if (deployDuration > allowedTime) {
    LOG.warn("Deploy {} is overdue (duration: {}), allowed: {}", pendingDeploy, DurationFormatUtils.formatDurationHMS(deployDuration), DurationFormatUtils.formatDurationHMS(allowedTime));
    return true;
  } else {
    LOG.trace("Deploy {} is not yet overdue (duration: {}), allowed: {}", pendingDeploy, DurationFormatUtils.formatDurationHMS(deployDuration), DurationFormatUtils.formatDurationHMS(allowedTime));
    return false;
  }
}
/**
 * Returns the timestamp the overdue clock starts from: the deploy progress timestamp when
 * progress data exists, otherwise the deploy marker's timestamp.
 */
private long getStartTime(SingularityPendingDeploy pendingDeploy) {
  final Optional<SingularityDeployProgress> progress = pendingDeploy.getDeployProgress();
  return progress.isPresent()
      ? progress.get().getTimestamp()
      : pendingDeploy.getDeployMarker().getTimestamp();
}
/**
 * Resolves task ids to tasks, preserving the iteration order of {@code taskIds}.
 *
 * @param taskIds ids to resolve
 * @param taskIdToTask lookup from id to task
 * @return one entry per id; the entry is {@code null} for any id missing from the map
 */
private List<SingularityTask> getTasks(Collection<SingularityTaskId> taskIds, Map<SingularityTaskId, SingularityTask> taskIdToTask) {
  final List<SingularityTask> tasks = Lists.newArrayListWithCapacity(taskIds.size());
  for (SingularityTaskId taskId : taskIds) {
    // TODO what if one is missing? (currently a null entry is added)
    tasks.add(taskIdToTask.get(taskId));
  }
  return tasks;
}
/**
 * Saves a copy of the pending deploy with a new load balancer update, deploy state and deploy
 * progress; the deploy marker and updated request are carried over unchanged.
 *
 * @param pendingDeploy the pending deploy to copy
 * @param lbUpdate the last load balancer update to store
 * @param deployState the current deploy state to store
 * @param deployProgress the deploy progress to store
 */
private void updatePendingDeploy(SingularityPendingDeploy pendingDeploy, Optional<SingularityLoadBalancerUpdate> lbUpdate, DeployState deployState,
    Optional<SingularityDeployProgress> deployProgress) {
  SingularityPendingDeploy copy = new SingularityPendingDeploy(pendingDeploy.getDeployMarker(), lbUpdate, deployState, deployProgress, pendingDeploy.getUpdatedRequest());
  deployManager.savePendingDeploy(copy);
}
/**
 * Saves a copy of the pending deploy with a new load balancer update and deploy state, keeping
 * its existing deploy progress.
 *
 * @param pendingDeploy the pending deploy to copy
 * @param lbUpdate the last load balancer update to store
 * @param deployState the current deploy state to store
 */
private void updatePendingDeploy(SingularityPendingDeploy pendingDeploy, Optional<SingularityLoadBalancerUpdate> lbUpdate, DeployState deployState) {
  updatePendingDeploy(pendingDeploy, lbUpdate, deployState, pendingDeploy.getDeployProgress());
}
/**
 * Translates the state of a load balancer update into a deploy state.
 *
 * @param lbUpdate the load balancer update to interpret
 * @param unknownState the deploy state to report when the load balancer state is UNKNOWN
 * @return the matching deploy state; WAITING for the WAITING state and for any state not listed
 */
private DeployState interpretLoadBalancerState(SingularityLoadBalancerUpdate lbUpdate, DeployState unknownState) {
  switch (lbUpdate.getLoadBalancerState()) {
    case SUCCESS:
      return DeployState.SUCCEEDED;
    case INVALID_REQUEST_NOOP:
    case FAILED:
      return DeployState.FAILED;
    case CANCELING:
      return DeployState.CANCELING;
    case CANCELED:
      return DeployState.CANCELED;
    case UNKNOWN:
      return unknownState;
    case WAITING:
    default:
      return DeployState.WAITING;
  }
}
/**
 * Asks the load balancer client to cancel this deploy's load balancer request.
 *
 * @return the update returned by the load balancer client
 */
private SingularityLoadBalancerUpdate sendCancelToLoadBalancer(SingularityPendingDeploy pendingDeploy) {
  final LoadBalancerRequestId lbRequestId = getLoadBalancerRequestId(pendingDeploy);
  return lbClient.cancel(lbRequestId);
}
/**
 * Cancels the deploy's load balancer request and records the outcome on the pending deploy.
 *
 * @param pendingDeploy the deploy whose load balancer request is canceled
 * @param deployFailures failures to attach to the returned result
 * @return a result whose state is derived from the load balancer's answer (CANCELING when the
 *     load balancer state is UNKNOWN)
 */
private SingularityDeployResult cancelLoadBalancer(SingularityPendingDeploy pendingDeploy, List<SingularityDeployFailure> deployFailures) {
  final SingularityLoadBalancerUpdate lbUpdate = sendCancelToLoadBalancer(pendingDeploy);
  final DeployState deployState = interpretLoadBalancerState(lbUpdate, DeployState.CANCELING);
  updatePendingDeploy(pendingDeploy, Optional.of(lbUpdate), deployState);
  return new SingularityDeployResult(deployState, lbUpdate, deployFailures);
}
/**
 * A load balancer request should be canceled when the pending deploy has a recorded load
 * balancer update and its current deploy state is not a finished one.
 */
private boolean shouldCancelLoadBalancer(final SingularityPendingDeploy pendingDeploy) {
  if (!pendingDeploy.getLastLoadBalancerUpdate().isPresent()) {
    return false;
  }
  return !pendingDeploy.getCurrentDeployState().isDeployFinished();
}
/**
 * The load balancer should be polled when the pending deploy has a recorded load balancer update
 * that belongs to the current load balancer request id and whose state is not UNKNOWN.
 */
private boolean shouldCheckLbState(final SingularityPendingDeploy pendingDeploy) {
  if (!pendingDeploy.getLastLoadBalancerUpdate().isPresent()) {
    return false;
  }

  final String currentId = getLoadBalancerRequestId(pendingDeploy).getId();
  final SingularityLoadBalancerUpdate lastUpdate = pendingDeploy.getLastLoadBalancerUpdate().get();
  if (!currentId.equals(lastUpdate.getLoadBalancerRequestId().getId())) {
    return false;
  }

  return lastUpdate.getLoadBalancerState() != BaragonRequestState.UNKNOWN;
}
/**
 * Builds the DEPLOY load balancer request id for the deploy's current step, formatted as
 * {@code <requestId>-<deployId>-<targetActiveInstances>}.
 *
 * <p>Reads the deploy progress with an unguarded {@code get()}, so the pending deploy must have
 * progress data.
 */
private LoadBalancerRequestId getLoadBalancerRequestId(SingularityPendingDeploy pendingDeploy) {
  final SingularityDeployMarker marker = pendingDeploy.getDeployMarker();
  final String id = String.format(
      "%s-%s-%s",
      marker.getRequestId(),
      marker.getDeployId(),
      pendingDeploy.getDeployProgress().get().getTargetActiveInstances());
  return new LoadBalancerRequestId(id, LoadBalancerRequestType.DEPLOY, Optional.absent());
}
/**
 * Determines the current result of a pending deploy.
 *
 * <ul>
 *   <li>Succeeds immediately when the request is not deployable, or is PAUSED while deploys of
 *       paused requests are allowed.</li>
 *   <li>Fails when the pending deploy has no progress data.</li>
 *   <li>When tasks of this deploy have newly become inactive: either records them as failed and
 *       queues a retry (returning WAITING), or fails the deploy.</li>
 *   <li>Otherwise delegates to {@link #checkDeployProgress}.</li>
 * </ul>
 *
 * @param request the request being deployed
 * @param requestState the request's current state
 * @param cancelRequest a cancel request matching this deploy, if any
 * @param pendingDeploy the deploy being checked
 * @param updatePendingDeployRequest an update request matching this deploy, if any
 * @param deploy the deploy object, if it could be found
 * @param deployActiveTasks active tasks belonging to this deploy
 * @param otherActiveTasks active tasks of the request belonging to other deploys
 * @param inactiveDeployMatchingTasks tasks of this deploy that are no longer active
 * @return the deploy result
 */
private SingularityDeployResult getDeployResult(final SingularityRequest request, final RequestState requestState, final Optional<SingularityDeployMarker> cancelRequest, final SingularityPendingDeploy pendingDeploy,
    final Optional<SingularityUpdatePendingDeployRequest> updatePendingDeployRequest, final Optional<SingularityDeploy> deploy, final Collection<SingularityTaskId> deployActiveTasks, final Collection<SingularityTaskId> otherActiveTasks,
    final Collection<SingularityTaskId> inactiveDeployMatchingTasks) {
  if (!request.isDeployable() || (configuration.isAllowDeployOfPausedRequests() && requestState == RequestState.PAUSED)) {
    LOG.info("Succeeding a deploy {} because the request {} was not deployable", pendingDeploy, request);
    return new SingularityDeployResult(DeployState.SUCCEEDED, "Request not deployable");
  }
  if (!pendingDeploy.getDeployProgress().isPresent()) {
    return new SingularityDeployResult(DeployState.FAILED, "No deploy progress data present in Zookeeper. Please reattempt your deploy");
  }

  // Inactive tasks that were not already recorded as failed in the deploy progress.
  final Set<SingularityTaskId> newInactiveDeployTasks = getNewInactiveDeployTasks(pendingDeploy, inactiveDeployMatchingTasks);
  if (newInactiveDeployTasks.isEmpty()) {
    return checkDeployProgress(request, cancelRequest, pendingDeploy, updatePendingDeployRequest, deploy, deployActiveTasks, otherActiveTasks);
  }

  if (canRetryTasks(deploy, inactiveDeployMatchingTasks)) {
    // Remember the failed tasks and queue replacements; the deploy keeps waiting.
    SingularityDeployProgress newProgress = pendingDeploy.getDeployProgress().get().withFailedTasks(new HashSet<>(inactiveDeployMatchingTasks));
    updatePendingDeploy(pendingDeploy, pendingDeploy.getLastLoadBalancerUpdate(), DeployState.WAITING, Optional.of(newProgress));
    requestManager.addToPendingQueue(
        new SingularityPendingRequest(request.getId(), pendingDeploy.getDeployMarker().getDeployId(), System.currentTimeMillis(), pendingDeploy.getDeployMarker().getUser(),
            PendingType.NEXT_DEPLOY_STEP, deploy.isPresent() ? deploy.get().getSkipHealthchecksOnDeploy() : Optional.<Boolean>absent(),
            pendingDeploy.getDeployMarker().getMessage()));
    return new SingularityDeployResult(DeployState.WAITING);
  }

  if (request.isLoadBalanced() && shouldCancelLoadBalancer(pendingDeploy)) {
    LOG.info("Attempting to cancel pending load balancer request, failing deploy {} regardless", pendingDeploy);
    sendCancelToLoadBalancer(pendingDeploy);
  }

  // The deploy object may be missing (see the isPresent() checks above); fall back to the
  // configured default instead of calling get() on an absent Optional.
  final int maxRetries = deploy.isPresent()
      ? deploy.get().getMaxTaskRetries().or(configuration.getDefaultDeployMaxTaskRetries())
      : configuration.getDefaultDeployMaxTaskRetries();
  return getDeployResultWithFailures(request, deploy, pendingDeploy, DeployState.FAILED, String.format("%s task(s) for this deploy failed", inactiveDeployMatchingTasks.size() - maxRetries), inactiveDeployMatchingTasks);
}
/**
 * Tells whether the deploy's failed tasks may still be retried.
 *
 * @param deploy the deploy object, if it could be found
 * @param inactiveDeployMatchingTasks tasks of this deploy that are no longer active
 * @return {@code true} when the deploy is present, its retry limit (the deploy's own setting or
 *     the configured default) is positive, and the number of inactive tasks does not exceed it;
 *     {@code false} when the deploy is absent
 */
private boolean canRetryTasks(Optional<SingularityDeploy> deploy, Collection<SingularityTaskId> inactiveDeployMatchingTasks) {
  // Check presence before dereferencing: get() on an absent Optional throws.
  if (!deploy.isPresent()) {
    return false;
  }
  int maxRetries = deploy.get().getMaxTaskRetries().or(configuration.getDefaultDeployMaxTaskRetries());
  return maxRetries > 0 && inactiveDeployMatchingTasks.size() <= maxRetries;
}
/**
 * Returns the inactive tasks of this deploy that are not yet recorded as failed in the deploy
 * progress.
 *
 * @param pendingDeploy the deploy being checked; without progress data nothing is subtracted
 * @param inactiveDeployMatchingTasks tasks of this deploy that are no longer active
 * @return a new, mutable set; the input collection is not modified
 */
private Set<SingularityTaskId> getNewInactiveDeployTasks(SingularityPendingDeploy pendingDeploy, Collection<SingularityTaskId> inactiveDeployMatchingTasks) {
  Set<SingularityTaskId> newInactiveDeployTasks = new HashSet<>(inactiveDeployMatchingTasks);
  if (pendingDeploy.getDeployProgress().isPresent()) {
    newInactiveDeployTasks.removeAll(pendingDeploy.getDeployProgress().get().getFailedDeployTasks());
  }
  return newInactiveDeployTasks;
}
/**
 * Works out the result of the deploy's current step once it is known that no new task failures
 * need handling.
 *
 * <p>Requires the pending deploy to have progress data (it is read with an unguarded get()).
 *
 * @param request the request being deployed
 * @param cancelRequest a cancel request matching this deploy, if any
 * @param pendingDeploy the deploy being checked
 * @param updatePendingDeployRequest an update request matching this deploy, if any
 * @param deploy the deploy object, if it could be found
 * @param deployActiveTasks active tasks belonging to this deploy
 * @param otherActiveTasks active tasks of the request belonging to other deploys
 * @return the deploy result for this check
 */
private SingularityDeployResult checkDeployProgress(final SingularityRequest request, final Optional cancelRequest, final SingularityPendingDeploy pendingDeploy,
final Optional updatePendingDeployRequest, final Optional deploy, final Collection deployActiveTasks,
final Collection otherActiveTasks) {
SingularityDeployProgress deployProgress = pendingDeploy.getDeployProgress().get();
// A cancel request wins over everything else.
if (cancelRequest.isPresent()) {
LOG.info("Canceling a deploy {} due to cancel request {}", pendingDeploy, cancelRequest.get());
String userMessage = cancelRequest.get().getUser().isPresent() ? String.format(" by %s", cancelRequest.get().getUser().get()) : "";
return new SingularityDeployResult(DeployState.CANCELED, Optional.of(String.format("Canceled due to request%s at %s", userMessage, cancelRequest.get().getTimestamp())),
pendingDeploy.getLastLoadBalancerUpdate(), Collections.emptyList(), System.currentTimeMillis());
}
// The current step already completed: only decide whether the next step may start.
if (deployProgress.isStepComplete()) {
return checkCanMoveToNextDeployStep(request, deploy, pendingDeploy, updatePendingDeployRequest);
}
final boolean isDeployOverdue = isDeployOverdue(pendingDeploy, deploy);
// Fewer active tasks than the step's target: keep waiting unless the deploy is overdue.
if (deployActiveTasks.size() < deployProgress.getTargetActiveInstances()) {
maybeUpdatePendingRequest(pendingDeploy, deploy, request, updatePendingDeployRequest);
return checkOverdue(request, deploy, pendingDeploy, deployActiveTasks, isDeployOverdue);
}
// A load balancer request for this step is already recorded: poll it and act on its state.
if (shouldCheckLbState(pendingDeploy)) {
final SingularityLoadBalancerUpdate lbUpdate = lbClient.getState(getLoadBalancerRequestId(pendingDeploy));
return processLbState(request, deploy, pendingDeploy, updatePendingDeployRequest, deployActiveTasks, otherActiveTasks, tasksToShutDown(deployProgress, otherActiveTasks, request), lbUpdate);
}
if (isDeployOverdue && request.isLoadBalanced() && shouldCancelLoadBalancer(pendingDeploy)) {
return cancelLoadBalancer(pendingDeploy, getDeployFailures(request, deploy, pendingDeploy, DeployState.OVERDUE, deployActiveTasks));
}
if (isWaitingForCurrentLbRequest(pendingDeploy)) {
return new SingularityDeployResult(DeployState.WAITING, Optional.of("Waiting on load balancer API"), pendingDeploy.getLastLoadBalancerUpdate());
}
final DeployHealth deployHealth = deployHealthHelper.getDeployHealth(request, deploy, deployActiveTasks, true);
switch (deployHealth) {
case WAITING:
maybeUpdatePendingRequest(pendingDeploy, deploy, request, updatePendingDeployRequest);
return checkOverdue(request, deploy, pendingDeploy, deployActiveTasks, isDeployOverdue);
case HEALTHY:
// Without a load balancer a healthy step is finished right away.
if (!request.isLoadBalanced()) {
return markStepFinished(pendingDeploy, deploy, deployActiveTasks, otherActiveTasks, request, updatePendingDeployRequest);
}
// An update request asking for a different target instance count is applied before any load balancer work.
if (updatePendingDeployRequest.isPresent() && updatePendingDeployRequest.get().getTargetActiveInstances() != deployProgress.getTargetActiveInstances()) {
maybeUpdatePendingRequest(pendingDeploy, deploy, request, updatePendingDeployRequest);
return new SingularityDeployResult(DeployState.WAITING);
}
if (configuration.getLoadBalancerUri() == null) {
LOG.warn("Deploy {} required a load balancer URI but it wasn't set", pendingDeploy);
return new SingularityDeployResult(DeployState.FAILED, Optional.of("No valid load balancer URI was present"), Optional.absent(), Collections.emptyList(), System.currentTimeMillis());
}
for (SingularityTaskId activeTaskId : deployActiveTasks) {
taskManager.markHealthchecksFinished(activeTaskId);
taskManager.clearStartupHealthchecks(activeTaskId);
}
return enqueueAndProcessLbRequest(request, deploy, pendingDeploy, updatePendingDeployRequest, deployActiveTasks, otherActiveTasks);
case UNHEALTHY:
default:
// Unhealthy (or any unlisted health value) fails the deploy.
for (SingularityTaskId activeTaskId : deployActiveTasks) {
taskManager.markHealthchecksFinished(activeTaskId);
taskManager.clearStartupHealthchecks(activeTaskId);
}
return getDeployResultWithFailures(request, deploy, pendingDeploy, DeployState.FAILED, "Not all tasks for deploy were healthy", deployActiveTasks);
}
}
/**
 * Moves the pending deploy on to its next target instance count when either the auto-advance
 * wait has elapsed ({@code canMoveToNextStep}) or an explicit update request is present.
 * When advancing, the new progress is stored with state {@code WAITING} and a
 * {@code NEXT_DEPLOY_STEP} pending request is queued for the request.
 *
 * @param request the request being deployed
 * @param deploy the deploy, if available; only consulted for its skip-healthchecks setting
 * @param pendingDeploy the pending deploy; its deploy progress must be present
 * @param updatePendingDeployRequest optional request to change the target instance count
 * @return always a {@code WAITING} result
 */
private SingularityDeployResult checkCanMoveToNextDeployStep(SingularityRequest request, Optional deploy, SingularityPendingDeploy pendingDeploy,
  Optional updatePendingDeployRequest) {
  SingularityDeployProgress currentProgress = pendingDeploy.getDeployProgress().get();

  boolean shouldAdvance = canMoveToNextStep(currentProgress) || updatePendingDeployRequest.isPresent();
  if (!shouldAdvance) {
    return new SingularityDeployResult(DeployState.WAITING);
  }

  int nextTarget = getNewTargetInstances(currentProgress, request, updatePendingDeployRequest);
  SingularityDeployProgress advancedProgress = currentProgress.withNewTargetInstances(nextTarget);
  updatePendingDeploy(pendingDeploy, pendingDeploy.getLastLoadBalancerUpdate(), DeployState.WAITING, Optional.of(advancedProgress));

  SingularityPendingRequest nextStepRequest = new SingularityPendingRequest(
    request.getId(),
    pendingDeploy.getDeployMarker().getDeployId(),
    System.currentTimeMillis(),
    pendingDeploy.getDeployMarker().getUser(),
    PendingType.NEXT_DEPLOY_STEP,
    deploy.isPresent() ? deploy.get().getSkipHealthchecksOnDeploy() : Optional.absent(),
    pendingDeploy.getDeployMarker().getMessage());
  requestManager.addToPendingQueue(nextStepRequest);

  return new SingularityDeployResult(DeployState.WAITING);
}
/**
 * Enqueues a load balancer request for the current deploy step and processes the result.
 * The request is given the deploy's active tasks and the tasks selected by
 * {@code tasksToShutDown}. Before enqueueing, a pre-enqueue state is recorded as ADD for the
 * deploy's tasks and as REMOVE for those shut-down candidates whose stored ADD state is
 * {@code SUCCESS}.
 *
 * @param deploy the deploy; must be present, since it is passed to the load balancer client
 * @param pendingDeploy the pending deploy; its deploy progress must be present
 * @return the result produced by {@code processLbState} for the enqueue response
 */
private SingularityDeployResult enqueueAndProcessLbRequest(SingularityRequest request, Optional deploy, SingularityPendingDeploy pendingDeploy,
  Optional updatePendingDeployRequest, Collection deployActiveTasks, Collection otherActiveTasks) {
  Collection shutDownCandidates = tasksToShutDown(pendingDeploy.getDeployProgress().get(), otherActiveTasks, request);
  final Map tasksById = taskManager.getTasks(Iterables.concat(deployActiveTasks, shutDownCandidates));
  final LoadBalancerRequestId lbRequestId = getLoadBalancerRequestId(pendingDeploy);

  // Only candidates with a successful ADD on record get a REMOVE state written for them.
  List removals = new ArrayList<>();
  for (SingularityTaskId candidateId : shutDownCandidates) {
    Optional addState = taskManager.getLoadBalancerState(candidateId, LoadBalancerRequestType.ADD);
    if (!addState.isPresent()) {
      continue;
    }
    if (addState.get().getLoadBalancerState() != BaragonRequestState.SUCCESS) {
      continue;
    }
    removals.add(candidateId);
  }

  updateLoadBalancerStateForTasks(deployActiveTasks, LoadBalancerRequestType.ADD, SingularityLoadBalancerUpdate.preEnqueue(lbRequestId));
  updateLoadBalancerStateForTasks(removals, LoadBalancerRequestType.REMOVE, SingularityLoadBalancerUpdate.preEnqueue(lbRequestId));

  SingularityLoadBalancerUpdate enqueueResult = lbClient.enqueue(
    lbRequestId,
    request,
    deploy.get(),
    getTasks(deployActiveTasks, tasksById),
    getTasks(shutDownCandidates, tasksById));

  return processLbState(request, deploy, pendingDeploy, updatePendingDeployRequest, deployActiveTasks, otherActiveTasks, shutDownCandidates, enqueueResult);
}
/**
 * Records a load balancer update against the affected tasks and turns it into a deploy result.
 * The update is stored as the ADD state of every deploy task, and as the REMOVE state of those
 * shut-down tasks whose stored REMOVE update carries the same request id as {@code lbUpdate}.
 *
 * <ul>
 *   <li>Interpreted as {@code SUCCEEDED}: the pending deploy is stored as {@code WAITING} and
 *       {@code markStepFinished} decides the final state.</li>
 *   <li>Interpreted as {@code WAITING}: the state is stored, any pending update request is
 *       applied, and {@code WAITING} is returned.</li>
 *   <li>Anything else: the same bookkeeping is done and a result carrying the update and an
 *       {@code lbUpdateFailed} failure is returned.</li>
 * </ul>
 */
private SingularityDeployResult processLbState(SingularityRequest request, Optional deploy, SingularityPendingDeploy pendingDeploy,
  Optional updatePendingDeployRequest, Collection deployActiveTasks, Collection otherActiveTasks,
  Collection tasksToShutDown, SingularityLoadBalancerUpdate lbUpdate) {
  List removals = new ArrayList<>();
  for (SingularityTaskId candidateId : tasksToShutDown) {
    Optional removeState = taskManager.getLoadBalancerState(candidateId, LoadBalancerRequestType.REMOVE);
    if (!removeState.isPresent()) {
      continue;
    }
    if (removeState.get().getLoadBalancerRequestId().getId().equals(lbUpdate.getLoadBalancerRequestId().getId())) {
      removals.add(candidateId);
    }
  }

  updateLoadBalancerStateForTasks(deployActiveTasks, LoadBalancerRequestType.ADD, lbUpdate);
  updateLoadBalancerStateForTasks(removals, LoadBalancerRequestType.REMOVE, lbUpdate);

  DeployState deployState = interpretLoadBalancerState(lbUpdate, pendingDeploy.getCurrentDeployState());

  if (deployState == DeployState.SUCCEEDED) {
    // A step has completed, markStepFinished will determine SUCCEEDED/WAITING
    updatePendingDeploy(pendingDeploy, Optional.of(lbUpdate), DeployState.WAITING);
    return markStepFinished(pendingDeploy, deploy, deployActiveTasks, otherActiveTasks, request, updatePendingDeployRequest);
  }

  // Still-waiting and failed outcomes share the same bookkeeping; only the returned result differs.
  updatePendingDeploy(pendingDeploy, Optional.of(lbUpdate), deployState);
  maybeUpdatePendingRequest(pendingDeploy, deploy, request, updatePendingDeployRequest, Optional.of(lbUpdate));

  if (deployState == DeployState.WAITING) {
    return new SingularityDeployResult(DeployState.WAITING);
  }
  return new SingularityDeployResult(deployState, lbUpdate, SingularityDeployFailure.lbUpdateFailed());
}
/**
 * Overload for callers that have no new load balancer update to record; forwards to the
 * five-argument form with an absent update.
 */
private void maybeUpdatePendingRequest(SingularityPendingDeploy pendingDeploy, Optional deploy, SingularityRequest request,
  Optional updatePendingDeployRequest) {
  maybeUpdatePendingRequest(
    pendingDeploy,
    deploy,
    request,
    updatePendingDeployRequest,
    Optional.absent());
}
/**
 * Applies a pending update request to the deploy, if there is one and the deploy has progress.
 * The new target instance count is the requested count capped at the request's instance count.
 * The updated progress is stored with state {@code WAITING} and a {@code NEXT_DEPLOY_STEP}
 * pending request is queued. Does nothing when either optional is absent.
 *
 * @param lbUpdate load balancer update to store with the deploy; when absent, the pending
 *     deploy's last load balancer update is kept
 */
private void maybeUpdatePendingRequest(SingularityPendingDeploy pendingDeploy, Optional deploy, SingularityRequest request,
  Optional updatePendingDeployRequest, Optional lbUpdate) {
  if (!updatePendingDeployRequest.isPresent() || !pendingDeploy.getDeployProgress().isPresent()) {
    return;
  }

  SingularityDeployProgress currentProgress = pendingDeploy.getDeployProgress().get();
  int cappedTarget = Math.min(updatePendingDeployRequest.get().getTargetActiveInstances(), request.getInstancesSafe());
  SingularityDeployProgress retargetedProgress = currentProgress.withNewTargetInstances(cappedTarget);

  updatePendingDeploy(pendingDeploy, lbUpdate.or(pendingDeploy.getLastLoadBalancerUpdate()), DeployState.WAITING, Optional.of(retargetedProgress));

  SingularityPendingRequest nextStepRequest = new SingularityPendingRequest(
    request.getId(),
    pendingDeploy.getDeployMarker().getDeployId(),
    System.currentTimeMillis(),
    pendingDeploy.getDeployMarker().getUser(),
    PendingType.NEXT_DEPLOY_STEP,
    deploy.isPresent() ? deploy.get().getSkipHealthchecksOnDeploy() : Optional.absent(),
    pendingDeploy.getDeployMarker().getMessage());
  requestManager.addToPendingQueue(nextStepRequest);
}
/**
 * Tells whether the deploy's last recorded load balancer update belongs to the current load
 * balancer request id and is still in the {@code WAITING} state.
 *
 * @return {@code false} when no load balancer update has been recorded, when it is for a
 *     different request id, or when its state is not {@code WAITING}
 */
private boolean isWaitingForCurrentLbRequest(SingularityPendingDeploy pendingDeploy) {
  if (!pendingDeploy.getLastLoadBalancerUpdate().isPresent()) {
    return false;
  }

  boolean sameRequest = getLoadBalancerRequestId(pendingDeploy).getId()
    .equals(pendingDeploy.getLastLoadBalancerUpdate().get().getLoadBalancerRequestId().getId());
  if (!sameRequest) {
    return false;
  }

  return pendingDeploy.getLastLoadBalancerUpdate().get().getLoadBalancerState() == BaragonRequestState.WAITING;
}
/**
 * A deploy's last step is finished when the current step is complete and the target instance
 * count has reached (or exceeds) the request's instance count.
 */
private boolean isLastStepFinished(SingularityDeployProgress deployProgress, SingularityRequest request) {
  if (!deployProgress.isStepComplete()) {
    return false;
  }
  return deployProgress.getTargetActiveInstances() >= request.getInstancesSafe();
}
/**
 * Completes the current deploy step, unless an update request asks for a different target
 * instance count. In that case the update is applied and {@code WAITING} is returned.
 * Otherwise the progress is stored with the deploy's active task count and the step marked
 * complete, and a {@code DEPLOY_STEP_FINISHED} cleanup is created for every task chosen by
 * {@code tasksToShutDown}.
 *
 * @param pendingDeploy the pending deploy; its deploy progress must be present
 * @return {@code SUCCEEDED} when {@code isLastStepFinished} holds for the new progress,
 *     otherwise {@code WAITING}
 */
private SingularityDeployResult markStepFinished(SingularityPendingDeploy pendingDeploy, Optional deploy, Collection deployActiveTasks,
  Collection otherActiveTasks, SingularityRequest request,
  Optional updatePendingDeployRequest) {
  SingularityDeployProgress deployProgress = pendingDeploy.getDeployProgress().get();

  if (updatePendingDeployRequest.isPresent()) {
    int requestedTarget = getNewTargetInstances(deployProgress, request, updatePendingDeployRequest);
    if (requestedTarget != deployProgress.getTargetActiveInstances()) {
      maybeUpdatePendingRequest(pendingDeploy, deploy, request, updatePendingDeployRequest);
      return new SingularityDeployResult(DeployState.WAITING);
    }
  }

  SingularityDeployProgress finishedStep = deployProgress.withNewActiveInstances(deployActiveTasks.size()).withCompletedStep();

  final DeployState deployState;
  final String message;
  if (isLastStepFinished(finishedStep, request)) {
    deployState = DeployState.SUCCEEDED;
    message = "New deploy succeeded";
  } else {
    deployState = DeployState.WAITING;
    message = "New deploy is progressing, this task is being replaced";
  }

  updatePendingDeploy(pendingDeploy, pendingDeploy.getLastLoadBalancerUpdate(), deployState, Optional.of(finishedStep));

  // The shut-down set is computed from the progress as it was before this step completed.
  for (SingularityTaskId replacedTaskId : tasksToShutDown(deployProgress, otherActiveTasks, request)) {
    SingularityTaskCleanup cleanup = new SingularityTaskCleanup(
      Optional.absent(),
      TaskCleanupType.DEPLOY_STEP_FINISHED,
      System.currentTimeMillis(),
      replacedTaskId,
      Optional.of(message),
      Optional.absent(),
      Optional.absent());
    taskManager.createTaskCleanup(cleanup);
  }

  return new SingularityDeployResult(deployState);
}
/**
 * Chooses which of the other active tasks to shut down for the current deploy step.
 * The number to shut down is the count of other active tasks minus the gap between the
 * request's instance count and the deploy's target, never less than zero. Tasks are taken from
 * the front of a copy sorted with {@code SingularityTaskId.INSTANCE_NO_COMPARATOR}.
 *
 * @return the selected tasks; the input collection itself is not modified
 */
private List tasksToShutDown(SingularityDeployProgress deployProgress, Collection otherActiveTasks, SingularityRequest request) {
  int instancesLeftForOtherTasks = request.getInstancesSafe() - deployProgress.getTargetActiveInstances();
  int numTasksToShutDown = Math.max(otherActiveTasks.size() - instancesLeftForOtherTasks, 0);

  List candidates = new ArrayList<>(otherActiveTasks);
  candidates.sort(SingularityTaskId.INSTANCE_NO_COMPARATOR);

  if (candidates.isEmpty()) {
    return candidates;
  }
  return candidates.subList(0, Math.min(numTasksToShutDown, candidates.size()));
}
/**
 * A deploy may advance on its own when auto-advance is enabled and the step wait time, counted
 * from the progress timestamp, lies strictly before the current time.
 */
private boolean canMoveToNextStep(SingularityDeployProgress deployProgress) {
  if (!deployProgress.isAutoAdvanceDeploySteps()) {
    return false;
  }
  long stepWaitEndsAt = deployProgress.getTimestamp() + deployProgress.getDeployStepWaitTimeMs();
  return stepWaitEndsAt < System.currentTimeMillis();
}
/**
 * Computes the next target instance count for a deploy. An explicit update request supplies the
 * count directly; otherwise the current target grows by the per-step instance count. Either way
 * the result is capped at the request's instance count.
 */
private int getNewTargetInstances(SingularityDeployProgress deployProgress, SingularityRequest request, Optional updateRequest) {
  int uncappedTarget = updateRequest.isPresent()
    ? updateRequest.get().getTargetActiveInstances()
    : deployProgress.getTargetActiveInstances() + deployProgress.getDeployInstanceCountPerStep();
  return Math.min(uncappedTarget, request.getInstancesSafe());
}
/**
 * Decides whether a deploy that is still waiting on its tasks should be failed as overdue.
 *
 * @param request the request being deployed
 * @param deploy the deploy, if available; an absent deploy is never reported as overdue
 * @param pendingDeploy the pending deploy
 * @param deployActiveTasks the tasks currently active for this deploy
 * @param isOverdue whether the caller has determined the deploy exceeded its allowed time
 * @return an {@code OVERDUE} result with failure details when the deploy is present and
 *     overdue, otherwise a plain {@code WAITING} result
 */
private SingularityDeployResult checkOverdue(SingularityRequest request, Optional deploy, SingularityPendingDeploy pendingDeploy, Collection deployActiveTasks, boolean isOverdue) {
  if (!deploy.isPresent() || !isOverdue) {
    return new SingularityDeployResult(DeployState.WAITING);
  }

  // Format the message only on the overdue path: the waiting path never uses it, and this
  // avoids carrying a nullable message around.
  String message = String.format(
    "Deploy was able to launch %s tasks, but not all of them became healthy within %s",
    deployActiveTasks.size(),
    JavaUtils.durationFromMillis(getAllowedMillis(deploy.get())));

  return getDeployResultWithFailures(request, deploy, pendingDeploy, DeployState.OVERDUE, message, deployActiveTasks);
}
/**
 * Builds a deploy result for the given state together with the failures found for the deploy.
 * A single failure that is not tied to a task (e.g. not enough resources to launch all tasks)
 * becomes the result message itself and the failure list is left empty. In every other case
 * the supplied message is used and the collected failures are attached.
 *
 * @param state the deploy state to report
 * @param message the message to use when failures are attached; a {@code null} message yields
 *     a result with no message instead of a {@code NullPointerException}
 * @param matchingTasks the tasks of this deploy to inspect for failures
 * @return the deploy result, stamped with the current time
 */
private SingularityDeployResult getDeployResultWithFailures(SingularityRequest request, Optional deploy, SingularityPendingDeploy pendingDeploy, DeployState state, String message, Collection matchingTasks) {
  List deployFailures = getDeployFailures(request, deploy, pendingDeploy, state, matchingTasks);

  // Single non-task-specific failure should become the deploy result message
  boolean singleGeneralFailure = deployFailures.size() == 1 && !deployFailures.get(0).getTaskId().isPresent();
  if (singleGeneralFailure) {
    return new SingularityDeployResult(state, deployFailures.get(0).getMessage(), pendingDeploy.getLastLoadBalancerUpdate(), Collections.emptyList(), System.currentTimeMillis());
  }

  // fromNullable rather than of: Optional.of(null) throws, and a missing message should not abort the deploy check.
  return new SingularityDeployResult(state, Optional.fromNullable(message), pendingDeploy.getLastLoadBalancerUpdate(), deployFailures, System.currentTimeMillis());
}
/**
 * Collects the failures to report for a deploy. Starts from the task failures supplied by
 * {@code deployHealthHelper}. For an {@code OVERDUE} deploy with no task failures and fewer
 * matching tasks than the target instance count, a single
 * {@code TASK_COULD_NOT_BE_SCHEDULED} failure is added instead. The target is taken from the
 * deploy progress when present, otherwise from the request's instance count.
 *
 * @return a new list of failures, possibly empty
 */
private List getDeployFailures(SingularityRequest request, Optional deploy, SingularityPendingDeploy pendingDeploy, DeployState state, Collection matchingTasks) {
  List failures = new ArrayList<>(deployHealthHelper.getTaskFailures(deploy, matchingTasks));
  if (state != DeployState.OVERDUE) {
    return failures;
  }

  int targetInstances = pendingDeploy.getDeployProgress().isPresent()
    ? pendingDeploy.getDeployProgress().get().getTargetActiveInstances()
    : request.getInstancesSafe();

  boolean launchedTooFew = failures.isEmpty() && matchingTasks.size() < targetInstances;
  if (launchedTooFew) {
    String reason = String.format(
      "Only %s of %s tasks could be launched for deploy, there may not be enough resources to launch the remaining tasks",
      matchingTasks.size(),
      targetInstances);
    failures.add(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_COULD_NOT_BE_SCHEDULED, Optional.absent(), Optional.of(reason)));
  }
  return failures;
}
}