Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
com.hubspot.singularity.scheduler.SingularityDeployChecker Maven / Gradle / Ivy
package com.hubspot.singularity.scheduler;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.inject.Inject;
import com.hubspot.baragon.models.BaragonRequestState;
import com.hubspot.mesos.JavaUtils;
import com.hubspot.singularity.DeployState;
import com.hubspot.singularity.ExtendedTaskState;
import com.hubspot.singularity.LoadBalancerRequestType;
import com.hubspot.singularity.LoadBalancerRequestType.LoadBalancerRequestId;
import com.hubspot.singularity.RequestState;
import com.hubspot.singularity.RequestType;
import com.hubspot.singularity.SingularityDeploy;
import com.hubspot.singularity.SingularityDeployFailure;
import com.hubspot.singularity.SingularityDeployFailureReason;
import com.hubspot.singularity.SingularityDeployKey;
import com.hubspot.singularity.SingularityDeployMarker;
import com.hubspot.singularity.SingularityDeployProgress;
import com.hubspot.singularity.SingularityDeployResult;
import com.hubspot.singularity.SingularityLoadBalancerUpdate;
import com.hubspot.singularity.SingularityManagedThreadPoolFactory;
import com.hubspot.singularity.SingularityPendingDeploy;
import com.hubspot.singularity.SingularityPendingRequest;
import com.hubspot.singularity.SingularityPendingRequest.PendingType;
import com.hubspot.singularity.SingularityPendingRequestBuilder;
import com.hubspot.singularity.SingularityPendingTask;
import com.hubspot.singularity.SingularityPendingTaskId;
import com.hubspot.singularity.SingularityRequest;
import com.hubspot.singularity.SingularityRequestDeployState;
import com.hubspot.singularity.SingularityRequestHistory.RequestHistoryType;
import com.hubspot.singularity.SingularityRequestWithState;
import com.hubspot.singularity.SingularityTask;
import com.hubspot.singularity.SingularityTaskCleanup;
import com.hubspot.singularity.SingularityTaskHistoryUpdate;
import com.hubspot.singularity.SingularityTaskId;
import com.hubspot.singularity.SingularityTaskShellCommandRequestId;
import com.hubspot.singularity.SingularityUpdatePendingDeployRequest;
import com.hubspot.singularity.TaskCleanupType;
import com.hubspot.singularity.api.SingularityRunNowRequest;
import com.hubspot.singularity.async.CompletableFutures;
import com.hubspot.singularity.config.SingularityConfiguration;
import com.hubspot.singularity.data.DeployManager;
import com.hubspot.singularity.data.RequestManager;
import com.hubspot.singularity.data.TaskManager;
import com.hubspot.singularity.data.usage.UsageManager;
import com.hubspot.singularity.expiring.SingularityExpiringPause;
import com.hubspot.singularity.expiring.SingularityExpiringScale;
import com.hubspot.singularity.hooks.LoadBalancerClient;
import com.hubspot.singularity.mesos.SingularitySchedulerLock;
import com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import javax.inject.Singleton;
import org.apache.commons.lang3.time.DurationFormatUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@Singleton
public class SingularityDeployChecker {
// Class-wide SLF4J logger.
private static final Logger LOG = LoggerFactory.getLogger(
SingularityDeployChecker.class
);
// Collaborators assigned once in the @Inject constructor.
private final DeployManager deployManager;
private final TaskManager taskManager;
private final SingularityDeployHealthHelper deployHealthHelper;
private final RequestManager requestManager;
private final SingularityConfiguration configuration;
private final LoadBalancerClient lbClient;
private final SingularitySchedulerLock lock;
private final UsageManager usageManager;
// Executor used for the per-deploy checks in checkDeploys() and for the
// asynchronous utilization cleanup in finishDeploy().
private final ExecutorService deployCheckExecutor;
/**
 * Stores the injected collaborators and obtains the "deploy-checker" thread
 * pool from the factory, sized by {@code configuration.getCoreThreadpoolSize()}.
 */
@Inject
public SingularityDeployChecker(
  DeployManager deployManager,
  SingularityDeployHealthHelper deployHealthHelper,
  LoadBalancerClient lbClient,
  RequestManager requestManager,
  TaskManager taskManager,
  SingularityConfiguration configuration,
  SingularitySchedulerLock lock,
  UsageManager usageManager,
  SingularityManagedThreadPoolFactory threadPoolFactory
) {
  // Assignments follow the parameter order.
  this.deployManager = deployManager;
  this.deployHealthHelper = deployHealthHelper;
  this.lbClient = lbClient;
  this.requestManager = requestManager;
  this.taskManager = taskManager;
  this.configuration = configuration;
  this.lock = lock;
  this.usageManager = usageManager;

  this.deployCheckExecutor =
    threadPoolFactory.get("deploy-checker", configuration.getCoreThreadpoolSize());
}
/**
 * Runs one check pass over every pending deploy.
 *
 * <p>Each pending deploy is checked on {@code deployCheckExecutor} while holding
 * the request lock for its request id; this method blocks until all checks have
 * completed. Afterwards every cancel request and every pending-deploy update
 * request that was read for this pass is deleted.
 *
 * @return the number of pending deploys that were checked, or {@code 0} when
 *         there were neither pending deploys nor cancel requests
 */
public int checkDeploys() {
  final List<SingularityPendingDeploy> pendingDeploys = deployManager.getPendingDeploys();
  final List<SingularityDeployMarker> cancelDeploys = deployManager.getCancelDeploys();
  if (pendingDeploys.isEmpty() && cancelDeploys.isEmpty()) {
    return 0;
  }

  // Read only once we know there is work to do; the early return above never used it.
  final List<SingularityUpdatePendingDeployRequest> updateRequests = deployManager.getPendingDeployUpdates();

  final Map<SingularityPendingDeploy, SingularityDeployKey> pendingDeployToKey = SingularityDeployKey.fromPendingDeploys(
    pendingDeploys
  );
  final Map<SingularityDeployKey, SingularityDeploy> deployKeyToDeploy = deployManager.getDeploysForKeys(
    pendingDeployToKey.values()
  );

  final String lockHolderName = getClass().getSimpleName();
  final List<CompletableFuture<Void>> checks = pendingDeploys
    .stream()
    .map(
      pendingDeploy ->
        CompletableFuture.runAsync(
          () ->
            lock.runWithRequestLock(
              () ->
                checkDeploy(
                  pendingDeploy,
                  cancelDeploys,
                  pendingDeployToKey,
                  deployKeyToDeploy,
                  updateRequests
                ),
              pendingDeploy.getDeployMarker().getRequestId(),
              lockHolderName
            ),
          deployCheckExecutor
        )
    )
    .collect(Collectors.toList());

  // Wait for every per-deploy check before dropping the processed requests.
  CompletableFutures.allOf(checks).join();

  cancelDeploys.forEach(deployManager::deleteCancelDeployRequest);
  updateRequests.forEach(deployManager::deleteUpdatePendingDeployRequest);
  return pendingDeploys.size();
}
/**
 * Evaluates a single pending deploy and, when it has reached a finished state,
 * finalizes it.
 *
 * <p>Steps, in order: reject the deploy if the owning request is not in a state
 * that accepts deploys; split the request's task ids into inactive tasks of this
 * deploy, active tasks of this deploy, and active tasks of other deploys; compute
 * the deploy result; then either finish the deploy as a success, finish it as a
 * failure, or return and leave it pending when the result is not a finished state.
 *
 * @param pendingDeploy the pending deploy to evaluate
 * @param cancelDeploys cancel requests read for this pass
 * @param pendingDeployToKey lookup from pending deploy to its deploy key
 * @param deployKeyToDeploy lookup from deploy key to the stored deploy (may lack an entry)
 * @param updateRequests pending-deploy update requests read for this pass
 */
private void checkDeploy(
final SingularityPendingDeploy pendingDeploy,
final List cancelDeploys,
final Map pendingDeployToKey,
final Map deployKeyToDeploy,
List updateRequests
) {
final SingularityDeployKey deployKey = pendingDeployToKey.get(pendingDeploy);
// Empty when no deploy is stored for the key.
final Optional deploy = Optional.ofNullable(
deployKeyToDeploy.get(deployKey)
);
Optional maybeRequestWithState = requestManager.getRequest(
pendingDeploy.getDeployMarker().getRequestId()
);
// The deploy is removed unless the request is FINISHED, or PAUSED while
// paused deploys are allowed by configuration, or reported active by
// SingularityRequestWithState.isActive.
if (
!(
maybeRequestWithState.isPresent() &&
maybeRequestWithState.get().getState() == RequestState.FINISHED
) &&
!(
configuration.isAllowDeployOfPausedRequests() &&
maybeRequestWithState.isPresent() &&
maybeRequestWithState.get().getState() == RequestState.PAUSED
) &&
!SingularityRequestWithState.isActive(maybeRequestWithState)
) {
LOG.warn(
"Deploy {} request was {}, removing deploy",
pendingDeploy,
SingularityRequestWithState.getRequestState(maybeRequestWithState)
);
if (shouldCancelLoadBalancer(pendingDeploy)) {
cancelLoadBalancer(pendingDeploy, SingularityDeployFailure.deployRemoved());
}
failPendingDeployDueToState(pendingDeploy, maybeRequestWithState, deploy);
return;
}
final SingularityDeployMarker pendingDeployMarker = pendingDeploy.getDeployMarker();
final Optional cancelRequest = findCancel(
cancelDeploys,
pendingDeployMarker
);
final Optional updatePendingDeployRequest = findUpdateRequest(
updateRequests,
pendingDeploy
);
// NOTE(review): presumably isActive() is false for an absent request, which
// would make this get() safe after the guard above - confirm.
final SingularityRequestWithState requestWithState = maybeRequestWithState.get();
// An updated request carried by the pending deploy takes precedence over the stored one.
final SingularityRequest request = pendingDeploy
.getUpdatedRequest()
.orElse(requestWithState.getRequest());
final List requestTasks = taskManager.getTaskIdsForRequest(
request.getId()
);
final List activeTasks = taskManager.filterActiveTaskIds(
requestTasks
);
// Tasks of this deploy that are no longer active.
final List inactiveDeployMatchingTasks = new ArrayList<>(
requestTasks.size()
);
for (SingularityTaskId taskId : requestTasks) {
if (
taskId.getDeployId().equals(pendingDeployMarker.getDeployId()) &&
!activeTasks.contains(taskId)
) {
inactiveDeployMatchingTasks.add(taskId);
}
}
// Active tasks, split by whether they belong to this deploy.
final List deployMatchingTasks = new ArrayList<>(
activeTasks.size()
);
final List allOtherMatchingTasks = new ArrayList<>(
activeTasks.size()
);
for (SingularityTaskId taskId : activeTasks) {
if (taskId.getDeployId().equals(pendingDeployMarker.getDeployId())) {
deployMatchingTasks.add(taskId);
} else {
allOtherMatchingTasks.add(taskId);
}
}
SingularityDeployResult deployResult = getDeployResultSafe(
request,
requestWithState.getState(),
cancelRequest,
pendingDeploy,
updatePendingDeployRequest,
deploy,
deployMatchingTasks,
allOtherMatchingTasks,
inactiveDeployMatchingTasks
);
LOG.info(
"Deploy {} had result {} after {}",
pendingDeployMarker,
deployResult,
JavaUtils.durationFromMillis(
System.currentTimeMillis() - pendingDeployMarker.getTimestamp()
)
);
if (deployResult.getDeployState() == DeployState.SUCCEEDED) {
// Promote the pending marker to active; only continue as a success if that was saved.
if (saveNewDeployState(pendingDeployMarker, Optional.of(pendingDeployMarker))) {
if (request.getRequestType() == RequestType.ON_DEMAND) {
deleteOrRecreatePendingTasks(pendingDeploy);
} else if (request.getRequestType() != RequestType.RUN_ONCE) {
deleteObsoletePendingTasks(pendingDeploy);
}
// On success the active tasks of the other deploys are handed over as tasksToKill.
finishDeploy(
requestWithState,
deploy,
pendingDeploy,
allOtherMatchingTasks,
deployResult
);
return;
} else {
LOG.warn(
"Failing deploy {} because it failed to save deploy state",
pendingDeployMarker
);
// Replace the success result with an internal-state failure and fall
// through to the failure handling below.
deployResult =
new SingularityDeployResult(
DeployState.FAILED_INTERNAL_STATE,
Optional.of(
String.format(
"Deploy had state %s but failed to persist it correctly",
deployResult.getDeployState()
)
),
deployResult.getLbUpdate(),
SingularityDeployFailure.failedToSave(),
deployResult.getTimestamp()
);
}
} else if (!deployResult.getDeployState().isDeployFinished()) {
// Not finished yet: leave the pending deploy in place.
return;
}
// success case is handled, handle failure cases:
// The active deploy is left as it was (Optional.empty() means "no new active
// deploy") and this deploy's own active tasks are handed over as tasksToKill.
saveNewDeployState(pendingDeployMarker, Optional.empty());
finishDeploy(
requestWithState,
deploy,
pendingDeploy,
deployMatchingTasks,
deployResult
);
}
/**
 * Removes every pending task of the request that belongs to a deploy other than
 * the one in {@code pendingDeploy}. Pending tasks of type {@code ONEOFF} that can
 * still be loaded are first re-queued against the new deploy id so they are not
 * lost.
 *
 * <p>All re-queueing happens before any deletion.
 *
 * @param pendingDeploy the deploy whose id identifies the pending tasks to keep
 */
private void deleteOrRecreatePendingTasks(SingularityPendingDeploy pendingDeploy) {
  final SingularityDeployMarker newDeployMarker = pendingDeploy.getDeployMarker();
  final List<SingularityPendingTaskId> obsoletePendingTasks = new ArrayList<>();

  for (SingularityPendingTaskId taskId : taskManager.getPendingTaskIdsForRequest(
    newDeployMarker.getRequestId()
  )) {
    if (taskId.getDeployId().equals(newDeployMarker.getDeployId())) {
      // Already scheduled under the new deploy, nothing to do.
      continue;
    }
    if (taskId.getPendingType() == PendingType.ONEOFF) {
      final Optional<SingularityPendingTask> maybePendingTask = taskManager.getPendingTask(
        taskId
      );
      if (maybePendingTask.isPresent()) {
        // Reschedule any user-initiated pending tasks under the new deploy
        final SingularityPendingTask pendingTask = maybePendingTask.get();
        requestManager.addToPendingQueue(
          new SingularityPendingRequest(
            pendingTask.getPendingTaskId().getRequestId(),
            newDeployMarker.getDeployId(),
            System.currentTimeMillis(),
            pendingTask.getUser(),
            pendingTask.getPendingTaskId().getPendingType(),
            pendingTask.getCmdLineArgsList(),
            pendingTask.getRunId(),
            pendingTask.getSkipHealthchecks(),
            pendingTask.getMessage(),
            pendingTask.getActionId(),
            pendingTask.getResources(),
            pendingTask.getS3UploaderAdditionalFiles(),
            pendingTask.getRunAsUserOverride(),
            pendingTask.getEnvOverrides(),
            pendingTask.getRequiredAgentAttributeOverrides(),
            pendingTask.getAllowedAgentAttributeOverrides(),
            pendingTask.getExtraArtifacts(),
            Optional.of(pendingTask.getPendingTaskId().getNextRunAt())
          )
        );
      }
    }
    obsoletePendingTasks.add(taskId);
  }

  for (SingularityPendingTaskId pendingTaskId : obsoletePendingTasks) {
    LOG.debug("Deleting obsolete pending task {}", pendingTaskId.getId());
    taskManager.deletePendingTask(pendingTaskId);
  }
}
/**
 * Deletes every pending task of the request whose deploy id differs from the
 * deploy id of {@code pendingDeploy}.
 *
 * @param pendingDeploy the deploy whose pending tasks are kept
 */
private void deleteObsoletePendingTasks(SingularityPendingDeploy pendingDeploy) {
  final SingularityDeployMarker newDeployMarker = pendingDeploy.getDeployMarker();

  // Collect first, delete afterwards, so the deletions never interleave with the filtering.
  final List<SingularityPendingTaskId> obsoletePendingTasks = taskManager
    .getPendingTaskIdsForRequest(newDeployMarker.getRequestId())
    .stream()
    .filter(taskId -> !taskId.getDeployId().equals(newDeployMarker.getDeployId()))
    .collect(Collectors.toList());

  for (SingularityPendingTaskId pendingTaskId : obsoletePendingTasks) {
    LOG.debug("Deleting obsolete pending task {}", pendingTaskId.getId());
    taskManager.deletePendingTask(pendingTaskId);
  }
}
/**
 * Looks up the cancel request, if any, that targets the given deploy.
 *
 * @param cancelDeploys outstanding cancel markers to search
 * @param activeDeploy marker of the deploy being checked
 * @return the first cancel marker whose request id and deploy id both equal
 *         those of {@code activeDeploy}, otherwise an empty Optional
 */
private Optional<SingularityDeployMarker> findCancel(
  List<SingularityDeployMarker> cancelDeploys,
  SingularityDeployMarker activeDeploy
) {
  return cancelDeploys
    .stream()
    .filter(
      cancelDeploy ->
        cancelDeploy.getRequestId().equals(activeDeploy.getRequestId()) &&
        cancelDeploy.getDeployId().equals(activeDeploy.getDeployId())
    )
    .findFirst();
}
/**
 * Looks up the pending-deploy update request, if any, that targets the given
 * pending deploy.
 *
 * @param updateRequests outstanding update requests to search
 * @param pendingDeploy the pending deploy being checked
 * @return the first update request whose request id and deploy id both equal
 *         those of the pending deploy's marker, otherwise an empty Optional
 */
private Optional<SingularityUpdatePendingDeployRequest> findUpdateRequest(
  List<SingularityUpdatePendingDeployRequest> updateRequests,
  SingularityPendingDeploy pendingDeploy
) {
  final SingularityDeployMarker marker = pendingDeploy.getDeployMarker();
  return updateRequests
    .stream()
    .filter(
      updateRequest ->
        updateRequest.getRequestId().equals(marker.getRequestId()) &&
        updateRequest.getDeployId().equals(marker.getDeployId())
    )
    .findFirst();
}
/**
 * Saves the same load balancer update, under the given request type, for every
 * task id in {@code taskIds}.
 *
 * @param taskIds tasks whose load balancer state is recorded
 * @param type load balancer request type the state is stored under
 * @param update the load balancer update to store
 */
private void updateLoadBalancerStateForTasks(
  Collection<SingularityTaskId> taskIds,
  LoadBalancerRequestType type,
  SingularityLoadBalancerUpdate update
) {
  taskIds.forEach(taskId -> taskManager.saveLoadBalancerState(taskId, type, update));
}
/**
 * Saves a task cleanup for every task in {@code tasksToKill}.
 *
 * <p>Each cleanup carries the deploy marker's user, the cleanup type from
 * {@link #getCleanupType}, the deploy result timestamp and a message of the form
 * {@code "Deploy <deployId> - <DEPLOY_STATE>"}.
 *
 * @param pendingDeploy the deploy that triggered the cleanup
 * @param request the request the tasks belong to
 * @param deployResult the result that decided the cleanup
 * @param tasksToKill tasks to register for cleanup
 */
private void cleanupTasks(
  SingularityPendingDeploy pendingDeploy,
  SingularityRequest request,
  SingularityDeployResult deployResult,
  Iterable<SingularityTaskId> tasksToKill
) {
  final SingularityDeployMarker marker = pendingDeploy.getDeployMarker();
  // Both values are the same for every task, so compute them once.
  final TaskCleanupType cleanupType = getCleanupType(pendingDeploy, request, deployResult);
  final Optional<String> cleanupMessage = Optional.of(
    String.format(
      "Deploy %s - %s",
      marker.getDeployId(),
      deployResult.getDeployState().name()
    )
  );

  for (SingularityTaskId matchingTask : tasksToKill) {
    taskManager.saveTaskCleanup(
      new SingularityTaskCleanup(
        marker.getUser(),
        cleanupType,
        deployResult.getTimestamp(),
        matchingTask,
        cleanupMessage,
        Optional.empty(),
        Optional.empty()
      )
    );
  }
}
/**
 * Chooses the cleanup type for tasks killed as a result of a deploy.
 *
 * <p>A deploy counts as incremental when it has deploy progress whose
 * instance-count-per-step differs from the request's instance count. Incremental
 * deploys that FAILED or were CANCELED get dedicated cleanup types; every other
 * case uses the cleanup type attached to the deploy state itself.
 */
private TaskCleanupType getCleanupType(
  SingularityPendingDeploy pendingDeploy,
  SingularityRequest request,
  SingularityDeployResult deployResult
) {
  final Optional<SingularityDeployProgress> progress = pendingDeploy.getDeployProgress();
  final boolean incrementalDeploy =
    progress.isPresent() &&
    progress.get().getDeployInstanceCountPerStep() != request.getInstancesSafe();

  final DeployState finalState = deployResult.getDeployState();
  if (incrementalDeploy) {
    // For incremental deploys, return a special cleanup type
    switch (finalState) {
      case FAILED:
        return TaskCleanupType.INCREMENTAL_DEPLOY_FAILED;
      case CANCELED:
        return TaskCleanupType.INCREMENTAL_DEPLOY_CANCELLED;
      default:
        break;
    }
  }
  return finalState.getCleanupType();
}
/**
 * Rewrites the request's deploy state so that it no longer has a pending deploy.
 *
 * <p>The active deploy becomes {@code newActiveDeploy} when one is supplied;
 * otherwise the currently stored active deploy is kept.
 *
 * @param pendingDeployMarker marker of the deploy being finalized; its request id
 *        selects the deploy state to rewrite
 * @param newActiveDeploy the marker to promote to active, or empty to keep the
 *        current active deploy
 * @return {@code false} when no deploy state exists for the request (nothing is
 *         written), {@code true} once the new state has been handed to the
 *         deploy manager
 */
private boolean saveNewDeployState(
  SingularityDeployMarker pendingDeployMarker,
  Optional<SingularityDeployMarker> newActiveDeploy
) {
  final Optional<SingularityRequestDeployState> deployState = deployManager.getRequestDeployState(
    pendingDeployMarker.getRequestId()
  );
  if (!deployState.isPresent()) {
    LOG.error(
      "Expected deploy state for deploy marker: {} but didn't find it",
      pendingDeployMarker
    );
    return false;
  }

  final SingularityRequestDeployState currentState = deployState.get();
  final Optional<SingularityDeployMarker> activeDeploy = newActiveDeploy.isPresent()
    ? newActiveDeploy
    : currentState.getActiveDeploy();

  deployManager.saveNewRequestDeployState(
    new SingularityRequestDeployState(
      currentState.getRequestId(),
      activeDeploy,
      // The pending slot is always cleared.
      Optional.empty()
    )
  );
  return true;
}
/**
 * Finalizes a deploy that has reached a finished state: registers task cleanups,
 * queues whatever follow-up pending requests the outcome requires, persists the
 * deploy result, adjusts the request state, and finally removes the pending deploy.
 *
 * @param requestWithState the stored request together with its current state
 * @param deploy the stored deploy, if one was found
 * @param pendingDeploy the pending deploy being finalized
 * @param tasksToKill tasks to register for cleanup
 * @param deployResult the final result of the deploy
 */
private void finishDeploy(
SingularityRequestWithState requestWithState,
Optional deploy,
SingularityPendingDeploy pendingDeploy,
Iterable tasksToKill,
SingularityDeployResult deployResult
) {
SingularityRequest request = requestWithState.getRequest();
// One-off and RUN_ONCE requests never have their tasks cleaned up here.
if (!request.isOneOff() && !(request.getRequestType() == RequestType.RUN_ONCE)) {
cleanupTasks(pendingDeploy, request, deployResult, tasksToKill);
}
// Case 1: the deploy asked to run immediately - build a pending request from
// the run-now settings, falling back to request/marker values where unset.
if (deploy.isPresent() && deploy.get().getRunImmediately().isPresent()) {
String requestId = deploy.get().getRequestId();
String deployId = deploy.get().getId();
SingularityRunNowRequest runNowRequest = deploy.get().getRunImmediately().get();
List activeTasks = taskManager.getActiveTaskIdsForRequest(
requestId
);
List pendingTasks = taskManager.getPendingTaskIdsForRequest(
requestId
);
SingularityPendingRequestBuilder builder = new SingularityPendingRequestBuilder()
.setRequestId(requestId)
.setDeployId(deployId)
.setTimestamp(deployResult.getTimestamp())
.setUser(pendingDeploy.getDeployMarker().getUser())
.setCmdLineArgsList(runNowRequest.getCommandLineArgs())
.setRunId(
Optional.of(runNowRequest.getRunId().orElse(UUID.randomUUID().toString()))
)
.setSkipHealthchecks(
runNowRequest.getSkipHealthchecks().isPresent()
? runNowRequest.getSkipHealthchecks()
: request.getSkipHealthchecks()
)
.setMessage(
runNowRequest.getMessage().isPresent()
? runNowRequest.getMessage()
: pendingDeploy.getDeployMarker().getMessage()
)
.setResources(runNowRequest.getResources())
.setRunAsUserOverride(runNowRequest.getRunAsUserOverride())
.setEnvOverrides(runNowRequest.getEnvOverrides())
.setExtraArtifacts(runNowRequest.getExtraArtifacts())
.setRunAt(runNowRequest.getRunAt());
// Stays null for long-running, non-scheduled requests; nothing is queued then.
PendingType pendingType = null;
if (request.isScheduled()) {
if (activeTasks.isEmpty()) {
pendingType = PendingType.IMMEDIATE;
} else {
// Don't run scheduled task over a running task. Will be picked up on the next run.
pendingType = PendingType.NEW_DEPLOY;
}
} else if (!request.isLongRunning()) {
if (
request.getInstances().isPresent() &&
(activeTasks.size() + pendingTasks.size() < request.getInstances().get())
) {
pendingType = PendingType.ONEOFF;
} else {
// Don't run one-off / on-demand task when already at instance count cap
pendingType = PendingType.NEW_DEPLOY;
}
}
if (pendingType != null) {
// A canceled deploy always queues DEPLOY_CANCELLED instead of the chosen type.
builder.setPendingType(canceledOr(deployResult.getDeployState(), pendingType));
requestManager.addToPendingQueue(builder.build());
} else {
LOG.warn("Could not determine pending type for deploy {}.", deployId);
}
// Case 2: no run-now, and the request is neither deployable nor one-off -
// queue NEW_DEPLOY (or DEPLOY_CANCELLED for a canceled deploy).
} else if (!request.isDeployable() && !request.isOneOff()) {
PendingType pendingType = canceledOr(
deployResult.getDeployState(),
PendingType.NEW_DEPLOY
);
requestManager.addToPendingQueue(
new SingularityPendingRequest(
request.getId(),
pendingDeploy.getDeployMarker().getDeployId(),
deployResult.getTimestamp(),
pendingDeploy.getDeployMarker().getUser(),
pendingType,
deploy.isPresent()
? deploy.get().getSkipHealthchecksOnDeploy()
: Optional.empty(),
pendingDeploy.getDeployMarker().getMessage()
)
);
}
// Success-only housekeeping on the request.
if (deployResult.getDeployState() == DeployState.SUCCEEDED) {
if (request.isDeployable() && !request.isOneOff()) {
// remove the lock on bounces in case we deployed during a bounce
requestManager.markBounceComplete(request.getId());
requestManager.removeExpiringBounce(request.getId());
}
if (requestWithState.getState() == RequestState.FINISHED) {
// A FINISHED request is moved to ACTIVE state so we can reevaluate the schedule
requestManager.activate(
request,
RequestHistoryType.UPDATED,
System.currentTimeMillis(),
deploy.isPresent() ? deploy.get().getUser() : Optional.empty(),
Optional.empty()
);
}
// Clear utilization since a new deploy will update usage patterns
// do this async so sql isn't on the main scheduling path for deploys
CompletableFuture
.runAsync(
() -> usageManager.deleteRequestUtilization(request.getId()),
deployCheckExecutor
)
.exceptionally(
t -> {
// A failure here is only logged; it does not affect the deploy outcome.
LOG.error("Could not clear usage data after new deploy", t);
return null;
}
);
}
// Persist the result regardless of outcome.
deployManager.saveDeployResult(pendingDeploy.getDeployMarker(), deploy, deployResult);
// Unsuccessful deploy of a deployable request: re-queue the still-active
// deploy, unless the request is PAUSED or DEPLOYING_TO_UNPAUSE.
if (
request.isDeployable() &&
(
deployResult.getDeployState() == DeployState.CANCELED ||
deployResult.getDeployState() == DeployState.FAILED ||
deployResult.getDeployState() == DeployState.OVERDUE
)
) {
Optional maybeRequestDeployState = deployManager.getRequestDeployState(
request.getId()
);
if (
maybeRequestDeployState.isPresent() &&
maybeRequestDeployState.get().getActiveDeploy().isPresent() &&
!(
requestWithState.getState() == RequestState.PAUSED ||
requestWithState.getState() == RequestState.DEPLOYING_TO_UNPAUSE
)
) {
requestManager.addToPendingQueue(
new SingularityPendingRequest(
request.getId(),
maybeRequestDeployState.get().getActiveDeploy().get().getDeployId(),
deployResult.getTimestamp(),
pendingDeploy.getDeployMarker().getUser(),
deployResult.getDeployState() == DeployState.CANCELED
? PendingType.DEPLOY_CANCELLED
: PendingType.DEPLOY_FAILED,
request.getSkipHealthchecks(),
pendingDeploy.getDeployMarker().getMessage()
)
);
}
}
// Successful deploy whose own tasks already have cleanups registered:
// queue DEPLOY_FINISHED for the request.
if (deployResult.getDeployState() == DeployState.SUCCEEDED) {
List newDeployCleaningTasks = taskManager
.getCleanupTaskIds()
.stream()
.filter(
t -> t.getDeployId().equals(pendingDeploy.getDeployMarker().getDeployId())
)
.collect(Collectors.toList());
// Account for any bounce/decom that may have happened during the deploy
if (!newDeployCleaningTasks.isEmpty()) {
requestManager.addToPendingQueue(
new SingularityPendingRequest(
request.getId(),
pendingDeploy.getDeployMarker().getDeployId(),
deployResult.getTimestamp(),
pendingDeploy.getDeployMarker().getUser(),
PendingType.DEPLOY_FINISHED,
request.getSkipHealthchecks(),
pendingDeploy.getDeployMarker().getMessage()
)
);
}
}
// Successful deploy that finished with a target instance count different from
// the request's instance count: queue UPDATED_REQUEST (not while PAUSED).
if (
request.isDeployable() &&
deployResult.getDeployState() == DeployState.SUCCEEDED &&
pendingDeploy.getDeployProgress().isPresent() &&
requestWithState.getState() != RequestState.PAUSED
) {
if (
pendingDeploy.getDeployProgress().get().getTargetActiveInstances() !=
request.getInstancesSafe()
) {
requestManager.addToPendingQueue(
new SingularityPendingRequest(
request.getId(),
pendingDeploy.getDeployMarker().getDeployId(),
deployResult.getTimestamp(),
pendingDeploy.getDeployMarker().getUser(),
PendingType.UPDATED_REQUEST,
request.getSkipHealthchecks(),
pendingDeploy.getDeployMarker().getMessage()
)
);
}
}
// A request that was deploying in order to unpause is activated on success
// and paused again on any other outcome.
if (requestWithState.getState() == RequestState.DEPLOYING_TO_UNPAUSE) {
if (deployResult.getDeployState() == DeployState.SUCCEEDED) {
requestManager.activate(
request,
RequestHistoryType.DEPLOYED_TO_UNPAUSE,
deployResult.getTimestamp(),
pendingDeploy.getDeployMarker().getUser(),
Optional.empty()
);
requestManager.deleteExpiringObject(
SingularityExpiringPause.class,
request.getId()
);
} else {
requestManager.pause(
request,
deployResult.getTimestamp(),
pendingDeploy.getDeployMarker().getUser(),
Optional.empty()
);
}
}
// An updated request bundled with the deploy is only persisted on success;
// the expiring scale for the request is deleted at the same time.
if (
pendingDeploy.getUpdatedRequest().isPresent() &&
deployResult.getDeployState() == DeployState.SUCCEEDED
) {
requestManager.update(
pendingDeploy.getUpdatedRequest().get(),
System.currentTimeMillis(),
pendingDeploy.getDeployMarker().getUser(),
Optional.empty()
);
requestManager.deleteExpiringObject(
SingularityExpiringScale.class,
request.getId()
);
}
// Last step: the deploy is no longer pending.
removePendingDeploy(pendingDeploy);
}
/**
 * Decides whether a deploy should be transferred, based on the request's deploy
 * state.
 *
 * @return {@code true} when the deploy state is missing (a warning is logged) or
 *         when {@code deployId} is neither the active nor the pending deploy of
 *         that state; {@code false} when it is one of the two
 */
private boolean shouldTransferDeploy(
  String requestId,
  SingularityRequestDeployState deployState,
  String deployId
) {
  if (deployState == null) {
    LOG.warn(
      "Missing request deploy state for request {}. deploy {}",
      requestId,
      deployId
    );
    return true;
  }

  final boolean isActiveDeploy =
    deployState.getActiveDeploy().isPresent() &&
    deployState.getActiveDeploy().get().getDeployId().equals(deployId);
  if (isActiveDeploy) {
    return false;
  }

  final boolean isPendingDeploy =
    deployState.getPendingDeploy().isPresent() &&
    deployState.getPendingDeploy().get().getDeployId().equals(deployId);
  return !isPendingDeploy;
}
/**
 * Returns {@code DEPLOY_CANCELLED} for a canceled deploy and the supplied
 * {@code pendingType} for every other deploy state.
 */
private PendingType canceledOr(DeployState deployState, PendingType pendingType) {
  return deployState == DeployState.CANCELED
    ? PendingType.DEPLOY_CANCELLED
    : pendingType;
}
/**
 * Deletes the pending deploy stored for the request id of the given pending
 * deploy's marker.
 */
private void removePendingDeploy(SingularityPendingDeploy pendingDeploy) {
  final SingularityDeployMarker marker = pendingDeploy.getDeployMarker();
  deployManager.deletePendingDeploy(marker.getRequestId());
}
/**
 * Fails a pending deploy because its request is in a state that does not accept
 * deploys.
 *
 * <p>When the request no longer exists, only the FAILED result is saved and the
 * pending deploy removed. Otherwise the pending slot of the request's deploy
 * state is cleared and the deploy goes through {@link #finishDeploy} with no
 * tasks to kill.
 *
 * @param pendingDeploy the deploy to fail
 * @param maybeRequestWithState the owning request, if it still exists
 * @param deploy the stored deploy, if one was found
 */
private void failPendingDeployDueToState(
  SingularityPendingDeploy pendingDeploy,
  Optional<SingularityRequestWithState> maybeRequestWithState,
  Optional<SingularityDeploy> deploy
) {
  final SingularityDeployResult deployResult = new SingularityDeployResult(
    DeployState.FAILED,
    Optional.of(
      String.format(
        "Request in state %s is not deployable",
        SingularityRequestWithState.getRequestState(maybeRequestWithState)
      )
    ),
    Optional.empty()
  );

  if (maybeRequestWithState.isPresent()) {
    saveNewDeployState(pendingDeploy.getDeployMarker(), Optional.empty());
    finishDeploy(
      maybeRequestWithState.get(),
      deploy,
      pendingDeploy,
      Collections.emptyList(),
      deployResult
    );
    return;
  }

  // No request left to update: just record the failure and drop the pending deploy.
  deployManager.saveDeployResult(pendingDeploy.getDeployMarker(), deploy, deployResult);
  removePendingDeploy(pendingDeploy);
}
/**
 * Computes how long, in milliseconds, a deploy (or deploy step) may take before
 * it is considered overdue.
 *
 * <p>The base is the deploy's own health timeout, or the configured
 * deploy-healthy-by value when unset. On top of that comes either the maximum
 * healthcheck timeout (when the deploy has a healthcheck that is not skipped on
 * deploy) or the consider-healthy-after-running value (deploy setting, else
 * configuration).
 */
private long getAllowedMillis(SingularityDeploy deploy) {
  long totalSeconds = deploy
    .getDeployHealthTimeoutSeconds()
    .orElse(configuration.getDeployHealthyBySeconds());

  final boolean waitsOnHealthcheck =
    deploy.getHealthcheck().isPresent() &&
    !deploy.getSkipHealthchecksOnDeploy().orElse(false);

  if (waitsOnHealthcheck) {
    totalSeconds +=
      deployHealthHelper.getMaxHealthcheckTimeoutSeconds(deploy.getHealthcheck().get());
  } else {
    totalSeconds +=
      deploy
        .getConsiderHealthyAfterRunningForSeconds()
        .orElse(configuration.getConsiderTaskHealthyAfterRunningForSeconds());
  }
  return TimeUnit.SECONDS.toMillis(totalSeconds);
}
/**
 * Determines whether a pending deploy has exceeded its allowed time.
 *
 * <ul>
 *   <li>Deploy missing: overdue once the marker is older than the configured
 *       deploy-healthy-by seconds.</li>
 *   <li>Current step already complete: never overdue.</li>
 *   <li>Otherwise: overdue when the time since {@link #getStartTime} exceeds
 *       {@link #getAllowedMillis}.</li>
 * </ul>
 *
 * @param pendingDeploy the pending deploy being checked
 * @param deploy the stored deploy, if one was found
 * @return {@code true} when the deploy should be treated as overdue
 */
private boolean isDeployOverdue(
  SingularityPendingDeploy pendingDeploy,
  Optional<SingularityDeploy> deploy
) {
  if (!deploy.isPresent()) {
    final long pendingMillis =
      System.currentTimeMillis() - pendingDeploy.getDeployMarker().getTimestamp();
    if (
      pendingMillis > TimeUnit.SECONDS.toMillis(configuration.getDeployHealthyBySeconds())
    ) {
      LOG.warn(
        "Can't determine if deploy {} is overdue because it was missing, but pending time is > {}s, marking as overdue",
        pendingDeploy,
        configuration.getDeployHealthyBySeconds()
      );
      return true;
    }
    LOG.warn(
      "Can't determine if deploy {} is overdue because it was missing",
      pendingDeploy
    );
    return false;
  }

  if (
    pendingDeploy.getDeployProgress().isPresent() &&
    pendingDeploy.getDeployProgress().get().isStepComplete()
  ) {
    return false;
  }

  final long deployDuration = System.currentTimeMillis() - getStartTime(pendingDeploy);
  final long allowedTime = getAllowedMillis(deploy.get());
  final boolean overdue = deployDuration > allowedTime;
  if (overdue) {
    LOG.warn(
      "Deploy {} is overdue (duration: {}), allowed: {}",
      pendingDeploy,
      DurationFormatUtils.formatDurationHMS(deployDuration),
      DurationFormatUtils.formatDurationHMS(allowedTime)
    );
  } else {
    LOG.trace(
      "Deploy {} is not yet overdue (duration: {}), allowed: {}",
      pendingDeploy,
      DurationFormatUtils.formatDurationHMS(deployDuration),
      DurationFormatUtils.formatDurationHMS(allowedTime)
    );
  }
  return overdue;
}
/**
 * Returns the timestamp from which deploy duration is measured: the deploy
 * progress timestamp when progress exists, otherwise the deploy marker timestamp.
 */
private long getStartTime(SingularityPendingDeploy pendingDeploy) {
  final Optional<SingularityDeployProgress> progress = pendingDeploy.getDeployProgress();
  if (!progress.isPresent()) {
    return pendingDeploy.getDeployMarker().getTimestamp();
  }
  return progress.get().getTimestamp();
}
/**
 * Resolves task ids to tasks using the supplied lookup map.
 *
 * @param taskIds ids to resolve, in iteration order
 * @param taskIdToTask lookup from task id to task
 * @return one entry per id, in the same order; an id missing from the map
 *         yields a {@code null} entry
 */
private List<SingularityTask> getTasks(
  Collection<SingularityTaskId> taskIds,
  Map<SingularityTaskId, SingularityTask> taskIdToTask
) {
  final List<SingularityTask> tasks = new ArrayList<>(taskIds.size());
  for (SingularityTaskId taskId : taskIds) {
    // TODO what if one is missing? (currently a null entry is added)
    tasks.add(taskIdToTask.get(taskId));
  }
  return tasks;
}
/**
 * Saves a copy of the pending deploy with a new load balancer update, deploy
 * state and deploy progress. The deploy marker and the updated request are
 * carried over unchanged.
 *
 * @param pendingDeploy the pending deploy to copy from
 * @param lbUpdate load balancer update to store on the copy
 * @param deployState deploy state to store on the copy
 * @param deployProgress deploy progress to store on the copy
 */
private void updatePendingDeploy(
  SingularityPendingDeploy pendingDeploy,
  Optional<SingularityLoadBalancerUpdate> lbUpdate,
  DeployState deployState,
  Optional<SingularityDeployProgress> deployProgress
) {
  deployManager.savePendingDeploy(
    new SingularityPendingDeploy(
      pendingDeploy.getDeployMarker(),
      lbUpdate,
      deployState,
      deployProgress,
      pendingDeploy.getUpdatedRequest()
    )
  );
}
/**
 * Saves a copy of the pending deploy with a new load balancer update and deploy
 * state, keeping its current deploy progress.
 *
 * @param pendingDeploy the pending deploy to copy from
 * @param lbUpdate load balancer update to store on the copy
 * @param deployState deploy state to store on the copy
 */
private void updatePendingDeploy(
  SingularityPendingDeploy pendingDeploy,
  Optional<SingularityLoadBalancerUpdate> lbUpdate,
  DeployState deployState
) {
  final Optional<SingularityDeployProgress> unchangedProgress = pendingDeploy.getDeployProgress();
  updatePendingDeploy(pendingDeploy, lbUpdate, deployState, unchangedProgress);
}
/**
 * Translates the load balancer state of an update into a deploy state.
 *
 * @param lbUpdate the load balancer update to interpret
 * @param unknownState the deploy state to return when the load balancer state is UNKNOWN
 * @return the matching deploy state; WAITING for the WAITING state and for any
 *         state without an explicit mapping
 */
private DeployState interpretLoadBalancerState(
  SingularityLoadBalancerUpdate lbUpdate,
  DeployState unknownState
) {
  final DeployState interpreted;
  switch (lbUpdate.getLoadBalancerState()) {
    case SUCCESS:
      interpreted = DeployState.SUCCEEDED;
      break;
    case CANCELED:
      interpreted = DeployState.CANCELED;
      break;
    case CANCELING:
      interpreted = DeployState.CANCELING;
      break;
    case FAILED:
    case INVALID_REQUEST_NOOP:
      interpreted = DeployState.FAILED;
      break;
    case UNKNOWN:
      interpreted = unknownState;
      break;
    case WAITING:
    default:
      interpreted = DeployState.WAITING;
      break;
  }
  return interpreted;
}
/**
 * Asks the load balancer client to cancel the load balancer request that
 * belongs to the given pending deploy and returns the client's answer.
 */
private SingularityLoadBalancerUpdate sendCancelToLoadBalancer(
  SingularityPendingDeploy pendingDeploy
) {
  final LoadBalancerRequestId lbRequestId = getLoadBalancerRequestId(pendingDeploy);
  return lbClient.cancel(lbRequestId);
}
/**
 * Cancels the deploy's load balancer request, stores the resulting load balancer
 * update and deploy state on the pending deploy, and wraps both in a deploy result.
 *
 * <p>An UNKNOWN load balancer answer is interpreted as CANCELING.
 *
 * @param pendingDeploy the deploy whose load balancer request is canceled
 * @param deployFailures failures to attach to the returned result
 * @return a deploy result carrying the interpreted state, the load balancer
 *         update and {@code deployFailures}
 */
private SingularityDeployResult cancelLoadBalancer(
  SingularityPendingDeploy pendingDeploy,
  List deployFailures
) {
  final SingularityLoadBalancerUpdate cancelUpdate = sendCancelToLoadBalancer(
    pendingDeploy
  );
  final DeployState stateAfterCancel = interpretLoadBalancerState(
    cancelUpdate,
    DeployState.CANCELING
  );

  updatePendingDeploy(pendingDeploy, Optional.of(cancelUpdate), stateAfterCancel);

  return new SingularityDeployResult(stateAfterCancel, cancelUpdate, deployFailures);
}
/**
 * A load balancer cancel is warranted only when the pending deploy has a recorded
 * load balancer update and its current deploy state is not a finished one.
 */
private boolean shouldCancelLoadBalancer(final SingularityPendingDeploy pendingDeploy) {
  if (!pendingDeploy.getLastLoadBalancerUpdate().isPresent()) {
    return false;
  }
  return !pendingDeploy.getCurrentDeployState().isDeployFinished();
}
/**
 * Reports whether the load balancer state of the pending deploy should be checked.
 *
 * @return {@code true} only when a last load balancer update exists, its request
 *         id equals the id this checker would generate for the deploy, and its
 *         load balancer state is not UNKNOWN
 */
private boolean shouldCheckLbState(final SingularityPendingDeploy pendingDeploy) {
  final Optional<SingularityLoadBalancerUpdate> lastUpdate = pendingDeploy.getLastLoadBalancerUpdate();
  if (!lastUpdate.isPresent()) {
    return false;
  }

  final boolean sameLbRequest = getLoadBalancerRequestId(pendingDeploy)
    .getId()
    .equals(lastUpdate.get().getLoadBalancerRequestId().getId());
  if (!sameLbRequest) {
    return false;
  }

  return lastUpdate.get().getLoadBalancerState() != BaragonRequestState.UNKNOWN;
}
/**
 * Builds the load balancer request id for a pending deploy. The id string is
 * {@code "<requestId>-<deployId>-<targetActiveInstances>"}, with request type
 * DEPLOY and an empty third constructor argument.
 *
 * <p>Requires deploy progress to be present; {@code Optional.get()} throws otherwise.
 */
private LoadBalancerRequestId getLoadBalancerRequestId(
  SingularityPendingDeploy pendingDeploy
) {
  final SingularityDeployMarker marker = pendingDeploy.getDeployMarker();
  final String lbRequestIdString = String.format(
    "%s-%s-%s",
    marker.getRequestId(),
    marker.getDeployId(),
    pendingDeploy.getDeployProgress().get().getTargetActiveInstances()
  );
  return new LoadBalancerRequestId(
    lbRequestIdString,
    LoadBalancerRequestType.DEPLOY,
    Optional.empty()
  );
}
/**
 * Computes the deploy result via {@link #getDeployResult}, converting any
 * exception into a {@code FAILED_INTERNAL_STATE} result so that one broken
 * deploy cannot abort the surrounding check.
 *
 * @param request the request being deployed
 * @param requestState current state of that request
 * @param cancelRequest cancel request targeting this deploy, if any
 * @param pendingDeploy the pending deploy being evaluated
 * @param updatePendingDeployRequest update request targeting this deploy, if any
 * @param deploy the stored deploy, if one was found
 * @param deployActiveTasks active tasks belonging to this deploy
 * @param otherActiveTasks active tasks belonging to other deploys
 * @param inactiveDeployMatchingTasks tasks of this deploy that are no longer active
 * @return the computed result, or a {@code FAILED_INTERNAL_STATE} result carrying
 *         the exception message
 */
private SingularityDeployResult getDeployResultSafe(
  final SingularityRequest request,
  final RequestState requestState,
  final Optional<SingularityDeployMarker> cancelRequest,
  final SingularityPendingDeploy pendingDeploy,
  final Optional<SingularityUpdatePendingDeployRequest> updatePendingDeployRequest,
  final Optional<SingularityDeploy> deploy,
  final Collection<SingularityTaskId> deployActiveTasks,
  final Collection<SingularityTaskId> otherActiveTasks,
  final Collection<SingularityTaskId> inactiveDeployMatchingTasks
) {
  try {
    return getDeployResult(
      request,
      requestState,
      cancelRequest,
      pendingDeploy,
      updatePendingDeployRequest,
      deploy,
      deployActiveTasks,
      otherActiveTasks,
      inactiveDeployMatchingTasks
    );
  } catch (Exception e) {
    // The trailing throwable argument makes SLF4J log the stack trace.
    LOG.error(
      "Uncaught exception processing deploy {} - {}",
      pendingDeploy.getDeployMarker().getRequestId(),
      pendingDeploy.getDeployMarker().getDeployId(),
      e
    );
    return new SingularityDeployResult(
      DeployState.FAILED_INTERNAL_STATE,
      String.format("Uncaught exception: %s", e.getMessage())
    );
  }
}
/**
 * Computes the current result of a pending deploy.
 *
 * <p>Checks, in order: requests that need no real deploy succeed immediately;
 * a missing deploy with an already-finished stored result returns that result;
 * missing deploy progress fails the deploy; newly failed tasks either trigger a
 * retry (result WAITING) or fail the deploy; otherwise the decision is delegated
 * to checkDeployProgress.
 *
 * @param request the request being deployed
 * @param requestState current state of that request
 * @param cancelRequest cancel request targeting this deploy, if any
 * @param pendingDeploy the pending deploy being evaluated
 * @param updatePendingDeployRequest update request targeting this deploy, if any
 * @param deploy the stored deploy, if one was found
 * @param deployActiveTasks active tasks belonging to this deploy
 * @param otherActiveTasks active tasks belonging to other deploys
 * @param inactiveDeployMatchingTasks tasks of this deploy that are no longer active
 * @return the deploy result for this check pass
 */
private SingularityDeployResult getDeployResult(
final SingularityRequest request,
final RequestState requestState,
final Optional cancelRequest,
final SingularityPendingDeploy pendingDeploy,
final Optional updatePendingDeployRequest,
final Optional deploy,
final Collection deployActiveTasks,
final Collection otherActiveTasks,
final Collection inactiveDeployMatchingTasks
) {
// Non-deployable requests, and PAUSED requests when paused deploys are
// allowed by configuration, succeed without any further checks.
if (
!request.isDeployable() ||
(
configuration.isAllowDeployOfPausedRequests() &&
requestState == RequestState.PAUSED
)
) {
LOG.info(
"Succeeding a deploy {} because the request {} was not deployable",
pendingDeploy,
request
);
return new SingularityDeployResult(DeployState.SUCCEEDED, "Request not deployable");
}
if (!deploy.isPresent()) {
// Check for abandoned pending deploy
Optional result = deployManager.getDeployResult(
request.getId(),
pendingDeploy.getDeployMarker().getDeployId()
);
if (result.isPresent() && result.get().getDeployState().isDeployFinished()) {
LOG.info(
"Deploy was already finished, running cleanup of pending data for {}",
pendingDeploy.getDeployMarker()
);
return result.get();
}
}
// Everything below reads the deploy progress, so its absence is a hard failure.
if (!pendingDeploy.getDeployProgress().isPresent()) {
return new SingularityDeployResult(
DeployState.FAILED,
"No deploy progress data present in Zookeeper. Please reattempt your deploy"
);
}
// Inactive tasks of this deploy that are not yet recorded as failed in the deploy progress.
Set newInactiveDeployTasks = getNewInactiveDeployTasks(
pendingDeploy,
inactiveDeployMatchingTasks
);
if (!newInactiveDeployTasks.isEmpty()) {
if (canRetryTasks(deploy, inactiveDeployMatchingTasks)) {
// Retry: record all inactive tasks as failed in the progress, keep the
// deploy WAITING, and queue the next deploy step.
SingularityDeployProgress newProgress = pendingDeploy
.getDeployProgress()
.get()
.withFailedTasks(new HashSet<>(inactiveDeployMatchingTasks));
updatePendingDeploy(
pendingDeploy,
pendingDeploy.getLastLoadBalancerUpdate(),
DeployState.WAITING,
Optional.of(newProgress)
);
requestManager.addToPendingQueue(
new SingularityPendingRequest(
request.getId(),
pendingDeploy.getDeployMarker().getDeployId(),
System.currentTimeMillis(),
pendingDeploy.getDeployMarker().getUser(),
PendingType.NEXT_DEPLOY_STEP,
deploy.isPresent()
? deploy.get().getSkipHealthchecksOnDeploy()
: Optional.empty(),
pendingDeploy.getDeployMarker().getMessage()
)
);
return new SingularityDeployResult(DeployState.WAITING);
}
// No retry possible: the deploy fails; an in-flight load balancer request
// is canceled on a best-effort basis first.
if (request.isLoadBalanced() && shouldCancelLoadBalancer(pendingDeploy)) {
LOG.info(
"Attempting to cancel pending load balancer request, failing deploy {} regardless",
pendingDeploy
);
sendCancelToLoadBalancer(pendingDeploy);
}
// NOTE(review): deploy.get() is unchecked here although deploy may be empty
// at this point (see the isPresent() checks above) - confirm whether a
// missing deploy can reach this line.
int maxRetries = deploy
.get()
.getMaxTaskRetries()
.orElse(configuration.getDefaultDeployMaxTaskRetries());
return getDeployResultWithFailures(
request,
deploy,
pendingDeploy,
DeployState.FAILED,
String.format(
"%s task(s) for this deploy failed",
inactiveDeployMatchingTasks.size() - maxRetries
),
inactiveDeployMatchingTasks
);
}
// No new task failures: evaluate the regular deploy progress.
return checkDeployProgress(
request,
cancelRequest,
pendingDeploy,
updatePendingDeployRequest,
deploy,
deployActiveTasks,
otherActiveTasks
);
}
/**
 * Decides whether the inactive tasks of a pending deploy may still be retried.
 *
 * <p>The retry budget is the deploy's own max task retries when set, otherwise
 * {@code configuration.getDefaultDeployMaxTaskRetries()}. An absent deploy also falls back to
 * the configured default instead of throwing from {@code Optional.get()}; the caller's retry
 * branch already copes with a missing deploy.
 *
 * @param deploy the deploy being checked; may be empty
 * @param inactiveDeployMatchingTasks ids of the inactive tasks matching this deploy
 * @return {@code true} when retries are enabled and the number of tasks counted against the
 *     budget does not exceed it
 */
private boolean canRetryTasks(
  Optional deploy,
  Collection inactiveDeployMatchingTasks
) {
  int defaultMaxRetries = configuration.getDefaultDeployMaxTaskRetries();
  int maxRetries = deploy.isPresent()
    ? deploy.get().getMaxTaskRetries().orElse(defaultMaxRetries)
    : defaultMaxRetries;
  if (maxRetries <= 0) {
    // Retries are disabled, so the answer is false regardless of the task histories;
    // skip the per-task history lookups.
    return false;
  }
  long matchingInactiveTasks = inactiveDeployMatchingTasks
    .stream()
    .filter(
      t -> {
        // All TASK_LOSTs that are not resource limit related should be able to be retried,
        // so a task with such an update is not counted against the retry budget.
        for (SingularityTaskHistoryUpdate historyUpdate : taskManager.getTaskHistoryUpdates(
          t
        )) {
          if (
            historyUpdate.getTaskState() == ExtendedTaskState.TASK_LOST &&
            !historyUpdate.getStatusReason().orElse("").startsWith("REASON_CONTAINER")
          ) {
            return false;
          }
        }
        return true;
      }
    )
    .count();
  return matchingInactiveTasks <= maxRetries;
}
/**
 * Returns the inactive deploy tasks that are not yet listed as failed in the pending deploy's
 * progress.
 *
 * @param pendingDeploy the pending deploy whose recorded failed tasks are subtracted
 * @param inactiveDeployMatchingTasks all inactive tasks matching the deploy
 * @return a new mutable set; equal to the input when no deploy progress is present
 */
private Set getNewInactiveDeployTasks(
  SingularityPendingDeploy pendingDeploy,
  Collection inactiveDeployMatchingTasks
) {
  Set notYetRecorded = new HashSet<>(inactiveDeployMatchingTasks);
  if (!pendingDeploy.getDeployProgress().isPresent()) {
    return notYetRecorded;
  }
  // Drop everything the progress record already knows has failed.
  notYetRecorded.removeAll(
    pendingDeploy.getDeployProgress().get().getFailedDeployTasks()
  );
  return notYetRecorded;
}
/**
 * Evaluates a pending deploy and decides the deploy result for this check.
 *
 * <p>The checks run in this order, the first one that applies wins: an explicit cancel
 * request, an already completed step, a load balancer request that needs its state checked,
 * an overdue load-balanced deploy whose load balancer request should be canceled, too few
 * active deploy tasks, a load balancer request still being waited on, and finally the health
 * of the deploy's active tasks.
 *
 * @param request the request being deployed
 * @param cancelRequest a cancel request for this deploy, if one was made
 * @param pendingDeploy the pending deploy; its deploy progress must be present
 * @param updatePendingDeployRequest a request to change the target instance count, if any
 * @param deploy the deploy definition, if available
 * @param deployActiveTasks active tasks belonging to this deploy
 * @param otherActiveTasks active tasks of the request that do not belong to this deploy
 * @return the deploy result for this check
 */
private SingularityDeployResult checkDeployProgress(
final SingularityRequest request,
final Optional cancelRequest,
final SingularityPendingDeploy pendingDeploy,
final Optional updatePendingDeployRequest,
final Optional deploy,
final Collection deployActiveTasks,
final Collection otherActiveTasks
) {
// Unchecked get(): the visible call site returns a FAILED result before calling this method
// when deploy progress is absent.
SingularityDeployProgress deployProgress = pendingDeploy.getDeployProgress().get();
// An explicit cancel request takes priority over every other check.
if (cancelRequest.isPresent()) {
LOG.info(
"Canceling a deploy {} due to cancel request {}",
pendingDeploy,
cancelRequest.get()
);
String userMessage = cancelRequest.get().getUser().isPresent()
? String.format(" by %s", cancelRequest.get().getUser().get())
: "";
return new SingularityDeployResult(
DeployState.CANCELED,
Optional.of(
String.format(
"Canceled due to request%s at %s",
userMessage,
cancelRequest.get().getTimestamp()
)
),
pendingDeploy.getLastLoadBalancerUpdate(),
Collections.emptyList(),
System.currentTimeMillis()
);
}
// The current step is already complete; only decide whether to advance to the next step.
if (deployProgress.isStepComplete()) {
return checkCanMoveToNextDeployStep(
request,
deploy,
pendingDeploy,
updatePendingDeployRequest
);
}
// Computed once here and reused by the overdue checks further down.
final boolean isDeployOverdue = isDeployOverdue(pendingDeploy, deploy);
// When shouldCheckLbState says so, fetch the load balancer request's current state and let
// processLbState decide the outcome.
if (shouldCheckLbState(pendingDeploy)) {
final SingularityLoadBalancerUpdate lbUpdate = lbClient.getState(
getLoadBalancerRequestId(pendingDeploy)
);
return processLbState(
request,
deploy,
pendingDeploy,
updatePendingDeployRequest,
deployActiveTasks,
otherActiveTasks,
tasksToShutDown(deployProgress, otherActiveTasks, request),
lbUpdate
);
}
// Overdue load-balanced deploy: cancel the load balancer request and report the failures
// collected for the OVERDUE state.
if (
isDeployOverdue &&
request.isLoadBalanced() &&
shouldCancelLoadBalancer(pendingDeploy)
) {
return cancelLoadBalancer(
pendingDeploy,
getDeployFailures(
request,
deploy,
pendingDeploy,
DeployState.OVERDUE,
deployActiveTasks
)
);
}
// Fewer deploy tasks are active than this step targets: apply any pending target update and
// either keep waiting or report the deploy as overdue.
if (deployActiveTasks.size() < deployProgress.getTargetActiveInstances()) {
maybeUpdatePendingRequest(
pendingDeploy,
deploy,
request,
updatePendingDeployRequest
);
return checkOverdue(
request,
deploy,
pendingDeploy,
deployActiveTasks,
isDeployOverdue
);
}
// The last load balancer update belongs to the current request and is still WAITING.
if (isWaitingForCurrentLbRequest(pendingDeploy)) {
return new SingularityDeployResult(
DeployState.WAITING,
Optional.of("Waiting on load balancer API"),
pendingDeploy.getLastLoadBalancerUpdate()
);
}
// NOTE(review): the meaning of the trailing 'true' flag is defined by getDeployHealth, which
// is not visible here - confirm against that method.
final DeployHealth deployHealth = deployHealthHelper.getDeployHealth(
request,
deploy,
deployActiveTasks,
true
);
switch (deployHealth) {
case WAITING:
// Health is not decided yet: same handling as the "too few active tasks" case above.
maybeUpdatePendingRequest(
pendingDeploy,
deploy,
request,
updatePendingDeployRequest
);
return checkOverdue(
request,
deploy,
pendingDeploy,
deployActiveTasks,
isDeployOverdue
);
case HEALTHY:
// Without a load balancer a healthy step is finished immediately.
if (!request.isLoadBalanced()) {
return markStepFinished(
pendingDeploy,
deploy,
deployActiveTasks,
otherActiveTasks,
request,
updatePendingDeployRequest
);
}
// A pending update asks for a different target than the current step: apply it and wait
// instead of enqueueing a load balancer request for the old target.
if (
updatePendingDeployRequest.isPresent() &&
updatePendingDeployRequest.get().getTargetActiveInstances() !=
deployProgress.getTargetActiveInstances()
) {
maybeUpdatePendingRequest(
pendingDeploy,
deploy,
request,
updatePendingDeployRequest
);
return new SingularityDeployResult(DeployState.WAITING);
}
// A load-balanced request cannot proceed without a configured load balancer URI.
if (configuration.getLoadBalancerUri() == null) {
LOG.warn(
"Deploy {} required a load balancer URI but it wasn't set",
pendingDeploy
);
return new SingularityDeployResult(
DeployState.FAILED,
Optional.of("No valid load balancer URI was present"),
Optional.empty(),
Collections.emptyList(),
System.currentTimeMillis()
);
}
// Healthchecks are marked finished and startup healthchecks cleared for every active deploy
// task before the load balancer request is enqueued.
for (SingularityTaskId activeTaskId : deployActiveTasks) {
taskManager.markHealthchecksFinished(activeTaskId);
taskManager.clearStartupHealthchecks(activeTaskId);
}
return enqueueAndProcessLbRequest(
request,
deploy,
pendingDeploy,
updatePendingDeployRequest,
deployActiveTasks,
otherActiveTasks
);
case UNHEALTHY:
default:
// UNHEALTHY and any other health value fail the deploy; healthcheck bookkeeping is closed
// out for the active tasks first.
for (SingularityTaskId activeTaskId : deployActiveTasks) {
taskManager.markHealthchecksFinished(activeTaskId);
taskManager.clearStartupHealthchecks(activeTaskId);
}
return getDeployResultWithFailures(
request,
deploy,
pendingDeploy,
DeployState.FAILED,
"Not all tasks for deploy were healthy",
deployActiveTasks
);
}
}
/**
 * Advances a deploy whose current step is complete to its next target instance count, when
 * that is allowed.
 *
 * <p>The deploy advances when {@code canMoveToNextStep} permits it or when an explicit update
 * request is present. Advancing stores the new progress with state WAITING and enqueues a
 * {@code NEXT_DEPLOY_STEP} pending request. The returned result is WAITING in every case.
 *
 * @param request the request being deployed
 * @param deploy the deploy definition, if available
 * @param pendingDeploy the pending deploy; its deploy progress must be present
 * @param updatePendingDeployRequest a request to change the target instance count, if any
 * @return a WAITING deploy result
 */
private SingularityDeployResult checkCanMoveToNextDeployStep(
  SingularityRequest request,
  Optional deploy,
  SingularityPendingDeploy pendingDeploy,
  Optional updatePendingDeployRequest
) {
  SingularityDeployProgress currentProgress = pendingDeploy.getDeployProgress().get();
  boolean shouldAdvance =
    canMoveToNextStep(currentProgress) || updatePendingDeployRequest.isPresent();
  if (!shouldAdvance) {
    return new SingularityDeployResult(DeployState.WAITING);
  }

  int nextTarget = getNewTargetInstances(
    currentProgress,
    request,
    updatePendingDeployRequest
  );
  SingularityDeployProgress advancedProgress = currentProgress.withNewTargetInstances(
    nextTarget
  );
  updatePendingDeploy(
    pendingDeploy,
    pendingDeploy.getLastLoadBalancerUpdate(),
    DeployState.WAITING,
    Optional.of(advancedProgress)
  );
  // Queue the next step for this deploy.
  requestManager.addToPendingQueue(
    new SingularityPendingRequest(
      request.getId(),
      pendingDeploy.getDeployMarker().getDeployId(),
      System.currentTimeMillis(),
      pendingDeploy.getDeployMarker().getUser(),
      PendingType.NEXT_DEPLOY_STEP,
      deploy.isPresent()
        ? deploy.get().getSkipHealthchecksOnDeploy()
        : Optional.empty(),
      pendingDeploy.getDeployMarker().getMessage()
    )
  );
  return new SingularityDeployResult(DeployState.WAITING);
}
/**
 * Enqueues a load balancer request for the current deploy step and processes the enqueue
 * result via {@code processLbState}.
 *
 * <p>Before enqueueing, a pre-enqueue load balancer state is recorded as ADD for the deploy's
 * active tasks and as REMOVE for those tasks to shut down whose previous ADD succeeded or is
 * still in progress.
 *
 * @param request the request being deployed
 * @param deploy the deploy definition; {@code get()} is called on it unchecked
 * @param pendingDeploy the pending deploy; its deploy progress must be present
 * @param updatePendingDeployRequest a request to change the target instance count, if any
 * @param deployActiveTasks active tasks belonging to this deploy
 * @param otherActiveTasks active tasks of the request that do not belong to this deploy
 * @return the deploy result produced by {@code processLbState} for the enqueue result
 */
private SingularityDeployResult enqueueAndProcessLbRequest(
SingularityRequest request,
Optional deploy,
SingularityPendingDeploy pendingDeploy,
Optional updatePendingDeployRequest,
Collection deployActiveTasks,
Collection otherActiveTasks
) {
Collection toShutDown = tasksToShutDown(
pendingDeploy.getDeployProgress().get(),
otherActiveTasks,
request
);
// Load the task data for both groups in one call; it is split again via getTasks below.
final Map tasks = taskManager.getTasks(
Iterables.concat(deployActiveTasks, toShutDown)
);
final LoadBalancerRequestId lbRequestId = getLoadBalancerRequestId(pendingDeploy);
// Only tasks whose ADD state is SUCCESS or in progress get a REMOVE state recorded.
List toRemoveFromLb = new ArrayList<>();
for (SingularityTaskId taskId : toShutDown) {
Optional maybeAddUpdate = taskManager.getLoadBalancerState(
taskId,
LoadBalancerRequestType.ADD
);
if (
maybeAddUpdate.isPresent() &&
(
maybeAddUpdate.get().getLoadBalancerState() == BaragonRequestState.SUCCESS ||
maybeAddUpdate.get().getLoadBalancerState().isInProgress()
)
) {
toRemoveFromLb.add(taskId);
}
}
// Record the pre-enqueue state for both directions before the request is sent.
updateLoadBalancerStateForTasks(
deployActiveTasks,
LoadBalancerRequestType.ADD,
SingularityLoadBalancerUpdate.preEnqueue(lbRequestId)
);
updateLoadBalancerStateForTasks(
toRemoveFromLb,
LoadBalancerRequestType.REMOVE,
SingularityLoadBalancerUpdate.preEnqueue(lbRequestId)
);
// Note that the enqueue call receives every task in toShutDown, not only toRemoveFromLb.
// NOTE(review): the last two arguments look like the tasks to add and the tasks to remove -
// confirm against lbClient.enqueue.
SingularityLoadBalancerUpdate enqueueResult = lbClient.enqueue(
lbRequestId,
request,
deploy.get(),
getTasks(deployActiveTasks, tasks),
getTasks(toShutDown, tasks)
);
return processLbState(
request,
deploy,
pendingDeploy,
updatePendingDeployRequest,
deployActiveTasks,
otherActiveTasks,
toShutDown,
enqueueResult
);
}
/**
 * Records a load balancer update for the affected tasks and translates it into a deploy
 * result.
 *
 * <p>The update is stored as the ADD state of the deploy's active tasks and as the REMOVE
 * state of those tasks to shut down whose existing REMOVE state carries the same load balancer
 * request id. A SUCCEEDED interpretation finishes the current step; WAITING keeps waiting; any
 * other state is reported together with a load-balancer-update failure.
 *
 * @param request the request being deployed
 * @param deploy the deploy definition, if available
 * @param pendingDeploy the pending deploy
 * @param updatePendingDeployRequest a request to change the target instance count, if any
 * @param deployActiveTasks active tasks belonging to this deploy
 * @param otherActiveTasks active tasks of the request that do not belong to this deploy
 * @param tasksToShutDown tasks that are to be shut down for this step
 * @param lbUpdate the load balancer update to process
 * @return the resulting deploy result
 */
private SingularityDeployResult processLbState(
  SingularityRequest request,
  Optional deploy,
  SingularityPendingDeploy pendingDeploy,
  Optional updatePendingDeployRequest,
  Collection deployActiveTasks,
  Collection otherActiveTasks,
  Collection tasksToShutDown,
  SingularityLoadBalancerUpdate lbUpdate
) {
  List removalsForThisRequest = new ArrayList<>();
  for (SingularityTaskId shutDownTaskId : tasksToShutDown) {
    Optional existingRemove = taskManager.getLoadBalancerState(
      shutDownTaskId,
      LoadBalancerRequestType.REMOVE
    );
    if (!existingRemove.isPresent()) {
      continue;
    }
    boolean sameLbRequest = existingRemove
      .get()
      .getLoadBalancerRequestId()
      .getId()
      .equals(lbUpdate.getLoadBalancerRequestId().getId());
    if (sameLbRequest) {
      removalsForThisRequest.add(shutDownTaskId);
    }
  }

  updateLoadBalancerStateForTasks(deployActiveTasks, LoadBalancerRequestType.ADD, lbUpdate);
  updateLoadBalancerStateForTasks(
    removalsForThisRequest,
    LoadBalancerRequestType.REMOVE,
    lbUpdate
  );

  DeployState interpretedState = interpretLoadBalancerState(
    lbUpdate,
    pendingDeploy.getCurrentDeployState()
  );

  if (interpretedState == DeployState.SUCCEEDED) {
    // A step has completed, markStepFinished will determine SUCCEEDED/WAITING
    updatePendingDeploy(pendingDeploy, Optional.of(lbUpdate), DeployState.WAITING);
    return markStepFinished(
      pendingDeploy,
      deploy,
      deployActiveTasks,
      otherActiveTasks,
      request,
      updatePendingDeployRequest
    );
  }

  // Every non-successful outcome stores the interpreted state and applies a pending
  // target-instance update before the result is built.
  updatePendingDeploy(pendingDeploy, Optional.of(lbUpdate), interpretedState);
  maybeUpdatePendingRequest(
    pendingDeploy,
    deploy,
    request,
    updatePendingDeployRequest,
    Optional.of(lbUpdate)
  );
  if (interpretedState == DeployState.WAITING) {
    return new SingularityDeployResult(DeployState.WAITING);
  }
  return new SingularityDeployResult(
    interpretedState,
    lbUpdate,
    SingularityDeployFailure.lbUpdateFailed()
  );
}
/**
 * Convenience overload of {@code maybeUpdatePendingRequest} for callers that have no new load
 * balancer update; it delegates with an empty load balancer update.
 *
 * @param pendingDeploy the pending deploy
 * @param deploy the deploy definition, if available
 * @param request the request being deployed
 * @param updatePendingDeployRequest a request to change the target instance count, if any
 */
private void maybeUpdatePendingRequest(
SingularityPendingDeploy pendingDeploy,
Optional deploy,
SingularityRequest request,
Optional updatePendingDeployRequest
) {
maybeUpdatePendingRequest(
pendingDeploy,
deploy,
request,
updatePendingDeployRequest,
Optional.empty()
);
}
/**
 * Applies a pending target-instance update to the deploy, if there is one.
 *
 * <p>Nothing happens unless both an update request and deploy progress are present. Otherwise
 * the progress is re-targeted to the requested instance count, capped at
 * {@code request.getInstancesSafe()}, stored with state WAITING, and a
 * {@code NEXT_DEPLOY_STEP} pending request is enqueued.
 *
 * @param pendingDeploy the pending deploy
 * @param deploy the deploy definition, if available
 * @param request the request being deployed
 * @param updatePendingDeployRequest a request to change the target instance count, if any
 * @param lbUpdate load balancer update to store; when empty, the pending deploy's last load
 *     balancer update is kept
 */
private void maybeUpdatePendingRequest(
  SingularityPendingDeploy pendingDeploy,
  Optional deploy,
  SingularityRequest request,
  Optional updatePendingDeployRequest,
  Optional lbUpdate
) {
  if (
    !updatePendingDeployRequest.isPresent() ||
    !pendingDeploy.getDeployProgress().isPresent()
  ) {
    return;
  }

  int cappedTarget = Math.min(
    updatePendingDeployRequest.get().getTargetActiveInstances(),
    request.getInstancesSafe()
  );
  SingularityDeployProgress retargetedProgress = pendingDeploy
    .getDeployProgress()
    .get()
    .withNewTargetInstances(cappedTarget);

  updatePendingDeploy(
    pendingDeploy,
    lbUpdate.isPresent() ? lbUpdate : pendingDeploy.getLastLoadBalancerUpdate(),
    DeployState.WAITING,
    Optional.of(retargetedProgress)
  );
  requestManager.addToPendingQueue(
    new SingularityPendingRequest(
      request.getId(),
      pendingDeploy.getDeployMarker().getDeployId(),
      System.currentTimeMillis(),
      pendingDeploy.getDeployMarker().getUser(),
      PendingType.NEXT_DEPLOY_STEP,
      deploy.isPresent()
        ? deploy.get().getSkipHealthchecksOnDeploy()
        : Optional.empty(),
      pendingDeploy.getDeployMarker().getMessage()
    )
  );
}
/**
 * Tells whether the pending deploy's last load balancer update belongs to the current load
 * balancer request and is still in the WAITING state.
 *
 * @param pendingDeploy the pending deploy to inspect
 * @return {@code true} only when a last update exists, its request id equals the current
 *     request id, and its state is {@code BaragonRequestState.WAITING}
 */
private boolean isWaitingForCurrentLbRequest(SingularityPendingDeploy pendingDeploy) {
  if (!pendingDeploy.getLastLoadBalancerUpdate().isPresent()) {
    return false;
  }
  boolean isCurrentRequest = getLoadBalancerRequestId(pendingDeploy)
    .getId()
    .equals(
      pendingDeploy.getLastLoadBalancerUpdate().get().getLoadBalancerRequestId().getId()
    );
  if (!isCurrentRequest) {
    return false;
  }
  return (
    pendingDeploy.getLastLoadBalancerUpdate().get().getLoadBalancerState() ==
    BaragonRequestState.WAITING
  );
}
/**
 * Tells whether the deploy has completed its final step.
 *
 * @param deployProgress the progress to inspect
 * @param request the request being deployed
 * @return {@code true} when the step is complete and the targeted instance count is at least
 *     {@code request.getInstancesSafe()}
 */
private boolean isLastStepFinished(
  SingularityDeployProgress deployProgress,
  SingularityRequest request
) {
  if (!deployProgress.isStepComplete()) {
    return false;
  }
  return deployProgress.getTargetActiveInstances() >= request.getInstancesSafe();
}
/**
 * Marks the current deploy step as finished and schedules cleanup of the tasks it replaces.
 *
 * <p>If an update request asks for a different target than the current one, the update is
 * applied instead and WAITING is returned. Otherwise the progress is stored as completed with
 * the current number of active deploy tasks, the state becomes SUCCEEDED when this was the
 * last step (WAITING otherwise), and a {@code DEPLOY_STEP_FINISHED} cleanup is created for
 * every task selected by {@code tasksToShutDown}.
 *
 * @param pendingDeploy the pending deploy; its deploy progress must be present
 * @param deploy the deploy definition, if available
 * @param deployActiveTasks active tasks belonging to this deploy
 * @param otherActiveTasks active tasks of the request that do not belong to this deploy
 * @param request the request being deployed
 * @param updatePendingDeployRequest a request to change the target instance count, if any
 * @return a result carrying SUCCEEDED or WAITING
 */
private SingularityDeployResult markStepFinished(
  SingularityPendingDeploy pendingDeploy,
  Optional deploy,
  Collection deployActiveTasks,
  Collection otherActiveTasks,
  SingularityRequest request,
  Optional updatePendingDeployRequest
) {
  SingularityDeployProgress currentProgress = pendingDeploy.getDeployProgress().get();

  boolean targetChangeRequested =
    updatePendingDeployRequest.isPresent() &&
    getNewTargetInstances(currentProgress, request, updatePendingDeployRequest) !=
    currentProgress.getTargetActiveInstances();
  if (targetChangeRequested) {
    maybeUpdatePendingRequest(pendingDeploy, deploy, request, updatePendingDeployRequest);
    return new SingularityDeployResult(DeployState.WAITING);
  }

  SingularityDeployProgress completedProgress = currentProgress
    .withNewActiveInstances(deployActiveTasks.size())
    .withCompletedStep();

  DeployState resultState;
  String cleanupMessage;
  if (isLastStepFinished(completedProgress, request)) {
    resultState = DeployState.SUCCEEDED;
    cleanupMessage = "New deploy succeeded";
  } else {
    resultState = DeployState.WAITING;
    cleanupMessage = "New deploy is progressing, this task is being replaced";
  }

  updatePendingDeploy(
    pendingDeploy,
    pendingDeploy.getLastLoadBalancerUpdate(),
    resultState,
    Optional.of(completedProgress)
  );

  // The tasks to replace are selected from the progress as it was before this step was
  // marked complete.
  for (SingularityTaskId replacedTaskId : tasksToShutDown(
    currentProgress,
    otherActiveTasks,
    request
  )) {
    taskManager.createTaskCleanup(
      new SingularityTaskCleanup(
        Optional.empty(),
        TaskCleanupType.DEPLOY_STEP_FINISHED,
        System.currentTimeMillis(),
        replacedTaskId,
        Optional.of(cleanupMessage),
        Optional.empty(),
        Optional.empty()
      )
    );
  }
  return new SingularityDeployResult(resultState);
}
/**
 * Selects which of the request's other active tasks should be shut down for the current step.
 *
 * <p>The number selected is the count of other active tasks minus the difference between
 * {@code request.getInstancesSafe()} and the progress' target active instances, floored at
 * zero and capped at the number of other tasks. Tasks are taken from the front of the list
 * after sorting with {@code SingularityTaskId.INSTANCE_NO_COMPARATOR}.
 *
 * @param deployProgress the current deploy progress
 * @param otherActiveTasks active tasks of the request that do not belong to this deploy
 * @param request the request being deployed
 * @return the tasks to shut down; empty when there are no other active tasks
 */
private List tasksToShutDown(
  SingularityDeployProgress deployProgress,
  Collection otherActiveTasks,
  SingularityRequest request
) {
  int surplusCount = Math.max(
    otherActiveTasks.size() -
    (request.getInstancesSafe() - deployProgress.getTargetActiveInstances()),
    0
  );

  List byInstanceNo = new ArrayList<>(otherActiveTasks);
  Collections.sort(byInstanceNo, SingularityTaskId.INSTANCE_NO_COMPARATOR);
  if (byInstanceNo.isEmpty()) {
    return byInstanceNo;
  }

  int endExclusive = Math.min(surplusCount, byInstanceNo.size());
  return byInstanceNo.subList(0, endExclusive);
}
/**
 * Tells whether the deploy may automatically advance to its next step.
 *
 * @param deployProgress the progress to inspect
 * @return {@code true} when auto-advance is enabled and the progress timestamp plus the step
 *     wait time lies before the current time
 */
private boolean canMoveToNextStep(SingularityDeployProgress deployProgress) {
  if (!deployProgress.isAutoAdvanceDeploySteps()) {
    return false;
  }
  return (
    deployProgress.getTimestamp() + deployProgress.getDeployStepWaitTimeMs() <
    System.currentTimeMillis()
  );
}
/**
 * Computes the next target instance count for a deploy.
 *
 * <p>An explicit update request wins; otherwise the current target is raised by the per-step
 * instance count. Either value is capped at {@code request.getInstancesSafe()}.
 *
 * @param deployProgress the current deploy progress
 * @param request the request being deployed
 * @param updateRequest a request to change the target instance count, if any
 * @return the new target instance count
 */
private int getNewTargetInstances(
  SingularityDeployProgress deployProgress,
  SingularityRequest request,
  Optional updateRequest
) {
  int uncappedTarget;
  if (updateRequest.isPresent()) {
    uncappedTarget = updateRequest.get().getTargetActiveInstances();
  } else {
    uncappedTarget =
      deployProgress.getTargetActiveInstances() +
      deployProgress.getDeployInstanceCountPerStep();
  }
  return Math.min(uncappedTarget, request.getInstancesSafe());
}
/**
 * Turns the overdue flag into a deploy result.
 *
 * @param request the request being deployed
 * @param deploy the deploy definition, if available
 * @param pendingDeploy the pending deploy
 * @param deployActiveTasks active tasks belonging to this deploy
 * @param isOverdue whether the deploy has been found to be overdue
 * @return WAITING when not overdue; otherwise an OVERDUE result, which includes failure
 *     details when the deploy definition is available
 */
private SingularityDeployResult checkOverdue(
  SingularityRequest request,
  Optional deploy,
  SingularityPendingDeploy pendingDeploy,
  Collection deployActiveTasks,
  boolean isOverdue
) {
  // Built up front whenever the deploy is known; stays null otherwise.
  final String overdueMessage = deploy.isPresent()
    ? String.format(
      "Deploy was able to launch %s tasks, but not all of them became healthy within %s",
      deployActiveTasks.size(),
      JavaUtils.durationFromMillis(getAllowedMillis(deploy.get()))
    )
    : null;

  if (!isOverdue) {
    return new SingularityDeployResult(DeployState.WAITING);
  }
  if (!deploy.isPresent()) {
    return new SingularityDeployResult(DeployState.OVERDUE);
  }
  return getDeployResultWithFailures(
    request,
    deploy,
    pendingDeploy,
    DeployState.OVERDUE,
    overdueMessage,
    deployActiveTasks
  );
}
/**
 * Builds a deploy result for {@code state} together with the failures reported by
 * {@code getDeployFailures}.
 *
 * <p>When there is exactly one failure and it is not tied to a task, that failure's own
 * message becomes the result message and the failure list is left empty. Otherwise the given
 * {@code message} is used and all failures are attached.
 *
 * @param request the request being deployed
 * @param deploy the deploy definition, if available
 * @param pendingDeploy the pending deploy
 * @param state the deploy state to report
 * @param message the result message; a {@code null} message yields an empty optional instead
 *     of a {@code NullPointerException}
 * @param matchingTasks the tasks whose failures are collected
 * @return the assembled deploy result
 */
private SingularityDeployResult getDeployResultWithFailures(
  SingularityRequest request,
  Optional deploy,
  SingularityPendingDeploy pendingDeploy,
  DeployState state,
  String message,
  Collection matchingTasks
) {
  List deployFailures = getDeployFailures(
    request,
    deploy,
    pendingDeploy,
    state,
    matchingTasks
  );
  if (deployFailures.size() == 1 && !deployFailures.get(0).getTaskId().isPresent()) { // Single non-task-specific failure should become the deploy result message (e.g. not enough resources to launch all tasks)
    return new SingularityDeployResult(
      state,
      deployFailures.get(0).getMessage(),
      pendingDeploy.getLastLoadBalancerUpdate(),
      Collections.emptyList(),
      System.currentTimeMillis()
    );
  } else {
    return new SingularityDeployResult(
      state,
      // ofNullable: message is a plain String parameter and may be null.
      Optional.ofNullable(message),
      pendingDeploy.getLastLoadBalancerUpdate(),
      deployFailures,
      System.currentTimeMillis()
    );
  }
}
/**
 * Collects the failures to report for a deploy.
 *
 * <p>Starts with the task failures from {@code deployHealthHelper.getTaskFailures}. For an
 * OVERDUE deploy with no task failures and fewer matching tasks than targeted, a single
 * {@code TASK_COULD_NOT_BE_SCHEDULED} failure without a task id is added.
 *
 * @param request the request being deployed
 * @param deploy the deploy definition, if available
 * @param pendingDeploy the pending deploy; its progress supplies the target instance count
 *     when present, otherwise {@code request.getInstancesSafe()} is used
 * @param state the deploy state being reported
 * @param matchingTasks the tasks whose failures are collected
 * @return a new mutable list of failures, possibly empty
 */
private List getDeployFailures(
  SingularityRequest request,
  Optional deploy,
  SingularityPendingDeploy pendingDeploy,
  DeployState state,
  Collection matchingTasks
) {
  List collected = new ArrayList<>(
    deployHealthHelper.getTaskFailures(deploy, matchingTasks)
  );
  if (state != DeployState.OVERDUE) {
    return collected;
  }

  int expectedInstances;
  if (pendingDeploy.getDeployProgress().isPresent()) {
    expectedInstances = pendingDeploy.getDeployProgress().get().getTargetActiveInstances();
  } else {
    expectedInstances = request.getInstancesSafe();
  }

  if (!collected.isEmpty() || matchingTasks.size() >= expectedInstances) {
    return collected;
  }

  // No task-level failure explains the overdue state, yet tasks are missing.
  collected.add(
    new SingularityDeployFailure(
      SingularityDeployFailureReason.TASK_COULD_NOT_BE_SCHEDULED,
      Optional.empty(),
      Optional.of(
        String.format(
          "Only %s of %s tasks could be launched for deploy, there may not be enough resources to launch the remaining tasks",
          matchingTasks.size(),
          expectedInstances
        )
      )
    )
  );
  return collected;
}
}