All downloads are free. Search and download functionality uses the official Maven repository.

com.hubspot.singularity.scheduler.SingularityCleaner Maven / Gradle / Ivy

package com.hubspot.singularity.scheduler;

import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Multiset;
import com.google.inject.Inject;
import com.hubspot.baragon.models.BaragonRequestState;
import com.hubspot.mesos.JavaUtils;
import com.hubspot.singularity.LoadBalancerRequestType;
import com.hubspot.singularity.LoadBalancerRequestType.LoadBalancerRequestId;
import com.hubspot.singularity.RequestCleanupType;
import com.hubspot.singularity.RequestState;
import com.hubspot.singularity.SingularityDeleteResult;
import com.hubspot.singularity.SingularityDeploy;
import com.hubspot.singularity.SingularityDeployKey;
import com.hubspot.singularity.SingularityKilledTaskIdRecord;
import com.hubspot.singularity.SingularityLoadBalancerUpdate;
import com.hubspot.singularity.SingularityManagedThreadPoolFactory;
import com.hubspot.singularity.SingularityPendingRequest;
import com.hubspot.singularity.SingularityPendingRequest.PendingType;
import com.hubspot.singularity.SingularityPendingTaskId;
import com.hubspot.singularity.SingularityRequest;
import com.hubspot.singularity.SingularityRequestCleanup;
import com.hubspot.singularity.SingularityRequestDeployState;
import com.hubspot.singularity.SingularityRequestHistory;
import com.hubspot.singularity.SingularityRequestLbCleanup;
import com.hubspot.singularity.SingularityRequestWithState;
import com.hubspot.singularity.SingularityTask;
import com.hubspot.singularity.SingularityTaskCleanup;
import com.hubspot.singularity.SingularityTaskId;
import com.hubspot.singularity.SingularityTaskShellCommandRequest;
import com.hubspot.singularity.SingularityTaskShellCommandRequestId;
import com.hubspot.singularity.SingularityTaskShellCommandUpdate;
import com.hubspot.singularity.TaskCleanupType;
import com.hubspot.singularity.async.CompletableFutures;
import com.hubspot.singularity.config.SingularityConfiguration;
import com.hubspot.singularity.data.DeployManager;
import com.hubspot.singularity.data.RequestGroupManager;
import com.hubspot.singularity.data.RequestManager;
import com.hubspot.singularity.data.TaskManager;
import com.hubspot.singularity.data.history.RequestHistoryHelper;
import com.hubspot.singularity.data.usage.UsageManager;
import com.hubspot.singularity.expiring.SingularityExpiringBounce;
import com.hubspot.singularity.hooks.LoadBalancerClient;
import com.hubspot.singularity.mesos.SingularityMesosScheduler;
import com.hubspot.singularity.mesos.SingularitySchedulerLock;
import com.hubspot.singularity.scheduler.SingularityDeployHealthHelper.DeployHealth;
import com.hubspot.singularity.sentry.SingularityExceptionNotifier;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import javax.inject.Singleton;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Singleton
public class SingularityCleaner {
  // Class-wide SLF4J logger.
  private static final Logger LOG = LoggerFactory.getLogger(SingularityCleaner.class);

  // Injected collaborators; each one is assigned exactly once in the constructor.
  private final TaskManager taskManager;
  private final DeployManager deployManager;
  private final RequestManager requestManager;
  private final SingularityDeployHealthHelper deployHealthHelper;
  private final LoadBalancerClient lbClient;
  private final SingularityExceptionNotifier exceptionNotifier;
  private final RequestHistoryHelper requestHistoryHelper;
  private final SingularityMesosScheduler scheduler;
  private final SingularitySchedulerLock lock;
  private final UsageManager usageManager;
  private final RequestGroupManager requestGroupManager;
  // Executor on which the per-request cleanup work is submitted asynchronously.
  private final ExecutorService cleanerExecutor;

  private final SingularityConfiguration configuration;
  // Fallback wait (milliseconds) before a non-long-running task in cleanup is killed;
  // converted from the configured seconds value in the constructor.
  private final long killNonLongRunningTasksInCleanupAfterMillis;

  @Inject
  public SingularityCleaner(
    TaskManager taskManager,
    SingularityDeployHealthHelper deployHealthHelper,
    DeployManager deployManager,
    RequestManager requestManager,
    SingularityConfiguration configuration,
    LoadBalancerClient lbClient,
    SingularityExceptionNotifier exceptionNotifier,
    RequestHistoryHelper requestHistoryHelper,
    SingularityMesosScheduler scheduler,
    SingularitySchedulerLock lock,
    UsageManager usageManager,
    RequestGroupManager requestGroupManager,
    SingularityManagedThreadPoolFactory threadPoolFactory
  ) {
    this.taskManager = taskManager;
    this.lbClient = lbClient;
    this.deployHealthHelper = deployHealthHelper;
    this.deployManager = deployManager;
    this.requestManager = requestManager;
    this.exceptionNotifier = exceptionNotifier;
    this.requestHistoryHelper = requestHistoryHelper;
    this.scheduler = scheduler;
    this.lock = lock;
    this.usageManager = usageManager;
    this.requestGroupManager = requestGroupManager;

    this.configuration = configuration;
    this.cleanerExecutor =
      threadPoolFactory.get("cleaner", configuration.getCoreThreadpoolSize());
    this.killNonLongRunningTasksInCleanupAfterMillis =
      TimeUnit.SECONDS.toMillis(
        configuration.getKillNonLongRunningTasksInCleanupAfterSeconds()
      );
  }

  /**
   * Decides whether the task referenced by {@code taskCleanup} should be killed now.
   *
   * <p>The checks run in this order: missing request, unfinished pre-kill shell command,
   * cleanup types that kill instantly, paused request, non-long-running timeout,
   * decommission hold for a pending deploy, missing active deploy, incremental
   * bounce / incremental deploy cleanup, and finally the health of replacement tasks.
   *
   * @param taskCleanup the cleanup record being evaluated
   * @param activeTaskIds active task ids considered as potential replacements
   * @param cleaningTasks task ids that are themselves being cleaned; never counted as replacements
   * @param incrementalCleaningTasks per-deploy counts used by the incremental checks; a
   *     matching entry is decremented by the incremental helpers when a kill is approved
   * @return {@code true} if the task should be killed now, {@code false} to keep waiting
   */
  private boolean shouldKillTask(
    SingularityTaskCleanup taskCleanup,
    List<SingularityTaskId> activeTaskIds,
    Set<SingularityTaskId> cleaningTasks,
    Multiset<SingularityDeployKey> incrementalCleaningTasks
  ) {
    final Optional<SingularityRequestWithState> requestWithState = requestManager.getRequest(
      taskCleanup.getTaskId().getRequestId()
    );

    if (!requestWithState.isPresent()) {
      LOG.debug(
        "Killing a task {} immediately because the request was missing",
        taskCleanup
      );
      return true;
    }

    final SingularityRequest request = requestWithState.get().getRequest();

    // A pre-kill shell command must report a finished update before the task may be killed.
    if (taskCleanup.getRunBeforeKillId().isPresent()) {
      List<SingularityTaskShellCommandUpdate> shellCommandUpdates = taskManager.getTaskShellCommandUpdates(
        taskCleanup.getRunBeforeKillId().get()
      );
      boolean finished = shellCommandUpdates
        .stream()
        .anyMatch(update -> update.getUpdateType().isFinished());
      if (!finished) {
        LOG.debug(
          "Waiting for pre-kill shell command {} to finish before killing task",
          taskCleanup.getRunBeforeKillId()
        );
        return false;
      }
    }

    if (taskCleanup.getCleanupType().shouldKillTaskInstantly(request)) {
      LOG.debug("Killing a task {} immediately because of its cleanup type", taskCleanup);
      return true;
    }

    // If pausing, must be a long-running task to kill here
    if (
      requestWithState.get().getState() == RequestState.PAUSED &&
      (taskCleanup.getCleanupType() != TaskCleanupType.PAUSING || request.isLongRunning())
    ) {
      LOG.debug(
        "Killing a task {} immediately because the request was paused",
        taskCleanup
      );
      return true;
    }

    if (!request.isLongRunning()) {
      final long timeSinceCleanup =
        System.currentTimeMillis() - taskCleanup.getTimestamp();
      final long maxWaitTime = request
        .getKillOldNonLongRunningTasksAfterMillis()
        .orElse(killNonLongRunningTasksInCleanupAfterMillis);
      // A max wait below 1ms means "do not wait at all".
      final boolean tooOld = (maxWaitTime < 1) || (timeSinceCleanup > maxWaitTime);

      if (!tooOld) {
        LOG.trace(
          "Not killing a non-longRunning task {}, running time since cleanup {} (max wait time is {})",
          taskCleanup,
          timeSinceCleanup,
          maxWaitTime
        );
      } else {
        LOG.debug(
          "Killing a non-longRunning task {} - running time since cleanup {} exceeded max wait time {}",
          taskCleanup,
          timeSinceCleanup,
          maxWaitTime
        );
      }

      return tooOld;
    }

    final String requestId = request.getId();

    final Optional<SingularityRequestDeployState> deployState = deployManager.getRequestDeployState(
      requestId
    );

    // Guard on isPresent() first: the deploy state may be absent (that case is handled
    // below), and dereferencing it here unguarded would throw NoSuchElementException.
    if (
      taskCleanup.getCleanupType() == TaskCleanupType.DECOMISSIONING &&
      deployState.isPresent() &&
      deployState.get().getPendingDeploy().isPresent() &&
      deployState
        .get()
        .getPendingDeploy()
        .get()
        .getDeployId()
        .equals(taskCleanup.getTaskId().getDeployId())
    ) {
      final long timeSinceCleanup =
        System.currentTimeMillis() - taskCleanup.getTimestamp();
      final long maxWaitTime = configuration.getPendingDeployHoldTaskDuringDecommissionMillis();
      final boolean tooOld = (maxWaitTime < 1) || (timeSinceCleanup > maxWaitTime);

      if (!tooOld) {
        LOG.trace(
          "Not killing {} - part of pending deploy - running time since cleanup {} (max wait time is {})",
          taskCleanup,
          timeSinceCleanup,
          maxWaitTime
        );
        return false;
      } else {
        LOG.debug(
          "Killing {} - part of pending deploy but running time since cleanup {} exceeded max wait time {}",
          taskCleanup,
          timeSinceCleanup,
          maxWaitTime
        );
        return true;
      }
    }

    if (!deployState.isPresent() || !deployState.get().getActiveDeploy().isPresent()) {
      LOG.debug(
        "Killing a task {} immediately because there is no active deploy state {}",
        taskCleanup,
        deployState
      );
      return true;
    }

    final String activeDeployId = deployState.get().getActiveDeploy().get().getDeployId();
    // Incremental deploy cleanups compare against the active deploy; every other
    // cleanup type compares against the deploy of the task being cleaned.
    final String matchingTasksDeployId = isIncrementalDeployCleanup(taskCleanup)
      ? activeDeployId
      : taskCleanup.getTaskId().getDeployId();

    // check to see if there are enough active tasks out there that have been active for long enough that we can safely shut this task down.
    final List<SingularityTaskId> matchingTasks = new ArrayList<>();
    for (SingularityTaskId taskId : activeTaskIds) {
      if (
        !taskId.getRequestId().equals(requestId) ||
        !taskId.getDeployId().equals(matchingTasksDeployId)
      ) {
        continue;
      }
      if (cleaningTasks.contains(taskId)) {
        continue;
      }
      matchingTasks.add(taskId);
    }

    // For an incremental bounce or incremental deploy cleanup, shut down old tasks as new ones are started
    final SingularityDeployKey key = SingularityDeployKey.fromTaskId(
      taskCleanup.getTaskId()
    );
    if (taskCleanup.getCleanupType() == TaskCleanupType.INCREMENTAL_BOUNCE) {
      return shouldKillIncrementalBounceTask(
        request,
        taskCleanup,
        matchingTasksDeployId,
        matchingTasks,
        key,
        incrementalCleaningTasks
      );
    } else if (isIncrementalDeployCleanup(taskCleanup)) {
      return shouldKillIncrementalDeployCleanupTask(
        request,
        taskCleanup,
        matchingTasksDeployId,
        matchingTasks,
        key,
        incrementalCleaningTasks
      );
    } else {
      if (matchingTasks.size() < request.getInstancesSafe()) {
        LOG.trace(
          "Not killing a task {} yet, only {} matching out of a required {}",
          taskCleanup,
          matchingTasks.size(),
          request.getInstancesSafe()
        );
        return false;
      }
    }

    final Optional<SingularityDeploy> deploy = deployManager.getDeploy(
      requestId,
      activeDeployId
    );

    final DeployHealth deployHealth = deployHealthHelper.getDeployHealth(
      request,
      deploy,
      matchingTasks,
      false
    );

    switch (deployHealth) {
      case HEALTHY:
        // Every replacement must also be healthy from the load balancer's point of view.
        for (SingularityTaskId taskId : matchingTasks) {
          DeployHealth lbHealth = getLbHealth(request, taskId);

          if (lbHealth != DeployHealth.HEALTHY) {
            LOG.trace(
              "Not killing a task {}, waiting for new replacement tasks to be added to LB (current state: {})",
              taskCleanup,
              lbHealth
            );
            return false;
          }
        }

        LOG.debug(
          "Killing a task {}, at least {} replacement tasks are healthy [{}]",
          taskCleanup,
          request.getInstancesSafe(),
          matchingTasks
        );
        return true;
      case WAITING:
      case UNHEALTHY:
      default:
        LOG.trace(
          "Not killing a task {}, waiting for new replacement tasks to be healthy (current state: {})",
          taskCleanup,
          deployHealth
        );
        return false;
    }
  }

  /**
   * Tells whether the cleanup stems from an incremental deploy that failed or was cancelled.
   *
   * @param taskCleanup the cleanup record to inspect
   * @return {@code true} for {@code INCREMENTAL_DEPLOY_FAILED} or
   *     {@code INCREMENTAL_DEPLOY_CANCELLED}, {@code false} otherwise
   */
  private boolean isIncrementalDeployCleanup(SingularityTaskCleanup taskCleanup) {
    final TaskCleanupType type = taskCleanup.getCleanupType();
    if (type == TaskCleanupType.INCREMENTAL_DEPLOY_FAILED) {
      return true;
    }
    return type == TaskCleanupType.INCREMENTAL_DEPLOY_CANCELLED;
  }

  /**
   * Decides whether a task in {@code INCREMENTAL_BOUNCE} cleanup may be killed now.
   *
   * <p>The task is killed only when the healthy replacement count plus the number of
   * still-cleaning tasks recorded for {@code key} exceeds the request's instance count.
   * When the kill is approved, one occurrence of {@code key} is removed from
   * {@code incrementalCleaningTasks}.
   *
   * @param request the owning request
   * @param taskCleanup the cleanup record being evaluated (used for logging)
   * @param matchingTasksDeployId deploy id whose tasks count as replacements
   * @param matchingTasks candidate replacement task ids
   * @param key deploy key of the task being cleaned
   * @param incrementalCleaningTasks per-deploy counts of tasks still in incremental cleanup
   * @return {@code true} if the task should be killed now
   */
  private boolean shouldKillIncrementalBounceTask(
    SingularityRequest request,
    SingularityTaskCleanup taskCleanup,
    String matchingTasksDeployId,
    List<SingularityTaskId> matchingTasks,
    SingularityDeployKey key,
    Multiset<SingularityDeployKey> incrementalCleaningTasks
  ) {
    int healthyReplacementTasks = getNumHealthyTasks(
      request,
      matchingTasksDeployId,
      matchingTasks
    );
    if (
      healthyReplacementTasks +
      incrementalCleaningTasks.count(key) <=
      request.getInstancesSafe()
    ) {
      LOG.trace(
        "Not killing a task {} yet, only {} matching out of a required {}",
        taskCleanup,
        matchingTasks.size(),
        request.getInstancesSafe() - incrementalCleaningTasks.count(key)
      );
      return false;
    } else {
      LOG.debug(
        "Killing a task {}, {} replacement tasks are healthy",
        taskCleanup,
        healthyReplacementTasks
      );
      // One old task is about to go away; drop it from the still-cleaning count.
      incrementalCleaningTasks.remove(key);
      return true;
    }
  }

  /**
   * Decides whether a task being cleaned after a failed or cancelled incremental deploy
   * may be killed now.
   *
   * <p>The task is killed once the number of healthy tasks for
   * {@code matchingTasksDeployId} reaches the request's instance count. When the kill is
   * approved, one occurrence of {@code key} is removed from
   * {@code incrementalCleaningTasks}.
   *
   * @param request the owning request
   * @param taskCleanup the cleanup record being evaluated (used for logging)
   * @param matchingTasksDeployId deploy id whose healthy tasks are counted
   * @param matchingTasks candidate task ids for that deploy
   * @param key deploy key of the task being cleaned
   * @param incrementalCleaningTasks per-deploy counts of tasks still in incremental cleanup
   * @return {@code true} if the task should be killed now
   */
  private boolean shouldKillIncrementalDeployCleanupTask(
    SingularityRequest request,
    SingularityTaskCleanup taskCleanup,
    String matchingTasksDeployId,
    List<SingularityTaskId> matchingTasks,
    SingularityDeployKey key,
    Multiset<SingularityDeployKey> incrementalCleaningTasks
  ) {
    int healthyActiveDeployTasks = getNumHealthyTasks(
      request,
      matchingTasksDeployId,
      matchingTasks
    );
    if (healthyActiveDeployTasks < request.getInstancesSafe()) {
      // Report the threshold the condition above actually tests (the full instance count).
      LOG.trace(
        "Not killing a task {} yet, only {} matching out of a required {}",
        taskCleanup,
        matchingTasks.size(),
        request.getInstancesSafe()
      );
      return false;
    } else {
      LOG.debug(
        "Killing a task {}, {} active deploy tasks are healthy",
        taskCleanup,
        healthyActiveDeployTasks
      );
      incrementalCleaningTasks.remove(key);
      return true;
    }
  }

  /**
   * Counts the tasks in {@code matchingTasks} that the deploy health helper reports as
   * healthy and that are also healthy from the load balancer's point of view.
   *
   * @param request the owning request
   * @param deployId deploy whose definition is passed to the health helper
   * @param matchingTasks candidate task ids
   * @return the number of tasks that pass both checks
   */
  private int getNumHealthyTasks(
    SingularityRequest request,
    String deployId,
    List<SingularityTaskId> matchingTasks
  ) {
    Optional<SingularityDeploy> deploy = deployManager.getDeploy(
      request.getId(),
      deployId
    );

    List<SingularityTaskId> healthyTasks = deployHealthHelper.getHealthyTasks(
      request,
      deploy,
      matchingTasks,
      false
    );

    int numHealthyTasks = 0;

    for (SingularityTaskId taskId : healthyTasks) {
      if (getLbHealth(request, taskId) == DeployHealth.HEALTHY) {
        numHealthyTasks++;
      }
    }

    return numHealthyTasks;
  }

  /**
   * Maps the state of a task's load balancer ADD request onto a {@link DeployHealth}.
   *
   * <p>Requests that are not load balanced are always reported as healthy. A missing
   * load balancer update is reported as waiting.
   *
   * @param request the owning request
   * @param taskId the task whose load balancer state is looked up
   * @return the derived health value
   */
  private DeployHealth getLbHealth(SingularityRequest request, SingularityTaskId taskId) {
    if (!request.isLoadBalanced()) {
      return DeployHealth.HEALTHY;
    }

    Optional<SingularityLoadBalancerUpdate> update = taskManager.getLoadBalancerState(
      taskId,
      LoadBalancerRequestType.ADD
    );

    if (!update.isPresent()) {
      return DeployHealth.WAITING;
    }

    switch (update.get().getLoadBalancerState()) {
      case SUCCESS:
        return DeployHealth.HEALTHY;
      case CANCELED:
      case CANCELING:
      case UNKNOWN:
      case INVALID_REQUEST_NOOP:
      case FAILED:
        return DeployHealth.UNHEALTHY;
      case WAITING:
        return DeployHealth.WAITING;
    }

    // Any state not listed above is treated as still waiting.
    return DeployHealth.WAITING;
  }

  /**
   * Tells whether a cleanup request has outlived the obsolete-expiration window.
   *
   * @param start timestamp (ms) of the current cleanup run
   * @param cleanupRequest timestamp (ms) at which the cleanup request was created
   * @return {@code true} if the elapsed time is strictly greater than
   *     {@link #getObsoleteExpirationTime()}
   */
  private boolean isObsolete(long start, long cleanupRequest) {
    return (start - cleanupRequest) > getObsoleteExpirationTime();
  }

  /**
   * Returns the obsolete-expiration window in milliseconds: three times the configured
   * cleanup interval.
   */
  private long getObsoleteExpirationTime() {
    final long cleanupIntervalMillis = TimeUnit.SECONDS.toMillis(
      configuration.getCleanupEverySeconds()
    );
    return cleanupIntervalMillis * 3;
  }

  /**
   * Processes every queued request cleanup.
   *
   * <p>Each cleanup is submitted to {@code cleanerExecutor} and handled by
   * {@link #processRequestCleanup} while holding that request's lock; this method blocks
   * until all submitted work has completed and then logs the totals.
   */
  private void drainRequestCleanupQueue() {
    final long start = System.currentTimeMillis();

    final List<SingularityRequestCleanup> cleanupRequests = requestManager.getCleanupRequests();

    if (cleanupRequests.isEmpty()) {
      LOG.trace("Request cleanup queue is empty");
      return;
    }

    LOG.info("Cleaning up {} requests", cleanupRequests.size());

    // Shared counters, updated from the executor threads.
    AtomicInteger numTasksKilled = new AtomicInteger(0);
    AtomicInteger numScheduledTasksRemoved = new AtomicInteger(0);

    // The lock name is the same for every request, so build it once.
    final String lockName = String.format(
      "%s#%s",
      getClass().getSimpleName(),
      "drainRequestCleanupQueue"
    );

    CompletableFutures
      .allOf(
        cleanupRequests
          .stream()
          .map(
            requestCleanup ->
              CompletableFuture.runAsync(
                () ->
                  lock.runWithRequestLock(
                    () -> {
                      processRequestCleanup(
                        start,
                        numTasksKilled,
                        numScheduledTasksRemoved,
                        requestCleanup
                      );
                    },
                    requestCleanup.getRequestId(),
                    lockName
                  ),
                cleanerExecutor
              )
          )
          .collect(Collectors.toList())
      )
      .join();

    LOG.info(
      "Killed {} tasks (removed {} scheduled) in {}",
      numTasksKilled.get(),
      numScheduledTasksRemoved.get(),
      JavaUtils.duration(start)
    );
  }

  /**
   * Handles a single request cleanup according to its cleanup type.
   *
   * <ul>
   *   <li>PAUSING: if the request is still active, either waits (returns without removing
   *       the cleanup) or, once the cleanup is obsolete, ignores it; otherwise hands the
   *       active tasks to {@link #pause}.</li>
   *   <li>DELETING: while tasks are still active they are put into cleanup via
   *       {@link #delete}; once none remain the request is marked deleted and its data
   *       removed.</li>
   *   <li>BOUNCE / INCREMENTAL_BOUNCE: delegates to {@link #bounce}.</li>
   * </ul>
   *
   * <p>Afterwards active tasks are killed and/or pending tasks deleted depending on the
   * flags computed above, and the cleanup request itself is removed.
   *
   * @param start timestamp (ms) of the enclosing cleanup run
   * @param numTasksKilled counter incremented for every task killed here
   * @param numScheduledTasksRemoved counter incremented for every pending task deleted here
   * @param requestCleanup the cleanup to process
   */
  private void processRequestCleanup(
    long start,
    AtomicInteger numTasksKilled,
    AtomicInteger numScheduledTasksRemoved,
    SingularityRequestCleanup requestCleanup
  ) {
    final String requestId = requestCleanup.getRequestId();
    final List<SingularityTaskId> activeTaskIds = taskManager.getActiveTaskIdsForRequest(
      requestId
    );
    final List<SingularityPendingTaskId> pendingTaskIds = taskManager.getPendingTaskIdsForRequest(
      requestId
    );
    final Optional<SingularityRequestWithState> requestWithState = requestManager.getRequest(
      requestId
    );

    boolean killActiveTasks = requestCleanup
      .getKillTasks()
      .orElse(configuration.isDefaultValueForKillTasksOfPausedRequests());
    boolean killScheduledTasks = true;

    switch (requestCleanup.getCleanupType()) {
      case PAUSING:
        if (SingularityRequestWithState.isActive(requestWithState)) {
          if (isObsolete(start, requestCleanup.getTimestamp())) {
            killScheduledTasks = false;
            killActiveTasks = false;
            LOG.info(
              "Ignoring {}, because {} is {}",
              requestCleanup,
              requestId,
              requestWithState.get().getState()
            );
          } else {
            LOG.debug(
              "Waiting on {} (it will expire after {}), because {} is {}",
              requestCleanup,
              JavaUtils.durationFromMillis(getObsoleteExpirationTime()),
              requestId,
              requestWithState.get().getState()
            );
            // Leave the cleanup queued so it is looked at again on the next run.
            return;
          }
        } else {
          if (pause(requestCleanup, activeTaskIds) == TaskCleanupType.PAUSING) {
            killActiveTasks = false;
          }
        }
        break;
      case DELETING:
        if (!activeTaskIds.isEmpty()) {
          killActiveTasks = false;
          killScheduledTasks = false;

          delete(requestCleanup, activeTaskIds);
        } else {
          Optional<SingularityRequestHistory> maybeHistory = requestHistoryHelper.getLastHistory(
            requestId
          );
          if (maybeHistory.isPresent()) {
            if (
              maybeHistory.get().getRequest().isLoadBalanced() &&
              configuration.isDeleteRemovedRequestsFromLoadBalancer() &&
              requestCleanup.getRemoveFromLoadBalancer().orElse(true)
            ) {
              createLbCleanupRequest(requestId, activeTaskIds);
            }
            requestManager.markDeleted(
              maybeHistory.get().getRequest(),
              start,
              requestCleanup.getUser(),
              requestCleanup.getMessage()
            );
          }
          cleanupRequestData(requestCleanup);
        }
        break;
      case BOUNCE:
      case INCREMENTAL_BOUNCE:
        killActiveTasks = false;
        killScheduledTasks = false;

        bounce(requestCleanup, activeTaskIds);
        break;
    }

    if (killActiveTasks) {
      for (SingularityTaskId matchingTaskId : activeTaskIds) {
        LOG.debug("Killing task {} due to {}", matchingTaskId, requestCleanup);
        scheduler.killAndRecord(
          matchingTaskId,
          requestCleanup.getCleanupType(),
          Optional.empty()
        );
        numTasksKilled.getAndIncrement();
      }
    } else {
      LOG.info("Active tasks for {} not killed", requestCleanup);
    }

    if (killScheduledTasks) {
      for (SingularityPendingTaskId matchingTaskId : pendingTaskIds) {
        LOG.debug("Deleting scheduled task {} due to {}", matchingTaskId, requestCleanup);
        taskManager.deletePendingTask(matchingTaskId);
        numScheduledTasksRemoved.getAndIncrement();
      }
    }

    requestManager.deleteCleanRequest(requestId, requestCleanup.getCleanupType());
  }

  /**
   * Saves a load balancer cleanup request for {@code requestId}, built from the deploy
   * currently in use.
   *
   * <p>If the in-use deploy id, the deploy itself, its load balancer groups or its service
   * base path cannot be resolved, nothing is saved and the exception notifier is informed
   * instead.
   *
   * @param requestId the request being removed
   * @param matchingActiveTaskIds task ids to include in the cleanup request
   */
  private void createLbCleanupRequest(
    String requestId,
    Iterable<SingularityTaskId> matchingActiveTaskIds
  ) {
    Optional<String> maybeCurrentDeployId = deployManager.getInUseDeployId(requestId);
    Optional<SingularityDeploy> maybeDeploy = Optional.empty();
    if (maybeCurrentDeployId.isPresent()) {
      maybeDeploy = deployManager.getDeploy(requestId, maybeCurrentDeployId.get());
      // Only proceed when every value dereferenced below is actually present; otherwise
      // fall through to the "insufficient data" notification rather than throwing.
      if (
        maybeDeploy.isPresent() &&
        maybeDeploy.get().getLoadBalancerGroups().isPresent() &&
        maybeDeploy.get().getServiceBasePath().isPresent()
      ) {
        List<String> taskIds = new ArrayList<>();
        for (SingularityTaskId taskId : matchingActiveTaskIds) {
          taskIds.add(taskId.getId());
        }
        requestManager.saveLbCleanupRequest(
          new SingularityRequestLbCleanup(
            requestId,
            maybeDeploy.get().getLoadBalancerGroups().get(),
            maybeDeploy.get().getServiceBasePath().get(),
            taskIds,
            Optional.empty()
          )
        );
        return;
      }
    }
    exceptionNotifier.notify(
      "Insufficient data to create LB request cleanup",
      ImmutableMap.of(
        "requestId",
        requestId,
        "deployId",
        maybeCurrentDeployId.toString(),
        "deploy",
        maybeDeploy.toString()
      )
    );
  }

  /**
   * Puts the active tasks of the cleanup's deploy into task cleanup and queues a
   * {@code BOUNCE} pending request.
   *
   * <p>If a pre-kill shell command is configured, a shell command request is queued for
   * each matching task and linked to its cleanup. If no task matches, the bounce is marked
   * complete (and a matching expiring bounce, if any, is removed).
   *
   * @param requestCleanup the bounce cleanup; its deploy id must be present
   * @param activeTaskIds active task ids to filter by request id and deploy id
   * @throws java.util.NoSuchElementException if the cleanup carries no deploy id
   */
  private void bounce(
    SingularityRequestCleanup requestCleanup,
    final List<SingularityTaskId> activeTaskIds
  ) {
    final long start = System.currentTimeMillis();
    final String requestId = requestCleanup.getRequestId();
    // Read the deploy id once, up front. It is required further down for the pending
    // request anyway, so a cleanup without one fails here before any state is changed.
    final String deployId = requestCleanup.getDeployId().get();

    final List<SingularityTaskId> matchingTaskIds = new ArrayList<>();

    for (SingularityTaskId activeTaskId : activeTaskIds) {
      if (
        activeTaskId.getRequestId().equals(requestId) &&
        activeTaskId.getDeployId().equals(deployId)
      ) {
        matchingTaskIds.add(activeTaskId);
      }
    }

    for (SingularityTaskId matchingTaskId : matchingTaskIds) {
      LOG.debug("Adding task {} to cleanup (bounce)", matchingTaskId.getId());

      Optional<SingularityTaskShellCommandRequestId> runBeforeKillId = Optional.empty();

      if (requestCleanup.getRunShellCommandBeforeKill().isPresent()) {
        SingularityTaskShellCommandRequest shellRequest = new SingularityTaskShellCommandRequest(
          matchingTaskId,
          requestCleanup.getUser(),
          System.currentTimeMillis(),
          requestCleanup.getRunShellCommandBeforeKill().get()
        );
        taskManager.saveTaskShellCommandRequestToQueue(shellRequest);
        runBeforeKillId = Optional.of(shellRequest.getId());
      }

      taskManager.createTaskCleanup(
        new SingularityTaskCleanup(
          requestCleanup.getUser(),
          requestCleanup.getCleanupType().getTaskCleanupType().get(),
          start,
          matchingTaskId,
          requestCleanup.getMessage(),
          requestCleanup.getActionId(),
          runBeforeKillId
        )
      );
    }

    if (matchingTaskIds.isEmpty()) {
      Optional<SingularityExpiringBounce> expiringBounce = requestManager.getExpiringBounce(
        requestId
      );
      if (
        expiringBounce.isPresent() &&
        expiringBounce.get().getDeployId().equalsIgnoreCase(deployId)
      ) {
        LOG.info(
          "No running tasks for request {}. Marking bounce {} complete and starting new tasks",
          expiringBounce.get().getRequestId(),
          expiringBounce.get()
        );

        requestManager.removeExpiringBounce(requestId);
      }
      requestManager.markBounceComplete(requestId);
    }

    requestManager.addToPendingQueue(
      new SingularityPendingRequest(
        requestId,
        deployId,
        requestCleanup.getTimestamp(),
        requestCleanup.getUser(),
        PendingType.BOUNCE,
        Optional.empty(),
        Optional.empty(),
        requestCleanup.getSkipHealthchecks(),
        requestCleanup.getMessage(),
        requestCleanup.getActionId()
      )
    );

    LOG.info(
      "Added {} tasks for request {} to cleanup bounce queue in {}",
      matchingTaskIds.size(),
      requestId,
      JavaUtils.duration(start)
    );
  }

  /**
   * Creates a task cleanup for every active task of a request that is being paused.
   *
   * <p>The cleanup type is {@code PAUSE} when tasks are to be killed and {@code PAUSING}
   * otherwise. A configured pre-kill shell command always forces {@code PAUSING}; in that
   * case a shell command request is queued per task and linked to its cleanup.
   *
   * @param requestCleanup the pause cleanup
   * @param activeTaskIds the request's active task ids
   * @return the cleanup type that was applied to the tasks
   */
  private TaskCleanupType pause(
    SingularityRequestCleanup requestCleanup,
    Iterable<SingularityTaskId> activeTaskIds
  ) {
    final long start = System.currentTimeMillis();
    boolean killTasks = requestCleanup
      .getKillTasks()
      .orElse(configuration.isDefaultValueForKillTasksOfPausedRequests());
    if (requestCleanup.getRunShellCommandBeforeKill().isPresent()) {
      // The shell command has to run first, so the tasks cannot be killed right away.
      killTasks = false;
    }

    TaskCleanupType cleanupType = killTasks
      ? TaskCleanupType.PAUSE
      : TaskCleanupType.PAUSING;

    for (SingularityTaskId taskId : activeTaskIds) {
      LOG.debug("Adding task {} to cleanup (pause)", taskId.getId());

      Optional<SingularityTaskShellCommandRequestId> runBeforeKillId = Optional.empty();

      if (requestCleanup.getRunShellCommandBeforeKill().isPresent()) {
        SingularityTaskShellCommandRequest shellRequest = new SingularityTaskShellCommandRequest(
          taskId,
          requestCleanup.getUser(),
          System.currentTimeMillis(),
          requestCleanup.getRunShellCommandBeforeKill().get()
        );
        taskManager.saveTaskShellCommandRequestToQueue(shellRequest);
        runBeforeKillId = Optional.of(shellRequest.getId());
      }

      taskManager.createTaskCleanup(
        new SingularityTaskCleanup(
          requestCleanup.getUser(),
          cleanupType,
          start,
          taskId,
          requestCleanup.getMessage(),
          requestCleanup.getActionId(),
          runBeforeKillId
        )
      );
    }

    return cleanupType;
  }

  /**
   * Saves a {@code REQUEST_DELETING} task cleanup for every active task of a request that
   * is being deleted.
   *
   * <p>If a pre-kill shell command is configured, a shell command request is queued per
   * task and linked to its cleanup. The cleanup also carries the request cleanup's
   * remove-from-load-balancer setting.
   *
   * @param requestCleanup the delete cleanup
   * @param activeTaskIds the request's active task ids
   */
  private void delete(
    SingularityRequestCleanup requestCleanup,
    Iterable<SingularityTaskId> activeTaskIds
  ) {
    final long start = System.currentTimeMillis();

    for (SingularityTaskId taskId : activeTaskIds) {
      LOG.debug("Adding task {} to cleanup (delete)", taskId.getId());

      Optional<SingularityTaskShellCommandRequestId> runBeforeKillId = Optional.empty();

      if (requestCleanup.getRunShellCommandBeforeKill().isPresent()) {
        SingularityTaskShellCommandRequest shellRequest = new SingularityTaskShellCommandRequest(
          taskId,
          requestCleanup.getUser(),
          System.currentTimeMillis(),
          requestCleanup.getRunShellCommandBeforeKill().get()
        );
        taskManager.saveTaskShellCommandRequestToQueue(shellRequest);
        runBeforeKillId = Optional.of(shellRequest.getId());
      }

      taskManager.saveTaskCleanup(
        new SingularityTaskCleanup(
          requestCleanup.getUser(),
          TaskCleanupType.REQUEST_DELETING,
          start,
          taskId,
          requestCleanup.getMessage(),
          requestCleanup.getActionId(),
          runBeforeKillId,
          requestCleanup.getRemoveFromLoadBalancer()
        )
      );
    }
  }

  /**
   * Removes the data kept for a deleted request: its pending deploy, its request deploy
   * state, its utilization record and its request group memberships.
   *
   * @param requestCleanup the cleanup identifying the request
   */
  private void cleanupRequestData(SingularityRequestCleanup requestCleanup) {
    final String requestId = requestCleanup.getRequestId();

    final SingularityDeleteResult pendingDeployResult = deployManager.deletePendingDeploy(
      requestId
    );
    final SingularityDeleteResult deployStateResult = deployManager.deleteRequestDeployState(
      requestId
    );
    LOG.trace(
      "Deleted pendingDeploy ({}) and requestDeployState ({}) due to {}",
      pendingDeployResult,
      deployStateResult,
      requestCleanup
    );

    usageManager.deleteRequestUtilization(requestId);
    requestGroupManager.removeFromAllGroups(requestId);
  }

  /**
   * Runs one full cleanup pass: request cleanups, task cleanups, load balancer task and
   * request cleanups, and finally the re-check of killed task records.
   *
   * @return the value returned by the task cleanup queue drain
   */
  public int drainCleanupQueue() {
    drainRequestCleanupQueue();
    int cleanupTasks = drainTaskCleanupQueue();

    // Fetched once and shared by both load balancer drains.
    final List<SingularityTaskId> lbCleanupTasks = taskManager.getLBCleanupTasks();
    drainLBTaskCleanupQueue(lbCleanupTasks);
    drainLBRequestCleanupQueue(lbCleanupTasks);

    checkKilledTaskIdRecords();
    return cleanupTasks;
  }

  /**
   * Tells whether the task referenced by the cleanup is still an active task.
   *
   * @param cleanupTask the cleanup record to check
   * @return the task manager's answer for the cleanup's task id
   */
  private boolean isValidTask(SingularityTaskCleanup cleanupTask) {
    final SingularityTaskId taskId = cleanupTask.getTaskId();
    return taskManager.isActiveTask(taskId);
  }

  private void checkKilledTaskIdRecords() {
    final long start = System.currentTimeMillis();
    final List killedTaskIdRecords = taskManager.getKilledTaskIdRecords();

    if (killedTaskIdRecords.isEmpty()) {
      LOG.trace("No killed taskId records");
      return;
    }

    AtomicInteger obsolete = new AtomicInteger(0);
    AtomicInteger waiting = new AtomicInteger(0);
    AtomicInteger rekilled = new AtomicInteger(0);

    CompletableFutures
      .allOf(
        killedTaskIdRecords
          .stream()
          .collect(Collectors.groupingBy(record -> record.getTaskId().getRequestId()))
          .entrySet()
          .stream()
          .map(
            killedTaskIdRecordsForRequest ->
              CompletableFuture.runAsync(
                () ->
                  lock.runWithRequestLock(
                    () -> {
                      for (SingularityKilledTaskIdRecord killedTaskIdRecord : killedTaskIdRecordsForRequest.getValue()) {
                        if (!taskManager.isActiveTask(killedTaskIdRecord.getTaskId())) {
                          SingularityDeleteResult deleteResult = taskManager.deleteKilledRecord(
                            killedTaskIdRecord.getTaskId()
                          );

                          LOG.debug(
                            "Deleting obsolete {} - {}",
                            killedTaskIdRecord,
                            deleteResult
                          );

                          obsolete.getAndIncrement();

                          continue;
                        }

                        long duration = start - killedTaskIdRecord.getTimestamp();

                        if (
                          duration >
                          configuration.getAskDriverToKillTasksAgainAfterMillis()
                        ) {
                          LOG.info(
                            "{} is still active, and time since last kill {} is greater than configured (askDriverToKillTasksAgainAfterMillis) {} - asking driver to kill again",
                            killedTaskIdRecord,
                            JavaUtils.durationFromMillis(duration),
                            JavaUtils.durationFromMillis(
                              configuration.getAskDriverToKillTasksAgainAfterMillis()
                            )
                          );

                          scheduler.killAndRecord(
                            killedTaskIdRecord.getTaskId(),
                            killedTaskIdRecord.getRequestCleanupType(),
                            killedTaskIdRecord.getTaskCleanupType(),
                            Optional.of(killedTaskIdRecord.getOriginalTimestamp()),
                            Optional.of(killedTaskIdRecord.getRetries()),
                            Optional.empty()
                          );

                          rekilled.getAndIncrement();
                        } else {
                          LOG.trace(
                            "Ignoring {}, because duration {} is less than configured (askDriverToKillTasksAgainAfterMillis) {}",
                            killedTaskIdRecord,
                            JavaUtils.durationFromMillis(duration),
                            JavaUtils.durationFromMillis(
                              configuration.getAskDriverToKillTasksAgainAfterMillis()
                            )
                          );

                          waiting.getAndIncrement();
                        }
                      }
                    },
                    killedTaskIdRecordsForRequest.getKey(),
                    String.format(
                      "%s#%s",
                      getClass().getSimpleName(),
                      "checkKilledTaskIdRecords"
                    )
                  ),
                cleanerExecutor
              )
          )
          .collect(Collectors.toList())
      )
      .join();

    LOG.info(
      "{} obsolete, {} waiting, {} rekilled tasks based on {} killedTaskIdRecords",
      obsolete,
      waiting,
      rekilled,
      killedTaskIdRecords.size()
    );
  }

  private int drainTaskCleanupQueue() {
    final long start = System.currentTimeMillis();

    final Map> cleanupTasks = taskManager
      .getCleanupTasks()
      .stream()
      .collect(
        Collectors.groupingBy(taskCleanup -> taskCleanup.getTaskId().getRequestId())
      );

    if (cleanupTasks.isEmpty()) {
      LOG.trace("Task cleanup queue is empty");
      return 0;
    }

    AtomicInteger killedTasks = new AtomicInteger(0);
    CompletableFutures
      .allOf(
        cleanupTasks
          .entrySet()
          .stream()
          .map(
            taskCleanupsForRequest ->
              CompletableFuture.runAsync(
                () ->
                  lock.runWithRequestLock(
                    () -> {
                      processTaskCleanupsForRequest(
                        taskCleanupsForRequest.getKey(),
                        taskCleanupsForRequest.getValue(),
                        killedTasks
                      );
                    },
                    taskCleanupsForRequest.getKey(),
                    String.format(
                      "%s#%s",
                      getClass().getSimpleName(),
                      "drainTaskCleanupQueue"
                    )
                  ),
                cleanerExecutor
              )
          )
          .collect(Collectors.toList())
      )
      .join();

    LOG.info("Killed {} tasks in {}", killedTasks, JavaUtils.duration(start));
    return cleanupTasks.size();
  }

  /**
   * Processes every queued task cleanup belonging to one request.
   *
   * <p>A first pass over {@code cleanupTasks} collects the ids of all tasks being cleaned, counts
   * incremental deploy / incremental bounce cleanups per deploy key, and records the ids of tasks
   * whose cleanup type is {@code REQUEST_DELETING}. A second pass then, per cleanup:
   * <ul>
   *   <li>deletes the cleanup when {@code isValidTask} rejects it;</li>
   *   <li>otherwise kills the task and deletes the cleanup when both {@code shouldKillTask} and
   *       {@link #checkLBStateAndShouldKillTask} agree;</li>
   *   <li>in every case calls {@link #cleanupRequestIfNoRemainingTasks}.</li>
   * </ul>
   *
   * @param requestId id of the request the cleanups belong to (used for logging here)
   * @param cleanupTasks the cleanups to process
   * @param killedTasks counter incremented once for every task killed by this call
   */
  private void processTaskCleanupsForRequest(
    String requestId,
    List<SingularityTaskCleanup> cleanupTasks,
    AtomicInteger killedTasks
  ) {
    final Multiset<SingularityDeployKey> incrementalCleaningTasks = HashMultiset.create(
      cleanupTasks.size()
    );
    final List<String> taskIdsForDeletedRequest = new ArrayList<>();
    boolean isRequestDeleting = false;

    // TODO - Better check for deleting request state
    final Set<SingularityTaskId> cleaningTasks = new HashSet<>(cleanupTasks.size());
    for (SingularityTaskCleanup cleanupTask : cleanupTasks) {
      cleaningTasks.add(cleanupTask.getTaskId());
      if (
        isIncrementalDeployCleanup(cleanupTask) ||
        cleanupTask.getCleanupType() == TaskCleanupType.INCREMENTAL_BOUNCE
      ) {
        incrementalCleaningTasks.add(
          SingularityDeployKey.fromTaskId(cleanupTask.getTaskId())
        );
      }
      if (cleanupTask.getCleanupType() == TaskCleanupType.REQUEST_DELETING) {
        taskIdsForDeletedRequest.add(cleanupTask.getTaskId().getId());
        isRequestDeleting = true;
      }
    }

    LOG.info("Cleaning up {} tasks for request {}", cleanupTasks.size(), requestId);

    final List<SingularityTaskId> activeTaskIds = taskManager.getActiveTaskIds();

    for (SingularityTaskCleanup cleanupTask : cleanupTasks) {
      SingularityTaskId taskId = cleanupTask.getTaskId();

      if (!isValidTask(cleanupTask)) {
        LOG.info(
          "Couldn't find a matching active task for cleanup task {}, deleting..",
          cleanupTask
        );
        taskManager.deleteCleanupTask(taskId.getId());
      } else if (
        shouldKillTask(
          cleanupTask,
          activeTaskIds,
          cleaningTasks,
          incrementalCleaningTasks
        ) &&
        // Only consulted once shouldKillTask has agreed (short-circuit &&).
        checkLBStateAndShouldKillTask(cleanupTask)
      ) {
        scheduler.killAndRecord(
          taskId,
          cleanupTask.getCleanupType(),
          cleanupTask.getUser()
        );
        taskManager.deleteCleanupTask(taskId.getId());

        killedTasks.getAndIncrement();
      }

      // Runs for every cleanup, whether or not the task was killed above.
      cleanupRequestIfNoRemainingTasks(
        cleanupTask,
        taskIdsForDeletedRequest,
        isRequestDeleting
      );
    }
  }

  /**
   * Marks one cleanup as handled and, once none remain for a request that is being deleted,
   * re-enqueues a {@code DELETING} request cleanup.
   *
   * <p>The id of {@code cleanupTask}'s task is removed from {@code taskIdsForDeletedRequest}
   * (the list is mutated in place). If the list is then empty and {@code isRequestDeleting} is
   * set, a new {@link SingularityRequestCleanup} is created via {@code requestManager}, carrying
   * over the user, message and remove-from-load-balancer setting of {@code cleanupTask}.
   *
   * @param cleanupTask the cleanup that has just been processed
   * @param taskIdsForDeletedRequest ids of tasks still outstanding for the request being deleted
   * @param isRequestDeleting whether the request is being deleted
   */
  private void cleanupRequestIfNoRemainingTasks(
    SingularityTaskCleanup cleanupTask,
    List<String> taskIdsForDeletedRequest,
    boolean isRequestDeleting
  ) {
    String requestId = cleanupTask.getTaskId().getRequestId();

    taskIdsForDeletedRequest.remove(cleanupTask.getTaskId().getId());
    if (taskIdsForDeletedRequest.isEmpty() && isRequestDeleting) {
      LOG.warn(
        "All tasks for requestId {} are now killed, re-enqueueing request cleanup",
        requestId
      );
      requestManager.createCleanupRequest(
        new SingularityRequestCleanup(
          cleanupTask.getUser(),
          RequestCleanupType.DELETING,
          System.currentTimeMillis(),
          Optional.of(Boolean.TRUE),
          cleanupTask.getRemoveFromLoadBalancer(),
          requestId,
          Optional.empty(),
          Optional.empty(),
          cleanupTask.getMessage(),
          Optional.empty(),
          Optional.empty()
        )
      );
    }
  }

  /**
   * Checks the load balancer state of the task being cleaned and reports whether the task may
   * be killed now.
   *
   * @param cleanupTask the cleanup whose task is examined
   * @return {@code true} for {@code DONE}, {@code NOT_LOAD_BALANCED}, {@code MISSING_TASK} and
   *     {@code LOAD_BALANCE_FAILED}; {@code false} for {@code RETRY} and {@code WAITING}
   */
  private boolean checkLBStateAndShouldKillTask(SingularityTaskCleanup cleanupTask) {
    final long startedAt = System.currentTimeMillis();
    final CheckLBState lbState = checkLbState(cleanupTask.getTaskId());

    LOG.debug(
      "TaskCleanup {} had LB state {} after {}",
      cleanupTask,
      lbState,
      JavaUtils.duration(startedAt)
    );

    final boolean killNow;
    switch (lbState) {
      case RETRY:
      case WAITING:
        // LB removal has not finished yet; keep the task alive for now.
        killNow = false;
        break;
      case DONE:
      case NOT_LOAD_BALANCED:
      case MISSING_TASK:
      case LOAD_BALANCE_FAILED:
        killNow = true;
        break;
      default:
        killNow = false;
    }

    return killNow;
  }

  /**
   * Outcome of a load balancer cleanup check, as produced by {@code checkLbState} and
   * {@code checkRequestLbState}.
   */
  private enum CheckLBState {
    // No ADD load balancer update was recorded for the task.
    NOT_LOAD_BALANCED,
    // The ADD update was not in a state that needs removal, or the removal/delete came back
    // as INVALID_REQUEST_NOOP.
    LOAD_BALANCE_FAILED,
    // The task could not be loaded when a removal had to be enqueued.
    MISSING_TASK,
    // Removal/delete is still pending, or the post-removal grace period has not elapsed.
    WAITING,
    // Removal/delete succeeded.
    DONE,
    // Removal/delete FAILED or was CANCELED, or request LB cleanup cannot run yet.
    RETRY
  }

  /**
   * Tells whether a task's load balancer ADD update is in a state that calls for a removal.
   *
   * @param taskId id of the task, used for logging only
   * @param loadBalancerUpdate the ADD update recorded for the task
   * @return {@code true} for {@code UNKNOWN}, {@code WAITING} and {@code SUCCESS};
   *     {@code false} for every other state
   */
  private boolean shouldRemoveLbState(
    SingularityTaskId taskId,
    SingularityLoadBalancerUpdate loadBalancerUpdate
  ) {
    final BaragonRequestState addState = loadBalancerUpdate.getLoadBalancerState();

    switch (addState) {
      case INVALID_REQUEST_NOOP:
        // don't need to remove because Baragon doesnt know about it
        return false;
      case SUCCESS:
      case WAITING:
      case UNKNOWN:
        return true;
      default:
        LOG.trace("Task {} had abnormal LB state {}", taskId, loadBalancerUpdate);
        return false;
    }
  }

  /**
   * Chooses the load balancer request id to use for removing a task from the load balancer.
   *
   * @param taskId the task being removed
   * @param lbRemoveUpdate the previously saved REMOVE update for the task, if any
   * @return a fresh REMOVE id with no attempt number when there is no previous update; a REMOVE
   *     id with the previous attempt number plus one when the previous update is {@code FAILED}
   *     or {@code CANCELED}; otherwise the id of the previous update
   */
  private LoadBalancerRequestId getLoadBalancerRequestId(
    SingularityTaskId taskId,
    Optional<SingularityLoadBalancerUpdate> lbRemoveUpdate
  ) {
    if (!lbRemoveUpdate.isPresent()) {
      return new LoadBalancerRequestId(
        taskId.getId(),
        LoadBalancerRequestType.REMOVE,
        Optional.empty()
      );
    }

    final SingularityLoadBalancerUpdate previousUpdate = lbRemoveUpdate.get();

    switch (previousUpdate.getLoadBalancerState()) {
      case FAILED:
      case CANCELED:
        // The previous attempt ended unsuccessfully: same id, next attempt number.
        return new LoadBalancerRequestId(
          taskId.getId(),
          LoadBalancerRequestType.REMOVE,
          Optional.of(previousUpdate.getLoadBalancerRequestId().getAttemptNumber() + 1)
        );
      default:
        return previousUpdate.getLoadBalancerRequestId();
    }
  }

  /**
   * Decides whether a new load balancer request has to be sent.
   *
   * @param maybeLbUpdate the previously saved update for the operation, if any
   * @return {@code true} when there is no previous update or it is {@code UNKNOWN},
   *     {@code FAILED} or {@code CANCELED}; {@code false} for {@code CANCELING},
   *     {@code SUCCESS}, {@code WAITING} and {@code INVALID_REQUEST_NOOP}
   */
  private boolean shouldEnqueueLbRequest(
    Optional<SingularityLoadBalancerUpdate> maybeLbUpdate
  ) {
    if (!maybeLbUpdate.isPresent()) {
      return true;
    }

    switch (maybeLbUpdate.get().getLoadBalancerState()) {
      case UNKNOWN:
      case FAILED:
      case CANCELED:
        return true;
      case CANCELING:
      case SUCCESS:
      case WAITING:
      case INVALID_REQUEST_NOOP:
        // A request is already in flight or has finished; nothing new to send.
        return false;
      default:
        return false;
    }
  }

  /**
   * Drives the removal of a task from the load balancer and reports how far it has got.
   *
   * <p>Steps, in order:
   * <ol>
   *   <li>No saved ADD update for the task: {@code NOT_LOAD_BALANCED}.</li>
   *   <li>ADD update rejected by {@link #shouldRemoveLbState}: {@code LOAD_BALANCE_FAILED}.</li>
   *   <li>Otherwise obtain a REMOVE update: enqueue a new removal when
   *       {@link #shouldEnqueueLbRequest} says so (returning {@code MISSING_TASK} if the task
   *       cannot be loaded), refresh it from {@code lbClient} when the saved one is
   *       {@code WAITING} or {@code CANCELING}, or reuse the saved one. Newly obtained updates
   *       are saved through {@code taskManager}.</li>
   *   <li>Map the REMOVE update's state to the result (see the switch below).</li>
   * </ol>
   *
   * @param taskId the task to remove from the load balancer
   * @return the resulting {@link CheckLBState}
   */
  private CheckLBState checkLbState(SingularityTaskId taskId) {
    Optional<SingularityLoadBalancerUpdate> lbAddUpdate = taskManager.getLoadBalancerState(
      taskId,
      LoadBalancerRequestType.ADD
    );

    if (!lbAddUpdate.isPresent()) {
      return CheckLBState.NOT_LOAD_BALANCED;
    }

    if (!shouldRemoveLbState(taskId, lbAddUpdate.get())) {
      return CheckLBState.LOAD_BALANCE_FAILED;
    }

    Optional<SingularityLoadBalancerUpdate> maybeLbRemoveUpdate = taskManager.getLoadBalancerState(
      taskId,
      LoadBalancerRequestType.REMOVE
    );
    // Assigned exactly once on every path that reaches the switch below.
    final SingularityLoadBalancerUpdate lbRemoveUpdate;

    final LoadBalancerRequestId loadBalancerRequestId = getLoadBalancerRequestId(
      taskId,
      maybeLbRemoveUpdate
    );

    if (shouldEnqueueLbRequest(maybeLbRemoveUpdate)) {
      final Optional<SingularityTask> task = taskManager.getTask(taskId);

      if (!task.isPresent()) {
        LOG.error("Missing task {}", taskId);
        return CheckLBState.MISSING_TASK;
      }

      lbRemoveUpdate =
        lbClient.enqueue(
          loadBalancerRequestId,
          task.get().getTaskRequest().getRequest(),
          task.get().getTaskRequest().getDeploy(),
          Collections.emptyList(),
          Collections.singletonList(task.get())
        );

      taskManager.saveLoadBalancerState(
        taskId,
        LoadBalancerRequestType.REMOVE,
        lbRemoveUpdate
      );
    } else if (
      // shouldEnqueueLbRequest returns true for an absent update, so it is present here.
      maybeLbRemoveUpdate.get().getLoadBalancerState() == BaragonRequestState.WAITING ||
      maybeLbRemoveUpdate.get().getLoadBalancerState() == BaragonRequestState.CANCELING
    ) {
      lbRemoveUpdate = lbClient.getState(loadBalancerRequestId);

      taskManager.saveLoadBalancerState(
        taskId,
        LoadBalancerRequestType.REMOVE,
        lbRemoveUpdate
      );
    } else {
      lbRemoveUpdate = maybeLbRemoveUpdate.get();
    }

    switch (lbRemoveUpdate.getLoadBalancerState()) {
      case SUCCESS:
        // With a grace period configured, hold off until it has elapsed since the update's
        // timestamp.
        if (configuration.getLoadBalancerRemovalGracePeriodMillis() > 0) {
          final long duration =
            System.currentTimeMillis() - lbRemoveUpdate.getTimestamp();

          if (duration < configuration.getLoadBalancerRemovalGracePeriodMillis()) {
            LOG.trace(
              "LB removal for {} succeeded - waiting at least {} to kill task (current duration {})",
              taskId,
              JavaUtils.durationFromMillis(
                configuration.getLoadBalancerRemovalGracePeriodMillis()
              ),
              JavaUtils.durationFromMillis(duration)
            );
            return CheckLBState.WAITING;
          }
        }

        return CheckLBState.DONE;
      case FAILED:
      case CANCELED:
        LOG.error(
          "LB removal request {} ({}) got unexpected response {}",
          lbRemoveUpdate,
          loadBalancerRequestId,
          lbRemoveUpdate.getLoadBalancerState()
        );
        // NOTE(review): the "addUpdate" key carries the REMOVE update; key left unchanged.
        exceptionNotifier.notify(
          "LB removal failed",
          ImmutableMap.of(
            "state",
            lbRemoveUpdate.getLoadBalancerState().name(),
            "loadBalancerRequestId",
            loadBalancerRequestId.toString(),
            "addUpdate",
            lbRemoveUpdate.toString()
          )
        );
        return CheckLBState.RETRY;
      case UNKNOWN:
      case CANCELING:
      case WAITING:
        LOG.trace(
          "Waiting on LB cleanup request {} in state {}",
          loadBalancerRequestId,
          lbRemoveUpdate.getLoadBalancerState()
        );
        break;
      case INVALID_REQUEST_NOOP:
        // NOTE(review): the "addUpdate" key carries the REMOVE update; key left unchanged.
        exceptionNotifier.notify(
          "LB removal failed",
          ImmutableMap.of(
            "state",
            lbRemoveUpdate.getLoadBalancerState().name(),
            "loadBalancerRequestId",
            loadBalancerRequestId.toString(),
            "addUpdate",
            lbRemoveUpdate.toString()
          )
        );
        return CheckLBState.LOAD_BALANCE_FAILED;
    }

    return CheckLBState.WAITING;
  }

  /**
   * Drains the load balancer task cleanup queue.
   *
   * <p>Task ids are grouped by request id. One asynchronous job per request is submitted to
   * {@code cleanerExecutor} and runs inside {@code lock.runWithRequestLock} for that request.
   * For each task, {@link #checkLbState} is evaluated: {@code WAITING} and {@code RETRY} leave
   * the task queued; every other state deletes its LB cleanup entry. This method blocks until
   * all jobs have completed.
   *
   * @param lbCleanupTasks ids of the tasks currently queued for load balancer cleanup
   */
  private void drainLBTaskCleanupQueue(List<SingularityTaskId> lbCleanupTasks) {
    final long start = System.currentTimeMillis();

    if (lbCleanupTasks.isEmpty()) {
      LOG.trace("LB task cleanup queue is empty");
      return;
    }

    LOG.info("LB task cleanup queue had {} tasks", lbCleanupTasks.size());

    // Shared across the per-request jobs, hence atomic.
    AtomicInteger cleanedTasks = new AtomicInteger(0);
    AtomicInteger ignoredTasks = new AtomicInteger(0);

    CompletableFutures
      .allOf(
        lbCleanupTasks
          .stream()
          .collect(Collectors.groupingBy(SingularityTaskId::getRequestId))
          .entrySet()
          .stream()
          .map(
            lbCleanupsForRequest ->
              CompletableFuture.runAsync(
                () ->
                  lock.runWithRequestLock(
                    () -> {
                      for (SingularityTaskId taskId : lbCleanupsForRequest.getValue()) {
                        final long checkStart = System.currentTimeMillis();

                        final CheckLBState checkLbState = checkLbState(taskId);

                        LOG.debug(
                          "LB cleanup for task {} had state {} after {}",
                          taskId,
                          checkLbState,
                          JavaUtils.duration(checkStart)
                        );

                        switch (checkLbState) {
                          case WAITING:
                          case RETRY:
                            // Not finished: keep the entry and move on to the next task.
                            continue;
                          case DONE:
                          case MISSING_TASK:
                            cleanedTasks.getAndIncrement();
                            break;
                          case NOT_LOAD_BALANCED:
                          case LOAD_BALANCE_FAILED:
                            ignoredTasks.getAndIncrement();
                            break;
                        }

                        taskManager.deleteLBCleanupTask(taskId);
                      }
                    },
                    lbCleanupsForRequest.getKey(),
                    String.format(
                      "%s#%s",
                      getClass().getSimpleName(),
                      "drainLBTaskCleanupQueue"
                    )
                  ),
                cleanerExecutor
              )
          )
          .collect(Collectors.toList())
      )
      .join();

    LOG.info(
      "LB cleaned {} tasks ({} left, {} obsolete) in {}",
      cleanedTasks,
      // "left" = everything that was neither cleaned nor ignored.
      lbCleanupTasks.size() - (ignoredTasks.get() + cleanedTasks.get()),
      ignoredTasks,
      JavaUtils.duration(start)
    );
  }

  /**
   * Drains the load balancer request cleanup queue.
   *
   * <p>One asynchronous job per queued request cleanup is submitted to {@code cleanerExecutor}
   * and runs inside {@code lock.runWithRequestLock} for that request. Each job evaluates
   * {@link #checkRequestLbState}: {@code WAITING} and {@code RETRY} leave the cleanup queued;
   * every other state deletes it. This method blocks until all jobs have completed.
   *
   * @param lbCleanupTasks ids of tasks queued for load balancer cleanup, passed through to
   *     {@link #checkRequestLbState}
   */
  private void drainLBRequestCleanupQueue(List<SingularityTaskId> lbCleanupTasks) {
    final long start = System.currentTimeMillis();

    final List<SingularityRequestLbCleanup> lbCleanupRequests = requestManager.getLbCleanupRequests();

    if (lbCleanupRequests.isEmpty()) {
      LOG.trace("LB request cleanup queue is empty");
      return;
    }

    LOG.info("LB request cleanup queue had {} requests", lbCleanupRequests.size());

    // Shared across the per-request jobs, hence atomic.
    AtomicInteger cleanedRequests = new AtomicInteger(0);
    AtomicInteger ignoredRequests = new AtomicInteger(0);

    CompletableFutures
      .allOf(
        lbCleanupRequests
          .stream()
          .map(
            cleanup ->
              CompletableFuture.runAsync(
                () ->
                  lock.runWithRequestLock(
                    () -> {
                      final long checkStart = System.currentTimeMillis();

                      final CheckLBState checkLbState = checkRequestLbState(
                        cleanup,
                        lbCleanupTasks
                      );

                      LOG.debug(
                        "LB cleanup for request {} had state {} after {}",
                        cleanup.getRequestId(),
                        checkLbState,
                        JavaUtils.duration(checkStart)
                      );

                      switch (checkLbState) {
                        case WAITING:
                        case RETRY:
                          // Not finished: keep the cleanup queued.
                          return;
                        case DONE:
                        case MISSING_TASK:
                          cleanedRequests.getAndIncrement();
                          break;
                        case NOT_LOAD_BALANCED:
                        case LOAD_BALANCE_FAILED:
                          ignoredRequests.getAndIncrement();
                          break;
                      }

                      requestManager.deleteLbCleanupRequest(cleanup.getRequestId());
                    },
                    cleanup.getRequestId(),
                    String.format(
                      "%s#%s",
                      getClass().getSimpleName(),
                      "drainLBRequestCleanupQueue"
                    )
                  ),
                cleanerExecutor
              )
          )
          .collect(Collectors.toList())
      )
      .join();
    LOG.info(
      "LB cleaned {} requests ({} left, {} obsolete) in {}",
      cleanedRequests,
      // "left" = everything that was neither cleaned nor ignored.
      lbCleanupRequests.size() - (ignoredRequests.get() + cleanedRequests.get()),
      ignoredRequests,
      JavaUtils.duration(start)
    );
  }

  /**
   * Tells whether a request's load balancer cleanup may run now.
   *
   * <p>The cleanup has to wait while any of the following holds: the request still exists and
   * is active; one of the task ids recorded on the cleanup is still an active task; or a task
   * of the same request is still queued for task-level load balancer cleanup.
   *
   * @param cleanup the request load balancer cleanup under consideration
   * @param lbCleanupTasks ids of tasks currently queued for load balancer cleanup
   * @return {@code true} when none of the blocking conditions applies
   */
  private boolean canRunRequestLbCleanup(
    SingularityRequestLbCleanup cleanup,
    List<SingularityTaskId> lbCleanupTasks
  ) {
    Optional<SingularityRequestWithState> maybeRequestWithState = requestManager.getRequest(
      cleanup.getRequestId()
    );
    if (
      maybeRequestWithState.isPresent() &&
      SingularityRequestWithState.isActive(maybeRequestWithState)
    ) {
      LOG.trace("Request is still active, will wait for request lb cleanup");
      return false;
    }
    for (String taskId : cleanup.getActiveTaskIds()) {
      if (taskManager.isActiveTask(SingularityTaskId.valueOf(taskId))) {
        LOG.trace("Request still has active tasks, will wait for lb request cleanup");
        return false;
      }
    }
    for (SingularityTaskId taskId : lbCleanupTasks) {
      if (taskId.getRequestId().equals(cleanup.getRequestId())) {
        LOG.trace(
          "Waiting for task lb cleanup to finish before trying request lb cleanup for request {}",
          cleanup.getRequestId()
        );
        return false;
      }
    }
    return true;
  }

  /**
   * Drives the load balancer delete for a request and reports how far it has got.
   *
   * <p>Returns {@code RETRY} straight away when {@link #canRunRequestLbCleanup} says the cleanup
   * cannot run yet. Otherwise a delete update is obtained: a new delete is issued when
   * {@link #shouldEnqueueLbRequest} says so, the saved one is refreshed from {@code lbClient}
   * when it is {@code WAITING} or {@code CANCELING}, or the saved one is reused. Newly obtained
   * updates are stored on {@code cleanup} and saved through {@code requestManager}. The update's
   * state is then mapped to the result (see the switch below).
   *
   * @param cleanup the request load balancer cleanup; its load balancer update may be replaced
   * @param lbCleanupTasks ids of tasks currently queued for load balancer cleanup
   * @return the resulting {@link CheckLBState}
   */
  private CheckLBState checkRequestLbState(
    SingularityRequestLbCleanup cleanup,
    List<SingularityTaskId> lbCleanupTasks
  ) {
    if (!canRunRequestLbCleanup(cleanup, lbCleanupTasks)) {
      return CheckLBState.RETRY;
    }

    Optional<SingularityLoadBalancerUpdate> maybeDeleteUpdate = cleanup.getLoadBalancerUpdate();
    final LoadBalancerRequestId loadBalancerRequestId = getLoadBalancerRequestId(
      cleanup.getRequestId(),
      maybeDeleteUpdate
    );
    final SingularityLoadBalancerUpdate lbDeleteUpdate;
    if (shouldEnqueueLbRequest(maybeDeleteUpdate)) {
      lbDeleteUpdate =
        lbClient.delete(
          loadBalancerRequestId,
          cleanup.getRequestId(),
          cleanup.getLoadBalancerGroups(),
          cleanup.getServiceBasePath()
        );
      cleanup.setLoadBalancerUpdate(Optional.of(lbDeleteUpdate));
      requestManager.saveLbCleanupRequest(cleanup);
    } else if (
      // shouldEnqueueLbRequest returns true for an absent update, so it is present here.
      maybeDeleteUpdate.get().getLoadBalancerState() == BaragonRequestState.WAITING ||
      maybeDeleteUpdate.get().getLoadBalancerState() == BaragonRequestState.CANCELING
    ) {
      lbDeleteUpdate = lbClient.getState(loadBalancerRequestId);
      cleanup.setLoadBalancerUpdate(Optional.of(lbDeleteUpdate));
      requestManager.saveLbCleanupRequest(cleanup);
    } else {
      lbDeleteUpdate = maybeDeleteUpdate.get();
    }

    switch (lbDeleteUpdate.getLoadBalancerState()) {
      case SUCCESS:
        return CheckLBState.DONE;
      case FAILED:
      case CANCELED:
        LOG.error(
          "LB delete request {} ({}) got unexpected response {}",
          lbDeleteUpdate,
          loadBalancerRequestId,
          lbDeleteUpdate.getLoadBalancerState()
        );
        // NOTE(review): the "addUpdate" key carries the DELETE update; key left unchanged.
        exceptionNotifier.notify(
          "LB delete failed",
          ImmutableMap.of(
            "state",
            lbDeleteUpdate.getLoadBalancerState().name(),
            "loadBalancerRequestId",
            loadBalancerRequestId.toString(),
            "addUpdate",
            lbDeleteUpdate.toString()
          )
        );
        return CheckLBState.RETRY;
      case UNKNOWN:
      case CANCELING:
      case WAITING:
        LOG.trace(
          "Waiting on LB delete request {} in state {}",
          loadBalancerRequestId,
          lbDeleteUpdate.getLoadBalancerState()
        );
        break;
      case INVALID_REQUEST_NOOP:
        // NOTE(review): the "addUpdate" key carries the DELETE update; key left unchanged.
        exceptionNotifier.notify(
          String.format(
            "LB delete failed for %s",
            lbDeleteUpdate.getLoadBalancerRequestId().toString()
          ),
          ImmutableMap.of(
            "state",
            lbDeleteUpdate.getLoadBalancerState().name(),
            "loadBalancerRequestId",
            loadBalancerRequestId.toString(),
            "addUpdate",
            lbDeleteUpdate.toString()
          )
        );
        return CheckLBState.LOAD_BALANCE_FAILED;
    }

    return CheckLBState.WAITING;
  }

  /**
   * Chooses the load balancer request id to use for deleting a request from the load balancer.
   *
   * @param requestId id of the request being deleted
   * @param lbDeleteUpdate the previously saved DELETE update for the request, if any
   * @return a new DELETE id of the form {@code <requestId>-<current millis>} with no attempt
   *     number when there is no previous update; a new DELETE id of the same form with the
   *     previous attempt number plus one when the previous update is {@code FAILED} or
   *     {@code CANCELED}; otherwise the id of the previous update
   */
  private LoadBalancerRequestId getLoadBalancerRequestId(
    String requestId,
    Optional<SingularityLoadBalancerUpdate> lbDeleteUpdate
  ) {
    if (!lbDeleteUpdate.isPresent()) {
      return new LoadBalancerRequestId(
        String.format("%s-%s", requestId, System.currentTimeMillis()),
        LoadBalancerRequestType.DELETE,
        Optional.empty()
      );
    }

    final SingularityLoadBalancerUpdate previousUpdate = lbDeleteUpdate.get();

    switch (previousUpdate.getLoadBalancerState()) {
      case FAILED:
      case CANCELED:
        // The previous attempt ended unsuccessfully: new timestamped id, next attempt number.
        return new LoadBalancerRequestId(
          String.format("%s-%s", requestId, System.currentTimeMillis()),
          LoadBalancerRequestType.DELETE,
          Optional.of(previousUpdate.getLoadBalancerRequestId().getAttemptNumber() + 1)
        );
      default:
        return previousUpdate.getLoadBalancerRequestId();
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy