All downloads are free. Search and download functionality uses the official Maven repository.

com.hubspot.singularity.executor.task.SingularityExecutorTaskProcessCallable Maven / Gradle / Ivy

The newest version!
package com.hubspot.singularity.executor.task;

import com.github.rholder.retry.RetryException;
import com.github.rholder.retry.Retryer;
import com.github.rholder.retry.RetryerBuilder;
import com.github.rholder.retry.StopStrategies;
import com.github.rholder.retry.WaitStrategies;
import com.hubspot.deploy.HealthcheckOptions;
import com.hubspot.singularity.executor.config.SingularityExecutorConfiguration;
import com.hubspot.singularity.executor.utils.ExecutorUtils;
import com.hubspot.singularity.runner.base.shared.SafeProcessManager;
import java.io.File;
import java.nio.file.Paths;
import java.util.Optional;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import org.apache.mesos.Protos;
import org.apache.mesos.Protos.TaskState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Starts the process for a {@link SingularityExecutorTask}, optionally gates it on a
 * file-based health check, and returns the exit code of the process.
 *
 * <p>The health check is only performed when the task definition supplies both
 * healthcheck options and a healthcheck result file path. In that case the expected
 * file (resolved against the task app directory) is polled at a fixed interval until
 * it appears, the process exits, or the maximum delay elapses.
 */
public class SingularityExecutorTaskProcessCallable
  extends SafeProcessManager
  implements Callable<Integer> {
  private static final Logger LOG = LoggerFactory.getLogger(
    SingularityExecutorTaskProcessCallable.class
  );

  private final ProcessBuilder processBuilder;
  private final ExecutorUtils executorUtils;
  private final SingularityExecutorConfiguration configuration;
  private final SingularityExecutorTask task;

  /** Outcome of a single health check poll. */
  enum HealthCheckResult {
    /** The health check file exists. */
    PASSED,
    /** The file is absent, but the process has already exited with code 0. */
    PASSED_EXITED,
    /** The file is absent and the process has exited with a non-zero code. */
    FAILED_EXITED,
    /** The file is absent and the process is still alive; poll again. */
    WAITING
  }

  /**
   * @param configuration executor configuration supplying the health check defaults
   * @param task the task whose process is managed; its log is handed to the superclass
   * @param processBuilder builder used to start the task process
   * @param executorUtils helper used to send task status updates
   */
  public SingularityExecutorTaskProcessCallable(
    SingularityExecutorConfiguration configuration,
    SingularityExecutorTask task,
    ProcessBuilder processBuilder,
    ExecutorUtils executorUtils
  ) {
    super(task.getLog());
    this.executorUtils = executorUtils;
    this.processBuilder = processBuilder;
    this.configuration = configuration;
    this.task = task;
  }

  /**
   * Starts the process, runs the health check, signals a kill if the health check did
   * not pass, and then waits for the process to terminate.
   *
   * @return the exit code of the process
   * @throws Exception if starting the process or waiting for it fails
   */
  @Override
  public Integer call() throws Exception {
    LOG.info("Process being started");
    Process process = startProcess(processBuilder);

    if (!runHealthcheck(process)) {
      task
        .getLog()
        .info("Killing task {} that did not pass health checks", task.getTaskId());
      super.signalKillToProcessIfActive();
    }

    return process.waitFor();
  }

  public SingularityExecutorTask getTask() {
    return task;
  }

  @Override
  public String toString() {
    return "SingularityExecutorTaskProcessCallable [task=" + task + "]";
  }

  /**
   * Runs the file-based health check for the given process and sends the matching
   * task status update.
   *
   * @param process the running task process
   * @return {@code true} if no health check is configured, the health check file was
   *     found, or the process already exited with code 0; {@code false} otherwise
   */
  private boolean runHealthcheck(Process process) {
    Optional<HealthcheckOptions> maybeOptions = task
      .getTaskDefinition()
      .getHealthcheckOptions();
    Optional<String> expectedHealthcheckResultFilePath = task
      .getTaskDefinition()
      .getHealthcheckResultFilePath();

    // Without both the options and a result file path there is nothing to poll for.
    if (!maybeOptions.isPresent() || !expectedHealthcheckResultFilePath.isPresent()) {
      reportTaskState(
        Protos.TaskState.TASK_RUNNING,
        String.format("Task running process %s", getCurrentProcessToString())
      );
      return true;
    }

    LOG.debug(
      "Checking for healthcheck file {}",
      expectedHealthcheckResultFilePath.get()
    );
    String taskAppDirectory = task.getTaskDefinition().getTaskAppDirectory();
    File fullHealthcheckPath = Paths
      .get(taskAppDirectory, expectedHealthcheckResultFilePath.get())
      .toFile();

    HealthcheckOptions options = maybeOptions.get();
    int healthcheckMaxRetries = options
      .getMaxRetries()
      .orElse(configuration.getDefaultHealthcheckMaxRetries());
    int retryInterval = options
      .getIntervalSeconds()
      .orElse(configuration.getDefaultHealthcheckInternvalSeconds());
    // Multiply in long so that large interval/retry settings cannot overflow int.
    long maxDelay =
      configuration.getDefaultHealthcheckBaseTimeoutSeconds() +
      ((long) retryInterval * healthcheckMaxRetries);

    try {
      Retryer<HealthCheckResult> retryer = RetryerBuilder
        .<HealthCheckResult>newBuilder()
        .retryIfResult(result -> result == HealthCheckResult.WAITING)
        .withWaitStrategy(WaitStrategies.fixedWait(retryInterval, TimeUnit.SECONDS))
        .withStopStrategy(StopStrategies.stopAfterDelay(maxDelay, TimeUnit.SECONDS))
        .build();

      HealthCheckResult result = retryer.call(
        () -> pollHealthcheck(fullHealthcheckPath, process)
      );

      switch (result) {
        case PASSED:
          reportTaskState(
            Protos.TaskState.TASK_RUNNING,
            String.format(
              "Task running process %s (health check file found successfully).",
              getCurrentProcessToString()
            )
          );
          return true;
        case PASSED_EXITED:
          LOG.info(
            "Task already exited with code 0, considering healthcheck a success and sending running/finished update"
          );
          reportTaskState(
            Protos.TaskState.TASK_RUNNING,
            String.format(
              "Task running process %s (health check file found successfully).",
              getCurrentProcessToString()
            )
          );
          return true;
        case FAILED_EXITED:
          reportTaskState(
            TaskState.TASK_FAILED,
            String.format("Process failed with code %d", process.exitValue())
          );
          return false;
        case WAITING:
        default:
          reportHealthcheckTimeout(maxDelay);
          return false;
      }
    } catch (ExecutionException | RetryException e) {
      // Keep the failure visible in the executor log instead of silently dropping it.
      LOG.warn("Health check for task {} did not complete", task.getTaskId(), e);
      reportHealthcheckTimeout(maxDelay);
      return false;
    }
  }

  /** Performs one health check poll: file present, process still alive, or exited. */
  private static HealthCheckResult pollHealthcheck(
    File fullHealthcheckPath,
    Process process
  ) {
    if (fullHealthcheckPath.exists()) {
      return HealthCheckResult.PASSED;
    }
    if (process.isAlive()) {
      return HealthCheckResult.WAITING;
    }
    return process.exitValue() == 0
      ? HealthCheckResult.PASSED_EXITED
      : HealthCheckResult.FAILED_EXITED;
  }

  /** Sends a TASK_FAILED update stating that the health check file never appeared. */
  private void reportHealthcheckTimeout(long maxDelay) {
    reportTaskState(
      TaskState.TASK_FAILED,
      String.format(
        "Task timed out on health checks after %d seconds (health check file not found).",
        maxDelay
      )
    );
  }

  /** Sends a status update for this task with the given state and message. */
  private void reportTaskState(TaskState state, String message) {
    executorUtils.sendStatusUpdate(
      task.getDriver(),
      task.getTaskInfo().getTaskId(),
      state,
      message,
      task.getLog()
    );
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy