All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hubspot.singularity.scheduler.SingularityTaskReconciliation Maven / Gradle / Ivy

package com.hubspot.singularity.scheduler;

import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import javax.inject.Singleton;

import org.apache.mesos.Protos.SlaveID;
import org.apache.mesos.Protos.TaskID;
import org.apache.mesos.Protos.TaskState;
import org.apache.mesos.Protos.TaskStatus;
import org.apache.mesos.SchedulerDriver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.collect.Lists;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import com.hubspot.mesos.JavaUtils;
import com.hubspot.singularity.SingularityAbort;
import com.hubspot.singularity.SingularityAbort.AbortReason;
import com.hubspot.singularity.SingularityMainModule;
import com.hubspot.singularity.SingularityManagedScheduledExecutorServiceFactory;
import com.hubspot.singularity.SingularityTaskId;
import com.hubspot.singularity.SingularityTaskReconciliationStatistics;
import com.hubspot.singularity.SingularityTaskStatusHolder;
import com.hubspot.singularity.config.SingularityConfiguration;
import com.hubspot.singularity.data.StateManager;
import com.hubspot.singularity.data.TaskManager;
import com.hubspot.singularity.mesos.SchedulerDriverSupplier;
import com.hubspot.singularity.sentry.SingularityExceptionNotifier;

@Singleton
public class SingularityTaskReconciliation {

  private static final Logger LOG = LoggerFactory.getLogger(SingularityTaskReconciliation.class);

  private final TaskManager taskManager;
  private final String serverId;
  private final ScheduledExecutorService executorService;
  private final AtomicBoolean isRunningReconciliation;
  private final SingularityConfiguration configuration;
  private final SingularityAbort abort;
  private final SingularityExceptionNotifier exceptionNotifier;
  private final SchedulerDriverSupplier schedulerDriverSupplier;
  private final StateManager stateManager;

  @Inject
  public SingularityTaskReconciliation(SingularityManagedScheduledExecutorServiceFactory executorServiceFactory,
      SingularityExceptionNotifier exceptionNotifier,
      TaskManager taskManager,
      StateManager stateManager,
      SingularityConfiguration configuration,
      @Named(SingularityMainModule.SERVER_ID_PROPERTY) String serverId,
      SingularityAbort abort,
      SchedulerDriverSupplier schedulerDriverSupplier) {
    this.taskManager = taskManager;
    this.stateManager = stateManager;
    this.serverId = serverId;

    this.exceptionNotifier = exceptionNotifier;
    this.configuration = configuration;
    this.abort = abort;
    this.schedulerDriverSupplier = schedulerDriverSupplier;

    this.isRunningReconciliation = new AtomicBoolean(false);
    this.executorService = executorServiceFactory.get(getClass().getSimpleName());
  }

  enum ReconciliationState {
    ALREADY_RUNNING, STARTED, NO_DRIVER;
  }

  @VisibleForTesting
  boolean isReconciliationRunning() {
    return isRunningReconciliation.get();
  }

  public ReconciliationState startReconciliation() {
    final long taskReconciliationStartedAt = System.currentTimeMillis();

    if (!isRunningReconciliation.compareAndSet(false, true)) {
      LOG.info("Reconciliation is already running, NOT starting a new reconciliation process");
      return ReconciliationState.ALREADY_RUNNING;
    }

    Optional schedulerDriver = schedulerDriverSupplier.get();

    if (!schedulerDriver.isPresent()) {
      LOG.trace("Not running reconciliation - no schedulerDriver present");
      isRunningReconciliation.set(false);
      return ReconciliationState.NO_DRIVER;
    }

    final List activeTaskIds = taskManager.getActiveTaskIds();

    LOG.info("Starting a reconciliation cycle - {} current active tasks", activeTaskIds.size());

    SchedulerDriver driver = schedulerDriver.get();
    driver.reconcileTasks(Collections. emptyList());

    scheduleReconciliationCheck(driver, taskReconciliationStartedAt, activeTaskIds, 0, new Histogram(new UniformReservoir()));

    return ReconciliationState.STARTED;
  }

  private void scheduleReconciliationCheck(final SchedulerDriver driver, final long reconciliationStart, final Collection remainingTaskIds, final int numTimes, final Histogram histogram) {
    LOG.info("Scheduling reconciliation check #{} - {} tasks left - waiting {}", numTimes + 1, remainingTaskIds.size(), JavaUtils.durationFromMillis(configuration.getCheckReconcileWhenRunningEveryMillis()));

    executorService.schedule(new Runnable() {

      @Override
      public void run() {
        try {
          checkReconciliation(driver, reconciliationStart, remainingTaskIds, numTimes + 1, histogram);
        } catch (Throwable t) {
          LOG.error("While checking for reconciliation tasks", t);
          exceptionNotifier.notify(String.format("Error checking for reconciliation tasks (%s)", t.getMessage()), t);
          abort.abort(AbortReason.UNRECOVERABLE_ERROR, Optional.of(t));
        }
      }
    }, configuration.getCheckReconcileWhenRunningEveryMillis(), TimeUnit.MILLISECONDS);
  }

  private void checkReconciliation(final SchedulerDriver driver, final long reconciliationStart, final Collection remainingTaskIds, final int numTimes, final Histogram histogram) {
    final List taskStatusHolders = taskManager.getLastActiveTaskStatusesFor(remainingTaskIds);
    final List taskStatuses = Lists.newArrayListWithCapacity(taskStatusHolders.size());

    for (SingularityTaskStatusHolder taskStatusHolder : taskStatusHolders) {
      if (taskStatusHolder.getServerId().equals(serverId) && taskStatusHolder.getServerTimestamp() > reconciliationStart) {
        histogram.update(taskStatusHolder.getServerTimestamp() - reconciliationStart);
        continue;
      }

      if (taskStatusHolder.getTaskStatus().isPresent()) {
        LOG.debug("Re-requesting task status for {}", taskStatusHolder.getTaskId());
        taskStatuses.add(taskStatusHolder.getTaskStatus().get());
      } else {
        TaskStatus.Builder fakeTaskStatusBuilder = TaskStatus.newBuilder()
            .setTaskId(TaskID.newBuilder().setValue(taskStatusHolder.getTaskId().getId()))
            .setState(TaskState.TASK_STARTING);

        if (taskStatusHolder.getSlaveId().isPresent()) {
          fakeTaskStatusBuilder.setSlaveId(SlaveID.newBuilder().setValue(taskStatusHolder.getSlaveId().get()));
        }

        LOG.info("Task {} didn't have a TaskStatus yet, submitting fake status", taskStatusHolder.getTaskId());
        taskStatuses.add(fakeTaskStatusBuilder.build());
      }
    }

    if (taskStatuses.isEmpty()) {
      LOG.info("Task reconciliation ended after {} checks and {}", numTimes, JavaUtils.duration(reconciliationStart));

      final Snapshot snapshot = histogram.getSnapshot();
      stateManager.saveTaskReconciliationStatistics(new SingularityTaskReconciliationStatistics(reconciliationStart, System.currentTimeMillis() - reconciliationStart, numTimes, histogram.getCount(), snapshot.getMax(), snapshot.getMean(), snapshot.getMin(), snapshot.getMedian(), snapshot.get75thPercentile(), snapshot.get95thPercentile(), snapshot.get98thPercentile(), snapshot.get99thPercentile(), snapshot.get999thPercentile(), snapshot.getStdDev()));

      isRunningReconciliation.set(false);

      return;
    }

    LOG.info("Requesting reconciliation of {} taskStatuses, task reconciliation has been running for {}", taskStatuses.size(), JavaUtils.duration(reconciliationStart));

    driver.reconcileTasks(taskStatuses);

    scheduleReconciliationCheck(driver, reconciliationStart, remainingTaskIds, numTimes, histogram);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy