All downloads are FREE. Search and download functionality uses the official Maven repository.

com.hubspot.singularity.data.StateManager Maven / Gradle / Ivy

package com.hubspot.singularity.data;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;

import javax.inject.Singleton;

import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.imps.CuratorFrameworkState;
import org.apache.curator.utils.ZKPaths;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.codahale.metrics.MetricRegistry;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import com.hubspot.mesos.CounterMap;
import com.hubspot.mesos.JavaUtils;
import com.hubspot.singularity.SingularityCreateResult;
import com.hubspot.singularity.SingularityDeployMarker;
import com.hubspot.singularity.SingularityHostState;
import com.hubspot.singularity.SingularityMainModule;
import com.hubspot.singularity.SingularityPendingDeploy;
import com.hubspot.singularity.SingularityPendingTaskId;
import com.hubspot.singularity.SingularityRack;
import com.hubspot.singularity.SingularityRequest;
import com.hubspot.singularity.SingularityRequestDeployState;
import com.hubspot.singularity.SingularityRequestWithState;
import com.hubspot.singularity.SingularityScheduledTasksInfo;
import com.hubspot.singularity.SingularitySlave;
import com.hubspot.singularity.SingularityState;
import com.hubspot.singularity.SingularityTaskId;
import com.hubspot.singularity.SingularityTaskReconciliationStatistics;
import com.hubspot.singularity.auth.datastore.SingularityAuthDatastore;
import com.hubspot.singularity.config.SingularityConfiguration;
import com.hubspot.singularity.data.transcoders.Transcoder;

/**
 * Generates, caches and persists the aggregate {@link SingularityState} summary, and stores the
 * per-host {@link SingularityHostState} entries, using ZooKeeper (via Curator) as the backing store.
 */
@Singleton
public class StateManager extends CuratorManager {

  private static final Logger LOG = LoggerFactory.getLogger(StateManager.class);

  /** Parent node of the per-host state entries; one child per hostname. */
  private static final String ROOT_PATH = "/hosts";
  /** Node holding the most recently generated (cached) {@link SingularityState}. */
  private static final String STATE_PATH = "/STATE";
  private static final String TASK_RECONCILIATION_STATISTICS_PATH = STATE_PATH + "/taskReconciliation";

  private final RequestManager requestManager;
  private final TaskManager taskManager;
  private final DeployManager deployManager;
  private final SlaveManager slaveManager;
  private final RackManager rackManager;
  private final Transcoder<SingularityState> stateTranscoder;
  private final Transcoder<SingularityHostState> hostStateTranscoder;
  private final SingularityConfiguration singularityConfiguration;
  private final SingularityAuthDatastore authDatastore;
  private final Transcoder<SingularityTaskReconciliationStatistics> taskReconciliationStatisticsTranscoder;
  private final PriorityManager priorityManager;
  private final AtomicLong statusUpdateDeltaAvg;
  private final AtomicLong lastHeartbeatTime;

  /**
   * Injection constructor. {@code configuration} is handed to the {@link CuratorManager} superclass
   * while {@code singularityConfiguration} is the one kept by this class.
   */
  @Inject
  public StateManager(CuratorFramework curatorFramework,
                      SingularityConfiguration configuration,
                      MetricRegistry metricRegistry,
                      RequestManager requestManager,
                      TaskManager taskManager,
                      DeployManager deployManager,
                      SlaveManager slaveManager,
                      RackManager rackManager,
                      Transcoder<SingularityState> stateTranscoder,
                      Transcoder<SingularityHostState> hostStateTranscoder,
                      SingularityConfiguration singularityConfiguration,
                      SingularityAuthDatastore authDatastore,
                      PriorityManager priorityManager,
                      Transcoder<SingularityTaskReconciliationStatistics> taskReconciliationStatisticsTranscoder,
                      @Named(SingularityMainModule.STATUS_UPDATE_DELTA_30S_AVERAGE) AtomicLong statusUpdateDeltaAvg,
                      @Named(SingularityMainModule.LAST_MESOS_MASTER_HEARTBEAT_TIME) AtomicLong lastHeartbeatTime) {
    super(curatorFramework, configuration, metricRegistry);

    this.requestManager = requestManager;
    this.taskManager = taskManager;
    this.stateTranscoder = stateTranscoder;
    this.hostStateTranscoder = hostStateTranscoder;
    this.slaveManager = slaveManager;
    this.rackManager = rackManager;
    this.deployManager = deployManager;
    this.singularityConfiguration = singularityConfiguration;
    this.authDatastore = authDatastore;
    this.priorityManager = priorityManager;
    this.taskReconciliationStatisticsTranscoder = taskReconciliationStatisticsTranscoder;
    this.statusUpdateDeltaAvg = statusUpdateDeltaAvg;
    this.lastHeartbeatTime = lastHeartbeatTime;
  }

  /**
   * Stores the given task reconciliation statistics at {@code /STATE/taskReconciliation}.
   *
   * @param taskReconciliationStatistics the statistics to store
   * @return the result reported by the superclass {@code save}
   */
  public SingularityCreateResult saveTaskReconciliationStatistics(SingularityTaskReconciliationStatistics taskReconciliationStatistics) {
    return save(TASK_RECONCILIATION_STATISTICS_PATH, taskReconciliationStatistics, taskReconciliationStatisticsTranscoder);
  }

  /**
   * Reads the task reconciliation statistics stored at {@code /STATE/taskReconciliation}.
   *
   * @return whatever the superclass {@code getData} yields for that path
   */
  public Optional<SingularityTaskReconciliationStatistics> getTaskReconciliationStatistics() {
    return getData(TASK_RECONCILIATION_STATISTICS_PATH, taskReconciliationStatisticsTranscoder);
  }

  /**
   * Writes the state of one host to {@code /hosts/<hostname>}: the node's data is replaced when it
   * already exists, otherwise the node is created as an ephemeral node (parents created as
   * needed). Nothing is written unless the Curator client is in the {@code STARTED} state.
   *
   * @param hostState the host state to store; its hostname determines the node name
   * @throws InterruptedException if the ZooKeeper call is interrupted
   * @throws RuntimeException wrapping any other checked failure of the ZooKeeper call
   */
  public void save(SingularityHostState hostState) throws InterruptedException {
    final String path = ZKPaths.makePath(ROOT_PATH, hostState.getHostname());
    final byte[] data = hostStateTranscoder.toBytes(hostState);

    if (curator.getState() == CuratorFrameworkState.STARTED) {
      try {
        if (exists(path)) {
          curator.setData().forPath(path, data);
        } else {
          curator.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL).forPath(path, data);
        }
      } catch (InterruptedException ie) {
        // Honor the declared contract: Throwables.propagate would wrap this in a RuntimeException,
        // leaving callers unable to catch the InterruptedException this method advertises.
        throw ie;
      } catch (Throwable t) {
        throw Throwables.propagate(t);
      }
    }
  }

  /**
   * Returns the cluster state, served from the copy stored at {@code /STATE} when that copy is
   * younger than {@code getCacheStateForMillis()}; otherwise a new state is generated.
   *
   * @param skipCache when {@code true} the stored copy is neither read nor overwritten
   * @param includeRequestIds forwarded to {@link #generateState(boolean)} when a new state is built
   * @return the cached or newly generated state
   */
  public SingularityState getState(boolean skipCache, boolean includeRequestIds) {
    Optional<SingularityState> fromZk = Optional.absent();

    if (!skipCache) {
      fromZk = getData(STATE_PATH, stateTranscoder);
    }

    if (fromZk.isPresent()) {
      final long now = System.currentTimeMillis();
      final long delta = now - fromZk.get().getGeneratedAt();

      // NOTE(review): the cached copy is returned regardless of includeRequestIds, so it reflects
      // whatever value was used when it was generated - confirm this is intended.
      if (delta < singularityConfiguration.getCacheStateForMillis()) {
        return fromZk.get();
      }
    }

    final long start = System.currentTimeMillis();

    SingularityState newState = generateState(includeRequestIds);

    if (!skipCache) {
      // bytes is only used to report the serialized size in the log line below.
      final byte[] bytes = stateTranscoder.toBytes(newState);
      save(STATE_PATH, newState, stateTranscoder);

      LOG.info("Generated new state and saved {} bytes in {}", bytes.length, JavaUtils.duration(start));
    }

    return newState;
  }

  /**
   * Builds a new {@link SingularityState} from the current task, request, rack, slave, deploy,
   * host-state, auth datastore and priority information.
   *
   * @param includeRequestIds when {@code false}, {@code null} is passed in place of the over- and
   *     under-provisioned request id lists; their sizes are passed either way
   * @return the newly generated state
   */
  public SingularityState generateState(boolean includeRequestIds) {
    final int launchingTasks = taskManager.getNumLaunchingTasks();
    // Launching tasks are reported separately, so they are excluded from the active count.
    final int activeTasks = taskManager.getNumActiveTasks() - launchingTasks;
    final int scheduledTasks = taskManager.getNumScheduledTasks();
    final int cleaningTasks = taskManager.getNumCleanupTasks();
    final int lbCleanupTasks = taskManager.getNumLbCleanupTasks();
    final int lbCleanupRequests = requestManager.getNumLbCleanupRequests();

    final SingularityScheduledTasksInfo scheduledTasksInfo = SingularityScheduledTasksInfo.getInfo(taskManager.getPendingTasks(), singularityConfiguration.getDeltaAfterWhichTasksAreLateMillis());

    final List<String> overProvisionedRequestIds = new ArrayList<>();
    final Set<String> possiblyUnderProvisionedRequestIds = new HashSet<>();

    final List<SingularityRequestWithState> requests = requestManager.getRequests();

    final Map<String, Long> numInstances = getNumTasks(requests);

    int numActiveRequests = 0;
    int numPausedRequests = 0;
    int cooldownRequests = 0;
    int numFinishedRequests = 0;

    for (SingularityRequestWithState requestWithState : requests) {
      switch (requestWithState.getState()) {
        case DEPLOYING_TO_UNPAUSE:
        case ACTIVE:
          numActiveRequests++;
          break;
        case FINISHED:
          numFinishedRequests++;
          break;
        case PAUSED:
          numPausedRequests++;
          break;
        case SYSTEM_COOLDOWN:
          cooldownRequests++;
          break;
        case DELETED:
          // Deleted requests are not counted in any bucket.
          break;
      }

      updatePossiblyUnderProvisionedAndOverProvisionedIds(requestWithState, numInstances, overProvisionedRequestIds, possiblyUnderProvisionedRequestIds);
    }

    filterForPendingRequests(possiblyUnderProvisionedRequestIds);
    final List<String> underProvisionedRequestIds = getUnderProvisionedRequestIds(possiblyUnderProvisionedRequestIds);

    final int pendingRequests = requestManager.getSizeOfPendingQueue();
    final int cleaningRequests = requestManager.getSizeOfCleanupQueue();

    List<SingularityRack> racks = rackManager.getObjects();

    int activeRacks = 0;
    int deadRacks = 0;
    int decommissioningRacks = 0;
    int unknownRacks = 0;

    for (SingularityRack rack : racks) {
      switch (rack.getCurrentState().getState()) {
        case ACTIVE:
          activeRacks++;
          break;
        case DEAD:
          deadRacks++;
          break;
        case MISSING_ON_STARTUP:
          unknownRacks++;
          break;
        case DECOMMISSIONED:
        case STARTING_DECOMMISSION:
        case DECOMMISSIONING:
          // Every stage of decommissioning is reported in the same bucket.
          decommissioningRacks++;
          break;
        default:
          unknownRacks++;
          break;
      }
    }

    List<SingularitySlave> slaves = slaveManager.getObjects();

    int activeSlaves = 0;
    int deadSlaves = 0;
    int decommissioningSlaves = 0;
    int unknownSlaves = 0;

    for (SingularitySlave slave : slaves) {
      switch (slave.getCurrentState().getState()) {
        case ACTIVE:
          activeSlaves++;
          break;
        case DEAD:
          deadSlaves++;
          break;
        case MISSING_ON_STARTUP:
          unknownSlaves++;
          break;
        case DECOMMISSIONED:
        case STARTING_DECOMMISSION:
        case DECOMMISSIONING:
          // Every stage of decommissioning is reported in the same bucket.
          decommissioningSlaves++;
          break;
        default:
          unknownSlaves++;
          break;
      }
    }

    final List<SingularityHostState> states = getHostStates();

    int numDeploys = 0;
    long oldestDeploy = 0;
    long oldestDeployStep = 0;
    List<SingularityDeployMarker> activeDeploys = new ArrayList<>();
    final long now = System.currentTimeMillis();

    for (SingularityPendingDeploy pendingDeploy : deployManager.getPendingDeploys()) {
      activeDeploys.add(pendingDeploy.getDeployMarker());
      // Only deploy steps that have not completed contribute to the oldest-step age.
      if (pendingDeploy.getDeployProgress().isPresent() && !pendingDeploy.getDeployProgress().get().isStepComplete()) {
        long deployStepDelta = now - pendingDeploy.getDeployProgress().get().getTimestamp();
        if (deployStepDelta > oldestDeployStep) {
          oldestDeployStep = deployStepDelta;
        }
      }
      long delta = now - pendingDeploy.getDeployMarker().getTimestamp();
      if (delta > oldestDeploy) {
        oldestDeploy = delta;
      }
      numDeploys++;
    }

    final Optional<Boolean> authDatastoreHealthy = authDatastore.isHealthy();

    final Optional<Double> minimumPriorityLevel = getMinimumPriorityLevel();

    return new SingularityState(activeTasks, launchingTasks, numActiveRequests, cooldownRequests, numPausedRequests, scheduledTasks, pendingRequests, lbCleanupTasks, lbCleanupRequests, cleaningRequests, activeSlaves,
        deadSlaves, decommissioningSlaves, activeRacks, deadRacks, decommissioningRacks, cleaningTasks, states, oldestDeploy, numDeploys, oldestDeployStep, activeDeploys, scheduledTasksInfo.getNumLateTasks(),
        scheduledTasksInfo.getNumFutureTasks(), scheduledTasksInfo.getMaxTaskLag(), System.currentTimeMillis(), includeRequestIds ? overProvisionedRequestIds : null,
        includeRequestIds ? underProvisionedRequestIds : null, overProvisionedRequestIds.size(), underProvisionedRequestIds.size(), numFinishedRequests, unknownRacks, unknownSlaves, authDatastoreHealthy, minimumPriorityLevel,
        statusUpdateDeltaAvg.get(), lastHeartbeatTime.get());
  }

  /**
   * Counts tasks per request id: every active and every pending task adds one, and every task in
   * cleanup subtracts one unless its request is present and scheduled.
   *
   * @param requests the known requests; used here only to size the counter map
   * @return task count keyed by request id
   */
  private Map<String, Long> getNumTasks(List<SingularityRequestWithState> requests) {
    final CounterMap<String> numTasks = new CounterMap<>(requests.size());

    for (SingularityTaskId taskId : taskManager.getActiveTaskIds()) {
      numTasks.incr(taskId.getRequestId());
    }

    for (SingularityPendingTaskId pendingTaskId : taskManager.getPendingTaskIds()) {
      numTasks.incr(pendingTaskId.getRequestId());
    }

    for (SingularityTaskId cleaningTaskId : taskManager.getCleanupTaskIds()) {
      Optional<SingularityRequestWithState> request = requestManager.getRequest(cleaningTaskId.getRequestId());
      // Cleaning tasks of scheduled requests are left in the count.
      if (request.isPresent() && request.get().getRequest().isScheduled()) {
        continue;
      }

      numTasks.decr(cleaningTaskId.getRequestId());
    }

    return numTasks.toCountMap();
  }

  /**
   * For a runnable, always-running request, records its id as possibly under-provisioned when it
   * has fewer counted instances than expected (or none counted at all), or as over-provisioned
   * when it has more. Other requests are ignored.
   *
   * @param requestWithState the request to classify
   * @param numInstances task count keyed by request id
   * @param overProvisionedRequestIds output list of over-provisioned request ids
   * @param possiblyUnderProvisionedRequestIds output set of candidate under-provisioned request ids
   */
  private void updatePossiblyUnderProvisionedAndOverProvisionedIds(SingularityRequestWithState requestWithState, Map<String, Long> numInstances, List<String> overProvisionedRequestIds, Set<String> possiblyUnderProvisionedRequestIds) {
    if (requestWithState.getState().isRunnable() && requestWithState.getRequest().isAlwaysRunning()) {
      SingularityRequest request = requestWithState.getRequest();
      final int expectedInstances = request.getInstancesSafe();

      // null means no task was counted for this request at all.
      final Long numActualInstances = numInstances.get(request.getId());

      if (numActualInstances == null || numActualInstances < expectedInstances) {
        possiblyUnderProvisionedRequestIds.add(request.getId());
      } else if (numActualInstances > expectedInstances) {
        overProvisionedRequestIds.add(request.getId());
      }
    }
  }

  /**
   * Removes from the given set every request id that currently has a pending request. The pending
   * requests are not fetched when the set is empty.
   *
   * @param possiblyUnderProvisionedRequestIds candidate ids; modified in place
   */
  private void filterForPendingRequests(Set<String> possiblyUnderProvisionedRequestIds) {
    if (possiblyUnderProvisionedRequestIds.isEmpty()) {
      return;
    }

    final Set<String> pendingRequestIds = requestManager.getPendingRequests().stream().map((r) -> r.getRequestId()).collect(Collectors.toCollection(HashSet::new));
    possiblyUnderProvisionedRequestIds.removeAll(pendingRequestIds);
  }

  /**
   * Narrows the candidate ids to those whose deploy state has an active or a pending deploy.
   *
   * @param possiblyUnderProvisionedRequestIds candidate request ids
   * @return the request ids considered under-provisioned; empty when there are no candidates
   */
  private List<String> getUnderProvisionedRequestIds(Set<String> possiblyUnderProvisionedRequestIds) {
    final List<String> underProvisionedRequestIds = new ArrayList<>(possiblyUnderProvisionedRequestIds.size());

    if (!possiblyUnderProvisionedRequestIds.isEmpty()) {
      Map<String, SingularityRequestDeployState> deployStates = deployManager.getRequestDeployStatesByRequestIds(possiblyUnderProvisionedRequestIds);

      for (SingularityRequestDeployState deployState : deployStates.values()) {
        if (deployState.getActiveDeploy().isPresent() || deployState.getPendingDeploy().isPresent()) {
          underProvisionedRequestIds.add(deployState.getRequestId());
        }
      }
    }

    return underProvisionedRequestIds;
  }

  /**
   * Reads and decodes the state of every host listed under {@code /hosts}.
   *
   * @return the decoded host states; hosts whose node no longer exists when read are omitted
   * @throws RuntimeException wrapping any failure other than a missing node
   */
  private List<SingularityHostState> getHostStates() {
    List<String> children = getChildren(ROOT_PATH);
    List<SingularityHostState> states = Lists.newArrayListWithCapacity(children.size());

    for (String child : children) {

      try {
        byte[] bytes = curator.getData().forPath(ZKPaths.makePath(ROOT_PATH, child));

        states.add(hostStateTranscoder.fromBytes(bytes));
      } catch (NoNodeException ignored) {
        // Deliberately skipped: host nodes are created as ephemeral, so one can disappear between
        // listing the children and reading its data.
      } catch (Exception e) {
        throw Throwables.propagate(e);
      }
    }

    return states;
  }

  /**
   * Returns the minimum priority level of the active priority freeze, if there is one.
   *
   * @return the minimum priority level, or absent when no priority freeze is active
   */
  private Optional<Double> getMinimumPriorityLevel() {
    // Query the freeze once only: checking isPresent() on one read and calling get() on a second
    // read could fail if the freeze is removed in between.
    return priorityManager.getActivePriorityFreeze()
        .transform(freeze -> freeze.getPriorityFreeze().getMinimumPriorityLevel());
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy