// com.hubspot.singularity.data.StateManager Maven / Gradle / Ivy
package com.hubspot.singularity.data;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import javax.inject.Singleton;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.imps.CuratorFrameworkState;
import org.apache.curator.utils.ZKPaths;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.codahale.metrics.MetricRegistry;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import com.hubspot.mesos.CounterMap;
import com.hubspot.mesos.JavaUtils;
import com.hubspot.singularity.RequestType;
import com.hubspot.singularity.SingularityCreateResult;
import com.hubspot.singularity.SingularityDeployMarker;
import com.hubspot.singularity.SingularityHostState;
import com.hubspot.singularity.SingularityMainModule;
import com.hubspot.singularity.SingularityPendingDeploy;
import com.hubspot.singularity.SingularityPendingTaskId;
import com.hubspot.singularity.SingularityRack;
import com.hubspot.singularity.SingularityRequest;
import com.hubspot.singularity.SingularityRequestDeployState;
import com.hubspot.singularity.SingularityRequestWithState;
import com.hubspot.singularity.SingularityScheduledTasksInfo;
import com.hubspot.singularity.SingularitySlave;
import com.hubspot.singularity.SingularityState;
import com.hubspot.singularity.SingularityTaskId;
import com.hubspot.singularity.SingularityTaskReconciliationStatistics;
import com.hubspot.singularity.auth.datastore.SingularityAuthDatastore;
import com.hubspot.singularity.config.SingularityConfiguration;
import com.hubspot.singularity.data.transcoders.Transcoder;
@Singleton
public class StateManager extends CuratorManager {
private static final Logger LOG = LoggerFactory.getLogger(StateManager.class);
private static final String ROOT_PATH = "/hosts";
private static final String STATE_PATH = "/STATE";
private static final String TASK_RECONCILIATION_STATISTICS_PATH = STATE_PATH + "/taskReconciliation";
private final RequestManager requestManager;
private final TaskManager taskManager;
private final DeployManager deployManager;
private final SlaveManager slaveManager;
private final RackManager rackManager;
private final Transcoder stateTranscoder;
private final Transcoder hostStateTranscoder;
private final SingularityConfiguration singularityConfiguration;
private final SingularityAuthDatastore authDatastore;
private final Transcoder taskReconciliationStatisticsTranscoder;
private final PriorityManager priorityManager;
private final AtomicLong statusUpdateDeltaAvg;
private final AtomicLong lastHeartbeatTime;
@Inject
public StateManager(CuratorFramework curatorFramework,
SingularityConfiguration configuration,
MetricRegistry metricRegistry,
RequestManager requestManager,
TaskManager taskManager,
DeployManager deployManager,
SlaveManager slaveManager,
RackManager rackManager,
Transcoder stateTranscoder,
Transcoder hostStateTranscoder,
SingularityConfiguration singularityConfiguration,
SingularityAuthDatastore authDatastore,
PriorityManager priorityManager,
Transcoder taskReconciliationStatisticsTranscoder,
@Named(SingularityMainModule.STATUS_UPDATE_DELTA_30S_AVERAGE) AtomicLong statusUpdateDeltaAvg,
@Named(SingularityMainModule.LAST_MESOS_MASTER_HEARTBEAT_TIME) AtomicLong lastHeartbeatTime) {
super(curatorFramework, configuration, metricRegistry);
this.requestManager = requestManager;
this.taskManager = taskManager;
this.stateTranscoder = stateTranscoder;
this.hostStateTranscoder = hostStateTranscoder;
this.slaveManager = slaveManager;
this.rackManager = rackManager;
this.deployManager = deployManager;
this.singularityConfiguration = singularityConfiguration;
this.authDatastore = authDatastore;
this.priorityManager = priorityManager;
this.taskReconciliationStatisticsTranscoder = taskReconciliationStatisticsTranscoder;
this.statusUpdateDeltaAvg = statusUpdateDeltaAvg;
this.lastHeartbeatTime = lastHeartbeatTime;
}
public SingularityCreateResult saveTaskReconciliationStatistics(SingularityTaskReconciliationStatistics taskReconciliationStatistics) {
return save(TASK_RECONCILIATION_STATISTICS_PATH, taskReconciliationStatistics, taskReconciliationStatisticsTranscoder);
}
public Optional getTaskReconciliationStatistics() {
return getData(TASK_RECONCILIATION_STATISTICS_PATH, taskReconciliationStatisticsTranscoder);
}
public void save(SingularityHostState hostState) throws InterruptedException {
final String path = ZKPaths.makePath(ROOT_PATH, hostState.getHostname());
final byte[] data = hostStateTranscoder.toBytes(hostState);
if (curator.getState() == CuratorFrameworkState.STARTED) {
try {
if (exists(path)) {
curator.setData().forPath(path, data);
} else {
curator.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL).forPath(path, data);
}
} catch (Throwable t) {
throw Throwables.propagate(t);
}
}
}
public SingularityState getState(boolean skipCache, boolean includeRequestIds) {
Optional fromZk = Optional.absent();
if (!skipCache) {
fromZk = getData(STATE_PATH, stateTranscoder);
}
if (fromZk.isPresent()) {
final long now = System.currentTimeMillis();
final long delta = now - fromZk.get().getGeneratedAt();
if (delta < singularityConfiguration.getCacheStateForMillis()) {
return fromZk.get();
}
}
final long start = System.currentTimeMillis();
SingularityState newState = generateState(includeRequestIds);
if (!skipCache) {
final byte[] bytes = stateTranscoder.toBytes(newState);
save(STATE_PATH, newState, stateTranscoder);
LOG.info("Generated new state and saved {} bytes in {}", bytes.length, JavaUtils.duration(start));
}
return newState;
}
public SingularityState generateState(boolean includeRequestIds) {
final int launchingTasks = taskManager.getNumLaunchingTasks();
final int activeTasks = taskManager.getNumActiveTasks() - launchingTasks;
final int scheduledTasks = taskManager.getNumScheduledTasks();
final int cleaningTasks = taskManager.getNumCleanupTasks();
final int lbCleanupTasks = taskManager.getNumLbCleanupTasks();
final int lbCleanupRequests = requestManager.getNumLbCleanupRequests();
final SingularityScheduledTasksInfo scheduledTasksInfo = SingularityScheduledTasksInfo.getInfo(taskManager.getPendingTasks(), singularityConfiguration.getDeltaAfterWhichTasksAreLateMillis());
final List overProvisionedRequestIds = new ArrayList<>();
final Set possiblyUnderProvisionedRequestIds = new HashSet<>();
final List requests = requestManager.getRequests();
final Map numInstances = getNumTasks(requests);
int numActiveRequests = 0;
int numPausedRequests = 0;
int cooldownRequests = 0;
int numFinishedRequests = 0;
for (SingularityRequestWithState requestWithState : requests) {
switch (requestWithState.getState()) {
case DEPLOYING_TO_UNPAUSE:
case ACTIVE:
numActiveRequests++;
break;
case FINISHED:
numFinishedRequests++;
break;
case PAUSED:
numPausedRequests++;
break;
case SYSTEM_COOLDOWN:
cooldownRequests++;
break;
case DELETED:
break;
}
updatePossiblyUnderProvisionedAndOverProvisionedIds(requestWithState, numInstances, overProvisionedRequestIds, possiblyUnderProvisionedRequestIds);
}
filterForPendingRequests(possiblyUnderProvisionedRequestIds);
final List underProvisionedRequestIds = getUnderProvisionedRequestIds(possiblyUnderProvisionedRequestIds);
final int pendingRequests = requestManager.getSizeOfPendingQueue();
final int cleaningRequests = requestManager.getSizeOfCleanupQueue();
List racks = rackManager.getObjects();
int activeRacks = 0;
int deadRacks = 0;
int decommissioningRacks = 0;
int unknownRacks = 0;
for (SingularityRack rack : racks) {
switch (rack.getCurrentState().getState()) {
case ACTIVE:
activeRacks++;
break;
case DEAD:
deadRacks++;
break;
case MISSING_ON_STARTUP:
unknownRacks++;
break;
case DECOMMISSIONED:
case STARTING_DECOMMISSION:
case DECOMMISSIONING:
decommissioningRacks++;
break;
default:
unknownRacks++;
break;
}
}
List slaves = slaveManager.getObjects();
int activeSlaves = 0;
int deadSlaves = 0;
int decommissioningSlaves = 0;
int unknownSlaves = 0;
for (SingularitySlave slave : slaves) {
switch (slave.getCurrentState().getState()) {
case ACTIVE:
activeSlaves++;
break;
case DEAD:
deadSlaves++;
break;
case MISSING_ON_STARTUP:
unknownSlaves++;
break;
case DECOMMISSIONED:
case STARTING_DECOMMISSION:
case DECOMMISSIONING:
decommissioningSlaves++;
break;
default:
unknownSlaves++;
break;
}
}
final List states = getHostStates();
int numDeploys = 0;
long oldestDeploy = 0;
long oldestDeployStep = 0;
List activeDeploys = new ArrayList<>();
final long now = System.currentTimeMillis();
for (SingularityPendingDeploy pendingDeploy : deployManager.getPendingDeploys()) {
activeDeploys.add(pendingDeploy.getDeployMarker());
if (pendingDeploy.getDeployProgress().isPresent() && !pendingDeploy.getDeployProgress().get().isStepComplete()) {
long deployStepDelta = now - pendingDeploy.getDeployProgress().get().getTimestamp();
if (deployStepDelta > oldestDeployStep) {
oldestDeployStep = deployStepDelta;
}
}
long delta = now - pendingDeploy.getDeployMarker().getTimestamp();
if (delta > oldestDeploy) {
oldestDeploy = delta;
}
numDeploys++;
}
final Optional authDatastoreHealthy = authDatastore.isHealthy();
final Optional minimumPriorityLevel = getMinimumPriorityLevel();
final Map> lateTasksPartitionedByOnDemand = scheduledTasksInfo.getLateTasks().stream()
.collect(Collectors.partitioningBy(lateTask -> requestTypeIsOnDemand(lateTask)));
final List onDemandLateTasks = lateTasksPartitionedByOnDemand.get(true);
final List lateTasks = lateTasksPartitionedByOnDemand.get(false);
return new SingularityState(activeTasks, launchingTasks, numActiveRequests, cooldownRequests, numPausedRequests, scheduledTasks, pendingRequests, lbCleanupTasks, lbCleanupRequests, cleaningRequests, activeSlaves,
deadSlaves, decommissioningSlaves, activeRacks, deadRacks, decommissioningRacks, cleaningTasks, states, oldestDeploy, numDeploys, oldestDeployStep, activeDeploys, lateTasks.size(), lateTasks, onDemandLateTasks.size(), onDemandLateTasks,
scheduledTasksInfo.getNumFutureTasks(), scheduledTasksInfo.getMaxTaskLag(), System.currentTimeMillis(), includeRequestIds ? overProvisionedRequestIds : null,
includeRequestIds ? underProvisionedRequestIds : null, overProvisionedRequestIds.size(), underProvisionedRequestIds.size(), numFinishedRequests, unknownRacks, unknownSlaves, authDatastoreHealthy, minimumPriorityLevel,
statusUpdateDeltaAvg.get(), lastHeartbeatTime.get());
}
private boolean requestTypeIsOnDemand(SingularityPendingTaskId taskId) {
if (requestManager.getRequest(taskId.getRequestId()).isPresent()) {
return requestManager.getRequest(taskId.getRequestId()).get().getRequest().getRequestType().equals(RequestType.ON_DEMAND);
}
return false;
}
private Map getNumTasks(List requests) {
final CounterMap numTasks = new CounterMap<>(requests.size());
for (SingularityTaskId taskId : taskManager.getActiveTaskIds()) {
numTasks.incr(taskId.getRequestId());
}
for (SingularityPendingTaskId pendingTaskId : taskManager.getPendingTaskIds()) {
numTasks.incr(pendingTaskId.getRequestId());
}
for (SingularityTaskId cleaningTaskId : taskManager.getCleanupTaskIds()) {
Optional request = requestManager.getRequest(cleaningTaskId.getRequestId());
if (request.isPresent() && request.get().getRequest().isScheduled()) {
continue;
}
numTasks.decr(cleaningTaskId.getRequestId());
}
return numTasks.toCountMap();
}
private void updatePossiblyUnderProvisionedAndOverProvisionedIds(SingularityRequestWithState requestWithState, Map numInstances, List overProvisionedRequestIds, Set possiblyUnderProvisionedRequestIds) {
if (requestWithState.getState().isRunnable() && requestWithState.getRequest().isAlwaysRunning()) {
SingularityRequest request = requestWithState.getRequest();
final int expectedInstances = request.getInstancesSafe();
final Long numActualInstances = numInstances.get(request.getId());
if (numActualInstances == null || numActualInstances < expectedInstances) {
possiblyUnderProvisionedRequestIds.add(request.getId());
} else if (numActualInstances > expectedInstances) {
overProvisionedRequestIds.add(request.getId());
}
}
}
private void filterForPendingRequests(Set possiblyUnderProvisionedRequestIds) {
if (possiblyUnderProvisionedRequestIds.size() == 0) {
return;
}
final Set pendingRequestIds = requestManager.getPendingRequests().stream().map((r) -> r.getRequestId()).collect(Collectors.toCollection(HashSet::new));
possiblyUnderProvisionedRequestIds.removeAll(pendingRequestIds);
}
private List getUnderProvisionedRequestIds(Set possiblyUnderProvisionedRequestIds) {
final List underProvisionedRequestIds = new ArrayList<>(possiblyUnderProvisionedRequestIds.size());
if (!possiblyUnderProvisionedRequestIds.isEmpty()) {
Map deployStates = deployManager.getRequestDeployStatesByRequestIds(possiblyUnderProvisionedRequestIds);
for (SingularityRequestDeployState deployState : deployStates.values()) {
if (deployState.getActiveDeploy().isPresent() || deployState.getPendingDeploy().isPresent()) {
underProvisionedRequestIds.add(deployState.getRequestId());
}
}
}
return underProvisionedRequestIds;
}
private List getHostStates() {
List children = getChildren(ROOT_PATH);
List states = Lists.newArrayListWithCapacity(children.size());
for (String child : children) {
try {
byte[] bytes = curator.getData().forPath(ZKPaths.makePath(ROOT_PATH, child));
states.add(hostStateTranscoder.fromBytes(bytes));
} catch (NoNodeException nne) {
} catch (Exception e) {
throw Throwables.propagate(e);
}
}
return states;
}
private Optional getMinimumPriorityLevel() {
return priorityManager.getActivePriorityFreeze().isPresent() ? Optional.of(priorityManager.getActivePriorityFreeze().get().getPriorityFreeze().getMinimumPriorityLevel()) : Optional.absent();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy