Please wait. This can take some minutes ...
Downloading a project requires significant resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download or modify it as often as you want.
com.hubspot.singularity.data.StateManager Maven / Gradle / Ivy
package com.hubspot.singularity.data;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import javax.inject.Singleton;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.imps.CuratorFrameworkState;
import org.apache.curator.utils.ZKPaths;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.codahale.metrics.MetricRegistry;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import com.hubspot.mesos.CounterMap;
import com.hubspot.mesos.JavaUtils;
import com.hubspot.singularity.SingularityCreateResult;
import com.hubspot.singularity.SingularityDeployMarker;
import com.hubspot.singularity.SingularityHostState;
import com.hubspot.singularity.SingularityMainModule;
import com.hubspot.singularity.SingularityPendingDeploy;
import com.hubspot.singularity.SingularityPendingTaskId;
import com.hubspot.singularity.SingularityRack;
import com.hubspot.singularity.SingularityRequest;
import com.hubspot.singularity.SingularityRequestDeployState;
import com.hubspot.singularity.SingularityRequestWithState;
import com.hubspot.singularity.SingularityScheduledTasksInfo;
import com.hubspot.singularity.SingularitySlave;
import com.hubspot.singularity.SingularityState;
import com.hubspot.singularity.SingularityTaskId;
import com.hubspot.singularity.SingularityTaskReconciliationStatistics;
import com.hubspot.singularity.auth.datastore.SingularityAuthDatastore;
import com.hubspot.singularity.config.SingularityConfiguration;
import com.hubspot.singularity.data.transcoders.Transcoder;
@Singleton
public class StateManager extends CuratorManager {
private static final Logger LOG = LoggerFactory.getLogger(StateManager.class);
private static final String ROOT_PATH = "/hosts";
private static final String STATE_PATH = "/STATE";
private static final String TASK_RECONCILIATION_STATISTICS_PATH = STATE_PATH + "/taskReconciliation";
private final RequestManager requestManager;
private final TaskManager taskManager;
private final DeployManager deployManager;
private final SlaveManager slaveManager;
private final RackManager rackManager;
private final Transcoder stateTranscoder;
private final Transcoder hostStateTranscoder;
private final SingularityConfiguration singularityConfiguration;
private final SingularityAuthDatastore authDatastore;
private final Transcoder taskReconciliationStatisticsTranscoder;
private final PriorityManager priorityManager;
private final AtomicLong statusUpdateDeltaAvg;
private final AtomicLong lastHeartbeatTime;
@Inject
public StateManager(CuratorFramework curatorFramework,
SingularityConfiguration configuration,
MetricRegistry metricRegistry,
RequestManager requestManager,
TaskManager taskManager,
DeployManager deployManager,
SlaveManager slaveManager,
RackManager rackManager,
Transcoder stateTranscoder,
Transcoder hostStateTranscoder,
SingularityConfiguration singularityConfiguration,
SingularityAuthDatastore authDatastore,
PriorityManager priorityManager,
Transcoder taskReconciliationStatisticsTranscoder,
@Named(SingularityMainModule.STATUS_UPDATE_DELTA_30S_AVERAGE) AtomicLong statusUpdateDeltaAvg,
@Named(SingularityMainModule.LAST_MESOS_MASTER_HEARTBEAT_TIME) AtomicLong lastHeartbeatTime) {
super(curatorFramework, configuration, metricRegistry);
this.requestManager = requestManager;
this.taskManager = taskManager;
this.stateTranscoder = stateTranscoder;
this.hostStateTranscoder = hostStateTranscoder;
this.slaveManager = slaveManager;
this.rackManager = rackManager;
this.deployManager = deployManager;
this.singularityConfiguration = singularityConfiguration;
this.authDatastore = authDatastore;
this.priorityManager = priorityManager;
this.taskReconciliationStatisticsTranscoder = taskReconciliationStatisticsTranscoder;
this.statusUpdateDeltaAvg = statusUpdateDeltaAvg;
this.lastHeartbeatTime = lastHeartbeatTime;
}
public SingularityCreateResult saveTaskReconciliationStatistics(SingularityTaskReconciliationStatistics taskReconciliationStatistics) {
return save(TASK_RECONCILIATION_STATISTICS_PATH, taskReconciliationStatistics, taskReconciliationStatisticsTranscoder);
}
public Optional getTaskReconciliationStatistics() {
return getData(TASK_RECONCILIATION_STATISTICS_PATH, taskReconciliationStatisticsTranscoder);
}
public void save(SingularityHostState hostState) throws InterruptedException {
final String path = ZKPaths.makePath(ROOT_PATH, hostState.getHostname());
final byte[] data = hostStateTranscoder.toBytes(hostState);
if (curator.getState() == CuratorFrameworkState.STARTED) {
try {
if (exists(path)) {
curator.setData().forPath(path, data);
} else {
curator.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL).forPath(path, data);
}
} catch (Throwable t) {
throw Throwables.propagate(t);
}
}
}
public SingularityState getState(boolean skipCache, boolean includeRequestIds) {
Optional fromZk = Optional.absent();
if (!skipCache) {
fromZk = getData(STATE_PATH, stateTranscoder);
}
if (fromZk.isPresent()) {
final long now = System.currentTimeMillis();
final long delta = now - fromZk.get().getGeneratedAt();
if (delta < singularityConfiguration.getCacheStateForMillis()) {
return fromZk.get();
}
}
final long start = System.currentTimeMillis();
SingularityState newState = generateState(includeRequestIds);
if (!skipCache) {
final byte[] bytes = stateTranscoder.toBytes(newState);
save(STATE_PATH, newState, stateTranscoder);
LOG.info("Generated new state and saved {} bytes in {}", bytes.length, JavaUtils.duration(start));
}
return newState;
}
public SingularityState generateState(boolean includeRequestIds) {
final int launchingTasks = taskManager.getNumLaunchingTasks();
final int activeTasks = taskManager.getNumActiveTasks() - launchingTasks;
final int scheduledTasks = taskManager.getNumScheduledTasks();
final int cleaningTasks = taskManager.getNumCleanupTasks();
final int lbCleanupTasks = taskManager.getNumLbCleanupTasks();
final int lbCleanupRequests = requestManager.getNumLbCleanupRequests();
final SingularityScheduledTasksInfo scheduledTasksInfo = SingularityScheduledTasksInfo.getInfo(taskManager.getPendingTasks(), singularityConfiguration.getDeltaAfterWhichTasksAreLateMillis());
final List overProvisionedRequestIds = new ArrayList<>();
final Set possiblyUnderProvisionedRequestIds = new HashSet<>();
final List requests = requestManager.getRequests();
final Map numInstances = getNumTasks(requests);
int numActiveRequests = 0;
int numPausedRequests = 0;
int cooldownRequests = 0;
int numFinishedRequests = 0;
for (SingularityRequestWithState requestWithState : requests) {
switch (requestWithState.getState()) {
case DEPLOYING_TO_UNPAUSE:
case ACTIVE:
numActiveRequests++;
break;
case FINISHED:
numFinishedRequests++;
break;
case PAUSED:
numPausedRequests++;
break;
case SYSTEM_COOLDOWN:
cooldownRequests++;
break;
case DELETED:
break;
}
updatePossiblyUnderProvisionedAndOverProvisionedIds(requestWithState, numInstances, overProvisionedRequestIds, possiblyUnderProvisionedRequestIds);
}
filterForPendingRequests(possiblyUnderProvisionedRequestIds);
final List underProvisionedRequestIds = getUnderProvisionedRequestIds(possiblyUnderProvisionedRequestIds);
final int pendingRequests = requestManager.getSizeOfPendingQueue();
final int cleaningRequests = requestManager.getSizeOfCleanupQueue();
List racks = rackManager.getObjects();
int activeRacks = 0;
int deadRacks = 0;
int decommissioningRacks = 0;
int unknownRacks = 0;
for (SingularityRack rack : racks) {
switch (rack.getCurrentState().getState()) {
case ACTIVE:
activeRacks++;
break;
case DEAD:
deadRacks++;
break;
case MISSING_ON_STARTUP:
unknownRacks++;
break;
case DECOMMISSIONED:
case STARTING_DECOMMISSION:
case DECOMMISSIONING:
decommissioningRacks++;
break;
default:
unknownRacks++;
break;
}
}
List slaves = slaveManager.getObjects();
int activeSlaves = 0;
int deadSlaves = 0;
int decommissioningSlaves = 0;
int unknownSlaves = 0;
for (SingularitySlave slave : slaves) {
switch (slave.getCurrentState().getState()) {
case ACTIVE:
activeSlaves++;
break;
case DEAD:
deadSlaves++;
break;
case MISSING_ON_STARTUP:
unknownSlaves++;
break;
case DECOMMISSIONED:
case STARTING_DECOMMISSION:
case DECOMMISSIONING:
decommissioningSlaves++;
break;
default:
unknownSlaves++;
break;
}
}
final List states = getHostStates();
int numDeploys = 0;
long oldestDeploy = 0;
long oldestDeployStep = 0;
List activeDeploys = new ArrayList<>();
final long now = System.currentTimeMillis();
for (SingularityPendingDeploy pendingDeploy : deployManager.getPendingDeploys()) {
activeDeploys.add(pendingDeploy.getDeployMarker());
if (pendingDeploy.getDeployProgress().isPresent() && !pendingDeploy.getDeployProgress().get().isStepComplete()) {
long deployStepDelta = now - pendingDeploy.getDeployProgress().get().getTimestamp();
if (deployStepDelta > oldestDeployStep) {
oldestDeployStep = deployStepDelta;
}
}
long delta = now - pendingDeploy.getDeployMarker().getTimestamp();
if (delta > oldestDeploy) {
oldestDeploy = delta;
}
numDeploys++;
}
final Optional authDatastoreHealthy = authDatastore.isHealthy();
final Optional minimumPriorityLevel = getMinimumPriorityLevel();
return new SingularityState(activeTasks, launchingTasks, numActiveRequests, cooldownRequests, numPausedRequests, scheduledTasks, pendingRequests, lbCleanupTasks, lbCleanupRequests, cleaningRequests, activeSlaves,
deadSlaves, decommissioningSlaves, activeRacks, deadRacks, decommissioningRacks, cleaningTasks, states, oldestDeploy, numDeploys, oldestDeployStep, activeDeploys, scheduledTasksInfo.getNumLateTasks(),
scheduledTasksInfo.getNumFutureTasks(), scheduledTasksInfo.getMaxTaskLag(), System.currentTimeMillis(), includeRequestIds ? overProvisionedRequestIds : null,
includeRequestIds ? underProvisionedRequestIds : null, overProvisionedRequestIds.size(), underProvisionedRequestIds.size(), numFinishedRequests, unknownRacks, unknownSlaves, authDatastoreHealthy, minimumPriorityLevel,
statusUpdateDeltaAvg.get(), lastHeartbeatTime.get());
}
private Map getNumTasks(List requests) {
final CounterMap numTasks = new CounterMap<>(requests.size());
for (SingularityTaskId taskId : taskManager.getActiveTaskIds()) {
numTasks.incr(taskId.getRequestId());
}
for (SingularityPendingTaskId pendingTaskId : taskManager.getPendingTaskIds()) {
numTasks.incr(pendingTaskId.getRequestId());
}
for (SingularityTaskId cleaningTaskId : taskManager.getCleanupTaskIds()) {
Optional request = requestManager.getRequest(cleaningTaskId.getRequestId());
if (request.isPresent() && request.get().getRequest().isScheduled()) {
continue;
}
numTasks.decr(cleaningTaskId.getRequestId());
}
return numTasks.toCountMap();
}
private void updatePossiblyUnderProvisionedAndOverProvisionedIds(SingularityRequestWithState requestWithState, Map numInstances, List overProvisionedRequestIds, Set possiblyUnderProvisionedRequestIds) {
if (requestWithState.getState().isRunnable() && requestWithState.getRequest().isAlwaysRunning()) {
SingularityRequest request = requestWithState.getRequest();
final int expectedInstances = request.getInstancesSafe();
final Long numActualInstances = numInstances.get(request.getId());
if (numActualInstances == null || numActualInstances < expectedInstances) {
possiblyUnderProvisionedRequestIds.add(request.getId());
} else if (numActualInstances > expectedInstances) {
overProvisionedRequestIds.add(request.getId());
}
}
}
private void filterForPendingRequests(Set possiblyUnderProvisionedRequestIds) {
if (possiblyUnderProvisionedRequestIds.size() == 0) {
return;
}
final Set pendingRequestIds = requestManager.getPendingRequests().stream().map((r) -> r.getRequestId()).collect(Collectors.toCollection(HashSet::new));
possiblyUnderProvisionedRequestIds.removeAll(pendingRequestIds);
}
private List getUnderProvisionedRequestIds(Set possiblyUnderProvisionedRequestIds) {
final List underProvisionedRequestIds = new ArrayList<>(possiblyUnderProvisionedRequestIds.size());
if (!possiblyUnderProvisionedRequestIds.isEmpty()) {
Map deployStates = deployManager.getRequestDeployStatesByRequestIds(possiblyUnderProvisionedRequestIds);
for (SingularityRequestDeployState deployState : deployStates.values()) {
if (deployState.getActiveDeploy().isPresent() || deployState.getPendingDeploy().isPresent()) {
underProvisionedRequestIds.add(deployState.getRequestId());
}
}
}
return underProvisionedRequestIds;
}
private List getHostStates() {
List children = getChildren(ROOT_PATH);
List states = Lists.newArrayListWithCapacity(children.size());
for (String child : children) {
try {
byte[] bytes = curator.getData().forPath(ZKPaths.makePath(ROOT_PATH, child));
states.add(hostStateTranscoder.fromBytes(bytes));
} catch (NoNodeException nne) {
} catch (Exception e) {
throw Throwables.propagate(e);
}
}
return states;
}
private Optional getMinimumPriorityLevel() {
return priorityManager.getActivePriorityFreeze().isPresent() ? Optional.of(priorityManager.getActivePriorityFreeze().get().getPriorityFreeze().getMinimumPriorityLevel()) : Optional.absent();
}
}