// com.hubspot.singularity.scheduler.SingularityCooldown (Maven / Gradle / Ivy)
package com.hubspot.singularity.scheduler;
import java.util.concurrent.TimeUnit;
import javax.inject.Singleton;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Optional;
import com.google.inject.Inject;
import com.hubspot.mesos.JavaUtils;
import com.hubspot.singularity.RequestState;
import com.hubspot.singularity.SingularityDeployStatistics;
import com.hubspot.singularity.SingularityRequest;
import com.hubspot.singularity.SingularityTaskId;
import com.hubspot.singularity.config.SingularityConfiguration;
/**
 * Decides whether a request should enter cooldown, and whether an existing cooldown has expired,
 * by counting how many of the request's instances have failed repeatedly inside the configured
 * cooldown window.
 */
@Singleton
public class SingularityCooldown {

  private static final Logger LOG = LoggerFactory.getLogger(SingularityCooldown.class);

  private final SingularityConfiguration configuration;
  /** Length of the cooldown window in milliseconds, derived from the configured minutes. */
  private final long cooldownExpiresAfterMillis;

  /**
   * @param configuration source of the cooldown thresholds (failure count, percentage of
   *     instances, and window length in minutes)
   */
  @Inject
  public SingularityCooldown(SingularityConfiguration configuration) {
    this.configuration = configuration;
    this.cooldownExpiresAfterMillis = TimeUnit.MINUTES.toMillis(configuration.getCooldownExpiresAfterMinutes());
  }

  /**
   * Determines whether a request should be placed into cooldown after a task failure.
   *
   * @param request the request that owns the failed task
   * @param taskId id of the failed task; its instance number is credited with the new failure
   * @param requestState current state of the request; anything other than {@code ACTIVE} never
   *     enters cooldown
   * @param deployStatistics statistics holding the per-instance sequential failure timestamps
   * @param failureTimestamp timestamp (epoch millis) of the failure being evaluated
   * @return {@code true} if enough instances have failed often enough inside the cooldown window;
   *     {@code false} if the request is not active, is not always-running, or cooldown is
   *     disabled because either threshold is configured below 1
   */
  public boolean shouldEnterCooldown(SingularityRequest request, SingularityTaskId taskId, RequestState requestState, SingularityDeployStatistics deployStatistics, long failureTimestamp) {
    if (requestState != RequestState.ACTIVE || !request.isAlwaysRunning()) {
      return false;
    }

    // A threshold below 1 means cooldown is effectively switched off.
    if (configuration.getCooldownAfterFailures() < 1 || configuration.getCooldownExpiresAfterMinutes() < 1) {
      return false;
    }

    final boolean failedTooManyTimes = hasFailedTooManyTimes(request, deployStatistics, Optional.of(taskId.getInstanceNo()), Optional.of(failureTimestamp));

    if (failedTooManyTimes) {
      // The last placeholder is the cooldown window, not the failure count.
      LOG.trace("Request {} has failed at least {} times in {}", request.getId(), configuration.getCooldownAfterFailures(), JavaUtils.durationFromMillis(cooldownExpiresAfterMillis));
    }

    return failedTooManyTimes;
  }

  /**
   * Counts the instances whose failures inside the cooldown window reach the configured failure
   * threshold and compares that count with the required number of failing instances.
   *
   * @param request the request whose instances (numbered 1 through {@code getInstancesSafe()})
   *     are inspected
   * @param deployStatistics statistics holding the per-instance sequential failure timestamps
   * @param instanceNo instance that the extra {@code recentFailureTimestamp} belongs to, if any
   * @param recentFailureTimestamp an additional failure timestamp to count for
   *     {@code instanceNo}; ignored unless both optionals are present
   * @return {@code true} if the number of failing instances is at least
   *     {@code ceil(instances * cooldownAfterPctOfInstancesFail)}
   */
  private boolean hasFailedTooManyTimes(SingularityRequest request, SingularityDeployStatistics deployStatistics, Optional<Integer> instanceNo, Optional<Long> recentFailureTimestamp) {
    final long now = System.currentTimeMillis();
    final int instances = request.getInstancesSafe();
    final int numInstancesThatMustFail = (int) Math.ceil(instances * configuration.getCooldownAfterPctOfInstancesFail());

    int numInstancesThatFailed = 0;

    // Instance numbers are 1-based.
    for (int i = 1; i < instances + 1; i++) {
      int numFailuresInsideCooldown = 0;

      // NOTE(review): assumes get(i) yields an empty collection (not null) for instances with no
      // recorded failures - confirm against SingularityDeployStatistics.
      for (long failureTimestamp : deployStatistics.getInstanceSequentialFailureTimestamps().get(i)) {
        if (hasFailedInsideCooldown(now, failureTimestamp)) {
          numFailuresInsideCooldown++;
        }
      }

      // Credit the caller-supplied failure to the instance it belongs to.
      if (instanceNo.isPresent() && instanceNo.get() == i && recentFailureTimestamp.isPresent()) {
        if (hasFailedInsideCooldown(now, recentFailureTimestamp.get())) {
          numFailuresInsideCooldown++;
        }
      }

      if (numFailuresInsideCooldown >= configuration.getCooldownAfterFailures()) {
        numInstancesThatFailed++;
      }
    }

    return numInstancesThatFailed >= numInstancesThatMustFail;
  }

  /**
   * Determines whether a request's cooldown is over.
   *
   * @param request the request being evaluated
   * @param deployStatistics statistics holding the per-instance sequential failure timestamps
   * @param instanceNo instance that {@code recentFailureTimestamp} belongs to, if any
   * @param recentFailureTimestamp an additional failure timestamp to take into account, if any
   * @return {@code true} if the cooldown window is configured below one minute, or if the
   *     failure thresholds are no longer met; {@code false} otherwise
   */
  public boolean hasCooldownExpired(SingularityRequest request, SingularityDeployStatistics deployStatistics, Optional<Integer> instanceNo, Optional<Long> recentFailureTimestamp) {
    if (configuration.getCooldownExpiresAfterMinutes() < 1) {
      return true;
    }

    final boolean hasCooldownExpired = !hasFailedTooManyTimes(request, deployStatistics, instanceNo, recentFailureTimestamp);

    if (hasCooldownExpired) {
      LOG.trace("Request {} cooldown has expired or is not valid because {} tasks have not failed in the last {}", deployStatistics.getRequestId(), configuration.getCooldownAfterFailures(), JavaUtils.durationFromMillis(cooldownExpiresAfterMillis));
    }

    return hasCooldownExpired;
  }

  /**
   * @param now reference time in epoch millis
   * @param failureTimestamp time of the failure in epoch millis
   * @return {@code true} if the failure happened strictly less than the cooldown window ago
   */
  private boolean hasFailedInsideCooldown(long now, long failureTimestamp) {
    final long timeSinceFailure = now - failureTimestamp;

    return timeSinceFailure < cooldownExpiresAfterMillis;
  }

}