Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
com.hubspot.singularity.executor.cleanup.SingularityExecutorCleanup Maven / Gradle / Ivy
package com.hubspot.singularity.executor.cleanup;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Optional;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Sets;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import com.hubspot.mesos.JavaUtils;
import com.hubspot.mesos.client.MesosClient;
import com.hubspot.singularity.MachineState;
import com.hubspot.singularity.SingularitySlave;
import com.hubspot.singularity.SingularityTask;
import com.hubspot.singularity.SingularityTaskExecutorData;
import com.hubspot.singularity.SingularityTaskHistory;
import com.hubspot.singularity.SingularityTaskHistoryUpdate;
import com.hubspot.singularity.client.SingularityClient;
import com.hubspot.singularity.client.SingularityClientException;
import com.hubspot.singularity.client.SingularityClientProvider;
import com.hubspot.singularity.executor.SingularityExecutorCleanupStatistics;
import com.hubspot.singularity.executor.SingularityExecutorCleanupStatistics.SingularityExecutorCleanupStatisticsBuilder;
import com.hubspot.singularity.executor.TemplateManager;
import com.hubspot.singularity.executor.cleanup.config.SingularityExecutorCleanupConfiguration;
import com.hubspot.singularity.executor.config.SingularityExecutorConfiguration;
import com.hubspot.singularity.executor.task.SingularityExecutorTaskCleanup;
import com.hubspot.singularity.executor.task.SingularityExecutorTaskDefinition;
import com.hubspot.singularity.executor.task.SingularityExecutorTaskLogManager;
import com.hubspot.singularity.executor.task.TaskCleanupResult;
import com.hubspot.singularity.executor.utils.DockerUtils;
import com.hubspot.singularity.runner.base.config.SingularityRunnerBaseModule;
import com.hubspot.singularity.runner.base.configuration.SingularityRunnerBaseConfiguration;
import com.hubspot.singularity.runner.base.sentry.SingularityRunnerExceptionNotifier;
import com.hubspot.singularity.runner.base.shared.CompressionType;
import com.hubspot.singularity.runner.base.shared.JsonObjectFileHelper;
import com.hubspot.singularity.runner.base.shared.ProcessFailedException;
import com.hubspot.singularity.runner.base.shared.ProcessUtils;
import com.hubspot.singularity.runner.base.shared.SimpleProcessManager;
import com.spotify.docker.client.messages.Container;
import com.spotify.docker.client.messages.ContainerInfo;
public class SingularityExecutorCleanup {
private static final Logger LOG = LoggerFactory.getLogger(SingularityExecutorCleanup.class);
private final JsonObjectFileHelper jsonObjectFileHelper;
private final SingularityRunnerBaseConfiguration baseConfiguration;
private final SingularityExecutorConfiguration executorConfiguration;
private final SingularityClient singularityClient;
private final TemplateManager templateManager;
private final SingularityExecutorCleanupConfiguration cleanupConfiguration;
private final MesosClient mesosClient;
private final ProcessUtils processUtils;
private final DockerUtils dockerUtils;
private final String hostname;
private final SingularityRunnerExceptionNotifier exceptionNotifier;
@Inject
public SingularityExecutorCleanup(SingularityClientProvider singularityClientProvider, JsonObjectFileHelper jsonObjectFileHelper, SingularityRunnerBaseConfiguration baseConfiguration,
SingularityExecutorConfiguration executorConfiguration, SingularityExecutorCleanupConfiguration cleanupConfiguration, TemplateManager templateManager, MesosClient mesosClient,
DockerUtils dockerUtils, @Named(SingularityRunnerBaseModule.HOST_NAME_PROPERTY) String hostname, SingularityRunnerExceptionNotifier exceptionNotifier) {
this.jsonObjectFileHelper = jsonObjectFileHelper;
this.baseConfiguration = baseConfiguration;
this.executorConfiguration = executorConfiguration;
this.cleanupConfiguration = cleanupConfiguration;
this.singularityClient = singularityClientProvider.get(cleanupConfiguration.getSingularityClientCredentials());
this.templateManager = templateManager;
this.mesosClient = mesosClient;
this.processUtils = new ProcessUtils(LOG);
this.dockerUtils = dockerUtils;
this.hostname = hostname;
this.exceptionNotifier = exceptionNotifier;
}
public SingularityExecutorCleanupStatistics clean() {
final SingularityExecutorCleanupStatisticsBuilder statisticsBldr = new SingularityExecutorCleanupStatisticsBuilder();
final Path directory = Paths.get(executorConfiguration.getGlobalTaskDefinitionDirectory());
Set runningTaskIds = null;
try {
runningTaskIds = getRunningTaskIds();
} catch (Exception e) {
LOG.error("While fetching running tasks from singularity", e);
exceptionNotifier.notify(String.format("Error fetching running tasks (%s)", e.getMessage()), e, Collections.emptyMap());
statisticsBldr.setErrorMessage(e.getMessage());
return statisticsBldr.build();
}
LOG.info("Found {} running tasks from Mesos", runningTaskIds);
statisticsBldr.setMesosRunningTasks(runningTaskIds.size());
if (runningTaskIds.isEmpty()) {
if (!isDecommissioned()) {
if (cleanupConfiguration.isSafeModeWontRunWithNoTasks()) {
final String errorMessage = "Running in safe mode and found 0 running tasks - aborting cleanup";
LOG.error(errorMessage);
statisticsBldr.setErrorMessage(errorMessage);
return statisticsBldr.build();
} else {
LOG.warn("Found 0 running tasks - proceeding with cleanup as we are not in safe mode");
}
} else {
if (!cleanupConfiguration.isCleanTasksWhenDecommissioned()) {
return statisticsBldr.build();
}
}
}
if (cleanupConfiguration.isRunDockerCleanup()) {
cleanDocker(runningTaskIds);
}
for (Path file : JavaUtils.iterable(directory)) {
if (!Objects.toString(file.getFileName()).endsWith(executorConfiguration.getGlobalTaskDefinitionSuffix())) {
LOG.debug("Ignoring file {} that doesn't have suffix {}", file, executorConfiguration.getGlobalTaskDefinitionSuffix());
statisticsBldr.incrInvalidTasks();
continue;
}
statisticsBldr.incrTotalTaskFiles();
try {
Optional maybeTaskDefinition = jsonObjectFileHelper.read(file, LOG, SingularityExecutorTaskDefinition.class);
if (!maybeTaskDefinition.isPresent()) {
statisticsBldr.incrInvalidTasks();
continue;
}
SingularityExecutorTaskDefinition taskDefinition = withDefaults(maybeTaskDefinition.get());
final String taskId = taskDefinition.getTaskId();
LOG.info("{} - Starting possible cleanup", taskId);
if (runningTaskIds.contains(taskId) || executorStillRunning(taskDefinition)) {
statisticsBldr.incrRunningTasksIgnored();
continue;
}
Optional taskHistory = null;
try {
taskHistory = singularityClient.getHistoryForTask(taskId);
} catch (SingularityClientException sce) {
LOG.error("{} - Failed fetching history", taskId, sce);
exceptionNotifier.notify(String.format("Error fetching history (%s)", sce.getMessage()), sce, ImmutableMap.of("taskId", taskId));
statisticsBldr.incrErrorTasks();
continue;
}
TaskCleanupResult result = cleanTask(taskDefinition, taskHistory);
LOG.info("{} - {}", taskId, result);
switch (result) {
case ERROR:
statisticsBldr.incrErrorTasks();
break;
case SUCCESS:
statisticsBldr.incrSuccessfullyCleanedTasks();
break;
case WAITING:
statisticsBldr.incrWaitingTasks();
break;
default:
break;
}
} catch (IOException ioe) {
LOG.error("Couldn't read file {}", file, ioe);
exceptionNotifier.notify(String.format("Error reading file (%s)", ioe.getMessage()), ioe, ImmutableMap.of("file", file.toString()));
statisticsBldr.incrIoErrorTasks();
}
}
return statisticsBldr.build();
}
private SingularityExecutorTaskDefinition withDefaults(SingularityExecutorTaskDefinition oldDefinition) {
return new SingularityExecutorTaskDefinition(
oldDefinition.getTaskId(),
new SingularityTaskExecutorData(
oldDefinition.getExecutorData(),
oldDefinition.getExecutorData().getS3UploaderAdditionalFiles() == null ? cleanupConfiguration.getS3UploaderAdditionalFiles() : oldDefinition.getExecutorData().getS3UploaderAdditionalFiles(),
Strings.isNullOrEmpty(oldDefinition.getExecutorData().getDefaultS3Bucket()) ? cleanupConfiguration.getDefaultS3Bucket() : oldDefinition.getExecutorData().getDefaultS3Bucket(),
Strings.isNullOrEmpty(oldDefinition.getExecutorData().getS3UploaderKeyPattern()) ? cleanupConfiguration.getS3KeyFormat(): oldDefinition.getExecutorData().getS3UploaderKeyPattern(),
Strings.isNullOrEmpty(oldDefinition.getExecutorData().getServiceLog()) ? cleanupConfiguration.getDefaultServiceLog() : oldDefinition.getExecutorData().getServiceLog(),
Strings.isNullOrEmpty(oldDefinition.getExecutorData().getServiceFinishedTailLog()) ? cleanupConfiguration.getDefaultServiceFinishedTailLog() : oldDefinition.getExecutorData().getServiceFinishedTailLog(),
oldDefinition.getExecutorData().getRequestGroup(),
oldDefinition.getExecutorData().getS3StorageClass(),
oldDefinition.getExecutorData().getApplyS3StorageClassAfterBytes()
),
oldDefinition.getTaskDirectory(),
oldDefinition.getExecutorPid(),
Strings.isNullOrEmpty(oldDefinition.getServiceLogFileName()) ? cleanupConfiguration.getDefaultServiceLog() :oldDefinition.getServiceLogFileName(),
oldDefinition.getServiceLogOutExtension(),
Strings.isNullOrEmpty(oldDefinition.getServiceFinishedTailLogFileName()) ? cleanupConfiguration.getDefaultServiceFinishedTailLog() : oldDefinition.getServiceFinishedTailLogFileName(),
oldDefinition.getTaskAppDirectory(),
oldDefinition.getExecutorBashOut(),
oldDefinition.getLogrotateStateFile(),
oldDefinition.getSignatureVerifyOut()
);
}
private boolean isDecommissioned() {
Collection slaves = singularityClient.getSlaves(Optional.of(MachineState.DECOMMISSIONED));
boolean decommissioned = false;
for (SingularitySlave slave : slaves) {
if (slave.getHost().equals(hostname)) {
decommissioned = true;
}
}
return decommissioned;
}
private Set getRunningTaskIds() {
final String slaveId = mesosClient.getSlaveState(mesosClient.getSlaveUri(hostname)).getId();
final Collection activeTasks = singularityClient.getActiveTasksOnSlave(slaveId);
final Set runningTaskIds = Sets.newHashSet();
for (SingularityTask task : activeTasks) {
runningTaskIds.add(task.getTaskId().getId());
}
return runningTaskIds;
}
private boolean executorStillRunning(SingularityExecutorTaskDefinition taskDefinition) {
Optional executorPidSafe = taskDefinition.getExecutorPidSafe();
if (!executorPidSafe.isPresent()) {
return false;
}
return processUtils.doesProcessExist(executorPidSafe.get());
}
private TaskCleanupResult cleanTask(SingularityExecutorTaskDefinition taskDefinition, Optional taskHistory) {
SingularityExecutorTaskLogManager logManager = new SingularityExecutorTaskLogManager(taskDefinition, templateManager, baseConfiguration, executorConfiguration, LOG, jsonObjectFileHelper);
SingularityExecutorTaskCleanup taskCleanup = new SingularityExecutorTaskCleanup(logManager, executorConfiguration, taskDefinition, LOG, dockerUtils);
boolean cleanupTaskAppDirectory = !taskDefinition.getExecutorData().getPreserveTaskSandboxAfterFinish().or(Boolean.FALSE);
if (taskHistory.isPresent()) {
final Optional lastUpdate = JavaUtils.getLast(taskHistory.get().getTaskUpdates());
if (lastUpdate.isPresent() && lastUpdate.get().getTaskState().isFailed()) {
final long delta = System.currentTimeMillis() - lastUpdate.get().getTimestamp();
if (delta < cleanupConfiguration.getCleanupAppDirectoryOfFailedTasksAfterMillis()) {
LOG.info("Not cleaning up task app directory of {} because only {} has elapsed since it failed (will cleanup after {})", taskDefinition.getTaskId(),
JavaUtils.durationFromMillis(delta), JavaUtils.durationFromMillis(cleanupConfiguration.getCleanupAppDirectoryOfFailedTasksAfterMillis()));
cleanupTaskAppDirectory = false;
}
}
}
if (taskDefinition.shouldLogrotateLogFile()) {
checkForUncompressedLogrotatedFile(taskDefinition);
}
boolean isDocker = (taskHistory.isPresent() && taskHistory.get().getTask().getMesosTask().hasContainer() && taskHistory.get().getTask().getMesosTask().getContainer().hasDocker());
return taskCleanup.cleanup(cleanupTaskAppDirectory, isDocker);
}
private Iterator getUncompressedLogrotatedFileIterator(SingularityExecutorTaskDefinition taskDefinition) {
final Path serviceLogOutPath = taskDefinition.getServiceLogOutPath();
final Path parent = serviceLogOutPath.getParent();
if (parent == null) {
throw new IllegalStateException("Service log path " + serviceLogOutPath + " has no parent");
}
final Path logrotateToPath = parent.resolve(executorConfiguration.getLogrotateToDirectory());
if (!logrotateToPath.toFile().exists() || !logrotateToPath.toFile().isDirectory()) {
LOG.warn("Skipping uncompressed logrotated file cleanup for {} -- {} does not exist or is not a directory (task sandbox was probably garbage collected by Mesos)", taskDefinition.getTaskId(), logrotateToPath);
return Collections.emptyIterator();
}
try {
DirectoryStream dirStream = Files.newDirectoryStream(logrotateToPath, String.format("%s-*", serviceLogOutPath.getFileName()));
return dirStream.iterator();
} catch (IOException e) {
throw Throwables.propagate(e);
}
}
private void checkForUncompressedLogrotatedFile(SingularityExecutorTaskDefinition taskDefinition) {
final Iterator iterator = getUncompressedLogrotatedFileIterator(taskDefinition);
final Set emptyPaths = new HashSet<>();
final List uncompressedFiles = new ArrayList<>();
// check for matched 0 byte compressed files.. and delete/compress them
while (iterator.hasNext()) {
Path path = iterator.next();
final String fileName = Objects.toString(path.getFileName());
Optional maybeCompressionType = getFileCompressionType(fileName);
if (maybeCompressionType.isPresent()) {
try {
if (Files.size(path) == 0) {
Files.deleteIfExists(path);
emptyPaths.add(fileName.substring(0, fileName.length() - maybeCompressionType.get().getExtention().length()));
}
} catch (IOException ioe) {
LOG.error("Failed to handle empty {} file {}", maybeCompressionType.get(), path, ioe);
exceptionNotifier.notify(String.format("Error handling empty file (%s)", ioe.getMessage()), ioe, ImmutableMap.of("file", path.toString()));
}
} else {
uncompressedFiles.add(path);
}
}
for (Path path : uncompressedFiles) {
if (emptyPaths.contains(Objects.toString(path.getFileName()))) {
LOG.info("Compressing abandoned file {}", path);
try {
new SimpleProcessManager(LOG).runCommand(ImmutableList. of(cleanupConfiguration.getCompressionType().getCommand(), path.toString()));
} catch (InterruptedException | ProcessFailedException e) {
LOG.error("Failed to {} {}", cleanupConfiguration.getCompressionType(), path, e);
exceptionNotifier.notify(String.format("Failed to gzip (%s)", e.getMessage()), e, ImmutableMap.of("file", path.toString()));
}
} else {
LOG.debug("Didn't find matched empty {} file for {}", cleanupConfiguration.getCompressionType(), path);
}
}
}
private Optional getFileCompressionType(String fileName) {
if (fileName.endsWith(CompressionType.GZIP.getExtention())) {
return Optional.of(CompressionType.GZIP);
} else if (fileName.endsWith(CompressionType.BZIP2.getExtention())) {
return Optional.of(CompressionType.BZIP2);
} else {
return Optional.absent();
}
}
private void cleanDocker(Set runningTaskIds) {
try {
for (Container container : dockerUtils.listContainers()) {
for (String name : container.names()) {
if (name.startsWith(executorConfiguration.getDockerPrefix())) {
if (!runningTaskIds.contains(name.substring(executorConfiguration.getDockerPrefix().length()))) {
stopContainer(container);
}
}
}
}
} catch (Exception e) {
LOG.error("Could not get list of Docker containers", e);
exceptionNotifier.notify(String.format("Error listing docker containers (%s)", e.getMessage()), e, Collections.emptyMap());
}
}
private void stopContainer(Container container) {
try {
ContainerInfo containerInfo = dockerUtils.inspectContainer(container.id());
if (containerInfo.state().running()) {
dockerUtils.stopContainer(container.id(), executorConfiguration.getDockerStopTimeout());
LOG.debug("Forcefully stopped container {}", container.names());
}
dockerUtils.removeContainer(container.id(), true);
LOG.debug("Removed container {}", container.names());
} catch (Exception e) {
LOG.error("Failed to stop or remove container {}", container.names(), e);
exceptionNotifier.notify(String.format("Failed stopping container (%s)", e.getMessage()), e, Collections.emptyMap());
}
}
}