
// com.hubspot.singularity.executor.SingularityExecutorThreadChecker Maven / Gradle / Ivy
package com.hubspot.singularity.executor;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.helpers.NOPLogger;
import com.google.common.base.Charsets;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.hubspot.mesos.JavaUtils;
import com.hubspot.singularity.executor.SingularityExecutorMonitor.KillState;
import com.hubspot.singularity.executor.config.SingularityExecutorConfiguration;
import com.hubspot.singularity.executor.models.ThreadCheckerType;
import com.hubspot.singularity.executor.task.SingularityExecutorTaskProcessCallable;
import com.hubspot.singularity.executor.utils.DockerUtils;
import com.hubspot.singularity.runner.base.shared.ProcessFailedException;
import com.hubspot.singularity.runner.base.shared.SimpleProcessManager;
import com.spotify.docker.client.DockerException;
/**
 * Periodically counts the threads used by every running task and requests a kill for any task
 * whose count exceeds the {@code maxTaskThreads} value in its executor data.
 *
 * <p>The thread count is obtained by one of three strategies, selected by
 * {@link SingularityExecutorConfiguration#getThreadCheckerType()}: counting the lines of the
 * task's cgroup tasks file, running {@code ps} through a shell, or reading the {@code Threads:}
 * line of {@code /proc/<pid>/status}.
 */
@Singleton
public class SingularityExecutorThreadChecker {

  private static final Logger LOG = LoggerFactory.getLogger(SingularityExecutorThreadChecker.class);

  /** Matches the cpu controller line of the per-process cgroup file; group 1 is the cgroup path. */
  private static final Pattern CGROUP_CPU_REGEX = Pattern.compile("^\\d+:cpu:/(.*)$");

  /** Matches the {@code Threads:} line of a proc status file; group 1 is the thread count. */
  private static final Pattern PROC_STATUS_THREADS_REGEX = Pattern.compile("Threads:\\s*(\\d+)\\s*$");

  private final SingularityExecutorConfiguration configuration;
  private final ScheduledExecutorService scheduledExecutorService;
  private final DockerUtils dockerUtils;

  // Assigned in start(); the scheduled check reads it on every run.
  private SingularityExecutorMonitor monitor;

  /**
   * Creates the checker and its scheduled thread pool, sized by
   * {@code configuration.getThreadCheckThreads()}. Nothing is scheduled until
   * {@link #start(SingularityExecutorMonitor)} is called.
   *
   * @param configuration executor configuration (check interval, checker type, path formats)
   * @param dockerUtils helper used to look up the pid of docker containers
   */
  @Inject
  public SingularityExecutorThreadChecker(SingularityExecutorConfiguration configuration, DockerUtils dockerUtils) {
    this.configuration = configuration;
    this.dockerUtils = dockerUtils;

    this.scheduledExecutorService = Executors.newScheduledThreadPool(
        configuration.getThreadCheckThreads(),
        new ThreadFactoryBuilder().setNameFormat("SingularityExecutorThreadCheckerThread-%d").build());
  }

  /**
   * Schedules the thread check at a fixed rate of
   * {@code configuration.getCheckThreadsEveryMillis()} milliseconds, with the first run delayed
   * by the same amount.
   *
   * @param monitor the monitor whose running tasks are checked and through which kills are requested
   */
  public void start(SingularityExecutorMonitor monitor) {
    LOG.info("Starting a thread checker that will run every {}", JavaUtils.durationFromMillis(configuration.getCheckThreadsEveryMillis()));

    this.monitor = monitor;

    this.scheduledExecutorService.scheduleAtFixedRate(new Runnable() {

      @Override
      public void run() {
        final long start = System.currentTimeMillis();

        try {
          checkThreads();
        } catch (Throwable t) {
          // Swallow everything: an exception escaping a fixed-rate task suppresses all later runs.
          LOG.error("While checking threads", t);
        } finally {
          LOG.trace("Finished checking threads after {}", JavaUtils.duration(start));
        }
      }
    }, configuration.getCheckThreadsEveryMillis(), configuration.getCheckThreadsEveryMillis(), TimeUnit.MILLISECONDS);
  }

  /**
   * Runs one pass over the monitor's running tasks. Tasks without a {@code maxTaskThreads} limit
   * are skipped; tasks over their limit are marked and a kill is requested through the monitor.
   * An interrupt aborts the whole pass; any other failure only skips the affected task.
   */
  private void checkThreads() {
    for (SingularityExecutorTaskProcessCallable taskProcess : monitor.getRunningTasks()) {
      if (!taskProcess.getTask().getExecutorData().getMaxTaskThreads().isPresent()) {
        continue;
      }

      final int maxThreads = taskProcess.getTask().getExecutorData().getMaxTaskThreads().get();

      final int usedThreads;

      try {
        usedThreads = getNumUsedThreads(taskProcess);
        LOG.trace("{} is using {} threads", taskProcess.getTask().getTaskId(), usedThreads);
      } catch (InterruptedException ie) {
        // Restore the interrupt flag and stop this pass.
        Thread.currentThread().interrupt();
        return;
      } catch (Throwable t) {
        // Only report the failure when the task was not already killed.
        if (!taskProcess.wasKilled()) {
          taskProcess.getTask().getLog().error("While fetching used threads for {}", taskProcess.getTask().getTaskId(), t);
        }
        continue;
      }

      if (usedThreads > maxThreads) {
        taskProcess.getTask().getLog().info("{} using too many threads: {} (max {})", taskProcess.getTask().getTaskId(), usedThreads, maxThreads);

        taskProcess.getTask().markKilledDueToThreads(usedThreads);
        final KillState killState = monitor.requestKill(taskProcess.getTask().getTaskId());

        taskProcess.getTask().getLog().info("Killing {} due to thread overage (kill state {})", taskProcess.getTask().getTaskId(), killState);
      }
    }
  }

  /**
   * @return the scheduled executor on which the thread check runs
   */
  public ExecutorService getExecutorService() {
    return scheduledExecutorService;
  }

  /**
   * Returns the number of threads the given task is using.
   *
   * <p>For tasks with a docker container the pid is looked up through {@link DockerUtils} under
   * the name {@code dockerPrefix + taskId}; a reported pid of 0 yields a count of 0. When the
   * configured checker cannot produce a count, 0 is returned as well.
   *
   * @param taskProcess the running task process to inspect
   * @return the thread count, or 0 when it could not be determined
   * @throws InterruptedException if interrupted while querying docker or running the command
   * @throws ProcessFailedException if the docker pid lookup fails or the external command fails
   * @throws RuntimeException wrapping any {@link IOException} raised while reading proc/cgroup files
   */
  private int getNumUsedThreads(SingularityExecutorTaskProcessCallable taskProcess) throws InterruptedException, ProcessFailedException {
    Optional<Integer> dockerPid = Optional.absent();

    if (taskProcess.getTask().getTaskInfo().hasContainer() && taskProcess.getTask().getTaskInfo().getContainer().hasDocker()) {
      try {
        final String containerName = String.format("%s%s", configuration.getDockerPrefix(), taskProcess.getTask().getTaskId());
        final int possiblePid = dockerUtils.getPid(containerName);

        if (possiblePid == 0) {
          LOG.warn("Container {} has pid {} (running: {}). Defaulting to 0 threads running.", containerName, possiblePid, dockerUtils.isContainerRunning(containerName));
          return 0;
        }

        dockerPid = Optional.of(possiblePid);
      } catch (DockerException e) {
        throw new ProcessFailedException("Could not get docker root pid due to error", e);
      }
    }

    try {
      final Optional<Integer> numThreads = getNumThreads(configuration.getThreadCheckerType(), taskProcess, dockerPid);

      if (numThreads.isPresent()) {
        return numThreads.get();
      }

      LOG.warn("Could not get num threads using {} thread checker", configuration.getThreadCheckerType());
      return 0;
    } catch (IOException e) {
      // Same wrapping Throwables.propagate performed, without the deprecated call.
      throw new RuntimeException(e);
    }
  }

  /**
   * Dispatches to the thread-counting strategy for {@code type}. {@code PROC_STATUS} and any
   * unrecognized type both use the proc status file.
   *
   * @param type the configured strategy
   * @param taskProcess the running task process to inspect
   * @param dockerPid the container pid, if the task runs in docker
   * @return the thread count, or absent when the strategy could not determine it
   */
  private Optional<Integer> getNumThreads(ThreadCheckerType type, SingularityExecutorTaskProcessCallable taskProcess, Optional<Integer> dockerPid) throws InterruptedException, ProcessFailedException, IOException {
    final Optional<Integer> numThreads;

    switch (type) {
      case CGROUP:
        numThreads = getNumThreadsFromCgroup(taskProcess, dockerPid);
        break;
      case PS:
        numThreads = getNumThreadsFromCommand(taskProcess, dockerPid, "ps hH p %s | wc -l");
        break;
      case PROC_STATUS:
      default:
        numThreads = getNumThreadsFromProcStatus(taskProcess, dockerPid);
        break;
    }

    return numThreads;
  }

  /**
   * Chooses the pid to inspect: the docker pid when present, otherwise the task process's
   * current pid. The current pid is only dereferenced when it is actually needed, so an absent
   * current pid does not fail a check that has a docker pid to work with.
   *
   * @throws IllegalStateException if neither pid is available (presumably a Guava
   *     {@code Optional.get()} on an absent value; confirm against {@code getCurrentPid()})
   */
  private static int resolvePid(SingularityExecutorTaskProcessCallable taskProcess, Optional<Integer> dockerPid) {
    if (dockerPid.isPresent()) {
      return dockerPid.get();
    }
    return taskProcess.getCurrentPid().get();
  }

  /**
   * Counts threads by running {@code commandFormat} (formatted with the pid) through
   * {@code /bin/sh -c} and parsing the first line of its output as an integer.
   *
   * @param commandFormat a format string with a single {@code %s} placeholder for the pid
   * @return the parsed count, or absent when the command printed nothing
   * @throws NumberFormatException if the first output line is not an integer
   */
  private Optional<Integer> getNumThreadsFromCommand(SingularityExecutorTaskProcessCallable taskProcess, Optional<Integer> dockerPid, String commandFormat) throws InterruptedException, ProcessFailedException {
    final SimpleProcessManager checkThreadsProcessManager = new SimpleProcessManager(NOPLogger.NOP_LOGGER);
    final List<String> cmd = ImmutableList.of("/bin/sh", "-c", String.format(commandFormat, resolvePid(taskProcess, dockerPid)));
    final List<String> output = checkThreadsProcessManager.runCommandWithOutput(cmd);

    if (output.isEmpty()) {
      LOG.warn("Output from thread count command was empty ({})", cmd);
      return Optional.absent();
    }

    // Trim first: some wc implementations pad the count with whitespace.
    return Optional.of(Integer.parseInt(output.get(0).trim()));
  }

  /**
   * Counts threads by reading the {@code Threads:} line of {@code /proc/<pid>/status}.
   *
   * @return the parsed count, or absent when the file is missing or has no matching line
   */
  private Optional<Integer> getNumThreadsFromProcStatus(SingularityExecutorTaskProcessCallable taskProcess, Optional<Integer> dockerPid) throws InterruptedException, IOException {
    final int pid = resolvePid(taskProcess, dockerPid);
    final Path procStatusPath = Paths.get(String.format("/proc/%s/status", pid));

    if (!Files.exists(procStatusPath)) {
      LOG.warn("Proc status file does not exist for pid {}", pid);
      return Optional.absent();
    }

    for (String line : Files.readAllLines(procStatusPath, Charsets.UTF_8)) {
      final Matcher matcher = PROC_STATUS_THREADS_REGEX.matcher(line);
      if (matcher.matches()) {
        return Optional.of(Integer.parseInt(matcher.group(1)));
      }
    }

    LOG.warn("Unable to parse threads from proc status file {}", procStatusPath);
    return Optional.absent();
  }

  /**
   * Counts threads via cgroups: finds the cpu cgroup path in the per-process cgroup file (path
   * built from {@code configuration.getProcCgroupFormat()}), then returns the number of lines in
   * the file built from {@code configuration.getCgroupsMesosCpuTasksFormat()} and that path.
   *
   * @return the line count, or absent when the cgroup file is missing or has no cpu line
   */
  private Optional<Integer> getNumThreadsFromCgroup(SingularityExecutorTaskProcessCallable taskProcess, Optional<Integer> dockerPid) throws InterruptedException, IOException {
    final Path procCgroupPath = Paths.get(String.format(configuration.getProcCgroupFormat(), resolvePid(taskProcess, dockerPid)));

    if (!Files.exists(procCgroupPath)) {
      LOG.warn("cgroup {} does not exist", procCgroupPath.toString());
      return Optional.absent();
    }

    for (String line : Files.readAllLines(procCgroupPath, Charsets.UTF_8)) {
      final Matcher matcher = CGROUP_CPU_REGEX.matcher(line);
      if (matcher.matches()) {
        final Path tasksPath = Paths.get(String.format(configuration.getCgroupsMesosCpuTasksFormat(), matcher.group(1)));
        return Optional.of(Files.readAllLines(tasksPath, Charsets.UTF_8).size());
      }
    }

    LOG.warn("Unable to parse cgroup container from {}", procCgroupPath.toString());
    return Optional.absent();
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy