All downloads are free. Search and download functionality uses the official Maven repository.

com.hubspot.singularity.executor.SingularityExecutorThreadChecker Maven / Gradle / Ivy

package com.hubspot.singularity.executor;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.helpers.NOPLogger;

import com.google.common.base.Charsets;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.hubspot.mesos.JavaUtils;
import com.hubspot.singularity.executor.SingularityExecutorMonitor.KillState;
import com.hubspot.singularity.executor.config.SingularityExecutorConfiguration;
import com.hubspot.singularity.executor.models.ThreadCheckerType;
import com.hubspot.singularity.executor.task.SingularityExecutorTaskProcessCallable;
import com.hubspot.singularity.executor.utils.DockerUtils;
import com.hubspot.singularity.runner.base.shared.ProcessFailedException;
import com.hubspot.singularity.runner.base.shared.SimpleProcessManager;
import com.spotify.docker.client.DockerException;

/**
 * Periodically counts the threads used by each running task and requests a kill for any task
 * whose thread count exceeds the maximum set in its executor data.
 *
 * <p>How the thread count is obtained is selected by
 * {@code configuration.getThreadCheckerType()}: from the task's cgroup, from the output of
 * {@code ps}, or from {@code /proc/<pid>/status} (the default).
 */
@Singleton
public class SingularityExecutorThreadChecker {

  private static final Logger LOG = LoggerFactory.getLogger(SingularityExecutorThreadChecker.class);

  /** Matches a line of the form {@code <digits>:cpu:/<path>}; group 1 is the path after the slash. */
  private static final Pattern CGROUP_CPU_REGEX = Pattern.compile("^\\d+:cpu:/(.*)$");
  /** Matches a line of the form {@code Threads: <digits>}; group 1 is the thread count. */
  private static final Pattern PROC_STATUS_THREADS_REGEX = Pattern.compile("Threads:\\s*(\\d+)\\s*$");

  private final SingularityExecutorConfiguration configuration;
  private final ScheduledExecutorService scheduledExecutorService;
  private final DockerUtils dockerUtils;

  // Assigned in start(), before the periodic check is scheduled.
  private SingularityExecutorMonitor monitor;

  /**
   * Creates the checker and its scheduled thread pool, sized by
   * {@code configuration.getThreadCheckThreads()}. Nothing is scheduled until
   * {@link #start(SingularityExecutorMonitor)} is called.
   *
   * @param configuration executor configuration (check interval, checker type, path formats)
   * @param dockerUtils helper used to look up the pid of docker containers
   */
  @Inject
  public SingularityExecutorThreadChecker(SingularityExecutorConfiguration configuration, DockerUtils dockerUtils) {
    this.configuration = configuration;
    this.dockerUtils = dockerUtils;

    this.scheduledExecutorService = Executors.newScheduledThreadPool(configuration.getThreadCheckThreads(), new ThreadFactoryBuilder().setNameFormat("SingularityExecutorThreadCheckerThread-%d").build());
  }

  /**
   * Schedules the recurring thread check. The first check runs after
   * {@code configuration.getCheckThreadsEveryMillis()} milliseconds and then repeats at that
   * same fixed rate.
   *
   * @param monitor provides the running tasks to check and receives the kill requests
   */
  public void start(SingularityExecutorMonitor monitor) {

    LOG.info("Starting a thread checker that will run every {}", JavaUtils.durationFromMillis(configuration.getCheckThreadsEveryMillis()));

    this.monitor = monitor;

    this.scheduledExecutorService.scheduleAtFixedRate(new Runnable() {

      @Override
      public void run() {
        final long start = System.currentTimeMillis();

        try {
          checkThreads();
        } catch (Throwable t) {
          // Catch everything: an exception escaping a fixed-rate task suppresses all later runs.
          LOG.error("While checking threads", t);
        } finally {
          LOG.trace("Finished checking threads after {}", JavaUtils.duration(start));
        }
      }
    }, configuration.getCheckThreadsEveryMillis(), configuration.getCheckThreadsEveryMillis(), TimeUnit.MILLISECONDS);
  }

  /**
   * Runs one pass over all running tasks. Tasks without a configured max thread count are
   * skipped; tasks over their limit are marked as killed due to threads and a kill is requested
   * from the monitor.
   */
  private void checkThreads() {
    for (SingularityExecutorTaskProcessCallable taskProcess : monitor.getRunningTasks()) {
      if (!taskProcess.getTask().getExecutorData().getMaxTaskThreads().isPresent()) {
        continue;
      }

      final int maxThreads = taskProcess.getTask().getExecutorData().getMaxTaskThreads().get();

      int usedThreads = 0;

      try {
        usedThreads = getNumUsedThreads(taskProcess);
        LOG.trace("{} is using {} threads", taskProcess.getTask().getTaskId(), usedThreads);
      } catch (InterruptedException ie) {
        // Restore the interrupt flag and abandon this pass entirely.
        Thread.currentThread().interrupt();
        return;
      } catch (Throwable t) {
        // Failures for a task that was already killed are not logged.
        if (!taskProcess.wasKilled()) {
          taskProcess.getTask().getLog().error("While fetching used threads for {}", taskProcess.getTask().getTaskId(), t);
        }
        continue;
      }

      if (usedThreads > maxThreads) {
        taskProcess.getTask().getLog().info("{} using too many threads: {} (max {})", taskProcess.getTask().getTaskId(), usedThreads, maxThreads);

        taskProcess.getTask().markKilledDueToThreads(usedThreads);
        KillState killState = monitor.requestKill(taskProcess.getTask().getTaskId());

        taskProcess.getTask().getLog().info("Killing {} due to thread overage (kill state {})", taskProcess.getTask().getTaskId(), killState);
      }
    }
  }

  /**
   * @return the scheduled executor that runs the periodic thread check
   */
  public ExecutorService getExecutorService() {
    return scheduledExecutorService;
  }

  /**
   * Determines how many threads the given task is using.
   *
   * <p>For tasks whose task info has a docker container, the pid is looked up through
   * {@link DockerUtils} using the container name {@code <dockerPrefix><taskId>}; a reported pid
   * of 0 is treated as "0 threads".
   *
   * @param taskProcess the running task process to inspect
   * @return the number of threads in use, or 0 when it could not be determined
   * @throws InterruptedException if interrupted while querying docker or running the check command
   * @throws ProcessFailedException if the docker pid lookup fails or the check command fails
   */
  private int getNumUsedThreads(SingularityExecutorTaskProcessCallable taskProcess) throws InterruptedException, ProcessFailedException {
    Optional<Integer> dockerPid = Optional.absent();
    if (taskProcess.getTask().getTaskInfo().hasContainer() && taskProcess.getTask().getTaskInfo().getContainer().hasDocker()) {
      try {
        String containerName = String.format("%s%s", configuration.getDockerPrefix(), taskProcess.getTask().getTaskId());
        int possiblePid = dockerUtils.getPid(containerName);
        if (possiblePid == 0) {
          LOG.warn("Container {} has pid {} (running: {}). Defaulting to 0 threads running.", containerName, possiblePid, dockerUtils.isContainerRunning(containerName));
          return 0;
        } else {
          dockerPid = Optional.of(possiblePid);
        }
      } catch (DockerException e) {
        throw new ProcessFailedException("Could not get docker root pid due to error", e);
      }
    }

    try {
      Optional<Integer> numThreads = getNumThreads(configuration.getThreadCheckerType(), taskProcess, dockerPid);
      if (numThreads.isPresent()) {
        return numThreads.get();
      } else {
        LOG.warn("Could not get num threads using {} thread checker", configuration.getThreadCheckerType());
        return 0;
      }
    } catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

  /**
   * Dispatches to the thread-count strategy for {@code type}. {@code PROC_STATUS} and any
   * unrecognized type use the {@code /proc/<pid>/status} strategy.
   *
   * @return the thread count, or absent if the chosen strategy could not determine it
   */
  private Optional<Integer> getNumThreads(ThreadCheckerType type, SingularityExecutorTaskProcessCallable taskProcess, Optional<Integer> dockerPid) throws InterruptedException, ProcessFailedException, IOException {
    switch (type) {
      case CGROUP:
        return getNumThreadsFromCgroup(taskProcess, dockerPid);
      case PS:
        return getNumThreadsFromCommand(taskProcess, dockerPid, "ps hH p %s | wc -l");
      case PROC_STATUS:
      default:
        return getNumThreadsFromProcStatus(taskProcess, dockerPid);
    }
  }

  /**
   * Chooses the pid to inspect: the docker pid when one was resolved, otherwise the pid reported
   * by the task process. The task process pid is only read when no docker pid is available, so
   * an absent process pid cannot fail a check that does not need it.
   *
   * @return the pid rendered as a string, ready to be substituted into a path or command
   */
  private static String pidToInspect(SingularityExecutorTaskProcessCallable taskProcess, Optional<Integer> dockerPid) {
    if (dockerPid.isPresent()) {
      return String.valueOf(dockerPid.get());
    }
    return String.valueOf(taskProcess.getCurrentPid().get());
  }

  /**
   * Counts threads by running a shell command and parsing the first line of its output as an
   * integer.
   *
   * @param commandFormat format string with a single {@code %s} placeholder for the pid
   * @return the parsed count, or absent if the command produced no output
   * @throws NumberFormatException if the first output line is not an integer
   */
  private Optional<Integer> getNumThreadsFromCommand(SingularityExecutorTaskProcessCallable taskProcess, Optional<Integer> dockerPid, String commandFormat) throws InterruptedException, ProcessFailedException {
    SimpleProcessManager checkThreadsProcessManager = new SimpleProcessManager(NOPLogger.NOP_LOGGER);
    List<String> cmd = ImmutableList.of("/bin/sh", "-c", String.format(commandFormat, pidToInspect(taskProcess, dockerPid)));
    List<String> output = checkThreadsProcessManager.runCommandWithOutput(cmd);
    if (output.isEmpty()) {
      LOG.warn("Output from thread count command was empty ({})", cmd);
      return Optional.absent();
    } else {
      // Trim first: some wc implementations pad the count with whitespace.
      return Optional.of(Integer.parseInt(output.get(0).trim()));
    }
  }

  /**
   * Counts threads by reading the {@code Threads:} line of {@code /proc/<pid>/status}.
   *
   * @return the thread count, or absent if the file does not exist or has no matching line
   */
  private Optional<Integer> getNumThreadsFromProcStatus(SingularityExecutorTaskProcessCallable taskProcess, Optional<Integer> dockerPid) throws InterruptedException, IOException {
    final String pid = pidToInspect(taskProcess, dockerPid);
    final Path procStatusPath = Paths.get(String.format("/proc/%s/status", pid));
    if (Files.exists(procStatusPath)) {
      for (String line : Files.readAllLines(procStatusPath, Charsets.UTF_8)) {
        final Matcher matcher = PROC_STATUS_THREADS_REGEX.matcher(line);
        if (matcher.matches()) {
          return Optional.of(Integer.parseInt(matcher.group(1)));
        }
      }
      LOG.warn("Unable to parse threads from proc status file {}", procStatusPath);
      return Optional.absent();
    } else {
      LOG.warn("Proc status file does not exist for pid {}", pid);
      return Optional.absent();
    }
  }

  /**
   * Counts threads via the task's cpu cgroup. The pid's cgroup file (path built from
   * {@code configuration.getProcCgroupFormat()}) is scanned for the cpu line; the captured cgroup
   * path is substituted into {@code configuration.getCgroupsMesosCpuTasksFormat()} and the number
   * of lines in that file is returned (presumably one thread id per line; confirm against the
   * configured format).
   *
   * @return the line count of the cgroup tasks file, or absent if the pid's cgroup file does not
   *     exist or has no cpu line
   */
  private Optional<Integer> getNumThreadsFromCgroup(SingularityExecutorTaskProcessCallable taskProcess, Optional<Integer> dockerPid) throws InterruptedException, IOException {
    final Path procCgroupPath = Paths.get(String.format(configuration.getProcCgroupFormat(), pidToInspect(taskProcess, dockerPid)));
    if (Files.exists(procCgroupPath)) {
      for (String line : Files.readAllLines(procCgroupPath, Charsets.UTF_8)) {
        final Matcher matcher = CGROUP_CPU_REGEX.matcher(line);
        if (matcher.matches()) {
          final Path cgroupTasksPath = Paths.get(String.format(configuration.getCgroupsMesosCpuTasksFormat(), matcher.group(1)));
          return Optional.of(Files.readAllLines(cgroupTasksPath, Charsets.UTF_8).size());
        }
      }
      LOG.warn("Unable to parse cgroup container from {}", procCgroupPath.toString());
      return Optional.absent();
    } else {
      LOG.warn("cgroup {} does not exist", procCgroupPath.toString());
      return Optional.absent();
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy