All downloads are free. The search and download features use the official Maven repository.

com.spotify.helios.agent.HealthCheckerFactory Maven / Gradle / Ivy

There is a newer version: 0.9.9
Show newest version
/*
 * Copyright (c) 2014 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.helios.agent;

import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.Iterables;

import com.spotify.docker.client.DockerClient;
import com.spotify.docker.client.DockerHost;
import com.spotify.docker.client.LogStream;
import com.spotify.docker.client.exceptions.DockerException;
import com.spotify.helios.common.descriptors.ExecHealthCheck;
import com.spotify.helios.common.descriptors.HealthCheck;
import com.spotify.helios.common.descriptors.HttpHealthCheck;
import com.spotify.helios.common.descriptors.TcpHealthCheck;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Socket;
import java.net.URL;
import java.util.List;

import static java.util.concurrent.TimeUnit.SECONDS;

public final class HealthCheckerFactory {

  /** Not instantiable: this class only exposes the static {@code create} factory method. */
  private HealthCheckerFactory() {
    // intentionally empty
  }

  /**
   * Builds the {@link HealthChecker} that matches the kind of health check configured on the
   * given task.
   *
   * @param taskConfig       the task whose health check configuration is consulted
   * @param docker           docker client handed to the created checker
   * @param dockerHost       docker host handed to the HTTP and TCP checkers
   * @param agentInContainer flag passed through to the HTTP checker
   * @return a checker for the configured health check, or {@code null} when the task has no
   *         health check configured
   * @throws IllegalArgumentException if the configured health check is of an unrecognized type
   */
  public static HealthChecker create(final TaskConfig taskConfig, final DockerClient docker,
                                     final DockerHost dockerHost, final boolean agentInContainer) {
    final HealthCheck configured = taskConfig.healthCheck();

    // No health check configured: callers receive null rather than a no-op checker.
    if (configured == null) {
      return null;
    }

    if (configured instanceof ExecHealthCheck) {
      final ExecHealthCheck exec = (ExecHealthCheck) configured;
      return new ExecHealthChecker(exec, docker);
    }

    if (configured instanceof HttpHealthCheck) {
      final HttpHealthCheck http = (HttpHealthCheck) configured;
      return new HttpHealthChecker(http, taskConfig, docker, dockerHost, agentInContainer);
    }

    if (configured instanceof TcpHealthCheck) {
      final TcpHealthCheck tcp = (TcpHealthCheck) configured;
      return new TcpHealthChecker(tcp, taskConfig, docker, dockerHost);
    }

    throw new IllegalArgumentException("Unknown healthCheck type");
  }

  /**
   * Health checker that runs the configured command inside the container through docker exec
   * and treats an exit code of zero as healthy.
   */
  static class ExecHealthChecker implements HealthChecker {

    private static final Logger log = LoggerFactory.getLogger(ExecHealthChecker.class);

    private final ExecHealthCheck healthCheck;
    private final DockerClient docker;

    ExecHealthChecker(final ExecHealthCheck healthCheck, final DockerClient docker) {
      this.healthCheck = healthCheck;
      this.docker = docker;
    }

    /**
     * Executes the health check command in the given container.
     *
     * @param containerId the container to run the command in
     * @return {@code true} if the command exited with code 0; {@code false} if it exited with
     *         any other code, if a docker call failed, or if the thread was interrupted
     * @throws UnsupportedOperationException if the docker API version could not be determined
     *                                       or is not 1.18 or a later 1.x version
     */
    @Override
    public boolean check(final String containerId) {
      // Make sure we are on a docker version that supports exec health checks
      if (!compatibleDockerVersion(docker)) {
        throw new UnsupportedOperationException(
            "docker exec healthcheck is not supported on your docker version");
      }

      try {
        final List<String> cmd = healthCheck.getCommand();
        final String execId = docker.execCreate(containerId, cmd.toArray(new String[cmd.size()]),
                                                DockerClient.ExecCreateParam.attachStdout(),
                                                DockerClient.ExecCreateParam.attachStderr());

        // Drain the combined output so it can be reported if the command fails.
        final String output;
        try (LogStream stream = docker.execStart(execId)) {
          output = stream.readFully();
        }

        final int exitCode = docker.execInspect(execId).exitCode();
        if (exitCode != 0) {
          log.info("healthcheck failed with exit code {}. output {}", exitCode, output);
          return false;
        }

        return true;
      } catch (DockerException e) {
        // Report the failure instead of silently treating it as "unhealthy".
        log.warn("exception in exec healthchecking containerId={}", containerId, e);
        return false;
      } catch (InterruptedException e) {
        // Preserve the interrupt for whoever owns this thread.
        Thread.currentThread().interrupt();
        return false;
      }
    }

    /**
     * Reports whether the docker daemon's API version is 1.18 or a later 1.x version.
     *
     * @param docker the client used to query the daemon's version
     * @return {@code false} if the version cannot be fetched, is empty, cannot be parsed, or is
     *         outside the accepted range; {@code true} otherwise
     */
    private static boolean compatibleDockerVersion(final DockerClient docker) {
      final String apiVersion;
      try {
        apiVersion = docker.version().apiVersion();
      } catch (DockerException e) {
        log.warn("unable to determine docker API version", e);
        return false;
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        return false;
      }

      if (Strings.isNullOrEmpty(apiVersion)) {
        return false;
      }

      // Missing components default to "0"; non-numeric components are treated as incompatible.
      final Iterable<String> split = Splitter.on(".").split(apiVersion);
      final int major;
      final int minor;
      try {
        major = Integer.parseInt(Iterables.get(split, 0, "0"));
        minor = Integer.parseInt(Iterables.get(split, 1, "0"));
      } catch (NumberFormatException e) {
        log.warn("unable to parse docker API version '{}'", apiVersion);
        return false;
      }
      // NOTE(review): a major version other than 1 is rejected here - confirm this is intended
      // should the docker API ever move to 2.x.
      return major == 1 && minor >= 18;
    }
  }

  /**
   * Base class for health checkers that may need to look up a container's gateway address
   * through the docker client.
   */
  private abstract static class NetworkHealthchecker implements HealthChecker {

    private final DockerClient docker;

    protected NetworkHealthchecker(final DockerClient dockerClient) {
      docker = dockerClient;
    }

    /**
     * Inspects the container and returns the gateway from its network settings.
     *
     * @param containerId the container to inspect
     * @return the gateway reported in the container's network settings
     * @throws DockerException      if inspecting the container fails
     * @throws InterruptedException if the thread is interrupted during the docker call
     */
    protected String getBridgeAddress(final String containerId)
        throws DockerException, InterruptedException {
      return docker
          .inspectContainer(containerId)
          .networkSettings()
          .gateway();
    }
  }

  /**
   * Health checker that issues an HTTP request against the external port mapped for the health
   * check and treats any status in the range 200-399 as healthy.
   */
  private static class HttpHealthChecker extends NetworkHealthchecker {

    private static final Logger log = LoggerFactory.getLogger(HttpHealthChecker.class);


    private static final int CONNECT_TIMEOUT_MILLIS = 500;
    private static final long READ_TIMEOUT_MILLIS = SECONDS.toMillis(10);

    private final HttpHealthCheck healthCheck;
    private final TaskConfig taskConfig;
    private final DockerHost dockerHost;
    private final boolean agentInContainer;

    private HttpHealthChecker(final HttpHealthCheck healthCheck, final TaskConfig taskConfig,
                              final DockerClient dockerClient, final DockerHost dockerHost,
                              final boolean agentInContainer) {
      super(dockerClient);
      this.healthCheck = healthCheck;
      this.taskConfig = taskConfig;
      this.dockerHost = dockerHost;
      this.agentInContainer = agentInContainer;
    }

    /**
     * Requests the health check path over HTTP and evaluates the response status.
     *
     * @param containerId the container being checked
     * @return {@code true} if the response status is between 200 and 399 inclusive;
     *         {@code false} for any other status or if the request throws
     * @throws IllegalStateException if the port mapping for the health check has no external port
     * @throws DockerException       if looking up the bridge address fails
     * @throws InterruptedException  if the thread is interrupted during a docker call
     */
    @Override
    public boolean check(final String containerId) throws InterruptedException, DockerException {

      final String host;
      // Special case for running the agent inside helios-solo and DOCKER_HOST is a unix socket:
      // in this case we cannot reach the job's container with "localhost" at the external port
      // since "localhost" will refer to the agent's container and its network namespace.
      // The agent is only run in a container sibling to the job's container when in helios-solo.
      if (agentInContainer && dockerHost.host().startsWith("unix://")) {
        host = getBridgeAddress(containerId);
        log.info("Using bridge address {} for healthchecks", host);
      } else {
        host = dockerHost.address();
      }

      // The external port is a boxed Integer; fail with a descriptive message rather than a
      // bare NullPointerException from auto-unboxing if it is absent.
      final Integer port = taskConfig.ports().get(healthCheck.getPort()).getExternalPort();
      if (port == null) {
        throw new IllegalStateException(
            "no external port for healthcheck port " + healthCheck.getPort());
      }

      final URL url;
      try {
        url = new URL("http", host, port, healthCheck.getPath());
      } catch (MalformedURLException e) {
        throw Throwables.propagate(e);
      }

      log.info("about to healthcheck containerId={} with url={} for task={}",
               containerId, url, taskConfig);

      HttpURLConnection conn = null;
      try {
        conn = (HttpURLConnection) url.openConnection();
        conn.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
        conn.setReadTimeout((int) READ_TIMEOUT_MILLIS);

        final int response = conn.getResponseCode();
        log.warn("healthcheck for containerId={} with url={} returned status={}",
                 containerId, url, response);
        return response >= 200 && response <= 399;
      } catch (Exception e) {
        log.warn("exception in healthchecking containerId={} with url={}", containerId, url, e);
        return false;
      } finally {
        // The response body is never read, so release the connection explicitly.
        if (conn != null) {
          conn.disconnect();
        }
      }
    }

  }

  /**
   * Health checker that considers a container healthy when a TCP connection to the external
   * port mapped for the health check can be established within the connect timeout.
   */
  private static class TcpHealthChecker extends NetworkHealthchecker {

    private static final Logger log = LoggerFactory.getLogger(TcpHealthChecker.class);

    private static final int CONNECT_TIMEOUT_MILLIS = 500;

    private final TcpHealthCheck healthCheck;
    private final TaskConfig taskConfig;
    private final DockerHost dockerHost;


    private TcpHealthChecker(final TcpHealthCheck healthCheck, final TaskConfig taskConfig,
                             final DockerClient docker, final DockerHost dockerHost) {
      super(docker);
      this.healthCheck = healthCheck;
      this.taskConfig = taskConfig;
      this.dockerHost = dockerHost;
    }

    /**
     * Attempts a TCP connection to the health check port.
     *
     * @param containerId the container being checked
     * @return {@code true} if the connection was established; {@code false} if connecting threw
     * @throws IllegalStateException if the port mapping for the health check has no external port
     * @throws DockerException       if looking up the bridge address fails
     * @throws InterruptedException  if the thread is interrupted during a docker call
     */
    @Override
    public boolean check(final String containerId) throws InterruptedException, DockerException {
      // The external port is a boxed Integer; fail with a descriptive message rather than a
      // bare NullPointerException from auto-unboxing if it is absent.
      final Integer port = taskConfig.ports().get(healthCheck.getPort()).getExternalPort();
      if (port == null) {
        throw new IllegalStateException(
            "no external port for healthcheck port " + healthCheck.getPort());
      }

      InetSocketAddress address = new InetSocketAddress(dockerHost.address(), port);
      // getAddress() is null for an unresolved host; in that case skip the loopback test and
      // let the connect attempt below fail and report the container as unhealthy.
      if (!address.isUnresolved() && address.getAddress().isLoopbackAddress()) {
        // tcp connections to a container-mapped port on loopback always succeed,
        // regardless of if the container is listening or not. use the bridge address instead.
        address = new InetSocketAddress(getBridgeAddress(containerId), port);
      }

      log.info("about to healthcheck containerId={} with address={} for task={}",
               containerId, address, taskConfig);

      try (final Socket s = new Socket()) {
        s.connect(address, CONNECT_TIMEOUT_MILLIS);
      } catch (Exception e) {
        // Report why the connection failed, mirroring the HTTP checker's logging.
        log.warn("exception in healthchecking containerId={} with address={}",
                 containerId, address, e);
        return false;
      }

      return true;
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy