All downloads are free. Search and download functionality uses the official Maven repository.

com.spotify.helios.agent.DockerHealthChecker Maven / Gradle / Ivy

There is a newer version: 0.9.9
Show newest version
/*
 * Copyright (c) 2014 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.helios.agent;

import com.google.common.annotations.VisibleForTesting;

import com.spotify.helios.servicescommon.RiemannFacade;
import com.spotify.helios.servicescommon.statistics.MeterRates;
import com.spotify.helios.servicescommon.statistics.SupervisorMetrics;
import com.codahale.metrics.health.HealthCheck;

import io.dropwizard.lifecycle.Managed;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

import static com.google.common.base.Preconditions.checkNotNull;

/**
 * Health checker exposed by /healthcheck on the admin port (5804 by default).
 *
 * <p>Health is derived from two five-minute ratios: docker timeouts per supervisor run and
 * container exceptions per supervisor run. The checker applies hysteresis: it turns unhealthy
 * once either ratio exceeds {@link #FAILURE_HIGH_WATERMARK} and only turns healthy again once
 * both ratios have dropped below {@link #FAILURE_LOW_WATERMARK}. Each healthy/unhealthy
 * transition is reported as an event through the supplied {@link RiemannFacade}.
 *
 * <p>The evaluation runs both periodically on an internal single-threaded scheduler (after
 * {@link #start()}) and on demand whenever {@link #check()} is invoked, so the shared state is
 * guarded for use from more than one thread.
 */
public class DockerHealthChecker extends HealthCheck implements Managed {
  private static final Logger log = LoggerFactory.getLogger(DockerHealthChecker.class);

  /** Both ratios must be below this value before an unhealthy state is cleared. */
  @VisibleForTesting
  static final double FAILURE_LOW_WATERMARK = 0.4;
  /** Either ratio above this value marks the checker as unhealthy. */
  @VisibleForTesting
  static final double FAILURE_HIGH_WATERMARK = 0.8;

  private final SupervisorMetrics metrics;
  private final ScheduledExecutorService scheduler;
  private final TimeUnit timeUnit;
  private final int interval;
  private final HealthCheckRunnable runnable;
  private final RiemannFacade facade;

  /**
   * Creates a checker; no evaluation is scheduled until {@link #start()} is called.
   *
   * @param metrics  source of the docker timeout, container exception and supervisor run rates;
   *                 must not be null
   * @param timeUnit unit of {@code interval}; must not be null
   * @param interval initial delay and period between scheduled evaluations
   * @param facade   event sink; events are sent through {@code facade.stack("docker-health")};
   *                 must not be null
   */
  public DockerHealthChecker(final SupervisorMetrics metrics,
                             final TimeUnit timeUnit, int interval,
                             final RiemannFacade facade) {
    super();
    this.metrics = checkNotNull(metrics);
    this.timeUnit = checkNotNull(timeUnit);
    this.facade = checkNotNull(facade).stack("docker-health");

    this.scheduler = Executors.newScheduledThreadPool(1);
    this.interval = interval;
    this.runnable = new HealthCheckRunnable();
  }

  /**
   * Re-evaluates the health state from the current metrics and emits an event on transitions.
   */
  private class HealthCheckRunnable implements Runnable {
    // Null while healthy, otherwise the human-readable cause. Volatile because it is written by
    // run() on the scheduler thread and read by check() on the caller's thread.
    private volatile String reason = null;

    // Synchronized because run() is invoked both by the scheduler and directly from check();
    // the read-modify-write of `reason` and the transition event must not interleave, otherwise
    // the same transition could be reported twice.
    @Override
    public synchronized void run() {
      final double timeoutRatio = fiveMinuteRatio(
          metrics.getDockerTimeoutRates(), metrics.getSupervisorRunRates());
      final double exceptionRatio = fiveMinuteRatio(
          metrics.getContainersThrewExceptionRates(), metrics.getSupervisorRunRates());
      if (timeoutRatio > 0 || exceptionRatio > 0) {
        log.info("timeout ratio is {}, exception ratio is {}", timeoutRatio, exceptionRatio);
      }

      final String origReason = reason;

      // Yay hysteresis! Between the two watermarks the previous state is kept as-is.
      if (timeoutRatio > FAILURE_HIGH_WATERMARK) {
        reason = "docker timeouts are too high for too long";
      }
      // Checked second, so the exception reason wins when both ratios are too high.
      if (exceptionRatio > FAILURE_HIGH_WATERMARK) {
        reason = "supervisor run exception frequency is too high";
      }

      if (timeoutRatio < FAILURE_LOW_WATERMARK && exceptionRatio < FAILURE_LOW_WATERMARK) {
        reason = null;
      }

      // Emit an event only when crossing between healthy and unhealthy; a change from one
      // unhealthy reason to another is not reported.
      if (origReason != null && reason == null) {
        facade.event()
            .state("ok")
            .tags("docker", "health")
            .metric(1)
            .send();
      } else if (reason != null && origReason == null) {
        facade.event()
            .state("critical")
            .tags("docker", "health")
            .metric(0)
            .send();
      }
    }
  }

  /**
   * Returns the ratio of the two five-minute rates, or 0.0 when the denominator's rate is below
   * 0.1 (too little activity to produce a meaningful ratio, and avoids dividing by ~zero).
   */
  private double fiveMinuteRatio(MeterRates numerator, MeterRates denominator) {
    if (denominator.getFiveMinuteRate() < .1) {
      return 0.0;
    }
    return (numerator.getFiveMinuteRate() * 1.0) / denominator.getFiveMinuteRate();
  }

  /** Stops the periodic evaluation by shutting the scheduler down immediately. */
  @Override
  public void stop() {
    scheduler.shutdownNow();
  }

  /**
   * Starts the periodic evaluation, first after {@code interval} and then every
   * {@code interval} in the configured time unit.
   */
  @Override
  public void start() {
    // scheduleAtFixedRate suppresses all subsequent executions once a run throws, so guard the
    // periodic invocation: a transient failure must not silently stop the monitoring.
    scheduler.scheduleAtFixedRate(new Runnable() {
      @Override
      public void run() {
        try {
          runnable.run();
        } catch (RuntimeException e) {
          log.warn("docker health check run failed", e);
        }
      }
    }, interval, interval, timeUnit);
  }

  /**
   * Re-evaluates the metrics and reports the resulting state.
   *
   * @return an unhealthy result carrying the current reason, or a healthy result when there is
   *         none
   * @throws Exception if the evaluation itself fails
   */
  @Override
  protected Result check() throws Exception {
    runnable.run();
    // Read the shared field exactly once: a scheduled run may change it concurrently, and a
    // second read could observe null after the null check has already passed.
    final String currentReason = runnable.reason;
    if (currentReason != null) {
      return Result.unhealthy(currentReason);
    } else {
      return Result.healthy();
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy