All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.HealthCheckChore Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HealthChecker.HealthCheckerExitStatus;
import org.apache.hadoop.util.StringUtils;

/**
 * The Class HealthCheckChore for running health checker regularly.
 */
public class HealthCheckChore extends ScheduledChore {
  private static final Log LOG = LogFactory.getLog(HealthCheckChore.class);
  private HealthChecker healthChecker;
  private Configuration config;
  private int threshold;
  private int numTimesUnhealthy = 0;
  private long failureWindow;
  private long startWindow;

  public HealthCheckChore(int sleepTime, Stoppable stopper, Configuration conf) {
    super("HealthChecker", stopper, sleepTime);
    LOG.info("Health Check Chore runs every " + StringUtils.formatTime(sleepTime));
    this.config = conf;
    String healthCheckScript = this.config.get(HConstants.HEALTH_SCRIPT_LOC);
    long scriptTimeout = this.config.getLong(HConstants.HEALTH_SCRIPT_TIMEOUT,
      HConstants.DEFAULT_HEALTH_SCRIPT_TIMEOUT);
    healthChecker = new HealthChecker();
    healthChecker.init(healthCheckScript, scriptTimeout);
    this.threshold = config.getInt(HConstants.HEALTH_FAILURE_THRESHOLD,
      HConstants.DEFAULT_HEALTH_FAILURE_THRESHOLD);
    this.failureWindow = (long)this.threshold * (long)sleepTime;
  }

  @Override
  protected void chore() {
    HealthReport report = healthChecker.checkHealth();
    boolean isHealthy = (report.getStatus() == HealthCheckerExitStatus.SUCCESS);
    if (!isHealthy) {
      boolean needToStop = decideToStop();
      if (needToStop) {
        getStopper().stop(
          "The  node reported unhealthy " + threshold + " number of times consecutively.");
      }
      // Always log health report.
      LOG.info("Health status at " + StringUtils.formatTime(System.currentTimeMillis()) + " : "
          + report.getHealthReport());
    }
  }

  private boolean decideToStop() {
    boolean stop = false;
    if (numTimesUnhealthy == 0) {
      // First time we are seeing a failure. No need to stop, just
      // record the time.
      numTimesUnhealthy++;
      startWindow = System.currentTimeMillis();
    } else {
      if ((System.currentTimeMillis() - startWindow) < failureWindow) {
        numTimesUnhealthy++;
        if (numTimesUnhealthy == threshold) {
          stop = true;
        } else {
          stop = false;
        }
      } else {
        // Outside of failure window, so we reset to 1.
        numTimesUnhealthy = 1;
        startWindow = System.currentTimeMillis();
        stop = false;
      }
    }
    return stop;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy