All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler Maven / Gradle / Ivy

There is a newer version: 3.0.0-beta-1
Show newest version
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.handler;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.DeadServer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.zookeeper.KeeperException;

import com.google.common.annotations.VisibleForTesting;

import java.util.concurrent.atomic.AtomicInteger;

/**
 * Shutdown handler for the server hosting hbase:meta
 */
@InterfaceAudience.Private
public class MetaServerShutdownHandler extends ServerShutdownHandler {
  private static final Log LOG = LogFactory.getLog(MetaServerShutdownHandler.class);
  private AtomicInteger eventExceptionCount = new AtomicInteger(0);
  @VisibleForTesting
  static final int SHOW_STRACKTRACE_FREQUENCY = 100;

  public MetaServerShutdownHandler(final Server server,
      final MasterServices services,
      final DeadServer deadServers, final ServerName serverName) {
    super(server, services, deadServers, serverName,
      EventType.M_META_SERVER_SHUTDOWN, true);
  }

  @Override
  public void process() throws IOException {
    boolean gotException = true; 
    try {
      AssignmentManager am = this.services.getAssignmentManager();
      this.services.getMasterFileSystem().setLogRecoveryMode();
      boolean distributedLogReplay = 
        (this.services.getMasterFileSystem().getLogRecoveryMode() == RecoveryMode.LOG_REPLAY);
      try {
        if (this.shouldSplitHlog) {
          LOG.info("Splitting hbase:meta logs for " + serverName);
          if (distributedLogReplay) {
            Set regions = new HashSet();
            regions.add(HRegionInfo.FIRST_META_REGIONINFO);
            this.services.getMasterFileSystem().prepareLogReplay(serverName, regions);
          } else {
            this.services.getMasterFileSystem().splitMetaLog(serverName);
          }
          am.getRegionStates().logSplit(HRegionInfo.FIRST_META_REGIONINFO);
        }
      } catch (IOException ioe) {
        this.services.getExecutorService().submit(this);
        this.deadServers.add(serverName);
        throw new IOException("failed log splitting for " + serverName + ", will retry", ioe);
      }
  
      // Assign meta if we were carrying it.
      // Check again: region may be assigned to other where because of RIT
      // timeout
      if (am.isCarryingMeta(serverName)) {
        LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
        am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
        verifyAndAssignMetaWithRetries();
      } else if (!this.services.getCatalogTracker().isMetaLocationAvailable()) {
        // the meta location as per master is null. This could happen in case when meta assignment
        // in previous run failed, while meta znode has been updated to null. We should try to
        // assign the meta again.
        verifyAndAssignMetaWithRetries();
      } else {
        LOG.info("META has been assigned to otherwhere, skip assigning.");
      }

      try {
        if (this.shouldSplitHlog && distributedLogReplay) {
          if (!am.waitOnRegionToClearRegionsInTransition(HRegionInfo.FIRST_META_REGIONINFO,
            regionAssignmentWaitTimeout)) {
            // Wait here is to avoid log replay hits current dead server and incur a RPC timeout
            // when replay happens before region assignment completes.
            LOG.warn("Region " + HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()
                + " didn't complete assignment in time");
          }
          this.services.getMasterFileSystem().splitMetaLog(serverName);
        }
      } catch (Exception ex) {
        if (ex instanceof IOException) {
          this.services.getExecutorService().submit(this);
          this.deadServers.add(serverName);
          throw new IOException("failed log splitting for " + serverName + ", will retry", ex);
        } else {
          throw new IOException(ex);
        }
      }

      gotException = false;
    } finally {
      if (gotException){
        // If we had an exception, this.deadServers.finish will be skipped in super.process()
        this.deadServers.finish(serverName);
      }     
    }

    super.process();
    // Clear this counter on successful handling.
    this.eventExceptionCount.set(0);
  }

  @Override
  boolean isCarryingMeta() {
    return true;
  }

  /**
   * Before assign the hbase:meta region, ensure it haven't
   *  been assigned by other place
   * 

* Under some scenarios, the hbase:meta region can be opened twice, so it seemed online * in two regionserver at the same time. * If the hbase:meta region has been assigned, so the operation can be canceled. * @throws InterruptedException * @throws IOException * @throws KeeperException */ private void verifyAndAssignMeta() throws InterruptedException, IOException, KeeperException { long timeout = this.server.getConfiguration(). getLong("hbase.catalog.verification.timeout", 1000); if (!this.server.getCatalogTracker().verifyMetaRegionLocation(timeout)) { this.services.getAssignmentManager().assignMeta(); } else if (serverName.equals(server.getCatalogTracker().getMetaLocation())) { throw new IOException("hbase:meta is onlined on the dead server " + serverName); } else { LOG.info("Skip assigning hbase:meta, because it is online on the " + server.getCatalogTracker().getMetaLocation()); } } /** * Failed many times, shutdown processing * @throws IOException */ private void verifyAndAssignMetaWithRetries() throws IOException { int iTimes = this.server.getConfiguration().getInt( "hbase.catalog.verification.retries", 10); long waitTime = this.server.getConfiguration().getLong( "hbase.catalog.verification.timeout", 1000); int iFlag = 0; while (true) { try { verifyAndAssignMeta(); break; } catch (KeeperException e) { this.server.abort("In server shutdown processing, assigning meta", e); throw new IOException("Aborting", e); } catch (Exception e) { if (iFlag >= iTimes) { this.server.abort("verifyAndAssignMeta failed after" + iTimes + " times retries, aborting", e); throw new IOException("Aborting", e); } try { Thread.sleep(waitTime); } catch (InterruptedException e1) { LOG.warn("Interrupted when is the thread sleep", e1); Thread.currentThread().interrupt(); throw (InterruptedIOException)new InterruptedIOException().initCause(e1); } iFlag++; } } } @Override public String toString() { String name = "UnknownServerName"; if(server != null && server.getServerName() != null) { name = server.getServerName().toString(); } return getClass().getSimpleName() + "-" + name + "-" + getSeqid(); } @Override protected void handleException(Throwable t) { int count = eventExceptionCount.getAndIncrement(); if (count < 0) count = eventExceptionCount.getAndSet(0); if (count > SHOW_STRACKTRACE_FREQUENCY) { // Too frequent, let's slow reporting Threads.sleep(1000); } if (count % SHOW_STRACKTRACE_FREQUENCY == 0) { LOG.error("Caught " + eventType + ", count=" + this.eventExceptionCount, t); } else { LOG.error("Caught " + eventType + ", count=" + this.eventExceptionCount + "; " + t.getMessage() + "; stack trace shows every " + SHOW_STRACKTRACE_FREQUENCY + "th time."); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy