All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.hadoop.hbase.master.snapshot.TakeSnapshotHandler Maven / Gradle / Ivy

There is a newer version: 3.0.0-beta-1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.snapshot;

import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_RPC_TIMEOUT;
import static org.apache.hadoop.hbase.HConstants.HBASE_RPC_TIMEOUT_KEY;

import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CancellationException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.errorhandling.ForeignException;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.MetricsSnapshot;
import org.apache.hadoop.hbase.master.SnapshotSentinel;
import org.apache.hadoop.hbase.master.locking.LockManager;
import org.apache.hadoop.hbase.master.locking.LockManager.MasterLock;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.procedure2.LockType;
import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;

/**
 * A handler for taking snapshots from the master. This is not a subclass of TableEventHandler
 * because using that would incur an extra hbase:meta scan. The {@link #snapshotRegions(List)} call
 * should get implemented for each snapshot flavor.
 */
@InterfaceAudience.Private
public abstract class TakeSnapshotHandler extends EventHandler
  implements SnapshotSentinel, ForeignExceptionSnare {
  private static final Logger LOG = LoggerFactory.getLogger(TakeSnapshotHandler.class);
  public static final String HBASE_SNAPSHOT_MASTER_LOCK_ACQUIRE_TIMEOUT =
    "hbase.snapshot.master.lock.acquire.timeout";

  private volatile boolean finished;

  // none of these should ever be null
  protected final MasterServices master;
  protected final MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
  protected final SnapshotDescription snapshot;
  protected final Configuration conf;
  protected final FileSystem rootFs;
  protected final FileSystem workingDirFs;
  protected final Path rootDir;
  private final Path snapshotDir;
  protected final Path workingDir;
  private final MasterSnapshotVerifier verifier;
  protected final ForeignExceptionDispatcher monitor;
  private final LockManager.MasterLock tableLock;
  protected final MonitoredTask status;
  protected final TableName snapshotTable;
  protected final SnapshotManifest snapshotManifest;
  protected final SnapshotManager snapshotManager;
  /**
   * Snapshot creation requires table lock. If any region of the table is in transition, table lock
   * cannot be acquired by LockProcedure and hence snapshot creation could hang for potentially very
   * long time. This timeout will ensure snapshot creation fails-fast by waiting for only given
   * timeout.
   */
  private final long lockAcquireTimeoutMs;

  protected TableDescriptor htd;

  /**
   * @param snapshot       descriptor of the snapshot to take
   * @param masterServices master services provider
   * @throws IllegalArgumentException if the working snapshot directory set from the configuration
   *                                  is the same as the completed snapshot directory
   * @throws IOException              if the file system of the working snapshot directory cannot be
   *                                  determined
   */
  public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices,
    final SnapshotManager snapshotManager) throws IOException {
    super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
    assert snapshot != null : "SnapshotDescription must not be nul1";
    assert masterServices != null : "MasterServices must not be nul1";
    this.master = masterServices;
    this.conf = this.master.getConfiguration();
    this.rootDir = this.master.getMasterFileSystem().getRootDir();
    this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir, conf);
    Preconditions.checkArgument(
      !SnapshotDescriptionUtils.isSubDirectoryOf(workingDir, rootDir)
        || SnapshotDescriptionUtils.isWithinDefaultWorkingDir(workingDir, conf),
      "The working directory " + workingDir + " cannot be in the root directory unless it is "
        + "within the default working directory");

    this.snapshot = snapshot;
    this.snapshotManager = snapshotManager;
    this.snapshotTable = TableName.valueOf(snapshot.getTable());
    this.rootFs = this.master.getMasterFileSystem().getFileSystem();
    this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
    this.workingDirFs = this.workingDir.getFileSystem(this.conf);
    this.monitor = new ForeignExceptionDispatcher(snapshot.getName());

    this.tableLock = master.getLockManager().createMasterLock(snapshotTable, LockType.EXCLUSIVE,
      this.getClass().getName() + ": take snapshot " + snapshot.getName());

    // prepare the verify
    this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, workingDirFs);
    // update the running tasks
    this.status = TaskMonitor.get().createStatus(
      "Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable, false, true);
    this.snapshotManifest =
      SnapshotManifest.create(conf, rootFs, workingDir, snapshot, monitor, status);
    this.lockAcquireTimeoutMs = conf.getLong(HBASE_SNAPSHOT_MASTER_LOCK_ACQUIRE_TIMEOUT,
      conf.getLong(HBASE_RPC_TIMEOUT_KEY, DEFAULT_HBASE_RPC_TIMEOUT));
  }

  private TableDescriptor loadTableDescriptor() throws IOException {
    TableDescriptor htd = this.master.getTableDescriptors().get(snapshotTable);
    if (htd == null) {
      throw new IOException("TableDescriptor missing for " + snapshotTable);
    }
    if (htd.getMaxFileSize() == -1 && this.snapshot.getMaxFileSize() > 0) {
      htd = TableDescriptorBuilder.newBuilder(htd).setValue(TableDescriptorBuilder.MAX_FILESIZE,
        Long.toString(this.snapshot.getMaxFileSize())).build();
    }
    return htd;
  }

  @Override
  public TakeSnapshotHandler prepare() throws Exception {
    super.prepare();
    // after this, you should ensure to release this lock in case of exceptions
    if (this.tableLock.tryAcquire(this.lockAcquireTimeoutMs)) {
      try {
        this.htd = loadTableDescriptor(); // check that .tableinfo is present
      } catch (Exception e) {
        this.tableLock.release();
        throw e;
      }
    } else {
      LOG.error("Master lock could not be acquired in {} ms", lockAcquireTimeoutMs);
      throw new DoNotRetryIOException("Master lock could not be acquired");
    }
    return this;
  }

  /**
   * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)} call
   * should get implemented for each snapshot flavor.
   */
  @Override
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "REC_CATCH_EXCEPTION",
      justification = "Intentional")
  public void process() {
    String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
      + eventType + " on table " + snapshotTable;
    LOG.info(msg);
    MasterLock tableLockToRelease = this.tableLock;
    status.setStatus(msg);
    try {
      if (downgradeToSharedTableLock()) {
        // release the exclusive lock and hold the shared lock instead
        tableLockToRelease = master.getLockManager().createMasterLock(snapshotTable,
          LockType.SHARED, this.getClass().getName() + ": take snapshot " + snapshot.getName());
        tableLock.release();
        boolean isTableLockAcquired = tableLockToRelease.tryAcquire(this.lockAcquireTimeoutMs);
        if (!isTableLockAcquired) {
          LOG.error("Could not acquire shared lock on table {} in {} ms", snapshotTable,
            lockAcquireTimeoutMs);
          throw new IOException("Could not acquire shared lock on table " + snapshotTable);
        }
      }
      // If regions move after this meta scan, the region specific snapshot should fail, triggering
      // an external exception that gets captured here.

      // write down the snapshot info in the working directory
      SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, workingDirFs);
      snapshotManifest.addTableDescriptor(this.htd);
      monitor.rethrowException();

      List> regionsAndLocations =
        master.getAssignmentManager().getTableRegionsAndLocations(snapshotTable, false);

      // run the snapshot
      snapshotRegions(regionsAndLocations);
      monitor.rethrowException();

      // extract each pair to separate lists
      Set serverNames = new HashSet<>();
      for (Pair p : regionsAndLocations) {
        if (p != null && p.getFirst() != null && p.getSecond() != null) {
          RegionInfo hri = p.getFirst();
          if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue;
          serverNames.add(p.getSecond().toString());
        }
      }

      // flush the in-memory state, and write the single manifest
      status.setStatus("Consolidate snapshot: " + snapshot.getName());
      snapshotManifest.consolidate();

      // verify the snapshot is valid
      status.setStatus("Verifying snapshot: " + snapshot.getName());
      verifier.verifySnapshot(this.workingDir, serverNames);

      // complete the snapshot, atomically moving from tmp to .snapshot dir.
      SnapshotDescriptionUtils.completeSnapshot(this.snapshotDir, this.workingDir, this.rootFs,
        this.workingDirFs, this.conf);
      finished = true;
      msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
      status.markComplete(msg);
      LOG.info(msg);
      metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
      if (master.getMasterCoprocessorHost() != null) {
        master.getMasterCoprocessorHost()
          .postCompletedSnapshotAction(ProtobufUtil.createSnapshotDesc(snapshot), this.htd);
      }
    } catch (Exception e) { // FindBugs: REC_CATCH_EXCEPTION
      status.abort("Failed to complete snapshot " + snapshot.getName() + " on table "
        + snapshotTable + " because " + e.getMessage());
      String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
        + " due to exception:" + e.getMessage();
      LOG.error(reason, e);
      ForeignException ee = new ForeignException(reason, e);
      monitor.receive(ee);
      // need to mark this completed to close off and allow cleanup to happen.
      cancel(reason);
    } finally {
      LOG.debug("Launching cleanup of working dir:" + workingDir);
      try {
        // if the working dir is still present, the snapshot has failed. it is present we delete
        // it.
        if (!workingDirFs.delete(workingDir, true)) {
          LOG.error("Couldn't delete snapshot working directory:" + workingDir);
        }
      } catch (IOException e) {
        LOG.error("Couldn't delete snapshot working directory:" + workingDir);
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Table snapshot journal : \n" + status.prettyPrintJournal());
      }
      tableLockToRelease.release();
    }
  }

  /**
   * When taking snapshot, first we must acquire the exclusive table lock to confirm that there are
   * no ongoing merge/split procedures. But later, we should try our best to release the exclusive
   * lock as this may hurt the availability, because we need to hold the shared lock when assigning
   * regions.
   * 

* See HBASE-21480 for more details. */ protected abstract boolean downgradeToSharedTableLock(); /** * Snapshot the specified regions */ protected abstract void snapshotRegions(List> regions) throws IOException, KeeperException; /** * Take a snapshot of the specified disabled region */ protected void snapshotDisabledRegion(final RegionInfo regionInfo) throws IOException { snapshotManifest.addRegion(CommonFSUtils.getTableDir(rootDir, snapshotTable), regionInfo); monitor.rethrowException(); status.setStatus("Completed referencing HFiles for offline region " + regionInfo.toString() + " of table: " + snapshotTable); } @Override public void cancel(String why) { if (finished) return; this.finished = true; LOG.info("Stop taking snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) + " because: " + why); CancellationException ce = new CancellationException(why); monitor.receive(new ForeignException(master.getServerName().toString(), ce)); } @Override public boolean isFinished() { return finished; } @Override public long getCompletionTimestamp() { return this.status.getCompletionTimestamp(); } @Override public SnapshotDescription getSnapshot() { return snapshot; } @Override public ForeignException getExceptionIfFailed() { return monitor.getException(); } @Override public void rethrowExceptionIfFailed() throws ForeignException { monitor.rethrowException(); } @Override public void rethrowException() throws ForeignException { monitor.rethrowException(); } @Override public boolean hasException() { return monitor.hasException(); } @Override public ForeignException getException() { return monitor.getException(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy