/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.mapreduce;

import static org.apache.hadoop.hbase.backup.util.BackupUtils.succeeded;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupMergeJob;
import org.apache.hadoop.hbase.backup.BackupRestoreConstants;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.impl.BackupManifest;
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.util.Tool;

/**
 * MapReduce implementation of {@link BackupMergeJob}. Must be initialized with the configuration
 * of the backup destination cluster.
 */
@InterfaceAudience.Private
public class MapReduceBackupMergeJob implements BackupMergeJob {
  public static final Log LOG = LogFactory.getLog(MapReduceBackupMergeJob.class);

  protected Tool player;
  protected Configuration conf;

  public MapReduceBackupMergeJob() {
  }
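
  // Usage sketch (illustrative, not part of this class): a merge job is normally obtained via
  // BackupRestoreFactory and handed the ids of the backup images to merge, for example:
  //
  //   Configuration conf = ...; // configuration of the backup destination cluster
  //   BackupMergeJob job = BackupRestoreFactory.getBackupMergeJob(conf);
  //   job.run(new String[] { "backup_1712000000000", "backup_1712000360000" });
  //
  // The backup ids above are made-up examples of the "backup_<timestamp>" format.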

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
  }
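
  /**
   * Runs the merge: takes the exclusive backup-system lock, starts a merge session, splits the
   * HFiles of every table found in the given backup images into a per-table bulk-load directory
   * (via {@link MapReduceHFileSplitterJob}), moves the results under the most recent backup id,
   * deletes the older backup images and updates the merged backup's manifest.
   */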

  @Override
  public void run(String[] backupIds) throws IOException {
    String bulkOutputConfKey;

    // TODO : run player on remote cluster
    player = new MapReduceHFileSplitterJob();
    bulkOutputConfKey = MapReduceHFileSplitterJob.BULK_OUTPUT_CONF_KEY;
    // The player reads all files in an arbitrary directory structure and creates
    // a map task for each file
    String bids = StringUtils.join(backupIds, ",");

    if (LOG.isDebugEnabled()) {
      LOG.debug("Merge backup images " + bids);
    }

    List<Pair<TableName, Path>> processedTableList = new ArrayList<>();
    boolean finishedTables = false;
    Connection conn = ConnectionFactory.createConnection(getConf());
    BackupSystemTable table = new BackupSystemTable(conn);
    FileSystem fs = FileSystem.get(getConf());

    try {

      // Get exclusive lock on backup system
      table.startBackupExclusiveOperation();
      // Start merge operation
      table.startMergeOperation(backupIds);

      // Select most recent backup id
      String mergedBackupId = findMostRecentBackupId(backupIds);

      TableName[] tableNames = getTableNamesInBackupImages(backupIds);
      BackupInfo bInfo = table.readBackupInfo(backupIds[0]);
      String backupRoot = bInfo.getBackupRootDir();

      for (int i = 0; i < tableNames.length; i++) {

        LOG.info("Merge backup images for " + tableNames[i]);

        // Find input directories for table

        Path[] dirPaths = findInputDirectories(fs, backupRoot, tableNames[i], backupIds);
        String dirs = StringUtils.join(dirPaths, ",");
        Path bulkOutputPath =
            BackupUtils.getBulkOutputDir(BackupUtils.getFileNameCompatibleString(tableNames[i]),
              getConf(), false);
        // Delete the output directory content if it already exists
        if (fs.exists(bulkOutputPath)) {
          if (!fs.delete(bulkOutputPath, true)) {
            LOG.warn("Cannot delete: " + bulkOutputPath);
          }
        }
        Configuration conf = getConf();
        conf.set(bulkOutputConfKey, bulkOutputPath.toString());
        String[] playerArgs = { dirs, tableNames[i].getNameAsString() };

        player.setConf(getConf());
        int result = player.run(playerArgs);
        if (!succeeded(result)) {
          throw new IOException("Cannot merge backup images for " + dirs
              + " (check Hadoop/MR and HBase logs). Player return code = " + result);
        }
        // Add to processed table list
        processedTableList.add(new Pair<>(tableNames[i], bulkOutputPath));
        LOG.debug("Merge job finished: " + result);
      }
      List<TableName> tableList = toTableNameList(processedTableList);
      table.updateProcessedTablesForMerge(tableList);
      finishedTables = true;

      // Move data
      for (Pair<TableName, Path> tn : processedTableList) {
        moveData(fs, backupRoot, tn.getSecond(), tn.getFirst(), mergedBackupId);
      }

      // Delete old data and update manifest
      List<String> backupsToDelete = getBackupIdsToDelete(backupIds, mergedBackupId);
      deleteBackupImages(backupsToDelete, conn, fs, backupRoot);
      updateBackupManifest(backupRoot, mergedBackupId, backupsToDelete);
      // Finish merge session
      table.finishMergeOperation();
      // Release lock
      table.finishBackupExclusiveOperation();
    } catch (RuntimeException e) {
      throw e;
    } catch (Exception e) {
      LOG.error(e);
      if (!finishedTables) {
        // cleanup bulk directories and finish merge
        // merge MUST be repeated (no need for repair)
        cleanupBulkLoadDirs(fs, toPathList(processedTableList));
        table.finishMergeOperation();
        table.finishBackupExclusiveOperation();
        throw new IOException("Backup merge operation failed; you should try it again", e);
      } else {
        // backup repair must be run
        throw new IOException(
            "Backup merge operation failed; run the backup repair tool to restore the system's"
                + " integrity", e);
      }
    } finally {
      table.close();
      conn.close();
    }
  }
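
  /**
   * Extracts the bulk-load output paths from a list of (table name, path) pairs.
   */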

  protected List<Path> toPathList(List<Pair<TableName, Path>> processedTableList) {
    ArrayList<Path> list = new ArrayList<>();
    for (Pair<TableName, Path> p : processedTableList) {
      list.add(p.getSecond());
    }
    return list;
  }
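
  /**
   * Extracts the table names from a list of (table name, path) pairs.
   */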

  protected List<TableName> toTableNameList(List<Pair<TableName, Path>> processedTableList) {
    ArrayList<TableName> list = new ArrayList<>();
    for (Pair<TableName, Path> p : processedTableList) {
      list.add(p.getFirst());
    }
    return list;
  }
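
  /**
   * Deletes the temporary bulk-load output directories, logging a warning for any path that
   * cannot be removed.
   */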

  protected void cleanupBulkLoadDirs(FileSystem fs, List<Path> pathList) throws IOException {
    for (Path path : pathList) {

      if (!fs.delete(path, true)) {
        LOG.warn("Can't delete " + path);
      }
    }
  }
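
  /**
   * Removes the deleted backup ids from the ancestor list of the merged backup's manifest and
   * stores the updated manifest back to the backup destination.
   */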

  protected void updateBackupManifest(String backupRoot, String mergedBackupId,
      List<String> backupsToDelete) throws IllegalArgumentException, IOException {

    BackupManifest manifest =
        HBackupFileSystem.getManifest(conf, new Path(backupRoot), mergedBackupId);
    manifest.getBackupImage().removeAncestors(backupsToDelete);
    // save back
    manifest.store(conf);

  }
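
  /**
   * Deletes the given backup images: removes their rows from the backup system table and their
   * directories from the backup destination file system.
   */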

  protected void deleteBackupImages(List<String> backupIds, Connection conn, FileSystem fs,
      String backupRoot) throws IOException {

    // Delete from backup system table
    try (BackupSystemTable table = new BackupSystemTable(conn)) {
      for (String backupId : backupIds) {
        table.deleteBackupInfo(backupId);
      }
    }

    // Delete from file system
    for (String backupId : backupIds) {
      Path backupDirPath = HBackupFileSystem.getBackupPath(backupRoot, backupId);

      if (!fs.delete(backupDirPath, true)) {
        LOG.warn("Could not delete " + backupDirPath);
      }
    }
  }
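
  /**
   * Returns all backup ids except the merged (most recent) one; these images become obsolete
   * after the merge and are deleted.
   */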

  protected List<String> getBackupIdsToDelete(String[] backupIds, String mergedBackupId) {
    List<String> list = new ArrayList<>();
    for (String id : backupIds) {
      if (id.equals(mergedBackupId)) {
        continue;
      }
      list.add(id);
    }
    return list;
  }
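
  /**
   * Replaces the table's data directory under the merged backup with the content of the
   * bulk-load output directory.
   */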

  protected void moveData(FileSystem fs, String backupRoot, Path bulkOutputPath,
      TableName tableName, String mergedBackupId) throws IllegalArgumentException, IOException {

    Path dest =
        new Path(HBackupFileSystem.getTableBackupDataDir(backupRoot, mergedBackupId, tableName));

    // Delete all in dest
    if (!fs.delete(dest, true)) {
      throw new IOException("Could not delete " + dest);
    }

    FileStatus[] fsts = fs.listStatus(bulkOutputPath);
    for (FileStatus fst : fsts) {
      if (fst.isDirectory()) {
        fs.rename(fst.getPath().getParent(), dest);
      }
    }

  }
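
  /**
   * Returns the backup id with the largest timestamp suffix, i.e. the most recent image, which
   * becomes the id of the merged backup.
   */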

  protected String findMostRecentBackupId(String[] backupIds) {
    long recentTimestamp = Long.MIN_VALUE;
    for (String backupId : backupIds) {
      long ts = Long.parseLong(backupId.split("_")[1]);
      if (ts > recentTimestamp) {
        recentTimestamp = ts;
      }
    }
    return BackupRestoreConstants.BACKUPID_PREFIX + recentTimestamp;
  }
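
  /**
   * Returns the union of all table names contained in the given backup images.
   */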

  protected TableName[] getTableNamesInBackupImages(String[] backupIds) throws IOException {

    Set<TableName> allSet = new HashSet<>();

    try (Connection conn = ConnectionFactory.createConnection(conf);
        BackupSystemTable table = new BackupSystemTable(conn)) {
      for (String backupId : backupIds) {
        BackupInfo bInfo = table.readBackupInfo(backupId);

        allSet.addAll(bInfo.getTableNames());
      }
    }

    TableName[] ret = new TableName[allSet.size()];
    return allSet.toArray(ret);
  }
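
  /**
   * Collects the existing table data directories for the given table across all backup images
   * being merged.
   */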

  protected Path[] findInputDirectories(FileSystem fs, String backupRoot, TableName tableName,
      String[] backupIds) throws IOException {

    List<Path> dirs = new ArrayList<>();

    for (String backupId : backupIds) {
      Path fileBackupDirPath =
          new Path(HBackupFileSystem.getTableBackupDataDir(backupRoot, backupId, tableName));
      if (fs.exists(fileBackupDirPath)) {
        dirs.add(fileBackupDirPath);
      } else if (LOG.isTraceEnabled()) {
        LOG.trace("File: " + fileBackupDirPath + " does not exist.");
      }
    }
    Path[] ret = new Path[dirs.size()];
    return dirs.toArray(ret);
  }

}