All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hbase.backup.mapreduce.MapReduceBackupMergeJob Maven / Gradle / Ivy

There is a newer version: 3.0.0-beta-1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.mapreduce;

import static org.apache.hadoop.hbase.backup.util.BackupUtils.succeeded;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Stack;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupMergeJob;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.impl.BackupManifest;
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.util.Tool;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * MapReduce implementation of {@link BackupMergeJob}.
 * Must be initialized with the configuration of the backup destination cluster.
 */
@InterfaceAudience.Private
public class MapReduceBackupMergeJob implements BackupMergeJob {
  // Class-wide SLF4J logger.
  public static final Logger LOG = LoggerFactory.getLogger(MapReduceBackupMergeJob.class);

  // Tool executed once per table by run(); run() assigns a MapReduceHFileSplitterJob here.
  protected Tool player;
  // Configuration handed out by getConf() and replaced by setConf().
  protected Configuration conf;

  /** Creates a merge job without a configuration; one is supplied through {@link #setConf}. */
  public MapReduceBackupMergeJob() {
    // No state to initialize.
  }

  /**
   * Returns the configuration held by this job.
   * @return the current value of the {@code conf} field
   */
  @Override
  public Configuration getConf() {
    return this.conf;
  }

  /**
   * Stores the configuration this job works with.
   * @param conf configuration to keep in the {@code conf} field
   */
  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /**
   * Merges the given backup images into the most recent one of them.
   * <p>
   * Phase 1 runs the HFile splitter job once per table found in the images and writes the result
   * into a per-table bulk output directory. Phase 2 moves the existing merged image aside into a
   * tmp directory, moves the new data in, updates the manifest, copies the meta files back and
   * deletes the images that were merged away.
   * <p>
   * A checked failure during phase 1 cleans up the bulk output directories and finishes the merge
   * session, so the merge can simply be repeated. A checked failure during phase 2 leaves the
   * session open; the thrown message asks for the backup repair tool to be run.
   * @param backupIds ids of the backup images to merge, must contain at least one id
   * @throws IOException if the merge fails
   * @throws IllegalArgumentException if {@code backupIds} is null or empty
   */
  @Override
  public void run(String[] backupIds) throws IOException {
    // Fail fast, before the exclusive operation and the merge session are started: backupIds[0]
    // is dereferenced below, and a runtime failure there is rethrown without releasing either.
    if (backupIds == null || backupIds.length == 0) {
      throw new IllegalArgumentException("No backup ids were provided for the merge operation");
    }

    // TODO : run player on remote cluster
    player = new MapReduceHFileSplitterJob();
    String bulkOutputConfKey = MapReduceHFileSplitterJob.BULK_OUTPUT_CONF_KEY;
    // Player reads all files in arbitrary directory structure and creates
    // a Map task for each file
    String bids = StringUtils.join(backupIds, ",");

    if (LOG.isDebugEnabled()) {
      LOG.debug("Merge backup images " + bids);
    }

    List<Pair<TableName, Path>> processedTableList = new ArrayList<>();
    boolean finishedTables = false;

    // try-with-resources closes the table and then the connection even when a constructor,
    // FileSystem.get() or one of the close() calls throws.
    try (Connection conn = ConnectionFactory.createConnection(getConf());
        BackupSystemTable table = new BackupSystemTable(conn)) {
      FileSystem fs = FileSystem.get(getConf());

      try {
        // Get exclusive lock on backup system
        table.startBackupExclusiveOperation();
        // Start merge operation
        table.startMergeOperation(backupIds);

        // Select most recent backup id
        String mergedBackupId = BackupUtils.findMostRecentBackupId(backupIds);

        TableName[] tableNames = getTableNamesInBackupImages(backupIds);

        BackupInfo bInfo = table.readBackupInfo(backupIds[0]);
        String backupRoot = bInfo.getBackupRootDir();

        // PHASE 1 (one splitter job per table, output goes to a bulk output directory)
        for (TableName tableName : tableNames) {
          LOG.info("Merge backup images for " + tableName);

          // Find input directories for table
          Path[] dirPaths = findInputDirectories(fs, backupRoot, tableName, backupIds);
          String dirs = StringUtils.join(dirPaths, ",");

          Path bulkOutputPath =
              BackupUtils.getBulkOutputDir(BackupUtils.getFileNameCompatibleString(tableName),
                getConf(), false);
          // Delete content if exists
          if (fs.exists(bulkOutputPath)) {
            if (!fs.delete(bulkOutputPath, true)) {
              LOG.warn("Can not delete: " + bulkOutputPath);
            }
          }
          // Tell the player where to write, then hand it that same configuration.
          Configuration jobConf = getConf();
          jobConf.set(bulkOutputConfKey, bulkOutputPath.toString());
          String[] playerArgs = { dirs, tableName.getNameAsString() };

          player.setConf(jobConf);
          int result = player.run(playerArgs);
          if (!succeeded(result)) {
            throw new IOException("Can not merge backup images for " + dirs
                + " (check Hadoop/MR and HBase logs). Player return code =" + result);
          }
          // Add to processed table list
          processedTableList.add(new Pair<>(tableName, bulkOutputPath));
          LOG.debug("Merge Job finished:" + result);
        }
        List<TableName> tableList = toTableNameList(processedTableList);
        table.updateProcessedTablesForMerge(tableList);
        finishedTables = true;

        // PHASE 2 (modification of a backup file system)
        // Move existing mergedBackupId data into tmp directory
        // we will need it later in case of a failure
        Path tmpBackupDir = HBackupFileSystem.getBackupTmpDirPathForBackupId(backupRoot,
          mergedBackupId);
        Path backupDirPath = HBackupFileSystem.getBackupPath(backupRoot, mergedBackupId);

        if (!fs.rename(backupDirPath, tmpBackupDir)) {
          throw new IOException("Failed to rename "+ backupDirPath +" to "+tmpBackupDir);
        } else {
          LOG.debug("Renamed "+ backupDirPath +" to "+ tmpBackupDir);
        }
        // Move new data into backup dest
        for (Pair<TableName, Path> tn : processedTableList) {
          moveData(fs, backupRoot, tn.getSecond(), tn.getFirst(), mergedBackupId);
        }
        // Update backup manifest
        List<String> backupsToDelete = getBackupIdsToDelete(backupIds, mergedBackupId);
        updateBackupManifest(tmpBackupDir.getParent().toString(), mergedBackupId,
          backupsToDelete);
        // Copy meta files back from tmp to backup dir
        copyMetaData(fs, tmpBackupDir, backupDirPath);
        // Delete tmp dir (Rename back during repair)
        if (!fs.delete(tmpBackupDir, true)) {
          // WARN and ignore
          LOG.warn("Could not delete tmp dir: "+ tmpBackupDir);
        }
        // Delete old data
        deleteBackupImages(backupsToDelete, conn, fs, backupRoot);
        // Finish merge session
        table.finishMergeOperation();
        // Release lock
        table.finishBackupExclusiveOperation();
      } catch (RuntimeException e) {
        // NOTE(review): runtime failures are rethrown without finishing the merge session or
        // the exclusive operation - confirm this is intended.
        throw e;
      } catch (Exception e) {
        LOG.error(e.toString(), e);
        if (!finishedTables) {
          // cleanup bulk directories and finish merge
          // merge MUST be repeated (no need for repair)
          cleanupBulkLoadDirs(fs, toPathList(processedTableList));
          table.finishMergeOperation();
          table.finishBackupExclusiveOperation();
          throw new IOException("Backup merge operation failed, you should try it again", e);
        } else {
          // backup repair must be run
          throw new IOException(
              "Backup merge operation failed, run backup repair tool to restore system's integrity",
              e);
        }
      }
    }
  }

  /**
   * Copies the meta data files of a backup session from the temp directory into the backup
   * directory. Only files whose full path contains {@link FSTableDescriptors#TABLEINFO_DIR} or
   * {@link HRegionFileSystem#REGION_INFO_FILE} are copied; each keeps its position relative to
   * the backup directory.
   * @param fs file system
   * @param tmpBackupDir temp backup directory, where meta is located
   * @param backupDirPath new path for backup
   * @throws IOException exception
   */
  protected void copyMetaData(FileSystem fs, Path tmpBackupDir, Path backupDirPath)
      throws IOException {
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(tmpBackupDir, true);
    List<Path> toKeep = new ArrayList<>();
    while (it.hasNext()) {
      LocatedFileStatus status = it.next();
      // The listing already carries the entry type, so no extra file system call is needed.
      if (status.isDirectory()) {
        continue;
      }
      // Keep meta
      Path p = status.getPath();
      String fileName = p.toString();
      if (fileName.indexOf(FSTableDescriptors.TABLEINFO_DIR) > 0
          || fileName.indexOf(HRegionFileSystem.REGION_INFO_FILE) > 0) {
        toKeep.add(p);
      }
    }
    // Copy meta to destination
    for (Path p : toKeep) {
      Path newPath = convertToDest(p, backupDirPath);
      copyFile(fs, p, newPath);
    }
  }

  /**
   * Copies the content of one DFS file into another, overwriting the target if it exists, and
   * then checks that the target is present.
   * @param fs file system
   * @param p path of the file to read
   * @param newPath path of the file to write
   * @throws IOException if copying fails or the target does not exist afterwards
   */
  protected void copyFile(FileSystem fs, Path p, Path newPath) throws IOException {
    try (InputStream source = fs.open(p);
        OutputStream target = fs.create(newPath, true)) {
      IOUtils.copy(source, target);
    }
    // Both streams are closed here; confirm the copy actually landed.
    if (fs.exists(newPath)) {
      return;
    }
    throw new IOException("Failed to copy meta file to: " + newPath);
  }

/**
 * Converts path before copying: the part of {@code p} below the component named like
 * {@code backupDirPath} is re-rooted under {@code backupDirPath}.
 * @param p path
 * @param backupDirPath backup root
 * @return converted path
 * @throws IllegalArgumentException if no component of {@code p} equals the name of
 *         {@code backupDirPath}
 */
  protected Path convertToDest(Path p, Path backupDirPath) {
    String backupId = backupDirPath.getName();
    Stack<String> stack = new Stack<>();
    // Walk up from the file, remembering every component below the backup id directory.
    Path current = p;
    while (current != null && !current.getName().equals(backupId)) {
      stack.push(current.getName());
      current = current.getParent();
    }
    if (current == null) {
      // Ran past the root without meeting the backup id; fail with a clear message instead of
      // dereferencing the missing parent.
      throw new IllegalArgumentException(
          "Path " + p + " is not located under a directory named " + backupId);
    }
    // Replay the remembered components, outermost first, on top of the destination.
    Path newPath = new Path(backupDirPath.toString());
    while (!stack.isEmpty()) {
      newPath = new Path(newPath, stack.pop());
    }
    return newPath;
  }

  protected List toPathList(List> processedTableList) {
    ArrayList list = new ArrayList<>();
    for (Pair p : processedTableList) {
      list.add(p.getSecond());
    }
    return list;
  }

  protected List toTableNameList(List> processedTableList) {
    ArrayList list = new ArrayList<>();
    for (Pair p : processedTableList) {
      list.add(p.getFirst());
    }
    return list;
  }

  /**
   * Recursively deletes every given path. A delete that reports failure is logged as a warning
   * and does not stop the loop.
   * @param fs file system
   * @param pathList paths to delete
   * @throws IOException if a delete call throws
   */
  protected void cleanupBulkLoadDirs(FileSystem fs, List<Path> pathList) throws IOException {
    for (Path path : pathList) {
      if (!fs.delete(path, true)) {
        LOG.warn("Can't delete " + path);
      }
    }
  }

  /**
   * Loads the manifest of the merged backup, removes the given backup ids from the ancestors of
   * its backup image and stores the manifest again.
   * @param backupRoot directory passed to {@link HBackupFileSystem#getManifest} as backup root
   * @param mergedBackupId id of the backup whose manifest is updated
   * @param backupsToDelete backup ids to remove from the ancestors
   * @throws IOException if the manifest can not be read or stored
   */
  protected void updateBackupManifest(String backupRoot, String mergedBackupId,
      List<String> backupsToDelete) throws IllegalArgumentException, IOException {
    BackupManifest manifest =
        HBackupFileSystem.getManifest(conf, new Path(backupRoot), mergedBackupId);
    manifest.getBackupImage().removeAncestors(backupsToDelete);
    // save back
    manifest.store(conf);
  }

  /**
   * Removes the given backups, first from the backup system table and then from the file
   * system. A file system delete that reports failure is logged as a warning only.
   * @param backupIds ids of the backups to delete
   * @param conn connection used to open the backup system table
   * @param fs file system
   * @param backupRoot backup root directory
   * @throws IOException if a system table or file system call throws
   */
  protected void deleteBackupImages(List<String> backupIds, Connection conn, FileSystem fs,
      String backupRoot) throws IOException {
    // Delete from backup system table
    try (BackupSystemTable table = new BackupSystemTable(conn)) {
      for (String backupId : backupIds) {
        table.deleteBackupInfo(backupId);
      }
    }

    // Delete from file system
    for (String backupId : backupIds) {
      Path backupDirPath = HBackupFileSystem.getBackupPath(backupRoot, backupId);

      if (!fs.delete(backupDirPath, true)) {
        LOG.warn("Could not delete " + backupDirPath);
      }
    }
  }

  /**
   * Returns all given backup ids except the merged one, in their original order.
   * @param backupIds ids of the backups taking part in the merge
   * @param mergedBackupId id of the backup that receives the merged data and is kept
   * @return new list with every id that differs from {@code mergedBackupId}
   */
  protected List<String> getBackupIdsToDelete(String[] backupIds, String mergedBackupId) {
    List<String> list = new ArrayList<>(backupIds.length);
    for (String id : backupIds) {
      if (!id.equals(mergedBackupId)) {
        list.add(id);
      }
    }
    return list;
  }

  /**
   * Moves every directory found directly under the bulk output path into the table directory
   * of the merged backup. A directory of the same name that already exists there is deleted
   * first.
   * @param fs file system
   * @param backupRoot backup root directory
   * @param bulkOutputPath directory holding the output of the merge job for the table
   * @param tableName table the data belongs to
   * @param mergedBackupId id of the backup that receives the data
   * @throws IOException if an existing directory can not be deleted, the destination can not be
   *           created or a directory can not be moved
   */
  protected void moveData(FileSystem fs, String backupRoot, Path bulkOutputPath,
          TableName tableName, String mergedBackupId) throws IllegalArgumentException, IOException {
    Path dest =
        new Path(HBackupFileSystem.getTableBackupDir(backupRoot, mergedBackupId, tableName));

    FileStatus[] fsts = fs.listStatus(bulkOutputPath);
    for (FileStatus fst : fsts) {
      if (fst.isDirectory()) {
        String family =  fst.getPath().getName();
        Path newDst = new Path(dest, family);
        if (fs.exists(newDst)) {
          if (!fs.delete(newDst, true)) {
            throw new IOException("failed to delete :"+ newDst);
          }
        } else {
          // The rename below targets dest itself, so dest has to exist as a directory first.
          if (!fs.mkdirs(dest)) {
            throw new IOException("Failed to create directory " + dest);
          }
        }
        boolean result = fs.rename(fst.getPath(), dest);
        LOG.debug("MoveData from "+ fst.getPath() +" to "+ dest+" result="+ result);
        if (!result) {
          // Do not carry on silently: the caller deletes the merged-away images afterwards.
          throw new IOException("Failed to move " + fst.getPath() + " to " + dest);
        }
      }
    }
  }

  /**
   * Collects the distinct table names recorded in the backup info of the given backups.
   * Opens its own connection, created from the {@code conf} field, and closes it before
   * returning.
   * @param backupIds ids of the backups to inspect
   * @return table names found in the backups, without duplicates
   * @throws IOException if the backup system table can not be read
   */
  protected TableName[] getTableNamesInBackupImages(String[] backupIds) throws IOException {
    Set<TableName> allSet = new HashSet<>();

    try (Connection conn = ConnectionFactory.createConnection(conf);
        BackupSystemTable table = new BackupSystemTable(conn)) {
      for (String backupId : backupIds) {
        BackupInfo bInfo = table.readBackupInfo(backupId);

        allSet.addAll(bInfo.getTableNames());
      }
    }

    TableName[] ret = new TableName[allSet.size()];
    return allSet.toArray(ret);
  }

  /**
   * Finds the backup directories of a table that exist on the file system, one candidate per
   * given backup id. Missing directories are skipped and reported at debug level.
   * @param fs file system
   * @param backupRoot backup root directory
   * @param tableName table to look up
   * @param backupIds ids of the backups to inspect
   * @return existing table backup directories, in the order of {@code backupIds}
   * @throws IOException if the existence check throws
   */
  protected Path[] findInputDirectories(FileSystem fs, String backupRoot, TableName tableName,
      String[] backupIds) throws IOException {
    List<Path> dirs = new ArrayList<>();

    for (String backupId : backupIds) {
      Path fileBackupDirPath =
          new Path(HBackupFileSystem.getTableBackupDir(backupRoot, backupId, tableName));
      if (fs.exists(fileBackupDirPath)) {
        dirs.add(fileBackupDirPath);
      } else {
        if (LOG.isDebugEnabled()) {
          LOG.debug("File: " + fileBackupDirPath + " does not exist.");
        }
      }
    }
    Path[] ret = new Path[dirs.size()];
    return dirs.toArray(ret);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy