All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hbase.backup.util.BackupUtils Maven / Gradle / Ivy

There is a newer version: 3.0.0-beta-1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.backup.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupRestoreConstants;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.RestoreRequest;
import org.apache.hadoop.hbase.backup.impl.BackupManifest;
import org.apache.hadoop.hbase.backup.impl.BackupManifest.BackupImage;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;

/**
 * A collection of methods used by multiple classes to back up HBase tables.
 */
@InterfaceAudience.Private
public final class BackupUtils {
  // Logger shared by the static helpers of this class.
  protected static final Log LOG = LogFactory.getLog(BackupUtils.class);
  // Separator searched for (last occurrence) in WAL file names by the parsing helpers below.
  public static final String LOGNAME_SEPARATOR = ".";

  /** Utility class: the constructor always throws so that no instance can be created. */
  private BackupUtils() {
    throw new AssertionError("Instantiating utility class...");
  }

  /**
   * Loop through the RS log timestamp map for the tables, for each RS, find the min timestamp
   * value for the RS among the tables.
   * @param rsLogTimestampMap timestamp map
   * @return the min timestamp of each RS
   */
  public static HashMap getRSLogTimestampMins(
      HashMap> rsLogTimestampMap) {

    if (rsLogTimestampMap == null || rsLogTimestampMap.isEmpty()) {
      return null;
    }

    HashMap rsLogTimestampMins = new HashMap();
    HashMap> rsLogTimestampMapByRS =
        new HashMap>();

    for (Entry> tableEntry : rsLogTimestampMap.entrySet()) {
      TableName table = tableEntry.getKey();
      HashMap rsLogTimestamp = tableEntry.getValue();
      for (Entry rsEntry : rsLogTimestamp.entrySet()) {
        String rs = rsEntry.getKey();
        Long ts = rsEntry.getValue();
        if (!rsLogTimestampMapByRS.containsKey(rs)) {
          rsLogTimestampMapByRS.put(rs, new HashMap());
          rsLogTimestampMapByRS.get(rs).put(table, ts);
        } else {
          rsLogTimestampMapByRS.get(rs).put(table, ts);
        }
      }
    }

    for (Entry> entry : rsLogTimestampMapByRS.entrySet()) {
      String rs = entry.getKey();
      rsLogTimestampMins.put(rs, BackupUtils.getMinValue(entry.getValue()));
    }

    return rsLogTimestampMins;
  }

  /**
   * Copies the table descriptor and the region info of every table of the backup into the
   * backup image. Tables that no longer exist are skipped with a warning. Consider moving this
   * logic into HBackupFileSystem.
   * @param conn connection
   * @param backupInfo backup info
   * @param conf configuration
   * @throws IOException exception
   * @throws InterruptedException exception
   */
  public static void copyTableRegionInfo(Connection conn, BackupInfo backupInfo,
      Configuration conf) throws IOException, InterruptedException {
    Path rootDir = FSUtils.getRootDir(conf);
    FileSystem fs = rootDir.getFileSystem(conf);

    // for each table in the table set, copy out the table info and region
    // info files in the correct directory structure
    for (TableName table : backupInfo.getTables()) {

      if (!MetaTableAccessor.tableExists(conn, table)) {
        LOG.warn("Table " + table + " does not exists, skipping it.");
        continue;
      }
      HTableDescriptor orig = FSTableDescriptors.getTableDescriptorFromFs(fs, rootDir, table);

      // write a copy of descriptor to the target directory
      String tableBackupDir = backupInfo.getTableBackupDir(table);
      Path target = new Path(tableBackupDir);
      FileSystem targetFs = target.getFileSystem(conf);
      FSTableDescriptors descriptors = new FSTableDescriptors(conf, targetFs, rootDir);
      // Announce the copy before performing it so that the log reflects the real order.
      LOG.debug("Attempting to copy table info for:" + table + " target: " + target
          + " descriptor: " + orig);
      descriptors.createTableDescriptorForTableDirectory(target, orig, false);
      LOG.debug("Finished copying tableinfo.");

      List<HRegionInfo> regions = MetaTableAccessor.getTableRegions(conn, table);
      // For each region, write the region info to disk
      LOG.debug("Starting to write region info for table " + table);
      // NOTE(review): defensive guard; confirm whether getTableRegions can return null
      // for a table without regions.
      if (regions != null) {
        for (HRegionInfo regionInfo : regions) {
          Path regionDir = HRegion.getRegionDir(new Path(tableBackupDir), regionInfo);
          // Only the last path component is kept and re-rooted under the table backup dir.
          regionDir = new Path(tableBackupDir, regionDir.getName());
          writeRegioninfoOnFilesystem(conf, targetFs, regionDir, regionInfo);
        }
      }
      LOG.debug("Finished writing region info for table " + table);
    }
  }

  /**
   * Write the .regioninfo file on-disk.
   * @param conf configuration, used to resolve the file permissions and to create the file
   * @param fs file system the file is written to
   * @param regionInfoDir directory in which the file is created
   * @param regionInfo region info whose delimited byte form is written
   * @throws IOException if the file cannot be created or written
   */
  public static void writeRegioninfoOnFilesystem(final Configuration conf, final FileSystem fs,
      final Path regionInfoDir, HRegionInfo regionInfo) throws IOException {
    final byte[] content = regionInfo.toDelimitedByteArray();
    Path regionInfoFile = new Path(regionInfoDir, "." + HConstants.REGIONINFO_QUALIFIER_STR);
    // First check to get the permissions
    FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
    // Write the RegionInfo file content. try-with-resources keeps a write failure as the
    // primary exception instead of letting a failing close() replace it.
    try (FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null)) {
      out.write(content);
    }
  }

  /**
   * Extracts hostname:port from a WAL file path. Archived WAL files are delegated to
   * {@link #parseHostFromOldLog(Path)}; for other files the server name is derived from the
   * WAL directory name.
   * @param p path to WAL file
   * @return hostname:port, or null when the path cannot be parsed
   */
  public static String parseHostNameFromLogFile(Path p) {
    try {
      if (AbstractFSWALProvider.isArchivedLogFile(p)) {
        return BackupUtils.parseHostFromOldLog(p);
      }
      ServerName serverName = AbstractFSWALProvider.getServerNameFromWALDirectoryName(p);
      if (serverName == null) {
        LOG.error("Skip log file (can't parse): " + p);
        return null;
      }
      return serverName.getAddress().toString();
    } catch (Exception e) {
      // Any parsing failure is reported and turned into a null result.
      LOG.error("Skip log file (can't parse): " + p, e);
      return null;
    }
  }

  /**
   * Returns the WAL file name for a WAL file given as a string.
   * @param walFileName WAL file name or path, as a string
   * @return the final component of the corresponding path
   * @throws IOException exception
   * @throws IllegalArgumentException exception
   */
  public static String getUniqueWALFileNamePart(String walFileName) throws IOException {
    Path walPath = new Path(walFileName);
    return getUniqueWALFileNamePart(walPath);
  }

  /**
   * Returns the WAL file name, i.e. the final component of the given path.
   * @param p WAL file path
   * @return WAL file name
   * @throws IOException exception
   */
  public static String getUniqueWALFileNamePart(Path p) throws IOException {
    String fileName = p.getName();
    return fileName;
  }

  /**
   * Sums the lengths of all files found under the given directory, descending into
   * sub-directories.
   * @param fs The hadoop file system
   * @param dir The target directory
   * @return the total length of files; 0 when the listing of the directory is null
   * @throws IOException exception
   */
  public static long getFilesLength(FileSystem fs, Path dir) throws IOException {
    FileStatus[] entries = FSUtils.listStatus(fs, dir);
    if (entries == null) {
      return 0;
    }
    long sum = 0;
    for (FileStatus entry : entries) {
      // Directories contribute the size of their content, files their own length.
      sum += entry.isDirectory() ? getFilesLength(fs, entry.getPath()) : entry.getLen();
    }
    return sum;
  }

  /**
   * Get list of all old WAL files (WALs and archive). A file is selected when it is not a meta
   * WAL, its host can be parsed, and the timestamp in its name is not newer than the timestamp
   * recorded for that host.
   * @param c configuration
   * @param hostTimestampMap {host,timestamp} map
   * @return list of WAL files (as path strings)
   * @throws IOException exception
   */
  public static List<String> getWALFilesOlderThan(final Configuration c,
      final HashMap<String, Long> hostTimestampMap) throws IOException {
    Path rootDir = FSUtils.getRootDir(c);
    Path logDir = new Path(rootDir, HConstants.HREGION_LOGDIR_NAME);
    Path oldLogDir = new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME);
    List<String> logFiles = new ArrayList<String>();

    PathFilter filter = new PathFilter() {

      @Override
      public boolean accept(Path p) {
        try {
          if (AbstractFSWALProvider.isMetaFile(p)) {
            return false;
          }
          String host = parseHostNameFromLogFile(p);
          if (host == null) {
            return false;
          }
          Long oldTimestamp = hostTimestampMap.get(host);
          if (oldTimestamp == null) {
            // No timestamp recorded for this host. Reject the file explicitly instead of
            // hitting an unboxing NPE that would be reported as a parse problem.
            return false;
          }
          Long currentLogTS = BackupUtils.getCreationTime(p);
          return currentLogTS <= oldTimestamp;
        } catch (Exception e) {
          LOG.warn("Can not parse" + p, e);
          return false;
        }
      }
    };
    // NOTE(review): this uses the default file system rather than the one owning rootDir;
    // confirm that both are always the same in supported deployments.
    FileSystem fs = FileSystem.get(c);
    logFiles = BackupUtils.getFiles(fs, logDir, logFiles, filter);
    logFiles = BackupUtils.getFiles(fs, oldLogDir, logFiles, filter);
    return logFiles;
  }

  /**
   * Splits a delimited list of table names and converts every element to a TableName.
   * @param tables table names separated by
   *          {@link BackupRestoreConstants#TABLENAME_DELIMITER_IN_COMMAND}
   * @return the parsed table names, or null when {@code tables} is null
   */
  public static TableName[] parseTableNames(String tables) {
    if (tables == null) {
      return null;
    }
    String[] names = tables.split(BackupRestoreConstants.TABLENAME_DELIMITER_IN_COMMAND);
    TableName[] parsed = new TableName[names.length];
    int next = 0;
    for (String name : names) {
      parsed[next++] = TableName.valueOf(name);
    }
    return parsed;
  }


  /**
   * Tells whether the given backup path exists on the file system it belongs to.
   * @param backupStr backup path
   * @param conf configuration used to resolve the file system of the path
   * @return true if the path exists, false otherwise
   * @throws IOException exception
   */
  public static boolean checkPathExist(String backupStr, Configuration conf) throws IOException {
    Path backupPath = new Path(backupStr);
    FileSystem fileSys = backupPath.getFileSystem(conf);
    String targetFsScheme = fileSys.getUri().getScheme();
    if (LOG.isTraceEnabled()) {
      LOG.trace("Schema of given url: " + backupStr + " is: " + targetFsScheme);
    }
    return fileSys.exists(backupPath);
  }

  /**
   * Checks the backup target path and logs whether an existing root directory is reused or a
   * new one will be created. An unsupported file system scheme is reported with a dedicated
   * error message.
   * @param backupRootPath backup destination path
   * @param conf configuration
   * @throws IOException if the path cannot be checked; for an unsupported scheme the exception
   *           carries an explanatory message and the original exception as its cause
   */
  public static void checkTargetDir(String backupRootPath, Configuration conf) throws IOException {
    boolean targetExists;
    try {
      targetExists = checkPathExist(backupRootPath, conf);
    } catch (IOException e) {
      String expMsg = e.getMessage();
      // The message may be null, so guard before inspecting it.
      if (expMsg != null && expMsg.contains("No FileSystem for scheme")) {
        // Append the original message (not the still-unset new one) and keep the cause.
        String newMsg =
            "Unsupported filesystem scheme found in the backup target url. Error Message: "
                + expMsg;
        LOG.error(newMsg);
        throw new IOException(newMsg, e);
      }
      throw e;
    }

    if (targetExists) {
      LOG.info("Using existing backup root dir: " + backupRootPath);
    } else {
      LOG.info("Backup root dir " + backupRootPath + " does not exist. Will be created.");
    }
  }

  /**
   * Get the min value among all the values of a map. When used on RS log timestamps, the min
   * among them is the one kept in the backup system table.
   * @param map map whose values are compared
   * @param <T> key type of the map (keys are ignored)
   * @return the min value, or null when the map is null or empty
   */
  public static <T> Long getMinValue(HashMap<T, Long> map) {
    if (map == null || map.isEmpty()) {
      // Nothing to compare; an empty map used to fail with IndexOutOfBoundsException.
      return null;
    }
    return Collections.min(map.values());
  }

  /**
   * Parses host name:port from an archived WAL path. The part of the file name in front of the
   * last {@link #LOGNAME_SEPARATOR} is URL-decoded and interpreted as a server name.
   * @param p path
   * @return host name:port, or null when the file name cannot be parsed
   */
  public static String parseHostFromOldLog(Path p) {
    try {
      String fileName = p.getName();
      int separatorAt = fileName.lastIndexOf(LOGNAME_SEPARATOR);
      String encodedServerName = fileName.substring(0, separatorAt);
      String serverName = URLDecoder.decode(encodedServerName, "UTF8");
      return ServerName.parseHostname(serverName) + ":" + ServerName.parsePort(serverName);
    } catch (Exception e) {
      // Unparsable names are skipped rather than reported as failures.
      LOG.warn("Skip log file (can't parse): " + p);
      return null;
    }
  }

  /**
   * Reads the timestamp encoded in a log file name: everything after the last
   * {@link #LOGNAME_SEPARATOR} is parsed as a long.
   * @param p a path to the log file
   * @return the timestamp
   * @throws IOException if the file name contains no separator
   */
  public static Long getCreationTime(Path p) throws IOException {
    String fileName = p.getName();
    int separatorAt = fileName.lastIndexOf(LOGNAME_SEPARATOR);
    if (separatorAt < 0) {
      throw new IOException("Cannot parse timestamp from path " + p);
    }
    String suffix = fileName.substring(separatorAt + 1);
    return Long.valueOf(suffix);
  }

  /**
   * Recursively lists the files under a directory and appends the paths accepted by the filter
   * to the given list.
   * @param fs file system
   * @param rootDir directory to scan recursively
   * @param files list that receives the accepted paths (as strings); it is modified in place
   * @param filter filter applied to every file path
   * @return the same {@code files} list that was passed in
   * @throws FileNotFoundException propagated from the file listing
   * @throws IOException exception
   */
  public static List<String> getFiles(FileSystem fs, Path rootDir, List<String> files,
      PathFilter filter) throws FileNotFoundException, IOException {
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(rootDir, true);

    while (it.hasNext()) {
      LocatedFileStatus lfs = it.next();
      if (lfs.isDirectory()) {
        continue;
      }
      // apply filter
      Path file = lfs.getPath();
      if (filter.accept(file)) {
        files.add(file.toString());
      }
    }
    return files;
  }

  /**
   * Cleans up the data of a backup: first the HLog directory, then the target directory.
   * An IOException thrown by the first step prevents the second step from running.
   * @param context backup info
   * @param conf configuration
   * @throws IOException exception
   */
  public static void cleanupBackupData(BackupInfo context, Configuration conf) throws IOException {
    cleanupHLogDir(context, conf);
    cleanupTargetDir(context, conf);
  }

  /**
   * Clean up directories which are generated when DistCp copying hlogs. Every entry listed
   * directly under the parent of the HLog target directory is deleted recursively.
   * @param backupInfo backup info
   * @param conf configuration
   * @throws IOException exception
   */
  private static void cleanupHLogDir(BackupInfo backupInfo, Configuration conf)
      throws IOException {

    String logDir = backupInfo.getHLogTargetDir();
    if (logDir == null) {
      LOG.warn("No log directory specified for " + backupInfo.getBackupId());
      return;
    }

    Path rootPath = new Path(logDir).getParent();
    FileSystem fs = FileSystem.get(rootPath.toUri(), conf);
    FileStatus[] files = listStatus(fs, rootPath, null);
    if (files != null) {
      for (FileStatus file : files) {
        Path victim = file.getPath();
        LOG.debug("Delete log files: " + victim.getName());
        fs.delete(victim, true);
      }
    }
  }


  /**
   * Removes the data written for a backup under the backup root directory: the per-table
   * backup directories, their parent directory when it is left empty, and finally the
   * directory named after the backup id. This is best effort: an IOException is logged and
   * not rethrown.
   * @param backupInfo backup info
   * @param conf configuration
   */
  private static void cleanupTargetDir(BackupInfo backupInfo, Configuration conf) {
    try {
      // clean up the data at target directory
      LOG.debug("Trying to cleanup up target dir : " + backupInfo.getBackupId());
      String targetDir = backupInfo.getBackupRootDir();
      if (targetDir == null) {
        LOG.warn("No target directory specified for " + backupInfo.getBackupId());
        return;
      }

      FileSystem outputFs = FileSystem.get(new Path(targetDir).toUri(), conf);

      for (TableName table : backupInfo.getTables()) {
        Path targetDirPath =
            new Path(getTableBackupDir(targetDir, backupInfo.getBackupId(), table));
        if (outputFs.delete(targetDirPath, true)) {
          LOG.info("Cleaning up backup data at " + targetDirPath.toString() + " done.");
        } else {
          LOG.info("No data has been found in " + targetDirPath.toString() + ".");
        }

        // Drop the parent directory as well once nothing is left in it.
        Path tableDir = targetDirPath.getParent();
        FileStatus[] backups = listStatus(outputFs, tableDir, null);
        if (backups == null || backups.length == 0) {
          outputFs.delete(tableDir, true);
          LOG.debug(tableDir.toString() + " is empty, remove it.");
        }
      }
      outputFs.delete(new Path(targetDir, backupInfo.getBackupId()), true);
    } catch (IOException e1) {
      // Pass the exception to the logger so that the stack trace is not lost.
      LOG.error("Cleaning up backup data of " + backupInfo.getBackupId() + " at "
          + backupInfo.getBackupRootDir() + " failed due to " + e1.getMessage() + ".", e1);
    }
  }

  /**
   * Builds the backup image location of a table from the backup root dir, the backup id and
   * the table name. This is also where the backup manifest file is. The result ends with a
   * path separator and looks like:
   * "hdfs://backup.hbase.org:9000/user/biadmin/backup1/backup_1396650096738/default/t1_dn/"
   * @param backupRootDir backup root directory
   * @param backupId backup id
   * @param tableName table name
   * @return backupPath String for the particular table
   */
  public static String getTableBackupDir(String backupRootDir, String backupId,
      TableName tableName) {
    StringBuilder dir = new StringBuilder();
    dir.append(backupRootDir).append(Path.SEPARATOR);
    dir.append(backupId).append(Path.SEPARATOR);
    dir.append(tableName.getNamespaceAsString()).append(Path.SEPARATOR);
    dir.append(tableName.getQualifierAsString()).append(Path.SEPARATOR);
    return dir.toString();
  }

  /**
   * Sort history list by start time in descending order. The input list is left untouched.
   * Start times are compared numerically, and entries sharing the same start time are all
   * kept, in their original relative order.
   * @param historyList history list
   * @return a new list holding the same BackupInfo entries, newest start time first
   */
  public static ArrayList<BackupInfo> sortHistoryListDesc(ArrayList<BackupInfo> historyList) {
    ArrayList<BackupInfo> list = new ArrayList<BackupInfo>(historyList);
    Collections.sort(list, new Comparator<BackupInfo>() {
      @Override
      public int compare(BackupInfo left, BackupInfo right) {
        // Arguments are swapped to obtain descending order.
        return Long.compare(right.getStartTs(), left.getStartTs());
      }
    });
    return list;
  }


  /**
   * Lists a directory through fs.listStatus() while treating FileNotFoundException as
   * non-fatal. This accommodates differences between hadoop versions, where hadoop 1 does not
   * throw a FileNotFoundException and returns an empty FileStatus[], while Hadoop 2 will throw
   * FileNotFoundException.
   * @param fs file system
   * @param dir directory
   * @param filter path filter; when null the directory is listed unfiltered
   * @return null if dir is empty or doesn't exist, otherwise FileStatus array
   * @throws IOException for listing failures other than a missing directory
   */
  public static FileStatus[]
      listStatus(final FileSystem fs, final Path dir, final PathFilter filter) throws IOException {
    FileStatus[] listing;
    try {
      if (filter == null) {
        listing = fs.listStatus(dir);
      } else {
        listing = fs.listStatus(dir, filter);
      }
    } catch (FileNotFoundException fnfe) {
      // if directory doesn't exist, return null
      if (LOG.isTraceEnabled()) {
        LOG.trace(dir + " doesn't exist");
      }
      return null;
    }
    boolean nothingListed = listing == null || listing.length < 1;
    return nothingListed ? null : listing;
  }

  /**
   * Return the 'path' component of a Path. In Hadoop, a Path is a URI. This method returns the
   * 'path' component of a Path's URI: e.g. if a Path is
   * hdfs://example.org:9000/hbase_trunk/TestTable/compaction.dir, this method returns
   * /hbase_trunk/TestTable/compaction.dir. This method is useful if you want to print
   * out a Path without the qualifying file system part.
   * @param p file system Path whose 'path' component we are to return.
   * @return the path component of the Path's URI
   */
  public static String getPath(Path p) {
    return p.toUri().getPath();
  }

  /**
   * Builds the log file location of an incremental backup: the backup root dir, the backup id
   * and {@link HConstants#HREGION_LOGDIR_NAME}, joined by the path separator.
   * @param backupRootDir backup root directory
   * @param backupId backup id
   * @return logBackupDir: the log directory located under the directory of the backup id
   */
  public static String getLogBackupDir(String backupRootDir, String backupId) {
    StringBuilder dir = new StringBuilder();
    dir.append(backupRootDir).append(Path.SEPARATOR);
    dir.append(backupId).append(Path.SEPARATOR);
    dir.append(HConstants.HREGION_LOGDIR_NAME);
    return dir.toString();
  }

  /**
   * Loads the backup info of every backup directory found directly under the backup root and
   * returns them sorted by the timestamp part of their backup id, newest first. Directories
   * whose backup info cannot be loaded are logged and skipped.
   * @param conf configuration
   * @param backupRootPath backup root directory
   * @return backup infos, newest first
   * @throws IOException if the backup root cannot be listed
   */
  private static List<BackupInfo> getHistory(Configuration conf, Path backupRootPath)
      throws IOException {
    // Get all (n) history from backup root destination. Resolve the file system from the
    // backup root itself, since it is not necessarily located on the default file system.
    FileSystem fs = FileSystem.get(backupRootPath.toUri(), conf);
    RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(backupRootPath);

    List<BackupInfo> infos = new ArrayList<BackupInfo>();
    while (it.hasNext()) {
      LocatedFileStatus lfs = it.next();
      if (!lfs.isDirectory()) {
        continue;
      }
      String backupId = lfs.getPath().getName();
      try {
        BackupInfo info = loadBackupInfo(backupRootPath, backupId, fs);
        if (info == null) {
          // No manifest below this directory; a null entry would break the sort below.
          LOG.debug("No backup manifest found under: " + lfs.getPath());
          continue;
        }
        infos.add(info);
      } catch (IOException e) {
        LOG.error("Can not load backup info from: " + lfs.getPath(), e);
      }
    }
    // Sort
    Collections.sort(infos, new Comparator<BackupInfo>() {

      @Override
      public int compare(BackupInfo o1, BackupInfo o2) {
        // Arguments are swapped to obtain descending order.
        return Long.compare(getTimestamp(o2.getBackupId()), getTimestamp(o1.getBackupId()));
      }

      // The timestamp is the second '_'-separated token of the backup id.
      private long getTimestamp(String backupId) {
        String[] split = backupId.split("_");
        return Long.parseLong(split[1]);
      }
    });
    return infos;
  }

  /**
   * Returns at most {@code n} backup infos from the backup root, in the order produced by
   * {@link #getHistory(Configuration, Path)}, keeping only those accepted by every filter.
   * @param conf configuration
   * @param n maximum number of entries to return; collection stops as soon as the result
   *          holds exactly {@code n} entries
   * @param backupRootPath backup root directory
   * @param filters filters that must all accept a backup info for it to be returned
   * @return the selected backup infos
   * @throws IOException exception
   */
  public static List<BackupInfo> getHistory(Configuration conf, int n, Path backupRootPath,
      BackupInfo.Filter... filters) throws IOException {
    List<BackupInfo> infos = getHistory(conf, backupRootPath);
    List<BackupInfo> ret = new ArrayList<BackupInfo>();
    for (BackupInfo info : infos) {
      if (ret.size() == n) {
        break;
      }
      boolean passed = true;
      for (BackupInfo.Filter filter : filters) {
        if (!filter.apply(info)) {
          passed = false;
          break;
        }
      }
      if (passed) {
        ret.add(info);
      }
    }
    return ret;
  }

  /**
   * Loads the backup info of a backup from the first manifest file found (recursively) under
   * the directory of the backup id.
   * @param backupRootPath backup root directory
   * @param backupId backup id, used as directory name under the backup root
   * @param fs file system holding the backup
   * @return the backup info built from the manifest, or null when no manifest file is found
   * @throws IOException exception
   */
  public static BackupInfo loadBackupInfo(Path backupRootPath, String backupId, FileSystem fs)
      throws IOException {
    Path backupPath = new Path(backupRootPath, backupId);

    RemoteIterator<LocatedFileStatus> it = fs.listFiles(backupPath, true);
    while (it.hasNext()) {
      LocatedFileStatus lfs = it.next();
      if (lfs.getPath().getName().equals(BackupManifest.MANIFEST_FILE_NAME)) {
        // Load BackupManifest from the directory containing the manifest file
        BackupManifest manifest = new BackupManifest(fs, lfs.getPath().getParent());
        return manifest.toBackupInfo();
      }
    }
    return null;
  }

  /**
   * Builds a restore request from the given settings.
   * @param backupRootDir backup root dir
   * @param backupId backup id
   * @param check check only
   * @param fromTables table list from
   * @param toTables   table list to
   * @param isOverwrite overwrite data
   * @return request object
   */
  public static RestoreRequest createRestoreRequest(String backupRootDir, String backupId,
      boolean check, TableName[] fromTables, TableName[] toTables, boolean isOverwrite) {
    return new RestoreRequest.Builder()
        .withBackupRootDir(backupRootDir)
        .withBackupId(backupId)
        .withCheck(check)
        .withFromTables(fromTables)
        .withToTables(toTables)
        .withOvewrite(isOverwrite)
        .build();
  }

  /**
   * Checks that the backup image directory of every dependent image of every table exists.
   * For a table, checking stops at the first missing image; the remaining tables are still
   * checked.
   * @param backupManifestMap backup manifest of each table
   * @param conf configuration
   * @return true if all checked image directories exist, false if at least one is missing
   * @throws IOException exception
   */
  public static boolean validate(HashMap<TableName, BackupManifest> backupManifestMap,
      Configuration conf) throws IOException {
    boolean isValid = true;

    for (Entry<TableName, BackupManifest> manifestEntry : backupManifestMap.entrySet()) {
      TableName table = manifestEntry.getKey();
      // The sorted set defines the order in which the images are checked and logged.
      TreeSet<BackupImage> imageSet = new TreeSet<BackupImage>();

      ArrayList<BackupImage> depList = manifestEntry.getValue().getDependentListByTable(table);
      if (depList != null && !depList.isEmpty()) {
        imageSet.addAll(depList);
      }

      LOG.info("Dependent image(s) from old to new:");
      for (BackupImage image : imageSet) {
        String imageDir =
            HBackupFileSystem.getTableBackupDir(image.getRootDir(), image.getBackupId(), table);
        if (!BackupUtils.checkPathExist(imageDir, conf)) {
          LOG.error("ERROR: backup image does not exist: " + imageDir);
          isValid = false;
          break;
        }
        LOG.info("Backup image: " + image.getBackupId() + " for '" + table + "' is available");
      }
    }
    return isValid;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy