/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.mapreduce;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.util.ConfigurationUtil;
import org.apache.hadoop.hbase.util.FSUtils;

import java.io.IOException;
import java.util.AbstractMap;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.UUID;

/**
 * Shared implementation of mapreduce code over multiple table snapshots.
 * Utilized by both the mapreduce
 * ({@link org.apache.hadoop.hbase.mapreduce.MultiTableSnapshotInputFormat}) and mapred
 * ({@link org.apache.hadoop.hbase.mapred.MultiTableSnapshotInputFormat}) implementations.
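 * <p>
 * A rough usage sketch (the snapshot names, scans, and restore path below are illustrative
 * placeholders, not defaults of this class):
 * <pre>{@code
 * Map<String, Collection<Scan>> snapshotScans = new HashMap<>();
 * snapshotScans.put("snapshot1", Collections.singletonList(new Scan()));
 * snapshotScans.put("snapshot2", Collections.singletonList(new Scan()));
 *
 * MultiTableSnapshotInputFormatImpl inputFormatImpl = new MultiTableSnapshotInputFormatImpl();
 * inputFormatImpl.setInput(conf, snapshotScans, new Path("/tmp/snapshot-restore"));
 * List<TableSnapshotInputFormatImpl.InputSplit> splits = inputFormatImpl.getSplits(conf);
 * }</pre>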
 */
@InterfaceAudience.LimitedPrivate({ "HBase" })
@InterfaceStability.Evolving
public class MultiTableSnapshotInputFormatImpl {

  private static final Log LOG = LogFactory.getLog(MultiTableSnapshotInputFormatImpl.class);

  public static final String RESTORE_DIRS_KEY =
      "hbase.MultiTableSnapshotInputFormat.restore.snapshotDirMapping";
  public static final String SNAPSHOT_TO_SCANS_KEY =
      "hbase.MultiTableSnapshotInputFormat.snapshotsToScans";

  /**
   * Configure conf to read from snapshotScans, with snapshots restored to a subdirectory of
   * restoreDir.
   * Sets: {@link #RESTORE_DIRS_KEY}, {@link #SNAPSHOT_TO_SCANS_KEY}
   *
   * @param conf          configuration to push the snapshot/scan and restore directory mappings to
   * @param snapshotScans mapping from snapshot name to the scans to run against it
   * @param restoreDir    base directory under which each snapshot is restored
   * @throws IOException if the snapshots cannot be restored
   */
  public void setInput(Configuration conf, Map<String, Collection<Scan>> snapshotScans,
      Path restoreDir) throws IOException {
    Path rootDir = FSUtils.getRootDir(conf);
    FileSystem fs = rootDir.getFileSystem(conf);

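    // Serialize the scans into the job conf, choose a unique restore directory per snapshot,
    // record that mapping in the conf, then restore each snapshot into its directory.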
    setSnapshotToScans(conf, snapshotScans);
    Map<String, Path> restoreDirs =
        generateSnapshotToRestoreDirMapping(snapshotScans.keySet(), restoreDir);
    setSnapshotDirs(conf, restoreDirs);
    restoreSnapshots(conf, restoreDirs, fs);
  }

  /**
   * Return the list of splits extracted from the scans/snapshots pushed to conf by
   * {@link
   * #setInput(org.apache.hadoop.conf.Configuration, java.util.Map, org.apache.hadoop.fs.Path)}
   *
   * @param conf Configuration to determine splits from
   * @return the list of splits extracted from the scans/snapshots pushed to conf
   * @throws IOException if the snapshot manifests or restore directories cannot be read
   */
  public List<TableSnapshotInputFormatImpl.InputSplit> getSplits(Configuration conf)
      throws IOException {
    Path rootDir = FSUtils.getRootDir(conf);
    FileSystem fs = rootDir.getFileSystem(conf);

    List<TableSnapshotInputFormatImpl.InputSplit> rtn = Lists.newArrayList();

    Map<String, Collection<Scan>> snapshotsToScans = getSnapshotsToScans(conf);
    Map<String, Path> snapshotsToRestoreDirs = getSnapshotDirs(conf);
    for (Map.Entry<String, Collection<Scan>> entry : snapshotsToScans.entrySet()) {
      String snapshotName = entry.getKey();

      Path restoreDir = snapshotsToRestoreDirs.get(snapshotName);

      SnapshotManifest manifest =
          TableSnapshotInputFormatImpl.getSnapshotManifest(conf, snapshotName, rootDir, fs);
      List<HRegionInfo> regionInfos =
          TableSnapshotInputFormatImpl.getRegionInfosFromManifest(manifest);

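      // Each scan contributes one split per region of the snapshot that it covers, reading from
      // the files restored under restoreDir.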
      for (Scan scan : entry.getValue()) {
        List<TableSnapshotInputFormatImpl.InputSplit> splits =
            TableSnapshotInputFormatImpl.getSplits(scan, manifest, regionInfos, restoreDir, conf);
        rtn.addAll(splits);
      }
    }
    return rtn;
  }

  /**
   * Retrieve the snapshot name -> list<scan> mapping pushed to configuration by
   * {@link #setSnapshotToScans(org.apache.hadoop.conf.Configuration, java.util.Map)}
   *
   * @param conf Configuration to extract name -> list<scan> mappings from.
   * @return the snapshot name -> list<scan> mapping pushed to configuration
   * @throws IOException if a serialized scan cannot be deserialized
   */
  public Map<String, Collection<Scan>> getSnapshotsToScans(Configuration conf) throws IOException {

    Map<String, Collection<Scan>> rtn = Maps.newHashMap();

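    // Each configured entry is a (snapshot name, serialized Scan) pair; group the deserialized
    // scans by snapshot name.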
    for (Map.Entry<String, String> entry : ConfigurationUtil
        .getKeyValues(conf, SNAPSHOT_TO_SCANS_KEY)) {
      String snapshotName = entry.getKey();
      String scan = entry.getValue();

      Collection<Scan> snapshotScans = rtn.get(snapshotName);
      if (snapshotScans == null) {
        snapshotScans = Lists.newArrayList();
        rtn.put(snapshotName, snapshotScans);
      }

      snapshotScans.add(TableMapReduceUtil.convertStringToScan(scan));
    }

    return rtn;
  }

  /**
   * Push snapshotScans to conf (under the key {@link #SNAPSHOT_TO_SCANS_KEY})
   *
   * @param conf          configuration to push the serialized scans to
   * @param snapshotScans mapping from snapshot name to the scans to serialize
   * @throws IOException if a scan cannot be serialized
   */
  public void setSnapshotToScans(Configuration conf, Map<String, Collection<Scan>> snapshotScans)
      throws IOException {
    // flatten out snapshotScans for serialization to the job conf
    List<Map.Entry<String, String>> snapshotToSerializedScans = Lists.newArrayList();

    for (Map.Entry<String, Collection<Scan>> entry : snapshotScans.entrySet()) {
      String snapshotName = entry.getKey();
      Collection<Scan> scans = entry.getValue();

      // serialize all scans and map them to the appropriate snapshot
      for (Scan scan : scans) {
        snapshotToSerializedScans.add(new AbstractMap.SimpleImmutableEntry<>(snapshotName,
            TableMapReduceUtil.convertScanToString(scan)));
      }
    }

    ConfigurationUtil.setKeyValues(conf, SNAPSHOT_TO_SCANS_KEY, snapshotToSerializedScans);
  }

  /**
   * Retrieve the directories into which snapshots have been restored
   * ({@link #RESTORE_DIRS_KEY})
   *
   * @param conf Configuration to extract restore directories from
   * @return the directories into which snapshots have been restored, keyed by snapshot name
   * @throws IOException if the mapping cannot be read from conf
   */
  public Map<String, Path> getSnapshotDirs(Configuration conf) throws IOException {
    List<Map.Entry<String, String>> kvps = ConfigurationUtil.getKeyValues(conf, RESTORE_DIRS_KEY);
    Map<String, Path> rtn = Maps.newHashMapWithExpectedSize(kvps.size());

    for (Map.Entry<String, String> kvp : kvps) {
      rtn.put(kvp.getKey(), new Path(kvp.getValue()));
    }

    return rtn;
  }

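  /**
   * Push the given snapshot name -> restore directory mapping to conf (under the key
   * {@link #RESTORE_DIRS_KEY}).
   *
   * @param conf         configuration to push the mapping to
   * @param snapshotDirs mapping from snapshot name to the directory it is restored into
   */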
  public void setSnapshotDirs(Configuration conf, Map<String, Path> snapshotDirs) {
    Map<String, String> toSet = Maps.newHashMap();

    for (Map.Entry<String, Path> entry : snapshotDirs.entrySet()) {
      toSet.put(entry.getKey(), entry.getValue().toString());
    }

    ConfigurationUtil.setKeyValues(conf, RESTORE_DIRS_KEY, toSet.entrySet());
  }

  /**
   * Generate a random path underneath baseRestoreDir for each snapshot in snapshots and
   * return a map from the snapshot to the restore directory.
   *
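   * For example, a snapshot named {@code snap1} (an illustrative name) would map to a directory
   * of the form {@code <baseRestoreDir>/snap1__<random-uuid>}.
   *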
   * @param snapshots      collection of snapshot names to restore
   * @param baseRestoreDir base directory under which all snapshots in snapshots will be restored
   * @return a mapping from snapshot name to the directory in which that snapshot has been restored
   */
  private Map<String, Path> generateSnapshotToRestoreDirMapping(Collection<String> snapshots,
      Path baseRestoreDir) {
    Map<String, Path> rtn = Maps.newHashMap();

    for (String snapshotName : snapshots) {
      Path restoreSnapshotDir =
          new Path(baseRestoreDir, snapshotName + "__" + UUID.randomUUID().toString());
      rtn.put(snapshotName, restoreSnapshotDir);
    }

    return rtn;
  }

  /**
   * Restore each (snapshot name, restore directory) pair in snapshotToDir
   *
   * @param conf          configuration to restore with
   * @param snapshotToDir mapping from snapshot names to restore directories
   * @param fs            filesystem to do snapshot restoration on
   * @throws IOException if a snapshot cannot be restored
   */
  public void restoreSnapshots(Configuration conf, Map<String, Path> snapshotToDir, FileSystem fs)
      throws IOException {
    // TODO: restore from record readers to parallelize.
    Path rootDir = FSUtils.getRootDir(conf);

    for (Map.Entry<String, Path> entry : snapshotToDir.entrySet()) {
      String snapshotName = entry.getKey();
      Path restoreDir = entry.getValue();
      LOG.info("Restoring snapshot " + snapshotName + " into " + restoreDir
          + " for MultiTableSnapshotInputFormat");
      restoreSnapshot(conf, snapshotName, rootDir, restoreDir, fs);
    }
  }

  void restoreSnapshot(Configuration conf, String snapshotName, Path rootDir, Path restoreDir,
      FileSystem fs) throws IOException {
    RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
  }

  // TODO: these probably belong elsewhere/may already be implemented elsewhere.

}