All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.mapreduce.MultiTableSnapshotInputFormatImpl Maven / Gradle / Ivy

Go to download

This module contains implementations of InputFormat, OutputFormat, Mapper, Reducer, etc which are needed for running MR jobs on tables, WALs, HFiles and other HBase specific constructs. It also contains a bunch of tools: RowCounter, ImportTsv, Import, Export, CompactionTool, ExportSnapshot, WALPlayer, etc

There is a newer version: 3.0.0-beta-1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.util.AbstractMap;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.ConfigurationUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;

/**
 * Shared implementation of mapreduce code over multiple table snapshots. Utilized by both mapreduce
 * {@link org.apache.hadoop.hbase.mapreduce.MultiTableSnapshotInputFormat} and mapred
 * {@link org.apache.hadoop.hbase.mapred.MultiTableSnapshotInputFormat} implementations.
 */
@InterfaceAudience.LimitedPrivate({ "HBase" })
@InterfaceStability.Evolving
public class MultiTableSnapshotInputFormatImpl {
  private static final Logger LOG =
    LoggerFactory.getLogger(MultiTableSnapshotInputFormatImpl.class);

  public static final String RESTORE_DIRS_KEY =
    "hbase.MultiTableSnapshotInputFormat.restore.snapshotDirMapping";
  public static final String SNAPSHOT_TO_SCANS_KEY =
    "hbase.MultiTableSnapshotInputFormat.snapshotsToScans";

  /**
   * Configure conf to read from snapshotScans, with snapshots restored to a subdirectory of
   * restoreDir.
   * 

* Sets: {@link #RESTORE_DIRS_KEY}, {@link #SNAPSHOT_TO_SCANS_KEY} */ public void setInput(Configuration conf, Map> snapshotScans, Path restoreDir) throws IOException { Path rootDir = CommonFSUtils.getRootDir(conf); FileSystem fs = rootDir.getFileSystem(conf); setSnapshotToScans(conf, snapshotScans); Map restoreDirs = generateSnapshotToRestoreDirMapping(snapshotScans.keySet(), restoreDir); setSnapshotDirs(conf, restoreDirs); restoreSnapshots(conf, restoreDirs, fs); } /** * Return the list of splits extracted from the scans/snapshots pushed to conf by * {@link #setInput(Configuration, Map, Path)} * @param conf Configuration to determine splits from * @return Return the list of splits extracted from the scans/snapshots pushed to conf */ public List getSplits(Configuration conf) throws IOException { Path rootDir = CommonFSUtils.getRootDir(conf); FileSystem fs = rootDir.getFileSystem(conf); List rtn = Lists.newArrayList(); Map> snapshotsToScans = getSnapshotsToScans(conf); Map snapshotsToRestoreDirs = getSnapshotDirs(conf); for (Map.Entry> entry : snapshotsToScans.entrySet()) { String snapshotName = entry.getKey(); Path restoreDir = snapshotsToRestoreDirs.get(snapshotName); SnapshotManifest manifest = TableSnapshotInputFormatImpl.getSnapshotManifest(conf, snapshotName, rootDir, fs); List regionInfos = TableSnapshotInputFormatImpl.getRegionInfosFromManifest(manifest); for (Scan scan : entry.getValue()) { List splits = TableSnapshotInputFormatImpl.getSplits(scan, manifest, regionInfos, restoreDir, conf); rtn.addAll(splits); } } return rtn; } /** * Retrieve the snapshot name -> list<scan> mapping pushed to configuration by * {@link #setSnapshotToScans(Configuration, Map)} * @param conf Configuration to extract name -> list<scan> mappings from. * @return the snapshot name -> list<scan> mapping pushed to configuration */ public Map> getSnapshotsToScans(Configuration conf) throws IOException { Map> rtn = Maps.newHashMap(); for (Map.Entry entry : ConfigurationUtil.getKeyValues(conf, SNAPSHOT_TO_SCANS_KEY)) { String snapshotName = entry.getKey(); String scan = entry.getValue(); Collection snapshotScans = rtn.get(snapshotName); if (snapshotScans == null) { snapshotScans = Lists.newArrayList(); rtn.put(snapshotName, snapshotScans); } snapshotScans.add(TableMapReduceUtil.convertStringToScan(scan)); } return rtn; } /** * Push snapshotScans to conf (under the key {@link #SNAPSHOT_TO_SCANS_KEY}) */ public void setSnapshotToScans(Configuration conf, Map> snapshotScans) throws IOException { // flatten out snapshotScans for serialization to the job conf List> snapshotToSerializedScans = Lists.newArrayList(); for (Map.Entry> entry : snapshotScans.entrySet()) { String snapshotName = entry.getKey(); Collection scans = entry.getValue(); // serialize all scans and map them to the appropriate snapshot for (Scan scan : scans) { snapshotToSerializedScans.add(new AbstractMap.SimpleImmutableEntry<>(snapshotName, TableMapReduceUtil.convertScanToString(scan))); } } ConfigurationUtil.setKeyValues(conf, SNAPSHOT_TO_SCANS_KEY, snapshotToSerializedScans); } /** * Retrieve the directories into which snapshots have been restored from * ({@link #RESTORE_DIRS_KEY}) * @param conf Configuration to extract restore directories from * @return the directories into which snapshots have been restored from */ public Map getSnapshotDirs(Configuration conf) throws IOException { List> kvps = ConfigurationUtil.getKeyValues(conf, RESTORE_DIRS_KEY); Map rtn = Maps.newHashMapWithExpectedSize(kvps.size()); for (Map.Entry kvp : kvps) { rtn.put(kvp.getKey(), new Path(kvp.getValue())); } return rtn; } public void setSnapshotDirs(Configuration conf, Map snapshotDirs) { Map toSet = Maps.newHashMap(); for (Map.Entry entry : snapshotDirs.entrySet()) { toSet.put(entry.getKey(), entry.getValue().toString()); } ConfigurationUtil.setKeyValues(conf, RESTORE_DIRS_KEY, toSet.entrySet()); } /** * Generate a random path underneath baseRestoreDir for each snapshot in snapshots and return a * map from the snapshot to the restore directory. * @param snapshots collection of snapshot names to restore * @param baseRestoreDir base directory under which all snapshots in snapshots will be restored * @return a mapping from snapshot name to the directory in which that snapshot has been restored */ private Map generateSnapshotToRestoreDirMapping(Collection snapshots, Path baseRestoreDir) { Map rtn = Maps.newHashMap(); for (String snapshotName : snapshots) { Path restoreSnapshotDir = new Path(baseRestoreDir, snapshotName + "__" + UUID.randomUUID().toString()); rtn.put(snapshotName, restoreSnapshotDir); } return rtn; } /** * Restore each (snapshot name, restore directory) pair in snapshotToDir * @param conf configuration to restore with * @param snapshotToDir mapping from snapshot names to restore directories * @param fs filesystem to do snapshot restoration on */ public void restoreSnapshots(Configuration conf, Map snapshotToDir, FileSystem fs) throws IOException { // TODO: restore from record readers to parallelize. Path rootDir = CommonFSUtils.getRootDir(conf); for (Map.Entry entry : snapshotToDir.entrySet()) { String snapshotName = entry.getKey(); Path restoreDir = entry.getValue(); LOG.info("Restoring snapshot " + snapshotName + " into " + restoreDir + " for MultiTableSnapshotInputFormat"); restoreSnapshot(conf, snapshotName, rootDir, restoreDir, fs); } } void restoreSnapshot(Configuration conf, String snapshotName, Path rootDir, Path restoreDir, FileSystem fs) throws IOException { RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy