
org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication Maven / Gradle / Ivy


This module contains implementations of InputFormat, OutputFormat, Mapper, Reducer, etc., which are needed for running MR jobs on tables, WALs, HFiles, and other HBase-specific constructs. It also contains a number of tools: RowCounter, ImportTsv, Import, Export, CompactionTool, ExportSnapshot, WALPlayer, etc.
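For orientation, here is a minimal, self-contained sketch of wiring a map-only scan job with this module's TableMapReduceUtil. The table name "MyTable" and the RowCountMapper class are hypothetical, used only for illustration; the VerifyReplication source below follows the same pattern.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class ScanJobExample {
  // Hypothetical mapper that simply counts the rows it sees.
  static class RowCountMapper extends TableMapper<NullWritable, NullWritable> {
    enum Counters { ROWS }

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
      throws IOException {
      context.getCounter(Counters.ROWS).increment(1);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "scan-example");
    job.setJarByClass(ScanJobExample.class);
    Scan scan = new Scan();
    scan.setCacheBlocks(false); // full-table MR scans should not pollute the block cache
    // "MyTable" is a placeholder table name
    TableMapReduceUtil.initTableMapperJob("MyTable", scan, RowCountMapper.class, null, null, job);
    job.setOutputFormatClass(NullOutputFormat.class); // map-only job, no output records
    job.setNumReduceTasks(0);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}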

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce.replication;

import java.io.IOException;
import java.util.Arrays;
import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableSnapshotScanner;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
import org.apache.hadoop.hbase.replication.ReplicationPeerStorage;
import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
import org.apache.hadoop.hbase.replication.ReplicationUtils;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.zookeeper.ZKConfig;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This map-only job compares the data from a local table with a remote one. Every cell is compared
 * and must have exactly the same keys (even timestamp) as well as same value. It is possible to
 * restrict the job by time range and families. The peer id that's provided must match the one given
 * when the replication stream was setup.
 * <p>
 * Two counters are provided, Verifier.Counters.GOODROWS and BADROWS. The reason why a row is
 * different is shown in the map's log.
 */
@InterfaceAudience.Private
public class VerifyReplication extends Configured implements Tool {

  private static final Logger LOG = LoggerFactory.getLogger(VerifyReplication.class);

  public final static String NAME = "verifyrep";
  private final static String PEER_CONFIG_PREFIX = NAME + ".peer.";
  long startTime = 0;
  long endTime = Long.MAX_VALUE;
  int batch = -1;
  int versions = -1;
  String tableName = null;
  String families = null;
  String delimiter = "";
  String peerId = null;
  String peerQuorumAddress = null;
  String rowPrefixes = null;
  int sleepMsBeforeReCompare = 0;
  boolean verbose = false;
  boolean includeDeletedCells = false;
  // Source table snapshot name
  String sourceSnapshotName = null;
  // Temp location in source cluster to restore source snapshot
  String sourceSnapshotTmpDir = null;
  // Peer table snapshot name
  String peerSnapshotName = null;
  // Temp location in peer cluster to restore peer snapshot
  String peerSnapshotTmpDir = null;
  // Peer cluster Hadoop FS address
  String peerFSAddress = null;
  // Peer cluster HBase root dir location
  String peerHBaseRootAddress = null;
  // Peer Table Name
  String peerTableName = null;

  private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";

  /**
   * Map-only comparator for 2 tables
   */
  public static class Verifier extends TableMapper<ImmutableBytesWritable, Put> {

    public enum Counters {
      GOODROWS,
      BADROWS,
      ONLY_IN_SOURCE_TABLE_ROWS,
      ONLY_IN_PEER_TABLE_ROWS,
      CONTENT_DIFFERENT_ROWS
    }

    private Connection sourceConnection;
    private Table sourceTable;
    private Connection replicatedConnection;
    private Table replicatedTable;
    private ResultScanner replicatedScanner;
    private Result currentCompareRowInPeerTable;
    private int sleepMsBeforeReCompare;
    private String delimiter = "";
    private boolean verbose = false;
    private int batch = -1;

    /**
     * Map method that compares every scanned row with the equivalent from a distant cluster.
     * @param row     The current table row key.
     * @param value   The columns.
     * @param context The current context.
     * @throws IOException When something is broken with the data.
     */
    @Override
    public void map(ImmutableBytesWritable row, final Result value, Context context)
      throws IOException {
      if (replicatedScanner == null) {
        Configuration conf = context.getConfiguration();
        sleepMsBeforeReCompare = conf.getInt(NAME + ".sleepMsBeforeReCompare", 0);
        delimiter = conf.get(NAME + ".delimiter", "");
        verbose = conf.getBoolean(NAME + ".verbose", false);
        batch = conf.getInt(NAME + ".batch", -1);
        final Scan scan = new Scan();
        if (batch > 0) {
          scan.setBatch(batch);
        }
        scan.setCacheBlocks(false);
        scan.setCaching(conf.getInt(TableInputFormat.SCAN_CACHEDROWS, 1));
        long startTime = conf.getLong(NAME + ".startTime", 0);
        long endTime = conf.getLong(NAME + ".endTime", Long.MAX_VALUE);
        String families = conf.get(NAME + ".families", null);
        if (families != null) {
          String[] fams = families.split(",");
          for (String fam : fams) {
            scan.addFamily(Bytes.toBytes(fam));
          }
        }
        boolean includeDeletedCells = conf.getBoolean(NAME + ".includeDeletedCells", false);
        scan.setRaw(includeDeletedCells);
        String rowPrefixes = conf.get(NAME + ".rowPrefixes", null);
        setRowPrefixFilter(scan, rowPrefixes);
        scan.setTimeRange(startTime, endTime);
        int versions = conf.getInt(NAME + ".versions", -1);
        LOG.info("Setting number of version inside map as: " + versions);
        if (versions >= 0) {
          scan.setMaxVersions(versions);
        }
        TableName tableName = TableName.valueOf(conf.get(NAME + ".tableName"));
        sourceConnection = ConnectionFactory.createConnection(conf);
        sourceTable = sourceConnection.getTable(tableName);

        final InputSplit tableSplit = context.getInputSplit();

        String zkClusterKey = conf.get(NAME + ".peerQuorumAddress");
        Configuration peerConf =
          HBaseConfiguration.createClusterConf(conf, zkClusterKey, PEER_CONFIG_PREFIX);

        String peerName = peerConf.get(NAME + ".peerTableName", tableName.getNameAsString());
        TableName peerTableName = TableName.valueOf(peerName);
        replicatedConnection = ConnectionFactory.createConnection(peerConf);
        replicatedTable = replicatedConnection.getTable(peerTableName);
        scan.setStartRow(value.getRow());

        byte[] endRow = null;
        if (tableSplit instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit) {
          endRow = ((TableSnapshotInputFormat.TableSnapshotRegionSplit) tableSplit).getRegionInfo()
            .getEndKey();
        } else {
          endRow = ((TableSplit) tableSplit).getEndRow();
        }

        scan.setStopRow(endRow);

        String peerSnapshotName = conf.get(NAME + ".peerSnapshotName", null);
        if (peerSnapshotName != null) {
          String peerSnapshotTmpDir = conf.get(NAME + ".peerSnapshotTmpDir", null);
          String peerFSAddress = conf.get(NAME + ".peerFSAddress", null);
          String peerHBaseRootAddress = conf.get(NAME + ".peerHBaseRootAddress", null);
          FileSystem.setDefaultUri(peerConf, peerFSAddress);
          CommonFSUtils.setRootDir(peerConf, new Path(peerHBaseRootAddress));
          LOG.info("Using peer snapshot:" + peerSnapshotName + " with temp dir:"
            + peerSnapshotTmpDir + " peer root uri:" + CommonFSUtils.getRootDir(peerConf)
            + " peerFSAddress:" + peerFSAddress);

          replicatedScanner = new TableSnapshotScanner(peerConf, CommonFSUtils.getRootDir(peerConf),
            new Path(peerFSAddress, peerSnapshotTmpDir), peerSnapshotName, scan, true);
        } else {
          replicatedScanner = replicatedTable.getScanner(scan);
        }
        currentCompareRowInPeerTable = replicatedScanner.next();
      }
      // Merge-compare in row key order: both the source scan and the peer scanner return rows
      // sorted by key, so advance whichever side is behind.
      while (true) {
        if (currentCompareRowInPeerTable == null) {
          // reach the region end of peer table, row only in source table
          logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
          break;
        }
        int rowCmpRet = Bytes.compareTo(value.getRow(), currentCompareRowInPeerTable.getRow());
        if (rowCmpRet == 0) {
          // rowkey is same, need to compare the content of the row
          try {
            Result.compareResults(value, currentCompareRowInPeerTable, false);
            context.getCounter(Counters.GOODROWS).increment(1);
            if (verbose) {
              LOG.info(
                "Good row key: " + delimiter + Bytes.toStringBinary(value.getRow()) + delimiter);
            }
          } catch (Exception e) {
            logFailRowAndIncreaseCounter(context, Counters.CONTENT_DIFFERENT_ROWS, value);
          }
          currentCompareRowInPeerTable = replicatedScanner.next();
          break;
        } else if (rowCmpRet < 0) {
          // row only exists in source table
          logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_SOURCE_TABLE_ROWS, value);
          break;
        } else {
          // row only exists in peer table
          logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
            currentCompareRowInPeerTable);
          currentCompareRowInPeerTable = replicatedScanner.next();
        }
      }
    }

    /**
     * Logs a failed row and increments the given counter. If sleepMsBeforeReCompare is set, both
     * rows are re-read after the sleep so that replication lag does not produce false positives.
     */
    private void logFailRowAndIncreaseCounter(Context context, Counters counter, Result row) {
      if (sleepMsBeforeReCompare > 0) {
        Threads.sleep(sleepMsBeforeReCompare);
        try {
          Result sourceResult = sourceTable.get(new Get(row.getRow()));
          Result replicatedResult = replicatedTable.get(new Get(row.getRow()));
          Result.compareResults(sourceResult, replicatedResult, false);
          if (!sourceResult.isEmpty()) {
            context.getCounter(Counters.GOODROWS).increment(1);
            if (verbose) {
              LOG.info("Good row key (with recompare): " + delimiter
                + Bytes.toStringBinary(row.getRow()) + delimiter);
            }
          }
          return;
        } catch (Exception e) {
          LOG.error("recompare fail after sleep, rowkey=" + delimiter
            + Bytes.toStringBinary(row.getRow()) + delimiter);
        }
      }
      context.getCounter(counter).increment(1);
      context.getCounter(Counters.BADROWS).increment(1);
      LOG.error(counter.toString() + ", rowkey=" + delimiter + Bytes.toStringBinary(row.getRow())
        + delimiter);
    }

    @Override
    protected void cleanup(Context context) {
      if (replicatedScanner != null) {
        try {
          while (currentCompareRowInPeerTable != null) {
            logFailRowAndIncreaseCounter(context, Counters.ONLY_IN_PEER_TABLE_ROWS,
              currentCompareRowInPeerTable);
            currentCompareRowInPeerTable = replicatedScanner.next();
          }
        } catch (Exception e) {
          LOG.error("fail to scan peer table in cleanup", e);
        } finally {
          replicatedScanner.close();
          replicatedScanner = null;
        }
      }
      if (sourceTable != null) {
        try {
          sourceTable.close();
        } catch (IOException e) {
          LOG.error("fail to close source table in cleanup", e);
        }
      }
      if (sourceConnection != null) {
        try {
          sourceConnection.close();
        } catch (Exception e) {
          LOG.error("fail to close source connection in cleanup", e);
        }
      }
      if (replicatedTable != null) {
        try {
          replicatedTable.close();
        } catch (Exception e) {
          LOG.error("fail to close replicated table in cleanup", e);
        }
      }
      if (replicatedConnection != null) {
        try {
          replicatedConnection.close();
        } catch (Exception e) {
          LOG.error("fail to close replicated connection in cleanup", e);
        }
      }
    }
  }

  private static Pair<ReplicationPeerConfig, Configuration>
    getPeerQuorumConfig(final Configuration conf, String peerId) throws IOException {
    ZKWatcher localZKW = null;
    try {
      localZKW = new ZKWatcher(conf, "VerifyReplication", new Abortable() {
        @Override
        public void abort(String why, Throwable e) {
        }

        @Override
        public boolean isAborted() {
          return false;
        }
      });
      ReplicationPeerStorage storage =
        ReplicationStorageFactory.getReplicationPeerStorage(localZKW, conf);
      ReplicationPeerConfig peerConfig = storage.getPeerConfig(peerId);
      return Pair.newPair(peerConfig,
        ReplicationUtils.getPeerClusterConfiguration(peerConfig, conf));
    } catch (ReplicationException e) {
      throw new IOException("An error occurred while trying to connect to the remote peer cluster",
        e);
    } finally {
      if (localZKW != null) {
        localZKW.close();
      }
    }
  }

  private void restoreSnapshotForPeerCluster(Configuration conf, String peerQuorumAddress)
    throws IOException {
    Configuration peerConf =
      HBaseConfiguration.createClusterConf(conf, peerQuorumAddress, PEER_CONFIG_PREFIX);
    FileSystem.setDefaultUri(peerConf, peerFSAddress);
    CommonFSUtils.setRootDir(peerConf, new Path(peerFSAddress, peerHBaseRootAddress));
    FileSystem fs = FileSystem.get(peerConf);
    RestoreSnapshotHelper.copySnapshotForScanner(peerConf, fs, CommonFSUtils.getRootDir(peerConf),
      new Path(peerFSAddress, peerSnapshotTmpDir), peerSnapshotName);
  }

  /**
   * Sets up the actual job.
   * @param conf The current configuration.
   * @param args The command line parameters.
   * @return The newly created job.
   * @throws java.io.IOException When setting up the job fails.
   */
  public Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    if (!doCommandLine(args)) {
      return null;
    }
    conf.set(NAME + ".tableName", tableName);
    conf.setLong(NAME + ".startTime", startTime);
    conf.setLong(NAME + ".endTime", endTime);
    conf.setInt(NAME + ".sleepMsBeforeReCompare", sleepMsBeforeReCompare);
    conf.set(NAME + ".delimiter", delimiter);
    conf.setInt(NAME + ".batch", batch);
    conf.setBoolean(NAME + ".verbose", verbose);
    conf.setBoolean(NAME + ".includeDeletedCells", includeDeletedCells);
    if (families != null) {
      conf.set(NAME + ".families", families);
    }
    if (rowPrefixes != null) {
      conf.set(NAME + ".rowPrefixes", rowPrefixes);
    }

    String peerQuorumAddress;
    Pair<ReplicationPeerConfig, Configuration> peerConfigPair = null;
    if (peerId != null) {
      peerConfigPair = getPeerQuorumConfig(conf, peerId);
      ReplicationPeerConfig peerConfig = peerConfigPair.getFirst();
      peerQuorumAddress = peerConfig.getClusterKey();
      LOG.info("Peer Quorum Address: " + peerQuorumAddress + ", Peer Configuration: "
        + peerConfig.getConfiguration());
      conf.set(NAME + ".peerQuorumAddress", peerQuorumAddress);
      HBaseConfiguration.setWithPrefix(conf, PEER_CONFIG_PREFIX,
        peerConfig.getConfiguration().entrySet());
    } else {
      assert this.peerQuorumAddress != null;
      peerQuorumAddress = this.peerQuorumAddress;
      LOG.info("Peer Quorum Address: " + peerQuorumAddress);
      conf.set(NAME + ".peerQuorumAddress", peerQuorumAddress);
    }

    if (peerTableName != null) {
      LOG.info("Peer Table Name: " + peerTableName);
      conf.set(NAME + ".peerTableName", peerTableName);
    }

    conf.setInt(NAME + ".versions", versions);
    LOG.info("Number of version: " + versions);

    // Set Snapshot specific parameters
    if (peerSnapshotName != null) {
      conf.set(NAME + ".peerSnapshotName", peerSnapshotName);

      // for verifyRep by snapshot, choose a unique sub-directory under peerSnapshotTmpDir to
      // restore snapshot.
      Path restoreDir = new Path(peerSnapshotTmpDir, UUID.randomUUID().toString());
      peerSnapshotTmpDir = restoreDir.toString();
      conf.set(NAME + ".peerSnapshotTmpDir", peerSnapshotTmpDir);

      conf.set(NAME + ".peerFSAddress", peerFSAddress);
      conf.set(NAME + ".peerHBaseRootAddress", peerHBaseRootAddress);

      // This is to create HDFS delegation token for peer cluster in case of secured
      conf.setStrings(MRJobConfig.JOB_NAMENODES, peerFSAddress, conf.get(HConstants.HBASE_DIR));
    }

    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(VerifyReplication.class);

    Scan scan = new Scan();
    scan.setTimeRange(startTime, endTime);
    scan.setRaw(includeDeletedCells);
    scan.setCacheBlocks(false);
    if (batch > 0) {
      scan.setBatch(batch);
    }
    if (versions >= 0) {
      scan.setMaxVersions(versions);
      LOG.info("Number of versions set to " + versions);
    }
    if (families != null) {
      String[] fams = families.split(",");
      for (String fam : fams) {
        scan.addFamily(Bytes.toBytes(fam));
      }
    }

    setRowPrefixFilter(scan, rowPrefixes);

    if (sourceSnapshotName != null) {
      Path snapshotTempPath = new Path(sourceSnapshotTmpDir);
      LOG.info(
        "Using source snapshot-" + sourceSnapshotName + " with temp dir:" + sourceSnapshotTmpDir);
      TableMapReduceUtil.initTableSnapshotMapperJob(sourceSnapshotName, scan, Verifier.class, null,
        null, job, true, snapshotTempPath);
      restoreSnapshotForPeerCluster(conf, peerQuorumAddress);
    } else {
      TableMapReduceUtil.initTableMapperJob(tableName, scan, Verifier.class, null, null, job);
    }

    Configuration peerClusterConf;
    if (peerId != null) {
      assert peerConfigPair != null;
      peerClusterConf = peerConfigPair.getSecond();
    } else {
      peerClusterConf =
        HBaseConfiguration.createClusterConf(conf, peerQuorumAddress, PEER_CONFIG_PREFIX);
    }
    // Obtain the auth token from peer cluster
    TableMapReduceUtil.initCredentialsForCluster(job, peerClusterConf);

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    return job;
  }

  private static void setRowPrefixFilter(Scan scan, String rowPrefixes) {
    if (rowPrefixes != null && !rowPrefixes.isEmpty()) {
      String[] rowPrefixArray = rowPrefixes.split(",");
      Arrays.sort(rowPrefixArray);
      FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
      for (String prefix : rowPrefixArray) {
        Filter filter = new PrefixFilter(Bytes.toBytes(prefix));
        filterList.addFilter(filter);
      }
      scan.setFilter(filterList);
      byte[] startPrefixRow = Bytes.toBytes(rowPrefixArray[0]);
      byte[] lastPrefixRow = Bytes.toBytes(rowPrefixArray[rowPrefixArray.length - 1]);
      setStartAndStopRows(scan, startPrefixRow, lastPrefixRow);
    }
  }

  private static void setStartAndStopRows(Scan scan, byte[] startPrefixRow, byte[] lastPrefixRow) {
    scan.setStartRow(startPrefixRow);
    byte[] stopRow = Bytes.add(Bytes.head(lastPrefixRow, lastPrefixRow.length - 1),
      new byte[] { (byte) (lastPrefixRow[lastPrefixRow.length - 1] + 1) });
    scan.setStopRow(stopRow);
  }

  public boolean doCommandLine(final String[] args) {
    if (args.length < 2) {
      printUsage(null);
      return false;
    }
    try {
      for (int i = 0; i < args.length; i++) {
        String cmd = args[i];
        if (cmd.equals("-h") || cmd.startsWith("--h")) {
          printUsage(null);
          return false;
        }

        final String startTimeArgKey = "--starttime=";
        if (cmd.startsWith(startTimeArgKey)) {
          startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
          continue;
        }

        final String endTimeArgKey = "--endtime=";
        if (cmd.startsWith(endTimeArgKey)) {
          endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
          continue;
        }

        final String includeDeletedCellsArgKey = "--raw";
        if (cmd.equals(includeDeletedCellsArgKey)) {
          includeDeletedCells = true;
          continue;
        }

        final String versionsArgKey = "--versions=";
        if (cmd.startsWith(versionsArgKey)) {
          versions = Integer.parseInt(cmd.substring(versionsArgKey.length()));
          continue;
        }

        final String batchArgKey = "--batch=";
        if (cmd.startsWith(batchArgKey)) {
          batch = Integer.parseInt(cmd.substring(batchArgKey.length()));
          continue;
        }

        final String familiesArgKey = "--families=";
        if (cmd.startsWith(familiesArgKey)) {
          families = cmd.substring(familiesArgKey.length());
          continue;
        }

        final String rowPrefixesKey = "--row-prefixes=";
        if (cmd.startsWith(rowPrefixesKey)) {
          rowPrefixes = cmd.substring(rowPrefixesKey.length());
          continue;
        }

        final String delimiterArgKey = "--delimiter=";
        if (cmd.startsWith(delimiterArgKey)) {
          delimiter = cmd.substring(delimiterArgKey.length());
          continue;
        }

        final String sleepToReCompareKey = "--recomparesleep=";
        if (cmd.startsWith(sleepToReCompareKey)) {
          sleepMsBeforeReCompare = Integer.parseInt(cmd.substring(sleepToReCompareKey.length()));
          continue;
        }

        final String verboseKey = "--verbose";
        if (cmd.startsWith(verboseKey)) {
          verbose = true;
          continue;
        }

        final String sourceSnapshotNameArgKey = "--sourceSnapshotName=";
        if (cmd.startsWith(sourceSnapshotNameArgKey)) {
          sourceSnapshotName = cmd.substring(sourceSnapshotNameArgKey.length());
          continue;
        }

        final String sourceSnapshotTmpDirArgKey = "--sourceSnapshotTmpDir=";
        if (cmd.startsWith(sourceSnapshotTmpDirArgKey)) {
          sourceSnapshotTmpDir = cmd.substring(sourceSnapshotTmpDirArgKey.length());
          continue;
        }

        final String peerSnapshotNameArgKey = "--peerSnapshotName=";
        if (cmd.startsWith(peerSnapshotNameArgKey)) {
          peerSnapshotName = cmd.substring(peerSnapshotNameArgKey.length());
          continue;
        }

        final String peerSnapshotTmpDirArgKey = "--peerSnapshotTmpDir=";
        if (cmd.startsWith(peerSnapshotTmpDirArgKey)) {
          peerSnapshotTmpDir = cmd.substring(peerSnapshotTmpDirArgKey.length());
          continue;
        }

        final String peerFSAddressArgKey = "--peerFSAddress=";
        if (cmd.startsWith(peerFSAddressArgKey)) {
          peerFSAddress = cmd.substring(peerFSAddressArgKey.length());
          continue;
        }

        final String peerHBaseRootAddressArgKey = "--peerHBaseRootAddress=";
        if (cmd.startsWith(peerHBaseRootAddressArgKey)) {
          peerHBaseRootAddress = cmd.substring(peerHBaseRootAddressArgKey.length());
          continue;
        }

        final String peerTableNameArgKey = "--peerTableName=";
        if (cmd.startsWith(peerTableNameArgKey)) {
          peerTableName = cmd.substring(peerTableNameArgKey.length());
          continue;
        }

        if (cmd.startsWith("--")) {
          printUsage("Invalid argument '" + cmd + "'");
          return false;
        }

        if (i == args.length - 2) {
          if (isPeerQuorumAddress(cmd)) {
            peerQuorumAddress = cmd;
          } else {
            peerId = cmd;
          }
        }

        if (i == args.length - 1) {
          tableName = cmd;
        }
      }

      if (
        (sourceSnapshotName != null && sourceSnapshotTmpDir == null)
          || (sourceSnapshotName == null && sourceSnapshotTmpDir != null)
      ) {
        printUsage("Source snapshot name and snapshot temp location should be provided"
          + " to use snapshots in source cluster");
        return false;
      }

      if (
        peerSnapshotName != null || peerSnapshotTmpDir != null || peerFSAddress != null
          || peerHBaseRootAddress != null
      ) {
        if (
          peerSnapshotName == null || peerSnapshotTmpDir == null || peerFSAddress == null
            || peerHBaseRootAddress == null
        ) {
          printUsage(
            "Peer snapshot name, peer snapshot temp location, Peer HBase root address and "
              + "peer FSAddress should be provided to use snapshots in peer cluster");
          return false;
        }
      }

      // This is to avoid making recompare calls to source/peer tables when snapshots are used
      if ((sourceSnapshotName != null || peerSnapshotName != null) && sleepMsBeforeReCompare > 0) {
        printUsage(
          "Using sleepMsBeforeReCompare along with snapshots is not allowed as snapshots are"
            + " immutable");
        return false;
      }
    } catch (Exception e) {
      e.printStackTrace();
      printUsage("Can't start because " + e.getMessage());
      return false;
    }
    return true;
  }

  private boolean isPeerQuorumAddress(String cmd) {
    try {
      ZKConfig.validateClusterKey(cmd);
    } catch (IOException e) {
      // not a quorum address
      return false;
    }
    return true;
  }

  /*
   * @param errorMsg Error message. Can be null.
   */
  private static void printUsage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
    }
    System.err.println("Usage: verifyrep [--starttime=X]"
      + " [--endtime=Y] [--families=A] [--row-prefixes=B] [--delimiter=] [--recomparesleep=] "
      + "[--batch=] [--verbose] [--peerTableName=] [--sourceSnapshotName=P] "
      + "[--sourceSnapshotTmpDir=Q] [--peerSnapshotName=R] [--peerSnapshotTmpDir=S] "
      + "[--peerFSAddress=T] [--peerHBaseRootAddress=U] <peerid|peerQuorumAddress> <tablename>");
    System.err.println();
    System.err.println("Options:");
    System.err.println(" starttime    beginning of the time range");
    System.err.println("              without endtime means from starttime to forever");
    System.err.println(" endtime      end of the time range");
    System.err.println(" versions     number of cell versions to verify");
    System.err.println(" batch        batch count for scan, note that"
      + " result row counts will no longer be actual number of rows when you use this option");
    System.err.println(" raw          includes raw scan if given in options");
    System.err.println(" families     comma-separated list of families to copy");
    System.err.println(" row-prefixes comma-separated list of row key prefixes to filter on ");
    System.err.println(" delimiter    the delimiter used in display around rowkey");
    System.err.println(" recomparesleep   milliseconds to sleep before recompare row, "
      + "default value is 0 which disables the recompare.");
    System.err.println(" verbose      logs row keys of good rows");
    System.err.println(" peerTableName  Peer Table Name");
    System.err.println(" sourceSnapshotName  Source Snapshot Name");
    System.err.println(" sourceSnapshotTmpDir Tmp location to restore source table snapshot");
    System.err.println(" peerSnapshotName  Peer Snapshot Name");
    System.err.println(" peerSnapshotTmpDir Tmp location to restore peer table snapshot");
    System.err.println(" peerFSAddress      Peer cluster Hadoop FS address");
    System.err.println(" peerHBaseRootAddress  Peer cluster HBase root location");
    System.err.println();
    System.err.println("Args:");
    System.err.println(" peerid       Id of the peer used for verification,"
      + " must match the one given for replication");
    System.err.println(" peerQuorumAddress   quorumAddress of the peer used for verification. The "
      + "format is zk_quorum:zk_port:zk_hbase_path");
    System.err.println(" tablename    Name of the table to verify");
    System.err.println();
    System.err.println("Examples:");
    System.err
      .println(" To verify the data replicated from TestTable for a 1 hour window with peer #5 ");
    System.err
      .println(" $ hbase " + "org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication"
        + " --starttime=1265875194289 --endtime=1265878794289 5 TestTable ");
    System.err.println();
    System.err.println(
      " To verify the data in TestTable between the cluster that runs VerifyReplication and cluster-b");
    System.err.println(" Assume quorum address for cluster-b is"
      + " cluster-b-1.example.com,cluster-b-2.example.com,cluster-b-3.example.com:2181:/cluster-b");
    System.err
      .println(" $ hbase org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication \\\n"
        + " cluster-b-1.example.com,cluster-b-2.example.com,cluster-b-3.example.com:"
        + "2181:/cluster-b \\\n" + " TestTable");
    System.err.println();
    System.err
      .println(" To verify the data in TestTable between the secured cluster that runs"
        + " VerifyReplication and insecure cluster-b");
    System.err
      .println(" $ hbase org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication \\\n"
        + " -D verifyrep.peer.hbase.security.authentication=simple \\\n"
        + " cluster-b-1.example.com,cluster-b-2.example.com,cluster-b-3.example.com:"
        + "2181:/cluster-b \\\n" + " TestTable");
    System.err.println();
    System.err.println(" To verify the data in TestTable between"
      + " the secured cluster that runs VerifyReplication and secured cluster-b");
    System.err.println(" Assume cluster-b uses different kerberos principal, cluster-b/_HOST@E"
      + ", for master and regionserver kerberos principal from another cluster");
    System.err
      .println(" $ hbase org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication \\\n"
        + " -D verifyrep.peer.hbase.regionserver.kerberos.principal="
        + "cluster-b/_HOST@EXAMPLE.COM \\\n"
        + " -D verifyrep.peer.hbase.master.kerberos.principal=cluster-b/_HOST@EXAMPLE.COM \\\n"
        + " cluster-b-1.example.com,cluster-b-2.example.com,cluster-b-3.example.com:"
        + "2181:/cluster-b \\\n" + " TestTable");
    System.err.println();
    System.err.println(
      " To verify the data in TestTable between the insecure cluster that runs VerifyReplication"
        + " and secured cluster-b");
    System.err
      .println(" $ hbase org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication \\\n"
        + " -D verifyrep.peer.hbase.security.authentication=kerberos \\\n"
        + " -D verifyrep.peer.hbase.regionserver.kerberos.principal="
        + "cluster-b/_HOST@EXAMPLE.COM \\\n"
        + " -D verifyrep.peer.hbase.master.kerberos.principal=cluster-b/_HOST@EXAMPLE.COM \\\n"
        + " cluster-b-1.example.com,cluster-b-2.example.com,cluster-b-3.example.com:"
        + "2181:/cluster-b \\\n" + " TestTable");
  }

  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Job job = createSubmittableJob(conf, args);
    if (job != null) {
      return job.waitForCompletion(true) ? 0 : 1;
    }
    return 1;
  }

  /**
   * Main entry point.
   * @param args The command line parameters.
   * @throws Exception When running the job fails.
   */
  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(), args);
    System.exit(res);
  }
}
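For reference, here is a minimal sketch of launching the tool programmatically rather than through the hbase command. It simply mirrors the main() method above via ToolRunner; the peer id, table name, and time range are placeholder values taken from the usage examples in printUsage().

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
import org.apache.hadoop.util.ToolRunner;

public class VerifyReplicationLauncher {
  public static void main(String[] args) throws Exception {
    // Same invocation as the first CLI example in printUsage(): compare TestTable
    // against replication peer 5 over a fixed one-hour time window.
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new VerifyReplication(),
      new String[] { "--starttime=1265875194289", "--endtime=1265878794289", "5", "TestTable" });
    System.exit(exitCode);
  }
}

Row-level results end up in the job counters: Verifier.Counters.GOODROWS for matching rows, BADROWS (broken down into ONLY_IN_SOURCE_TABLE_ROWS, ONLY_IN_PEER_TABLE_ROWS, and CONTENT_DIFFERENT_ROWS) for mismatches.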




