/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapreduce;
import java.io.IOException;
import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.google.common.base.Throwables;
import com.google.common.collect.Iterators;
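/**
* Second half of the HashTable/SyncTable tool pair. Given the hash data that
* {@link HashTable} produced for a source table, this map-only job re-hashes the
* corresponding key ranges of a target table, rescans only the ranges whose hashes
* differ, and (unless run with --dryrun=true) writes the Puts and Deletes needed
* to make the target match the source. Differences are also reported via job
* counters. For example (mirroring the usage text printed by this tool):
* <pre>
* $ bin/hbase org.apache.hadoop.hbase.mapreduce.SyncTable --dryrun=true \
*     --sourcezkcluster=zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase \
*     hdfs://nn:9000/hashes/tableA tableA tableA
* </pre>
*/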
public class SyncTable extends Configured implements Tool {
private static final Log LOG = LogFactory.getLog(SyncTable.class);
static final String SOURCE_HASH_DIR_CONF_KEY = "sync.table.source.hash.dir";
static final String SOURCE_TABLE_CONF_KEY = "sync.table.source.table.name";
static final String TARGET_TABLE_CONF_KEY = "sync.table.target.table.name";
static final String SOURCE_ZK_CLUSTER_CONF_KEY = "sync.table.source.zk.cluster";
static final String TARGET_ZK_CLUSTER_CONF_KEY = "sync.table.target.zk.cluster";
static final String DRY_RUN_CONF_KEY = "sync.table.dry.run";
static final String DO_DELETES_CONF_KEY = "sync.table.do.deletes";
static final String DO_PUTS_CONF_KEY = "sync.table.do.puts";
Path sourceHashDir;
String sourceTableName;
String targetTableName;
String sourceZkCluster;
String targetZkCluster;
boolean dryRun;
boolean doDeletes = true;
boolean doPuts = true;
Counters counters;
public SyncTable(Configuration conf) {
super(conf);
}
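/**
* Obtain HBase delegation tokens for the given peer cluster if it is configured
* for Kerberos authentication.
*/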
private void initCredentialsForHBase(String zookeeper, Job job) throws IOException {
Configuration peerConf = HBaseConfiguration.createClusterConf(job
.getConfiguration(), zookeeper);
if ("kerberos".equalsIgnoreCase(peerConf.get("hbase.security.authentication"))) {
TableMapReduceUtil.initCredentialsForCluster(job, peerConf);
}
}
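/**
* Validate the HashTable output directory, sanity-check the hash manifest against
* the partition and data files, and configure a map-only job that scans the target
* table with {@link SyncMapper}.
*/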
public Job createSubmittableJob(String[] args) throws IOException {
FileSystem fs = sourceHashDir.getFileSystem(getConf());
if (!fs.exists(sourceHashDir)) {
throw new IOException("Source hash dir not found: " + sourceHashDir);
}
Job job = Job.getInstance(getConf(),getConf().get("mapreduce.job.name",
"syncTable_" + sourceTableName + "-" + targetTableName));
Configuration jobConf = job.getConfiguration();
if ("kerberos".equalsIgnoreCase(jobConf.get("hadoop.security.authentication"))) {
TokenCache.obtainTokensForNamenodes(job.getCredentials(), new
Path[] { sourceHashDir }, getConf());
}
HashTable.TableHash tableHash = HashTable.TableHash.read(getConf(), sourceHashDir);
LOG.info("Read source hash manifest: " + tableHash);
LOG.info("Read " + tableHash.partitions.size() + " partition keys");
if (!tableHash.tableName.equals(sourceTableName)) {
LOG.warn("Table name mismatch - manifest indicates hash was taken from: "
+ tableHash.tableName + " but job is reading from: " + sourceTableName);
}
if (tableHash.numHashFiles != tableHash.partitions.size() + 1) {
throw new RuntimeException("Hash data appears corrupt. The number of hash files created"
+ " should be 1 more than the number of partition keys. However, the manifest file "
+ " says numHashFiles=" + tableHash.numHashFiles + " but the number of partition keys"
+ " found in the partitions file is " + tableHash.partitions.size());
}
Path dataDir = new Path(sourceHashDir, HashTable.HASH_DATA_DIR);
int dataSubdirCount = 0;
for (FileStatus file : fs.listStatus(dataDir)) {
if (file.getPath().getName().startsWith(HashTable.OUTPUT_DATA_FILE_PREFIX)) {
dataSubdirCount++;
}
}
if (dataSubdirCount != tableHash.numHashFiles) {
throw new RuntimeException("Hash data appears corrupt. The number of hash files created"
+ " should be 1 more than the number of partition keys. However, the number of data dirs"
+ " found is " + dataSubdirCount + " but the number of partition keys"
+ " found in the partitions file is " + tableHash.partitions.size());
}
job.setJarByClass(HashTable.class);
jobConf.set(SOURCE_HASH_DIR_CONF_KEY, sourceHashDir.toString());
jobConf.set(SOURCE_TABLE_CONF_KEY, sourceTableName);
jobConf.set(TARGET_TABLE_CONF_KEY, targetTableName);
if (sourceZkCluster != null) {
jobConf.set(SOURCE_ZK_CLUSTER_CONF_KEY, sourceZkCluster);
initCredentialsForHBase(sourceZkCluster, job);
}
if (targetZkCluster != null) {
jobConf.set(TARGET_ZK_CLUSTER_CONF_KEY, targetZkCluster);
initCredentialsForHBase(targetZkCluster, job);
}
jobConf.setBoolean(DRY_RUN_CONF_KEY, dryRun);
jobConf.setBoolean(DO_DELETES_CONF_KEY, doDeletes);
jobConf.setBoolean(DO_PUTS_CONF_KEY, doPuts);
TableMapReduceUtil.initTableMapperJob(targetTableName, tableHash.initScan(),
SyncMapper.class, null, null, job);
job.setNumReduceTasks(0);
if (dryRun) {
job.setOutputFormatClass(NullOutputFormat.class);
} else {
// No reducers. Just write straight to table. Call initTableReducerJob
// because it sets up the TableOutputFormat.
TableMapReduceUtil.initTableReducerJob(targetTableName, null, job, null,
targetZkCluster, null, null);
// would be nice to add an option for bulk load instead
}
// Obtain an authentication token, for the specified cluster, on behalf of the current user
if (sourceZkCluster != null) {
Configuration peerConf =
HBaseConfiguration.createClusterConf(job.getConfiguration(), sourceZkCluster);
TableMapReduceUtil.initCredentialsForCluster(job, peerConf);
}
return job;
}
public static class SyncMapper extends TableMapper<ImmutableBytesWritable, Mutation> {
Path sourceHashDir;
Connection sourceConnection;
Connection targetConnection;
Table sourceTable;
Table targetTable;
boolean dryRun;
boolean doDeletes = true;
boolean doPuts = true;
HashTable.TableHash sourceTableHash;
HashTable.TableHash.Reader sourceHashReader;
ImmutableBytesWritable currentSourceHash;
ImmutableBytesWritable nextSourceKey;
HashTable.ResultHasher targetHasher;
Throwable mapperException;
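// Job counters. SOURCEMISSING* count data present only in the target table,
// TARGETMISSING* count data present only in the source table, and the RANGES*
// counters track key ranges that were rescanned after a hash mismatch.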
public static enum Counter {BATCHES, HASHES_MATCHED, HASHES_NOT_MATCHED, SOURCEMISSINGROWS,
SOURCEMISSINGCELLS, TARGETMISSINGROWS, TARGETMISSINGCELLS, ROWSWITHDIFFS, DIFFERENTCELLVALUES,
MATCHINGROWS, MATCHINGCELLS, EMPTY_BATCHES, RANGESMATCHED, RANGESNOTMATCHED};
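/**
* Open connections and tables for both clusters, load the source hash manifest,
* and position the hash reader at this mapper's split start key.
*/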
@Override
protected void setup(Context context) throws IOException {
Configuration conf = context.getConfiguration();
sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY));
sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null);
targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY,
TableOutputFormat.OUTPUT_CONF_PREFIX);
sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY);
targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY);
dryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
doDeletes = conf.getBoolean(DO_DELETES_CONF_KEY, true);
doPuts = conf.getBoolean(DO_PUTS_CONF_KEY, true);
sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir);
LOG.info("Read source hash manifest: " + sourceTableHash);
LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys");
TableSplit split = (TableSplit) context.getInputSplit();
ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow());
sourceHashReader = sourceTableHash.newReader(conf, splitStartKey);
findNextKeyHashPair();
// create a hasher, but don't start it right away
// instead, find the first hash batch at or after the start row
// and skip any rows that come before. they will be caught by the previous task
targetHasher = new HashTable.ResultHasher();
}
private static Connection openConnection(Configuration conf, String zkClusterConfKey,
String configPrefix)
throws IOException {
String zkCluster = conf.get(zkClusterConfKey);
Configuration clusterConf = HBaseConfiguration.createClusterConf(conf,
zkCluster, configPrefix);
return ConnectionFactory.createConnection(clusterConf);
}
private static Table openTable(Connection connection, Configuration conf,
String tableNameConfKey) throws IOException {
return connection.getTable(TableName.valueOf(conf.get(tableNameConfKey)));
}
/**
* Attempt to read the next source key/hash pair.
* If there are no more, set nextSourceKey to null
*/
private void findNextKeyHashPair() throws IOException {
boolean hasNext = sourceHashReader.next();
if (hasNext) {
nextSourceKey = sourceHashReader.getCurrentKey();
} else {
// no more keys - last hash goes to the end
nextSourceKey = null;
}
}
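/**
* Process one Result from the target table scan: close out any hash batches that
* end at or before this row, then add the row to the current batch's hash.
*/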
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context)
throws IOException, InterruptedException {
try {
// first, finish any hash batches that end before the scanned row
while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) {
moveToNextBatch(context);
}
// next, add the scanned row (as long as we've reached the first batch)
if (targetHasher.isBatchStarted()) {
targetHasher.hashResult(value);
}
} catch (Throwable t) {
mapperException = t;
Throwables.propagateIfInstanceOf(t, IOException.class);
Throwables.propagateIfInstanceOf(t, InterruptedException.class);
Throwables.propagate(t);
}
}
/**
* If there is an open hash batch, complete it and sync if there are diffs.
* Start a new batch, and seek to read the next source key/hash pair.
*/
private void moveToNextBatch(Context context) throws IOException, InterruptedException {
if (targetHasher.isBatchStarted()) {
finishBatchAndCompareHashes(context);
}
targetHasher.startBatch(nextSourceKey);
currentSourceHash = sourceHashReader.getCurrentHash();
findNextKeyHashPair();
}
/**
* Finish the currently open hash batch.
* Compare the target hash to the given source hash.
* If they do not match, then sync the covered key range.
*/
private void finishBatchAndCompareHashes(Context context)
throws IOException, InterruptedException {
targetHasher.finishBatch();
context.getCounter(Counter.BATCHES).increment(1);
if (targetHasher.getBatchSize() == 0) {
context.getCounter(Counter.EMPTY_BATCHES).increment(1);
}
ImmutableBytesWritable targetHash = targetHasher.getBatchHash();
if (targetHash.equals(currentSourceHash)) {
context.getCounter(Counter.HASHES_MATCHED).increment(1);
} else {
context.getCounter(Counter.HASHES_NOT_MATCHED).increment(1);
ImmutableBytesWritable stopRow = nextSourceKey == null
? new ImmutableBytesWritable(sourceTableHash.stopRow)
: nextSourceKey;
if (LOG.isDebugEnabled()) {
LOG.debug("Hash mismatch. Key range: " + toHex(targetHasher.getBatchStartKey())
+ " to " + toHex(stopRow)
+ " sourceHash: " + toHex(currentSourceHash)
+ " targetHash: " + toHex(targetHash));
}
syncRange(context, targetHasher.getBatchStartKey(), stopRow);
}
}
private static String toHex(ImmutableBytesWritable bytes) {
return Bytes.toHex(bytes.get(), bytes.getOffset(), bytes.getLength());
}
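// Stand-in scanner used by syncRange() when one side is missing an entire row,
// so syncRowCells() can treat the missing side as having no cells.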
private static final CellScanner EMPTY_CELL_SCANNER
= new CellScanner(Iterators.<Result>emptyIterator());
/**
* Rescan the given range directly from the source and target tables.
* Count and log differences, and if this is not a dry run, output Puts and Deletes
* to make the target table match the source table for this range
*/
private void syncRange(Context context, ImmutableBytesWritable startRow,
ImmutableBytesWritable stopRow) throws IOException, InterruptedException {
Scan scan = sourceTableHash.initScan();
scan.setStartRow(startRow.copyBytes());
scan.setStopRow(stopRow.copyBytes());
ResultScanner sourceScanner = sourceTable.getScanner(scan);
CellScanner sourceCells = new CellScanner(sourceScanner.iterator());
ResultScanner targetScanner = targetTable.getScanner(new Scan(scan));
CellScanner targetCells = new CellScanner(targetScanner.iterator());
boolean rangeMatched = true;
byte[] nextSourceRow = sourceCells.nextRow();
byte[] nextTargetRow = targetCells.nextRow();
while(nextSourceRow != null || nextTargetRow != null) {
boolean rowMatched;
int rowComparison = compareRowKeys(nextSourceRow, nextTargetRow);
if (rowComparison < 0) {
if (LOG.isInfoEnabled()) {
LOG.info("Target missing row: " + Bytes.toHex(nextSourceRow));
}
context.getCounter(Counter.TARGETMISSINGROWS).increment(1);
rowMatched = syncRowCells(context, nextSourceRow, sourceCells, EMPTY_CELL_SCANNER);
nextSourceRow = sourceCells.nextRow(); // advance only source to next row
} else if (rowComparison > 0) {
if (LOG.isInfoEnabled()) {
LOG.info("Source missing row: " + Bytes.toHex(nextTargetRow));
}
context.getCounter(Counter.SOURCEMISSINGROWS).increment(1);
rowMatched = syncRowCells(context, nextTargetRow, EMPTY_CELL_SCANNER, targetCells);
nextTargetRow = targetCells.nextRow(); // advance only target to next row
} else {
// current row is the same on both sides, compare cell by cell
rowMatched = syncRowCells(context, nextSourceRow, sourceCells, targetCells);
nextSourceRow = sourceCells.nextRow();
nextTargetRow = targetCells.nextRow();
}
if (!rowMatched) {
rangeMatched = false;
}
}
sourceScanner.close();
targetScanner.close();
context.getCounter(rangeMatched ? Counter.RANGESMATCHED : Counter.RANGESNOTMATCHED)
.increment(1);
}
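/**
* Iterates over the cells of a scan in row order, buffering one Result ahead so
* that row boundaries can be detected even when a single row spans multiple
* Results (e.g. when scanner batching is enabled).
*/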
private static class CellScanner {
private final Iterator<Result> results;
private byte[] currentRow;
private Result currentRowResult;
private int nextCellInRow;
private Result nextRowResult;
public CellScanner(Iterator<Result> results) {
this.results = results;
}
/**
* Advance to the next row and return its row key.
* Returns null iff there are no more rows.
*/
public byte[] nextRow() {
if (nextRowResult == null) {
// no cached row - check scanner for more
while (results.hasNext()) {
nextRowResult = results.next();
Cell nextCell = nextRowResult.rawCells()[0];
if (currentRow == null
|| !Bytes.equals(currentRow, 0, currentRow.length, nextCell.getRowArray(),
nextCell.getRowOffset(), nextCell.getRowLength())) {
// found next row
break;
} else {
// found another result from current row, keep scanning
nextRowResult = null;
}
}
if (nextRowResult == null) {
// end of data, no more rows
currentRowResult = null;
currentRow = null;
return null;
}
}
// advance to cached result for next row
currentRowResult = nextRowResult;
nextCellInRow = 0;
currentRow = currentRowResult.getRow();
nextRowResult = null;
return currentRow;
}
/**
* Returns the next Cell in the current row or null iff none remain.
*/
public Cell nextCellInRow() {
if (currentRowResult == null) {
// nothing left in current row
return null;
}
Cell nextCell = currentRowResult.rawCells()[nextCellInRow];
nextCellInRow++;
if (nextCellInRow == currentRowResult.size()) {
if (results.hasNext()) {
Result result = results.next();
Cell cell = result.rawCells()[0];
if (Bytes.equals(currentRow, 0, currentRow.length, cell.getRowArray(),
cell.getRowOffset(), cell.getRowLength())) {
// result is part of current row
currentRowResult = result;
nextCellInRow = 0;
} else {
// result is part of next row, cache it
nextRowResult = result;
// current row is complete
currentRowResult = null;
}
} else {
// end of data
currentRowResult = null;
}
}
return nextCell;
}
}
/**
* Compare the cells for the given row from the source and target tables.
* Count and log any differences.
* If not a dry run, output a Put and/or Delete needed to sync the target table
* to match the source table.
*/
private boolean syncRowCells(Context context, byte[] rowKey, CellScanner sourceCells,
CellScanner targetCells) throws IOException, InterruptedException {
Put put = null;
Delete delete = null;
long matchingCells = 0;
boolean matchingRow = true;
Cell sourceCell = sourceCells.nextCellInRow();
Cell targetCell = targetCells.nextCellInRow();
while (sourceCell != null || targetCell != null) {
int cellKeyComparison = compareCellKeysWithinRow(sourceCell, targetCell);
if (cellKeyComparison < 0) {
if (LOG.isDebugEnabled()) {
LOG.debug("Target missing cell: " + sourceCell);
}
context.getCounter(Counter.TARGETMISSINGCELLS).increment(1);
matchingRow = false;
if (!dryRun && doPuts) {
if (put == null) {
put = new Put(rowKey);
}
put.add(sourceCell);
}
sourceCell = sourceCells.nextCellInRow();
} else if (cellKeyComparison > 0) {
if (LOG.isDebugEnabled()) {
LOG.debug("Source missing cell: " + targetCell);
}
context.getCounter(Counter.SOURCEMISSINGCELLS).increment(1);
matchingRow = false;
if (!dryRun && doDeletes) {
if (delete == null) {
delete = new Delete(rowKey);
}
// add a tombstone to exactly match the target cell that is missing on the source
delete.addColumn(CellUtil.cloneFamily(targetCell),
CellUtil.cloneQualifier(targetCell), targetCell.getTimestamp());
}
targetCell = targetCells.nextCellInRow();
} else {
// the cell keys are equal, now check values
if (CellUtil.matchingValue(sourceCell, targetCell)) {
matchingCells++;
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("Different values: ");
LOG.debug(" source cell: " + sourceCell
+ " value: " + Bytes.toHex(sourceCell.getValueArray(),
sourceCell.getValueOffset(), sourceCell.getValueLength()));
LOG.debug(" target cell: " + targetCell
+ " value: " + Bytes.toHex(targetCell.getValueArray(),
targetCell.getValueOffset(), targetCell.getValueLength()));
}
context.getCounter(Counter.DIFFERENTCELLVALUES).increment(1);
matchingRow = false;
if (!dryRun && doPuts) {
// overwrite target cell
if (put == null) {
put = new Put(rowKey);
}
put.add(sourceCell);
}
}
sourceCell = sourceCells.nextCellInRow();
targetCell = targetCells.nextCellInRow();
}
if (!dryRun && sourceTableHash.scanBatch > 0) {
if (put != null && put.size() >= sourceTableHash.scanBatch) {
context.write(new ImmutableBytesWritable(rowKey), put);
put = null;
}
if (delete != null && delete.size() >= sourceTableHash.scanBatch) {
context.write(new ImmutableBytesWritable(rowKey), delete);
delete = null;
}
}
}
if (!dryRun) {
if (put != null) {
context.write(new ImmutableBytesWritable(rowKey), put);
}
if (delete != null) {
context.write(new ImmutableBytesWritable(rowKey), delete);
}
}
if (matchingCells > 0) {
context.getCounter(Counter.MATCHINGCELLS).increment(matchingCells);
}
if (matchingRow) {
context.getCounter(Counter.MATCHINGROWS).increment(1);
return true;
} else {
context.getCounter(Counter.ROWSWITHDIFFS).increment(1);
return false;
}
}
private static final CellComparator cellComparator = new CellComparator();
/**
* Compare row keys of the given Result objects.
* Nulls are after non-nulls
*/
private static int compareRowKeys(byte[] r1, byte[] r2) {
if (r1 == null) {
return 1; // source missing row
} else if (r2 == null) {
return -1; // target missing row
} else {
return cellComparator.compareRows(r1, 0, r1.length, r2, 0, r2.length);
}
}
/**
* Compare families, qualifiers, and timestamps of the given Cells.
* They are assumed to be of the same row.
* Nulls are after non-nulls.
*/
private static int compareCellKeysWithinRow(Cell c1, Cell c2) {
if (c1 == null) {
return 1; // source missing cell
}
if (c2 == null) {
return -1; // target missing cell
}
int result = CellComparator.compareFamilies(c1, c2);
if (result != 0) {
return result;
}
result = CellComparator.compareQualifiers(c1, c2);
if (result != 0) {
return result;
}
// note timestamp comparison is inverted - more recent cells first
return CellComparator.compareTimestamps(c1, c2);
}
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
if (mapperException == null) {
try {
finishRemainingHashRanges(context);
} catch (Throwable t) {
mapperException = t;
}
}
try {
sourceTable.close();
targetTable.close();
sourceConnection.close();
targetConnection.close();
} catch (Throwable t) {
if (mapperException == null) {
mapperException = t;
} else {
LOG.error("Suppressing exception from closing tables", t);
}
}
// propagate first exception
if (mapperException != null) {
Throwables.propagateIfInstanceOf(mapperException, IOException.class);
Throwables.propagateIfInstanceOf(mapperException, InterruptedException.class);
Throwables.propagate(mapperException);
}
}
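/**
* Complete any hash batches that extend past the end of this mapper's split.
* If the open range continues beyond the split's end row, scan the remainder of
* the range directly from the target table before comparing hashes.
*/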
private void finishRemainingHashRanges(Context context) throws IOException,
InterruptedException {
TableSplit split = (TableSplit) context.getInputSplit();
byte[] splitEndRow = split.getEndRow();
boolean reachedEndOfTable = HashTable.isTableEndRow(splitEndRow);
// if there are more hash batches that begin before the end of this split move to them
while (nextSourceKey != null
&& (nextSourceKey.compareTo(splitEndRow) < 0 || reachedEndOfTable)) {
moveToNextBatch(context);
}
if (targetHasher.isBatchStarted()) {
// need to complete the final open hash batch
if ((nextSourceKey != null && nextSourceKey.compareTo(splitEndRow) > 0)
|| (nextSourceKey == null && !Bytes.equals(splitEndRow, sourceTableHash.stopRow))) {
// the open hash range continues past the end of this region
// add a scan to complete the current hash range
Scan scan = sourceTableHash.initScan();
scan.setStartRow(splitEndRow);
if (nextSourceKey == null) {
scan.setStopRow(sourceTableHash.stopRow);
} else {
scan.setStopRow(nextSourceKey.copyBytes());
}
ResultScanner targetScanner = null;
try {
targetScanner = targetTable.getScanner(scan);
for (Result row : targetScanner) {
targetHasher.hashResult(row);
}
} finally {
if (targetScanner != null) {
targetScanner.close();
}
}
} // else current batch ends exactly at split end row
finishBatchAndCompareHashes(context);
}
}
}
private static final int NUM_ARGS = 3;
private static void printUsage(final String errorMsg) {
if (errorMsg != null && errorMsg.length() > 0) {
System.err.println("ERROR: " + errorMsg);
System.err.println();
}
System.err.println("Usage: SyncTable [options] ");
System.err.println();
System.err.println("Options:");
System.err.println(" sourcezkcluster ZK cluster key of the source table");
System.err.println(" (defaults to cluster in classpath's config)");
System.err.println(" targetzkcluster ZK cluster key of the target table");
System.err.println(" (defaults to cluster in classpath's config)");
System.err.println(" dryrun if true, output counters but no writes");
System.err.println(" (defaults to false)");
System.err.println(" doDeletes if false, does not perform deletes");
System.err.println(" (defaults to true)");
System.err.println(" doPuts if false, does not perform puts ");
System.err.println(" (defaults to true)");
System.err.println();
System.err.println("Args:");
System.err.println(" sourcehashdir path to HashTable output dir for source table");
System.err.println(" (see org.apache.hadoop.hbase.mapreduce.HashTable)");
System.err.println(" sourcetable Name of the source table to sync from");
System.err.println(" targettable Name of the target table to sync to");
System.err.println();
System.err.println("Examples:");
System.err.println(" For a dry run SyncTable of tableA from a remote source cluster");
System.err.println(" to a local target cluster:");
System.err.println(" $ bin/hbase " +
"org.apache.hadoop.hbase.mapreduce.SyncTable --dryrun=true"
+ " --sourcezkcluster=zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase"
+ " hdfs://nn:9000/hashes/tableA tableA tableA");
}
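/**
* Parse the trailing three positional arguments and any leading --options.
* Returns false (after printing usage) if the arguments are invalid.
*/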
private boolean doCommandLine(final String[] args) {
if (args.length < NUM_ARGS) {
printUsage(null);
return false;
}
try {
sourceHashDir = new Path(args[args.length - 3]);
sourceTableName = args[args.length - 2];
targetTableName = args[args.length - 1];
for (int i = 0; i < args.length - NUM_ARGS; i++) {
String cmd = args[i];
if (cmd.equals("-h") || cmd.startsWith("--h")) {
printUsage(null);
return false;
}
final String sourceZkClusterKey = "--sourcezkcluster=";
if (cmd.startsWith(sourceZkClusterKey)) {
sourceZkCluster = cmd.substring(sourceZkClusterKey.length());
continue;
}
final String targetZkClusterKey = "--targetzkcluster=";
if (cmd.startsWith(targetZkClusterKey)) {
targetZkCluster = cmd.substring(targetZkClusterKey.length());
continue;
}
final String dryRunKey = "--dryrun=";
if (cmd.startsWith(dryRunKey)) {
dryRun = Boolean.parseBoolean(cmd.substring(dryRunKey.length()));
continue;
}
final String doDeletesKey = "--doDeletes=";
if (cmd.startsWith(doDeletesKey)) {
doDeletes = Boolean.parseBoolean(cmd.substring(doDeletesKey.length()));
continue;
}
final String doPutsKey = "--doPuts=";
if (cmd.startsWith(doPutsKey)) {
doPuts = Boolean.parseBoolean(cmd.substring(doPutsKey.length()));
continue;
}
printUsage("Invalid argument '" + cmd + "'");
return false;
}
} catch (Exception e) {
e.printStackTrace();
printUsage("Can't start because " + e.getMessage());
return false;
}
return true;
}
/**
* Main entry point.
*/
public static void main(String[] args) throws Exception {
int ret = ToolRunner.run(new SyncTable(HBaseConfiguration.create()), args);
System.exit(ret);
}
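/**
* Parse the remaining command-line arguments, run the job, and capture its
* counters on success.
*/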
@Override
public int run(String[] args) throws Exception {
String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
if (!doCommandLine(otherArgs)) {
return 1;
}
Job job = createSubmittableJob(otherArgs);
if (!job.waitForCompletion(true)) {
LOG.info("Map-reduce job failed!");
return 1;
}
counters = job.getCounters();
return 0;
}
}