org.apache.hadoop.hbase.regionserver.RegionMergeTransaction Maven / Gradle / Ivy
Show all versions of hbase-server Show documentation
/**
* Copyright The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_MERGED;
import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_MERGING;
import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REQUEST_REGION_MERGE;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.MetaMutationAnnotation;
import org.apache.hadoop.hbase.RegionTransition;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
import org.apache.hadoop.hbase.regionserver.SplitTransaction.LoggingProgressable;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ConfigUtil;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.apache.zookeeper.data.Stat;
/**
 * Executes region merge as a "transaction". It is similar to
 * SplitTransaction. Call {@link #prepare(RegionServerServices)} to set up the
 * transaction, {@link #execute(Server, RegionServerServices)} to run the
 * transaction and {@link #rollback(Server, RegionServerServices)} to clean up if
 * execute fails.
 *
 * <p>
 * Here is an example of how you would use this class:
 *
 * <pre>
 *  RegionMergeTransaction mt = new RegionMergeTransaction(regionA, regionB, forcible);
 *  if (!mt.prepare(services)) return;
 *  try {
 *    mt.execute(server, services);
 *  } catch (IOException ioe) {
 *    try {
 *      mt.rollback(server, services);
 *      return;
 *    } catch (RuntimeException e) {
 *      myAbortable.abort("Failed merge, abort");
 *    }
 *  }
 * </pre>
 * <p>
 * This class is not thread safe. Caller needs to ensure merge is run by one thread
 * only.
 */
@InterfaceAudience.Private
public class RegionMergeTransaction {
// Logger shared by all merge transactions.
private static final Log LOG = LogFactory.getLog(RegionMergeTransaction.class);
// Info of the merged (result) region. Left null by the constructors; it is
// computed and assigned in prepare() once all the checks have passed.
private HRegionInfo mergedRegionInfo;
// The two regions being merged. The constructor orders them so that
// region_a sorts before region_b.
private final HRegion region_a;
private final HRegion region_b;
// merges dir is under region_a (obtained from region_a's region file system)
private final Path mergesdir;
// Version of the merging znode as last returned by the ZK helpers. Starts at
// -1; a -1 coming back from transitionMergingNode means the znode is gone.
private int znodeVersion = -1;
// We only merge adjacent regions if forcible is false
private final boolean forcible;
// Whether ZK is used for assignment. Decided in execute(): true when there is
// no server, otherwise read from the server's configuration.
private boolean useZKForAssignment;
// Master-side time handed to the constructor; used together with the local
// clock when stamping the hbase:meta edits.
private final long masterSystemTime;
/**
 * Types to add to the transaction journal. Each enum is a step in the merge
 * transaction. Used to figure how much we need to rollback: rollback walks
 * the journal in reverse and undoes each recorded step.
 */
enum JournalEntry {
/**
 * Set region as in transition, set it into MERGING state. Undone by cleaning
 * up the znode, or by reporting MERGE_REVERTED to the master when ZK is not
 * used for assignment.
 */
SET_MERGING_IN_ZK,
/**
 * We created the temporary merge data directory. Undone by re-enabling
 * writes on both merging regions and cleaning up the merges dir.
 */
CREATED_MERGE_DIR,
/**
 * Closed the merging region A. Undone by re-initializing region A.
 */
CLOSED_REGION_A,
/**
 * The merging region A has been taken out of the server's online regions list.
 * Undone by adding region A back to the online regions.
 */
OFFLINED_REGION_A,
/**
 * Closed the merging region B. Undone by re-initializing region B.
 */
CLOSED_REGION_B,
/**
 * The merging region B has been taken out of the server's online regions list.
 * Undone by adding region B back to the online regions.
 */
OFFLINED_REGION_B,
/**
 * Started in on creation of the merged region. Journaled before the creation
 * itself; undone by cleaning up the merged region's files.
 */
STARTED_MERGED_REGION_CREATION,
/**
 * Point of no return. If we got here, then transaction is not recoverable
 * other than by crashing out the regionserver. Rollback returns false as
 * soon as it meets this entry.
 */
PONR
}
/*
 * Journal of how far the merge transaction has progressed. Entries are
 * appended as each step completes and replayed in reverse by rollback.
 */
private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
// Sentinel raised when a merging region turns out to have been closed by
// somebody else. It is compared by identity, so it must never be reassigned.
private static final IOException closedByOtherException = new IOException(
    "Failed to close region: already closed by another thread");
// Coprocessor host used for the merge hooks; resolved lazily from the server
// in execute() and left null when there is no server.
private RegionServerCoprocessorHost rsCoprocessorHost = null;
/**
 * Constructor. Delegates to the four-argument constructor, using the current
 * time from {@link EnvironmentEdgeManager} as the master system time.
 * @param a region a to merge
 * @param b region b to merge
 * @param forcible if false, we will only merge adjacent regions
 */
public RegionMergeTransaction(final HRegion a, final HRegion b,
final boolean forcible) {
this(a, b, forcible, EnvironmentEdgeManager.currentTimeMillis());
}
/**
* Constructor
* @param a region a to merge
* @param b region b to merge
* @param forcible if false, we will only merge adjacent regions
* @param masterSystemTime the time at the master side
*/
public RegionMergeTransaction(final HRegion a, final HRegion b,
final boolean forcible, long masterSystemTime) {
if (a.getRegionInfo().compareTo(b.getRegionInfo()) <= 0) {
this.region_a = a;
this.region_b = b;
} else {
this.region_a = b;
this.region_b = a;
}
this.forcible = forcible;
this.masterSystemTime = masterSystemTime;
this.mergesdir = region_a.getRegionFileSystem().getMergesDir();
}
/**
 * Checks whether the two regions can be merged and, if so, computes the info
 * of the merged region.
 * @param services used to look up merge qualifiers in hbase:meta
 * @return <code>true</code> if the regions are mergeable, else
 *         <code>false</code> if they are not (different tables, same region,
 *         not adjacent while not forcible, not mergeable, or a merge
 *         qualifier is still present in hbase:meta).
 */
public boolean prepare(final RegionServerServices services) {
  final boolean sameTable = region_a.getTableDesc().getTableName()
      .equals(region_b.getTableDesc().getTableName());
  if (!sameTable) {
    LOG.info("Can't merge regions " + region_a + "," + region_b
        + " because they do not belong to the same table");
    return false;
  }

  final HRegionInfo infoOfA = region_a.getRegionInfo();
  final HRegionInfo infoOfB = region_b.getRegionInfo();
  if (infoOfA.equals(infoOfB)) {
    LOG.info("Can't merge the same region " + region_a);
    return false;
  }

  final boolean adjacencyRequired = !forcible;
  if (adjacencyRequired && !HRegionInfo.areAdjacent(infoOfA, infoOfB)) {
    LOG.info("Skip merging " + this.region_a.getRegionNameAsString()
        + " and " + this.region_b.getRegionNameAsString()
        + ", because they are not adjacent.");
    return false;
  }

  if (!(this.region_a.isMergeable() && this.region_b.isMergeable())) {
    return false;
  }

  try {
    // Find the first region (a before b) that still carries a merge qualifier.
    final HRegion stillQualified;
    if (hasMergeQualifierInMeta(services, region_a.getRegionName())) {
      stillQualified = region_a;
    } else if (hasMergeQualifierInMeta(services, region_b.getRegionName())) {
      stillQualified = region_b;
    } else {
      stillQualified = null;
    }
    if (stillQualified != null) {
      LOG.debug("Region " + stillQualified.getRegionNameAsString()
          + " is not mergeable because it has merge qualifier in META");
      return false;
    }
  } catch (IOException e) {
    LOG.warn("Failed judging whether merge transaction is available for "
        + region_a.getRegionNameAsString() + " and "
        + region_b.getRegionNameAsString(), e);
    return false;
  }

  // A parent region of the two merging regions left in hbase:meta used to be
  // a problem (fixing up daughters could bring merged-away daughters back
  // online on master restart). Since HBASE-7721 daughters are no longer fixed
  // up, so nothing needs to be done about it here.

  this.mergedRegionInfo = getMergedRegionInfo(region_a.getRegionInfo(),
      region_b.getRegionInfo());
  return true;
}
/**
 * Run the transaction without a user; delegates to
 * {@link #execute(Server, RegionServerServices, User)} with a null user.
 * Logs a warning when HBase security is enabled, since the user-aware
 * variant should be used in that case.
 * @param server Hosting server instance. Can be null when testing (won't try
 * and update in zk if a null server)
 * @param services Used to online/offline regions.
 * @return merged region
 * @throws IOException If thrown, transaction failed. Call
 * {@link #rollback(Server, RegionServerServices)}
 * @see #rollback(Server, RegionServerServices)
 */
public HRegion execute(final Server server,
final RegionServerServices services) throws IOException {
if (User.isHBaseSecurityEnabled(region_a.getBaseConf())) {
LOG.warn("Should use execute(Server, RegionServerServices, User)");
}
return execute(server, services, null);
}
/**
 * Run the transaction: create the merged region (passing the point of no
 * return), fire the post-merge-commit coprocessor hook, then open the merged
 * region and finish the znode transition.
 * @param server Hosting server instance. Can be null when testing (won't try
 * and update in zk if a null server)
 * @param services Used to online/offline regions.
 * @param user user to run the coprocessor hooks as; when null the hooks are
 * invoked directly
 * @return merged region
 * @throws IOException If thrown, transaction failed. Call
 * {@link #rollback(Server, RegionServerServices, User)}
 */
public HRegion execute(final Server server, final RegionServerServices services, User user)
    throws IOException {
  // Without a server (tests) we default to ZK based assignment.
  useZKForAssignment = server == null
      || ConfigUtil.useZKForAssignment(server.getConfiguration());
  if (rsCoprocessorHost == null && server != null) {
    rsCoprocessorHost = ((HRegionServer) server).getCoprocessorHost();
  }
  final HRegion mergedRegion = createMergedRegion(server, services, user);
  if (rsCoprocessorHost != null) {
    if (user == null) {
      rsCoprocessorHost.postMergeCommit(this.region_a, this.region_b, mergedRegion);
    } else {
      try {
        user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws Exception {
            rsCoprocessorHost.postMergeCommit(region_a, region_b, mergedRegion);
            return null;
          }
        });
      } catch (InterruptedException ie) {
        // Surface the interruption as an IOException subtype, keeping the cause.
        InterruptedIOException iioe = new InterruptedIOException();
        iioe.initCause(ie);
        throw iioe;
      }
    }
  }
  stepsAfterPONR(server, services, mergedRegion, user);
  return mergedRegion;
}
/**
 * Runs the steps that follow the point of no return without a user.
 * @param server Hosting server instance. Can be null when testing
 * @param services Used to online/offline regions.
 * @param mergedRegion the merged region to open
 * @throws IOException if opening the merged region or the znode transition fails
 * @deprecated use
 * {@link #stepsAfterPONR(Server, RegionServerServices, HRegion, User)}; this
 * variant simply passes a null user to it.
 */
@Deprecated
public void stepsAfterPONR(final Server server, final RegionServerServices services,
final HRegion mergedRegion) throws IOException {
stepsAfterPONR(server, services, mergedRegion, null);
}
/**
 * Runs the steps that follow the point of no return: opens the merged region
 * and then finishes the znode transition (which also fires the post-merge
 * coprocessor hook).
 * @param server Hosting server instance. Can be null when testing
 * @param services Used to online/offline regions.
 * @param mergedRegion the merged region to open
 * @param user user to run the coprocessor hook as; may be null
 * @throws IOException if either step fails
 */
public void stepsAfterPONR(final Server server, final RegionServerServices services,
final HRegion mergedRegion, User user) throws IOException {
openMergedRegion(server, services, mergedRegion);
transitionZKNode(server, services, mergedRegion, user);
}
/**
 * Prepare the merged region and region files, then pass the point of no
 * return by recording the merge in hbase:meta (or reporting it to the master
 * when ZK is not used for assignment).
 * @param server Hosting server instance. Can be null when testing (won't try
 * and update in zk if a null server)
 * @param services Used to online/offline regions.
 * @param user user to run the coprocessor hooks as; when null the hooks are
 * invoked directly
 * @return merged region
 * @throws IOException If thrown, transaction failed. Call
 * {@link #rollback(Server, RegionServerServices)}
 */
HRegion createMergedRegion(final Server server,
    final RegionServerServices services, User user) throws IOException {
  LOG.info("Starting merge of " + region_a + " and "
      + region_b.getRegionNameAsString() + ", forcible=" + forcible);
  if ((server != null && server.isStopped())
      || (services != null && services.isStopping())) {
    throw new IOException("Server is stopped or stopping");
  }

  if (rsCoprocessorHost != null) {
    boolean ret = false;
    if (user == null) {
      ret = rsCoprocessorHost.preMerge(region_a, region_b);
    } else {
      try {
        ret = user.getUGI().doAs(new PrivilegedExceptionAction<Boolean>() {
          @Override
          public Boolean run() throws Exception {
            return rsCoprocessorHost.preMerge(region_a, region_b);
          }
        });
      } catch (InterruptedException ie) {
        InterruptedIOException iioe = new InterruptedIOException();
        iioe.initCause(ie);
        throw iioe;
      }
    }
    // A true return from the hook means the coprocessor wants the merge bypassed.
    if (ret) {
      throw new IOException("Coprocessor bypassing regions " + this.region_a + " "
          + this.region_b + " merge.");
    }
  }

  // If true, no cluster to write meta edits to or to update znodes in.
  boolean testing = server == null
      || server.getConfiguration().getBoolean("hbase.testing.nocluster", false);

  HRegion mergedRegion = stepsBeforePONR(server, services, testing);

  // Extra hbase:meta mutations a coprocessor may want applied with the merge.
  @MetaMutationAnnotation
  final List<Mutation> metaEntries = new ArrayList<Mutation>();
  if (rsCoprocessorHost != null) {
    boolean ret = false;
    if (user == null) {
      ret = rsCoprocessorHost.preMergeCommit(region_a, region_b, metaEntries);
    } else {
      try {
        ret = user.getUGI().doAs(new PrivilegedExceptionAction<Boolean>() {
          @Override
          public Boolean run() throws Exception {
            return rsCoprocessorHost.preMergeCommit(region_a, region_b, metaEntries);
          }
        });
      } catch (InterruptedException ie) {
        InterruptedIOException iioe = new InterruptedIOException();
        iioe.initCause(ie);
        throw iioe;
      }
    }
    if (ret) {
      throw new IOException("Coprocessor bypassing regions " + this.region_a + " "
          + this.region_b + " merge.");
    }
    // Every coprocessor-supplied mutation must target a row that parses as a
    // region name, i.e. a row of hbase:meta.
    try {
      for (Mutation p : metaEntries) {
        HRegionInfo.parseRegionName(p.getRow());
      }
    } catch (IOException e) {
      LOG.error("Row key of mutation from coprocessor is not parsable as region name. "
          + "Mutations from coprocessor should only be for hbase:meta table.", e);
      throw e;
    }
  }

  // This is the point of no return. Similar with SplitTransaction.
  // IF we reach the PONR then subsequent failures need to crash out this
  // regionserver
  this.journal.add(JournalEntry.PONR);

  // Add merged region and delete region_a and region_b
  // as an atomic update. See HBASE-7721. This update to hbase:meta is what
  // determines whether the region is merged or not in case of failures.
  // If it is successful, master will roll-forward, if not, master will
  // rollback
  if (!testing && useZKForAssignment) {
    if (metaEntries.isEmpty()) {
      MetaEditor.mergeRegions(server.getCatalogTracker(), mergedRegion.getRegionInfo(),
          region_a.getRegionInfo(), region_b.getRegionInfo(), server.getServerName(),
          masterSystemTime);
    } else {
      mergeRegionsAndPutMetaEntries(server.getCatalogTracker(), mergedRegion.getRegionInfo(),
          region_a.getRegionInfo(), region_b.getRegionInfo(), server.getServerName(),
          metaEntries);
    }
  } else if (services != null && !useZKForAssignment) {
    if (!services.reportRegionStateTransition(TransitionCode.MERGE_PONR,
        mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo())) {
      // Passed PONR, let SSH clean it up
      throw new IOException("Failed to notify master that merge passed PONR: "
          + region_a.getRegionInfo().getRegionNameAsString() + " and "
          + region_b.getRegionInfo().getRegionNameAsString());
    }
  }
  return mergedRegion;
}
/**
 * Appends the standard merge mutations to the coprocessor-supplied ones and
 * writes the whole list to hbase:meta in one call.
 * @param catalogTracker used to reach hbase:meta
 * @param mergedRegion info of the merged region
 * @param regionA info of merging region A
 * @param regionB info of merging region B
 * @param serverName server that will host the merged region
 * @param metaEntries mutations supplied by the coprocessor; the merge
 * mutations are added to this list
 * @throws IOException if building or applying the mutations fails
 */
private void mergeRegionsAndPutMetaEntries(CatalogTracker catalogTracker,
    HRegionInfo mergedRegion, HRegionInfo regionA, HRegionInfo regionB, ServerName serverName,
    List<Mutation> metaEntries) throws IOException {
  prepareMutationsForMerge(mergedRegion, regionA, regionB, serverName, metaEntries);
  MetaEditor.mutateMetaTable(catalogTracker, metaEntries);
}
/**
 * Builds the hbase:meta mutations that record a merge and appends them to
 * {@code mutations}: one Put for the merged region (carrying both merging
 * regions and its location) followed by a Delete for each merging region.
 * @param mergedRegion info of the merged region
 * @param regionA info of merging region A
 * @param regionB info of merging region B
 * @param serverName server that hosts the merged region
 * @param mutations list the new mutations are appended to
 * @throws IOException declared for callers; propagated from the meta helpers
 */
public void prepareMutationsForMerge(HRegionInfo mergedRegion, HRegionInfo regionA,
    HRegionInfo regionB, ServerName serverName, List<Mutation> mutations) throws IOException {
  HRegionInfo copyOfMerged = new HRegionInfo(mergedRegion);

  // use the maximum of what master passed us vs local time.
  long time = Math.max(EnvironmentEdgeManager.currentTimeMillis(), masterSystemTime);

  // Put for the merged region, remembering which two regions it came from
  Put putOfMerged = MetaEditor.makePutFromRegionInfo(copyOfMerged, time);
  putOfMerged.add(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER, regionA.toByteArray());
  putOfMerged.add(HConstants.CATALOG_FAMILY, HConstants.MERGEB_QUALIFIER, regionB.toByteArray());
  mutations.add(putOfMerged);

  // Deletes for merging regions
  Delete deleteA = MetaEditor.makeDeleteFromRegionInfo(regionA, time);
  Delete deleteB = MetaEditor.makeDeleteFromRegionInfo(regionB, time);
  mutations.add(deleteA);
  mutations.add(deleteB);

  // The merged is a new region, openSeqNum = 1 is fine. The Put is already in
  // the list; it is the same object, so the location still ends up in it.
  addLocation(putOfMerged, serverName, 1);
}
/**
 * Writes a region location into the given Put: the server's host and port,
 * its start code and the open sequence number, all under the catalog family.
 * @param p Put to add the location cells to
 * @param sn server hosting the region
 * @param openSeqNum sequence number the region was opened with
 * @return the same Put that was passed in
 */
@SuppressWarnings("deprecation")
public Put addLocation(final Put p, final ServerName sn, long openSeqNum) {
  final byte[] family = HConstants.CATALOG_FAMILY;

  final byte[] hostAndPort = Bytes.toBytes(sn.getHostAndPort());
  p.add(family, HConstants.SERVER_QUALIFIER, hostAndPort);

  final byte[] startCode = Bytes.toBytes(sn.getStartcode());
  p.add(family, HConstants.STARTCODE_QUALIFIER, startCode);

  final byte[] seqNum = Bytes.toBytes(openSeqNum);
  p.add(family, HConstants.SEQNUM_QUALIFIER, seqNum);

  return p;
}
/**
 * Runs every step of the merge that comes before the point of no return:
 * announces the merge (znode or master report), creates the merges dir,
 * closes and offlines both regions, creates the reference files and finally
 * creates the merged region from the merges dir. Each completed step is
 * journaled so that rollback knows what to undo.
 * @param server Hosting server instance. Can be null when testing
 * @param services Used to online/offline regions.
 * @param testing true when there is no cluster to talk to
 * @return the merged region (not yet opened)
 * @throws IOException If thrown, transaction failed. Call
 * {@link #rollback(Server, RegionServerServices)}
 */
public HRegion stepsBeforePONR(final Server server, final RegionServerServices services,
    boolean testing) throws IOException {
  // Set ephemeral MERGING znode up in zk. Mocked servers sometimes don't
  // have zookeeper so don't do zk stuff if server or zookeeper is null
  if (useZKAndZKIsSet(server)) {
    try {
      createNodeMerging(server.getZooKeeper(), this.mergedRegionInfo,
          server.getServerName(), region_a.getRegionInfo(), region_b.getRegionInfo());
    } catch (KeeperException e) {
      throw new IOException("Failed creating PENDING_MERGE znode on "
          + this.mergedRegionInfo.getRegionNameAsString(), e);
    }
  } else if (services != null && !useZKForAssignment) {
    if (!services.reportRegionStateTransition(TransitionCode.READY_TO_MERGE,
        mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo())) {
      throw new IOException("Failed to get ok from master to merge "
          + region_a.getRegionInfo().getRegionNameAsString() + " and "
          + region_b.getRegionInfo().getRegionNameAsString());
    }
  }
  this.journal.add(JournalEntry.SET_MERGING_IN_ZK);
  if (useZKAndZKIsSet(server)) {
    // After creating the merge node, wait for master to transition it
    // from PENDING_MERGE to MERGING so that we can move on. We want master
    // knows about it and won't transition any region which is merging.
    znodeVersion = getZKNode(server, services);
  }

  this.region_a.getRegionFileSystem().createMergesDir();
  this.journal.add(JournalEntry.CREATED_MERGE_DIR);

  // Family name -> store files, as handed back by closing each region.
  Map<byte[], List<StoreFile>> hstoreFilesOfRegionA = closeAndOfflineRegion(
      services, this.region_a, true, testing);
  Map<byte[], List<StoreFile>> hstoreFilesOfRegionB = closeAndOfflineRegion(
      services, this.region_b, false, testing);

  assert hstoreFilesOfRegionA != null && hstoreFilesOfRegionB != null;

  //
  // mergeStoreFiles creates merged region dirs under the region_a merges dir
  // Nothing to unroll here if failure -- clean up of CREATE_MERGE_DIR will
  // clean this up.
  mergeStoreFiles(hstoreFilesOfRegionA, hstoreFilesOfRegionB);

  // useZKAndZKIsSet already covers the null-server case.
  if (useZKAndZKIsSet(server)) {
    try {
      // Do one more check on the merging znode (before it is too late) in case
      // any merging region is moved somehow. If so, the znode transition will fail.
      this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
          this.mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo(),
          server.getServerName(), this.znodeVersion,
          RS_ZK_REGION_MERGING, RS_ZK_REGION_MERGING);
    } catch (KeeperException e) {
      throw new IOException("Failed setting MERGING znode on "
          + this.mergedRegionInfo.getRegionNameAsString(), e);
    }
  }

  // Log to the journal that we are creating merged region. We could fail
  // halfway through. If we do, we could have left
  // stuff in fs that needs cleanup -- a storefile or two. Thats why we
  // add entry to journal BEFORE rather than AFTER the change.
  this.journal.add(JournalEntry.STARTED_MERGED_REGION_CREATION);
  HRegion mergedRegion = createMergedRegionFromMerges(this.region_a,
      this.region_b, this.mergedRegionInfo);
  return mergedRegion;
}
/**
 * Create a merged region from the merges directory under region a. This is a
 * thin delegation to region a, kept as its own method so that tests can mock
 * it.
 * @param a merging region a; the call is delegated to it
 * @param b merging region b
 * @param mergedRegion hri of merged region
 * @return merged HRegion.
 * @throws IOException if region a fails to create the merged region
 */
HRegion createMergedRegionFromMerges(final HRegion a, final HRegion b,
final HRegionInfo mergedRegion) throws IOException {
return a.createMergedRegionFromMerges(mergedRegion, b);
}
/**
* Close the merging region and offline it in regionserver
* @param services
* @param region
* @param isRegionA true if it is merging region a, false if it is region b
* @param testing true if it is testing
* @return a map of family name to list of store files
* @throws IOException
*/
private Map> closeAndOfflineRegion(
final RegionServerServices services, final HRegion region,
final boolean isRegionA, final boolean testing) throws IOException {
Map> hstoreFilesToMerge = null;
Exception exceptionToThrow = null;
try {
hstoreFilesToMerge = region.close(false);
} catch (Exception e) {
exceptionToThrow = e;
}
if (exceptionToThrow == null && hstoreFilesToMerge == null) {
// The region was closed by a concurrent thread. We can't continue
// with the merge, instead we must just abandon the merge. If we
// reopen or merge this could cause problems because the region has
// probably already been moved to a different server, or is in the
// process of moving to a different server.
exceptionToThrow = closedByOtherException;
}
if (exceptionToThrow != closedByOtherException) {
this.journal.add(isRegionA ? JournalEntry.CLOSED_REGION_A
: JournalEntry.CLOSED_REGION_B);
}
if (exceptionToThrow != null) {
if (exceptionToThrow instanceof IOException)
throw (IOException) exceptionToThrow;
throw new IOException(exceptionToThrow);
}
if (!testing) {
services.removeFromOnlineRegions(region, null);
}
this.journal.add(isRegionA ? JournalEntry.OFFLINED_REGION_A
: JournalEntry.OFFLINED_REGION_B);
return hstoreFilesToMerge;
}
/**
 * Computes the region info of the region that results from merging the two
 * given regions: same table, the start key of whichever region sorts first,
 * the larger of the two end keys (an empty end key counts as largest), and a
 * region id based on the current time.
 * @param a merging region A
 * @param b merging region B
 * @return the merged region info
 */
public static HRegionInfo getMergedRegionInfo(final HRegionInfo a,
    final HRegionInfo b) {
  long mergedId = EnvironmentEdgeManager.currentTimeMillis();
  // Regionid is timestamp. Merged region's id can't be less than that of
  // merging regions else will insert at wrong location in hbase:meta
  final long newestParentId = Math.max(a.getRegionId(), b.getRegionId());
  if (mergedId < newestParentId) {
    LOG.warn("Clock skew; merging regions id are " + a.getRegionId()
        + " and " + b.getRegionId() + ", but current time here is " + mergedId);
    mergedId = newestParentId + 1;
  }

  // Start key comes from whichever region sorts first.
  final HRegionInfo lower = a.compareTo(b) <= 0 ? a : b;
  final byte[] startKey = lower.getStartKey();

  // End key is the bigger one; an empty end key means "end of table".
  final byte[] endOfA = a.getEndKey();
  final byte[] endOfB = b.getEndKey();
  final boolean aEndsLast;
  if (Bytes.equals(endOfA, HConstants.EMPTY_BYTE_ARRAY)) {
    aEndsLast = true;
  } else if (Bytes.equals(endOfB, HConstants.EMPTY_BYTE_ARRAY)) {
    aEndsLast = false;
  } else {
    aEndsLast = Bytes.compareTo(endOfA, endOfB) > 0;
  }
  final byte[] endKey = aEndsLast ? endOfA : endOfB;

  // Merged region is sorted between two merging regions in META
  return new HRegionInfo(a.getTable(), startKey, endKey, false, mergedId);
}
/**
 * Opens the merged region (the time consuming part) and then brings it
 * online: runs the post-open deploy tasks when ZK is used for assignment,
 * otherwise reports MERGED to the master. Does nothing but log when the
 * server is stopped or stopping.
 * @param server Hosting server instance. Can be null when testing (won't try
 * and update in zk if a null server)
 * @param services Used to online/offline regions.
 * @param merged the merged region
 * @throws IOException if the open, the deploy tasks or the report to the
 * master fails
 */
void openMergedRegion(final Server server,
    final RegionServerServices services, HRegion merged) throws IOException {
  final boolean stopped = server != null && server.isStopped();
  final boolean stopping = services != null && services.isStopping();
  if (stopped || stopping) {
    LOG.info("Not opening merged region " + merged.getRegionNameAsString()
        + " because stopping=" + stopping + ", stopped=" + stopped);
    return;
  }

  final HRegionInfo hri = merged.getRegionInfo();
  // Progress is only logged when there is a server to read the interval from.
  LoggingProgressable reporter = null;
  if (server != null) {
    final long logInterval = server.getConfiguration().getLong(
        "hbase.regionserver.regionmerge.open.log.interval", 10000);
    reporter = new LoggingProgressable(hri, logInterval);
  }
  merged.openHRegion(reporter);

  if (services == null) {
    return;
  }
  try {
    if (useZKForAssignment) {
      services.postOpenDeployTasks(merged, server.getCatalogTracker());
    } else {
      final boolean acknowledged = services.reportRegionStateTransition(
          TransitionCode.MERGED, mergedRegionInfo, region_a.getRegionInfo(),
          region_b.getRegionInfo());
      if (!acknowledged) {
        throw new IOException("Failed to report merged region to master: "
            + mergedRegionInfo.getShortNameToLog());
      }
    }
    services.addToOnlineRegions(merged);
  } catch (KeeperException ke) {
    throw new IOException(ke);
  }
}
/**
 * Finish off merge transaction: transition the znode to MERGED, wait for the
 * master to process it, then fire the post-merge coprocessor hook.
 * @param server Hosting server instance. Can be null when testing (won't try
 * and update in zk if a null server)
 * @param services Used to online/offline regions.
 * @param mergedRegion the merged region, passed to the coprocessor hook
 * @param user user to run the coprocessor hook as; when null the hook is
 * invoked directly
 * @throws IOException if telling the master about the merge, or the
 * coprocessor hook, fails
 */
void transitionZKNode(final Server server, final RegionServerServices services,
    final HRegion mergedRegion, User user) throws IOException {
  if (useZKAndZKIsSet(server)) {
    // Tell master about merge by updating zk. If we fail, abort.
    try {
      this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
          this.mergedRegionInfo, region_a.getRegionInfo(),
          region_b.getRegionInfo(), server.getServerName(), this.znodeVersion,
          RS_ZK_REGION_MERGING, RS_ZK_REGION_MERGED);

      long startTime = EnvironmentEdgeManager.currentTimeMillis();
      int spins = 0;
      // Now wait for the master to process the merge. We know it's done
      // when the znode is deleted. The reason we keep tickling the znode is
      // that it's possible for the master to miss an event.
      do {
        if (spins % 10 == 0) {
          LOG.debug("Still waiting on the master to process the merge for "
              + this.mergedRegionInfo.getEncodedName() + ", waited "
              + (EnvironmentEdgeManager.currentTimeMillis() - startTime) + "ms");
        }
        Thread.sleep(100);
        // When this returns -1 it means the znode doesn't exist
        this.znodeVersion = transitionMergingNode(server.getZooKeeper(),
            this.mergedRegionInfo, region_a.getRegionInfo(),
            region_b.getRegionInfo(), server.getServerName(), this.znodeVersion,
            RS_ZK_REGION_MERGED, RS_ZK_REGION_MERGED);
        spins++;
      } while (this.znodeVersion != -1 && !server.isStopped()
          && !services.isStopping());
    } catch (Exception e) {
      // Keep the interrupt status visible to callers further up.
      if (e instanceof InterruptedException) {
        Thread.currentThread().interrupt();
      }
      throw new IOException("Failed telling master about merge "
          + mergedRegionInfo.getEncodedName(), e);
    }
  }

  if (rsCoprocessorHost != null) {
    if (user == null) {
      rsCoprocessorHost.postMerge(region_a, region_b, mergedRegion);
    } else {
      try {
        user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws Exception {
            rsCoprocessorHost.postMerge(region_a, region_b, mergedRegion);
            return null;
          }
        });
      } catch (InterruptedException ie) {
        InterruptedIOException iioe = new InterruptedIOException();
        iioe.initCause(ie);
        throw iioe;
      }
    }
  }

  // Leaving here, the mergedir with its dross will be in place but since the
  // merge was successful, just leave it; it'll be cleaned when region_a is
  // cleaned up by CatalogJanitor on master
}
/**
 * Wait for the merging node to be transitioned from pending_merge
 * to merging by master. That's how we are sure master has processed
 * the event and is good with us to move on. If we don't get any update,
 * we periodically transition the node so that master gets the callback.
 * If the node is removed or is not in pending_merge state any more,
 * we abort the merge.
 * @param server hosting server; supplies the ZooKeeper watcher and our name
 * @param services used to notice that the region server is stopping
 * @return the version of the znode once the master has moved it to MERGING
 * @throws IOException if the node disappears, belongs to another server or
 * other regions, leaves the merging states, the server stops, or the wait is
 * interrupted; every failure is wrapped in a "Failed getting MERGING znode"
 * IOException
 */
private int getZKNode(final Server server,
    final RegionServerServices services) throws IOException {
  // Wait for the master to process the pending_merge.
  try {
    int spins = 0;
    Stat stat = new Stat();
    ZooKeeperWatcher zkw = server.getZooKeeper();
    ServerName expectedServer = server.getServerName();
    String node = mergedRegionInfo.getEncodedName();
    while (!(server.isStopped() || services.isStopping())) {
      if (spins % 5 == 0) {
        LOG.debug("Still waiting for master to process "
            + "the pending_merge for " + node);
        // Re-assert the request so the master gets another callback.
        transitionMergingNode(zkw, mergedRegionInfo, region_a.getRegionInfo(),
            region_b.getRegionInfo(), expectedServer, -1, RS_ZK_REQUEST_REGION_MERGE,
            RS_ZK_REQUEST_REGION_MERGE);
      }
      Thread.sleep(100);
      spins++;
      byte[] data = ZKAssign.getDataNoWatch(zkw, node, stat);
      if (data == null) {
        throw new IOException("Data is null, merging node "
            + node + " no longer exists");
      }
      RegionTransition rt = RegionTransition.parseFrom(data);
      EventType et = rt.getEventType();
      if (et == RS_ZK_REGION_MERGING) {
        ServerName serverName = rt.getServerName();
        if (!serverName.equals(expectedServer)) {
          throw new IOException("Merging node " + node + " is for "
              + serverName + ", not us " + expectedServer);
        }
        // The payload is read as three region infos; entries 1 and 2 are the
        // merging regions that get compared against ours.
        byte[] payloadOfMerging = rt.getPayload();
        List<HRegionInfo> mergingRegions = HRegionInfo.parseDelimitedFrom(
            payloadOfMerging, 0, payloadOfMerging.length);
        assert mergingRegions.size() == 3;
        HRegionInfo a = mergingRegions.get(1);
        HRegionInfo b = mergingRegions.get(2);
        HRegionInfo hri_a = region_a.getRegionInfo();
        HRegionInfo hri_b = region_b.getRegionInfo();
        if (!(hri_a.equals(a) && hri_b.equals(b))) {
          throw new IOException("Merging node " + node + " is for " + a + ", "
              + b + ", not expected regions: " + hri_a + ", " + hri_b);
        }
        // Master has processed it.
        return stat.getVersion();
      }
      if (et != RS_ZK_REQUEST_REGION_MERGE) {
        throw new IOException("Merging node " + node
            + " moved out of merging to " + et);
      }
    }
    // Server is stopping/stopped
    throw new IOException("Server is "
        + (services.isStopping() ? "stopping" : "stopped"));
  } catch (Exception e) {
    if (e instanceof InterruptedException) {
      Thread.currentThread().interrupt();
    }
    throw new IOException("Failed getting MERGING znode on "
        + mergedRegionInfo.getRegionNameAsString(), e);
  }
}
/**
* Create reference file(s) of merging regions under the region_a merges dir
* @param hstoreFilesOfRegionA
* @param hstoreFilesOfRegionB
* @throws IOException
*/
private void mergeStoreFiles(
Map> hstoreFilesOfRegionA,
Map> hstoreFilesOfRegionB)
throws IOException {
// Create reference file(s) of region A in mergdir
HRegionFileSystem fs_a = this.region_a.getRegionFileSystem();
for (Map.Entry> entry : hstoreFilesOfRegionA
.entrySet()) {
String familyName = Bytes.toString(entry.getKey());
for (StoreFile storeFile : entry.getValue()) {
fs_a.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
this.mergesdir);
}
}
// Create reference file(s) of region B in mergedir
HRegionFileSystem fs_b = this.region_b.getRegionFileSystem();
for (Map.Entry> entry : hstoreFilesOfRegionB
.entrySet()) {
String familyName = Bytes.toString(entry.getKey());
for (StoreFile storeFile : entry.getValue()) {
fs_b.mergeStoreFile(this.mergedRegionInfo, familyName, storeFile,
this.mergesdir);
}
}
}
/**
 * Rolls the transaction back without a user; delegates to
 * {@link #rollback(Server, RegionServerServices, User)} with a null user.
 * Logs a warning when HBase security is enabled, since the user-aware
 * variant should be used in that case.
 * @param server Hosting server instance (May be null when testing).
 * @param services Services of regionserver, used to online regions.
 * @throws IOException If thrown, rollback failed. Take drastic action.
 * @return True if we successfully rolled back, false if we got to the point
 * of no return and so now need to abort the server to minimize
 * damage.
 */
@SuppressWarnings("deprecation")
public boolean rollback(final Server server,
    final RegionServerServices services) throws IOException {
  if (User.isHBaseSecurityEnabled(region_a.getBaseConf())) {
    // Point at the rollback overload, not at execute.
    LOG.warn("Should use rollback(Server, RegionServerServices, User)");
  }
  return rollback(server, services, null);
}
/**
 * Rolls the transaction back by undoing the journaled steps in reverse
 * order, surrounded by the pre/post rollback coprocessor hooks.
 * @param server Hosting server instance (May be null when testing).
 * @param services Services of regionserver, used to online regions.
 * @param user user to run the coprocessor hooks as; when null the hooks are
 * invoked directly
 * @throws IOException If thrown, rollback failed. Take drastic action.
 * @return True if we successfully rolled back, false if we got to the point
 * of no return (or the master refused the revert) and so now need to
 * abort the server to minimize damage.
 */
public boolean rollback(final Server server,
    final RegionServerServices services, User user) throws IOException {
  assert this.mergedRegionInfo != null;
  // Coprocessor callback
  if (rsCoprocessorHost != null) {
    if (user == null) {
      rsCoprocessorHost.preRollBackMerge(region_a, region_b);
    } else {
      try {
        user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws Exception {
            rsCoprocessorHost.preRollBackMerge(region_a, region_b);
            return null;
          }
        });
      } catch (InterruptedException ie) {
        InterruptedIOException iioe = new InterruptedIOException();
        iioe.initCause(ie);
        throw iioe;
      }
    }
  }

  ListIterator<JournalEntry> iterator = this.journal
      .listIterator(this.journal.size());
  // Iterate in reverse.
  while (iterator.hasPrevious()) {
    JournalEntry je = iterator.previous();
    switch (je) {

      case SET_MERGING_IN_ZK:
        if (useZKAndZKIsSet(server)) {
          cleanZK(server, this.mergedRegionInfo);
        } else if (services != null && !useZKForAssignment
            && !services.reportRegionStateTransition(TransitionCode.MERGE_REVERTED,
                mergedRegionInfo, region_a.getRegionInfo(), region_b.getRegionInfo())) {
          // Master did not accept the revert; the caller has to abort.
          return false;
        }
        break;

      case CREATED_MERGE_DIR:
        this.region_a.writestate.writesEnabled = true;
        this.region_b.writestate.writesEnabled = true;
        this.region_a.getRegionFileSystem().cleanupMergesDir();
        break;

      case CLOSED_REGION_A:
        try {
          // So, this returns a seqid but if we just closed and then reopened,
          // we should be ok. On close, we flushed using sequenceid obtained
          // from hosting regionserver so no need to propagate the sequenceid
          // returned out of initialize below up into regionserver as we
          // normally do.
          this.region_a.initialize();
        } catch (IOException e) {
          LOG.error("Failed rollbacking CLOSED_REGION_A of region "
              + this.region_a.getRegionNameAsString(), e);
          throw new RuntimeException(e);
        }
        break;

      case OFFLINED_REGION_A:
        if (services != null) {
          services.addToOnlineRegions(this.region_a);
        }
        break;

      case CLOSED_REGION_B:
        try {
          this.region_b.initialize();
        } catch (IOException e) {
          LOG.error("Failed rollbacking CLOSED_REGION_B of region "
              + this.region_b.getRegionNameAsString(), e);
          throw new RuntimeException(e);
        }
        break;

      case OFFLINED_REGION_B:
        if (services != null) {
          services.addToOnlineRegions(this.region_b);
        }
        break;

      case STARTED_MERGED_REGION_CREATION:
        this.region_a.getRegionFileSystem().cleanupMergedRegion(
            this.mergedRegionInfo);
        break;

      case PONR:
        // We got to the point-of-no-return so we need to just abort. Return
        // immediately. Do not clean up created merged regions.
        return false;

      default:
        throw new RuntimeException("Unhandled journal entry: " + je);
    }
  }
  // Coprocessor callback
  if (rsCoprocessorHost != null) {
    if (user == null) {
      rsCoprocessorHost.postRollBackMerge(region_a, region_b);
    } else {
      try {
        user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws Exception {
            rsCoprocessorHost.postRollBackMerge(region_a, region_b);
            return null;
          }
        });
      } catch (InterruptedException ie) {
        InterruptedIOException iioe = new InterruptedIOException();
        iioe.initCause(ie);
        throw iioe;
      }
    }
  }

  // Every journaled step was undone.
  return true;
}
/**
 * @return the region info currently held in this transaction's
 *         <code>mergedRegionInfo</code> field
 */
HRegionInfo getMergedRegionInfo() {
  return mergedRegionInfo;
}
/**
 * Accessor exposed for unit testing.
 *
 * @return the path currently held in this transaction's
 *         <code>mergesdir</code> field
 */
Path getMergesDir() {
  return mergesdir;
}
/**
 * Tells whether ZooKeeper should be touched for the given server.
 *
 * @param server server to inspect; may be null
 * @return true only when <code>server</code> is non-null, ZK-based
 *         assignment is enabled, and the server hands back a non-null
 *         ZooKeeper watcher
 */
private boolean useZKAndZKIsSet(final Server server) {
  // Without a server, or with ZK assignment switched off, the watcher is
  // never consulted.
  if (server == null || !useZKForAssignment) {
    return false;
  }
  return server.getZooKeeper() != null;
}
/**
 * Removes the merge znode of the given region as part of a rollback.
 * Deletion is first attempted for a node in the RS_ZK_REQUEST_REGION_MERGE
 * state; if that reports nothing deleted, it is attempted again for the
 * RS_ZK_REGION_MERGING state.
 *
 * A missing node is only logged; any other ZooKeeper failure aborts the
 * server.
 *
 * @param server server whose ZooKeeper watcher and server name are used
 * @param hri region whose znode should be removed
 */
private static void cleanZK(final Server server, final HRegionInfo hri) {
  try {
    // Only delete if its in expected state; could have been hijacked.
    final boolean removedRequestNode = ZKAssign.deleteNode(
        server.getZooKeeper(), hri.getEncodedName(),
        RS_ZK_REQUEST_REGION_MERGE, server.getServerName());
    if (removedRequestNode) {
      return;
    }
    // Nothing was removed in the requested state; try the merging state.
    ZKAssign.deleteNode(
        server.getZooKeeper(), hri.getEncodedName(),
        RS_ZK_REGION_MERGING, server.getServerName());
  } catch (KeeperException.NoNodeException missing) {
    LOG.info("Failed cleanup zk node of " + hri.getRegionNameAsString(), missing);
  } catch (KeeperException zkFailure) {
    server.abort("Failed cleanup zk node of " + hri.getRegionNameAsString(), zkFailure);
  }
}
/**
 * Creates a new ephemeral znode, carrying an RS_ZK_REQUEST_REGION_MERGE
 * transition (logged as the PENDING_MERGE state), for the merged region.
 * The node is ephemeral so that it goes away if the regionserver dies
 * mid-merge.
 * <p>
 * The transition payload is the delimited serialization of the merged
 * region followed by the two regions being merged. This method does not
 * transition nodes from other states: when the ephemeral node cannot be
 * created (presumably because a node already exists for this region), an
 * {@link IOException} is thrown.
 *
 * @param zkw zk reference
 * @param region the merged region the znode is created for
 * @param serverName server event originates from
 * @param a merging region A
 * @param b merging region B
 * @throws KeeperException if an unexpected zookeeper exception occurs
 * @throws IOException if the ephemeral node could not be created
 */
public static void createNodeMerging(final ZooKeeperWatcher zkw, final HRegionInfo region,
    final ServerName serverName, final HRegionInfo a,
    final HRegionInfo b) throws KeeperException, IOException {
  LOG.debug(zkw.prefix("Creating ephemeral node for "
      + region.getEncodedName() + " in PENDING_MERGE state"));

  final byte[] serializedRegions = HRegionInfo.toDelimitedByteArray(region, a, b);
  final RegionTransition transition = RegionTransition.createRegionTransition(
      RS_ZK_REQUEST_REGION_MERGE, region.getRegionName(), serverName, serializedRegions);
  final String znodePath = ZKAssign.getNodeName(zkw, region.getEncodedName());

  final boolean created =
      ZKUtil.createEphemeralNodeAndWatch(zkw, znodePath, transition.toByteArray());
  if (!created) {
    throw new IOException("Failed create of ephemeral " + znodePath);
  }
}
/**
 * Transitions the existing ephemeral znode of the merged region from the
 * given begin state to the given end state, attaching the delimited
 * serialization of the merged region and its two parents as payload.
 * <p>
 * The actual transition is delegated to <code>ZKAssign.transitionNode</code>
 * and its result is returned unchanged. As documented for that call, the
 * transition is expected to fail (returning -1) when:
 * <ul>
 * <li>the node for this region does not exist,</li>
 * <li>the node for this region is not in the begin state, or</li>
 * <li>the update fails after the begin state was verified because the
 * version does not match (not expected in practice, since a regionserver
 * only performs this transition after it moved the node to the begin state
 * itself).</li>
 * </ul>
 * <p>
 * This method should only be used by a RegionServer when merging two
 * regions.
 *
 * @param zkw zk reference
 * @param merged the merged region whose znode is transitioned
 * @param a merging region A
 * @param b merging region B
 * @param serverName server event originates from
 * @param znodeVersion expected version of data before modification
 * @param beginState the expected current state the znode should be
 * @param endState the state to be transition to
 * @return version of node after transition, -1 if unsuccessful transition
 * @throws KeeperException if unexpected zookeeper exception
 * @throws IOException if the region infos cannot be serialized
 */
public static int transitionMergingNode(ZooKeeperWatcher zkw,
    HRegionInfo merged, HRegionInfo a, HRegionInfo b, ServerName serverName,
    final int znodeVersion, final EventType beginState,
    final EventType endState) throws KeeperException, IOException {
  return ZKAssign.transitionNode(zkw, merged, serverName,
      beginState, endState, znodeVersion,
      HRegionInfo.toDelimitedByteArray(merged, a, b));
}
/**
 * Checks whether the given region still carries a merge qualifier in
 * hbase:meta.
 *
 * @param services regionserver services used to obtain the catalog tracker;
 *          when null the method answers false without consulting meta
 * @param regionName name of specified region
 * @return true if the lookup yields a pair with at least one non-null
 *         member, i.e. the region has a merge qualifier in META (it will be
 *         cleaned by CatalogJanitor); false otherwise
 * @throws IOException if the meta lookup fails
 */
boolean hasMergeQualifierInMeta(final RegionServerServices services,
    final byte[] regionName) throws IOException {
  if (services == null) {
    return false;
  }
  // Fetch the regions recorded under the merge qualifier, if any.
  final Pair<?, ?> parents = MetaReader.getRegionsFromMergeQualifier(
      services.getCatalogTracker(), regionName);
  if (parents == null) {
    return false;
  }
  // Either side being present means the qualifier is there.
  return parents.getFirst() != null || parents.getSecond() != null;
}
}