org.apache.hadoop.hbase.regionserver.SplitTransactionImpl Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hbase-server Show documentation
Show all versions of hbase-server Show documentation
Server functionality for HBase
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
import org.apache.hadoop.hbase.coordination.SplitTransactionCoordination;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.hadoop.hbase.util.ConfigUtil;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HasThread;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.zookeeper.KeeperException;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
@InterfaceAudience.Private
public class SplitTransactionImpl implements SplitTransaction {
// Class logger. NOTE(review): it is registered under SplitTransaction.class (the interface),
// not under this implementation class -- presumably to keep an existing logger name; confirm
// before changing.
private static final Log LOG = LogFactory.getLog(SplitTransaction.class);
/*
* Region to split
*/
private final HRegion parent;
// Descriptors of the two daughter regions. Both are assigned in prepare():
// hri_a covers [parent start key, splitrow) and hri_b covers [splitrow, parent end key).
private HRegionInfo hri_a;
private HRegionInfo hri_b;
// Upper bound, in milliseconds, on how long splitStoreFiles() waits for the store-file
// splitter pool to finish. createDaughters() replaces this default with the value of
// "hbase.regionserver.fileSplitTimeout" when not running in testing mode.
private long fileSplitTimeout = 30000;
// Coordination details obtained from SplitTransactionCoordination.getDefaultDetails();
// populated in execute()/stepsBeforePONR() only when the coordinated state manager is in use.
public SplitTransactionCoordination.SplitTransactionDetails std;
// Result of ConfigUtil.useZKForAssignment(); initialised from the parent's base conf in the
// constructor and recomputed from the server configuration in execute().
boolean useZKForAssignment;
/*
* Row to split around
*/
private final byte [] splitrow;
/*
* Transaction state for listener, only valid during execute and
* rollback
*/
private SplitTransactionPhase currentPhase = SplitTransactionPhase.STARTED;
// Server and services handed to execute(); exposed through getServer() and
// getRegionServerServices(). They stay null until execute() is called.
private Server server;
private RegionServerServices rsServices;
/**
 * Journal record pairing a {@link SplitTransactionPhase} with the time at which it was
 * recorded.
 */
public static class JournalEntryImpl implements JournalEntry {
  private SplitTransactionPhase type;
  private long timestamp;

  /**
   * Records {@code type} stamped with {@link EnvironmentEdgeManager#currentTime()}.
   * @param type phase being journaled
   */
  public JournalEntryImpl(SplitTransactionPhase type) {
    this(type, EnvironmentEdgeManager.currentTime());
  }

  /**
   * Records {@code type} with an explicit timestamp.
   * @param type phase being journaled
   * @param timestamp time to associate with the entry
   */
  public JournalEntryImpl(SplitTransactionPhase type, long timestamp) {
    this.timestamp = timestamp;
    this.type = type;
  }

  @Override
  public SplitTransactionPhase getPhase() {
    return this.type;
  }

  @Override
  public long getTimeStamp() {
    return this.timestamp;
  }

  /** @return the phase followed by {@code " at "} and the timestamp */
  @Override
  public String toString() {
    return new StringBuilder().append(type).append(" at ").append(timestamp).toString();
  }
}
/*
 * Journal of how far the split transaction has progressed. Entries are appended by
 * transition() on forward moves and walked in reverse by rollback().
 */
private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
/**
 * Listeners notified by transition() of every forward phase change and every rollback step.
 */
private final ArrayList<TransactionListener> listeners = new ArrayList<TransactionListener>();
/**
* Constructor
* @param r Region to split
* @param splitrow Row to split around
*/
public SplitTransactionImpl(final Region r, final byte [] splitrow) {
this.parent = (HRegion)r;
this.splitrow = splitrow;
this.journal.add(new JournalEntryImpl(SplitTransactionPhase.STARTED));
useZKForAssignment = ConfigUtil.useZKForAssignment(parent.getBaseConf());
}
/**
 * Moves forward (not a rollback) to {@code nextPhase}.
 * @param nextPhase phase being entered
 * @throws IOException propagated from the two-argument overload
 */
private void transition(SplitTransactionPhase nextPhase) throws IOException {
  transition(nextPhase, false);
}

/**
 * Notifies every registered listener of a phase change and then makes {@code nextPhase} the
 * current phase.
 * @param nextPhase phase being entered (forward) or undone (rollback)
 * @param isRollback when false the phase is journaled and listeners get
 *   {@code transition}; when true nothing is journaled and listeners get {@code rollback}
 * @throws IOException declared for the listener callbacks
 */
private void transition(SplitTransactionPhase nextPhase, boolean isRollback)
    throws IOException {
  final boolean movingForward = !isRollback;
  if (movingForward) {
    // Journal before notifying: should a listener throw, the rollback must
    // start at 'nextPhase'.
    this.journal.add(new JournalEntryImpl(nextPhase));
  }
  int idx = 0;
  while (idx < listeners.size()) {
    TransactionListener current = listeners.get(idx);
    if (isRollback) {
      current.rollback(this, currentPhase, nextPhase);
    } else {
      current.transition(this, currentPhase, nextPhase);
    }
    idx++;
  }
  currentPhase = nextPhase;
}
/**
 * Checks the split inputs and, when they are acceptable, builds the two daughter region
 * descriptors and moves the transaction to the PREPARED phase.
 * @return <code>true</code> if the region is splittable, else <code>false</code> if it is
 *   not (e.g. the parent reports it is not splittable, the split row is null, equals the
 *   start key, or lies outside the region's key range).
 * @throws IOException declared for the prepare and transition steps
 */
public boolean prepare() throws IOException {
  // A null split key means this region is unsplittable; i.e. has refs.
  if (!this.parent.isSplittable() || this.splitrow == null) {
    return false;
  }
  final HRegionInfo hri = this.parent.getRegionInfo();
  parent.prepareToSplit();

  // Validate the split row against the parent's key range.
  final byte [] startKey = hri.getStartKey();
  final byte [] endKey = hri.getEndKey();
  final boolean splitsAtStart = Bytes.equals(startKey, splitrow);
  if (splitsAtStart || !this.parent.getRegionInfo().containsRow(splitrow)) {
    LOG.info("Split row is not inside region key range or is equal to startkey: "
        + Bytes.toStringBinary(this.splitrow));
    return false;
  }

  // Both daughters share one region id.
  final long rid = getDaughterRegionIdTimestamp(hri);
  this.hri_a = new HRegionInfo(hri.getTable(), startKey, this.splitrow, false, rid);
  this.hri_b = new HRegionInfo(hri.getTable(), this.splitrow, endKey, false, rid);
  transition(SplitTransactionPhase.PREPARED);
  return true;
}
/**
 * Picks the region id for the daughters: the current time, unless that is older than the
 * parent's id, in which case the parent's id plus one is used.
 * @param hri Parent {@link HRegionInfo}
 * @return Daughter region id (timestamp) to use.
 */
private static long getDaughterRegionIdTimestamp(final HRegionInfo hri) {
  final long now = EnvironmentEdgeManager.currentTime();
  if (now >= hri.getRegionId()) {
    return now;
  }
  // Regionid is timestamp. It can't be less than that of the parent else it will insert
  // at the wrong location in hbase:meta (See HBASE-710).
  LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() +
      " but current time here is " + now);
  return hri.getRegionId() + 1;
}
// Sentinel used by stepsBeforePONR(): it is compared by identity (!=) to tell "parent was
// closed by another thread" apart from any other close failure, so it must never be
// reassigned -- hence final.
private static final IOException closedByOtherException = new IOException(
    "Failed to close region: already closed by another thread");
/**
 * Convenience overload of {@link #createDaughters(Server, RegionServerServices, User)} that
 * passes a null user, so coprocessor hooks are invoked directly.
 * @param server Hosting server instance. Can be null when testing
 * @param services Used to online/offline regions.
 * @throws IOException If thrown, transaction failed.
 * @return Regions created
 */
/* package */PairOfSameType<Region> createDaughters(final Server server,
    final RegionServerServices services) throws IOException {
  return createDaughters(server, services, null);
}

/**
 * Prepare the regions and region files.
 * @param server Hosting server instance. Can be null when testing (won't try
 * and update in zk if a null server)
 * @param services Used to online/offline regions.
 * @param user when non-null, the coprocessor hooks are run inside
 *   {@code user.getUGI().doAs(...)}; when null they are called directly
 * @throws IOException If thrown, transaction failed.
 *          Call {@link #rollback(Server, RegionServerServices)}
 * @return Regions created
 */
/* package */PairOfSameType<Region> createDaughters(final Server server,
    final RegionServerServices services, User user) throws IOException {
  LOG.info("Starting split of region " + this.parent);
  if ((server != null && server.isStopped()) ||
      (services != null && services.isStopping())) {
    throw new IOException("Server is stopped or stopping");
  }
  assert !this.parent.lock.writeLock().isHeldByCurrentThread():
    "Unsafe to hold write lock while performing RPCs";

  transition(SplitTransactionPhase.BEFORE_PRE_SPLIT_HOOK);

  // Coprocessor callback
  if (this.parent.getCoprocessorHost() != null) {
    if (user == null) {
      // TODO: Remove one of these
      parent.getCoprocessorHost().preSplit();
      parent.getCoprocessorHost().preSplit(splitrow);
    } else {
      try {
        user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws Exception {
            parent.getCoprocessorHost().preSplit();
            parent.getCoprocessorHost().preSplit(splitrow);
            return null;
          }
        });
      } catch (InterruptedException ie) {
        InterruptedIOException iioe = new InterruptedIOException();
        iioe.initCause(ie);
        throw iioe;
      }
    }
  }

  transition(SplitTransactionPhase.AFTER_PRE_SPLIT_HOOK);

  // If true, no cluster to write meta edits to or to update znodes in.
  boolean testing = server == null? true:
      server.getConfiguration().getBoolean("hbase.testing.nocluster", false);
  this.fileSplitTimeout = testing ? this.fileSplitTimeout :
      server.getConfiguration().getLong("hbase.regionserver.fileSplitTimeout",
        this.fileSplitTimeout);

  PairOfSameType<Region> daughterRegions = stepsBeforePONR(server, services, testing);

  // Extra meta mutations a coprocessor may contribute in preSplitBeforePONR.
  final List<Mutation> metaEntries = new ArrayList<Mutation>();
  boolean ret = false;
  if (this.parent.getCoprocessorHost() != null) {
    if (user == null) {
      ret = parent.getCoprocessorHost().preSplitBeforePONR(splitrow, metaEntries);
    } else {
      try {
        ret = user.getUGI().doAs(new PrivilegedExceptionAction<Boolean>() {
          @Override
          public Boolean run() throws Exception {
            return parent.getCoprocessorHost().preSplitBeforePONR(splitrow, metaEntries);
          }
        });
      } catch (InterruptedException ie) {
        InterruptedIOException iioe = new InterruptedIOException();
        iioe.initCause(ie);
        throw iioe;
      }
    }
    // A true return from the hook means the coprocessor bypassed the split.
    if (ret) {
      throw new IOException("Coprocessor bypassing region "
          + this.parent.getRegionInfo().getRegionNameAsString() + " split.");
    }
    // Every coprocessor-supplied mutation must have a row key that parses as a region name.
    try {
      for (Mutation p : metaEntries) {
        HRegionInfo.parseRegionName(p.getRow());
      }
    } catch (IOException e) {
      LOG.error("Row key of mutation from coprocessor is not parsable as region name."
          + "Mutations from coprocessor should only for hbase:meta table.");
      throw e;
    }
  }

  // This is the point of no return.  Adding subsequent edits to .META. as we
  // do below when we do the daughter opens adding each to .META. can fail in
  // various interesting ways the most interesting of which is a timeout
  // BUT the edits all go through (See HBASE-3872).  IF we reach the PONR
  // then subsequent failures need to crash out this regionserver; the
  // server shutdown processing should be able to fix-up the incomplete split.
  // The offlined parent will have the daughters as extra columns.  If
  // we leave the daughter regions in place and do not remove them when we
  // crash out, then they will have their references to the parent in place
  // still and the server shutdown fixup of .META. will point to these
  // regions.
  // We should add PONR JournalEntry before offlineParentInMeta, so even if
  // OfflineParentInMeta timeout, this will cause regionserver exit, and then
  // master ServerShutdownHandler will fix daughter & avoid data loss. (See
  // HBase-4562).
  transition(SplitTransactionPhase.PONR);

  // Edit parent in meta.  Offlines parent region and adds splita and splitb
  // as an atomic update. See HBASE-7721. This update to META
  // will determine whether the region is split or not in case of failures.
  // If it is successful, master will roll-forward, if not, master will rollback
  // and assign the parent region.
  if (!testing && useZKForAssignment) {
    if (metaEntries == null || metaEntries.isEmpty()) {
      MetaTableAccessor.splitRegion(server.getConnection(),
        parent.getRegionInfo(), daughterRegions.getFirst().getRegionInfo(),
        daughterRegions.getSecond().getRegionInfo(), server.getServerName(),
        parent.getTableDesc().getRegionReplication());
    } else {
      offlineParentInMetaAndputMetaEntries(server.getConnection(),
        parent.getRegionInfo(), daughterRegions.getFirst().getRegionInfo(), daughterRegions
            .getSecond().getRegionInfo(), server.getServerName(), metaEntries,
            parent.getTableDesc().getRegionReplication());
    }
  } else if (services != null && !useZKForAssignment) {
    if (!services.reportRegionStateTransition(TransitionCode.SPLIT_PONR,
        parent.getRegionInfo(), hri_a, hri_b)) {
      // Passed PONR, let SSH clean it up
      throw new IOException("Failed to notify master that split passed PONR: "
        + parent.getRegionInfo().getRegionNameAsString());
    }
  }
  return daughterRegions;
}
/**
 * Runs every step that precedes the point of no return: announces the split (through the
 * coordinated state manager or by reporting READY_TO_SPLIT to the master), creates the
 * splits directory, closes and offlines the parent, writes the reference files and creates
 * both daughter regions from them. Each step is journaled through transition().
 * @param server Hosting server instance; may be null, in which case coordination is skipped
 * @param services Used to report state transitions and to offline the parent
 * @param testing when true the parent is not removed from the online regions
 * @return the two daughter regions, first then second
 * @throws IOException if any step fails, including the parent having been closed by another
 *   thread
 */
public PairOfSameType<Region> stepsBeforePONR(final Server server,
    final RegionServerServices services, boolean testing) throws IOException {
  if (useCoordinatedStateManager(server)) {
    if (std == null) {
      std =
          ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
              .getSplitTransactionCoordination().getDefaultDetails();
    }
    ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
        .getSplitTransactionCoordination().startSplitTransaction(parent, server.getServerName(),
          hri_a, hri_b);
  } else if (services != null && !useZKForAssignment) {
    if (!services.reportRegionStateTransition(TransitionCode.READY_TO_SPLIT,
        parent.getRegionInfo(), hri_a, hri_b)) {
      throw new IOException("Failed to get ok from master to split "
        + parent.getRegionInfo().getRegionNameAsString());
    }
  }

  transition(SplitTransactionPhase.SET_SPLITTING);

  if (useCoordinatedStateManager(server)) {
    ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
        .getSplitTransactionCoordination().waitForSplitTransaction(services, parent, hri_a,
          hri_b, std);
  }

  this.parent.getRegionFileSystem().createSplitsDir(hri_a, hri_b);

  transition(SplitTransactionPhase.CREATE_SPLIT_DIR);

  Map<byte[], List<StoreFile>> hstoreFilesToSplit = null;
  Exception exceptionToThrow = null;
  try{
    hstoreFilesToSplit = this.parent.close(false);
  } catch (Exception e) {
    exceptionToThrow = e;
  }
  if (exceptionToThrow == null && hstoreFilesToSplit == null) {
    // The region was closed by a concurrent thread.  We can't continue
    // with the split, instead we must just abandon the split.  If we
    // reopen or split this could cause problems because the region has
    // probably already been moved to a different server, or is in the
    // process of moving to a different server.
    exceptionToThrow = closedByOtherException;
  }
  // Identity comparison against the sentinel: CLOSED_PARENT_REGION is journaled unless the
  // region was closed by someone else.
  if (exceptionToThrow != closedByOtherException) {
    transition(SplitTransactionPhase.CLOSED_PARENT_REGION);
  }
  if (exceptionToThrow != null) {
    if (exceptionToThrow instanceof IOException) throw (IOException)exceptionToThrow;
    throw new IOException(exceptionToThrow);
  }
  if (!testing) {
    services.removeFromOnlineRegions(this.parent, null);
  }

  transition(SplitTransactionPhase.OFFLINED_PARENT);

  // TODO: If splitStoreFiles were multithreaded would we complete steps in
  // less elapsed time?  St.Ack 20100920
  //
  // splitStoreFiles creates daughter region dirs under the parent splits dir
  // Nothing to unroll here if failure -- clean up of CREATE_SPLIT_DIR will
  // clean this up.
  Pair<Integer, Integer> expectedReferences = splitStoreFiles(hstoreFilesToSplit);

  // Log to the journal that we are creating region A, the first daughter
  // region.  We could fail halfway through.  If we do, we could have left
  // stuff in fs that needs cleanup -- a storefile or two.  Thats why we
  // add entry to journal BEFORE rather than AFTER the change.
  transition(SplitTransactionPhase.STARTED_REGION_A_CREATION);

  // The reference count is checked in the splits dir before the daughter is created and in
  // the daughter's directory under the table dir afterwards.
  assertReferenceFileCount(expectedReferences.getFirst(),
      this.parent.getRegionFileSystem().getSplitsDir(this.hri_a));
  Region a = this.parent.createDaughterRegionFromSplits(this.hri_a);
  assertReferenceFileCount(expectedReferences.getFirst(),
      new Path(this.parent.getRegionFileSystem().getTableDir(), this.hri_a.getEncodedName()));

  // Ditto
  transition(SplitTransactionPhase.STARTED_REGION_B_CREATION);

  assertReferenceFileCount(expectedReferences.getSecond(),
      this.parent.getRegionFileSystem().getSplitsDir(this.hri_b));
  Region b = this.parent.createDaughterRegionFromSplits(this.hri_b);
  assertReferenceFileCount(expectedReferences.getSecond(),
      new Path(this.parent.getRegionFileSystem().getTableDir(), this.hri_b.getEncodedName()));

  return new PairOfSameType<Region>(a, b);
}
/**
 * Fails the split when {@code dir} does not hold the expected number of reference files.
 * An expected count of zero disables the check, and the directory is not inspected.
 * @param expectedReferenceFileCount number of reference files that should be present
 * @param dir directory handed to {@link FSUtils#getRegionReferenceFileCount}
 * @throws IOException if the counts differ
 */
void assertReferenceFileCount(int expectedReferenceFileCount, Path dir)
    throws IOException {
  if (expectedReferenceFileCount == 0) {
    return;
  }
  final int found = FSUtils.getRegionReferenceFileCount(parent.getFilesystem(), dir);
  if (expectedReferenceFileCount != found) {
    throw new IOException("Failing split. Expected reference file count isn't equal.");
  }
}
/**
* Perform time consuming opening of the daughter regions. Both daughters are opened in
* parallel on their own DaughterOpener threads; nothing is opened when the server is
* stopped or the services are stopping.
* @param server Hosting server instance. Can be null when testing
* @param services Used to online/offline regions.
* @param a first daughter region
* @param b second daughter region
* @throws IOException If thrown, transaction failed.
* Call {@link #rollback(Server, RegionServerServices)}
*/
/* package */void openDaughters(final Server server,
final RegionServerServices services, Region a, Region b)
throws IOException {
boolean stopped = server != null && server.isStopped();
boolean stopping = services != null && services.isStopping();
// TODO: Is this check needed here?
if (stopped || stopping) {
LOG.info("Not opening daughters " +
b.getRegionInfo().getRegionNameAsString() +
" and " +
a.getRegionInfo().getRegionNameAsString() +
" because stopping=" + stopping + ", stopped=" + stopped);
} else {
// Open daughters in parallel.
DaughterOpener aOpener = new DaughterOpener(server, (HRegion)a);
DaughterOpener bOpener = new DaughterOpener(server, (HRegion)b);
aOpener.start();
bOpener.start();
try {
// Wait for each opener in turn; a phase is journaled only for a daughter that opened
// without an exception.
aOpener.join();
if (aOpener.getException() == null) {
transition(SplitTransactionPhase.OPENED_REGION_A);
}
bOpener.join();
if (bOpener.getException() == null) {
transition(SplitTransactionPhase.OPENED_REGION_B);
}
} catch (InterruptedException e) {
throw (InterruptedIOException)new InterruptedIOException().initCause(e);
}
// Failures are reported only after both threads have been joined; A's failure wins if
// both failed.
if (aOpener.getException() != null) {
throw new IOException("Failed " +
aOpener.getName(), aOpener.getException());
}
if (bOpener.getException() != null) {
throw new IOException("Failed " +
bOpener.getName(), bOpener.getException());
}
if (services != null) {
try {
if (useZKForAssignment) {
// add 2nd daughter first (see HBASE-4335)
services.postOpenDeployTasks(b);
} else if (!services.reportRegionStateTransition(TransitionCode.SPLIT,
parent.getRegionInfo(), hri_a, hri_b)) {
throw new IOException("Failed to report split region to master: "
+ parent.getRegionInfo().getShortNameToLog());
}
// Should add it to OnlineRegions
services.addToOnlineRegions(b);
if (useZKForAssignment) {
services.postOpenDeployTasks(a);
}
services.addToOnlineRegions(a);
} catch (KeeperException ke) {
throw new IOException(ke);
}
}
}
}
/**
 * Runs the transaction without a {@link User} context. Logs a warning when HBase security
 * is enabled, since {@link #execute(Server, RegionServerServices, User)} should be used then.
 * @param server Hosting server instance. Can be null when testing
 * @param services Used to online/offline regions.
 * @return Regions created
 * @throws IOException If thrown, transaction failed.
 */
public PairOfSameType<Region> execute(final Server server,
    final RegionServerServices services)
    throws IOException {
  if (User.isHBaseSecurityEnabled(parent.getBaseConf())) {
    LOG.warn("Should use execute(Server, RegionServerServices, User)");
  }
  return execute(server, services, null);
}

/**
 * Run the transaction.
 * @param server Hosting server instance. Can be null when testing
 * @param services Used to online/offline regions.
 * @param user when non-null, coprocessor hooks are run inside
 *   {@code user.getUGI().doAs(...)}; when null they are called directly
 * @throws IOException If thrown, transaction failed.
 *          Call {@link #rollback(Server, RegionServerServices)}
 * @return Regions created
 * @see #rollback(Server, RegionServerServices)
 */
@Override
public PairOfSameType<Region> execute(final Server server,
    final RegionServerServices services, User user) throws IOException {
  this.server = server;
  this.rsServices = services;
  // Recompute from the live server configuration; with no server, ZK assignment is assumed.
  useZKForAssignment = server == null ? true :
    ConfigUtil.useZKForAssignment(server.getConfiguration());
  if (useCoordinatedStateManager(server)) {
    std =
        ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
            .getSplitTransactionCoordination().getDefaultDetails();
  }
  PairOfSameType<Region> regions = createDaughters(server, services, user);
  // createDaughters() returned, so the PONR phase has been journaled.
  if (this.parent.getCoprocessorHost() != null) {
    if (user == null) {
      parent.getCoprocessorHost().preSplitAfterPONR();
    } else {
      try {
        user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws Exception {
            parent.getCoprocessorHost().preSplitAfterPONR();
            return null;
          }
        });
      } catch (InterruptedException ie) {
        InterruptedIOException iioe = new InterruptedIOException();
        iioe.initCause(ie);
        throw iioe;
      }
    }
  }
  regions = stepsAfterPONR(server, services, regions, user);

  transition(SplitTransactionPhase.COMPLETED);

  return regions;
}
/**
 * Runs the post-PONR steps without a {@link User} context.
 * @param server Hosting server instance. Can be null when testing
 * @param services Used to online/offline regions.
 * @param regions the daughter regions produced before the PONR
 * @return {@code regions}
 * @throws IOException if opening the daughters or a hook fails
 * @deprecated use {@link #stepsAfterPONR(Server, RegionServerServices, PairOfSameType, User)}
 */
@Deprecated
public PairOfSameType<Region> stepsAfterPONR(final Server server,
    final RegionServerServices services, final PairOfSameType<Region> regions)
    throws IOException {
  return stepsAfterPONR(server, services, regions, null);
}

/**
 * Runs the steps that follow the point of no return: opens both daughters, completes the
 * coordinated split transaction when the coordinated state manager is in use, and invokes
 * the postSplit coprocessor hook between the BEFORE_/AFTER_POST_SPLIT_HOOK phases.
 * @param server Hosting server instance. Can be null when testing
 * @param services Used to online/offline regions.
 * @param regions the daughter regions produced before the PONR
 * @param user when non-null, the postSplit hook is run inside
 *   {@code user.getUGI().doAs(...)}; when null it is called directly
 * @return {@code regions}
 * @throws IOException if opening the daughters or the hook fails
 */
public PairOfSameType<Region> stepsAfterPONR(final Server server,
    final RegionServerServices services, final PairOfSameType<Region> regions, User user)
    throws IOException {
  openDaughters(server, services, regions.getFirst(), regions.getSecond());
  if (useCoordinatedStateManager(server)) {
    ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
        .getSplitTransactionCoordination().completeSplitTransaction(services, regions.getFirst(),
          regions.getSecond(), std, parent);
  }

  transition(SplitTransactionPhase.BEFORE_POST_SPLIT_HOOK);

  // Coprocessor callback
  if (parent.getCoprocessorHost() != null) {
    if (user == null) {
      this.parent.getCoprocessorHost().postSplit(regions.getFirst(), regions.getSecond());
    } else {
      try {
        user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws Exception {
            parent.getCoprocessorHost().postSplit(regions.getFirst(), regions.getSecond());
            return null;
          }
        });
      } catch (InterruptedException ie) {
        InterruptedIOException iioe = new InterruptedIOException();
        iioe.initCause(ie);
        throw iioe;
      }
    }
  }

  transition(SplitTransactionPhase.AFTER_POST_SPLIT_HOOK);

  return regions;
}
/**
 * Writes, in one call to {@link MetaTableAccessor#mutateMetaTable}, the caller-supplied
 * meta mutations plus a Put marking the parent offline and split (with its daughters
 * attached) and a Put for each daughter carrying this server as its location.
 * <p>
 * Note: the new Puts are appended to {@code metaEntries} itself; the list is not copied.
 * @param hConnection connection used for the meta mutation
 * @param parent region being split
 * @param splitA first daughter
 * @param splitB second daughter
 * @param serverName server recorded as the daughters' location
 * @param metaEntries additional mutations to apply; modified in place
 * @param regionReplication number of replicas; empty location columns are added to the
 *   daughter Puts for replica ids 1 .. regionReplication-1
 * @throws IOException if the meta mutation fails
 */
private void offlineParentInMetaAndputMetaEntries(HConnection hConnection,
    HRegionInfo parent, HRegionInfo splitA, HRegionInfo splitB,
    ServerName serverName, List<Mutation> metaEntries, int regionReplication)
        throws IOException {
  List<Mutation> mutations = metaEntries;
  HRegionInfo copyOfParent = new HRegionInfo(parent);
  copyOfParent.setOffline(true);
  copyOfParent.setSplit(true);

  //Put for parent
  Put putParent = MetaTableAccessor.makePutFromRegionInfo(copyOfParent);
  MetaTableAccessor.addDaughtersToPut(putParent, splitA, splitB);
  mutations.add(putParent);

  //Puts for daughters
  Put putA = MetaTableAccessor.makePutFromRegionInfo(splitA);
  Put putB = MetaTableAccessor.makePutFromRegionInfo(splitB);

  addLocation(putA, serverName, 1); //these are new regions, openSeqNum = 1 is fine.
  addLocation(putB, serverName, 1);
  mutations.add(putA);
  mutations.add(putB);

  // Add empty locations for region replicas of daughters so that number of replicas can be
  // cached whenever the primary region is looked up from meta
  for (int i = 1; i < regionReplication; i++) {
    addEmptyLocation(putA, i);
    addEmptyLocation(putB, i);
  }

  MetaTableAccessor.mutateMetaTable(hConnection, mutations);
}
/**
 * Adds null-valued server, start-code and sequence-number columns for the given replica to
 * {@code p}, all in the catalog family.
 * @param p Put to extend
 * @param replicaId replica whose location columns are added
 * @return {@code p}
 */
private static Put addEmptyLocation(final Put p, int replicaId){
  final byte[][] locationColumns = {
    MetaTableAccessor.getServerColumn(replicaId),
    MetaTableAccessor.getStartCodeColumn(replicaId),
    MetaTableAccessor.getSeqNumColumn(replicaId)
  };
  for (byte[] column : locationColumns) {
    p.addImmutable(HConstants.CATALOG_FAMILY, column, null);
  }
  return p;
}
/**
 * Adds the server (host and port), start code and open sequence number columns to
 * {@code p}, all in the catalog family.
 * @param p Put to extend
 * @param sn server whose host/port and start code are recorded
 * @param openSeqNum sequence number recorded in the seqnum column
 * @return {@code p}
 */
public Put addLocation(final Put p, final ServerName sn, long openSeqNum) {
  final byte[] hostAndPort = Bytes.toBytes(sn.getHostAndPort());
  p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, hostAndPort);

  final byte[] startCode = Bytes.toBytes(sn.getStartcode());
  p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, startCode);

  final byte[] seqNum = Bytes.toBytes(openSeqNum);
  p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.SEQNUM_QUALIFIER, seqNum);
  return p;
}
/*
 * Opens one daughter region on its own daemon thread. Any Throwable raised by the open is
 * captured rather than propagated; callers read it through getException() after joining.
 */
class DaughterOpener extends HasThread {
  private final Server server;
  private final HRegion r;
  private Throwable failure = null;

  DaughterOpener(final Server s, final HRegion r) {
    super((s == null? "null-services": s.getServerName()) +
        "-daughterOpener=" + r.getRegionInfo().getEncodedName());
    setDaemon(true);
    this.r = r;
    this.server = s;
  }

  /**
   * @return Null if open succeeded else exception that causes us fail open.
   * Call it after this thread exits else you may get wrong view on result.
   */
  Throwable getException() {
    return this.failure;
  }

  @Override
  public void run() {
    try {
      openDaughterRegion(this.server, this.r);
    } catch (Throwable caught) {
      this.failure = caught;
    }
  }
}
/**
 * Opens a single daughter region, reporting progress through a {@link LoggingProgressable}
 * when a server is available.
 * @param server source of the progress-logging interval; when null no reporter is used
 * @param daughter region to open
 * @throws IOException declared for the region open
 * @throws KeeperException declared by this method's signature
 */
void openDaughterRegion(final Server server, final HRegion daughter)
    throws IOException, KeeperException {
  final HRegionInfo hri = daughter.getRegionInfo();
  LoggingProgressable reporter = null;
  if (server != null) {
    final long logInterval = server.getConfiguration().getLong(
        "hbase.regionserver.split.daughter.open.log.interval", 10000);
    reporter = new LoggingProgressable(hri, logInterval);
  }
  daughter.openHRegion(reporter);
}
/**
 * Progress callback that logs "Opening &lt;region&gt;" whenever more than {@code interval}
 * has elapsed since the previous log line. It never requests cancellation.
 */
static class LoggingProgressable implements CancelableProgressable {
  private final HRegionInfo hri;
  private long lastLog = -1;
  private final long interval;

  LoggingProgressable(final HRegionInfo hri, final long interval) {
    this.interval = interval;
    this.hri = hri;
  }

  /** @return always true */
  @Override
  public boolean progress() {
    final long now = EnvironmentEdgeManager.currentTime();
    final long sinceLastLog = now - lastLog;
    if (sinceLastLog > this.interval) {
      LOG.info("Opening " + this.hri.getRegionNameAsString());
      this.lastLog = now;
    }
    return true;
  }
}
/**
 * @param server hosting server, may be null
 * @return true only when a server is present, ZK-based assignment is in use and the server
 *   exposes a coordinated state manager
 */
private boolean useCoordinatedStateManager(final Server server) {
  if (server == null) {
    return false;
  }
  if (!useZKForAssignment) {
    return false;
  }
  return server.getCoordinatedStateManager() != null;
}
/**
* Creates reference files for top and bottom half of the
* @param hstoreFilesToSplit map of store files to create half file references for.
* @return the number of reference files that were created.
* @throws IOException
*/
private Pair splitStoreFiles(
final Map> hstoreFilesToSplit)
throws IOException {
if (hstoreFilesToSplit == null) {
// Could be null because close didn't succeed -- for now consider it fatal
throw new IOException("Close returned empty list of StoreFiles");
}
// The following code sets up a thread pool executor with as many slots as
// there's files to split. It then fires up everything, waits for
// completion and finally checks for any exception
int nbFiles = 0;
for (Map.Entry> entry: hstoreFilesToSplit.entrySet()) {
nbFiles += entry.getValue().size();
}
if (nbFiles == 0) {
// no file needs to be splitted.
return new Pair(0,0);
}
// Default max #threads to use is the smaller of table's configured number of blocking store
// files or the available number of logical cores.
int defMaxThreads = Math.min(parent.conf.getInt(HStore.BLOCKING_STOREFILES_KEY,
HStore.DEFAULT_BLOCKING_STOREFILE_COUNT),
Runtime.getRuntime().availableProcessors());
// Max #threads is the smaller of the number of storefiles or the default max determined above.
int maxThreads = Math.min(parent.conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
defMaxThreads), nbFiles);
LOG.info("Preparing to split " + nbFiles + " storefiles for region " + this.parent +
" using " + maxThreads + " threads");
ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
builder.setNameFormat("StoreFileSplitter-%1$d");
ThreadFactory factory = builder.build();
ThreadPoolExecutor threadPool =
(ThreadPoolExecutor) Executors.newFixedThreadPool(maxThreads, factory);
List>> futures = new ArrayList>> (nbFiles);
// Split each store file.
for (Map.Entry> entry: hstoreFilesToSplit.entrySet()) {
for (StoreFile sf: entry.getValue()) {
StoreFileSplitter sfs = new StoreFileSplitter(entry.getKey(), sf);
futures.add(threadPool.submit(sfs));
}
}
// Shutdown the pool
threadPool.shutdown();
// Wait for all the tasks to finish
try {
boolean stillRunning = !threadPool.awaitTermination(
this.fileSplitTimeout, TimeUnit.MILLISECONDS);
if (stillRunning) {
threadPool.shutdownNow();
// wait for the thread to shutdown completely.
while (!threadPool.isTerminated()) {
Thread.sleep(50);
}
throw new IOException("Took too long to split the" +
" files and create the references, aborting split");
}
} catch (InterruptedException e) {
throw (InterruptedIOException)new InterruptedIOException().initCause(e);
}
int created_a = 0;
int created_b = 0;
// Look for any exception
for (Future> future : futures) {
try {
Pair p = future.get();
created_a += p.getFirst() != null ? 1 : 0;
created_b += p.getSecond() != null ? 1 : 0;
} catch (InterruptedException e) {
throw (InterruptedIOException) new InterruptedIOException().initCause(e);
} catch (ExecutionException e) {
throw new IOException(e);
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Split storefiles for region " + this.parent + " Daughter A: " + created_a
+ " storefiles, Daughter B: " + created_b + " storefiles.");
}
return new Pair(created_a, created_b);
}
/**
 * Splits one store file around {@code splitrow}, once for each daughter.
 * @param family column family that contains the store file
 * @param sf store file to split
 * @return the two paths returned by the region file system's {@code splitStoreFile} for
 *   daughter A (called with {@code false}) and daughter B (called with {@code true})
 * @throws IOException declared for the file-system calls
 */
private Pair<Path, Path> splitStoreFile(final byte[] family, final StoreFile sf)
    throws IOException {
  if (LOG.isDebugEnabled()) {
      LOG.debug("Splitting started for store file: " + sf.getPath() + " for region: " +
                this.parent);
  }
  HRegionFileSystem fs = this.parent.getRegionFileSystem();
  String familyName = Bytes.toString(family);

  Path path_a =
      fs.splitStoreFile(this.hri_a, familyName, sf, this.splitrow, false,
        this.parent.getSplitPolicy());
  Path path_b =
      fs.splitStoreFile(this.hri_b, familyName, sf, this.splitrow, true,
        this.parent.getSplitPolicy());
  if (LOG.isDebugEnabled()) {
      LOG.debug("Splitting complete for store file: " + sf.getPath() + " for region: " +
                this.parent);
  }
  return new Pair<Path,Path>(path_a, path_b);
}
/**
* Utility class used to do the file splitting / reference writing
* in parallel instead of sequentially.
*/
class StoreFileSplitter implements Callable> {
private final byte[] family;
private final StoreFile sf;
/**
* Constructor that takes what it needs to split
* @param family Family that contains the store file
* @param sf which file
*/
public StoreFileSplitter(final byte[] family, final StoreFile sf) {
this.sf = sf;
this.family = family;
}
public Pair call() throws IOException {
return splitStoreFile(family, sf);
}
}
/**
 * Rolls the transaction back without a {@link User} context. Logs a warning when HBase
 * security is enabled, since {@link #rollback(Server, RegionServerServices, User)} should
 * be used then.
 * @param server Hosting server instance (May be null when testing).
 * @param services used to report the revert and to re-online the parent
 * @throws IOException If thrown, rollback failed. Take drastic action.
 * @return True if we successfully rolled back, false if we got to the point
 * of no return and so now need to abort the server to minimize damage.
 */
@Override
public boolean rollback(final Server server, final RegionServerServices services)
    throws IOException {
  if (User.isHBaseSecurityEnabled(parent.getBaseConf())) {
    LOG.warn("Should use rollback(Server, RegionServerServices, User)");
  }
  return rollback(server, services, null);
}

/**
 * Undoes the journaled phases in reverse order, wrapped by the pre/postRollBackSplit
 * coprocessor hooks. The post hook is skipped when the method returns false early.
 * @param server Hosting server instance (May be null when testing).
 * @param services used to report the revert and to re-online the parent
 * @param user when non-null, the coprocessor hooks are run inside
 *   {@code user.getUGI().doAs(...)}; when null they are called directly
 * @throws IOException If thrown, rollback failed. Take drastic action.
 * @return True if we successfully rolled back, false if we got to the point
 * of no return and so now need to abort the server to minimize damage.
 */
@Override
@SuppressWarnings("deprecation")
public boolean rollback(final Server server, final RegionServerServices services, User user)
    throws IOException {
  // Coprocessor callback
  if (this.parent.getCoprocessorHost() != null) {
    if (user == null) {
      this.parent.getCoprocessorHost().preRollBackSplit();
    } else {
      try {
        user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws Exception {
            parent.getCoprocessorHost().preRollBackSplit();
            return null;
          }
        });
      } catch (InterruptedException ie) {
        InterruptedIOException iioe = new InterruptedIOException();
        iioe.initCause(ie);
        throw iioe;
      }
    }
  }

  ListIterator<JournalEntry> iterator =
    this.journal.listIterator(this.journal.size());
  // Iterate in reverse.
  while (iterator.hasPrevious()) {
    JournalEntry je = iterator.previous();

    // Notify listeners of the rollback step; rollback transitions are not journaled.
    transition(je.getPhase(), true);

    switch(je.getPhase()) {

    case SET_SPLITTING:
      if (useCoordinatedStateManager(server) && server instanceof HRegionServer) {
        ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
            .getSplitTransactionCoordination().clean(this.parent.getRegionInfo());
      } else if (services != null && !useZKForAssignment
          && !services.reportRegionStateTransition(TransitionCode.SPLIT_REVERTED,
              parent.getRegionInfo(), hri_a, hri_b)) {
        return false;
      }
      break;

    case CREATE_SPLIT_DIR:
      this.parent.writestate.writesEnabled = true;
      this.parent.getRegionFileSystem().cleanupSplitsDir();
      break;

    case CLOSED_PARENT_REGION:
      try {
        // So, this returns a seqid but if we just closed and then reopened, we
        // should be ok. On close, we flushed using sequenceid obtained from
        // hosting regionserver so no need to propagate the sequenceid returned
        // out of initialize below up into regionserver as we normally do.
        // TODO: Verify.
        this.parent.initialize();
      } catch (IOException e) {
        LOG.error("Failed rollbacking CLOSED_PARENT_REGION of region " +
          this.parent.getRegionInfo().getRegionNameAsString(), e);
        throw new RuntimeException(e);
      }
      break;

    case STARTED_REGION_A_CREATION:
      this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_a);
      break;

    case STARTED_REGION_B_CREATION:
      this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_b);
      break;

    case OFFLINED_PARENT:
      if (services != null) services.addToOnlineRegions(this.parent);
      break;

    case PONR:
      // We got to the point-of-no-return so we need to just abort. Return
      // immediately.  Do not clean up created daughter regions.  They need
      // to be in place so we don't delete the parent region mistakenly.
      // See HBASE-3872.
      return false;

    // Informational only cases
    case STARTED:
    case PREPARED:
    case BEFORE_PRE_SPLIT_HOOK:
    case AFTER_PRE_SPLIT_HOOK:
    case BEFORE_POST_SPLIT_HOOK:
    case AFTER_POST_SPLIT_HOOK:
    case OPENED_REGION_A:
    case OPENED_REGION_B:
    case COMPLETED:
      break;

    default:
      throw new RuntimeException("Unhandled journal entry: " + je);
    }
  }
  // Coprocessor callback
  if (this.parent.getCoprocessorHost() != null) {
    if (user == null) {
      this.parent.getCoprocessorHost().postRollBackSplit();
    } else {
      try {
        user.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws Exception {
            parent.getCoprocessorHost().postRollBackSplit();
            return null;
          }
        });
      } catch (InterruptedException ie) {
        InterruptedIOException iioe = new InterruptedIOException();
        iioe.initCause(ie);
        throw iioe;
      }
    }
  }
  // Every journaled phase was undone without hitting the PONR or a refused revert.
  return true;
}
/** @return descriptor of the first daughter; null until prepare() has assigned it */
HRegionInfo getFirstDaughter() {
  return this.hri_a;
}

/** @return descriptor of the second daughter; null until prepare() has assigned it */
HRegionInfo getSecondDaughter() {
  return this.hri_b;
}
/**
 * @return the live journal list of this transaction (not a copy), in the order the phases
 *   were recorded
 */
@Override
public List<JournalEntry> getJournal() {
  return journal;
}
/**
 * Adds a listener to be notified of every phase transition and rollback step.
 * @param listener listener to register
 * @return this transaction, so calls can be chained
 */
@Override
public SplitTransaction registerTransactionListener(TransactionListener listener) {
  this.listeners.add(listener);
  return this;
}
/** @return the server passed to execute(); null if execute() has not been called */
@Override
public Server getServer() {
  return this.server;
}

/** @return the services passed to execute(); null if execute() has not been called */
@Override
public RegionServerServices getRegionServerServices() {
  return this.rsServices;
}
/**
 * Renders the journal one entry per line. Every entry after the first is followed by the
 * elapsed time since the previous entry, unless that difference is zero.
 */
@Override
public String toString() {
  final StringBuilder out = new StringBuilder();
  int i = 0;
  while (i < journal.size()) {
    JournalEntry entry = journal.get(i);
    out.append(entry.toString());
    if (i > 0) {
      JournalEntry previous = journal.get(i - 1);
      final long elapsed = entry.getTimeStamp() - previous.getTimeStamp();
      if (elapsed != 0) {
        out.append(" (+").append(elapsed).append(" ms)");
      }
    }
    out.append("\n");
    i++;
  }
  return out.toString();
}
}