// org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure (Maven / Gradle / Ivy)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.procedure;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.MasterWalManager;
import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionTransitionProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ServerCrashState;
/**
* Handle crashed server. This is a port to ProcedureV2 of what used to be euphemistically called
* ServerShutdownHandler.
*
* The procedure flow varies dependent on whether meta is assigned and if we are to split logs.
*
*
We come in here after ServerManager has noticed a server has expired. Procedures
* queued on the rpc should have been notified about fail and should be concurrently
* getting themselves ready to assign elsewhere.
*/
@InterfaceAudience.Private
public class ServerCrashProcedure
extends StateMachineProcedure
implements ServerProcedureInterface {
private static final Logger LOG = LoggerFactory.getLogger(ServerCrashProcedure.class);
/**
* Name of the crashed server to process.
*/
private ServerName serverName;
/**
* Whether DeadServer knows that we are processing it.
*/
private boolean notifiedDeadServer = false;
/**
* Regions that were on the crashed server.
*/
private List regionsOnCrashedServer;
private boolean carryingMeta = false;
private boolean shouldSplitWal;
/**
* Call this constructor queuing up a Procedure.
* @param serverName Name of the crashed server.
* @param shouldSplitWal True if we should split WALs as part of crashed server processing.
* @param carryingMeta True if carrying hbase:meta table region.
*/
public ServerCrashProcedure(final MasterProcedureEnv env, final ServerName serverName,
final boolean shouldSplitWal, final boolean carryingMeta) {
this.serverName = serverName;
this.shouldSplitWal = shouldSplitWal;
this.carryingMeta = carryingMeta;
this.setOwner(env.getRequestUser());
}
/**
* Used when deserializing from a procedure store; we'll construct one of these then call
* #deserializeStateData(InputStream). Do not use directly.
*/
public ServerCrashProcedure() {
super();
}
@Override
protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state)
throws ProcedureSuspendedException, ProcedureYieldException {
final MasterServices services = env.getMasterServices();
// HBASE-14802
// If we have not yet notified that we are processing a dead server, we should do now.
if (!notifiedDeadServer) {
services.getServerManager().getDeadServers().notifyServer(serverName);
notifiedDeadServer = true;
}
try {
switch (state) {
case SERVER_CRASH_START:
LOG.info("Start " + this);
// If carrying meta, process it first. Else, get list of regions on crashed server.
if (this.carryingMeta) {
setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
} else {
setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
}
break;
case SERVER_CRASH_SPLIT_META_LOGS:
splitMetaLogs(env);
setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
break;
case SERVER_CRASH_ASSIGN_META:
handleRIT(env, Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO));
addChildProcedure(env.getAssignmentManager()
.createAssignProcedure(RegionInfoBuilder.FIRST_META_REGIONINFO));
setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
break;
case SERVER_CRASH_PROCESS_META:
// not used any more but still leave it here to keep compatible as there maybe old SCP
// which is stored in ProcedureStore which has this state.
processMeta(env);
setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
break;
case SERVER_CRASH_GET_REGIONS:
// If hbase:meta is not assigned, yield.
if (env.getAssignmentManager().waitMetaLoaded(this)) {
throw new ProcedureSuspendedException();
}
this.regionsOnCrashedServer = services.getAssignmentManager().getRegionStates()
.getServerRegionInfoSet(serverName);
// Where to go next? Depends on whether we should split logs at all or
// if we should do distributed log splitting.
if (!this.shouldSplitWal) {
setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
} else {
setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
}
break;
case SERVER_CRASH_SPLIT_LOGS:
splitLogs(env);
setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
break;
case SERVER_CRASH_ASSIGN:
// If no regions to assign, skip assign and skip to the finish.
// Filter out meta regions. Those are handled elsewhere in this procedure.
// Filter changes this.regionsOnCrashedServer.
if (filterDefaultMetaRegions(regionsOnCrashedServer)) {
if (LOG.isTraceEnabled()) {
LOG.trace("Assigning regions " +
RegionInfo.getShortNameToLog(regionsOnCrashedServer) + ", " + this +
"; cycles=" + getCycles());
}
// Handle RIT against crashed server. Will cancel any ongoing assigns/unassigns.
// Returns list of regions we need to reassign.
// NOTE: there is nothing to stop a dispatch happening AFTER this point. Check for the
// condition if a dispatch RPC fails inside in AssignProcedure/UnassignProcedure.
// AssignProcedure just keeps retrying. UnassignProcedure is more complicated. See where
// it does the check by calling am#isLogSplittingDone.
List toAssign = handleRIT(env, regionsOnCrashedServer);
AssignmentManager am = env.getAssignmentManager();
// Do not create assigns for Regions on disabling or disabled Tables.
// We do this inside in the AssignProcedure.
int size = toAssign.size();
if (toAssign.removeIf(r -> !AssignProcedure.assign(env.getMasterServices(), r))) {
LOG.debug("Dropped {} assigns because against disabling/disabled tables",
size - toAssign.size());
}
// Assign regions to new candidate server. See HBASE-23035 for more details.
addChildProcedure(am.createRoundRobinAssignProcedures(toAssign));
setNextState(ServerCrashState.SERVER_CRASH_HANDLE_RIT2);
} else {
setNextState(ServerCrashState.SERVER_CRASH_FINISH);
}
break;
case SERVER_CRASH_HANDLE_RIT2:
// Noop. Left in place because we used to call handleRIT here for a second time
// but no longer necessary since HBASE-20634.
setNextState(ServerCrashState.SERVER_CRASH_FINISH);
break;
case SERVER_CRASH_FINISH:
services.getAssignmentManager().getRegionStates().removeServer(serverName);
services.getServerManager().getDeadServers().finish(serverName);
return Flow.NO_MORE_STATE;
default:
throw new UnsupportedOperationException("unhandled state=" + state);
}
} catch (IOException e) {
LOG.warn("Failed state=" + state + ", retry " + this + "; cycles=" + getCycles(), e);
}
return Flow.HAS_MORE_STATE;
}
private void processMeta(final MasterProcedureEnv env) throws IOException {
LOG.debug("{}; processing hbase:meta", this);
// Assign meta if still carrying it. Check again: region may be assigned because of RIT timeout
final AssignmentManager am = env.getMasterServices().getAssignmentManager();
for (RegionInfo hri: am.getRegionStates().getServerRegionInfoSet(serverName)) {
if (!isDefaultMetaRegion(hri)) {
continue;
}
addChildProcedure(new RecoverMetaProcedure(serverName, this.shouldSplitWal));
}
}
private boolean filterDefaultMetaRegions(final List regions) {
if (regions == null) return false;
regions.removeIf(this::isDefaultMetaRegion);
return !regions.isEmpty();
}
private boolean isDefaultMetaRegion(final RegionInfo hri) {
return hri.getTable().equals(TableName.META_TABLE_NAME) &&
RegionReplicaUtil.isDefaultReplica(hri);
}
private void splitMetaLogs(MasterProcedureEnv env) throws IOException {
LOG.debug("Splitting meta WALs {}", this);
MasterWalManager mwm = env.getMasterServices().getMasterWalManager();
AssignmentManager am = env.getMasterServices().getAssignmentManager();
am.getRegionStates().metaLogSplitting(serverName);
mwm.splitMetaLog(serverName);
am.getRegionStates().metaLogSplit(serverName);
LOG.debug("Done splitting meta WALs {}", this);
}
private void splitLogs(final MasterProcedureEnv env) throws IOException {
LOG.debug("Splitting WALs {}", this);
MasterWalManager mwm = env.getMasterServices().getMasterWalManager();
AssignmentManager am = env.getMasterServices().getAssignmentManager();
// TODO: For Matteo. Below BLOCKs!!!! Redo so can relinquish executor while it is running.
// PROBLEM!!! WE BLOCK HERE.
am.getRegionStates().logSplitting(this.serverName);
mwm.splitLog(this.serverName);
if (!carryingMeta) {
mwm.archiveMetaLog(this.serverName);
}
am.getRegionStates().logSplit(this.serverName);
LOG.debug("Done splitting WALs {}", this);
}
@Override
protected void rollbackState(MasterProcedureEnv env, ServerCrashState state)
throws IOException {
// Can't rollback.
throw new UnsupportedOperationException("unhandled state=" + state);
}
@Override
protected ServerCrashState getState(int stateId) {
return ServerCrashState.forNumber(stateId);
}
@Override
protected int getStateId(ServerCrashState state) {
return state.getNumber();
}
@Override
protected ServerCrashState getInitialState() {
return ServerCrashState.SERVER_CRASH_START;
}
@Override
protected boolean abort(MasterProcedureEnv env) {
// TODO
return false;
}
@Override
protected LockState acquireLock(final MasterProcedureEnv env) {
if (env.getProcedureScheduler().waitServerExclusiveLock(this, getServerName())) {
return LockState.LOCK_EVENT_WAIT;
}
return LockState.LOCK_ACQUIRED;
}
@Override
protected void releaseLock(final MasterProcedureEnv env) {
env.getProcedureScheduler().wakeServerExclusiveLock(this, getServerName());
}
@Override
public void toStringClassDetails(StringBuilder sb) {
sb.append(getClass().getSimpleName());
sb.append(" server=");
sb.append(serverName);
sb.append(", splitWal=");
sb.append(shouldSplitWal);
sb.append(", meta=");
sb.append(carryingMeta);
}
@Override
protected void serializeStateData(ProcedureStateSerializer serializer)
throws IOException {
super.serializeStateData(serializer);
MasterProcedureProtos.ServerCrashStateData.Builder state =
MasterProcedureProtos.ServerCrashStateData.newBuilder().
setServerName(ProtobufUtil.toServerName(this.serverName)).
setCarryingMeta(this.carryingMeta).
setShouldSplitWal(this.shouldSplitWal);
if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
for (RegionInfo hri: this.regionsOnCrashedServer) {
state.addRegionsOnCrashedServer(ProtobufUtil.toRegionInfo(hri));
}
}
serializer.serialize(state.build());
}
@Override
protected void deserializeStateData(ProcedureStateSerializer serializer)
throws IOException {
super.deserializeStateData(serializer);
MasterProcedureProtos.ServerCrashStateData state =
serializer.deserialize(MasterProcedureProtos.ServerCrashStateData.class);
this.serverName = ProtobufUtil.toServerName(state.getServerName());
this.carryingMeta = state.hasCarryingMeta()? state.getCarryingMeta(): false;
// shouldSplitWAL has a default over in pb so this invocation will always work.
this.shouldSplitWal = state.getShouldSplitWal();
int size = state.getRegionsOnCrashedServerCount();
if (size > 0) {
this.regionsOnCrashedServer = new ArrayList<>(size);
for (org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionInfo ri: state.getRegionsOnCrashedServerList()) {
this.regionsOnCrashedServer.add(ProtobufUtil.toRegionInfo(ri));
}
}
}
@Override
public ServerName getServerName() {
return this.serverName;
}
@Override
public boolean hasMetaTableRegion() {
return this.carryingMeta;
}
@Override
public ServerOperationType getServerOperationType() {
return ServerOperationType.CRASH_HANDLER;
}
/**
* For this procedure, yield at end of each successful flow step so that all crashed servers
* can make progress rather than do the default which has each procedure running to completion
* before we move to the next. For crashed servers, especially if running with distributed log
* replay, we will want all servers to come along; we do not want the scenario where a server is
* stuck waiting for regions to online so it can replay edits.
*/
@Override
protected boolean isYieldBeforeExecuteFromState(MasterProcedureEnv env, ServerCrashState state) {
return true;
}
@Override
protected boolean shouldWaitClientAck(MasterProcedureEnv env) {
// The operation is triggered internally on the server
// the client does not know about this procedure.
return false;
}
/**
* Handle any outstanding RIT that are up against this.serverName, the crashed server.
* Notify them of crash. Remove assign entries from the passed in regions
* otherwise we have two assigns going on and they will fight over who has lock.
* Notify Unassigns. If unable to unassign because server went away, unassigns block waiting
* on the below callback from a ServerCrashProcedure before proceeding.
* @param regions Regions on the Crashed Server.
* @return List of regions we should assign to new homes (not same as regions on crashed server).
*/
private List handleRIT(final MasterProcedureEnv env, List regions) {
if (regions == null || regions.isEmpty()) {
return Collections.emptyList();
}
AssignmentManager am = env.getMasterServices().getAssignmentManager();
List toAssign = new ArrayList(regions);
// Get an iterator so can remove items.
final Iterator it = toAssign.iterator();
ServerCrashException sce = null;
while (it.hasNext()) {
final RegionInfo hri = it.next();
RegionTransitionProcedure rtp = am.getRegionStates().getRegionTransitionProcedure(hri);
if (rtp == null) {
continue;
}
// Make sure the RIT is against this crashed server. In the case where there are many
// processings of a crashed server -- backed up for whatever reason (slow WAL split) --
// then a previous SCP may have already failed an assign, etc., and it may have a new
// location target; DO NOT fail these else we make for assign flux.
ServerName rtpServerName = rtp.getServer(env);
if (rtpServerName == null) {
LOG.warn("RIT with ServerName null! " + rtp);
continue;
}
if (!rtpServerName.equals(this.serverName)) continue;
LOG.info("pid=" + getProcId() + " found RIT " + rtp + "; " +
rtp.getRegionState(env).toShortString());
// Notify RIT on server crash.
if (sce == null) {
sce = new ServerCrashException(getProcId(), getServerName());
}
if(rtp.remoteCallFailed(env, this.serverName, sce)) {
// If an assign, remove from passed-in list of regions so we subsequently do not create
// a new assign; the exisitng assign after the call to remoteCallFailed will recalibrate
// and assign to a server other than the crashed one; no need to create new assign.
// If an unassign, do not return this region; the above cancel will wake up the unassign and
// it will complete. Done.
it.remove();
}
}
return toAssign;
}
@Override
protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
return env.getMasterServices().getMasterMetrics().getServerCrashProcMetrics();
}
}