// org.apache.hadoop.hbase.master.procedure.ServerRemoteProcedure Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.procedure;
import java.io.IOException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@InterfaceAudience.Private
/**
* The base class for Procedures that run {@link java.util.concurrent.Callable}s on a (remote)
* RegionServer; e.g. asking a RegionServer to split a WAL file as a sub-procedure of the
* ServerCrashProcedure recovery process.
*
* To implement a new Procedure type, extend this class and override remoteCallBuild() and
* complete(). The dispatch and callback will be handled for you here, internally.
*
* The Procedure works as follows. It uses {@link RSProcedureDispatcher}, the same system used
* dispatching Region OPEN and CLOSE RPCs, to pass a Callable to a RegionServer. Examples include
* {@link org.apache.hadoop.hbase.regionserver.SplitWALCallable} and
* {@link org.apache.hadoop.hbase.replication.regionserver.RefreshPeerCallable}. Rather than
* assign/unassign, the Master calls #executeProcedures against the remote RegionServer wrapping a
* Callable in a {@link ExecuteProceduresRequest}. Upon successful dispatch, the Procedure then
* suspends itself on the Master-side and relinqushes its executor worker. On receipt, the
* RegionServer submits the Callable to its executor service. When the Callable completes, it adds
* itself to a queue on the RegionServer side for processing by a background thread, the
* {@link RemoteProcedureResultReporter}. It picks up the completed Callable from the queue and RPCs
* the master at #reportProcedureDone with the procedure id and whether success or failure. The
* master calls complete() setting success or failure state and then reschedules the suspended
* Procedure so it can finish.
*
* Here are some details on operation:
*
* If adding the operation to the dispatcher fails, addOperationToNode will throw
* FailedRemoteDispatchException, and this Procedure will return 'null'. The Procedure Executor will
* then mark this procedure as 'complete' (though we failed to dispatch our task). In this case, the
* upper layer of this procedure must have a way to check if this Procedure really succeeded or not
* and have appropriate handling.
*
* If sending the operation to remote RS failed, dispatcher will call remoteCallFailed() to handle
* this which calls remoteOperationDone with the exception. If the targetServer crashed but this
* procedure has no response, than dispatcher will call remoteOperationFailed() which also calls
* remoteOperationDone with the exception. If the operation is successful, then
* remoteOperationCompleted will be called and actually calls the remoteOperationDone without
* exception. In remoteOperationDone, we'll check if the procedure is already get wake up by others.
* Then developer could implement complete() based on their own purpose. But basic logic is that if
* operation succeed, set succ to true and do the clean work. If operation failed and require to
* resend it to the same server, leave the succ as false. If operation failed and require to resend
* it to another server, set succ to true and upper layer should be able to find out this operation
* not work and send a operation to another server.
*/
public abstract class ServerRemoteProcedure extends Procedure
implements RemoteProcedureDispatcher.RemoteProcedure {
protected static final Logger LOG = LoggerFactory.getLogger(ServerRemoteProcedure.class);
protected ProcedureEvent> event;
protected ServerName targetServer;
protected boolean dispatched;
protected boolean succ;
protected abstract void complete(MasterProcedureEnv env, Throwable error);
@Override
protected synchronized Procedure[] execute(MasterProcedureEnv env)
throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
if (dispatched) {
if (succ) {
return null;
}
dispatched = false;
}
try {
env.getRemoteDispatcher().addOperationToNode(targetServer, this);
} catch (FailedRemoteDispatchException frde) {
LOG.warn("Can not send remote operation {} to {}, this operation will "
+ "be retried to send to another server", this.getProcId(), targetServer);
return null;
}
dispatched = true;
event = new ProcedureEvent<>(this);
event.suspendIfNotReady(this);
throw new ProcedureSuspendedException();
}
@Override
protected synchronized void completionCleanup(MasterProcedureEnv env) {
env.getRemoteDispatcher().removeCompletedOperation(targetServer, this);
}
@Override
public synchronized void remoteCallFailed(MasterProcedureEnv env, ServerName serverName,
IOException exception) {
remoteOperationDone(env, exception);
}
@Override
public synchronized void remoteOperationCompleted(MasterProcedureEnv env) {
remoteOperationDone(env, null);
}
@Override
public synchronized void remoteOperationFailed(MasterProcedureEnv env,
RemoteProcedureException error) {
remoteOperationDone(env, error);
}
synchronized void remoteOperationDone(MasterProcedureEnv env, Throwable error) {
if (this.isFinished()) {
LOG.info("This procedure {} is already finished, skip the rest processes", this.getProcId());
return;
}
if (event == null) {
LOG.warn("procedure event for {} is null, maybe the procedure is created when recovery",
getProcId());
return;
}
complete(env, error);
event.wake(env.getProcedureScheduler());
event = null;
}
}