
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.replication;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.ServerProcedureInterface;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationGroupOffset;
import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
import org.apache.hadoop.hbase.replication.ReplicationQueueId;
import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ClaimReplicationQueuesStateData;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;

/**
* Used to assign the replication queues of a dead server to other region servers.
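* <p>
* A minimal scheduling sketch (illustrative only; {@code procExec} is a hypothetical
* {@code ProcedureExecutor} handle, as in reality this procedure is submitted by the master as
* part of dead server processing):
* </p>
* <pre>
* procExec.submitProcedure(new ClaimReplicationQueuesProcedure(crashedServer));
* </pre>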
* @deprecated Use {@link AssignReplicationQueuesProcedure} instead; kept only for backwards
* compatibility.
*/
@Deprecated
@InterfaceAudience.Private
public class ClaimReplicationQueuesProcedure extends Procedure<MasterProcedureEnv>
implements ServerProcedureInterface {
private static final Logger LOG = LoggerFactory.getLogger(ClaimReplicationQueuesProcedure.class);
private ServerName crashedServer;
private RetryCounter retryCounter;
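
// No-arg constructor, required by the procedure framework: instances are re-created reflectively
// when procedures are loaded from the store, then state is restored via deserializeStateData.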
public ClaimReplicationQueuesProcedure() {
}
public ClaimReplicationQueuesProcedure(ServerName crashedServer) {
this.crashedServer = crashedServer;
}
@Override
public ServerName getServerName() {
return crashedServer;
}
@Override
public boolean hasMetaTableRegion() {
return false;
}
@Override
public ServerOperationType getServerOperationType() {
return ServerOperationType.CLAIM_REPLICATION_QUEUES;
}
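
// Lists the replication queues left on the crashed server and dispatches one
// ClaimReplicationQueueRemoteProcedure per queue to a live region server. Returning the child
// procedures suspends this procedure until they complete, after which execute runs again and
// claims whatever queues remain.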
@Override
protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
ReplicationQueueStorage storage = env.getReplicationPeerManager().getQueueStorage();
try {
List<ReplicationQueueId> queues = storage.listAllQueueIds(crashedServer);
Set<String> existQueuePeerIds = new HashSet<>();
// this is for upgrading to the new region replication framework, where we will delete the
// legacy region_replica_replication peer directly, without deleting the replication queues
for (Iterator<ReplicationQueueId> iter = queues.iterator(); iter.hasNext();) {
ReplicationQueueId queueId = iter.next();
if (queueId.getPeerId().equals(ServerRegionReplicaUtil.REGION_REPLICA_REPLICATION_PEER)) {
LOG.info("Found replication queue {} for legacy region replication peer, "
+ "skipping claiming and removing...", queueId);
iter.remove();
storage.removeQueue(queueId);
} else if (!queueId.isRecovered()) {
existQueuePeerIds.add(queueId.getPeerId());
}
}
List<ReplicationPeerDescription> peers = env.getReplicationPeerManager().listPeers(null);
// TODO: the implementation is not complete yet; if there are retries, we need to know whether
// the replication queue for the given peer has been claimed or not, otherwise this logic will
// introduce redundant replication queues for the same peer. This logic is only here to make
// some UTs pass for now.
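// For every peer that has no non-recovered queue on the crashed server, record a queue whose
// offset starts at the very beginning of the WAL group, so the claiming below covers these
// peers as well.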
for (ReplicationPeerDescription peer : peers) {
if (!existQueuePeerIds.contains(peer.getPeerId())) {
ReplicationQueueId queueId = new ReplicationQueueId(crashedServer, peer.getPeerId());
env.getReplicationPeerManager().getQueueStorage().setOffset(queueId,
crashedServer.toString(), ReplicationGroupOffset.BEGIN, Collections.emptyMap());
queues.add(queueId);
}
}
if (queues.isEmpty()) {
LOG.debug("Finish claiming replication queues for {}", crashedServer);
// we are done
return null;
}
LOG.debug("There are {} replication queues need to be claimed for {}", queues.size(),
crashedServer);
List<ServerName> targetServers =
env.getMasterServices().getServerManager().getOnlineServersList();
if (targetServers.isEmpty()) {
throw new ReplicationException("no region server available");
}
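// Shuffle so queue claiming spreads randomly over the live servers, and hand out at most one
// queue per server in this round; any remaining queues are claimed when this procedure
// re-executes after the child procedures complete.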
Collections.shuffle(targetServers);
ClaimReplicationQueueRemoteProcedure[] procs =
new ClaimReplicationQueueRemoteProcedure[Math.min(queues.size(), targetServers.size())];
for (int i = 0; i < procs.length; i++) {
procs[i] = new ClaimReplicationQueueRemoteProcedure(queues.get(i), targetServers.get(i));
}
return procs;
} catch (ReplicationException e) {
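// Treat storage failures as transient: suspend this procedure with exponential backoff and
// retry later instead of failing it.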
if (retryCounter == null) {
retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
}
long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
LOG.warn("Failed to claim replication queues for {}, suspend {}secs {}; {};", crashedServer,
backoff / 1000, e);
setTimeout(Math.toIntExact(backoff));
setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
skipPersistence();
throw new ProcedureSuspendedException();
}
}
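
// Invoked when the backoff timeout set in execute fires: instead of marking the procedure as
// failed, move it back to RUNNABLE and re-queue it so execute is retried.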
@Override
protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
setState(ProcedureProtos.ProcedureState.RUNNABLE);
env.getProcedureScheduler().addFront(this);
return false;
}
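
// Claiming replication queues is not undone once started, so rollback is unsupported.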
@Override
protected void rollback(MasterProcedureEnv env) throws IOException, InterruptedException {
throw new UnsupportedOperationException();
}
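
// Not abortable: the queues of the dead server must eventually be claimed.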
@Override
protected boolean abort(MasterProcedureEnv env) {
return false;
}
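
// Only the crashed server name is persisted; everything else is recomputed from replication
// queue storage, so the procedure can resume after a master restart.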
@Override
protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
serializer.serialize(ClaimReplicationQueuesStateData.newBuilder()
.setCrashedServer(ProtobufUtil.toServerName(crashedServer)).build());
}
@Override
protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
ClaimReplicationQueuesStateData data =
serializer.deserialize(ClaimReplicationQueuesStateData.class);
crashedServer = ProtobufUtil.toServerName(data.getCrashedServer());
}
}